(detect_coding_iso2022): Don't check the byte length of

[bpt/emacs.git] / src / coding.c
diff --git a/src/coding.c b/src/coding.c

index 6973a6d..9e6333f 100644 (file)
--- a/src/coding.c
+++ b/src/coding.c
@@ -25,10 +25,11 @@ Boston, MA 02111-1307, USA.  */
    2. Emacs' internal format (emacs-mule) handlers
    3. ISO2022 handlers
    4. Shift-JIS and BIG5 handlers
-  5. End-of-line handlers
-  6. C library functions
-  7. Emacs Lisp library functions
-  8. Post-amble
+  5. CCL handlers
+  6. End-of-line handlers
+  7. C library functions
+  8. Emacs Lisp library functions
+  9. Post-amble
  
  */
  
@@ -277,6 +278,7 @@ Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
  Lisp_Object Qno_conversion, Qundecided;
  Lisp_Object Qcoding_system_history;
  Lisp_Object Qsafe_charsets;
+Lisp_Object Qvalid_codes;
  
  extern Lisp_Object Qinsert_file_contents, Qwrite_region;
  Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
@@ -315,7 +317,7 @@ Lisp_Object Vcoding_system_for_write;
  Lisp_Object Vlast_coding_system_used;
  
  /* A vector of length 256 which contains information about special
-   Latin codes (espepcially for dealing with Microsoft code).  */
+   Latin codes (especially for dealing with Microsoft codes).  */
  Lisp_Object Vlatin_extra_code_table;
  
  /* Flag to inhibit code conversion of end-of-line format.  */
@@ -334,6 +336,9 @@ struct coding_system safe_terminal_coding;
  /* Coding system of what is sent from terminal keyboard.  */
  struct coding_system keyboard_coding;
  
+/* Default coding system to be used to write a file.  */
+struct coding_system default_buffer_file_coding;
+
  Lisp_Object Vfile_coding_system_alist;
  Lisp_Object Vprocess_coding_system_alist;
  Lisp_Object Vnetwork_coding_system_alist;
@@ -358,26 +363,33 @@ char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
    "coding-category-iso-8-2",
    "coding-category-iso-7-else",
    "coding-category-iso-8-else",
+  "coding-category-ccl",
    "coding-category-big5",
    "coding-category-raw-text",
    "coding-category-binary"
  };
  
-/* Table pointers to coding systems corresponding to each coding
+/* Table of pointers to coding systems corresponding to each of the coding
     categories.  */
  struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
  
-/* Flag to tell if we look up unification table on character code
+/* Table of coding category masks.  Nth element is a mask for a coding
+   category of which priority is Nth.  */
+static
+int coding_priorities[CODING_CATEGORY_IDX_MAX];
+
+/* Flag to tell if we look up translation table on character code
     conversion.  */
-Lisp_Object Venable_character_unification;
-/* Standard unification table to look up on decoding (reading).  */
-Lisp_Object Vstandard_character_unification_table_for_decode;
-/* Standard unification table to look up on encoding (writing).  */
-Lisp_Object Vstandard_character_unification_table_for_encode;
+Lisp_Object Venable_character_translation;
+/* Standard translation table to look up on decoding (reading).  */
+Lisp_Object Vstandard_translation_table_for_decode;
+/* Standard translation table to look up on encoding (writing).  */
+Lisp_Object Vstandard_translation_table_for_encode;
  
-Lisp_Object Qcharacter_unification_table;
-Lisp_Object Qcharacter_unification_table_for_decode;
-Lisp_Object Qcharacter_unification_table_for_encode;
+Lisp_Object Qtranslation_table;
+Lisp_Object Qtranslation_table_id;
+Lisp_Object Qtranslation_table_for_decode;
+Lisp_Object Qtranslation_table_for_encode;
  
  /* Alist of charsets vs revision number.  */
  Lisp_Object Vcharset_revision_alist;
@@ -643,11 +655,12 @@ detect_coding_emacs_mule (src, src_end)
  
  enum iso_code_class_type iso_code_class[256];
  
-#define CHARSET_OK(idx, charset)                       \
-  (coding_system_table[idx]->safe_charsets[charset]    \
-   || (CODING_SPEC_ISO_REQUESTED_DESIGNATION           \
-       (coding_system_table[idx], charset)             \
-       != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
+#define CHARSET_OK(idx, charset)                               \
+  (coding_system_table[idx]                                    \
+   && (coding_system_table[idx]->safe_charsets[charset]                \
+       || (CODING_SPEC_ISO_REQUESTED_DESIGNATION               \
+            (coding_system_table[idx], charset)                        \
+           != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)))
  
  #define SHIFT_OUT_OK(idx) \
    (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
@@ -670,7 +683,7 @@ detect_coding_iso2022 (src, src_end)
  {
    int mask = CODING_CATEGORY_MASK_ISO;
    int mask_found = 0;
-  int reg[4], shift_out = 0;
+  int reg[4], shift_out = 0, single_shifting = 0;
    int c, c1, i, charset;
  
    reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
@@ -680,6 +693,7 @@ detect_coding_iso2022 (src, src_end)
        switch (c)
         {
         case ISO_CODE_ESC:
+         single_shifting = 0;
           if (src >= src_end)
             break;
           c = *src++;
@@ -768,6 +782,7 @@ detect_coding_iso2022 (src, src_end)
           break;
  
         case ISO_CODE_SO:
+         single_shifting = 0;
           if (shift_out == 0
               && (reg[1] >= 0
                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
@@ -780,6 +795,7 @@ detect_coding_iso2022 (src, src_end)
           break;
           
         case ISO_CODE_SI:
+         single_shifting = 0;
           if (shift_out == 1)
             {
               /* Locking shift in.  */
@@ -789,6 +805,7 @@ detect_coding_iso2022 (src, src_end)
           break;
  
         case ISO_CODE_CSI:
+         single_shifting = 0;
         case ISO_CODE_SS2:
         case ISO_CODE_SS3:
           {
@@ -802,6 +819,7 @@ detect_coding_iso2022 (src, src_end)
                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
                     & CODING_FLAG_ISO_SINGLE_SHIFT)
                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
+               single_shifting = 1;
               }
             if (VECTORP (Vlatin_extra_code_table)
                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
@@ -820,9 +838,13 @@ detect_coding_iso2022 (src, src_end)
  
         default:
           if (c < 0x80)
-           break;
+           {
+             single_shifting = 0;
+             break;
+           }
           else if (c < 0xA0)
             {
+             single_shifting = 0;
               if (VECTORP (Vlatin_extra_code_table)
                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
                 {
@@ -847,12 +869,19 @@ detect_coding_iso2022 (src, src_end)
               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
-             while (src < src_end && *src >= 0xA0)
-               src++;
-             if ((src - src_begin - 1) & 1 && src < src_end)
-               mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
-             else
-               mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
+             /* Check the length of succeeding codes of the range
+                 0xA0..0xFF.  If the byte length is odd, we exclude
+                 CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
+                 when we are not single shifting.  */
+             if (!single_shifting)
+               {
+                 while (src < src_end && *src >= 0xA0)
+                   src++;
+                 if ((src - src_begin - 1) & 1 && src < src_end)
+                   mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
+                 else
+                   mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
+               }
             }
           break;
         }
@@ -878,21 +907,21 @@ detect_coding_iso2022 (src, src_end)
           *dst++ = 0xFF;                                                \
         coding->composing += 2;                                         \
        }                                                                        \
-    if ((charset) >= 0)                                                        \
+    if (charset_alt >= 0)                                              \
        {                                                                        \
-       if (CHARSET_DIMENSION (charset) == 2)                           \
+       if (CHARSET_DIMENSION (charset_alt) == 2)                       \
           {                                                             \
             ONE_MORE_BYTE (c2);                                         \
             if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F         \
                 && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0)  \
               {                                                         \
                 src--;                                                  \
-               c2 = ' ';                                               \
+               charset_alt = CHARSET_ASCII;                            \
               }                                                         \
           }                                                             \
-       if (!NILP (unification_table)                                   \
-           && ((c_alt = unify_char (unification_table,                 \
-                                    -1, (charset), c1, c2)) >= 0))     \
+       if (!NILP (translation_table)                                   \
+           && ((c_alt = translate_char (translation_table,             \
+                                        -1, charset_alt, c1, c2)) >= 0)) \
           SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
        }                                                                        \
      if (charset_alt == CHARSET_ASCII || charset_alt < 0)               \
@@ -943,7 +972,8 @@ detect_coding_iso2022 (src, src_end)
     Else, if it contains only valid codes, return 0.
     Else return the length of the composing sequence.  */
  
-int check_composing_code (coding, src, src_end)
+int
+check_composing_code (coding, src, src_end)
       struct coding_system *coding;
       unsigned char *src, *src_end;
  {
@@ -982,7 +1012,9 @@ int check_composing_code (coding, src, src_end)
             invalid_code_found = 1;
         }
      }
-  return ((coding->mode & CODING_MODE_LAST_BLOCK) ? src_end - src_start : -1);
+  return (invalid_code_found
+         ? src - src_start
+         : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1));
  }
  
  /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
@@ -1005,12 +1037,12 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
    /* Charsets invoked to graphic plane 0 and 1 respectively.  */
    int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
    int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
-  Lisp_Object unification_table
-    = coding->character_unification_table_for_decode;
+  Lisp_Object translation_table
+    = coding->translation_table_for_decode;
    int result = CODING_FINISH_NORMAL;
  
-  if (!NILP (Venable_character_unification) && NILP (unification_table))
-    unification_table = Vstandard_character_unification_table_for_decode;
+  if (!NILP (Venable_character_translation) && NILP (translation_table))
+    translation_table = Vstandard_translation_table_for_decode;
  
    coding->produced_char = 0;
    coding->fake_multibyte = 0;
@@ -1222,9 +1254,12 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
  
                 result1 = check_composing_code (coding, src, src_end);
                 if (result1 == 0)
-                 coding->composing = (c1 == '0'
-                                      ? COMPOSING_NO_RULE_HEAD
-                                      : COMPOSING_WITH_RULE_HEAD);
+                 {
+                   coding->composing = (c1 == '0'
+                                        ? COMPOSING_NO_RULE_HEAD
+                                        : COMPOSING_WITH_RULE_HEAD);
+                   coding->produced_char++;
+                 }
                 else if (result1 > 0)
                   {
                     if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst)
@@ -1247,7 +1282,6 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
  
             case '1':           /* end composing */
               coding->composing = COMPOSING_NO;
-             coding->produced_char++;
               break;
  
             case '[':           /* specification of direction */
@@ -1552,32 +1586,33 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
        dst = encode_invocation_designation (charset, coding, dst);      \
    } while (1)
  
-#define ENCODE_ISO_CHARACTER(charset, c1, c2)                            \
-  do {                                                                   \
-    int c_alt, charset_alt;                                              \
-    if (!NILP (unification_table)                                        \
-       && ((c_alt = unify_char (unification_table, -1, charset, c1, c2)) \
-           >= 0))                                                        \
-      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                           \
-    else                                                                 \
-      charset_alt = charset;                                             \
-    if (CHARSET_DIMENSION (charset_alt) == 1)                            \
-      {                                                                          \
-       if (charset == CHARSET_ASCII                                      \
-           && coding->flags & CODING_FLAG_ISO_USE_ROMAN)                 \
-         charset_alt = charset_latin_jisx0201;                           \
-       ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);                \
-      }                                                                          \
-    else                                                                 \
-      {                                                                          \
-       if (charset == charset_jisx0208                                   \
-           && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)                \
-         charset_alt = charset_jisx0208_1978;                            \
-       ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);            \
-      }                                                                          \
-    if (! COMPOSING_P (coding->composing))                               \
-      coding->consumed_char++;                                           \
-     } while (0)
+#define ENCODE_ISO_CHARACTER(charset, c1, c2)                  \
+  do {                                                         \
+    int c_alt, charset_alt;                                    \
+    if (!NILP (translation_table)                              \
+       && ((c_alt = translate_char (translation_table, -1,     \
+                                    charset, c1, c2))          \
+           >= 0))                                              \
+      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                 \
+    else                                                       \
+      charset_alt = charset;                                   \
+    if (CHARSET_DIMENSION (charset_alt) == 1)                  \
+      {                                                                \
+       if (charset == CHARSET_ASCII                            \
+           && coding->flags & CODING_FLAG_ISO_USE_ROMAN)       \
+         charset_alt = charset_latin_jisx0201;                 \
+       ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);      \
+      }                                                                \
+    else                                                       \
+      {                                                                \
+       if (charset == charset_jisx0208                         \
+           && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)      \
+         charset_alt = charset_jisx0208_1978;                  \
+       ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);  \
+      }                                                                \
+    if (! COMPOSING_P (coding->composing))                     \
+      coding->consumed_char++;                                 \
+  } while (0)
  
  /* Produce designation and invocation codes at a place pointed by DST
     to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
@@ -1710,7 +1745,7 @@ encode_designation_at_bol (coding, table, src, src_end, dstp)
           unsigned char c1, c2;
  
           SPLIT_STRING(src, bytes, charset, c1, c2);
-         if ((c_alt = unify_char (table, -1, charset, c1, c2)) >= 0)
+         if ((c_alt = translate_char (table, -1, charset, c1, c2)) >= 0)
             charset = CHAR_CHARSET (c_alt);
         }
  
@@ -1750,12 +1785,12 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
       from DST_END to assure overflow checking is necessary only at the
       head of loop.  */
    unsigned char *adjusted_dst_end = dst_end - 19;
-  Lisp_Object unification_table
-      = coding->character_unification_table_for_encode;
+  Lisp_Object translation_table
+      = coding->translation_table_for_encode;
    int result = CODING_FINISH_NORMAL;
  
-  if (!NILP (Venable_character_unification) && NILP (unification_table))
-    unification_table = Vstandard_character_unification_table_for_encode;
+  if (!NILP (Venable_character_translation) && NILP (translation_table))
+    translation_table = Vstandard_translation_table_for_encode;
  
    coding->consumed_char = 0;
    coding->fake_multibyte = 0;
@@ -1775,7 +1810,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
           && CODING_SPEC_ISO_BOL (coding))
         {
           /* We have to produce designation sequences if any now.  */
-         encode_designation_at_bol (coding, unification_table,
+         encode_designation_at_bol (coding, translation_table,
                                      src, src_end, &dst);
           CODING_SPEC_ISO_BOL (coding) = 0;
         }
@@ -1868,8 +1903,8 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
             {
               /* invalid sequence */
               *dst++ = c1;
-             *dst++ = c2;
-             coding->consumed_char += 2;
+             src--;
+             coding->consumed_char++;
             }
           else
             ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
@@ -1881,9 +1916,8 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
             {
               /* invalid sequence */
               *dst++ = c1;
-             *dst++ = c2;
-             *dst++ = c3;
-             coding->consumed_char += 3;
+             src -= 2;
+             coding->consumed_char++;
             }
           else if (c1 < LEADING_CODE_PRIVATE_11)
             ENCODE_ISO_CHARACTER (c1, c2, c3);
@@ -1897,10 +1931,8 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
             {
               /* invalid sequence */
               *dst++ = c1;
-             *dst++ = c2;
-             *dst++ = c3;
-             *dst++ = c4;
-             coding->consumed_char += 4;
+             src -= 3;
+             coding->consumed_char++;
             }
           else
             ENCODE_ISO_CHARACTER (c2, c3, c4);
@@ -1912,8 +1944,8 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
             {
               /* invalid sequence */
               *dst++ = c1;
-             *dst++ = c2;
-             coding->consumed_char += 2;
+             src--;
+             coding->consumed_char++;
             }
           else if (c2 == 0xFF)
             {
@@ -1946,18 +1978,18 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
        break;
      }
  
-  if (src < src_end)
+  if (src < src_end && result == CODING_FINISH_NORMAL)
+    result = CODING_FINISH_INSUFFICIENT_DST;
+
+  /* If this is the last block of the text to be encoded, we must
+     reset graphic planes and registers to the initial state, and
+     flush out the carryover if any.  */
+  if (coding->mode & CODING_MODE_LAST_BLOCK)
      {
-      if (result == CODING_FINISH_NORMAL)
-       result = CODING_FINISH_INSUFFICIENT_DST;
-      else
-       /* If this is the last block of the text to be encoded, we
-          must reset graphic planes and registers to the initial
-          state, and flush out the carryover if any.  */
-       if (coding->mode & CODING_MODE_LAST_BLOCK)
-         ENCODE_RESET_PLANE_AND_REGISTER;
+      ENCODE_RESET_PLANE_AND_REGISTER;
+      if (COMPOSING_P (coding->composing))
+       ENCODE_COMPOSITION_END;
      }
-
    coding->consumed = src - source;
    coding->produced = coding->produced_char = dst - destination;
    return result;
@@ -1981,7 +2013,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
     (character set)     (range)
     ASCII               0x00 .. 0x7F
     KATAKANA-JISX0201   0xA0 .. 0xDF
-   JISX0208 (1st byte) 0x80 .. 0x9F and 0xE0 .. 0xFF
+   JISX0208 (1st byte) 0x80 .. 0x9F and 0xE0 .. 0xEF
             (2nd byte)  0x40 .. 0xFF
     -------------------------------
  
@@ -2041,9 +2073,9 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
  #define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                    \
    do {                                                                 \
      int c_alt, charset_alt = (charset);                                        \
-    if (!NILP (unification_table)                                      \
-       && ((c_alt = unify_char (unification_table,                     \
-                                -1, (charset), c1, c2)) >= 0))         \
+    if (!NILP (translation_table)                                      \
+       && ((c_alt = translate_char (translation_table,                 \
+                                    -1, (charset), c1, c2)) >= 0))     \
           SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
      if (charset_alt == CHARSET_ASCII || charset_alt < 0)               \
        DECODE_CHARACTER_ASCII (c1);                                     \
@@ -2053,54 +2085,55 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
        DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);               \
    } while (0)
  
-#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                      \
-  do {                                                                   \
-    int c_alt, charset_alt;                                              \
-    if (!NILP (unification_table)                                        \
-        && ((c_alt = unify_char (unification_table, -1, charset, c1, c2)) \
-           >= 0))                                                        \
-      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                           \
-    else                                                                 \
-      charset_alt = charset;                                             \
-    if (charset_alt == charset_ascii)                                    \
-      *dst++ = c1;                                                       \
-    else if (CHARSET_DIMENSION (charset_alt) == 1)                       \
-      {                                                                          \
-       if (sjis_p && charset_alt == charset_katakana_jisx0201)           \
-         *dst++ = c1;                                                    \
-       else                                                              \
-         {                                                               \
-           *dst++ = charset_alt, *dst++ = c1;                            \
-           coding->fake_multibyte = 1;                                   \
-         }                                                               \
-      }                                                                          \
-    else                                                                 \
-      {                                                                          \
-       c1 &= 0x7F, c2 &= 0x7F;                                           \
-       if (sjis_p && charset_alt == charset_jisx0208)                    \
-         {                                                               \
-           unsigned char s1, s2;                                         \
-                                                                         \
-           ENCODE_SJIS (c1, c2, s1, s2);                                 \
-           *dst++ = s1, *dst++ = s2;                                     \
-           coding->fake_multibyte = 1;                                   \
-         }                                                               \
-       else if (!sjis_p                                                  \
-                && (charset_alt == charset_big5_1                        \
-                    || charset_alt == charset_big5_2))                   \
-         {                                                               \
-           unsigned char b1, b2;                                         \
-                                                                         \
-           ENCODE_BIG5 (charset_alt, c1, c2, b1, b2);                    \
-           *dst++ = b1, *dst++ = b2;                                     \
-         }                                                               \
-       else                                                              \
-         {                                                               \
-           *dst++ = charset_alt, *dst++ = c1, *dst++ = c2;               \
-           coding->fake_multibyte = 1;                                   \
-         }                                                               \
-      }                                                                          \
-    coding->consumed_char++;                                             \
+#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)            \
+  do {                                                         \
+    int c_alt, charset_alt;                                    \
+    if (!NILP (translation_table)                              \
+       && ((c_alt = translate_char (translation_table, -1,     \
+                                    charset, c1, c2))          \
+           >= 0))                                              \
+      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                 \
+    else                                                       \
+      charset_alt = charset;                                   \
+    if (charset_alt == charset_ascii)                          \
+      *dst++ = c1;                                             \
+    else if (CHARSET_DIMENSION (charset_alt) == 1)             \
+      {                                                                \
+       if (sjis_p && charset_alt == charset_katakana_jisx0201) \
+         *dst++ = c1;                                          \
+       else                                                    \
+         {                                                     \
+           *dst++ = charset_alt, *dst++ = c1;                  \
+           coding->fake_multibyte = 1;                         \
+         }                                                     \
+      }                                                                \
+    else                                                       \
+      {                                                                \
+       c1 &= 0x7F, c2 &= 0x7F;                                 \
+       if (sjis_p && charset_alt == charset_jisx0208)          \
+         {                                                     \
+           unsigned char s1, s2;                               \
+                                                               \
+           ENCODE_SJIS (c1, c2, s1, s2);                       \
+           *dst++ = s1, *dst++ = s2;                           \
+           coding->fake_multibyte = 1;                         \
+         }                                                     \
+       else if (!sjis_p                                        \
+                && (charset_alt == charset_big5_1              \
+                    || charset_alt == charset_big5_2))         \
+         {                                                     \
+           unsigned char b1, b2;                               \
+                                                               \
+           ENCODE_BIG5 (charset_alt, c1, c2, b1, b2);          \
+           *dst++ = b1, *dst++ = b2;                           \
+         }                                                     \
+       else                                                    \
+         {                                                     \
+           *dst++ = charset_alt, *dst++ = c1, *dst++ = c2;     \
+           coding->fake_multibyte = 1;                         \
+         }                                                     \
+      }                                                                \
+    coding->consumed_char++;                                   \
    } while (0);
  
  /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
@@ -2169,12 +2202,12 @@ decode_coding_sjis_big5 (coding, source, destination,
       from DST_END to assure overflow checking is necessary only at the
       head of loop.  */
    unsigned char *adjusted_dst_end = dst_end - 3;
-  Lisp_Object unification_table
-      = coding->character_unification_table_for_decode;
+  Lisp_Object translation_table
+      = coding->translation_table_for_decode;
    int result = CODING_FINISH_NORMAL;
  
-  if (!NILP (Venable_character_unification) && NILP (unification_table))
-    unification_table = Vstandard_character_unification_table_for_decode;
+  if (!NILP (Venable_character_translation) && NILP (translation_table))
+    translation_table = Vstandard_translation_table_for_decode;
  
    coding->produced_char = 0;
    coding->fake_multibyte = 0;
@@ -2226,69 +2259,47 @@ decode_coding_sjis_big5 (coding, source, destination,
         }
        else if (c1 < 0x80)
         DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
-      else if (c1 < 0xA0)
+      else
         {
-         /* SJIS -> JISX0208 */
           if (sjis_p)
             {
-             ONE_MORE_BYTE (c2);
-             if (c2 >= 0x40)
+             if (c1 < 0xA0 || (c1 >= 0xE0 && c1 < 0xF0))
                 {
-                 DECODE_SJIS (c1, c2, c3, c4);
-                 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
+                 /* SJIS -> JISX0208 */
+                 ONE_MORE_BYTE (c2);
+                 if (c2 >= 0x40)
+                   {
+                     DECODE_SJIS (c1, c2, c3, c4);
+                     DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
+                   }
+                 else
+                   goto label_invalid_code_2;
                 }
+             else if (c1 < 0xE0)
+               /* SJIS -> JISX0201-Kana */
+               DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
+                                           /* dummy */ c2);
               else
-               goto label_invalid_code_2;
+               goto label_invalid_code_1;
             }
           else
-           goto label_invalid_code_1;
-       }
-      else if (c1 < 0xE0)
-       {
-         /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */
-         if (sjis_p)
-           DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
-                                       /* dummy */ c2);
-         else
-           {
-             int charset;
-
-             ONE_MORE_BYTE (c2);
-             if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
-               {
-                 DECODE_BIG5 (c1, c2, charset, c3, c4);
-                 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
-               }
-             else
-               goto label_invalid_code_2;
-           }
-       }
-      else                     /* C1 >= 0xE0 */
-       {
-         /* SJIS -> JISX0208, BIG5 -> Big5 */
-         if (sjis_p)
             {
-             ONE_MORE_BYTE (c2);
-             if (c2 >= 0x40)
+             /* BIG5 -> Big5 */
+             if (c1 >= 0xA1 && c1 <= 0xFE)
                 {
-                 DECODE_SJIS (c1, c2, c3, c4);
-                 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
-               }
-             else
-               goto label_invalid_code_2;
-           }
-         else
-           {
-             int charset;
+                 ONE_MORE_BYTE (c2);
+                 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
+                   {
+                     int charset;
  
-             ONE_MORE_BYTE (c2);
-             if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
-               {
-                 DECODE_BIG5 (c1, c2, charset, c3, c4);
-                 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
+                     DECODE_BIG5 (c1, c2, charset, c3, c4);
+                     DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
+                   }
+                 else
+                   goto label_invalid_code_2;
                 }
               else
-               goto label_invalid_code_2;
+               goto label_invalid_code_1;
             }
         }
        continue;
@@ -2358,12 +2369,12 @@ encode_coding_sjis_big5 (coding, source, destination,
       from DST_END to assure overflow checking is necessary only at the
       head of loop.  */
    unsigned char *adjusted_dst_end = dst_end - 1;
-  Lisp_Object unification_table
-      = coding->character_unification_table_for_encode;
+  Lisp_Object translation_table
+      = coding->translation_table_for_encode;
    int result = CODING_FINISH_NORMAL;
  
-  if (!NILP (Venable_character_unification) && NILP (unification_table))
-    unification_table = Vstandard_character_unification_table_for_encode;
+  if (!NILP (Venable_character_translation) && NILP (translation_table))
+    translation_table = Vstandard_translation_table_for_encode;
  
    coding->consumed_char = 0;
    coding->fake_multibyte = 0;
@@ -2463,7 +2474,34 @@ encode_coding_sjis_big5 (coding, source, destination,
  }
  
  \f
-/*** 5. End-of-line handlers ***/
+/*** 5. CCL handlers ***/
+
+/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
+   Check if a text is encoded in a coding system whose encoder and
+   decoder are written as CCL programs.  If it is, return
+   CODING_CATEGORY_MASK_CCL, else return 0.  */
+
+int
+detect_coding_ccl (src, src_end)
+     unsigned char *src, *src_end;
+{
+  struct coding_system *ccl_coding
+    = coding_system_table[CODING_CATEGORY_IDX_CCL];
+  unsigned char *p;
+
+  /* No coding system assigned to coding-category-ccl: no match.  */
+  if (ccl_coding == NULL)
+    return 0;
+
+  /* Every byte must be flagged in the valid-codes table.  */
+  for (p = src; p < src_end; p++)
+    if (ccl_coding->spec.ccl.valid_codes[*p] == 0)
+      return 0;
+  return CODING_CATEGORY_MASK_CCL;
+}
+
+\f
+/*** 6. End-of-line handlers ***/
  
  /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
     This function is called only when `coding->eol_type' is
@@ -2585,7 +2623,7 @@ decode_eol (coding, source, destination, src_bytes, dst_bytes)
        else
         safe_bcopy (source, destination, src_bytes);
        src += src_bytes;
-      dst += dst_bytes;
+      dst += src_bytes;
        coding->fake_multibyte = 1;
        break;
      }
@@ -2652,18 +2690,16 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes)
        if (dst_bytes)
         bcopy (source, destination, src_bytes);
        else
-       {
-         safe_bcopy (source, destination, src_bytes);
-         dst_bytes = src_bytes;
-       }
-      if (coding->eol_type == CODING_EOL_CRLF)
+       safe_bcopy (source, destination, src_bytes);
+      dst_bytes = src_bytes;
+      if (coding->eol_type == CODING_EOL_CR)
         {
           while (src_bytes--)
             {
               if ((c = *dst++) == '\n')
                 dst[-1] = '\r';
               else if (BASE_LEADING_CODE_P (c))
-                 coding->fake_multibyte = 1;
+               coding->fake_multibyte = 1;
             }
         }
        else
@@ -2685,7 +2721,7 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes)
  }
  
  \f
-/*** 6. C library functions ***/
+/*** 7. C library functions ***/
  
  /* In Emacs Lisp, coding system is represented by a Lisp symbol which
     has a property `coding-system'.  The value of this property is a
@@ -2811,26 +2847,23 @@ setup_coding_system (coding_system, coding)
  
    /* Initialize remaining fields.  */
    coding->composing = 0;
-  coding->character_unification_table_for_decode = Qnil;
-  coding->character_unification_table_for_encode = Qnil;
+  coding->translation_table_for_decode = Qnil;
+  coding->translation_table_for_encode = Qnil;
  
    /* Get values of coding system properties:
       `post-read-conversion', `pre-write-conversion',
-     `character-unification-table-for-decode',
-     `character-unification-table-for-encode'.  */
+     `translation-table-for-decode', `translation-table-for-encode'.  */
    plist = XVECTOR (coding_spec)->contents[3];
    coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
    coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
-  val = Fplist_get (plist, Qcharacter_unification_table_for_decode);
+  val = Fplist_get (plist, Qtranslation_table_for_decode);
    if (SYMBOLP (val))
-    val = Fget (val, Qcharacter_unification_table_for_decode);
-  coding->character_unification_table_for_decode
-    = CHAR_TABLE_P (val) ? val : Qnil;
-  val = Fplist_get (plist, Qcharacter_unification_table_for_encode);
+    val = Fget (val, Qtranslation_table_for_decode);
+  coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
+  val = Fplist_get (plist, Qtranslation_table_for_encode);
    if (SYMBOLP (val))
-    val = Fget (val, Qcharacter_unification_table_for_encode);
-  coding->character_unification_table_for_encode
-    = CHAR_TABLE_P (val) ? val : Qnil;
+    val = Fget (val, Qtranslation_table_for_encode);
+  coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
    val = Fplist_get (plist, Qcoding_category);
    if (!NILP (val))
      {
@@ -2960,7 +2993,8 @@ setup_coding_system (coding_system, coding)
               }
             else if (CONSP (flags[i]))
               {
-               Lisp_Object tail = flags[i];
+               Lisp_Object tail;
+               tail = flags[i];
  
                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
                 if (INTEGERP (XCONS (tail)->car)
@@ -3044,16 +3078,49 @@ setup_coding_system (coding_system, coding)
        coding->common_flags
         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
        {
-       Lisp_Object val = XVECTOR (coding_spec)->contents[4];
+       Lisp_Object val;
+       Lisp_Object decoder, encoder;
+
+       val = XVECTOR (coding_spec)->contents[4];
         if (CONSP  (val)
-           && VECTORP (XCONS (val)->car)
-           && VECTORP (XCONS (val)->cdr))
+           && SYMBOLP (XCONS (val)->car)
+           && !NILP (decoder = Fget (XCONS (val)->car, Qccl_program_idx))
+           && !NILP (decoder = Fcdr (Faref (Vccl_program_table, decoder)))
+           && SYMBOLP (XCONS (val)->cdr)
+           && !NILP (encoder = Fget (XCONS (val)->cdr, Qccl_program_idx))
+           && !NILP (encoder = Fcdr (Faref (Vccl_program_table, encoder))))
           {
-           setup_ccl_program (&(coding->spec.ccl.decoder), XCONS (val)->car);
-           setup_ccl_program (&(coding->spec.ccl.encoder), XCONS (val)->cdr);
+           setup_ccl_program (&(coding->spec.ccl.decoder), decoder);
+           setup_ccl_program (&(coding->spec.ccl.encoder), encoder);
           }
         else
           goto label_invalid_coding_system;
+
+       bzero (coding->spec.ccl.valid_codes, 256); /* Nothing valid yet.  */
+       val = Fplist_get (plist, Qvalid_codes);
+       if (CONSP (val))
+         {
+           Lisp_Object this;
+
+           for (; CONSP (val); val = XCONS (val)->cdr)
+             {
+               this = XCONS (val)->car;
+               if (INTEGERP (this)
+                   && XINT (this) >= 0 && XINT (this) < 256)
+                 coding->spec.ccl.valid_codes[XINT (this)] = 1;
+               else if (CONSP (this)
+                        && INTEGERP (XCONS (this)->car)
+                        && INTEGERP (XCONS (this)->cdr))
+                 {
+                   int start = XINT (XCONS (this)->car);
+                   int end = XINT (XCONS (this)->cdr);
+
+                   if (start >= 0 && start <= end && end < 256)
+                     while (start <= end) /* END itself is valid too.  */
+                       coding->spec.ccl.valid_codes[start++] = 1;
+                 }
+             }
+         }
        }
        coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
        break;
@@ -3076,6 +3143,33 @@ setup_coding_system (coding_system, coding)
    return -1;
  }
  
+/* Setup raw-text or one of its subsidiaries in the structure
+   coding_system CODING according to the already setup value eol_type
+   in CODING.  CODING should be setup for some coding system in
+   advance.  */
+
+void
+setup_raw_text_coding_system (coding)
+     struct coding_system *coding;
+{
+  Lisp_Object eol_variants;
+
+  /* Already raw-text: leave CODING untouched.  */
+  if (coding->type == coding_type_raw_text)
+    return;
+
+  coding->type = coding_type_raw_text;
+  coding->symbol = Qraw_text;
+  /* An undecided eol format keeps the base `raw-text' symbol.  */
+  if (coding->eol_type == CODING_EOL_UNDECIDED)
+    return;
+
+  /* Otherwise pick the subsidiary matching the eol type.  */
+  eol_variants = Fget (Qraw_text, Qeol_type);
+  if (VECTORP (eol_variants) && XVECTOR (eol_variants)->size == 3)
+    coding->symbol = XVECTOR (eol_variants)->contents[coding->eol_type];
+}
+
  /* Emacs has a mechanism to automatically detect a coding system if it
     is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
     it's impossible to distinguish some coding systems accurately
@@ -3143,6 +3237,12 @@ setup_coding_system (coding_system, coding)
         as BIG5.  Assigned the coding-system (Lisp symbol)
         `cn-big5' by default.
  
+   o coding-category-ccl
+
+       The category for a coding system of which encoder/decoder is
+       written in CCL programs.  The default value is nil, i.e., no
+       coding system is assigned.
+
     o coding-category-binary
  
         The category for a coding system not categorized in any of the
@@ -3159,6 +3259,9 @@ setup_coding_system (coding_system, coding)
  
  */
  
+static
+int ascii_skip_code[256];
+
  /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
     If it detects possible coding systems, return an integer in which
     appropriate flag bits are set.  Flag bits are defined by macros
@@ -3173,30 +3276,24 @@ detect_coding_mask (source, src_bytes, priorities, skip)
  {
    register unsigned char c;
    unsigned char *src = source, *src_end = source + src_bytes;
-  unsigned int mask = (CODING_CATEGORY_MASK_ISO_7BIT
-                      | CODING_CATEGORY_MASK_ISO_SHIFT);
+  unsigned int mask;
    int i;
  
    /* At first, skip all ASCII characters and control characters except
       for three ISO2022 specific control characters.  */
+  ascii_skip_code[ISO_CODE_SO] = 0;
+  ascii_skip_code[ISO_CODE_SI] = 0;
+  ascii_skip_code[ISO_CODE_ESC] = 0;
+
   label_loop_detect_coding:
-  while (src < src_end)
-    {
-      c = *src;
-      if (c >= 0x80
-         || ((mask & CODING_CATEGORY_MASK_ISO_7BIT)
-             && c == ISO_CODE_ESC)
-         || ((mask & CODING_CATEGORY_MASK_ISO_SHIFT)
-             && (c == ISO_CODE_SI || c == ISO_CODE_SO)))
-       break;
-      src++;
-    }
+  while (src < src_end && ascii_skip_code[*src]) src++;
    *skip = src - source;
  
    if (src >= src_end)
      /* We found nothing other than ASCII.  There's nothing to do.  */
      return 0;
  
+  c = *src;
    /* The text seems to be encoded in some multilingual coding system.
       Now, try to find in which coding system the text is encoded.  */
    if (c < 0x80)
@@ -3208,9 +3305,10 @@ detect_coding_mask (source, src_bytes, priorities, skip)
         {
           /* No valid ISO2022 code follows C.  Try again.  */
           src++;
-         mask = (c != ISO_CODE_ESC
-                 ? CODING_CATEGORY_MASK_ISO_7BIT
-                 : CODING_CATEGORY_MASK_ISO_SHIFT);
+         if (c == ISO_CODE_ESC)
+           ascii_skip_code[ISO_CODE_ESC] = 1;
+         else
+           ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
           goto label_loop_detect_coding;
         }
        if (priorities)
@@ -3251,20 +3349,31 @@ detect_coding_mask (source, src_bytes, priorities, skip)
                 | CODING_CATEGORY_MASK_SJIS
                 | CODING_CATEGORY_MASK_BIG5);
  
+      /* Or, we may have to consider the possibility of CCL.  */
+      if (coding_system_table[CODING_CATEGORY_IDX_CCL]
+         && (coding_system_table[CODING_CATEGORY_IDX_CCL]
+             ->spec.ccl.valid_codes)[c])
+       try |= CODING_CATEGORY_MASK_CCL;
+
        mask = 0;
        if (priorities)
         {
           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
             {
-             priorities[i] &= try;
-             if (priorities[i] & CODING_CATEGORY_MASK_ISO)
+             if (priorities[i] & try & CODING_CATEGORY_MASK_ISO)
                 mask = detect_coding_iso2022 (src, src_end);
-             else if (priorities[i] & CODING_CATEGORY_MASK_SJIS)
+             else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
                 mask = detect_coding_sjis (src, src_end);
-             else if (priorities[i] & CODING_CATEGORY_MASK_BIG5)
+             else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
                 mask = detect_coding_big5 (src, src_end);      
-             else if (priorities[i] & CODING_CATEGORY_MASK_EMACS_MULE)
+             else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
                 mask = detect_coding_emacs_mule (src, src_end);      
+             else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
+               mask = detect_coding_ccl (src, src_end);
+             else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
+               mask = CODING_CATEGORY_MASK_RAW_TEXT;
+             else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
+               mask = CODING_CATEGORY_MASK_BINARY;
               if (mask)
                 goto label_return_highest_only;
             }
@@ -3277,9 +3386,11 @@ detect_coding_mask (source, src_bytes, priorities, skip)
        if (try & CODING_CATEGORY_MASK_BIG5)
         mask |= detect_coding_big5 (src, src_end);      
        if (try & CODING_CATEGORY_MASK_EMACS_MULE)
-       mask |= detect_coding_emacs_mule (src, src_end);      
+       mask |= detect_coding_emacs_mule (src, src_end);
+      if (try & CODING_CATEGORY_MASK_CCL)
+       mask |= detect_coding_ccl (src, src_end);
      }
-  return (mask | CODING_CATEGORY_MASK_RAW_TEXT);
+  return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
  
   label_return_highest_only:
    for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
@@ -3301,27 +3412,10 @@ detect_coding (coding, src, src_bytes)
  {
    unsigned int idx;
    int skip, mask, i;
-  int priorities[CODING_CATEGORY_IDX_MAX];
-  Lisp_Object val = Vcoding_category_list;
-
-  i = 0;
-  while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
-    {
-      if (! SYMBOLP (XCONS (val)->car))
-       break;
-      idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
-      if (idx >= CODING_CATEGORY_IDX_MAX)
-       break;
-      priorities[i++] = (1 << idx);
-      val = XCONS (val)->cdr;
-    }
-  /* If coding-category-list is valid and contains all coding
-     categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
-     the following code saves Emacs from craching.  */
-  while (i < CODING_CATEGORY_IDX_MAX)
-    priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
+  Lisp_Object val;
  
-  mask = detect_coding_mask (src, src_bytes, priorities, &skip);
+  val = Vcoding_category_list;
+  mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip);
    coding->heading_ascii = skip;
  
    if (!mask) return;
@@ -3336,8 +3430,9 @@ detect_coding (coding, src, src_bytes)
  
    if (coding->eol_type != CODING_EOL_UNDECIDED)
      {
-      Lisp_Object tmp = Fget (val, Qeol_type);
+      Lisp_Object tmp;
  
+      tmp = Fget (val, Qeol_type);
        if (VECTORP (tmp))
         val = XVECTOR (tmp)->contents[coding->eol_type];
      }
@@ -3528,6 +3623,9 @@ ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
      = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
    int result;
  
+  if (encodep)
+    ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
+
    coding->produced = ccl_driver (ccl, source, destination,
                                  src_bytes, dst_bytes, &(coding->consumed));
    if (encodep)
@@ -3750,7 +3848,9 @@ shrink_decoding_region (beg, end, coding, str)
        return;
      }
  
-  if (coding->heading_ascii >= 0)
+  eol_conversion = (coding->eol_type != CODING_EOL_LF);
+
+  if ((! eol_conversion) && (coding->heading_ascii >= 0))
      /* Detection routine has already found how much we can skip at the
         head.  */
      *beg += coding->heading_ascii;
@@ -3766,8 +3866,6 @@ shrink_decoding_region (beg, end, coding, str)
        endp_orig = endp = begp + *end - *beg;
      }
  
-  eol_conversion = (coding->eol_type != CODING_EOL_LF);
-
    switch (coding->type)
      {
      case coding_type_emacs_mule:
@@ -3776,8 +3874,12 @@ shrink_decoding_region (beg, end, coding, str)
         {
           if (coding->heading_ascii < 0)
             while (begp < endp && *begp != '\r' && *begp < 0x80) begp++;
-         while (begp < endp && *(endp - 1) != '\r' && *(endp - 1) < 0x80)
+         while (begp < endp && endp[-1] != '\r' && endp[-1] < 0x80)
             endp--;
+         /* Do not consider LF as ascii if preceded by CR, since that
+             confuses eol decoding. */
+         if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
+           endp++;
         }
        else
         begp = endp;
@@ -3799,6 +3901,10 @@ shrink_decoding_region (beg, end, coding, str)
         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
        else
         while (begp < endp && endp[-1] < 0x80) endp--;
+      /* Do not consider LF as ascii if preceded by CR, since that
+        confuses eol decoding. */
+      if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
+       endp++;
        if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
         endp++;
        break;
@@ -3823,6 +3929,10 @@ shrink_decoding_region (beg, end, coding, str)
             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
           else
             while (begp < endp && endp[-1] < 0x80) endp--;
+         /* Do not consider LF as ascii if preceded by CR, since that
+             confuses eol decoding. */
+         if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
+           endp++;
           break;
  
         case CODING_CATEGORY_IDX_ISO_7:
@@ -3837,6 +3947,10 @@ shrink_decoding_region (beg, end, coding, str)
             while (begp < endp
                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC)
               endp--;
+         /* Do not consider LF as ascii if preceded by CR, since that
+             confuses eol decoding. */
+         if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
+           endp++;
           if (begp < endp && endp[-1] == ISO_CODE_ESC)
             {
               if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
@@ -3960,12 +4074,18 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
    int len = to - from, len_byte = to_byte - from_byte;
    int require, inserted, inserted_byte;
    int head_skip, tail_skip, total_skip;
-  Lisp_Object saved_coding_symbol = Qnil;
+  Lisp_Object saved_coding_symbol;
    int multibyte = !NILP (current_buffer->enable_multibyte_characters);
    int first = 1;
    int fake_multibyte = 0;
    unsigned char *src, *dst;
-  Lisp_Object deletion = Qnil;
+  Lisp_Object deletion;
+
+  deletion = Qnil;
+  saved_coding_symbol = Qnil;
+
+  if (from < PT && PT < to)
+    SET_PT_BOTH (from, from_byte);
  
    if (replace)
      {
@@ -4213,7 +4333,7 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
           inserted += len_byte;
           inserted_byte += len_byte;
           while (len_byte--)
-           *src++ = *dst++;
+           *dst++ = *src++;
           fake_multibyte = 1;
           break;
         }
@@ -4314,9 +4434,10 @@ code_convert_string (str, coding, encodep, nocopy)
    int from = 0, to = XSTRING (str)->size;
    int to_byte = STRING_BYTES (XSTRING (str));
    struct gcpro gcpro1;
-  Lisp_Object saved_coding_symbol = Qnil;
+  Lisp_Object saved_coding_symbol;
    int result;
  
+  saved_coding_symbol = Qnil;
    if (encodep && !NILP (coding->pre_write_conversion)
        || !encodep && !NILP (coding->post_read_conversion))
      {
@@ -4416,14 +4537,19 @@ code_convert_string (str, coding, encodep, nocopy)
    if (encodep)
      str = make_unibyte_string (buf, len + coding->produced);
    else
-    str = make_string_from_bytes (buf, len + coding->produced_char,
-                                 len + coding->produced);
+    {
+      int chars= (coding->fake_multibyte
+                 ? multibyte_chars_in_text (buf + from, coding->produced)
+                 : coding->produced_char);
+      str = make_multibyte_string (buf, len + chars, len + coding->produced);
+    }
+
    return str;
  }
  
  \f
  #ifdef emacs
-/*** 7. Emacs Lisp library functions ***/
+/*** 8. Emacs Lisp library functions ***/
  
  DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
    "Return t if OBJECT is nil or a coding-system.\n\
@@ -4501,7 +4627,7 @@ detect_coding_system (src, src_bytes, highest)
    coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
    eol_type  = detect_eol_type (src, src_bytes, &dummy);
    if (eol_type == CODING_EOL_INCONSISTENT)
-    eol_type == CODING_EOL_UNDECIDED;
+    eol_type = CODING_EOL_UNDECIDED;
  
    if (!coding_mask)
      {
@@ -4513,7 +4639,7 @@ detect_coding_system (src, src_bytes, highest)
           if (VECTORP (val2))
             val = XVECTOR (val2)->contents[eol_type];
         }
-      return val;
+      return (highest ? val : Fcons (val, Qnil));
      }
  
    /* At first, gather possible coding systems in VAL.  */
@@ -4532,10 +4658,11 @@ detect_coding_system (src, src_bytes, highest)
    if (!highest)
      val = Fnreverse (val);
  
-  /* Then, substitute the elements by subsidiary coding systems.  */
+  /* Then, replace the elements with subsidiary coding systems.  */
    for (tmp = val; !NILP (tmp); tmp = XCONS (tmp)->cdr)
      {
-      if (eol_type != CODING_EOL_UNDECIDED)
+      if (eol_type != CODING_EOL_UNDECIDED
+         && eol_type != CODING_EOL_INCONSISTENT)
         {
           Lisp_Object eol;
           eol = Fget (XCONS (tmp)->car, Qeol_type);
@@ -4551,8 +4678,9 @@ DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
    "Detect coding system of the text in the region between START and END.\n\
  Return a list of possible coding systems ordered by priority.\n\
  \n\
-If only ASCII characters are found, it returns `undecided'\n\
-or its subsidiary coding system according to a detected end-of-line format.\n\
+If only ASCII characters are found, it returns a list of single element\n\
+`undecided' or its subsidiary coding system according to a detected\n\
+end-of-line format.\n\
  \n\
  If optional argument HIGHEST is non-nil, return the coding system of\n\
  highest priority.")
@@ -4583,8 +4711,9 @@ DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
    "Detect coding system of the text in STRING.\n\
  Return a list of possible coding systems ordered by priority.\n\
  \n\
-If only ASCII characters are found, it returns `undecided'\n\
-or its subsidiary coding system according to a detected end-of-line format.\n\
+If only ASCII characters are found, it returns a list of single element\n\
+`undecided' or its subsidiary coding system according to a detected\n\
+end-of-line format.\n\
  \n\
  If optional argument HIGHEST is non-nil, return the coding system of\n\
  highest priority.")
@@ -4623,6 +4752,7 @@ code_convert_region1 (start, end, coding_system, encodep)
    coding.mode |= CODING_MODE_LAST_BLOCK;
    code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
                        &coding, encodep, 1);
+  Vlast_coding_system_used = coding.symbol;
    return make_number (coding.produced_char);
  }
  
@@ -4631,7 +4761,10 @@ DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
    "Decode the current region by specified coding system.\n\
  When called from a program, takes three arguments:\n\
  START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
-Return length of decoded text.")
+This function sets `last-coding-system-used' to the precise coding system\n\
+used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
+not fully specified.)\n\
+It returns the length of the decoded text.")
    (start, end, coding_system)
       Lisp_Object start, end, coding_system;
  {
@@ -4643,7 +4776,10 @@ DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
    "Encode the current region by specified coding system.\n\
  When called from a program, takes three arguments:\n\
  START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
-Return length of encoded text.")
+This function sets `last-coding-system-used' to the precise coding system\n\
+used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
+not fully specified.)\n\
+It returns the length of the encoded text.")
    (start, end, coding_system)
       Lisp_Object start, end, coding_system;
  {
@@ -4667,6 +4803,7 @@ code_convert_string1 (string, coding_system, nocopy, encodep)
      error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
  
    coding.mode |= CODING_MODE_LAST_BLOCK;
+  Vlast_coding_system_used = coding.symbol;
    return code_convert_string (string, &coding, encodep, !NILP (nocopy));
  }
  
@@ -4674,24 +4811,52 @@ DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
         2, 3, 0,
    "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
  Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
-if the decoding operation is trivial.")
+if the decoding operation is trivial.\n\
+This function sets `last-coding-system-used' to the precise coding system\n\
+used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
+not fully specified.)")
    (string, coding_system, nocopy)
       Lisp_Object string, coding_system, nocopy;
  {
-  return code_convert_string1(string, coding_system, nocopy, 0);
+  return code_convert_string1 (string, coding_system, nocopy, 0);
  }
  
  DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
         2, 3, 0,
    "Encode STRING to CODING-SYSTEM, and return the result.\n\
  Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
-if the encoding operation is trivial.")
+if the encoding operation is trivial.\n\
+This function sets `last-coding-system-used' to the precise coding system\n\
+used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
+not fully specified.)")
    (string, coding_system, nocopy)
       Lisp_Object string, coding_system, nocopy;
  {
-  return code_convert_string1(string, coding_system, nocopy, 1);
+  return code_convert_string1 (string, coding_system, nocopy, 1);
  }
  
+/* Encode (ENCODEP nonzero) or decode STRING by CODING_SYSTEM, or return
+   STRING itself if that is nil.  Do not set Vlast_coding_system_used.  */
+
+Lisp_Object
+code_convert_string_norecord (string, coding_system, encodep)
+     Lisp_Object string, coding_system;
+     int encodep;
+{
+  struct coding_system coding;
+
+  CHECK_STRING (string, 0);
+  CHECK_SYMBOL (coding_system, 1);
+
+  if (NILP (coding_system))
+    return string;
+
+  if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
+    error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
+  /* NOCOPY is a C int flag, so pass 1 rather than the Lisp object Qt.  */
+  coding.mode |= CODING_MODE_LAST_BLOCK;
+  return code_convert_string (string, &coding, encodep, 1);
+}
  \f
  DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
    "Decode a JISX0208 character of shift-jis encoding.\n\
@@ -4918,35 +5083,81 @@ which is a list of all the arguments given to this function.")
    return Qnil;
  }
  
-DEFUN ("update-iso-coding-systems", Fupdate_iso_coding_systems,
-       Supdate_iso_coding_systems, 0, 0, 0,
-  "Update internal database for ISO2022 based coding systems.\n\
+DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
+       Supdate_coding_systems_internal, 0, 0, 0,
+  "Update internal database for ISO2022 and CCL based coding systems.\n\
  When values of the following coding categories are changed, you must\n\
  call this function:\n\
    coding-category-iso-7, coding-category-iso-7-tight,\n\
    coding-category-iso-8-1, coding-category-iso-8-2,\n\
-  coding-category-iso-7-else, coding-category-iso-8-else")
+  coding-category-iso-7-else, coding-category-iso-8-else,\n\
+  coding-category-ccl")
    ()
  {
    int i;
  
-  for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_ISO_8_ELSE;
-       i++)
+  for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_CCL; i++)
      {
-      if (! coding_system_table[i])
-       coding_system_table[i]
-         = (struct coding_system *) xmalloc (sizeof (struct coding_system));
-      setup_coding_system
-       (XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value,
-        coding_system_table[i]);
+      Lisp_Object val;
+
+      val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
+      if (!NILP (val))
+       {
+         if (! coding_system_table[i])
+           coding_system_table[i] = ((struct coding_system *)
+                                     xmalloc (sizeof (struct coding_system)));
+         setup_coding_system (val, coding_system_table[i]);
+       }
+      else if (coding_system_table[i])
+       {
+         xfree (coding_system_table[i]);
+         coding_system_table[i] = NULL;
+       }
      }
+
+  return Qnil;
+}
+
+DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
+       Sset_coding_priority_internal, 0, 0, 0,
+  "Update internal database for the current value of `coding-category-list'.\n\
+This function is for internal use only.")
+  ()
+{
+  int i = 0, idx;
+  Lisp_Object val;
+
+  val = Vcoding_category_list;
+
+  while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
+    {
+      if (! SYMBOLP (XCONS (val)->car))
+       break;
+      idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
+      if (idx >= CODING_CATEGORY_IDX_MAX)
+       break;
+      coding_priorities[i++] = (1 << idx);
+      val = XCONS (val)->cdr;
+    }
+  /* If coding-category-list is valid and contains all coding
+     categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
+     the following code saves Emacs from crashing.  */
+  while (i < CODING_CATEGORY_IDX_MAX)
+    coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
+
    return Qnil;
  }
  
  #endif /* emacs */
  
  \f
-/*** 8. Post-amble ***/
+/*** 9. Post-amble ***/
+
+void
+init_coding ()
+{
+  conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
+}
  
  void
  init_coding_once ()
@@ -4990,14 +5201,18 @@ init_coding_once ()
    iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
  
    conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
-  conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
  
    setup_coding_system (Qnil, &keyboard_coding);
    setup_coding_system (Qnil, &terminal_coding);
    setup_coding_system (Qnil, &safe_terminal_coding);
+  setup_coding_system (Qnil, &default_buffer_file_coding);
  
    bzero (coding_system_table, sizeof coding_system_table);
  
+  bzero (ascii_skip_code, sizeof ascii_skip_code);
+  for (i = 0; i < 128; i++)
+    ascii_skip_code[i] = 1;
+
  #if defined (MSDOS) || defined (WINDOWSNT)
    system_eol_type = CODING_EOL_CRLF;
  #else
@@ -5093,22 +5308,25 @@ syms_of_coding ()
        }
    }
  
-  Qcharacter_unification_table = intern ("character-unification-table");
-  staticpro (&Qcharacter_unification_table);
-  Fput (Qcharacter_unification_table, Qchar_table_extra_slots,
-       make_number (0));
+  Qtranslation_table = intern ("translation-table");
+  staticpro (&Qtranslation_table);
+  Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
  
-  Qcharacter_unification_table_for_decode
-    = intern ("character-unification-table-for-decode");
-  staticpro (&Qcharacter_unification_table_for_decode);
+  Qtranslation_table_id = intern ("translation-table-id");
+  staticpro (&Qtranslation_table_id);
  
-  Qcharacter_unification_table_for_encode
-    = intern ("character-unification-table-for-encode");
-  staticpro (&Qcharacter_unification_table_for_encode);
+  Qtranslation_table_for_decode = intern ("translation-table-for-decode");
+  staticpro (&Qtranslation_table_for_decode);
+
+  Qtranslation_table_for_encode = intern ("translation-table-for-encode");
+  staticpro (&Qtranslation_table_for_encode);
  
    Qsafe_charsets = intern ("safe-charsets");
    staticpro (&Qsafe_charsets);
  
+  Qvalid_codes = intern ("valid-codes");
+  staticpro (&Qvalid_codes);
+
    Qemacs_mule = intern ("emacs-mule");
    staticpro (&Qemacs_mule);
  
@@ -5135,7 +5353,8 @@ syms_of_coding ()
    defsubr (&Sset_keyboard_coding_system_internal);
    defsubr (&Skeyboard_coding_system);
    defsubr (&Sfind_operation_coding_system);
-  defsubr (&Supdate_iso_coding_systems);
+  defsubr (&Supdate_coding_systems_internal);
+  defsubr (&Sset_coding_priority_internal);
  
    DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
      "List of coding systems.\n\
@@ -5211,7 +5430,8 @@ and the cdr part is used for encoding.\n\
  If VAL is a function symbol, the function must return a coding system\n\
  or a cons of coding systems which are used as above.\n\
  \n\
-See also the function `find-operation-coding-system'.");
+See also the function `find-operation-coding-system'\n\
+and the variable `auto-coding-alist'.");
    Vfile_coding_system_alist = Qnil;
  
    DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
@@ -5261,19 +5481,19 @@ See also the function `find-operation-coding-system'.");
      "Mnemonic character indicating end-of-line format is not yet decided.");
    eol_mnemonic_undecided = ':';
  
-  DEFVAR_LISP ("enable-character-unification", &Venable_character_unification,
-    "Non-nil means ISO 2022 encoder/decoder do character unification.");
-  Venable_character_unification = Qt;
+  DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
+    "*Non-nil enables character translation while encoding and decoding.");
+  Venable_character_translation = Qt;
  
-  DEFVAR_LISP ("standard-character-unification-table-for-decode",
-    &Vstandard_character_unification_table_for_decode,
-    "Table for unifying characters when reading.");
-  Vstandard_character_unification_table_for_decode = Qnil;
+  DEFVAR_LISP ("standard-translation-table-for-decode",
+    &Vstandard_translation_table_for_decode,
+    "Table for translating characters while decoding.");
+  Vstandard_translation_table_for_decode = Qnil;
  
-  DEFVAR_LISP ("standard-character-unification-table-for-encode",
-    &Vstandard_character_unification_table_for_encode,
-    "Table for unifying characters when writing.");
-  Vstandard_character_unification_table_for_encode = Qnil;
+  DEFVAR_LISP ("standard-translation-table-for-encode",
+    &Vstandard_translation_table_for_encode,
+    "Table for translating characters while encoding.");
+  Vstandard_translation_table_for_encode = Qnil;
  
    DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
      "Alist of charsets vs revision numbers.\n\