(detected_mask): Delete unused variable.

[bpt/emacs.git] / src / coding.c
diff --git a/src/coding.c b/src/coding.c

index d23a5ff..e2b5ed6 100644 (file)
--- a/src/coding.c
+++ b/src/coding.c
@@ -144,26 +144,23 @@ STRUCT CODING_SYSTEM
  /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
  
    These functions check if a byte sequence specified as a source in
-  CODING conforms to the format of XXX.  Return 1 if the data contains
-  a byte sequence which can be decoded into non-ASCII characters by
-  the coding system.  Otherwize (i.e. the data contains only ASCII
-  characters or invalid sequence) return 0.
+  CODING conforms to the format of XXX, and update the members of
+  DETECT_INFO.
  
-  It also resets some bits of an integer pointed by MASK.  The macros
-  CATEGORY_MASK_XXX specifies each bit of this integer.
+  Return 1 if the byte sequence conforms to XXX, otherwise return 0.
  
    Below is the template of these functions.  */
  
  #if 0
  static int
-detect_coding_XXX (coding, mask)
+detect_coding_XXX (coding, detect_info)
       struct coding_system *coding;
-     int *mask;
+     struct coding_detection_info *detect_info;
  {
    unsigned char *src = coding->source;
    unsigned char *src_end = coding->source + coding->src_bytes;
    int multibytep = coding->src_multibyte;
-  int c;
+  int consumed_chars = 0;
    int found = 0;
    ...;
  
@@ -172,18 +169,19 @@ detect_coding_XXX (coding, mask)
        /* Get one byte from the source.  If the source is exhausted, jump
          to no_more_source:.  */
        ONE_MORE_BYTE (c);
-      /* Check if it conforms to XXX.  If not, break the loop.  */
+
+      if (! __C_conforms_to_XXX___ (c))
+       break;
+      if (__C_strongly_suggests_XXX__ (c))
+       found = CATEGORY_MASK_XXX;
      }
-  /* As the data is invalid for XXX, reset a proper bits.  */
-  *mask &= ~CODING_CATEGORY_XXX;
+  /* The byte sequence is invalid for XXX.  */
+  detect_info->rejected |= CATEGORY_MASK_XXX;
    return 0;
+
   no_more_source:
-  /* The source exausted.  */
-  if (!found)
-    /* ASCII characters only. */
-    return 0;
-  /* Some data should be decoded into non-ASCII characters.  */
-  *mask &= CODING_CATEGORY_XXX;
+  /* The source was exhausted successfully.  */
+  detect_info->found |= found;
    return 1;
  }
  #endif
@@ -310,8 +308,7 @@ Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
  Lisp_Object Qdefault_char;
  Lisp_Object Qno_conversion, Qundecided;
  Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
-Lisp_Object Qutf_16_be_nosig, Qutf_16_be, Qutf_16_le_nosig, Qutf_16_le;
-Lisp_Object Qsignature, Qendian, Qbig, Qlittle;
+Lisp_Object Qbig, Qlittle;
  Lisp_Object Qcoding_system_history;
  Lisp_Object Qvalid_codes;
  
@@ -408,31 +405,38 @@ Lisp_Object Vsjis_coding_system;
  Lisp_Object Vbig5_coding_system;
  
  
-static int detect_coding_utf_8 P_ ((struct coding_system *, int *));
+static int detect_coding_utf_8 P_ ((struct coding_system *,
+                                   struct coding_detection_info *info));
  static void decode_coding_utf_8 P_ ((struct coding_system *));
  static int encode_coding_utf_8 P_ ((struct coding_system *));
  
-static int detect_coding_utf_16 P_ ((struct coding_system *, int *));
+static int detect_coding_utf_16 P_ ((struct coding_system *,
+                                    struct coding_detection_info *info));
  static void decode_coding_utf_16 P_ ((struct coding_system *));
  static int encode_coding_utf_16 P_ ((struct coding_system *));
  
-static int detect_coding_iso_2022 P_ ((struct coding_system *, int *));
+static int detect_coding_iso_2022 P_ ((struct coding_system *,
+                                      struct coding_detection_info *info));
  static void decode_coding_iso_2022 P_ ((struct coding_system *));
  static int encode_coding_iso_2022 P_ ((struct coding_system *));
  
-static int detect_coding_emacs_mule P_ ((struct coding_system *, int *));
+static int detect_coding_emacs_mule P_ ((struct coding_system *,
+                                        struct coding_detection_info *info));
  static void decode_coding_emacs_mule P_ ((struct coding_system *));
  static int encode_coding_emacs_mule P_ ((struct coding_system *));
  
-static int detect_coding_sjis P_ ((struct coding_system *, int *));
+static int detect_coding_sjis P_ ((struct coding_system *,
+                                  struct coding_detection_info *info));
  static void decode_coding_sjis P_ ((struct coding_system *));
  static int encode_coding_sjis P_ ((struct coding_system *));
  
-static int detect_coding_big5 P_ ((struct coding_system *, int *));
+static int detect_coding_big5 P_ ((struct coding_system *,
+                                  struct coding_detection_info *info));
  static void decode_coding_big5 P_ ((struct coding_system *));
  static int encode_coding_big5 P_ ((struct coding_system *));
  
-static int detect_coding_ccl P_ ((struct coding_system *, int *));
+static int detect_coding_ccl P_ ((struct coding_system *,
+                                 struct coding_detection_info *info));
  static void decode_coding_ccl P_ ((struct coding_system *));
  static int encode_coding_ccl P_ ((struct coding_system *));
  
@@ -622,6 +626,7 @@ enum coding_category
  #define CATEGORY_MASK_ISO_7_ELSE       (1 << coding_category_iso_7_else)
  #define CATEGORY_MASK_ISO_8_ELSE       (1 << coding_category_iso_8_else)
  #define CATEGORY_MASK_UTF_8            (1 << coding_category_utf_8)
+#define CATEGORY_MASK_UTF_16_AUTO      (1 << coding_category_utf_16_auto)
  #define CATEGORY_MASK_UTF_16_BE                (1 << coding_category_utf_16_be)
  #define CATEGORY_MASK_UTF_16_LE                (1 << coding_category_utf_16_le)
  #define CATEGORY_MASK_UTF_16_BE_NOSIG  (1 << coding_category_utf_16_be_nosig)
@@ -631,6 +636,7 @@ enum coding_category
  #define CATEGORY_MASK_BIG5             (1 << coding_category_big5)
  #define CATEGORY_MASK_CCL              (1 << coding_category_ccl)
  #define CATEGORY_MASK_EMACS_MULE       (1 << coding_category_emacs_mule)
+#define CATEGORY_MASK_RAW_TEXT         (1 << coding_category_raw_text)
  
  /* This value is returned if detect_coding_mask () finds nothing other
     than ASCII characters.  */
@@ -695,26 +701,6 @@ static enum coding_category coding_priorities[coding_category_max];
     Nth coding category.  */
  static struct coding_system coding_categories[coding_category_max];
  
-static int detected_mask[coding_category_raw_text] =
-  { CATEGORY_MASK_ISO,
-    CATEGORY_MASK_ISO,
-    CATEGORY_MASK_ISO,
-    CATEGORY_MASK_ISO,
-    CATEGORY_MASK_ISO,
-    CATEGORY_MASK_ISO,
-    CATEGORY_MASK_UTF_8,
-    CATEGORY_MASK_UTF_16,
-    CATEGORY_MASK_UTF_16,
-    CATEGORY_MASK_UTF_16,
-    CATEGORY_MASK_UTF_16,
-    CATEGORY_MASK_UTF_16,
-    CATEGORY_MASK_CHARSET,
-    CATEGORY_MASK_SJIS,
-    CATEGORY_MASK_BIG5,
-    CATEGORY_MASK_CCL,
-    CATEGORY_MASK_EMACS_MULE
-  };
-
  /*** Commonly used macros and functions ***/
  
  #ifndef min
@@ -894,18 +880,12 @@ coding_set_source (coding)
  {
    if (BUFFERP (coding->src_object))
      {
+      struct buffer *buf = XBUFFER (coding->src_object);
+
        if (coding->src_pos < 0)
-       coding->source = GAP_END_ADDR + coding->src_pos_byte;
+       coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
        else
-       {
-         struct buffer *buf = XBUFFER (coding->src_object);
-         EMACS_INT gpt_byte = BUF_GPT_BYTE (buf);
-         unsigned char *beg_addr = BUF_BEG_ADDR (buf);
-
-         coding->source = beg_addr + coding->src_pos_byte - 1;
-         if (coding->src_pos_byte >= gpt_byte)
-           coding->source += BUF_GAP_SIZE (buf);
-       }
+       coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
      }
    else if (STRINGP (coding->src_object))
      {
@@ -1002,6 +982,54 @@ alloc_destination (coding, nbytes, dst)
    return dst;
  }
  
+/** Macros for annotations.  */
+
+/* Maximum length of annotation data (sum of annotations for
+   composition and charset).  */
+#define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
+
+/* Annotation data is stored in the array coding->charbuf in this
+   format:
+     [ -LENGTH ANNOTATION_MASK FROM TO ... ]
+   LENGTH is the number of elements in the annotation.
+   ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
+   FROM and TO specify the range of text annotated.  They are relative
+   to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
+
+   The format of the following elements depends on ANNOTATION_MASK.
+
+   In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
+   follow:
+     ... METHOD [ COMPOSITION-COMPONENTS ... ]
+   METHOD is one of enum composition_method.
+   Optional COMPOSITION-COMPONENTS are characters and composition
+   rules.
+
+   In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
+   follows.  */
+
+#define ADD_ANNOTATION_DATA(buf, len, mask, from, to)  \
+  do {                                                 \
+    *(buf)++ = -(len);                                 \
+    *(buf)++ = (mask);                                 \
+    *(buf)++ = (from);                                 \
+    *(buf)++ = (to);                                   \
+    coding->annotated = 1;                             \
+  } while (0);
+
+#define ADD_COMPOSITION_DATA(buf, from, to, method)                          \
+  do {                                                                       \
+    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
+    *buf++ = method;                                                         \
+  } while (0)
+
+
+#define ADD_CHARSET_DATA(buf, from, to, id)                              \
+  do {                                                                   \
+    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
+    *buf++ = id;                                                         \
+  } while (0)
+
  \f
  /*** 2. Emacs' internal format (emacs-utf-8) ***/
  
@@ -1011,8 +1039,8 @@ alloc_destination (coding, nbytes, dst)
  /*** 3. UTF-8 ***/
  
  /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
-   Check if a text is encoded in UTF-8.  If it is, return
-   CATEGORY_MASK_UTF_8, else return 0.  */
+   Check if a text is encoded in UTF-8.  If it is, return 1, else
+   return 0.  */
  
  #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
  #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
@@ -1022,9 +1050,9 @@ alloc_destination (coding, nbytes, dst)
  #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
  
  static int
-detect_coding_utf_8 (coding, mask)
+detect_coding_utf_8 (coding, detect_info)
       struct coding_system *coding;
-     int *mask;
+     struct coding_detection_info *detect_info;
  {
    unsigned char *src = coding->source, *src_base = src;
    unsigned char *src_end = coding->source + coding->src_bytes;
@@ -1033,6 +1061,7 @@ detect_coding_utf_8 (coding, mask)
    int found = 0;
    int incomplete;
  
+  detect_info->checked |= CATEGORY_MASK_UTF_8;
    /* A coding system of this category is always ASCII compatible.  */
    src += coding->head_ascii;
  
@@ -1050,7 +1079,7 @@ detect_coding_utf_8 (coding, mask)
         break;
        if (UTF_8_2_OCTET_LEADING_P (c))
         {
-         found++;
+         found = CATEGORY_MASK_UTF_8;
           continue;
         }
        ONE_MORE_BYTE (c2);
@@ -1058,7 +1087,7 @@ detect_coding_utf_8 (coding, mask)
         break;
        if (UTF_8_3_OCTET_LEADING_P (c))
         {
-         found++;
+         found = CATEGORY_MASK_UTF_8;
           continue;
         }
        ONE_MORE_BYTE (c3);
@@ -1066,7 +1095,7 @@ detect_coding_utf_8 (coding, mask)
         break;
        if (UTF_8_4_OCTET_LEADING_P (c))
         {
-         found++;
+         found = CATEGORY_MASK_UTF_8;
           continue;
         }
        ONE_MORE_BYTE (c4);
@@ -1074,21 +1103,22 @@ detect_coding_utf_8 (coding, mask)
         break;
        if (UTF_8_5_OCTET_LEADING_P (c))
         {
-         found++;
+         found = CATEGORY_MASK_UTF_8;
           continue;
         }
        break;
      }
-  *mask &= ~CATEGORY_MASK_UTF_8;
+  detect_info->rejected |= CATEGORY_MASK_UTF_8;
    return 0;
  
   no_more_source:
    if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
      {
-      *mask &= ~CATEGORY_MASK_UTF_8;
+      detect_info->rejected |= CATEGORY_MASK_UTF_8;
        return 0;
      }
-  return found;
+  detect_info->found |= found;
+  return 1;
  }
  
  
@@ -1126,7 +1156,10 @@ decode_coding_utf_8 (coding)
               if (EQ (eol_type, Qdos))
                 {
                   if (src == src_end)
-                   goto no_more_source;
+                   {
+                     coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+                     goto no_more_source;
+                   }
                   if (*src == '\n')
                     ONE_MORE_BYTE (c);
                 }
@@ -1266,10 +1299,8 @@ encode_coding_utf_8 (coding)
  
  
  /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
-   Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
-   Little Endian (otherwise).  If it is, return
-   CATEGORY_MASK_UTF_16_BE or CATEGORY_MASK_UTF_16_LE,
-   else return 0.  */
+   Check if a text is encoded in one of UTF-16 based coding systems.
+   If it is, return 1, else return 0.  */
  
  #define UTF_16_HIGH_SURROGATE_P(val) \
    (((val) & 0xFC00) == 0xD800)
@@ -1284,9 +1315,9 @@ encode_coding_utf_8 (coding)
  
  
  static int
-detect_coding_utf_16 (coding, mask)
+detect_coding_utf_16 (coding, detect_info)
       struct coding_system *coding;
-     int *mask;
+     struct coding_detection_info *detect_info;
  {
    unsigned char *src = coding->source, *src_base = src;
    unsigned char *src_end = coding->source + coding->src_bytes;
@@ -1294,21 +1325,31 @@ detect_coding_utf_16 (coding, mask)
    int consumed_chars = 0;
    int c1, c2;
  
-  *mask &= ~CATEGORY_MASK_UTF_16;
+  detect_info->checked |= CATEGORY_MASK_UTF_16;
  
+  if (coding->mode & CODING_MODE_LAST_BLOCK
+      && (coding->src_bytes & 1))
+    {
+      detect_info->rejected |= CATEGORY_MASK_UTF_16;
+      return 0;
+    }
    ONE_MORE_BYTE (c1);
    ONE_MORE_BYTE (c2);
  
    if ((c1 == 0xFF) && (c2 == 0xFE))
-    *mask |= CATEGORY_MASK_UTF_16_LE;
+    {
+      detect_info->found |= (CATEGORY_MASK_UTF_16_LE
+                            | CATEGORY_MASK_UTF_16_AUTO);
+      detect_info->rejected |= CATEGORY_MASK_UTF_16_BE;
+    }
    else if ((c1 == 0xFE) && (c2 == 0xFF))
-    *mask |= CATEGORY_MASK_UTF_16_BE;
-  else
-    *mask |= CATEGORY_MASK_UTF_16_BE_NOSIG | CATEGORY_MASK_UTF_16_LE_NOSIG;
-  return 1;
-
+    {
+      detect_info->found |= (CATEGORY_MASK_UTF_16_BE
+                            | CATEGORY_MASK_UTF_16_AUTO);
+      detect_info->rejected |= CATEGORY_MASK_UTF_16_LE;
+    }
   no_more_source:
-  return 0;
+  return 1;
  }
  
  static void
@@ -1329,7 +1370,7 @@ decode_coding_utf_16 (coding)
  
    CODING_GET_INFO (coding, attr, eol_type, charset_list);
  
-  if (bom != utf_16_without_bom)
+  if (bom == utf_16_with_bom)
      {
        int c, c1, c2;
  
@@ -1337,33 +1378,22 @@ decode_coding_utf_16 (coding)
        ONE_MORE_BYTE (c1);
        ONE_MORE_BYTE (c2);
        c = (c1 << 8) | c2;
-      if (bom == utf_16_with_bom)
-       {
-         if (endian == utf_16_big_endian
-             ? c != 0xFFFE : c != 0xFEFF)
-           {
-             /* We are sure that there's enouph room at CHARBUF.  */
-             *charbuf++ = c1;
-             *charbuf++ = c2;
-             coding->errors++;
-           }
-       }
-      else
+
+      if (endian == utf_16_big_endian
+         ? c != 0xFEFF : c != 0xFFFE)
         {
-         if (c == 0xFFFE)
-           CODING_UTF_16_ENDIAN (coding)
-             = endian = utf_16_big_endian;
-         else if (c == 0xFEFF)
-           CODING_UTF_16_ENDIAN (coding)
-             = endian = utf_16_little_endian;
-         else
-           {
-             CODING_UTF_16_ENDIAN (coding)
-               = endian = utf_16_big_endian;
-             src = src_base;
-           }
+         /* The first two bytes are not BOM.  Treat them as bytes
+            for a normal character.  */
+         src = src_base;
+         coding->errors++;
         }
-      CODING_UTF_16_BOM (coding) = utf_16_with_bom;
+      CODING_UTF_16_BOM (coding) = utf_16_without_bom;
+    }
+  else if (bom == utf_16_detect_bom)
+    {
+      /* We have already tried to detect BOM and failed in
+        detect_coding.  */
+      CODING_UTF_16_BOM (coding) = utf_16_without_bom;
      }
  
    while (1)
@@ -1436,13 +1466,13 @@ encode_coding_utf_16 (coding)
  
    CODING_GET_INFO (coding, attrs, eol_type, charset_list);
  
-  if (bom == utf_16_with_bom)
+  if (bom != utf_16_without_bom)
      {
        ASSURE_DESTINATION (safe_room);
        if (big_endian)
-       EMIT_TWO_BYTES (0xFF, 0xFE);
-      else
         EMIT_TWO_BYTES (0xFE, 0xFF);
+      else
+       EMIT_TWO_BYTES (0xFF, 0xFE);
        CODING_UTF_16_BOM (coding) = utf_16_without_bom;
      }
  
@@ -1556,10 +1586,10 @@ encode_coding_utf_16 (coding)
  char emacs_mule_bytes[256];
  
  int
-emacs_mule_char (coding, src, nbytes, nchars)
+emacs_mule_char (coding, src, nbytes, nchars, id)
       struct coding_system *coding;
       unsigned char *src;
-     int *nbytes, *nchars;
+     int *nbytes, *nchars, *id;
  {
    unsigned char *src_end = coding->source + coding->src_bytes;
    int multibytep = coding->src_multibyte;
@@ -1624,6 +1654,8 @@ emacs_mule_char (coding, src, nbytes, nchars)
      goto invalid_code;
    *nbytes = src - src_base;
    *nchars = consumed_chars;
+  if (id)
+    *id = charset->id;
    return c;
  
   no_more_source:
@@ -1635,12 +1667,13 @@ emacs_mule_char (coding, src, nbytes, nchars)
  
  
  /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
-   Check if a text is encoded in `emacs-mule'.  */
+   Check if a text is encoded in `emacs-mule'.  If it is, return 1,
+   else return 0.  */
  
  static int
-detect_coding_emacs_mule (coding, mask)
+detect_coding_emacs_mule (coding, detect_info)
       struct coding_system *coding;
-     int *mask;
+     struct coding_detection_info *detect_info;
  {
    unsigned char *src = coding->source, *src_base = src;
    unsigned char *src_end = coding->source + coding->src_bytes;
@@ -1650,6 +1683,7 @@ detect_coding_emacs_mule (coding, mask)
    int found = 0;
    int incomplete;
  
+  detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
    /* A coding system of this category is always ASCII compatible.  */
    src += coding->head_ascii;
  
@@ -1677,7 +1711,7 @@ detect_coding_emacs_mule (coding, mask)
  
           if (src - src_base <= 4)
             break;
-         found = 1;
+         found = CATEGORY_MASK_EMACS_MULE;
           if (c == 0x80)
             goto repeat;
         }
@@ -1699,19 +1733,20 @@ detect_coding_emacs_mule (coding, mask)
           while (c >= 0xA0);
           if (src - src_base != emacs_mule_bytes[*src_base])
             break;
-         found = 1;
+         found = CATEGORY_MASK_EMACS_MULE;
         }
      }
-  *mask &= ~CATEGORY_MASK_EMACS_MULE;
+  detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
    return 0;
  
   no_more_source:
    if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
      {
-      *mask &= ~CATEGORY_MASK_EMACS_MULE;
+      detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
        return 0;
      }
-  return found;
+  detect_info->found |= found;
+  return 1;
  }
  
  
@@ -1732,7 +1767,7 @@ detect_coding_emacs_mule (coding, mask)
                                                                 \
        if (src == src_end)                                      \
         break;                                                  \
-      c = emacs_mule_char (coding, src, &nbytes, &nchars);     \
+      c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
        if (c < 0)                                               \
         {                                                       \
           if (c == -2)                                          \
@@ -1789,16 +1824,6 @@ detect_coding_emacs_mule (coding, mask)
    } while (0)
  
  
-#define ADD_COMPOSITION_DATA(buf, method, nchars)      \
-  do {                                                 \
-    *buf++ = -5;                                       \
-    *buf++ = coding->produced_char + char_offset;      \
-    *buf++ = CODING_ANNOTATE_COMPOSITION_MASK;         \
-    *buf++ = method;                                   \
-    *buf++ = nchars;                                   \
-  } while (0)
-
-
  #define DECODE_EMACS_MULE_21_COMPOSITION(c)                            \
    do {                                                                 \
      /* Emacs 21 style format.  The first three bytes at SRC are                \
@@ -1807,6 +1832,7 @@ detect_coding_emacs_mule (coding, mask)
         number of characters composed by this composition.  */          \
      enum composition_method method = c - 0xF2;                         \
      int *charbuf_base = charbuf;                                       \
+    int from, to;                                                      \
      int consumed_chars_limit;                                          \
      int nbytes, nchars;                                                        \
                                                                         \
@@ -1816,7 +1842,9 @@ detect_coding_emacs_mule (coding, mask)
        goto invalid_code;                                               \
      ONE_MORE_BYTE (c);                                                 \
      nchars = c - 0xA0;                                                 \
-    ADD_COMPOSITION_DATA (charbuf, method, nchars);                    \
+    from = coding->produced + char_offset;                             \
+    to = from + nchars;                                                        \
+    ADD_COMPOSITION_DATA (charbuf, from, to, method);                  \
      consumed_chars_limit = consumed_chars_base + nbytes;               \
      if (method != COMPOSITION_RELATIVE)                                        \
        {                                                                        \
@@ -1840,9 +1868,11 @@ detect_coding_emacs_mule (coding, mask)
    do {                                                         \
      /* Emacs 20 style format for relative composition.  */     \
      /* Store multibyte form of characters to be composed.  */  \
+    enum composition_method method = COMPOSITION_RELATIVE;     \
      int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];                \
      int *buf = components;                                     \
      int i, j;                                                  \
+    int from, to;                                              \
                                                                 \
      src = src_base;                                            \
      ONE_MORE_BYTE (c);         /* skip 0x80 */                 \
@@ -1850,7 +1880,9 @@ detect_coding_emacs_mule (coding, mask)
        DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                        \
      if (i < 2)                                                 \
        goto invalid_code;                                       \
-    ADD_COMPOSITION_DATA (charbuf, COMPOSITION_RELATIVE, i);   \
+    from = coding->produced_char + char_offset;                        \
+    to = from + i;                                             \
+    ADD_COMPOSITION_DATA (charbuf, from, to, method);          \
      for (j = 0; j < i; j++)                                    \
        *charbuf++ = components[j];                              \
    } while (0)
@@ -1860,9 +1892,11 @@ detect_coding_emacs_mule (coding, mask)
    do {                                                         \
      /* Emacs 20 style format for rule-base composition.  */    \
      /* Store multibyte form of characters to be composed.  */  \
+    enum composition_method method = COMPOSITION_WITH_RULE;    \
      int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];                \
      int *buf = components;                                     \
      int i, j;                                                  \
+    int from, to;                                              \
                                                                 \
      DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                  \
      for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)           \
@@ -1874,7 +1908,9 @@ detect_coding_emacs_mule (coding, mask)
        goto invalid_code;                                       \
      if (charbuf + i + (i / 2) + 1 < charbuf_end)               \
        goto no_more_source;                                     \
-    ADD_COMPOSITION_DATA (buf, COMPOSITION_WITH_RULE, i);      \
+    from = coding->produced_char + char_offset;                        \
+    to = from + i;                                             \
+    ADD_COMPOSITION_DATA (buf, from, to, method);              \
      for (j = 0; j < i; j++)                                    \
        *charbuf++ = components[j];                              \
      for (j = 0; j < i; j += 2)                                 \
@@ -1890,11 +1926,13 @@ decode_coding_emacs_mule (coding)
    unsigned char *src_end = coding->source + coding->src_bytes;
    unsigned char *src_base;
    int *charbuf = coding->charbuf;
-  int *charbuf_end = charbuf + coding->charbuf_size;
+  int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
    int consumed_chars = 0, consumed_chars_base;
-  int char_offset = 0;
    int multibytep = coding->src_multibyte;
    Lisp_Object attrs, eol_type, charset_list;
+  int char_offset = coding->produced_char;
+  int last_offset = char_offset;
+  int last_id = charset_ascii;
  
    CODING_GET_INFO (coding, attrs, eol_type, charset_list);
  
@@ -1917,7 +1955,10 @@ decode_coding_emacs_mule (coding)
               if (EQ (eol_type, Qdos))
                 {
                   if (src == src_end)
-                   goto no_more_source;
+                   {
+                     coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+                     goto no_more_source;
+                   }
                   if (*src == '\n')
                     ONE_MORE_BYTE (c);
                 }
@@ -1929,8 +1970,6 @@ decode_coding_emacs_mule (coding)
         }
        else if (c == 0x80)
         {
-         if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end)
-           break;
           ONE_MORE_BYTE (c);
           if (c - 0xF2 >= COMPOSITION_RELATIVE
               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
@@ -1941,20 +1980,28 @@ decode_coding_emacs_mule (coding)
             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
           else
             goto invalid_code;
-         coding->annotated = 1;
         }
        else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
         {
           int nbytes, nchars;
+         int id;
+
           src = src_base;
           consumed_chars = consumed_chars_base;
-         c = emacs_mule_char (coding, src, &nbytes, &nchars);
+         c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
           if (c < 0)
             {
               if (c == -2)
                 break;
               goto invalid_code;
             }
+         if (last_id != id)
+           {
+             if (last_id != charset_ascii)
+               ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+             last_id = id;
+             last_offset = char_offset;
+           }
           *charbuf++ = c;
           src += nbytes;
           consumed_chars += nchars;
@@ -1967,10 +2014,13 @@ decode_coding_emacs_mule (coding)
        consumed_chars = consumed_chars_base;
        ONE_MORE_BYTE (c);
        *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+      char_offset++;
        coding->errors++;
      }
  
   no_more_source:
+  if (last_id != charset_ascii)
+    ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
    coding->consumed_char += consumed_chars_base;
    coding->consumed = src_base - coding->source;
    coding->charbuf_used = charbuf - coding->charbuf;
@@ -2005,6 +2055,7 @@ encode_coding_emacs_mule (coding)
    int produced_chars = 0;
    Lisp_Object attrs, eol_type, charset_list;
    int c;
+  int preferred_charset_id = -1;
  
    CODING_GET_INFO (coding, attrs, eol_type, charset_list);
  
@@ -2012,6 +2063,29 @@ encode_coding_emacs_mule (coding)
      {
        ASSURE_DESTINATION (safe_room);
        c = *charbuf++;
+
+      if (c < 0)
+       {
+         /* Handle an annotation.  */
+         switch (*charbuf)
+           {
+           case CODING_ANNOTATE_COMPOSITION_MASK:
+             /* Not yet implemented.  */
+             break;
+           case CODING_ANNOTATE_CHARSET_MASK:
+             preferred_charset_id = charbuf[3];
+             if (preferred_charset_id >= 0
+                 && NILP (Fmemq (make_number (preferred_charset_id),
+                                 charset_list)))
+               preferred_charset_id = -1;
+             break;
+           default:
+             abort ();
+           }
+         charbuf += -c - 1;
+         continue;
+       }
+
        if (ASCII_CHAR_P (c))
         EMIT_ONE_ASCII_BYTE (c);
        else if (CHAR_BYTE8_P (c))
@@ -2027,7 +2101,14 @@ encode_coding_emacs_mule (coding)
           int emacs_mule_id;
           unsigned char leading_codes[2];
  
-         charset = char_charset (c, charset_list, &code);
+         if (preferred_charset_id >= 0)
+           {
+             charset = CHARSET_FROM_ID (preferred_charset_id);
+             if (! CHAR_CHARSET_P (c, charset))
+               charset = char_charset (c, charset_list, NULL);
+           }
+         else
+           charset = char_charset (c, charset_list, &code);
           if (! charset)
             {
               c = coding->default_char;
@@ -2313,32 +2394,26 @@ setup_iso_safe_charsets (attrs)
  
  
  /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
-   Check if a text is encoded in ISO2022.  If it is, returns an
-   integer in which appropriate flag bits any of:
-       CATEGORY_MASK_ISO_7
-       CATEGORY_MASK_ISO_7_TIGHT
-       CATEGORY_MASK_ISO_8_1
-       CATEGORY_MASK_ISO_8_2
-       CATEGORY_MASK_ISO_7_ELSE
-       CATEGORY_MASK_ISO_8_ELSE
-   are set.  If a code which should never appear in ISO2022 is found,
-   returns 0.  */
+   Check if a text is encoded in one of ISO-2022 based coding systems.
+   If it is, return 1, else return 0.  */
  
  static int
-detect_coding_iso_2022 (coding, mask)
+detect_coding_iso_2022 (coding, detect_info)
       struct coding_system *coding;
-     int *mask;
+     struct coding_detection_info *detect_info;
  {
    unsigned char *src = coding->source, *src_base = src;
    unsigned char *src_end = coding->source + coding->src_bytes;
    int multibytep = coding->src_multibyte;
-  int mask_iso = CATEGORY_MASK_ISO;
-  int mask_found = 0, mask_8bit_found = 0;
-  int reg[4], shift_out = 0, single_shifting = 0;
+  int single_shifting = 0;
    int id;
    int c, c1;
    int consumed_chars = 0;
    int i;
+  int rejected = 0;
+  int found = 0;
+
+  detect_info->checked |= CATEGORY_MASK_ISO;
  
    for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
      {
@@ -2357,8 +2432,7 @@ detect_coding_iso_2022 (coding, mask)
    /* A coding system of this category is always ASCII compatible.  */
    src += coding->head_ascii;
  
-  reg[0] = charset_ascii, reg[1] = reg[2] = reg[3] = -1;
-  while (mask_iso && src < src_end)
+  while (rejected != CATEGORY_MASK_ISO)
      {
        ONE_MORE_BYTE (c);
        switch (c)
@@ -2376,7 +2450,6 @@ detect_coding_iso_2022 (coding, mask)
                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
                 /* Invalid designation sequence.  Just ignore.  */
                 break;
-             reg[(c - '(') % 4] = id;
             }
           else if (c == '$')
             {
@@ -2384,7 +2457,7 @@ detect_coding_iso_2022 (coding, mask)
               ONE_MORE_BYTE (c);
               if (c >= '@' && c <= 'B')
                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
-               reg[0] = id = iso_charset_table[1][0][c];
+               id = iso_charset_table[1][0][c];
               else if (c >= '(' && c <= '/')
                 {
                   ONE_MORE_BYTE (c1);
@@ -2392,116 +2465,86 @@ detect_coding_iso_2022 (coding, mask)
                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
                     /* Invalid designation sequence.  Just ignore.  */
                     break;
-                 reg[(c - '(') % 4] = id;
                 }
               else
-               /* Invalid designation sequence.  Just ignore.  */
+               /* Invalid designation sequence.  Just ignore it.  */
                 break;
             }
           else if (c == 'N' || c == 'O')
             {
               /* ESC <Fe> for SS2 or SS3.  */
-             mask_iso &= CATEGORY_MASK_ISO_7_ELSE;
+             single_shifting = 1;
+             rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
               break;
             }
           else if (c >= '0' && c <= '4')
             {
               /* ESC <Fp> for start/end composition.  */
-             mask_found |= CATEGORY_MASK_ISO;
+             found |= CATEGORY_MASK_ISO;
               break;
             }
           else
             {
-             /* Invalid escape sequence.  */
-             mask_iso &= ~CATEGORY_MASK_ISO_ESCAPE;
+             /* Invalid escape sequence.  Just ignore it.  */
               break;
             }
  
           /* We found a valid designation sequence for CHARSET.  */
-         mask_iso &= ~CATEGORY_MASK_ISO_8BIT;
+         rejected |= CATEGORY_MASK_ISO_8BIT;
           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
                               id))
-           mask_found |= CATEGORY_MASK_ISO_7;
+           found |= CATEGORY_MASK_ISO_7;
           else
-           mask_iso &= ~CATEGORY_MASK_ISO_7;
+           rejected |= CATEGORY_MASK_ISO_7;
           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
                               id))
-           mask_found |= CATEGORY_MASK_ISO_7_TIGHT;
+           found |= CATEGORY_MASK_ISO_7_TIGHT;
           else
-           mask_iso &= ~CATEGORY_MASK_ISO_7_TIGHT;
+           rejected |= CATEGORY_MASK_ISO_7_TIGHT;
           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
                               id))
-           mask_found |= CATEGORY_MASK_ISO_7_ELSE;
+           found |= CATEGORY_MASK_ISO_7_ELSE;
           else
-           mask_iso &= ~CATEGORY_MASK_ISO_7_ELSE;
+           rejected |= CATEGORY_MASK_ISO_7_ELSE;
           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
                               id))
-           mask_found |= CATEGORY_MASK_ISO_8_ELSE;
+           found |= CATEGORY_MASK_ISO_8_ELSE;
           else
-           mask_iso &= ~CATEGORY_MASK_ISO_8_ELSE;
+           rejected |= CATEGORY_MASK_ISO_8_ELSE;
           break;
  
         case ISO_CODE_SO:
-         if (inhibit_iso_escape_detection)
-           break;
-         single_shifting = 0;
-         if (shift_out == 0
-             && (reg[1] >= 0
-                 || SHIFT_OUT_OK (coding_category_iso_7_else)
-                 || SHIFT_OUT_OK (coding_category_iso_8_else)))
-           {
-             /* Locking shift out.  */
-             mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
-             mask_found |= CATEGORY_MASK_ISO_ELSE;
-           }
-         break;
-         
         case ISO_CODE_SI:
+         /* Locking shift out/in.  */
           if (inhibit_iso_escape_detection)
             break;
           single_shifting = 0;
-         if (shift_out == 1)
-           {
-             /* Locking shift in.  */
-             mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
-             mask_found |= CATEGORY_MASK_ISO_ELSE;
-           }
+         rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
+         found |= CATEGORY_MASK_ISO_ELSE;
           break;
-
+         
         case ISO_CODE_CSI:
+         /* Control sequence introducer.  */
           single_shifting = 0;
+         rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
+         found |= CATEGORY_MASK_ISO_8_ELSE;
+         goto check_extra_latin;
+
+
         case ISO_CODE_SS2:
         case ISO_CODE_SS3:
-         {
-           int newmask = CATEGORY_MASK_ISO_8_ELSE;
-
-           mask_8bit_found = 1;
-           if (inhibit_iso_escape_detection)
-             break;
-           if (c != ISO_CODE_CSI)
-             {
-               if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
-                   & CODING_ISO_FLAG_SINGLE_SHIFT)
-                 newmask |= CATEGORY_MASK_ISO_8_1;
-               if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
-                   & CODING_ISO_FLAG_SINGLE_SHIFT)
-                 newmask |= CATEGORY_MASK_ISO_8_2;
-               single_shifting = 1;
-             }
-           if (VECTORP (Vlatin_extra_code_table)
-               && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
-             {
-               if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
-                   & CODING_ISO_FLAG_LATIN_EXTRA)
-                 newmask |= CATEGORY_MASK_ISO_8_1;
-               if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
-                   & CODING_ISO_FLAG_LATIN_EXTRA)
-                 newmask |= CATEGORY_MASK_ISO_8_2;
-             }
-           mask_iso &= newmask;
-           mask_found |= newmask;
-         }
-         break;
+         /* Single shift.   */
+         if (inhibit_iso_escape_detection)
+           break;
+         single_shifting = 1;
+         rejected |= CATEGORY_MASK_ISO_7BIT;
+         if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
+             & CODING_ISO_FLAG_SINGLE_SHIFT)
+           found |= CATEGORY_MASK_ISO_8_1;
+         if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
+             & CODING_ISO_FLAG_SINGLE_SHIFT)
+           found |= CATEGORY_MASK_ISO_8_2;
+         goto check_extra_latin;
  
         default:
           if (c < 0x80)
@@ -2509,39 +2552,16 @@ detect_coding_iso_2022 (coding, mask)
               single_shifting = 0;
               break;
             }
-         else if (c < 0xA0)
-           {
-             single_shifting = 0;
-             mask_8bit_found = 1;
-             if (VECTORP (Vlatin_extra_code_table)
-                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
-               {
-                 int newmask = 0;
-
-                 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
-                     & CODING_ISO_FLAG_LATIN_EXTRA)
-                   newmask |= CATEGORY_MASK_ISO_8_1;
-                 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
-                     & CODING_ISO_FLAG_LATIN_EXTRA)
-                   newmask |= CATEGORY_MASK_ISO_8_2;
-                 mask_iso &= newmask;
-                 mask_found |= newmask;
-               }
-             else
-               return 0;
-           }
-         else
+         if (c >= 0xA0)
             {
-             mask_iso &= ~(CATEGORY_MASK_ISO_7BIT
-                           | CATEGORY_MASK_ISO_7_ELSE);
-             mask_found |= CATEGORY_MASK_ISO_8_1;
-             mask_8bit_found = 1;
+             rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
+             found |= CATEGORY_MASK_ISO_8_1;
               /* Check the length of succeeding codes of the range
-                 0xA0..0FF.  If the byte length is odd, we exclude
-                 CATEGORY_MASK_ISO_8_2.  We can check this only
-                 when we are not single shifting.  */
-             if (!single_shifting
-                 && mask_iso & CATEGORY_MASK_ISO_8_2)
+                 0xA0..0xFF.  If the byte length is even, we include
+                 CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
+                 only when we are not single shifting.  */
+             if (! single_shifting
+                 && ! (rejected & CATEGORY_MASK_ISO_8_2))
                 {
                   int i = 1;
                   while (src < src_end)
@@ -2553,26 +2573,38 @@ detect_coding_iso_2022 (coding, mask)
                     }
  
                   if (i & 1 && src < src_end)
-                   mask_iso &= ~CATEGORY_MASK_ISO_8_2;
+                   rejected |= CATEGORY_MASK_ISO_8_2;
                   else
-                   mask_found |= CATEGORY_MASK_ISO_8_2;
+                   found |= CATEGORY_MASK_ISO_8_2;
                 }
+             break;
             }
-         break;
+       check_extra_latin:
+         single_shifting = 0;
+         if (! VECTORP (Vlatin_extra_code_table)
+             || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
+           {
+             rejected = CATEGORY_MASK_ISO;
+             break;
+           }
+         if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
+             & CODING_ISO_FLAG_LATIN_EXTRA)
+           found |= CATEGORY_MASK_ISO_8_1;
+         else
+           rejected |= CATEGORY_MASK_ISO_8_1;
+         if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
+             & CODING_ISO_FLAG_LATIN_EXTRA)
+           found |= CATEGORY_MASK_ISO_8_2;
+         else
+           rejected |= CATEGORY_MASK_ISO_8_2;
         }
      }
+  detect_info->rejected |= CATEGORY_MASK_ISO;
+  return 0;
+
   no_more_source:
-  if (!mask_iso)
-    {
-      *mask &= ~CATEGORY_MASK_ISO;
-      return 0;
-    }
-  if (!mask_found)
-    return 0;
-  *mask &= ~CATEGORY_MASK_ISO;
-  *mask |= mask_iso & mask_found; 
-  if (! mask_8bit_found)
-    *mask &= ~(CATEGORY_MASK_ISO_8BIT | CATEGORY_MASK_ISO_8_ELSE);
+  detect_info->rejected |= rejected;
+  detect_info->found |= (found & ~rejected);
    return 1;
  }
  
@@ -2688,8 +2720,10 @@ detect_coding_iso_2022 (coding, mask)
                   : (component_idx + 1) / 2);                           \
      int i;                                                             \
      int *saved_charbuf = charbuf;                                      \
+    int from = coding->produced_char + char_offset;                    \
+    int to = from + nchars;                                            \
                                                                         \
-    ADD_COMPOSITION_DATA (charbuf, method, nchars);                    \
+    ADD_COMPOSITION_DATA (charbuf, from, to, method);                  \
      if (method != COMPOSITION_RELATIVE)                                        \
        {                                                                        \
         if (component_len == 0)                                         \
@@ -2746,9 +2780,9 @@ decode_coding_iso_2022 (coding)
    unsigned char *src_end = coding->source + coding->src_bytes;
    unsigned char *src_base;
    int *charbuf = coding->charbuf;
-  int *charbuf_end = charbuf + coding->charbuf_size - 4;
+  int *charbuf_end
+    = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
    int consumed_chars = 0, consumed_chars_base;
-  int char_offset = 0;
    int multibytep = coding->src_multibyte;
    /* Charsets invoked to graphic plane 0 and 1 respectively.  */
    int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
@@ -2768,6 +2802,9 @@ decode_coding_iso_2022 (coding)
    int component_idx;
    int component_len;
    Lisp_Object attrs, eol_type, charset_list;
+  int char_offset = coding->produced_char;
+  int last_offset = char_offset;
+  int last_id = charset_ascii;
  
    CODING_GET_INFO (coding, attrs, eol_type, charset_list);
    setup_iso_safe_charsets (attrs);
@@ -2784,7 +2821,7 @@ decode_coding_iso_2022 (coding)
  
        ONE_MORE_BYTE (c1);
  
-      /* We produce no character or one character.  */
+      /* We produce at most one character.  */
        switch (iso_code_class [c1])
         {
         case ISO_0x20_or_0x7F:
@@ -2841,7 +2878,10 @@ decode_coding_iso_2022 (coding)
               if (EQ (eol_type, Qdos))
                 {
                   if (src == src_end)
-                   goto no_more_source;
+                   {
+                     coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+                     goto no_more_source;
+                   }                 
                   if (*src == '\n')
                     ONE_MORE_BYTE (c1);
                 }
@@ -3020,6 +3060,70 @@ decode_coding_iso_2022 (coding)
                 }
               continue;
  
+           case '%':
+             ONE_MORE_BYTE (c1);
+             if (c1 == '/')
+               {
+                 /* CTEXT extended segment:
+                    ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
+                    We keep these bytes as is for the moment.
+                    They may be decoded by post-read-conversion.  */
+                 int dim, M, L;
+                 int size;
+                 
+                 ONE_MORE_BYTE (dim);
+                 ONE_MORE_BYTE (M);
+                 ONE_MORE_BYTE (L);
+                 size = ((M - 128) * 128) + (L - 128);
+                 if (charbuf + 8 + size > charbuf_end)
+                   goto break_loop;
+                 *charbuf++ = ISO_CODE_ESC;
+                 *charbuf++ = '%';
+                 *charbuf++ = '/';
+                 *charbuf++ = dim;
+                 *charbuf++ = BYTE8_TO_CHAR (M);
+                 *charbuf++ = BYTE8_TO_CHAR (L);
+                 while (size-- > 0)
+                   {
+                     ONE_MORE_BYTE (c1);
+                     *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
+                   }
+               }
+             else if (c1 == 'G')
+               {
+                 /* XFree86 extension for embedding UTF-8 in CTEXT:
+                    ESC % G --UTF-8-BYTES-- ESC % @
+                    We keep these bytes as is for the moment.
+                    They may be decoded by post-read-conversion.  */
+                 int *p = charbuf;
+
+                 if (p + 6 > charbuf_end)
+                   goto break_loop;
+                 *p++ = ISO_CODE_ESC;
+                 *p++ = '%';
+                 *p++ = 'G';
+                 while (p < charbuf_end)
+                   {
+                     ONE_MORE_BYTE (c1);
+                     if (c1 == ISO_CODE_ESC
+                         && src + 1 < src_end
+                         && src[0] == '%'
+                         && src[1] == '@')
+                       break;
+                     *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
+                   }
+                 if (p + 3 > charbuf_end)
+                   goto break_loop;
+                 *p++ = ISO_CODE_ESC;
+                 *p++ = '%';
+                 *p++ = '@';
+                 charbuf = p;
+               }
+             else
+               goto invalid_code;
+             continue;
+             break;
+
             default:
               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
                 goto invalid_code;
@@ -3042,6 +3146,15 @@ decode_coding_iso_2022 (coding)
             }
         }
  
+      if (charset->id != charset_ascii
+         && last_id != charset->id)
+       {
+         if (last_id != charset_ascii)
+           ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+         last_id = charset->id;
+         last_offset = char_offset;
+       }
+
        /* Now we know CHARSET and 1st position code C1 of a character.
           Produce a decoded character while getting 2nd position code
           C2 if necessary.  */
@@ -3073,6 +3186,7 @@ decode_coding_iso_2022 (coding)
                 *charbuf++ = *src_base;
               else
                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
+             char_offset++;
             }
         }
        else if (composition_state == COMPOSING_NO)
@@ -3096,10 +3210,17 @@ decode_coding_iso_2022 (coding)
        consumed_chars = consumed_chars_base;
        ONE_MORE_BYTE (c);
        *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+      char_offset++;
        coding->errors++;
+      continue;
+
+    break_loop:
+      break;
      }
  
   no_more_source:
+  if (last_id != charset_ascii)
+    ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
    coding->consumed_char += consumed_chars_base;
    coding->consumed = src_base - coding->source;
    coding->charbuf_used = charbuf - coding->charbuf;
@@ -3521,9 +3642,12 @@ encode_coding_iso_2022 (coding)
    Lisp_Object attrs, eol_type, charset_list;
    int ascii_compatible;
    int c;
+  int preferred_charset_id = -1;
  
    CODING_GET_INFO (coding, attrs, eol_type, charset_list);
    setup_iso_safe_charsets (attrs);
+  /* Charset list may have been changed.  */
+  charset_list = CODING_ATTR_CHARSET_LIST (attrs);             \
    coding->safe_charsets
      = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data;
  
@@ -3546,6 +3670,28 @@ encode_coding_iso_2022 (coding)
  
        c = *charbuf++;
  
+      if (c < 0)
+       {
+         /* Handle an annotation.  */
+         switch (*charbuf)
+           {
+           case CODING_ANNOTATE_COMPOSITION_MASK:
+             /* Not yet implemented.  */
+             break;
+           case CODING_ANNOTATE_CHARSET_MASK:
+             preferred_charset_id = charbuf[3];
+             if (preferred_charset_id >= 0
+                 && NILP (Fmemq (make_number (preferred_charset_id),
+                                 charset_list)))
+               preferred_charset_id = -1;
+             break;
+           default:
+             abort ();
+           }
+         charbuf += -c - 1;
+         continue;
+       }
+
        /* Now encode the character C.  */
        if (c < 0x20 || c == 0x7F)
         {
@@ -3586,8 +3732,16 @@ encode_coding_iso_2022 (coding)
         }
        else
         {
-         struct charset *charset = char_charset (c, charset_list, NULL);
+         struct charset *charset;
  
+         if (preferred_charset_id >= 0)
+           {
+             charset = CHARSET_FROM_ID (preferred_charset_id);
+             if (! CHAR_CHARSET_P (c, charset))
+               charset = char_charset (c, charset_list, NULL);
+           }
+         else
+           charset = char_charset (c, charset_list, NULL);
           if (!charset)
             {
               if (coding->mode & CODING_MODE_SAFE_ENCODING)
@@ -3660,9 +3814,9 @@ encode_coding_iso_2022 (coding)
     CATEGORY_MASK_SJIS, else return 0.  */
  
  static int
-detect_coding_sjis (coding, mask)
+detect_coding_sjis (coding, detect_info)
       struct coding_system *coding;
-     int *mask;
+     struct coding_detection_info *detect_info;
  {
    unsigned char *src = coding->source, *src_base = src;
    unsigned char *src_end = coding->source + coding->src_bytes;
@@ -3672,6 +3826,7 @@ detect_coding_sjis (coding, mask)
    int c;
    int incomplete;
  
+  detect_info->checked |= CATEGORY_MASK_SJIS;
    /* A coding system of this category is always ASCII compatible.  */
    src += coding->head_ascii;
  
@@ -3687,23 +3842,24 @@ detect_coding_sjis (coding, mask)
           ONE_MORE_BYTE (c);
           if (c < 0x40 || c == 0x7F || c > 0xFC)
             break;
-         found = 1;
+         found = CATEGORY_MASK_SJIS;
         }
        else if (c >= 0xA0 && c < 0xE0)
-       found = 1;
+       found = CATEGORY_MASK_SJIS;
        else
         break;
      }
-  *mask &= ~CATEGORY_MASK_SJIS;
+  detect_info->rejected |= CATEGORY_MASK_SJIS;
    return 0;
  
   no_more_source:
    if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
      {
-      *mask &= ~CATEGORY_MASK_SJIS;
+      detect_info->rejected |= CATEGORY_MASK_SJIS;
        return 0;
      }
-  return found;
+  detect_info->found |= found;
+  return 1;
  }
  
  /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
@@ -3711,9 +3867,9 @@ detect_coding_sjis (coding, mask)
     CATEGORY_MASK_BIG5, else return 0.  */
  
  static int
-detect_coding_big5 (coding, mask)
+detect_coding_big5 (coding, detect_info)
       struct coding_system *coding;
-     int *mask;
+     struct coding_detection_info *detect_info;
  {
    unsigned char *src = coding->source, *src_base = src;
    unsigned char *src_end = coding->source + coding->src_bytes;
@@ -3723,6 +3879,7 @@ detect_coding_big5 (coding, mask)
    int c;
    int incomplete;
  
+  detect_info->checked |= CATEGORY_MASK_BIG5;
    /* A coding system of this category is always ASCII compatible.  */
    src += coding->head_ascii;
  
@@ -3738,21 +3895,22 @@ detect_coding_big5 (coding, mask)
           ONE_MORE_BYTE (c);
           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
             return 0;
-         found = 1;
+         found = CATEGORY_MASK_BIG5;
         }
        else
         break;
      }
-  *mask &= ~CATEGORY_MASK_BIG5;
+  detect_info->rejected |= CATEGORY_MASK_BIG5;
    return 0;
  
   no_more_source:
    if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
      {
-      *mask &= ~CATEGORY_MASK_BIG5;
+      detect_info->rejected |= CATEGORY_MASK_BIG5;
        return 0;
      }
-  return found;
+  detect_info->found |= found;
+  return 1;
  }
  
  /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
@@ -3766,11 +3924,14 @@ decode_coding_sjis (coding)
    unsigned char *src_end = coding->source + coding->src_bytes;
    unsigned char *src_base;
    int *charbuf = coding->charbuf;
-  int *charbuf_end = charbuf + coding->charbuf_size;
+  int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
    int consumed_chars = 0, consumed_chars_base;
    int multibytep = coding->src_multibyte;
    struct charset *charset_roman, *charset_kanji, *charset_kana;
    Lisp_Object attrs, eol_type, charset_list, val;
+  int char_offset = coding->produced_char;
+  int last_offset = char_offset;
+  int last_id = charset_ascii;
  
    CODING_GET_INFO (coding, attrs, eol_type, charset_list);
  
@@ -3796,7 +3957,10 @@ decode_coding_sjis (coding)
           if (EQ (eol_type, Qdos))
             {
               if (src == src_end)
-               goto no_more_source;
+               {
+                 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+                 goto no_more_source;
+               }
               if (*src == '\n')
                 ONE_MORE_BYTE (c);
             }
@@ -3830,9 +3994,18 @@ decode_coding_sjis (coding)
                   charset = charset_kana;
                 }
             }
+         if (charset->id != charset_ascii
+             && last_id != charset->id)
+           {
+             if (last_id != charset_ascii)
+               ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+             last_id = charset->id;
+             last_offset = char_offset;
+           }
           CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
         }
        *charbuf++ = c;
+      char_offset++;
        continue;
  
      invalid_code:
@@ -3840,10 +4013,13 @@ decode_coding_sjis (coding)
        consumed_chars = consumed_chars_base;
        ONE_MORE_BYTE (c);
        *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+      char_offset++;
        coding->errors++;
      }
  
   no_more_source:
+  if (last_id != charset_ascii)
+    ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
    coding->consumed_char += consumed_chars_base;
    coding->consumed = src_base - coding->source;
    coding->charbuf_used = charbuf - coding->charbuf;
@@ -3857,11 +4033,14 @@ decode_coding_big5 (coding)
    unsigned char *src_end = coding->source + coding->src_bytes;
    unsigned char *src_base;
    int *charbuf = coding->charbuf;
-  int *charbuf_end = charbuf + coding->charbuf_size;
+  int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
    int consumed_chars = 0, consumed_chars_base;
    int multibytep = coding->src_multibyte;
    struct charset *charset_roman, *charset_big5;
    Lisp_Object attrs, eol_type, charset_list, val;
+  int char_offset = coding->produced_char;
+  int last_offset = char_offset;
+  int last_id = charset_ascii;
  
    CODING_GET_INFO (coding, attrs, eol_type, charset_list);
    val = charset_list;
@@ -3885,7 +4064,10 @@ decode_coding_big5 (coding)
           if (EQ (eol_type, Qdos))
             {
               if (src == src_end)
-               goto no_more_source;
+               {
+                 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+                 goto no_more_source;
+               }
               if (*src == '\n')
                 ONE_MORE_BYTE (c);
             }
@@ -3908,10 +4090,19 @@ decode_coding_big5 (coding)
               c = c << 8 | c1;
               charset = charset_big5;
             }
+         if (charset->id != charset_ascii
+             && last_id != charset->id)
+           {
+             if (last_id != charset_ascii)
+               ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+             last_id = charset->id;
+             last_offset = char_offset;
+           }
           CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
         }
  
        *charbuf++ = c;
+      char_offset++;
        continue;
  
      invalid_code:
@@ -3919,10 +4110,13 @@ decode_coding_big5 (coding)
        consumed_chars = consumed_chars_base;
        ONE_MORE_BYTE (c);
        *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+      char_offset++;
        coding->errors++;
      }
  
   no_more_source:
+  if (last_id != charset_ascii)
+    ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
    coding->consumed_char += consumed_chars_base;
    coding->consumed = src_base - coding->source;
    coding->charbuf_used = charbuf - coding->charbuf;
@@ -4091,9 +4285,9 @@ encode_coding_big5 (coding)
     CATEGORY_MASK_CCL, else return 0.  */
  
  static int
-detect_coding_ccl (coding, mask)
+detect_coding_ccl (coding, detect_info)
       struct coding_system *coding;
-     int *mask;
+     struct coding_detection_info *detect_info;
  {
    unsigned char *src = coding->source, *src_base = src;
    unsigned char *src_end = coding->source + coding->src_bytes;
@@ -4104,6 +4298,8 @@ detect_coding_ccl (coding, mask)
    int head_ascii = coding->head_ascii;
    Lisp_Object attrs;
  
+  detect_info->checked |= CATEGORY_MASK_CCL;
+
    coding = &coding_categories[coding_category_ccl];
    attrs = CODING_ID_ATTRS (coding->id);
    if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
@@ -4115,14 +4311,15 @@ detect_coding_ccl (coding, mask)
        ONE_MORE_BYTE (c);
        if (! valids[c])
         break;
-      if (!found && valids[c] > 1)
-       found = 1;
+      if ((valids[c] > 1))
+       found = CATEGORY_MASK_CCL;
      }
-  *mask &= ~CATEGORY_MASK_CCL;
+  detect_info->rejected |= CATEGORY_MASK_CCL;
    return 0;
  
   no_more_source:
-  return found;
+  detect_info->found |= found;
+  return 1;
  }
  
  static void
@@ -4138,7 +4335,9 @@ decode_coding_ccl (coding)
    struct ccl_program ccl;
    int source_charbuf[1024];
    int source_byteidx[1024];
+  Lisp_Object attrs, eol_type, charset_list;
  
+  CODING_GET_INFO (coding, attrs, eol_type, charset_list);
    setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
  
    while (src < src_end)
@@ -4165,7 +4364,8 @@ decode_coding_ccl (coding)
        while (source < source_end)
         {
           ccl_driver (&ccl, source, charbuf,
-                     source_end - source, charbuf_end - charbuf);
+                     source_end - source, charbuf_end - charbuf,
+                     charset_list);
           source += ccl.consumed;
           charbuf += ccl.produced;
           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
@@ -4215,7 +4415,9 @@ encode_coding_ccl (coding)
    unsigned char *adjusted_dst_end = dst_end - 1;
    int destination_charbuf[1024];
    int i, produced_chars = 0;
+  Lisp_Object attrs, eol_type, charset_list;
  
+  CODING_GET_INFO (coding, attrs, eol_type, charset_list);
    setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
  
    ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
@@ -4228,7 +4430,7 @@ encode_coding_ccl (coding)
         dst_bytes = 1024;
  
        ccl_driver (&ccl, charbuf, destination_charbuf,
-                 charbuf_end - charbuf, dst_bytes);
+                 charbuf_end - charbuf, dst_bytes, charset_list);
        charbuf += ccl.consumed;
        if (multibytep)
         for (i = 0; i < ccl.produced; i++)
@@ -4360,16 +4562,23 @@ encode_coding_raw_text (coding)
    return 0;
  }
  
+/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
+   Check if a text is encoded in a charset-based coding system.  If it
+   is, return 1, else return 0.  */
+
  static int
-detect_coding_charset (coding, mask)
+detect_coding_charset (coding, detect_info)
       struct coding_system *coding;
-     int *mask;
+     struct coding_detection_info *detect_info;
  {
    unsigned char *src = coding->source, *src_base = src;
    unsigned char *src_end = coding->source + coding->src_bytes;
    int multibytep = coding->src_multibyte;
    int consumed_chars = 0;
    Lisp_Object attrs, valids;
+  int found = 0;
+
+  detect_info->checked |= CATEGORY_MASK_CHARSET;
  
    coding = &coding_categories[coding_category_charset];
    attrs = CODING_ID_ATTRS (coding->id);
@@ -4385,11 +4594,14 @@ detect_coding_charset (coding, mask)
        ONE_MORE_BYTE (c);
        if (NILP (AREF (valids, c)))
         break;
+      if (c >= 0x80)
+       found = CATEGORY_MASK_CHARSET;
      }
-  *mask &= ~CATEGORY_MASK_CHARSET;
+  detect_info->rejected |= CATEGORY_MASK_CHARSET;
    return 0;
  
   no_more_source:
+  detect_info->found |= found;
    return 1;
  }
  
@@ -4401,10 +4613,13 @@ decode_coding_charset (coding)
    unsigned char *src_end = coding->source + coding->src_bytes;
    unsigned char *src_base;
    int *charbuf = coding->charbuf;
-  int *charbuf_end = charbuf + coding->charbuf_size;
+  int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
    int consumed_chars = 0, consumed_chars_base;
    int multibytep = coding->src_multibyte;
    Lisp_Object attrs, eol_type, charset_list, valids;
+  int char_offset = coding->produced_char;
+  int last_offset = char_offset;
+  int last_id = charset_ascii;
  
    CODING_GET_INFO (coding, attrs, eol_type, charset_list);
    valids = AREF (attrs, coding_attr_charset_valids);
@@ -4426,8 +4641,12 @@ decode_coding_charset (coding)
              else.  */
           if (EQ (eol_type, Qdos))
             {
-             if (src < src_end
-                 && *src == '\n')
+             if (src == src_end)
+               {
+                 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+                 goto no_more_source;
+               }
+             if (*src == '\n')
                 ONE_MORE_BYTE (c);
             }
           else if (EQ (eol_type, Qmac))
@@ -4481,8 +4700,17 @@ decode_coding_charset (coding)
             }
           if (c < 0)
             goto invalid_code;
+         if (charset->id != charset_ascii
+             && last_id != charset->id)
+           {
+             if (last_id != charset_ascii)
+               ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+             last_id = charset->id;
+             last_offset = char_offset;
+           }
         }
        *charbuf++ = c;
+      char_offset++;
        continue;
  
      invalid_code:
@@ -4490,10 +4718,13 @@ decode_coding_charset (coding)
        consumed_chars = consumed_chars_base;
        ONE_MORE_BYTE (c);
        *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+      char_offset++;
        coding->errors++;
      }
  
   no_more_source:
+  if (last_id != charset_ascii)
+    ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
    coding->consumed_char += consumed_chars_base;
    coding->consumed = src_base - coding->source;
    coding->charbuf_used = charbuf - coding->charbuf;
@@ -4633,6 +4864,8 @@ setup_coding_system (coding_system, coding)
             | CODING_REQUIRE_FLUSHING_MASK);
        if (flags & CODING_ISO_FLAG_COMPOSITION)
         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
+      if (flags & CODING_ISO_FLAG_DESIGNATION)
+       coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
        if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
         {
           setup_iso_safe_charsets (attrs);
@@ -4665,7 +4898,7 @@ setup_coding_system (coding_system, coding)
                                     : EQ (val, Qt) ? utf_16_with_bom
                                     : utf_16_without_bom);
        val = AREF (attrs, coding_attr_utf_16_endian);
-      CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian
+      CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
                                        : utf_16_little_endian);
        CODING_UTF_16_SURROGATE (coding) = 0;
        coding->detector = detect_coding_utf_16;
@@ -4673,6 +4906,8 @@ setup_coding_system (coding_system, coding)
        coding->encoder = encode_coding_utf_16;
        coding->common_flags
         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
+      if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
+       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
      }
    else if (EQ (coding_type, Qccl))
      {
@@ -4908,9 +5143,12 @@ coding_inherit_eol_type (coding_system, parent)
  #define EOL_SEEN_CR    2
  #define EOL_SEEN_CRLF  4
  
-/* Detect how end-of-line of a text of length CODING->src_bytes
-   pointed by CODING->source is encoded.  Return one of
-   EOL_SEEN_XXX.  */
+/* Detect how end-of-line of a text of length SRC_BYTES pointed to by
+   SOURCE is encoded.  If CATEGORY is one of
+   coding_category_utf_16_XXXX, assume that CR and LF are each encoded
+   in two bytes; otherwise assume they are encoded in one byte.
+
+   Return one of EOL_SEEN_XXX.  */
  
  #define MAX_EOL_CHECK_COUNT 3
  
@@ -5035,7 +5273,6 @@ detect_coding (coding)
       now.  */
    if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
      {
-      int mask = CATEGORY_MASK_ANY;
        int c, i;
  
        for (src = coding->source; src < src_end; src++)
@@ -5050,43 +5287,62 @@ detect_coding (coding)
  
        if (coding->head_ascii < coding->src_bytes)
         {
-         int detected = 0;
+         struct coding_detection_info detect_info;
+         enum coding_category category;
+         struct coding_system *this;
  
+         detect_info.checked = detect_info.found = detect_info.rejected = 0;
           for (i = 0; i < coding_category_raw_text; i++)
             {
-             enum coding_category category = coding_priorities[i];
-             struct coding_system *this = coding_categories + category;
-
+             category = coding_priorities[i];
+             this = coding_categories + category;
               if (this->id < 0)
                 {
                   /* No coding system of this category is defined.  */
-                 mask &= ~(1 << category);
+                 detect_info.rejected |= (1 << category);
                 }
-             else if (category >= coding_category_raw_text
-                      || detected & (1 << category))
+             else if (category >= coding_category_raw_text)
                 continue;
-             else
+             else if (detect_info.checked & (1 << category))
                 {
-                 detected |= detected_mask[category];
-                 if ((*(this->detector)) (coding, &mask)
-                     && (mask & (1 << category)))
+                 if (detect_info.found & (1 << category))
                     break;
                 }
+             else if ((*(this->detector)) (coding, &detect_info)
+                      && detect_info.found & (1 << category))
+               break;
             }
-         if (! mask)
+         if (i < coding_category_raw_text)
+           setup_coding_system (CODING_ID_NAME (this->id), coding);
+         else if (detect_info.rejected == CATEGORY_MASK_ANY)
             setup_coding_system (Qraw_text, coding);
-         else if (mask != CATEGORY_MASK_ANY)
+         else if (detect_info.rejected)
             for (i = 0; i < coding_category_raw_text; i++)
-             {
-               enum coding_category category = coding_priorities[i];
-               struct coding_system *this = coding_categories + category;
+             if (! (detect_info.rejected & (1 << coding_priorities[i])))
+               {
+                 this = coding_categories + coding_priorities[i];
+                 setup_coding_system (CODING_ID_NAME (this->id), coding);
+                 break;
+               }
+       }
+    }
+  else if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qutf_16))
+    {
+      Lisp_Object coding_systems;
+      struct coding_detection_info detect_info;
  
-               if (mask & (1 << category))
-                 {
-                   setup_coding_system (CODING_ID_NAME (this->id), coding);
-                   break;
-                 }
-             }
+      coding_systems
+       = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
+      detect_info.found = detect_info.rejected = 0;
+      if (CONSP (coding_systems)
+         && detect_coding_utf_16 (coding, &detect_info)
+         && (detect_info.found & (CATEGORY_MASK_UTF_16_LE
+                                  | CATEGORY_MASK_UTF_16_BE)))
+       {
+         if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+           setup_coding_system (XCAR (coding_systems), coding);
+         else
+           setup_coding_system (XCDR (coding_systems), coding);
         }
      }
  
@@ -5237,7 +5493,7 @@ produce_chars (coding)
               produced_chars++;
             }
           else
-           /* This is an annotation data.  */
+           /* This is an annotation datum.  */
             buf -= c + 1;
         }
      }
@@ -5266,8 +5522,12 @@ produce_chars (coding)
                     {
                       if (EQ (eol_type, Qdos))
                         {
-                         if (src < src_end
-                             && *src == '\n')
+                         if (src == src_end)
+                           {
+                             coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+                             goto no_more_source;
+                           }
+                         if (*src == '\n')
                             c = *src++;
                         }
                       else if (EQ (eol_type, Qmac))
@@ -5379,9 +5639,9 @@ produce_chars (coding)
    return produced_chars;
  }
  
-/* [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN ]
-       or
-   [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN COMPONENTS... ]
+/* Compose text in CODING->dst_object according to the annotation data at
+   CHARBUF.  CHARBUF is an array:
+     [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
   */
  
  static INLINE void
@@ -5389,18 +5649,15 @@ produce_composition (coding, charbuf)
       struct coding_system *coding;
       int *charbuf;
  {
-  Lisp_Object buffer;
    int len;
-  EMACS_INT pos;
+  EMACS_INT from, to;
    enum composition_method method;
-  int cmp_len;
    Lisp_Object components;
  
-  buffer = coding->dst_object;
    len = -charbuf[0];
-  pos = coding->dst_pos + charbuf[1];
-  method = (enum composition_method) (charbuf[3]);
-  cmp_len = charbuf[4];
+  from = coding->dst_pos + charbuf[2];
+  to = coding->dst_pos + charbuf[3];
+  method = (enum composition_method) (charbuf[4]);
  
    if (method == COMPOSITION_RELATIVE)
      components = Qnil;
@@ -5416,65 +5673,30 @@ produce_composition (coding, charbuf)
        components = (method == COMPOSITION_WITH_ALTCHARS
                     ? Fstring (len, args) : Fvector (len, args));
      }
-  compose_text (pos, pos + cmp_len, components, Qnil, Qnil);
+  compose_text (from, to, components, Qnil, coding->dst_object);
  }
  
-static int *
-save_composition_data (buf, buf_end, prop)
-     int *buf, *buf_end;
-     Lisp_Object prop;
-{
-  enum composition_method method = COMPOSITION_METHOD (prop);
-  int cmp_len = COMPOSITION_LENGTH (prop);
-
-  if (buf + 4 + (MAX_COMPOSITION_COMPONENTS * 2 - 1) > buf_end)
-    return NULL;
  
-  buf[1] = CODING_ANNOTATE_COMPOSITION_MASK;
-  buf[2] = method;
-  buf[3] = cmp_len;
-
-  if (method == COMPOSITION_RELATIVE)
-    buf[0] = 4;
-  else
-    {
-      Lisp_Object components;
-      int len, i;
+/* Put `charset' property on text in CODING->dst_object according to
+   the annotation data at CHARBUF.  CHARBUF is an array:
+     [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
+ */
  
-      components = COMPOSITION_COMPONENTS (prop);
-      if (VECTORP (components))
-       {
-         len = XVECTOR (components)->size;
-         for (i = 0; i < len; i++)
-           buf[4 + i] = XINT (AREF (components, i));
-       }
-      else if (STRINGP (components))
-       {
-         int i_byte;
+static INLINE void
+produce_charset (coding, charbuf)
+     struct coding_system *coding;
+     int *charbuf;
+{
+  EMACS_INT from = coding->dst_pos + charbuf[2];
+  EMACS_INT to = coding->dst_pos + charbuf[3];
+  struct charset *charset = CHARSET_FROM_ID (charbuf[4]);
  
-         len = XSTRING (components)->size;
-         i = i_byte = 0;
-         while (i < len)
-           FETCH_STRING_CHAR_ADVANCE (buf[4 + i], components, i, i_byte);
-       }
-      else if (INTEGERP (components))
-       {
-         len = 1;
-         buf[4] = XINT (components);
-       }
-      else if (CONSP (components))
-       {
-         for (len = 0; CONSP (components);
-              len++, components = XCDR (components))
-           buf[4 + len] = XINT (XCAR (components));
-       }
-      else
-       abort ();
-      buf[0] = 4 + len;
-    }
-  return (buf + buf[0]);
+  Fput_text_property (make_number (from), make_number (to),
+                     Qcharset, CHARSET_NAME (charset),
+                     coding->dst_object);
  }
  
+
  #define CHARBUF_SIZE 0x4000
  
  #define ALLOC_CONVERSION_WORK_AREA(coding)                             \
@@ -5505,6 +5727,9 @@ produce_annotation (coding)
    int *charbuf = coding->charbuf;
    int *charbuf_end = charbuf + coding->charbuf_used;
  
+  if (NILP (coding->dst_object))
+    return;
+
    while (charbuf < charbuf_end)
      {
        if (*charbuf >= 0)
@@ -5512,11 +5737,14 @@ produce_annotation (coding)
        else
         {
           int len = -*charbuf;
-         switch (charbuf[2])
+         switch (charbuf[1])
             {
             case CODING_ANNOTATE_COMPOSITION_MASK:
               produce_composition (coding, charbuf);
               break;
+           case CODING_ANNOTATE_CHARSET_MASK:
+             produce_charset (coding, charbuf);
+             break;
             default:
               abort ();
             }
@@ -5584,7 +5812,9 @@ decode_coding (coding)
        coding->annotated = 0;
        (*(coding->decoder)) (coding);
        if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
-       translate_chars (CODING_ATTR_DECODE_TBL (attrs), coding);
+       translate_chars (coding, CODING_ATTR_DECODE_TBL (attrs));
+      else if (!NILP (Vstandard_translation_table_for_decode))
+       translate_chars (coding, Vstandard_translation_table_for_decode);
        coding_set_destination (coding);
        produce_chars (coding);
        if (coding->annotated)
@@ -5613,12 +5843,11 @@ decode_coding (coding)
           /* Flush out unprocessed data as binary chars.  We are sure
              that the number of data is less than the size of
              coding->charbuf.  */
-         int *charbuf = coding->charbuf;
-
           while (nbytes-- > 0)
             {
               int c = *src++;
-             *charbuf++ =  (c & 0x80 ? - c : c);
+
+             coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
             }
           produce_chars (coding);
         }
@@ -5639,64 +5868,186 @@ decode_coding (coding)
    return coding->result;
  }
  
+
+/* Extract an annotation datum from a composition starting at POS and
+   ending before LIMIT of CODING->src_object (buffer or string), store
+   the data in BUF, set *STOP to a starting position of the next
+   composition (if any) or to LIMIT, and return the address of the
+   next element of BUF.
+
+   If such an annotation is not found, set *STOP to a starting
+   position of a composition after POS (if any) or to LIMIT, and
+   return BUF.  */
+
+static INLINE int *
+handle_composition_annotation (pos, limit, coding, buf, stop)
+     EMACS_INT pos, limit;
+     struct coding_system *coding;
+     int *buf;
+     EMACS_INT *stop;
+{
+  EMACS_INT start, end;
+  Lisp_Object prop;
+
+  if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
+      || end > limit)
+    *stop = limit;
+  else if (start > pos)
+    *stop = start;
+  else
+    {
+      if (start == pos)
+       {
+         /* We found a composition.  Store the corresponding
+            annotation data in BUF.  */
+         int *head = buf;
+         enum composition_method method = COMPOSITION_METHOD (prop);
+         int nchars = COMPOSITION_LENGTH (prop);
+
+         ADD_COMPOSITION_DATA (buf, 0, nchars, method);
+         if (method != COMPOSITION_RELATIVE)
+           {
+             Lisp_Object components;
+             int len, i, i_byte;
+
+             components = COMPOSITION_COMPONENTS (prop);
+             if (VECTORP (components))
+               {
+                 len = XVECTOR (components)->size;
+                 for (i = 0; i < len; i++)
+                   *buf++ = XINT (AREF (components, i));
+               }
+             else if (STRINGP (components))
+               {
+                 len = XSTRING (components)->size;
+                 i = i_byte = 0;
+                 while (i < len)
+                   {
+                     FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
+                     buf++;
+                   }
+               }
+             else if (INTEGERP (components))
+               {
+                 len = 1;
+                 *buf++ = XINT (components);
+               }
+             else if (CONSP (components))
+               {
+                 for (len = 0; CONSP (components);
+                      len++, components = XCDR (components))
+                   *buf++ = XINT (XCAR (components));
+               }
+             else
+               abort ();
+             *head -= len;
+           }
+       }
+
+      if (find_composition (end, limit, &start, &end, &prop,
+                           coding->src_object)
+         && end <= limit)
+       *stop = start;
+      else
+       *stop = limit;
+    }
+  return buf;
+}
+
+
+/* Extract an annotation datum from a text property `charset' at POS of
+   CODING->src_object (buffer or string), store the data in BUF, set
+   *STOP to the position where the value of `charset' property changes
+   (limited by LIMIT), and return the address of the next element of
+   BUF.
+
+   If the property value is nil, set *STOP to the position where the
+   property value is non-nil (limited by LIMIT), and return BUF.  */
+
+static INLINE int *
+handle_charset_annotation (pos, limit, coding, buf, stop)
+     EMACS_INT pos, limit;
+     struct coding_system *coding;
+     int *buf;
+     EMACS_INT *stop;
+{
+  Lisp_Object val, next;
+  int id;
+
+  val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
+  if (! NILP (val) && CHARSETP (val))
+    id = XINT (CHARSET_SYMBOL_ID (val));
+  else
+    id = -1;
+  ADD_CHARSET_DATA (buf, 0, 0, id);
+  next = Fnext_single_property_change (make_number (pos), Qcharset,
+                                      coding->src_object,
+                                      make_number (limit));
+  *stop = XINT (next);
+  return buf;
+}
+
+
  static void
  consume_chars (coding)
       struct coding_system *coding;
  {
    int *buf = coding->charbuf;
-  /* -1 is to compensate for CRLF.  */
-  int *buf_end = coding->charbuf + coding->charbuf_size - 1;
+  int *buf_end = coding->charbuf + coding->charbuf_size;
    const unsigned char *src = coding->source + coding->consumed;
-  int pos = coding->src_pos + coding->consumed_char;
-  int end_pos = coding->src_pos + coding->src_chars;
+  const unsigned char *src_end = coding->source + coding->src_bytes;
+  EMACS_INT pos = coding->src_pos + coding->consumed_char;
+  EMACS_INT end_pos = coding->src_pos + coding->src_chars;
    int multibytep = coding->src_multibyte;
    Lisp_Object eol_type;
    int c;
-  int start, end, stop;
-  Lisp_Object object, prop;
+  EMACS_INT stop, stop_composition, stop_charset;
  
    eol_type = CODING_ID_EOL_TYPE (coding->id);
    if (VECTORP (eol_type))
      eol_type = Qunix;
  
-  object = coding->src_object;
-
    /* Note: composition handling is not yet implemented.  */
    coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
  
-  if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK
-      && find_composition (pos, end_pos, &start, &end, &prop, object)
-      && end <= end_pos
-      && (start >= pos
-         || (find_composition (end, end_pos, &start, &end, &prop, object)
-             && end <= end_pos)))
-    stop = start;
+  if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
+    stop = stop_composition = pos;
+  else
+    stop = stop_composition = end_pos;
+  if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
+    stop = stop_charset = pos;
    else
-    stop = end_pos;
+    stop_charset = end_pos;
  
+  /* Compensate for CRLF and annotation.  */
+  buf_end -= 1 + MAX_ANNOTATION_LENGTH;
    while (buf < buf_end)
      {
        if (pos == stop)
         {
-         int *p;
-
           if (pos == end_pos)
             break;
-         p = save_composition_data (buf, buf_end, prop);
-         if (p == NULL)
-           break;
-         buf = p;
-         if (find_composition (end, end_pos, &start, &end, &prop, object)
-             && end <= end_pos)
-           stop = start;
-         else
-           stop = end_pos;
+         if (pos == stop_composition)
+           buf = handle_composition_annotation (pos, end_pos, coding,
+                                                buf, &stop_composition);
+         if (pos == stop_charset)
+           buf = handle_charset_annotation (pos, end_pos, coding,
+                                            buf, &stop_charset);
+         stop = (stop_composition < stop_charset
+                 ? stop_composition : stop_charset);
         }
  
        if (! multibytep)
-       c = *src++;
+       {
+         EMACS_INT bytes = MULTIBYTE_LENGTH (src, src_end);
+
+         if (bytes > 0)
+           c = STRING_CHAR_ADVANCE (src), pos += bytes;
+         else
+           c = *src++, pos++;
+       }
        else
-       c = STRING_CHAR_ADVANCE (src);
+       c = STRING_CHAR_ADVANCE (src), pos++;
        if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
         c = '\n';
        if (! EQ (eol_type, Qunix))
@@ -5710,7 +6061,6 @@ consume_chars (coding)
             }
         }
        *buf++ = c;
-      pos++;
      }
  
    coding->consumed = src - coding->source;
@@ -5768,7 +6118,9 @@ encode_coding (coding)
      consume_chars (coding);
  
      if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
-      translate_chars (CODING_ATTR_ENCODE_TBL (attrs), coding);
+      translate_chars (coding, CODING_ATTR_ENCODE_TBL (attrs));
+    else if (!NILP (Vstandard_translation_table_for_encode))
+      translate_chars (coding, Vstandard_translation_table_for_encode);
  
      coding_set_destination (coding);
      (*(coding->encoder)) (coding);
@@ -5780,9 +6132,10 @@ encode_coding (coding)
    return (coding->result);
  }
  
-/* Work buffer */
  
-/* List of currently used working buffer.  */
+/* Stack of working buffers used in code conversion.  A nil element
+   means that the code conversion of that level is not using a working
+   buffer.  */
  Lisp_Object Vcode_conversion_work_buf_list;
  
  /* A working buffer used by the top level conversion.  */
@@ -5794,32 +6147,35 @@ Lisp_Object Vcode_conversion_reused_work_buf;
     buffer.  */
  
  Lisp_Object
-make_conversion_work_buffer (multibytep)
-     int multibytep;
+make_conversion_work_buffer (multibytep, depth)
+     int multibytep, depth;
  {
    struct buffer *current = current_buffer;
-  Lisp_Object buf;
+  Lisp_Object buf, name;
  
-  if (NILP (Vcode_conversion_work_buf_list))
+  if (depth == 0)
      {
        if (NILP (Vcode_conversion_reused_work_buf))
         Vcode_conversion_reused_work_buf
-         = Fget_buffer_create (build_string (" *code-conversion-work*"));
-      Vcode_conversion_work_buf_list
-       = Fcons (Vcode_conversion_reused_work_buf, Qnil);
+         = Fget_buffer_create (build_string (" *code-conversion-work<0>*"));
+      buf = Vcode_conversion_reused_work_buf;
      }
    else
      {
-      int depth = XINT (Flength (Vcode_conversion_work_buf_list));
-      char str[128];
+      if (depth < 0)
+       {
+         name = build_string (" *code-conversion-work*");
+         name = Fgenerate_new_buffer_name (name, Qnil);
+       }
+      else
+       {
+         char str[128];
  
-      sprintf (str, " *code-conversion-work*<%d>", depth);
-      Vcode_conversion_work_buf_list
-       = Fcons (Fget_buffer_create (build_string (str)),
-                Vcode_conversion_work_buf_list);
+         sprintf (str, " *code-conversion-work*<%d>", depth);
+         name = build_string (str);
+       }
+      buf = Fget_buffer_create (name);
      }
-
-  buf = XCAR (Vcode_conversion_work_buf_list);
    set_buffer_internal (XBUFFER (buf));
    current_buffer->undo_list = Qt;
    Ferase_buffer ();
@@ -5828,42 +6184,55 @@ make_conversion_work_buffer (multibytep)
    return buf;
  }
  
-static struct coding_system *saved_coding;
+static Lisp_Object
+code_conversion_restore (buffer)
+     Lisp_Object buffer;
+{
+  Lisp_Object workbuf;
+
+  workbuf = XCAR (Vcode_conversion_work_buf_list);
+  if (! NILP (workbuf)
+      && ! EQ (workbuf, Vcode_conversion_reused_work_buf)
+      && ! NILP (Fbuffer_live_p (workbuf)))
+    Fkill_buffer (workbuf);
+  Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
+  set_buffer_internal (XBUFFER (buffer));
+  return Qnil;
+}
  
-Lisp_Object
-code_conversion_restore (info)
-     Lisp_Object info;
+static Lisp_Object
+code_conversion_save (buffer, with_work_buf, multibyte)
+     Lisp_Object buffer;
+     int with_work_buf, multibyte;
  {
-  int depth = XINT (Flength (Vcode_conversion_work_buf_list));
-  Lisp_Object buf;
+  Lisp_Object workbuf;
  
-  if (depth > 0)
+  if (with_work_buf)
      {
-      buf = XCAR (Vcode_conversion_work_buf_list);
-      Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
-      if (depth > 1 && !NILP (Fbuffer_live_p (buf)))
-       Fkill_buffer (buf);
-    }
-
-  if (EQ (saved_coding->dst_object, Qt)
-      && saved_coding->destination)
-    xfree (saved_coding->destination);
+      int depth = XINT (Flength (Vcode_conversion_work_buf_list));
  
-  return save_excursion_restore (info);
+      workbuf = make_conversion_work_buffer (multibyte, depth);
+    }
+  else
+    workbuf = Qnil;
+  Vcode_conversion_work_buf_list
+    = Fcons (workbuf, Vcode_conversion_work_buf_list);
+  record_unwind_protect (code_conversion_restore, buffer);
+  return workbuf;
  }
  
-
  int
  decode_coding_gap (coding, chars, bytes)
       struct coding_system *coding;
       EMACS_INT chars, bytes;
  {
    int count = specpdl_ptr - specpdl;
+  Lisp_Object buffer;
  
-  saved_coding = coding;
-  record_unwind_protect (code_conversion_restore, save_excursion_save ());
+  buffer = Fcurrent_buffer ();
+  code_conversion_save (buffer, 0, 0);
  
-  coding->src_object = Fcurrent_buffer ();
+  coding->src_object = buffer;
    coding->src_chars = chars;
    coding->src_bytes = bytes;
    coding->src_pos = -chars;
@@ -5873,6 +6242,7 @@ decode_coding_gap (coding, chars, bytes)
    coding->dst_pos = PT;
    coding->dst_pos_byte = PT_BYTE;
    coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
+  coding->mode |= CODING_MODE_LAST_BLOCK;
  
    if (CODING_REQUIRE_DETECTION (coding))
      detect_coding (coding);
@@ -5891,10 +6261,9 @@ encode_coding_gap (coding, chars, bytes)
    int count = specpdl_ptr - specpdl;
    Lisp_Object buffer;
  
-  saved_coding = coding;
-  record_unwind_protect (code_conversion_restore, save_excursion_save ());
-
    buffer = Fcurrent_buffer ();
+  code_conversion_save (buffer, 0, 0);
+
    coding->src_object = buffer;
    coding->src_chars = chars;
    coding->src_bytes = bytes;
@@ -5936,7 +6305,7 @@ encode_coding_gap (coding, chars, bytes)
     set in CODING->dst_object.
  
     If it is Qnil, the decoded text is stored at CODING->destination.
-   The called must allocate CODING->dst_bytes bytes at
+   The caller must allocate CODING->dst_bytes bytes at
     CODING->destination by xmalloc.  If the decoded text is longer than
     CODING->dst_bytes, CODING->destination is relocated by xrealloc.
   */
@@ -5955,9 +6324,10 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
    EMACS_INT chars = to - from;
    EMACS_INT bytes = to_byte - from_byte;
    Lisp_Object attrs;
+  Lisp_Object buffer;
+  int saved_pt = -1, saved_pt_byte;
  
-  saved_coding = coding;
-  record_unwind_protect (code_conversion_restore, save_excursion_save ());
+  buffer = Fcurrent_buffer ();
  
    if (NILP (dst_object))
      {
@@ -5982,6 +6352,7 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
         move_gap_both (from, from_byte);
        if (EQ (src_object, dst_object))
         {
+         saved_pt = PT, saved_pt_byte = PT_BYTE;
           TEMP_SET_PT_BOTH (from, from_byte);
           del_range_both (from, from_byte, to, to_byte, 1);
           coding->src_pos = -chars;
@@ -5998,16 +6369,18 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
      detect_coding (coding);
    attrs = CODING_ID_ATTRS (coding->id);
  
-  if (! NILP (CODING_ATTR_POST_READ (attrs))
-      || EQ (dst_object, Qt))
+  if (EQ (dst_object, Qt)
+      || (! NILP (CODING_ATTR_POST_READ (attrs))
+         && NILP (dst_object)))
      {
-      coding->dst_object = make_conversion_work_buffer (1);
+      coding->dst_object = code_conversion_save (buffer, 1, 1);
        coding->dst_pos = BEG;
        coding->dst_pos_byte = BEG_BYTE;
        coding->dst_multibyte = 1;
      }
    else if (BUFFERP (dst_object))
      {
+      code_conversion_save (buffer, 0, 0);
        coding->dst_object = dst_object;
        coding->dst_pos = BUF_PT (XBUFFER (dst_object));
        coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
@@ -6016,6 +6389,7 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
      }
    else
      {
+      code_conversion_save (buffer, 0, 0);
        coding->dst_object = Qnil;
        coding->dst_multibyte = 1;
      }
@@ -6065,6 +6439,25 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
         }
      }
  
+  if (saved_pt >= 0)
+    {
+      /* This is the case of:
+        (BUFFERP (src_object) && EQ (src_object, dst_object))
+        As we have moved PT while replacing the original buffer
+        contents, we must recover it now.  */
+      set_buffer_internal (XBUFFER (src_object));
+      if (saved_pt < from)
+       TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
+      else if (saved_pt < from + chars)
+       TEMP_SET_PT_BOTH (from, from_byte);
+      else if (! NILP (current_buffer->enable_multibyte_characters))
+       TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
+                         saved_pt_byte + (coding->produced - bytes));
+      else
+       TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
+                         saved_pt_byte + (coding->produced - bytes));
+    }
+
    unbind_to (count, Qnil);
  }
  
@@ -6081,9 +6474,10 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
    EMACS_INT chars = to - from;
    EMACS_INT bytes = to_byte - from_byte;
    Lisp_Object attrs;
+  Lisp_Object buffer;
+  int saved_pt = -1, saved_pt_byte;
  
-  saved_coding = coding;
-  record_unwind_protect (code_conversion_restore, save_excursion_save ());
+  buffer = Fcurrent_buffer ();
  
    coding->src_object = src_object;
    coding->src_chars = chars;
@@ -6094,7 +6488,8 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
  
    if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
      {
-      coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
+      coding->src_object = code_conversion_save (buffer, 1,
+                                                coding->src_multibyte);
        set_buffer_internal (XBUFFER (coding->src_object));
        if (STRINGP (src_object))
         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
@@ -6106,6 +6501,7 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
        if (EQ (src_object, dst_object))
         {
           set_buffer_internal (XBUFFER (src_object));
+         saved_pt = PT, saved_pt_byte = PT_BYTE;
           del_range_both (from, from_byte, to, to_byte, 1);
           set_buffer_internal (XBUFFER (coding->src_object));
         }
@@ -6123,26 +6519,31 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
      }
    else if (STRINGP (src_object))
      {
+      code_conversion_save (buffer, 0, 0);
        coding->src_pos = from;
        coding->src_pos_byte = from_byte;
      }
    else if (BUFFERP (src_object))
      {
+      code_conversion_save (buffer, 0, 0);
        set_buffer_internal (XBUFFER (src_object));
-      if (from != GPT)
-       move_gap_both (from, from_byte);
        if (EQ (src_object, dst_object))
         {
-         del_range_both (from, from_byte, to, to_byte, 1);
-         coding->src_pos = -chars;
-         coding->src_pos_byte = -bytes;
+         saved_pt = PT, saved_pt_byte = PT_BYTE;
+         coding->src_object = del_range_1 (from, to, 1, 1);
+         coding->src_pos = 0;
+         coding->src_pos_byte = 0;
         }
        else
         {
+         if (from < GPT && to >= GPT)
+           move_gap_both (from, from_byte);
           coding->src_pos = from;
           coding->src_pos_byte = from_byte;
         }
      }
+  else
+    code_conversion_save (buffer, 0, 0);
  
    if (BUFFERP (dst_object))
      {
@@ -6190,6 +6591,25 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
         }
      }
  
+  if (saved_pt >= 0)
+    {
+      /* This is the case of:
+        (BUFFERP (src_object) && EQ (src_object, dst_object))
+        As we have moved PT while replacing the original buffer
+        contents, we must recover it now.  */
+      set_buffer_internal (XBUFFER (src_object));
+      if (saved_pt < from)
+       TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
+      else if (saved_pt < from + chars)
+       TEMP_SET_PT_BOTH (from, from_byte);
+      else if (! NILP (current_buffer->enable_multibyte_characters))
+       TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
+                         saved_pt_byte + (coding->produced - bytes));
+      else
+       TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
+                         saved_pt_byte + (coding->produced - bytes));
+    }
+
    unbind_to (count, Qnil);
  }
  
@@ -6250,9 +6670,7 @@ If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */
  DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
         1, 1, 0,
         doc: /* Check validity of CODING-SYSTEM.
-If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
-It is valid if it is a symbol with a non-nil `coding-system' property.
-The value of property should be a vector of length 5.  */)
+If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.  */)
    (coding_system)
       Lisp_Object coding_system;
  {
@@ -6288,13 +6706,11 @@ detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
       Lisp_Object coding_system;
  {
    unsigned char *src_end = src + src_bytes;
-  int mask = CATEGORY_MASK_ANY;
-  int detected = 0;
-  int c, i;
    Lisp_Object attrs, eol_type;
    Lisp_Object val;
    struct coding_system coding;
    int id;
+  struct coding_detection_info detect_info;
  
    if (NILP (coding_system))
      coding_system = Qundecided;
@@ -6309,18 +6725,22 @@ detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
    coding.consumed = 0;
    coding.mode |= CODING_MODE_LAST_BLOCK;
  
+  detect_info.checked = detect_info.found = detect_info.rejected = 0;
+
    /* At first, detect text-format if necessary.  */
    if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided)
      {
+      enum coding_category category;
+      struct coding_system *this;
+      int c, i;
+
        for (; src < src_end; src++)
         {
           c = *src;
           if (c & 0x80
               || (c < 0x20 && (c == ISO_CODE_ESC
                                || c == ISO_CODE_SI
-                              || c == ISO_CODE_SO
-                              /* Most UTF-16 text contains '\0'. */
-                              || !c)))
+                              || c == ISO_CODE_SO)))
             break;
         }
        coding.head_ascii = src - coding.source;
@@ -6328,64 +6748,92 @@ detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
        if (src < src_end)
         for (i = 0; i < coding_category_raw_text; i++)
           {
-           enum coding_category category = coding_priorities[i];
-           struct coding_system *this = coding_categories + category;
+           category = coding_priorities[i];
+           this = coding_categories + category;
  
             if (this->id < 0)
               {
                 /* No coding system of this category is defined.  */
-               mask &= ~(1 << category);
+               detect_info.rejected |= (1 << category);
               }
-           else if (category >= coding_category_raw_text
-                    || detected & (1 << category))
+           else if (category >= coding_category_raw_text)
               continue;
+           else if (detect_info.checked & (1 << category))
+             {
+               if (highest
+                   && (detect_info.found & (1 << category)))
+                 break;
+             }
             else
               {
-               detected |= detected_mask[category];
-               if ((*(coding_categories[category].detector)) (&coding, &mask)
+               if ((*(this->detector)) (&coding, &detect_info)
                     && highest
-                   && (mask & (1 << category)))
-                 {
-                   mask = 1 << category;
-                   break;
-                 }
+                   && (detect_info.found & (1 << category)))
+                 break;
               }
           }
  
-      if (!mask)
+
+      if (detect_info.rejected == CATEGORY_MASK_ANY)
         {
+         detect_info.found = CATEGORY_MASK_RAW_TEXT;
           id = coding_categories[coding_category_raw_text].id;
           val = Fcons (make_number (id), Qnil);
         }
-      else if (mask == CATEGORY_MASK_ANY)
+      else if (! detect_info.rejected && ! detect_info.found)
         {
+         detect_info.found = CATEGORY_MASK_ANY;
           id = coding_categories[coding_category_undecided].id;
           val = Fcons (make_number (id), Qnil);
         }
        else if (highest)
         {
-         for (i = 0; i < coding_category_raw_text; i++)
-           if (mask & (1 << coding_priorities[i]))
-             {
-               id = coding_categories[coding_priorities[i]].id;
-               val = Fcons (make_number (id), Qnil);
-               break;
-             }
-       }       
+         if (detect_info.found)
+           {
+             detect_info.found = 1 << category;
+             val = Fcons (make_number (this->id), Qnil);
+           }
+         else
+           for (i = 0; i < coding_category_raw_text; i++)
+             if (! (detect_info.rejected & (1 << coding_priorities[i])))
+               {
+                 detect_info.found = 1 << coding_priorities[i];
+                 id = coding_categories[coding_priorities[i]].id;
+                 val = Fcons (make_number (id), Qnil);
+                 break;
+               }
+       }
        else
         {
+         int mask = detect_info.rejected | detect_info.found;
+         int found = 0;
           val = Qnil;
+
           for (i = coding_category_raw_text - 1; i >= 0; i--)
-           if (mask & (1 << coding_priorities[i]))
-             {
-               id = coding_categories[coding_priorities[i]].id;
-               val = Fcons (make_number (id), val);
-             }
+           {
+             category = coding_priorities[i];
+             if (! (mask & (1 << category)))
+               {
+                 found |= 1 << category;
+                 id = coding_categories[category].id;
+                 val = Fcons (make_number (id), val);
+               }
+           }
+         for (i = coding_category_raw_text - 1; i >= 0; i--)
+           {
+             category = coding_priorities[i];
+             if (detect_info.found & (1 << category))
+               {
+                 id = coding_categories[category].id;
+                 val = Fcons (make_number (id), val);
+               }
+           }
+         detect_info.found |= found;
         }
      }
    else
      {
-      mask = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
+      detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
        val = Fcons (make_number (coding.id), Qnil);
      }
  
@@ -6396,13 +6844,15 @@ detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
  
      if (VECTORP (eol_type))
        {
-       if (mask & ~CATEGORY_MASK_UTF_16)
+       if (detect_info.found & ~CATEGORY_MASK_UTF_16)
           normal_eol = detect_eol (coding.source, src_bytes,
                                    coding_category_raw_text);
-       if (mask & (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_BE_NOSIG))
+       if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
+                                | CATEGORY_MASK_UTF_16_BE_NOSIG))
           utf_16_be_eol = detect_eol (coding.source, src_bytes,
                                       coding_category_utf_16_be);
-       if (mask & (CATEGORY_MASK_UTF_16_LE | CATEGORY_MASK_UTF_16_LE_NOSIG))
+       if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
+                                | CATEGORY_MASK_UTF_16_LE_NOSIG))
           utf_16_le_eol = detect_eol (coding.source, src_bytes,
                                       coding_category_utf_16_le);
        }
@@ -6573,12 +7023,12 @@ DEFUN ("find-coding-systems-region-internal",
        if (XINT (end) - XINT (start) == end_byte - start_byte)
         return Qt;
  
-      if (start < GPT && end > GPT)
+      if (XINT (start) < GPT && XINT (end) > GPT)
         {
-         if ((GPT - start) < (end - GPT))
-           move_gap_both (start, start_byte);
+         if ((GPT - XINT (start)) < (XINT (end) - GPT))
+           move_gap_both (XINT (start), start_byte);
           else
-           move_gap_both (end, end_byte);
+           move_gap_both (XINT (end), end_byte);
         }
      }
  
@@ -6704,14 +7154,14 @@ buffer positions.  END is ignored.  */)
        if (XINT (end) - XINT (start) == end_byte - start_byte)
         return Qt;
  
-      if (start < GPT && end > GPT)
+      if (XINT (start) < GPT && XINT (end) > GPT)
         {
-         if ((GPT - start) < (end - GPT))
-           move_gap_both (start, start_byte);
+         if ((GPT - XINT (start)) < (XINT (end) - GPT))
+           move_gap_both (XINT (start), start_byte);
           else
-           move_gap_both (end, end_byte);
+           move_gap_both (XINT (end), end_byte);
         }
-      pos = start;
+      pos = XINT (start);
      }
  
    list = Qnil;
@@ -7466,6 +7916,8 @@ usage: (define-coding-system-internal ...)  */)
      XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
    CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
  
+  CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
+
    val = args[coding_arg_decode_translation_table];
    if (! NILP (val))
      CHECK_CHAR_TABLE (val);
@@ -7499,6 +7951,7 @@ usage: (define-coding-system-internal ...)  */)
  
    if (EQ (coding_type, Qcharset))
      {
+      Lisp_Object list;
        /* Generate a lisp vector of 256 elements.  Each element is nil,
          integer, or a list of charset IDs.
  
@@ -7512,14 +7965,31 @@ usage: (define-coding-system-internal ...)  */)
          of one of them.  The list is sorted by dimensions of the
          charsets.  A charset of smaller dimension comes firtst.
        */
+      for (list = Qnil, tail = charset_list; CONSP (tail); tail = XCDR (tail))
+       {
+         struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
+
+         if (charset->method == CHARSET_METHOD_SUPERSET)
+           {
+             val = CHARSET_SUPERSET (charset);
+             for (; CONSP (val); val = XCDR (val))
+               list = Fcons (XCAR (XCAR (val)), list); 
+           }
+         else
+           list = Fcons (XCAR (tail), list);
+       }
+
        val = Fmake_vector (make_number (256), Qnil);
  
-      for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
+      for (tail = Fnreverse (list); CONSP (tail); tail = XCDR (tail))
         {
           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
           int dim = CHARSET_DIMENSION (charset);
           int idx = (dim - 1) * 4;
           
+         if (CHARSET_ASCII_COMPATIBLE_P (charset))
+           CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
+
           for (i = charset->code_space[idx];
                i <= charset->code_space[idx + 1]; i++)
             {
@@ -7582,21 +8052,31 @@ usage: (define-coding-system-internal ...)  */)
        valids = Fmake_string (make_number (256), make_number (0));
        for (tail = val; !NILP (tail); tail = Fcdr (tail))
         {
+         int from, to;
+
           val = Fcar (tail);
           if (INTEGERP (val))
-           ASET (valids, XINT (val), make_number (1));
+           {
+             from = to = XINT (val);
+             if (from < 0 || from > 255)
+               args_out_of_range_3 (val, make_number (0), make_number (255));
+           }
           else
             {
-             int from, to;
-
               CHECK_CONS (val);
               CHECK_NUMBER (XCAR (val));
               CHECK_NUMBER (XCDR (val));
               from = XINT (XCAR (val));
+             if (from < 0 || from > 255)
+               args_out_of_range_3 (XCAR (val),
+                                    make_number (0), make_number (255));
               to = XINT (XCDR (val));
-             for (i = from; i <= to; i++)
-               ASET (valids, i, make_number (1));
+             if (to < from || to > 255)
+               args_out_of_range_3 (XCDR (val),
+                                    XCAR (val), make_number (255));
             }
+         for (i = from; i <= to; i++)
+           XSTRING (valids)->data[i] = 1;
         }
        ASET (attrs, coding_attr_ccl_valids, valids);
        
@@ -7606,6 +8086,8 @@ usage: (define-coding-system-internal ...)  */)
      {
        Lisp_Object bom, endian;
  
+      CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
+
        if (nargs < coding_arg_utf16_max)
         goto short_args;
  
@@ -7619,22 +8101,27 @@ usage: (define-coding-system-internal ...)  */)
        ASET (attrs, coding_attr_utf_16_bom, bom);
  
        endian = args[coding_arg_utf16_endian];
+      CHECK_SYMBOL (endian);
+      if (NILP (endian))
+       endian = Qbig;
+      else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
+       error ("Invalid endian: %s", XSYMBOL (endian)->name->data);
        ASET (attrs, coding_attr_utf_16_endian, endian);
  
        category = (CONSP (bom)
                   ? coding_category_utf_16_auto
                   : NILP (bom)
-                 ? (NILP (endian)
+                 ? (EQ (endian, Qbig)
                      ? coding_category_utf_16_be_nosig
                      : coding_category_utf_16_le_nosig)
-                 : (NILP (endian)
+                 : (EQ (endian, Qbig)
                      ? coding_category_utf_16_be
                      : coding_category_utf_16_le));
      }
    else if (EQ (coding_type, Qiso_2022))
      {
        Lisp_Object initial, reg_usage, request, flags;
-      int i, id;
+      int i;
  
        if (nargs < coding_arg_iso2022_max)
         goto short_args;
@@ -7646,8 +8133,12 @@ usage: (define-coding-system-internal ...)  */)
           val = Faref (initial, make_number (i));
           if (! NILP (val))
             {
-             CHECK_CHARSET_GET_ID (val, id);
-             ASET (initial, i, make_number (id));
+             struct charset *charset;
+
+             CHECK_CHARSET_GET_CHARSET (val, charset);
+             ASET (initial, i, make_number (CHARSET_ID (charset)));
+             if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
+               CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
             }
           else
             ASET (initial, i, make_number (-1));
@@ -7703,12 +8194,15 @@ usage: (define-coding-system-internal ...)  */)
                       ? coding_category_iso_8_1
                       : coding_category_iso_8_2);
         }
+      if (category != coding_category_iso_8_1
+         && category != coding_category_iso_8_2)
+       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
      }
    else if (EQ (coding_type, Qemacs_mule))
      {
        if (EQ (args[coding_arg_charset_list], Qemacs_mule))
         ASET (attrs, coding_attr_emacs_mule_full, Qt);
-
+      CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
        category = coding_category_emacs_mule;
      }
    else if (EQ (coding_type, Qshift_jis))
@@ -7723,6 +8217,8 @@ usage: (define-coding-system-internal ...)  */)
        if (CHARSET_DIMENSION (charset) != 1)
         error ("Dimension of charset %s is not one",
                XSYMBOL (CHARSET_NAME (charset))->name->data);
+      if (CHARSET_ASCII_COMPATIBLE_P (charset))
+       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
  
        charset_list = XCDR (charset_list);
        charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
@@ -7750,6 +8246,8 @@ usage: (define-coding-system-internal ...)  */)
        if (CHARSET_DIMENSION (charset) != 1)
         error ("Dimension of charset %s is not one",
                XSYMBOL (CHARSET_NAME (charset))->name->data);
+      if (CHARSET_ASCII_COMPATIBLE_P (charset))
+       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
  
        charset_list = XCDR (charset_list);
        charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
@@ -7761,9 +8259,15 @@ usage: (define-coding-system-internal ...)  */)
        Vbig5_coding_system = name;
      }
    else if (EQ (coding_type, Qraw_text))
-    category = coding_category_raw_text;
+    {
+      category = coding_category_raw_text;
+      CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
+    }
    else if (EQ (coding_type, Qutf_8))
-    category = coding_category_utf_8;
+    {
+      category = coding_category_utf_8;
+      CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
+    }
    else if (EQ (coding_type, Qundecided))
      category = coding_category_undecided;
    else
@@ -7826,7 +8330,7 @@ usage: (define-coding-system-internal ...)  */)
  }
  
  /* Fixme: should this record the alias relationships for
-   diagnostics?  */
+   diagnostics?  Should it update coding-system-list?  */
  DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
         Sdefine_coding_system_alias, 2, 2, 0,
         doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
@@ -8052,12 +8556,6 @@ syms_of_coding ()
    DEFSYM (Qutf_8, "utf-8");
  
    DEFSYM (Qutf_16, "utf-16");
-  DEFSYM (Qutf_16_be, "utf-16-be");
-  DEFSYM (Qutf_16_be_nosig, "utf-16-be-nosig");
-  DEFSYM (Qutf_16_le, "utf-16-l3");
-  DEFSYM (Qutf_16_le_nosig, "utf-16-le-nosig");
-  DEFSYM (Qsignature, "signature");
-  DEFSYM (Qendian, "endian");
    DEFSYM (Qbig, "big");
    DEFSYM (Qlittle, "little");