(CATEGORY_MASK_ANY): Add CATEGORY_MASK_UTF_16_AUTO.
[bpt/emacs.git] / src / coding.c
index bca7575..1044309 100644 (file)
@@ -1,8 +1,8 @@
 /* Coding system handler (conversion, detection, etc).
    Copyright (C) 2001, 2002, 2003, 2004, 2005,
 /* Coding system handler (conversion, detection, etc).
    Copyright (C) 2001, 2002, 2003, 2004, 2005,
-                 2006, 2007 Free Software Foundation, Inc.
+                 2006, 2007, 2008 Free Software Foundation, Inc.
    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
-     2005, 2006, 2007
+     2005, 2006, 2007, 2008
      National Institute of Advanced Industrial Science and Technology (AIST)
      Registration Number H14PRO021
    Copyright (C) 2003
      National Institute of Advanced Industrial Science and Technology (AIST)
      Registration Number H14PRO021
    Copyright (C) 2003
@@ -625,6 +625,7 @@ enum coding_category
    | CATEGORY_MASK_ISO_7_ELSE          \
    | CATEGORY_MASK_ISO_8_ELSE          \
    | CATEGORY_MASK_UTF_8               \
    | CATEGORY_MASK_ISO_7_ELSE          \
    | CATEGORY_MASK_ISO_8_ELSE          \
    | CATEGORY_MASK_UTF_8               \
+   | CATEGORY_MASK_UTF_16_AUTO         \
    | CATEGORY_MASK_UTF_16_BE           \
    | CATEGORY_MASK_UTF_16_LE           \
    | CATEGORY_MASK_UTF_16_BE_NOSIG     \
    | CATEGORY_MASK_UTF_16_BE           \
    | CATEGORY_MASK_UTF_16_LE           \
    | CATEGORY_MASK_UTF_16_BE_NOSIG     \
@@ -657,7 +658,8 @@ enum coding_category
      | CATEGORY_MASK_ISO_ELSE)
 
 #define CATEGORY_MASK_UTF_16           \
      | CATEGORY_MASK_ISO_ELSE)
 
 #define CATEGORY_MASK_UTF_16           \
-  (CATEGORY_MASK_UTF_16_BE             \
+  (CATEGORY_MASK_UTF_16_AUTO           \
+   | CATEGORY_MASK_UTF_16_BE           \
    | CATEGORY_MASK_UTF_16_LE           \
    | CATEGORY_MASK_UTF_16_BE_NOSIG     \
    | CATEGORY_MASK_UTF_16_LE_NOSIG)
    | CATEGORY_MASK_UTF_16_LE           \
    | CATEGORY_MASK_UTF_16_BE_NOSIG     \
    | CATEGORY_MASK_UTF_16_LE_NOSIG)
@@ -955,6 +957,11 @@ record_conversion_result (struct coding_system *coding,
   } while (0)
 
 
   } while (0)
 
 
+/* If there are at least BYTES length of room at dst, allocate memory
+   for coding->destination and update dst and dst_end.  We don't have
+   to take care of coding->source which will be relocated.  It is
+   handled by calling coding_set_source in encode_coding.  */
+
 #define ASSURE_DESTINATION(bytes)                              \
   do {                                                         \
     if (dst + (bytes) >= dst_end)                              \
 #define ASSURE_DESTINATION(bytes)                              \
   do {                                                         \
     if (dst + (bytes) >= dst_end)                              \
@@ -967,6 +974,66 @@ record_conversion_result (struct coding_system *coding,
   } while (0)
 
 
   } while (0)
 
 
+/* Store multibyte form of the character C in P, and advance P to the
+   end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
+   never calls MAYBE_UNIFY_CHAR.  */
+
+#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)     \
+  do {                                         \
+    if ((c) <= MAX_1_BYTE_CHAR)                        \
+      *(p)++ = (c);                            \
+    else if ((c) <= MAX_2_BYTE_CHAR)           \
+      *(p)++ = (0xC0 | ((c) >> 6)),            \
+       *(p)++ = (0x80 | ((c) & 0x3F));         \
+    else if ((c) <= MAX_3_BYTE_CHAR)           \
+      *(p)++ = (0xE0 | ((c) >> 12)),           \
+       *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
+       *(p)++ = (0x80 | ((c) & 0x3F));         \
+    else if ((c) <= MAX_4_BYTE_CHAR)           \
+      *(p)++ = (0xF0 | (c >> 18)),             \
+       *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
+       *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
+       *(p)++ = (0x80 | (c & 0x3F));           \
+    else if ((c) <= MAX_5_BYTE_CHAR)           \
+      *(p)++ = 0xF8,                           \
+       *(p)++ = (0x80 | ((c >> 18) & 0x0F)),   \
+       *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
+       *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
+       *(p)++ = (0x80 | (c & 0x3F));           \
+    else                                       \
+      (p) += BYTE8_STRING ((c) - 0x3FFF80, p); \
+  } while (0)
+
+
+/* Return the character code of character whose multibyte form is at
+   P, and advance P to the end of the multibyte form.  This is like
+   STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.  */
+
+#define STRING_CHAR_ADVANCE_NO_UNIFY(p)                                \
+  (!((p)[0] & 0x80)                                            \
+   ? *(p)++                                                    \
+   : ! ((p)[0] & 0x20)                                         \
+   ? ((p) += 2,                                                        \
+      ((((p)[-2] & 0x1F) << 6)                                 \
+       | ((p)[-1] & 0x3F)                                      \
+       | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))   \
+   : ! ((p)[0] & 0x10)                                         \
+   ? ((p) += 3,                                                        \
+      ((((p)[-3] & 0x0F) << 12)                                        \
+       | (((p)[-2] & 0x3F) << 6)                               \
+       | ((p)[-1] & 0x3F)))                                    \
+   : ! ((p)[0] & 0x08)                                         \
+   ? ((p) += 4,                                                        \
+      ((((p)[-4] & 0xF) << 18)                                 \
+       | (((p)[-3] & 0x3F) << 12)                              \
+       | (((p)[-2] & 0x3F) << 6)                               \
+       | ((p)[-1] & 0x3F)))                                    \
+   : ((p) += 5,                                                        \
+      ((((p)[-4] & 0x3F) << 18)                                        \
+       | (((p)[-3] & 0x3F) << 12)                              \
+       | (((p)[-2] & 0x3F) << 6)                               \
+       | ((p)[-1] & 0x3F))))
+
 
 static void
 coding_set_source (coding)
 
 static void
 coding_set_source (coding)
@@ -999,7 +1066,7 @@ coding_set_destination (coding)
     {
       if (coding->src_pos < 0)
        {
     {
       if (coding->src_pos < 0)
        {
-         coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
+         coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
          coding->dst_bytes = (GAP_END_ADDR
                               - (coding->src_bytes - coding->consumed)
                               - coding->destination);
          coding->dst_bytes = (GAP_END_ADDR
                               - (coding->src_bytes - coding->consumed)
                               - coding->destination);
@@ -1009,7 +1076,7 @@ coding_set_destination (coding)
          /* We are sure that coding->dst_pos_byte is before the gap
             of the buffer. */
          coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
          /* We are sure that coding->dst_pos_byte is before the gap
             of the buffer. */
          coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
-                                + coding->dst_pos_byte - 1);
+                                + coding->dst_pos_byte - BEG_BYTE);
          coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
                               - coding->destination);
        }
          coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
                               - coding->destination);
        }
@@ -1032,20 +1099,23 @@ coding_alloc_by_realloc (coding, bytes)
 }
 
 static void
 }
 
 static void
-coding_alloc_by_making_gap (coding, offset, bytes)
+coding_alloc_by_making_gap (coding, gap_head_used, bytes)
      struct coding_system *coding;
      struct coding_system *coding;
-     EMACS_INT offset, bytes;
+     EMACS_INT gap_head_used, bytes;
 {
 {
-  if (BUFFERP (coding->dst_object)
-      && EQ (coding->src_object, coding->dst_object))
+  if (EQ (coding->src_object, coding->dst_object))
     {
     {
-      EMACS_INT add = offset + (coding->src_bytes - coding->consumed);
+      /* The gap may contain the produced data at the head and not-yet
+        consumed data at the tail.  To preserve those data, we at
+        first make the gap size to zero, then increase the gap
+        size.  */
+      EMACS_INT add = GAP_SIZE;
 
 
-      GPT += offset, GPT_BYTE += offset;
-      GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
+      GPT += gap_head_used, GPT_BYTE += gap_head_used;
+      GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
       make_gap (bytes);
       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
       make_gap (bytes);
       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
-      GPT -= offset, GPT_BYTE -= offset;
+      GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
     }
   else
     {
     }
   else
     {
@@ -1068,7 +1138,11 @@ alloc_destination (coding, nbytes, dst)
   EMACS_INT offset = dst - coding->destination;
 
   if (BUFFERP (coding->dst_object))
   EMACS_INT offset = dst - coding->destination;
 
   if (BUFFERP (coding->dst_object))
-    coding_alloc_by_making_gap (coding, offset, nbytes);
+    {
+      struct buffer *buf = XBUFFER (coding->dst_object);
+
+      coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
+    }
   else
     coding_alloc_by_realloc (coding, nbytes);
   record_conversion_result (coding, CODING_RESULT_SUCCESS);
   else
     coding_alloc_by_realloc (coding, nbytes);
   record_conversion_result (coding, CODING_RESULT_SUCCESS);
@@ -1225,6 +1299,8 @@ decode_coding_utf_8 (coding)
   int consumed_chars = 0, consumed_chars_base;
   int multibytep = coding->src_multibyte;
   Lisp_Object attr, charset_list;
   int consumed_chars = 0, consumed_chars_base;
   int multibytep = coding->src_multibyte;
   Lisp_Object attr, charset_list;
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+  int byte_after_cr = -1;
 
   CODING_GET_INFO (coding, attr, charset_list);
 
 
   CODING_GET_INFO (coding, attr, charset_list);
 
@@ -1238,13 +1314,18 @@ decode_coding_utf_8 (coding)
       if (charbuf >= charbuf_end)
        break;
 
       if (charbuf >= charbuf_end)
        break;
 
-      ONE_MORE_BYTE (c1);
+      if (byte_after_cr >= 0)
+       c1 = byte_after_cr, byte_after_cr = -1;
+      else
+       ONE_MORE_BYTE (c1);
       if (c1 < 0)
        {
          c = - c1;
        }
       else if (UTF_8_1_OCTET_P(c1))
        {
       if (c1 < 0)
        {
          c = - c1;
        }
       else if (UTF_8_1_OCTET_P(c1))
        {
+         if (eol_crlf && c1 == '\r')
+           ONE_MORE_BYTE (byte_after_cr);
          c = c1;
        }
       else
          c = c1;
        }
       else
@@ -1353,7 +1434,7 @@ encode_coding_utf_8 (coding)
            }
          else
            {
            }
          else
            {
-             CHAR_STRING_ADVANCE (c, pend);
+             CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
              for (p = str; p < pend; p++)
                EMIT_ONE_BYTE (*p);
            }
              for (p = str; p < pend; p++)
                EMIT_ONE_BYTE (*p);
            }
@@ -1370,7 +1451,7 @@ encode_coding_utf_8 (coding)
          if (CHAR_BYTE8_P (c))
            *dst++ = CHAR_TO_BYTE8 (c);
          else
          if (CHAR_BYTE8_P (c))
            *dst++ = CHAR_TO_BYTE8 (c);
          else
-           dst += CHAR_STRING (c, dst);
+           CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
          produced_chars++;
        }
     }
          produced_chars++;
        }
     }
@@ -1434,11 +1515,44 @@ detect_coding_utf_16 (coding, detect_info)
                                | CATEGORY_MASK_UTF_16_BE_NOSIG
                                | CATEGORY_MASK_UTF_16_LE_NOSIG);
     }
                                | CATEGORY_MASK_UTF_16_BE_NOSIG
                                | CATEGORY_MASK_UTF_16_LE_NOSIG);
     }
-  else if (c1 >= 0 && c2 >= 0)
+  else
     {
     {
+      /* We check the dispersion of Eth and Oth bytes where E is even and
+        O is odd.  If both are high, we assume binary data.*/
+      unsigned char e[256], o[256];
+      unsigned e_num = 1, o_num = 1;
+
+      memset (e, 0, 256);
+      memset (o, 0, 256);
+      e[c1] = 1;
+      o[c2] = 1;
+
       detect_info->rejected
        |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
       detect_info->rejected
        |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
+
+      while (1)
+       {
+         ONE_MORE_BYTE (c1);
+         ONE_MORE_BYTE (c2);
+         if (! e[c1])
+           {
+             e[c1] = 1;
+             e_num++;
+             if (e_num >= 128)
+               break;
+           }
+         if (! o[c2])
+           {
+             o[c1] = 1;
+             o_num++;
+             if (o_num >= 128)
+               break;
+           }
+       }
+      detect_info->rejected |= CATEGORY_MASK_UTF_16;
+      return 0;
     }
     }
+
  no_more_source:
   return 1;
 }
  no_more_source:
   return 1;
 }
@@ -1458,6 +1572,8 @@ decode_coding_utf_16 (coding)
   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
   int surrogate = CODING_UTF_16_SURROGATE (coding);
   Lisp_Object attr, charset_list;
   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
   int surrogate = CODING_UTF_16_SURROGATE (coding);
   Lisp_Object attr, charset_list;
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+  int byte_after_cr1 = -1, byte_after_cr2 = -1;
 
   CODING_GET_INFO (coding, attr, charset_list);
 
 
   CODING_GET_INFO (coding, attr, charset_list);
 
@@ -1497,13 +1613,19 @@ decode_coding_utf_16 (coding)
       if (charbuf + 2 >= charbuf_end)
        break;
 
       if (charbuf + 2 >= charbuf_end)
        break;
 
-      ONE_MORE_BYTE (c1);
+      if (byte_after_cr1 >= 0)
+       c1 = byte_after_cr1, byte_after_cr1 = -1;
+      else
+       ONE_MORE_BYTE (c1);
       if (c1 < 0)
        {
          *charbuf++ = -c1;
          continue;
        }
       if (c1 < 0)
        {
          *charbuf++ = -c1;
          continue;
        }
-      ONE_MORE_BYTE (c2);
+      if (byte_after_cr2 >= 0)
+       c2 = byte_after_cr2, byte_after_cr2 = -1;
+      else
+       ONE_MORE_BYTE (c2);
       if (c2 < 0)
        {
          *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
       if (c2 < 0)
        {
          *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
@@ -1512,6 +1634,7 @@ decode_coding_utf_16 (coding)
        }
       c = (endian == utf_16_big_endian
           ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
        }
       c = (endian == utf_16_big_endian
           ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
+
       if (surrogate)
        {
          if (! UTF_16_LOW_SURROGATE_P (c))
       if (surrogate)
        {
          if (! UTF_16_LOW_SURROGATE_P (c))
@@ -1540,7 +1663,14 @@ decode_coding_utf_16 (coding)
          if (UTF_16_HIGH_SURROGATE_P (c))
            CODING_UTF_16_SURROGATE (coding) = surrogate = c;
          else
          if (UTF_16_HIGH_SURROGATE_P (c))
            CODING_UTF_16_SURROGATE (coding) = surrogate = c;
          else
-           *charbuf++ = c;
+           {
+             if (eol_crlf && c == '\r')
+               {
+                 ONE_MORE_BYTE (byte_after_cr1);
+                 ONE_MORE_BYTE (byte_after_cr2);
+               }
+             *charbuf++ = c;
+           }
        }
     }
 
        }
     }
 
@@ -1711,7 +1841,7 @@ emacs_mule_char (coding, src, nbytes, nchars, id)
     {
       if (c >= 0xA0)
        {
     {
       if (c >= 0xA0)
        {
-         /* Old style component character of a compostion.  */
+         /* Old style component character of a composition.  */
          if (c == 0xA0)
            {
              ONE_MORE_BYTE (c);
          if (c == 0xA0)
            {
              ONE_MORE_BYTE (c);
@@ -1898,7 +2028,7 @@ detect_coding_emacs_mule (coding, detect_info)
    value 0.  */
 
 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                        \
    value 0.  */
 
 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                        \
-  if (1)                                                       \
+  do                                                           \
     {                                                          \
       int c;                                                   \
       int nbytes, nchars;                                      \
     {                                                          \
       int c;                                                   \
       int nbytes, nchars;                                      \
@@ -1916,7 +2046,7 @@ detect_coding_emacs_mule (coding, detect_info)
       src += nbytes;                                           \
       consumed_chars += nchars;                                        \
     }                                                          \
       src += nbytes;                                           \
       consumed_chars += nchars;                                        \
     }                                                          \
-  else
+  while (0)
 
 
 /* Decode a composition rule represented as a component of composition
 
 
 /* Decode a composition rule represented as a component of composition
@@ -2072,6 +2202,8 @@ decode_coding_emacs_mule (coding)
   int char_offset = coding->produced_char;
   int last_offset = char_offset;
   int last_id = charset_ascii;
   int char_offset = coding->produced_char;
   int last_offset = char_offset;
   int last_id = charset_ascii;
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+  int byte_after_cr = -1;
 
   CODING_GET_INFO (coding, attrs, charset_list);
 
 
   CODING_GET_INFO (coding, attrs, charset_list);
 
@@ -2085,7 +2217,10 @@ decode_coding_emacs_mule (coding)
       if (charbuf >= charbuf_end)
        break;
 
       if (charbuf >= charbuf_end)
        break;
 
-      ONE_MORE_BYTE (c);
+      if (byte_after_cr >= 0)
+       c = byte_after_cr, byte_after_cr = -1;
+      else
+       ONE_MORE_BYTE (c);
       if (c < 0)
        {
          *charbuf++ = -c;
       if (c < 0)
        {
          *charbuf++ = -c;
@@ -2093,6 +2228,8 @@ decode_coding_emacs_mule (coding)
        }
       else if (c < 0x80)
        {
        }
       else if (c < 0x80)
        {
+         if (eol_crlf && c == '\r')
+           ONE_MORE_BYTE (byte_after_cr);
          *charbuf++ = c;
          char_offset++;
        }
          *charbuf++ = c;
          char_offset++;
        }
@@ -2945,6 +3082,8 @@ decode_coding_iso_2022 (coding)
   int char_offset = coding->produced_char;
   int last_offset = char_offset;
   int last_id = charset_ascii;
   int char_offset = coding->produced_char;
   int last_offset = char_offset;
   int last_id = charset_ascii;
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+  int byte_after_cr = -1;
 
   CODING_GET_INFO (coding, attrs, charset_list);
   setup_iso_safe_charsets (attrs);
 
   CODING_GET_INFO (coding, attrs, charset_list);
   setup_iso_safe_charsets (attrs);
@@ -2962,7 +3101,10 @@ decode_coding_iso_2022 (coding)
       if (charbuf >= charbuf_end)
        break;
 
       if (charbuf >= charbuf_end)
        break;
 
-      ONE_MORE_BYTE (c1);
+      if (byte_after_cr >= 0)
+       c1 = byte_after_cr, byte_after_cr = -1;
+      else
+       ONE_MORE_BYTE (c1);
       if (c1 < 0)
        goto invalid_code;
 
       if (c1 < 0)
        goto invalid_code;
 
@@ -3021,6 +3163,8 @@ decode_coding_iso_2022 (coding)
          break;
 
        case ISO_control_0:
          break;
 
        case ISO_control_0:
+         if (eol_crlf && c1 == '\r')
+           ONE_MORE_BYTE (byte_after_cr);
          MAYBE_FINISH_COMPOSITION ();
          charset = CHARSET_FROM_ID (charset_ascii);
          break;
          MAYBE_FINISH_COMPOSITION ();
          charset = CHARSET_FROM_ID (charset_ascii);
          break;
@@ -4091,6 +4235,8 @@ decode_coding_sjis (coding)
   int char_offset = coding->produced_char;
   int last_offset = char_offset;
   int last_id = charset_ascii;
   int char_offset = coding->produced_char;
   int last_offset = char_offset;
   int last_id = charset_ascii;
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+  int byte_after_cr = -1;
 
   CODING_GET_INFO (coding, attrs, charset_list);
 
 
   CODING_GET_INFO (coding, attrs, charset_list);
 
@@ -4111,11 +4257,18 @@ decode_coding_sjis (coding)
       if (charbuf >= charbuf_end)
        break;
 
       if (charbuf >= charbuf_end)
        break;
 
-      ONE_MORE_BYTE (c);
+      if (byte_after_cr >= 0)
+       c = byte_after_cr, byte_after_cr = -1;
+      else
+       ONE_MORE_BYTE (c);
       if (c < 0)
        goto invalid_code;
       if (c < 0x80)
       if (c < 0)
        goto invalid_code;
       if (c < 0x80)
-       charset = charset_roman;
+       {
+         if (eol_crlf && c == '\r')
+           ONE_MORE_BYTE (byte_after_cr);
+         charset = charset_roman;
+       }
       else if (c == 0x80 || c == 0xA0)
        goto invalid_code;
       else if (c >= 0xA1 && c <= 0xDF)
       else if (c == 0x80 || c == 0xA0)
        goto invalid_code;
       else if (c >= 0xA1 && c <= 0xDF)
@@ -4193,6 +4346,8 @@ decode_coding_big5 (coding)
   int char_offset = coding->produced_char;
   int last_offset = char_offset;
   int last_id = charset_ascii;
   int char_offset = coding->produced_char;
   int last_offset = char_offset;
   int last_id = charset_ascii;
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+  int byte_after_cr = -1;
 
   CODING_GET_INFO (coding, attrs, charset_list);
   val = charset_list;
 
   CODING_GET_INFO (coding, attrs, charset_list);
   val = charset_list;
@@ -4210,12 +4365,19 @@ decode_coding_big5 (coding)
       if (charbuf >= charbuf_end)
        break;
 
       if (charbuf >= charbuf_end)
        break;
 
-      ONE_MORE_BYTE (c);
+      if (byte_after_cr >= 0)
+       c = byte_after_cr, byte_after_cr = -1;
+      else
+       ONE_MORE_BYTE (c);
 
       if (c < 0)
        goto invalid_code;
       if (c < 0x80)
 
       if (c < 0)
        goto invalid_code;
       if (c < 0x80)
-       charset = charset_roman;
+       {
+         if (eol_crlf && c == '\r')
+           ONE_MORE_BYTE (byte_after_cr);
+         charset = charset_roman;
+       }
       else
        {
          /* BIG5 -> Big5 */
       else
        {
          /* BIG5 -> Big5 */
@@ -4632,10 +4794,19 @@ static void
 decode_coding_raw_text (coding)
      struct coding_system *coding;
 {
 decode_coding_raw_text (coding)
      struct coding_system *coding;
 {
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+
   coding->chars_at_source = 1;
   coding->chars_at_source = 1;
-  coding->consumed_char = 0;
-  coding->consumed = 0;
-  record_conversion_result (coding, CODING_RESULT_SUCCESS);
+  coding->consumed_char = coding->src_chars;
+  coding->consumed = coding->src_bytes;
+  if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
+    {
+      coding->consumed_char--;
+      coding->consumed--;
+      record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
+    }
+  else
+    record_conversion_result (coding, CODING_RESULT_SUCCESS);
 }
 
 static int
 }
 
 static int
@@ -4829,6 +5000,8 @@ decode_coding_charset (coding)
   int char_offset = coding->produced_char;
   int last_offset = char_offset;
   int last_id = charset_ascii;
   int char_offset = coding->produced_char;
   int last_offset = char_offset;
   int last_id = charset_ascii;
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+  int byte_after_cr = -1;
 
   CODING_GET_INFO (coding, attrs, charset_list);
   valids = AREF (attrs, coding_attr_charset_valids);
 
   CODING_GET_INFO (coding, attrs, charset_list);
   valids = AREF (attrs, coding_attr_charset_valids);
@@ -4848,7 +5021,17 @@ decode_coding_charset (coding)
       if (charbuf >= charbuf_end)
        break;
 
       if (charbuf >= charbuf_end)
        break;
 
-      ONE_MORE_BYTE (c);
+      if (byte_after_cr >= 0)
+       {
+         c = byte_after_cr;
+         byte_after_cr = -1;
+       }
+      else
+       {
+         ONE_MORE_BYTE (c);
+         if (eol_crlf && c == '\r')
+           ONE_MORE_BYTE (byte_after_cr);
+       }
       if (c < 0)
        goto invalid_code;
       code = c;
       if (c < 0)
        goto invalid_code;
       code = c;
@@ -5529,32 +5712,53 @@ detect_coding (coding)
     {
       int c, i;
       struct coding_detection_info detect_info;
     {
       int c, i;
       struct coding_detection_info detect_info;
+      int null_byte_found = 0, eight_bit_found = 0;
 
       detect_info.checked = detect_info.found = detect_info.rejected = 0;
 
       detect_info.checked = detect_info.found = detect_info.rejected = 0;
-      for (i = 0, src = coding->source; src < src_end; i++, src++)
+      coding->head_ascii = -1;
+      for (src = coding->source; src < src_end; src++)
        {
          c = *src;
          if (c & 0x80)
        {
          c = *src;
          if (c & 0x80)
-           break;
-         if (c < 0x20
-             && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
-             && ! inhibit_iso_escape_detection
-             && ! detect_info.checked)
            {
            {
-             coding->head_ascii = src - (coding->source + coding->consumed);
-             if (detect_coding_iso_2022 (coding, &detect_info))
+             eight_bit_found = 1;
+             if (coding->head_ascii < 0)
+               coding->head_ascii = src - coding->source;
+             if (null_byte_found)
+               break;
+           }
+         else if (c < 0x20)
+           {
+             if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
+                 && ! inhibit_iso_escape_detection
+                 && ! detect_info.checked)
                {
                {
-                 /* We have scanned the whole data.  */
-                 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
-                   /* We didn't find an 8-bit code.  */
-                   src = src_end;
-                 break;
+                 if (coding->head_ascii < 0)
+                   coding->head_ascii = src - coding->source;
+                 if (detect_coding_iso_2022 (coding, &detect_info))
+                   {
+                     /* We have scanned the whole data.  */
+                     if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
+                       /* We didn't find an 8-bit code.  We may have
+                          found a null-byte, but it's very rare that
+                          a binary file confirm to ISO-2022.  */
+                       src = src_end;
+                     break;
+                   }
+               }
+             else if (! c)
+               {
+                 null_byte_found = 1;
+                 if (eight_bit_found)
+                   break;
                }
            }
        }
                }
            }
        }
-      coding->head_ascii = src - (coding->source + coding->consumed);
+      if (coding->head_ascii < 0)
+       coding->head_ascii = src - coding->source;
 
 
-      if (coding->head_ascii < coding->src_bytes
+      if (null_byte_found || eight_bit_found
+         || coding->head_ascii < coding->src_bytes
          || detect_info.found)
        {
          enum coding_category category;
          || detect_info.found)
        {
          enum coding_category category;
@@ -5570,48 +5774,58 @@ detect_coding (coding)
                  break;
              }
          else
                  break;
              }
          else
-           for (i = 0; i < coding_category_raw_text; i++)
-             {
-               category = coding_priorities[i];
-               this = coding_categories + category;
-               if (this->id < 0)
-                 {
-                   /* No coding system of this category is defined.  */
-                   detect_info.rejected |= (1 << category);
-                 }
-               else if (category >= coding_category_raw_text)
-                 continue;
-               else if (detect_info.checked & (1 << category))
-                 {
-                   if (detect_info.found & (1 << category))
-                     break;
-                 }
-               else if ((*(this->detector)) (coding, &detect_info)
-                        && detect_info.found & (1 << category))
-                 {
-                   if (category == coding_category_utf_16_auto)
-                     {
-                       if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
-                         category = coding_category_utf_16_le;
-                       else
-                         category = coding_category_utf_16_be;
-                     }
-                   break;
-                 }
-             }
-         
-         if (i < coding_category_raw_text)
-           setup_coding_system (CODING_ID_NAME (this->id), coding);
-         else if (detect_info.rejected == CATEGORY_MASK_ANY)
-           setup_coding_system (Qraw_text, coding);
-         else if (detect_info.rejected)
-           for (i = 0; i < coding_category_raw_text; i++)
-             if (! (detect_info.rejected & (1 << coding_priorities[i])))
+           {
+             if (null_byte_found)
                {
                {
-                 this = coding_categories + coding_priorities[i];
-                 setup_coding_system (CODING_ID_NAME (this->id), coding);
-                 break;
+                 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
+                 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
                }
                }
+             for (i = 0; i < coding_category_raw_text; i++)
+               {
+                 category = coding_priorities[i];
+                 this = coding_categories + category;
+                 if (this->id < 0)
+                   {
+                     /* No coding system of this category is defined.  */
+                     detect_info.rejected |= (1 << category);
+                   }
+                 else if (category >= coding_category_raw_text)
+                   continue;
+                 else if (detect_info.checked & (1 << category))
+                   {
+                     if (detect_info.found & (1 << category))
+                       break;
+                   }
+                 else if ((*(this->detector)) (coding, &detect_info)
+                          && detect_info.found & (1 << category))
+                   {
+                     if (category == coding_category_utf_16_auto)
+                       {
+                         if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+                           category = coding_category_utf_16_le;
+                         else
+                           category = coding_category_utf_16_be;
+                       }
+                     break;
+                   }
+               }
+         
+             if (i < coding_category_raw_text)
+               setup_coding_system (CODING_ID_NAME (this->id), coding);
+             else if (null_byte_found)
+               setup_coding_system (Qno_conversion, coding);
+             else if ((detect_info.rejected & CATEGORY_MASK_ANY)
+                      == CATEGORY_MASK_ANY)
+               setup_coding_system (Qraw_text, coding);
+             else if (detect_info.rejected)
+               for (i = 0; i < coding_category_raw_text; i++)
+                 if (! (detect_info.rejected & (1 << coding_priorities[i])))
+                   {
+                     this = coding_categories + coding_priorities[i];
+                     setup_coding_system (CODING_ID_NAME (this->id), coding);
+                     break;
+                   }
+           }
        }
     }
   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
        }
     }
   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
@@ -5717,7 +5931,10 @@ decode_eol (coding)
                  pos_end--;
                }
              pos++;
                  pos_end--;
                }
              pos++;
-             pos_byte += BYTES_BY_CHAR_HEAD (*p);
+             if (coding->dst_multibyte)
+               pos_byte += BYTES_BY_CHAR_HEAD (*p);
+             else
+               pos_byte++;
            }
        }
       coding->produced -= n;
            }
        }
       coding->produced -= n;
@@ -5877,19 +6094,21 @@ produce_chars (coding, translation_table, last_block)
 {
   unsigned char *dst = coding->destination + coding->produced;
   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 {
   unsigned char *dst = coding->destination + coding->produced;
   unsigned char *dst_end = coding->destination + coding->dst_bytes;
-  int produced;
-  int produced_chars = 0;
+  EMACS_INT produced;
+  EMACS_INT produced_chars = 0;
   int carryover = 0;
 
   if (! coding->chars_at_source)
     {
   int carryover = 0;
 
   if (! coding->chars_at_source)
     {
-      /* Characters are in coding->charbuf.  */
+      /* Source characters are in coding->charbuf.  */
       int *buf = coding->charbuf;
       int *buf_end = buf + coding->charbuf_used;
 
       int *buf = coding->charbuf;
       int *buf_end = buf + coding->charbuf_used;
 
-      if (BUFFERP (coding->src_object)
-         && EQ (coding->src_object, coding->dst_object))
-       dst_end = ((unsigned char *) coding->source) + coding->consumed;
+      if (EQ (coding->src_object, coding->dst_object))
+       {
+         coding_set_source (coding);
+         dst_end = ((unsigned char *) coding->source) + coding->consumed;
+       }
 
       while (buf < buf_end)
        {
 
       while (buf < buf_end)
        {
@@ -5916,7 +6135,13 @@ produce_chars (coding, translation_table, last_block)
                                           buf_end - buf
                                           + MAX_MULTIBYTE_LENGTH * to_nchars,
                                           dst);
                                           buf_end - buf
                                           + MAX_MULTIBYTE_LENGTH * to_nchars,
                                           dst);
-                 dst_end = coding->destination + coding->dst_bytes;
+                 if (EQ (coding->src_object, coding->dst_object))
+                   {
+                     coding_set_source (coding);
+                     dst_end = ((unsigned char *) coding->source) + coding->consumed;
+                   }
+                 else
+                   dst_end = coding->destination + coding->dst_bytes;
                }
 
              for (i = 0; i < to_nchars; i++)
                }
 
              for (i = 0; i < to_nchars; i++)
@@ -5925,7 +6150,7 @@ produce_chars (coding, translation_table, last_block)
                    c = XINT (AREF (trans, i));
                  if (coding->dst_multibyte
                      || ! CHAR_BYTE8_P (c))
                    c = XINT (AREF (trans, i));
                  if (coding->dst_multibyte
                      || ! CHAR_BYTE8_P (c))
-                   CHAR_STRING_ADVANCE (c, dst);
+                   CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
                  else
                    *dst++ = CHAR_TO_BYTE8 (c);
                }
                  else
                    *dst++ = CHAR_TO_BYTE8 (c);
                }
@@ -5942,18 +6167,18 @@ produce_chars (coding, translation_table, last_block)
     }
   else
     {
     }
   else
     {
+      /* Source characters are at coding->source.  */
       const unsigned char *src = coding->source;
       const unsigned char *src = coding->source;
-      const unsigned char *src_end = src + coding->src_bytes;
-      Lisp_Object eol_type;
-
-      eol_type = CODING_ID_EOL_TYPE (coding->id);
+      const unsigned char *src_end = src + coding->consumed;
 
 
+      if (EQ (coding->dst_object, coding->src_object))
+       dst_end = (unsigned char *) src;
       if (coding->src_multibyte != coding->dst_multibyte)
        {
          if (coding->src_multibyte)
            {
              int multibytep = 1;
       if (coding->src_multibyte != coding->dst_multibyte)
        {
          if (coding->src_multibyte)
            {
              int multibytep = 1;
-             int consumed_chars;
+             EMACS_INT consumed_chars;
 
              while (1)
                {
 
              while (1)
                {
@@ -5961,37 +6186,23 @@ produce_chars (coding, translation_table, last_block)
                  int c;
 
                  ONE_MORE_BYTE (c);
                  int c;
 
                  ONE_MORE_BYTE (c);
-                 if (c == '\r')
+                 if (dst == dst_end)
                    {
                    {
-                     if (EQ (eol_type, Qdos))
+                     if (EQ (coding->src_object, coding->dst_object))
+                       dst_end = (unsigned char *) src;
+                     if (dst == dst_end)
                        {
                        {
-                         if (src == src_end)
-                           {
-                             record_conversion_result
-                               (coding, CODING_RESULT_INSUFFICIENT_SRC);
-                             goto no_more_source;
-                           }
-                         if (*src == '\n')
-                           c = *src++;
+                         EMACS_INT offset = src - coding->source;
+
+                         dst = alloc_destination (coding, src_end - src + 1,
+                                                  dst);
+                         dst_end = coding->destination + coding->dst_bytes;
+                         coding_set_source (coding);
+                         src = coding->source + offset;
+                         src_end = coding->source + coding->src_bytes;
+                         if (EQ (coding->src_object, coding->dst_object))
+                           dst_end = (unsigned char *) src;
                        }
                        }
-                     else if (EQ (eol_type, Qmac))
-                       c = '\n';
-                   }
-                 if (dst == dst_end)
-                   {
-                     coding->consumed = src - coding->source;
-
-                   if (EQ (coding->src_object, coding->dst_object))
-                     dst_end = (unsigned char *) src;
-                   if (dst == dst_end)
-                     {
-                       dst = alloc_destination (coding, src_end - src + 1,
-                                                dst);
-                       dst_end = coding->destination + coding->dst_bytes;
-                       coding_set_source (coding);
-                       src = coding->source + coding->consumed;
-                       src_end = coding->source + coding->src_bytes;
-                     }
                    }
                  *dst++ = c;
                  produced_chars++;
                    }
                  *dst++ = c;
                  produced_chars++;
@@ -6005,31 +6216,26 @@ produce_chars (coding, translation_table, last_block)
                int multibytep = 1;
                int c = *src++;
 
                int multibytep = 1;
                int c = *src++;
 
-               if (c == '\r')
-                 {
-                   if (EQ (eol_type, Qdos))
-                     {
-                       if (src < src_end
-                           && *src == '\n')
-                         c = *src++;
-                     }
-                   else if (EQ (eol_type, Qmac))
-                     c = '\n';
-                 }
                if (dst >= dst_end - 1)
                  {
                if (dst >= dst_end - 1)
                  {
-                   coding->consumed = src - coding->source;
-
                    if (EQ (coding->src_object, coding->dst_object))
                      dst_end = (unsigned char *) src;
                    if (dst >= dst_end - 1)
                      {
                    if (EQ (coding->src_object, coding->dst_object))
                      dst_end = (unsigned char *) src;
                    if (dst >= dst_end - 1)
                      {
-                       dst = alloc_destination (coding, src_end - src + 2,
-                                                dst);
+                       EMACS_INT offset = src - coding->source;
+                       EMACS_INT more_bytes;
+
+                       if (EQ (coding->src_object, coding->dst_object))
+                         more_bytes = ((src_end - src) / 2) + 2;
+                       else
+                         more_bytes = src_end - src + 2;
+                       dst = alloc_destination (coding, more_bytes, dst);
                        dst_end = coding->destination + coding->dst_bytes;
                        coding_set_source (coding);
                        dst_end = coding->destination + coding->dst_bytes;
                        coding_set_source (coding);
-                       src = coding->source + coding->consumed;
+                       src = coding->source + offset;
                        src_end = coding->source + coding->src_bytes;
                        src_end = coding->source + coding->src_bytes;
+                       if (EQ (coding->src_object, coding->dst_object))
+                         dst_end = (unsigned char *) src;
                      }
                  }
                EMIT_ONE_BYTE (c);
                      }
                  }
                EMIT_ONE_BYTE (c);
@@ -6039,7 +6245,7 @@ produce_chars (coding, translation_table, last_block)
        {
          if (!EQ (coding->src_object, coding->dst_object))
            {
        {
          if (!EQ (coding->src_object, coding->dst_object))
            {
-             int require = coding->src_bytes - coding->dst_bytes;
+             EMACS_INT require = coding->src_bytes - coding->dst_bytes;
 
              if (require > 0)
                {
 
              if (require > 0)
                {
@@ -6051,28 +6257,10 @@ produce_chars (coding, translation_table, last_block)
                  src_end = coding->source + coding->src_bytes;
                }
            }
                  src_end = coding->source + coding->src_bytes;
                }
            }
-         produced_chars = coding->src_chars;
+         produced_chars = coding->consumed_char;
          while (src < src_end)
          while (src < src_end)
-           {
-             int c = *src++;
-
-             if (c == '\r')
-               {
-                 if (EQ (eol_type, Qdos))
-                   {
-                     if (src < src_end
-                         && *src == '\n')
-                       c = *src++;
-                     produced_chars--;
-                   }
-                 else if (EQ (eol_type, Qmac))
-                   c = '\n';
-               }
-             *dst++ = c;
-           }
+           *dst++ = *src++;
        }
        }
-      coding->consumed = coding->src_bytes;
-      coding->consumed_char = coding->src_chars;
     }
 
   produced = dst - (coding->destination + coding->produced);
     }
 
   produced = dst - (coding->destination + coding->produced);
@@ -6535,12 +6723,12 @@ consume_chars (coding, translation_table, max_lookup)
          if (coding->encoder == encode_coding_raw_text)
            c = *src++, pos++;
          else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
          if (coding->encoder == encode_coding_raw_text)
            c = *src++, pos++;
          else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
-           c = STRING_CHAR_ADVANCE (src), pos += bytes;
+           c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
          else
            c = BYTE8_TO_CHAR (*src), src++, pos++;
        }
       else
          else
            c = BYTE8_TO_CHAR (*src), src++, pos++;
        }
       else
-       c = STRING_CHAR_ADVANCE (src), pos++;
+       c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
        c = '\n';
       if (! EQ (eol_type, Qunix))
       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
        c = '\n';
       if (! EQ (eol_type, Qunix))
@@ -6849,10 +7037,11 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
   EMACS_INT chars = to - from;
   EMACS_INT bytes = to_byte - from_byte;
   Lisp_Object attrs;
   EMACS_INT chars = to - from;
   EMACS_INT bytes = to_byte - from_byte;
   Lisp_Object attrs;
-  Lisp_Object buffer;
   int saved_pt = -1, saved_pt_byte;
   int saved_pt = -1, saved_pt_byte;
+  int need_marker_adjustment = 0;
+  Lisp_Object old_deactivate_mark;
 
 
-  buffer = Fcurrent_buffer ();
+  old_deactivate_mark = Vdeactivate_mark;
 
   if (NILP (dst_object))
     {
 
   if (NILP (dst_object))
     {
@@ -6877,8 +7066,17 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
        move_gap_both (from, from_byte);
       if (EQ (src_object, dst_object))
        {
        move_gap_both (from, from_byte);
       if (EQ (src_object, dst_object))
        {
+         struct Lisp_Marker *tail;
+
+         for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
+           {
+             tail->need_adjustment
+               = tail->charpos == (tail->insertion_type ? from : to);
+             need_marker_adjustment |= tail->need_adjustment;
+           }
          saved_pt = PT, saved_pt_byte = PT_BYTE;
          TEMP_SET_PT_BOTH (from, from_byte);
          saved_pt = PT, saved_pt_byte = PT_BYTE;
          TEMP_SET_PT_BOTH (from, from_byte);
+         current_buffer->text->inhibit_shrinking = 1;
          del_range_both (from, from_byte, to, to_byte, 1);
          coding->src_pos = -chars;
          coding->src_pos_byte = -bytes;
          del_range_both (from, from_byte, to, to_byte, 1);
          coding->src_pos = -chars;
          coding->src_pos_byte = -bytes;
@@ -6898,10 +7096,10 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
       || (! NILP (CODING_ATTR_POST_READ (attrs))
          && NILP (dst_object)))
     {
       || (! NILP (CODING_ATTR_POST_READ (attrs))
          && NILP (dst_object)))
     {
-      coding->dst_object = code_conversion_save (1, 1);
+      coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
+      coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
       coding->dst_pos = BEG;
       coding->dst_pos_byte = BEG_BYTE;
       coding->dst_pos = BEG;
       coding->dst_pos_byte = BEG_BYTE;
-      coding->dst_multibyte = 1;
     }
   else if (BUFFERP (dst_object))
     {
     }
   else if (BUFFERP (dst_object))
     {
@@ -6916,6 +7114,9 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
     {
       code_conversion_save (0, 0);
       coding->dst_object = Qnil;
     {
       code_conversion_save (0, 0);
       coding->dst_object = Qnil;
+      /* Most callers presume this will return a multibyte result, and they
+        won't use `binary' or `raw-text' anyway, so let's not worry about
+        CODING_FOR_UNIBYTE.  */
       coding->dst_multibyte = 1;
     }
 
       coding->dst_multibyte = 1;
     }
 
@@ -6926,12 +7127,13 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
 
   if (! NILP (CODING_ATTR_POST_READ (attrs)))
     {
 
   if (! NILP (CODING_ATTR_POST_READ (attrs)))
     {
-      struct gcpro gcpro1, gcpro2;
+      struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
       Lisp_Object val;
 
       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
       Lisp_Object val;
 
       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
-      GCPRO2 (coding->src_object, coding->dst_object);
+      GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
+             old_deactivate_mark);
       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
                        make_number (coding->produced_char));
       UNGCPRO;
       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
                        make_number (coding->produced_char));
       UNGCPRO;
@@ -6949,8 +7151,7 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
       set_buffer_internal (XBUFFER (coding->dst_object));
       if (dst_bytes < coding->produced)
        {
       set_buffer_internal (XBUFFER (coding->dst_object));
       if (dst_bytes < coding->produced)
        {
-         destination
-           = (unsigned char *) xrealloc (destination, coding->produced);
+         destination = xrealloc (destination, coding->produced);
          if (! destination)
            {
              record_conversion_result (coding,
          if (! destination)
            {
              record_conversion_result (coding,
@@ -6972,6 +7173,7 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
         As we have moved PT while replacing the original buffer
         contents, we must recover it now.  */
       set_buffer_internal (XBUFFER (src_object));
         As we have moved PT while replacing the original buffer
         contents, we must recover it now.  */
       set_buffer_internal (XBUFFER (src_object));
+      current_buffer->text->inhibit_shrinking = 0;
       if (saved_pt < from)
        TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
       else if (saved_pt < from + chars)
       if (saved_pt < from)
        TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
       else if (saved_pt < from + chars)
@@ -6982,8 +7184,32 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
       else
        TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
                          saved_pt_byte + (coding->produced - bytes));
       else
        TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
                          saved_pt_byte + (coding->produced - bytes));
+
+      if (need_marker_adjustment)
+       {
+         struct Lisp_Marker *tail;
+
+         for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
+           if (tail->need_adjustment)
+             {
+               tail->need_adjustment = 0;
+               if (tail->insertion_type)
+                 {
+                   tail->bytepos = from_byte;
+                   tail->charpos = from;
+                 }
+               else
+                 {
+                   tail->bytepos = from_byte + coding->produced;
+                   tail->charpos
+                     = (NILP (current_buffer->enable_multibyte_characters)
+                        ? tail->bytepos : from + coding->produced_char);
+                 }
+             }
+       }
     }
 
     }
 
+  Vdeactivate_mark = old_deactivate_mark;
   unbind_to (count, coding->dst_object);
 }
 
   unbind_to (count, coding->dst_object);
 }
 
@@ -7000,11 +7226,12 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
   EMACS_INT chars = to - from;
   EMACS_INT bytes = to_byte - from_byte;
   Lisp_Object attrs;
   EMACS_INT chars = to - from;
   EMACS_INT bytes = to_byte - from_byte;
   Lisp_Object attrs;
-  Lisp_Object buffer;
   int saved_pt = -1, saved_pt_byte;
   int saved_pt = -1, saved_pt_byte;
+  int need_marker_adjustment = 0;
   int kill_src_buffer = 0;
   int kill_src_buffer = 0;
+  Lisp_Object old_deactivate_mark;
 
 
-  buffer = Fcurrent_buffer ();
+  old_deactivate_mark = Vdeactivate_mark;
 
   coding->src_object = src_object;
   coding->src_chars = chars;
 
   coding->src_object = src_object;
   coding->src_chars = chars;
@@ -7013,6 +7240,18 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
 
   attrs = CODING_ID_ATTRS (coding->id);
 
 
   attrs = CODING_ID_ATTRS (coding->id);
 
+  if (EQ (src_object, dst_object))
+    {
+      struct Lisp_Marker *tail;
+
+      for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
+       {
+         tail->need_adjustment
+           = tail->charpos == (tail->insertion_type ? from : to);
+         need_marker_adjustment |= tail->need_adjustment;
+       }
+    }
+
   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
     {
       coding->src_object = code_conversion_save (1, coding->src_multibyte);
   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
     {
       coding->src_object = code_conversion_save (1, coding->src_multibyte);
@@ -7034,11 +7273,15 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
 
       {
        Lisp_Object args[3];
 
       {
        Lisp_Object args[3];
+       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
 
 
+       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
+               old_deactivate_mark);
        args[0] = CODING_ATTR_PRE_WRITE (attrs);
        args[1] = make_number (BEG);
        args[2] = make_number (Z);
        safe_call (3, args);
        args[0] = CODING_ATTR_PRE_WRITE (attrs);
        args[1] = make_number (BEG);
        args[2] = make_number (Z);
        safe_call (3, args);
+       UNGCPRO;
       }
       if (XBUFFER (coding->src_object) != current_buffer)
        kill_src_buffer = 1;
       }
       if (XBUFFER (coding->src_object) != current_buffer)
        kill_src_buffer = 1;
@@ -7142,10 +7385,35 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
       else
        TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
                          saved_pt_byte + (coding->produced - bytes));
       else
        TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
                          saved_pt_byte + (coding->produced - bytes));
+
+      if (need_marker_adjustment)
+       {
+         struct Lisp_Marker *tail;
+
+         for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
+           if (tail->need_adjustment)
+             {
+               tail->need_adjustment = 0;
+               if (tail->insertion_type)
+                 {
+                   tail->bytepos = from_byte;
+                   tail->charpos = from;
+                 }
+               else
+                 {
+                   tail->bytepos = from_byte + coding->produced;
+                   tail->charpos
+                     = (NILP (current_buffer->enable_multibyte_characters)
+                        ? tail->bytepos : from + coding->produced_char);
+                 }
+             }
+       }
     }
 
   if (kill_src_buffer)
     Fkill_buffer (coding->src_object);
     }
 
   if (kill_src_buffer)
     Fkill_buffer (coding->src_object);
+
+  Vdeactivate_mark = old_deactivate_mark;
   unbind_to (count, Qnil);
 }
 
   unbind_to (count, Qnil);
 }
 
@@ -7258,7 +7526,8 @@ Lisp_Object
 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
                      coding_system)
      const unsigned char *src;
 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
                      coding_system)
      const unsigned char *src;
-     int src_chars, src_bytes, highest;
+     EMACS_INT src_chars, src_bytes;
+     int highest;
      int multibytep;
      Lisp_Object coding_system;
 {
      int multibytep;
      Lisp_Object coding_system;
 {
@@ -7269,6 +7538,7 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
   int id;
   struct coding_detection_info detect_info;
   enum coding_category base_category;
   int id;
   struct coding_detection_info detect_info;
   enum coding_category base_category;
+  int null_byte_found = 0, eight_bit_found = 0;
 
   if (NILP (coding_system))
     coding_system = Qundecided;
 
   if (NILP (coding_system))
     coding_system = Qundecided;
@@ -7294,33 +7564,54 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
       struct coding_system *this;
       int c, i;
 
       struct coding_system *this;
       int c, i;
 
+      coding.head_ascii = -1;
       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
-      for (i = 0; src < src_end; i++, src++)
+      for (; src < src_end; src++)
        {
          c = *src;
          if (c & 0x80)
        {
          c = *src;
          if (c & 0x80)
-           break;
-         if (c < 0x20
-             && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
-             && ! inhibit_iso_escape_detection)
            {
            {
-             coding.head_ascii = src - coding.source;
-             if (detect_coding_iso_2022 (&coding, &detect_info))
+             eight_bit_found = 1;
+             if (coding.head_ascii < 0)
+               coding.head_ascii = src - coding.source;
+             if (null_byte_found)
+               break;
+           }
+         if (c < 0x20)
+           {
+             if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
+                 && ! inhibit_iso_escape_detection
+                 && ! detect_info.checked)
                {
                {
-                 /* We have scanned the whole data.  */
-                 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
-                   /* We didn't find an 8-bit code.  */
-                   src = src_end;
-                 break;
+                 if (coding.head_ascii < 0)
+                   coding.head_ascii = src - coding.source;
+                 if (detect_coding_iso_2022 (&coding, &detect_info))
+                   {
+                     /* We have scanned the whole data.  */
+                     if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
+                       /* We didn't find an 8-bit code.  We may have
+                          found a null-byte, but it's very rare that
+                          a binary file confirm to ISO-2022.  */
+                       src = src_end;
+                     break;
+                   }
+               }
+             else if (! c)
+               {
+                 null_byte_found = 1;
+                 if (eight_bit_found)
+                   break;
                }
            }
        }
                }
            }
        }
-      coding.head_ascii = src - coding.source;
+      if (coding.head_ascii < 0)
+       coding.head_ascii = src - coding.source;
 
 
-      if (src < src_end
+      if (null_byte_found || eight_bit_found
+         || coding.head_ascii < coding.src_bytes
          || detect_info.found)
        {
          || detect_info.found)
        {
-         if (src == src_end)
+         if (coding.head_ascii == coding.src_bytes)
            /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
            for (i = 0; i < coding_category_raw_text; i++)
              {
            /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
            for (i = 0; i < coding_category_raw_text; i++)
              {
@@ -7330,44 +7621,48 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
                  break;
              }
          else
                  break;
              }
          else
-           for (i = 0; i < coding_category_raw_text; i++)
-             {
-               category = coding_priorities[i];
-               this = coding_categories + category;
+           {
+             if (null_byte_found)
+               {
+                 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
+                 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
+               }
+             for (i = 0; i < coding_category_raw_text; i++)
+               {
+                 category = coding_priorities[i];
+                 this = coding_categories + category;
 
 
-               if (this->id < 0)
-                 {
-                   /* No coding system of this category is defined.  */
-                   detect_info.rejected |= (1 << category);
-                 }
-               else if (category >= coding_category_raw_text)
-                 continue;
-               else if (detect_info.checked & (1 << category))
-                 {
-                   if (highest
-                       && (detect_info.found & (1 << category)))
-                     break;
-                 }
-               else
-                 {
-                   if ((*(this->detector)) (&coding, &detect_info)
-                       && highest
-                       && (detect_info.found & (1 << category)))
-                     {
-                       if (category == coding_category_utf_16_auto)
-                         {
-                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
-                             category = coding_category_utf_16_le;
-                           else
-                             category = coding_category_utf_16_be;
-                         }
+                 if (this->id < 0)
+                   {
+                     /* No coding system of this category is defined.  */
+                     detect_info.rejected |= (1 << category);
+                   }
+                 else if (category >= coding_category_raw_text)
+                   continue;
+                 else if (detect_info.checked & (1 << category))
+                   {
+                     if (highest
+                         && (detect_info.found & (1 << category)))
                        break;
                        break;
-                     }
-                 }
-             }
+                   }
+                 else if ((*(this->detector)) (&coding, &detect_info)
+                          && highest
+                          && (detect_info.found & (1 << category)))
+                   {
+                     if (category == coding_category_utf_16_auto)
+                       {
+                         if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+                           category = coding_category_utf_16_le;
+                         else
+                           category = coding_category_utf_16_be;
+                       }
+                     break;
+                   }
+               }
+           }
        }
 
        }
 
-      if (detect_info.rejected == CATEGORY_MASK_ANY)
+      if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY)
        {
          detect_info.found = CATEGORY_MASK_RAW_TEXT;
          id = coding_categories[coding_category_raw_text].id;
        {
          detect_info.found = CATEGORY_MASK_RAW_TEXT;
          id = coding_categories[coding_category_raw_text].id;
@@ -7456,8 +7751,13 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
     if (VECTORP (eol_type))
       {
        if (detect_info.found & ~CATEGORY_MASK_UTF_16)
     if (VECTORP (eol_type))
       {
        if (detect_info.found & ~CATEGORY_MASK_UTF_16)
-         normal_eol = detect_eol (coding.source, src_bytes,
-                                  coding_category_raw_text);
+         {
+           if (null_byte_found)
+             normal_eol = EOL_SEEN_LF;
+           else
+             normal_eol = detect_eol (coding.source, src_bytes,
+                                      coding_category_raw_text);
+         }
        if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
                                 | CATEGORY_MASK_UTF_16_BE_NOSIG))
          utf_16_be_eol = detect_eol (coding.source, src_bytes,
        if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
                                 | CATEGORY_MASK_UTF_16_BE_NOSIG))
          utf_16_be_eol = detect_eol (coding.source, src_bytes,
@@ -9742,7 +10042,9 @@ Function to call to select safe coding system for encoding a text.
 
 If set, this function is called to force a user to select a proper
 coding system which can encode the text in the case that a default
 
 If set, this function is called to force a user to select a proper
 coding system which can encode the text in the case that a default
-coding system used in each operation can't encode the text.
+coding system used in each operation can't encode the text.  The
+function should take care that the buffer is not modified while
+the coding system is being selected.
 
 The default value is `select-safe-coding-system' (which see).  */);
   Vselect_safe_coding_system_function = Qnil;
 
 The default value is `select-safe-coding-system' (which see).  */);
   Vselect_safe_coding_system_function = Qnil;