declare smobs in alloc.c

[bpt/emacs.git] / src / coding.c
diff --git a/src/coding.c b/src/coding.c

index 497c26d..fc21795 100644 (file)
--- a/src/coding.c
+++ b/src/coding.c
@@ -1,5 +1,5 @@
  /* Coding system handler (conversion, detection, etc).
-   Copyright (C) 2001-2013 Free Software Foundation, Inc.
+   Copyright (C) 2001-2014 Free Software Foundation, Inc.
     Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
       2005, 2006, 2007, 2008, 2009, 2010, 2011
       National Institute of Advanced Industrial Science and Technology (AIST)
@@ -493,6 +493,8 @@ enum iso_code_class_type
  
  #define CODING_ISO_FLAG_USE_OLDJIS     0x10000
  
+#define CODING_ISO_FLAG_LEVEL_4                0x20000
+
  #define CODING_ISO_FLAG_FULL_SUPPORT   0x100000
  
  /* A character to be produced on output if encoding of the original
@@ -649,6 +651,23 @@ static struct coding_system coding_categories[coding_category_max];
  #define max(a, b) ((a) > (b) ? (a) : (b))
  #endif
  
+/* Encode a flag that can be nil, something else, or t as -1, 0, 1.
+   That is, the result is -1 when FLAG is nil; otherwise it is the
+   value of EQ (FLAG, Qt), i.e. 1 for t and 0 for anything else.  */
+
+static int
+encode_inhibit_flag (Lisp_Object flag)
+{
+  return NILP (flag) ? -1 : EQ (flag, Qt);
+}
+
+/* True if the value of ENCODED_FLAG says a flag should be treated as set.
+   1 means yes, -1 means no, 0 means ask the user variable VAR.
+
+   ENCODED_FLAG is expected to be one of -1, 0, 1 and VAR adds 0 or 1
+   to it, so the sum is positive exactly when ENCODED_FLAG is 1, or
+   when ENCODED_FLAG is 0 and VAR is true.  */
+
+static bool
+inhibit_flag (int encoded_flag, bool var)
+{
+  return 0 < encoded_flag + var;
+}
+
  #define CODING_GET_INFO(coding, attrs, charset_list)   \
    do {                                                 \
      (attrs) = CODING_ID_ATTRS ((coding)->id);          \
@@ -1183,7 +1202,7 @@ detect_coding_utf_8 (struct coding_system *coding,
    bool multibytep = coding->src_multibyte;
    ptrdiff_t consumed_chars = 0;
    bool bom_found = 0;
-  int nchars = coding->head_ascii;
+  ptrdiff_t nchars = coding->head_ascii;
    int eol_seen = coding->eol_seen;
  
    detect_info->checked |= CATEGORY_MASK_UTF_8;
@@ -1281,6 +1300,7 @@ detect_coding_utf_8 (struct coding_system *coding,
            means that we found a valid non-ASCII characters.  */
         detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_NOSIG;
      }
+  coding->detected_utf8_bytes = src_base - coding->source;
    coding->detected_utf8_chars = nchars;
    return 1;
  }
@@ -1346,6 +1366,45 @@ decode_coding_utf_8 (struct coding_system *coding)
           break;
         }
  
+      /* In the simple case, rapidly handle ordinary characters.  */
+      if (multibytep && ! eol_dos
+         && charbuf < charbuf_end - 6 && src < src_end - 6)
+       {
+         while (charbuf < charbuf_end - 6 && src < src_end - 6)
+           {
+             c1 = *src;
+             if (c1 & 0x80)
+               break;
+             src++;
+             consumed_chars++;
+             *charbuf++ = c1;
+
+             c1 = *src;
+             if (c1 & 0x80)
+               break;
+             src++;
+             consumed_chars++;
+             *charbuf++ = c1;
+
+             c1 = *src;
+             if (c1 & 0x80)
+               break;
+             src++;
+             consumed_chars++;
+             *charbuf++ = c1;
+
+             c1 = *src;
+             if (c1 & 0x80)
+               break;
+             src++;
+             consumed_chars++;
+             *charbuf++ = c1;
+           }
+         /* If we handled at least one character, restart the main loop.  */
+         if (src != src_base)
+           continue;
+       }
+
        if (byte_after_cr >= 0)
         c1 = byte_after_cr, byte_after_cr = -1;
        else
@@ -1426,7 +1485,7 @@ decode_coding_utf_8 (struct coding_system *coding)
        src = src_base;
        consumed_chars = consumed_chars_base;
        ONE_MORE_BYTE (c);
-      *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+      *charbuf++ = ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
        coding->errors++;
      }
  
@@ -1490,8 +1549,8 @@ encode_coding_utf_8 (struct coding_system *coding)
             *dst++ = CHAR_TO_BYTE8 (c);
           else
             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
-         produced_chars++;
         }
+      produced_chars = dst - (coding->destination + coding->produced);
      }
    record_conversion_result (coding, CODING_RESULT_SUCCESS);
    coding->produced_char += produced_chars;
@@ -1666,7 +1725,7 @@ decode_coding_utf_16 (struct coding_system *coding)
         ONE_MORE_BYTE (c2);
        if (c2 < 0)
         {
-         *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
+         *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
           *charbuf++ = -c2;
           continue;
         }
@@ -1955,7 +2014,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
    int charset_ID;
    unsigned code;
    int c;
-  int consumed_chars = 0;
+  ptrdiff_t consumed_chars = 0;
    bool mseq_found = 0;
  
    ONE_MORE_BYTE (c);
@@ -2049,7 +2108,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
  
         case 1:
           code = c;
-         charset_ID = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit;
+         charset_ID = ASCII_CHAR_P (code) ? charset_ascii : charset_eight_bit;
           break;
  
         default:
@@ -2537,7 +2596,7 @@ decode_coding_emacs_mule (struct coding_system *coding)
        src = src_base;
        consumed_chars = consumed_chars_base;
        ONE_MORE_BYTE (c);
-      *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+      *charbuf++ = ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
        char_offset++;
        coding->errors++;
      }
@@ -3132,7 +3191,7 @@ detect_coding_iso_2022 (struct coding_system *coding,
               if (! single_shifting
                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
                 {
-                 int len = 1;
+                 ptrdiff_t len = 1;
                   while (src < src_end)
                     {
                       src_base = src;
@@ -3514,7 +3573,7 @@ decode_coding_iso_2022 (struct coding_system *coding)
  
        if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
         {
-         *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
+         *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
           char_offset++;
           CODING_ISO_EXTSEGMENT_LEN (coding)--;
           continue;
@@ -3541,7 +3600,7 @@ decode_coding_iso_2022 (struct coding_system *coding)
             }
           else
             {
-             *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
+             *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
               char_offset++;
             }
           continue;
@@ -3716,7 +3775,10 @@ decode_coding_iso_2022 (struct coding_system *coding)
               else
                 charset = CHARSET_FROM_ID (charset_id_2);
               ONE_MORE_BYTE (c1);
-             if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
+             if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)
+                 || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
+                     && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4)
+                         ? c1 >= 0x80 : c1 < 0x80)))
                 goto invalid_code;
               break;
  
@@ -3730,7 +3792,10 @@ decode_coding_iso_2022 (struct coding_system *coding)
               else
                 charset = CHARSET_FROM_ID (charset_id_3);
               ONE_MORE_BYTE (c1);
-             if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
+             if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)
+                 || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
+                     && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4)
+                         ? c1 >= 0x80 : c1 < 0x80)))
                 goto invalid_code;
               break;
  
@@ -3909,7 +3974,7 @@ decode_coding_iso_2022 (struct coding_system *coding)
           MAYBE_FINISH_COMPOSITION ();
           for (; src_base < src; src_base++, char_offset++)
             {
-             if (ASCII_BYTE_P (*src_base))
+             if (ASCII_CHAR_P (*src_base))
                 *charbuf++ = *src_base;
               else
                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
@@ -3939,7 +4004,7 @@ decode_coding_iso_2022 (struct coding_system *coding)
        src = src_base;
        consumed_chars = consumed_chars_base;
        ONE_MORE_BYTE (c);
-      *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+      *charbuf++ = c < 0 ? -c : ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
        char_offset++;
        coding->errors++;
        /* Reset the invocation and designation status to the safest
@@ -4392,7 +4457,7 @@ encode_coding_iso_2022 (struct coding_system *coding)
         {
           /* We have to produce designation sequences if any now.  */
           unsigned char desig_buf[16];
-         int nbytes;
+         ptrdiff_t nbytes;
           ptrdiff_t offset;
  
           charset_map_loaded = 0;
@@ -5135,7 +5200,7 @@ decode_coding_ccl (struct coding_system *coding)
           source_charbuf[i++] = *p++;
  
        if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
-       ccl->last_block = 1;
+       ccl->last_block = true;
        /* As ccl_driver calls DECODE_CHAR, buffer may be relocated.  */
        charset_map_loaded = 0;
        ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
@@ -5195,7 +5260,7 @@ encode_coding_ccl (struct coding_system *coding)
    CODING_GET_INFO (coding, attrs, charset_list);
    if (coding->consumed_char == coding->src_chars
        && coding->mode & CODING_MODE_LAST_BLOCK)
-    ccl->last_block = 1;
+    ccl->last_block = true;
  
    do
      {
@@ -5575,7 +5640,7 @@ decode_coding_charset (struct coding_system *coding)
        src = src_base;
        consumed_chars = consumed_chars_base;
        ONE_MORE_BYTE (c);
-      *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+      *charbuf++ = c < 0 ? -c : ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
        char_offset++;
        coding->errors++;
      }
@@ -5697,6 +5762,7 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
    coding->safe_charsets = SDATA (val);
    coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
    coding->carryover_bytes = 0;
+  coding->raw_destination = 0;
  
    coding_type = CODING_ATTR_TYPE (attrs);
    if (EQ (coding_type, Qundecided))
@@ -5705,6 +5771,14 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
        coding->decoder = decode_coding_raw_text;
        coding->encoder = encode_coding_raw_text;
        coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
+      coding->spec.undecided.inhibit_nbd
+       = (encode_inhibit_flag
+          (AREF (attrs, coding_attr_undecided_inhibit_null_byte_detection)));
+      coding->spec.undecided.inhibit_ied
+       = (encode_inhibit_flag
+          (AREF (attrs, coding_attr_undecided_inhibit_iso_escape_detection)));
+      coding->spec.undecided.prefer_utf_8
+       = ! NILP (AREF (attrs, coding_attr_undecided_prefer_utf_8));
      }
    else if (EQ (coding_type, Qiso_2022))
      {
@@ -6138,7 +6212,7 @@ static Lisp_Object adjust_coding_eol_type (struct coding_system *coding,
     EOL_SEEN_LF, EOL_SEEN_CR, and EOL_SEEN_CRLF, but the value is
     reliable only when all the source bytes are ASCII.  */
  
-static int
+static ptrdiff_t
  check_ascii (struct coding_system *coding)
  {
    const unsigned char *src, *end;
@@ -6210,12 +6284,12 @@ check_ascii (struct coding_system *coding)
     the value is reliable only when all the source bytes are valid
     UTF-8.  */
  
-static int
+static ptrdiff_t
  check_utf_8 (struct coding_system *coding)
  {
    const unsigned char *src, *end;
    int eol_seen;
-  int nchars = coding->head_ascii;
+  ptrdiff_t nchars = coding->head_ascii;
  
    if (coding->head_ascii < 0)
      check_ascii (coding);
@@ -6462,6 +6536,11 @@ detect_coding (struct coding_system *coding)
        int c, i;
        struct coding_detection_info detect_info;
        bool null_byte_found = 0, eight_bit_found = 0;
+      bool inhibit_nbd = inhibit_flag (coding->spec.undecided.inhibit_nbd,
+                                      inhibit_null_byte_detection);
+      bool inhibit_ied = inhibit_flag (coding->spec.undecided.inhibit_ied,
+                                      inhibit_iso_escape_detection);
+      bool prefer_utf_8 = coding->spec.undecided.prefer_utf_8;
  
        coding->head_ascii = 0;
        detect_info.checked = detect_info.found = detect_info.rejected = 0;
@@ -6477,7 +6556,7 @@ detect_coding (struct coding_system *coding)
           else if (c < 0x20)
             {
               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
-                 && ! inhibit_iso_escape_detection
+                 && ! inhibit_ied
                   && ! detect_info.checked)
                 {
                   if (detect_coding_iso_2022 (coding, &detect_info))
@@ -6496,7 +6575,7 @@ detect_coding (struct coding_system *coding)
                       break;
                     }
                 }
-             else if (! c && !inhibit_null_byte_detection)
+             else if (! c && !inhibit_nbd)
                 {
                   null_byte_found = 1;
                   if (eight_bit_found)
@@ -6553,6 +6632,12 @@ detect_coding (struct coding_system *coding)
                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
                 }
+             else if (prefer_utf_8
+                      && detect_coding_utf_8 (coding, &detect_info))
+               {
+                 detect_info.checked |= ~CATEGORY_MASK_UTF_8;
+                 detect_info.rejected |= ~CATEGORY_MASK_UTF_8;
+               }
               for (i = 0; i < coding_category_raw_text; i++)
                 {
                   category = coding_priorities[i];
@@ -6828,11 +6913,9 @@ get_translation_table (Lisp_Object attrs, bool encodep, int *max_lookup)
        if (CHAR_TABLE_P (standard))
         {
           if (CONSP (translation_table))
-           translation_table = nconc2 (translation_table,
-                                       Fcons (standard, Qnil));
+           translation_table = nconc2 (translation_table, list1 (standard));
           else
-           translation_table = Fcons (translation_table,
-                                      Fcons (standard, Qnil));
+           translation_table = list2 (translation_table, standard);
         }
      }
  
@@ -7182,13 +7265,16 @@ produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos)
                       coding->dst_object);
  }
  
+#define MAX_CHARBUF_SIZE 0x4000
+#define MIN_CHARBUF_SIZE 0x10
  
-#define CHARBUF_SIZE 0x4000
-
-#define ALLOC_CONVERSION_WORK_AREA(coding)                             \
-  do {                                                                 \
-    coding->charbuf = SAFE_ALLOCA (CHARBUF_SIZE * sizeof (int));       \
-    coding->charbuf_size = CHARBUF_SIZE;                               \
+#define ALLOC_CONVERSION_WORK_AREA(coding, size)               \
+  do {                                                         \
+    int units = ((size) > MAX_CHARBUF_SIZE ? MAX_CHARBUF_SIZE  \
+                : (size) < MIN_CHARBUF_SIZE ? MIN_CHARBUF_SIZE \
+                : size);                                       \
+    coding->charbuf = SAFE_ALLOCA ((units) * sizeof (int));    \
+    coding->charbuf_size = (units);                            \
    } while (0)
  
  
@@ -7290,7 +7376,7 @@ decode_coding (struct coding_system *coding)
    record_conversion_result (coding, CODING_RESULT_SUCCESS);
    coding->errors = 0;
  
-  ALLOC_CONVERSION_WORK_AREA (coding);
+  ALLOC_CONVERSION_WORK_AREA (coding, coding->src_bytes);
  
    attrs = CODING_ID_ATTRS (coding->id);
    translation_table = get_translation_table (attrs, 0, NULL);
@@ -7332,7 +7418,7 @@ decode_coding (struct coding_system *coding)
    coding->carryover_bytes = 0;
    if (coding->consumed < coding->src_bytes)
      {
-      int nbytes = coding->src_bytes - coding->consumed;
+      ptrdiff_t nbytes = coding->src_bytes - coding->consumed;
        const unsigned char *src;
  
        coding_set_source (coding);
@@ -7416,7 +7502,7 @@ handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit,
           /* We found a composition.  Store the corresponding
              annotation data in BUF.  */
           int *head = buf;
-         enum composition_method method = COMPOSITION_METHOD (prop);
+         enum composition_method method = composition_method (prop);
           int nchars = COMPOSITION_LENGTH (prop);
  
           ADD_COMPOSITION_DATA (buf, nchars, 0, method);
@@ -7686,7 +7772,7 @@ encode_coding (struct coding_system *coding)
    record_conversion_result (coding, CODING_RESULT_SUCCESS);
    coding->errors = 0;
  
-  ALLOC_CONVERSION_WORK_AREA (coding);
+  ALLOC_CONVERSION_WORK_AREA (coding, coding->src_chars);
  
    if (coding->encoder == encode_coding_ccl)
      {
@@ -7757,7 +7843,7 @@ make_conversion_work_buffer (bool multibyte)
  }
  
  
-static Lisp_Object
+static void
  code_conversion_restore (Lisp_Object arg)
  {
    Lisp_Object current, workbuf;
@@ -7775,7 +7861,6 @@ code_conversion_restore (Lisp_Object arg)
      }
    set_buffer_internal (XBUFFER (current));
    UNGCPRO;
-  return Qnil;
  }
  
  Lisp_Object
@@ -7794,7 +7879,7 @@ void
  decode_coding_gap (struct coding_system *coding,
                    ptrdiff_t chars, ptrdiff_t bytes)
  {
-  ptrdiff_t count = SPECPDL_INDEX ();
+  dynwind_begin ();
    Lisp_Object attrs;
  
    coding->src_object = Fcurrent_buffer ();
@@ -7809,7 +7894,7 @@ decode_coding_gap (struct coding_system *coding,
    coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
  
    coding->head_ascii = -1;
-  coding->detected_utf8_chars = -1;
+  coding->detected_utf8_bytes = coding->detected_utf8_chars = -1;
    coding->eol_seen = EOL_SEEN_NONE;
    if (CODING_REQUIRE_DETECTION (coding))
      detect_coding (coding);
@@ -7826,7 +7911,8 @@ decode_coding_gap (struct coding_system *coding,
        if (chars != bytes)
         {
           /* There exists a non-ASCII byte.  */
-         if (EQ (CODING_ATTR_TYPE (attrs), Qutf_8))
+         if (EQ (CODING_ATTR_TYPE (attrs), Qutf_8)
+             && coding->detected_utf8_bytes == coding->src_bytes)
             {
               if (coding->detected_utf8_chars >= 0)
                 chars = coding->detected_utf8_chars;
@@ -7887,6 +7973,7 @@ decode_coding_gap (struct coding_system *coding,
           coding->produced = bytes;
           coding->produced_char = chars;
           insert_from_gap (chars, bytes, 1);
+         dynwind_end ();
           return;
         }
      }
@@ -7910,7 +7997,7 @@ decode_coding_gap (struct coding_system *coding,
        coding->produced += Z_BYTE - prev_Z_BYTE;
      }
  
-  unbind_to (count, Qnil);
+  dynwind_end ();
  }
  
  
@@ -7950,7 +8037,7 @@ decode_coding_object (struct coding_system *coding,
                       ptrdiff_t to, ptrdiff_t to_byte,
                       Lisp_Object dst_object)
  {
-  ptrdiff_t count = SPECPDL_INDEX ();
+  dynwind_begin ();
    unsigned char *destination IF_LINT (= NULL);
    ptrdiff_t dst_bytes IF_LINT (= 0);
    ptrdiff_t chars = to - from;
@@ -8123,7 +8210,7 @@ decode_coding_object (struct coding_system *coding,
      }
  
    Vdeactivate_mark = old_deactivate_mark;
-  unbind_to (count, coding->dst_object);
+  dynwind_end ();
  }
  
  
@@ -8134,7 +8221,7 @@ encode_coding_object (struct coding_system *coding,
                       ptrdiff_t to, ptrdiff_t to_byte,
                       Lisp_Object dst_object)
  {
-  ptrdiff_t count = SPECPDL_INDEX ();
+  dynwind_begin ();
    ptrdiff_t chars = to - from;
    ptrdiff_t bytes = to_byte - from_byte;
    Lisp_Object attrs;
@@ -8256,7 +8343,7 @@ encode_coding_object (struct coding_system *coding,
      {
        ptrdiff_t dst_bytes = max (1, coding->src_chars);
        coding->dst_object = Qnil;
-      coding->destination = xmalloc (dst_bytes);
+      coding->destination = xmalloc_atomic (dst_bytes);
        coding->dst_bytes = dst_bytes;
        coding->dst_multibyte = 0;
      }
@@ -8272,6 +8359,11 @@ encode_coding_object (struct coding_system *coding,
      {
        if (BUFFERP (coding->dst_object))
         coding->dst_object = Fbuffer_string ();
+      else if (coding->raw_destination)
+       /* This is used to avoid creating a huge Lisp string.
+          NOTE: the caller that sets `raw_destination' is also
+          responsible for freeing the `destination' buffer.  */
+       coding->dst_object = Qnil;
        else
         {
           coding->dst_object
@@ -8327,7 +8419,7 @@ encode_coding_object (struct coding_system *coding,
      Fkill_buffer (coding->src_object);
  
    Vdeactivate_mark = old_deactivate_mark;
-  unbind_to (count, Qnil);
+  dynwind_end ();
  }
  
  
@@ -8355,11 +8447,11 @@ from_unicode (Lisp_Object str)
  }
  
  /* Convert the null-terminated wide string WSTR by wrapping its raw
     bytes in a unibyte string and passing that to from_unicode.  */
  Lisp_Object
-from_unicode_buffer (const wchar_t* wstr)
+from_unicode_buffer (const wchar_t *wstr)
  {
      return from_unicode (
          make_unibyte_string (
-            (char*) wstr,
+            (char *) wstr,
              /* The length is the byte size of all wide characters
                 before the terminator, plus one extra byte:
                 we get one of the two final 0 bytes for free.
                 NOTE(review): "two final 0 bytes" presumably assumes
                 a 2-byte wchar_t -- confirm.  */
              1 + sizeof (wchar_t) * wcslen (wstr)));
  }
@@ -8425,7 +8517,7 @@ are lower-case).  */)
    (Lisp_Object prompt, Lisp_Object default_coding_system)
  {
    Lisp_Object val;
-  ptrdiff_t count = SPECPDL_INDEX ();
+  dynwind_begin ();
  
    if (SYMBOLP (default_coding_system))
      default_coding_system = SYMBOL_NAME (default_coding_system);
@@ -8433,7 +8525,7 @@ are lower-case).  */)
    val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
                           Qt, Qnil, Qcoding_system_history,
                           default_coding_system, Qnil);
-  unbind_to (count, Qnil);
+  dynwind_end ();
    return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
  }
  
@@ -8514,6 +8606,11 @@ detect_coding_system (const unsigned char *src,
        enum coding_category category IF_LINT (= 0);
        struct coding_system *this IF_LINT (= NULL);
        int c, i;
+      bool inhibit_nbd = inhibit_flag (coding.spec.undecided.inhibit_nbd,
+                                      inhibit_null_byte_detection);
+      bool inhibit_ied = inhibit_flag (coding.spec.undecided.inhibit_ied,
+                                      inhibit_iso_escape_detection);
+      bool prefer_utf_8 = coding.spec.undecided.prefer_utf_8;
  
        /* Skip all ASCII bytes except for a few ISO2022 controls.  */
        for (; src < src_end; src++)
@@ -8528,7 +8625,7 @@ detect_coding_system (const unsigned char *src,
           else if (c < 0x20)
             {
               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
-                 && ! inhibit_iso_escape_detection
+                 && ! inhibit_ied
                   && ! detect_info.checked)
                 {
                   if (detect_coding_iso_2022 (&coding, &detect_info))
@@ -8547,7 +8644,7 @@ detect_coding_system (const unsigned char *src,
                       break;
                     }
                 }
-             else if (! c && !inhibit_null_byte_detection)
+             else if (! c && !inhibit_nbd)
                 {
                   null_byte_found = 1;
                   if (eight_bit_found)
@@ -8580,6 +8677,12 @@ detect_coding_system (const unsigned char *src,
                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
                 }
+             else if (prefer_utf_8
+                      && detect_coding_utf_8 (&coding, &detect_info))
+               {
+                 detect_info.checked |= ~CATEGORY_MASK_UTF_8;
+                 detect_info.rejected |= ~CATEGORY_MASK_UTF_8;
+               }
               for (i = 0; i < coding_category_raw_text; i++)
                 {
                   category = coding_priorities[i];
@@ -8620,20 +8723,20 @@ detect_coding_system (const unsigned char *src,
         {
           detect_info.found = CATEGORY_MASK_RAW_TEXT;
           id = CODING_SYSTEM_ID (Qno_conversion);
-         val = Fcons (make_number (id), Qnil);
+         val = list1 (make_number (id));
         }
        else if (! detect_info.rejected && ! detect_info.found)
         {
           detect_info.found = CATEGORY_MASK_ANY;
           id = coding_categories[coding_category_undecided].id;
-         val = Fcons (make_number (id), Qnil);
+         val = list1 (make_number (id));
         }
        else if (highest)
         {
           if (detect_info.found)
             {
               detect_info.found = 1 << category;
-             val = Fcons (make_number (this->id), Qnil);
+             val = list1 (make_number (this->id));
             }
           else
             for (i = 0; i < coding_category_raw_text; i++)
@@ -8641,7 +8744,7 @@ detect_coding_system (const unsigned char *src,
                 {
                   detect_info.found = 1 << coding_priorities[i];
                   id = coding_categories[coding_priorities[i]].id;
-                 val = Fcons (make_number (id), Qnil);
+                 val = list1 (make_number (id));
                   break;
                 }
         }
@@ -8658,7 +8761,7 @@ detect_coding_system (const unsigned char *src,
                   found |= 1 << category;
                   id = coding_categories[category].id;
                   if (id >= 0)
-                   val = Fcons (make_number (id), val);
+                   val = list1 (make_number (id));
                 }
             }
           for (i = coding_category_raw_text - 1; i >= 0; i--)
@@ -8683,7 +8786,7 @@ detect_coding_system (const unsigned char *src,
             this = coding_categories + coding_category_utf_8_sig;
           else
             this = coding_categories + coding_category_utf_8_nosig;
-         val = Fcons (make_number (this->id), Qnil);
+         val = list1 (make_number (this->id));
         }
      }
    else if (base_category == coding_category_utf_16_auto)
@@ -8700,13 +8803,13 @@ detect_coding_system (const unsigned char *src,
             this = coding_categories + coding_category_utf_16_be_nosig;
           else
             this = coding_categories + coding_category_utf_16_le_nosig;
-         val = Fcons (make_number (this->id), Qnil);
+         val = list1 (make_number (this->id));
         }
      }
    else
      {
        detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
-      val = Fcons (make_number (coding.id), Qnil);
+      val = list1 (make_number (coding.id));
      }
  
    /* Then, detect eol-format if necessary.  */
@@ -8918,8 +9021,7 @@ DEFUN ("find-coding-systems-region-internal",
         Lisp_Object attrs;
  
         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
-       if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
-           && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
+       if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs)))
           {
             ASET (attrs, coding_attr_trans_tbl,
                   get_translation_table (attrs, 1, NULL));
@@ -8933,13 +9035,13 @@ DEFUN ("find-coding-systems-region-internal",
      p = pbeg = BYTE_POS_ADDR (start_byte);
    pend = p + (end_byte - start_byte);
  
-  while (p < pend && ASCII_BYTE_P (*p)) p++;
-  while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
+  while (p < pend && ASCII_CHAR_P (*p)) p++;
+  while (p < pend && ASCII_CHAR_P (*(pend - 1))) pend--;
  
    work_table = Fmake_char_table (Qnil, Qnil);
    while (p < pend)
      {
-      if (ASCII_BYTE_P (*p))
+      if (ASCII_CHAR_P (*p))
         p++;
        else
         {
@@ -8993,8 +9095,7 @@ DEFUN ("find-coding-systems-region-internal",
  
  DEFUN ("unencodable-char-position", Funencodable_char_position,
         Sunencodable_char_position, 3, 5, 0,
-       doc: /*
-Return position of first un-encodable character in a region.
+       doc: /* Return position of first un-encodable character in a region.
  START and END specify the region and CODING-SYSTEM specifies the
  encoding to check.  Return nil if CODING-SYSTEM does encode the region.
  
@@ -9004,8 +9105,9 @@ list of positions.
  
  If optional 5th argument STRING is non-nil, it is a string to search
  for un-encodable characters.  In that case, START and END are indexes
-to the string.  */)
-  (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object count, Lisp_Object string)
+to the string and treated as in `substring'.  */)
+  (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system,
+   Lisp_Object count, Lisp_Object string)
  {
    EMACS_INT n;
    struct coding_system coding;
@@ -9042,12 +9144,7 @@ to the string.  */)
    else
      {
        CHECK_STRING (string);
-      CHECK_NATNUM (start);
-      CHECK_NATNUM (end);
-      if (! (XINT (start) <= XINT (end) && XINT (end) <= SCHARS (string)))
-       args_out_of_range_3 (string, start, end);
-      from = XINT (start);
-      to = XINT (end);
+      validate_subarray (string, start, end, SCHARS (string), &from, &to);
        if (! STRING_MULTIBYTE (string))
         return Qnil;
        p = SDATA (string) + string_char_to_byte (string, from);
@@ -9071,7 +9168,7 @@ to the string.  */)
        int c;
  
        if (ascii_compatible)
-       while (p < stop && ASCII_BYTE_P (*p))
+       while (p < stop && ASCII_CHAR_P (*p))
           p++, from++;
        if (p >= stop)
         {
@@ -9178,7 +9275,7 @@ is nil.  */)
        attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
        ASET (attrs, coding_attr_trans_tbl,
             get_translation_table (attrs, 1, NULL));
-      list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
+      list = Fcons (list2 (elt, attrs), list);
      }
  
    if (STRINGP (start))
@@ -9187,12 +9284,12 @@ is nil.  */)
      p = pbeg = BYTE_POS_ADDR (start_byte);
    pend = p + (end_byte - start_byte);
  
-  while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
-  while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
+  while (p < pend && ASCII_CHAR_P (*p)) p++, pos++;
+  while (p < pend && ASCII_CHAR_P (*(pend - 1))) pend--;
  
    while (p < pend)
      {
-      if (ASCII_BYTE_P (*p))
+      if (ASCII_CHAR_P (*p))
         p++;
        else
         {
@@ -9262,6 +9359,14 @@ code_convert_region (Lisp_Object start, Lisp_Object end,
    setup_coding_system (coding_system, &coding);
    coding.mode |= CODING_MODE_LAST_BLOCK;
  
+  if (BUFFERP (dst_object) && !EQ (dst_object, src_object))
+    {
+      struct buffer *buf = XBUFFER (dst_object);
+      ptrdiff_t buf_pt = BUF_PT (buf);
+
+      invalidate_buffer_caches (buf, buf_pt, buf_pt);
+    }
+
    if (encodep)
      encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
                           dst_object);
@@ -9351,6 +9456,15 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system,
    coding.mode |= CODING_MODE_LAST_BLOCK;
    chars = SCHARS (string);
    bytes = SBYTES (string);
+
+  if (BUFFERP (dst_object))
+    {
+      struct buffer *buf = XBUFFER (dst_object);
+      ptrdiff_t buf_pt = BUF_PT (buf);
+
+      invalidate_buffer_caches (buf, buf_pt, buf_pt);
+    }
+
    if (encodep)
      encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
    else
@@ -9377,6 +9491,55 @@ code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
    return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
  }
  
+/* Decode (here) or encode (encode_file_name, below) a file name from or to
+   the unibyte string form suitable for passing to C library functions.  */
+Lisp_Object
+decode_file_name (Lisp_Object fname)
+{
+#ifdef WINDOWSNT
+  /* The w32 build pretends to use UTF-8 for file-name encoding, and
+     converts the file names either to UTF-16LE or to the system ANSI
+     codepage internally, depending on the underlying OS; see w32.c.  */
+  if (! NILP (Fcoding_system_p (Qutf_8)))
+    return code_convert_string_norecord (fname, Qutf_8, 0);
+  return fname;  /* Fcoding_system_p rejected Qutf_8: leave FNAME as is.  */
+#else  /* !WINDOWSNT */
+  if (! NILP (Vfile_name_coding_system))  /* Checked first.  */
+    return code_convert_string_norecord (fname, Vfile_name_coding_system, 0);
+  else if (! NILP (Vdefault_file_name_coding_system))  /* Fallback.  */
+    return code_convert_string_norecord (fname,
+                                        Vdefault_file_name_coding_system, 0);
+  else
+    return fname;  /* Neither variable is set: leave FNAME as is.  */
+#endif /* !WINDOWSNT */
+}
+
+Lisp_Object
+encode_file_name (Lisp_Object fname)
+{
+  /* Return unibyte strings unchanged.  This matters especially during
+     bootstrap and dumping, when file-name encoding is not yet known, so
+     any non-ASCII file names are unibyte strings and could only be
+     trashed if we tried to encode them.  */
+  if (!STRING_MULTIBYTE (fname))
+    return fname;
+#ifdef WINDOWSNT
+  /* The w32 build pretends to use UTF-8 for file-name encoding, and
+     converts the file names either to UTF-16LE or to the system ANSI
+     codepage internally, depending on the underlying OS; see w32.c.  */
+  if (! NILP (Fcoding_system_p (Qutf_8)))
+    return code_convert_string_norecord (fname, Qutf_8, 1);
+  return fname;  /* Fcoding_system_p rejected Qutf_8: leave FNAME as is.  */
+#else  /* !WINDOWSNT */
+  if (! NILP (Vfile_name_coding_system))  /* Checked first.  */
+    return code_convert_string_norecord (fname, Vfile_name_coding_system, 1);
+  else if (! NILP (Vdefault_file_name_coding_system))  /* Fallback.  */
+    return code_convert_string_norecord (fname,
+                                        Vdefault_file_name_coding_system, 1);
+  else
+    return fname;  /* Neither variable is set: leave FNAME as is.  */
+#endif /* !WINDOWSNT */
+}
  
  DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
         2, 4, 0,
@@ -9434,7 +9597,7 @@ Return the corresponding character.  */)
    CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
    attrs = AREF (spec, 0);
  
-  if (ASCII_BYTE_P (ch)
+  if (ASCII_CHAR_P (ch)
        && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
      return code;
  
@@ -9515,7 +9678,7 @@ Return the corresponding character.  */)
    CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
    attrs = AREF (spec, 0);
  
-  if (ASCII_BYTE_P (ch)
+  if (ASCII_CHAR_P (ch)
        && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
      return code;
  
@@ -9589,7 +9752,7 @@ DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_intern
    tset_charset_list
      (term, (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK
             ? coding_charset_list (terminal_coding)
-           : Fcons (make_number (charset_ascii), Qnil)));
+           : list1 (make_number (charset_ascii))));
    return Qnil;
  }
  
@@ -10034,9 +10197,9 @@ usage: (define-coding-system-internal ...)  */)
                 {
                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
                   if (dim < dim2)
-                   tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
+                   tmp = list2 (XCAR (tail), tmp);
                   else
-                   tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
+                   tmp = list2 (tmp, XCAR (tail));
                 }
               else
                 {
@@ -10047,7 +10210,7 @@ usage: (define-coding-system-internal ...)  */)
                         break;
                     }
                   if (NILP (tmp2))
-                   tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
+                   tmp = nconc2 (tmp, list1 (XCAR (tail)));
                   else
                     {
                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
@@ -10333,7 +10496,17 @@ usage: (define-coding-system-internal ...)  */)
                   : coding_category_utf_8_sig);
      }
    else if (EQ (coding_type, Qundecided))
-    category = coding_category_undecided;
+    {
+      if (nargs < coding_arg_undecided_max)
+       goto short_args;
+      ASET (attrs, coding_attr_undecided_inhibit_null_byte_detection,
+           args[coding_arg_undecided_inhibit_null_byte_detection]);
+      ASET (attrs, coding_attr_undecided_inhibit_iso_escape_detection,
+           args[coding_arg_undecided_inhibit_iso_escape_detection]);
+      ASET (attrs, coding_attr_undecided_prefer_utf_8,
+           args[coding_arg_undecided_prefer_utf_8]);
+      category = coding_category_undecided;
+    }
    else
      error ("Invalid coding system type: %s",
            SDATA (SYMBOL_NAME (coding_type)));
@@ -10355,7 +10528,7 @@ usage: (define-coding-system-internal ...)  */)
        && ! EQ (eol_type, Qmac))
      error ("Invalid eol-type");
  
-  aliases = Fcons (name, Qnil);
+  aliases = list1 (name);
  
    if (NILP (eol_type))
      {
@@ -10365,7 +10538,7 @@ usage: (define-coding-system-internal ...)  */)
           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
  
           this_name = AREF (eol_type, i);
-         this_aliases = Fcons (this_name, Qnil);
+         this_aliases = list1 (this_name);
           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
           this_spec = make_uninit_vector (3);
           ASET (this_spec, 0, attrs);
@@ -10480,7 +10653,7 @@ DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
       list.  */
    while (!NILP (XCDR (aliases)))
      aliases = XCDR (aliases);
-  XSETCDR (aliases, Fcons (alias, Qnil));
+  XSETCDR (aliases, list1 (alias));
  
    eol_type = AREF (spec, 2);
    if (VECTORP (eol_type))
@@ -10625,6 +10798,8 @@ init_coding_once (void)
  void
  syms_of_coding (void)
  {
+#include "coding.x"
+
    staticpro (&Vcoding_system_hash_table);
    {
      Lisp_Object args[2];
@@ -10714,11 +10889,6 @@ syms_of_coding (void)
    Fput (Qcoding_system_error, Qerror_message,
         build_pure_c_string ("Invalid coding system"));
  
-  /* Intern this now in case it isn't already done.
-     Setting this variable twice is harmless.
-     But don't staticpro it here--that is done in alloc.c.  */
-  Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
-
    DEFSYM (Qtranslation_table, "translation-table");
    Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
    DEFSYM (Qtranslation_table_id, "translation-table-id");
@@ -10791,39 +10961,6 @@ syms_of_coding (void)
    DEFSYM (Qinterrupted, "interrupted");
    DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
  
-  defsubr (&Scoding_system_p);
-  defsubr (&Sread_coding_system);
-  defsubr (&Sread_non_nil_coding_system);
-  defsubr (&Scheck_coding_system);
-  defsubr (&Sdetect_coding_region);
-  defsubr (&Sdetect_coding_string);
-  defsubr (&Sfind_coding_systems_region_internal);
-  defsubr (&Sunencodable_char_position);
-  defsubr (&Scheck_coding_systems_region);
-  defsubr (&Sdecode_coding_region);
-  defsubr (&Sencode_coding_region);
-  defsubr (&Sdecode_coding_string);
-  defsubr (&Sencode_coding_string);
-  defsubr (&Sdecode_sjis_char);
-  defsubr (&Sencode_sjis_char);
-  defsubr (&Sdecode_big5_char);
-  defsubr (&Sencode_big5_char);
-  defsubr (&Sset_terminal_coding_system_internal);
-  defsubr (&Sset_safe_terminal_coding_system_internal);
-  defsubr (&Sterminal_coding_system);
-  defsubr (&Sset_keyboard_coding_system_internal);
-  defsubr (&Skeyboard_coding_system);
-  defsubr (&Sfind_operation_coding_system);
-  defsubr (&Sset_coding_system_priority);
-  defsubr (&Sdefine_coding_system_internal);
-  defsubr (&Sdefine_coding_system_alias);
-  defsubr (&Scoding_system_put);
-  defsubr (&Scoding_system_base);
-  defsubr (&Scoding_system_plist);
-  defsubr (&Scoding_system_aliases);
-  defsubr (&Scoding_system_eol_type);
-  defsubr (&Scoding_system_priority_list);
-
    DEFVAR_LISP ("coding-system-list", Vcoding_system_list,
                doc: /* List of coding systems.
  
@@ -11121,11 +11258,11 @@ internal character representation.  */);
      Vtranslation_table_for_input = Qnil;
  
    {
-    Lisp_Object args[coding_arg_max];
+    Lisp_Object args[coding_arg_undecided_max];
      Lisp_Object plist[16];
      int i;
  
-    for (i = 0; i < coding_arg_max; i++)
+    for (i = 0; i < coding_arg_undecided_max; i++)
        args[i] = Qnil;
  
      plist[0] = intern_c_string (":name");
@@ -11162,7 +11299,9 @@ character.");
      plist[13] = build_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
      plist[15] = args[coding_arg_eol_type] = Qnil;
      args[coding_arg_plist] = Flist (16, plist);
-    Fdefine_coding_system_internal (coding_arg_max, args);
+    args[coding_arg_undecided_inhibit_null_byte_detection] = make_number (0);
+    args[coding_arg_undecided_inhibit_iso_escape_detection] = make_number (0);
+    Fdefine_coding_system_internal (coding_arg_undecided_max, args);
    }
  
    setup_coding_system (Qno_conversion, &safe_terminal_coding);