merge emacs-23

[bpt/emacs.git] / src / coding.c
diff --git a/src/coding.c b/src/coding.c

index 6dbf05c..555e662 100644 (file)
--- a/src/coding.c
+++ b/src/coding.c
@@ -1,8 +1,8 @@
  /* Coding system handler (conversion, detection, etc).
     Copyright (C) 2001, 2002, 2003, 2004, 2005,
-                 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+                 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
     Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
-     2005, 2006, 2007, 2008, 2009
+     2005, 2006, 2007, 2008, 2009, 2010, 2011
       National Institute of Advanced Industrial Science and Technology (AIST)
       Registration Number H14PRO021
     Copyright (C) 2003
@@ -167,7 +167,7 @@ detect_coding_XXX (coding, detect_info)
  
    while (1)
      {
-      /* Get one byte from the source.  If the souce is exausted, jump
+      /* Get one byte from the source.  If the source is exhausted, jump
          to no_more_source:.  */
        ONE_MORE_BYTE (c);
  
@@ -181,7 +181,7 @@ detect_coding_XXX (coding, detect_info)
    return 0;
  
   no_more_source:
-  /* The source exausted successfully.  */
+  /* The source was exhausted successfully.  */
    detect_info->found |= found;
    return 1;
  }
@@ -289,6 +289,7 @@ encode_coding_XXX (coding)
  
  #include <config.h>
  #include <stdio.h>
+#include <setjmp.h>
  
  #include "lisp.h"
  #include "buffer.h"
@@ -536,7 +537,7 @@ enum iso_code_class_type
     on output.  */
  #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
  
-/* If set, do not encode unsafe charactes on output.  */
+/* If set, do not encode unsafe characters on output.  */
  #define CODING_ISO_FLAG_SAFE           0x0800
  
  /* If set, extra latin codes (128..159) are accepted as a valid code
@@ -692,7 +693,7 @@ enum coding_category
  static Lisp_Object Vcoding_category_list;
  
  /* Table of coding categories (Lisp symbols).  This variable is for
-   internal use oly.  */
+   internal use only.  */
  static Lisp_Object Vcoding_category_table;
  
  /* Table of coding-categories ordered by priority.  */
@@ -824,7 +825,7 @@ static struct coding_system coding_categories[coding_category_max];
    } while (0)
  
  
-/* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
+/* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2.  */
  
  #define EMIT_TWO_ASCII_BYTES(c1, c2)   \
    do {                                 \
@@ -992,6 +993,11 @@ record_conversion_result (struct coding_system *coding,
      case CODING_RESULT_INSUFFICIENT_MEM:
        Vlast_code_conversion_error = Qinsufficient_memory;
        break;
+    case CODING_RESULT_INSUFFICIENT_DST:
+      /* Don't record this error in Vlast_code_conversion_error
+        because it happens just temporarily and is resolved when the
+        whole conversion is finished.  */
+      break;
      case CODING_RESULT_SUCCESS:
        break;
      default:
@@ -999,6 +1005,10 @@ record_conversion_result (struct coding_system *coding,
      }
  }
  
+/* This wrapper macro is used to preserve validity of pointers into
+   buffer text across calls to decode_char, which could cause
+   relocation of buffers if it loads a charset map, because loading a
+   charset map allocates large structures.  */
  #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
    do {                                                                      \
      charset_map_loaded = 0;                                                 \
@@ -1231,7 +1241,7 @@ alloc_destination (coding, nbytes, dst)
  
     METHOD is one of enum composition_method.
  
-   Optionnal COMPOSITION-COMPONENTS are characters and composition
+   Optional COMPOSITION-COMPONENTS are characters and composition
     rules.
  
     In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
@@ -1858,7 +1868,7 @@ encode_coding_utf_16 (coding)
      {
        ASSURE_DESTINATION (safe_room);
        c = *charbuf++;
-      if (c >= MAX_UNICODE_CHAR)
+      if (c > MAX_UNICODE_CHAR)
         c = coding->default_char;
  
        if (c < 0x10000)
@@ -1944,7 +1954,7 @@ encode_coding_utf_16 (coding)
         CHARS is 0xA0 plus a number of characters composed by this
         data,
  
-       COMPONENTs are characters of multibye form or composition
+       COMPONENTs are characters of multibyte form or composition
         rules encoded by two-byte of ASCII codes.
  
     In addition, for backward compatibility, the following formats are
@@ -2021,7 +2031,7 @@ detect_coding_emacs_mule (coding, detect_info)
         }
        else
         {
-         int more_bytes = emacs_mule_bytes[*src_base] - 1;
+         int more_bytes = emacs_mule_bytes[c] - 1;
  
           while (more_bytes > 0)
             {
@@ -2055,7 +2065,7 @@ detect_coding_emacs_mule (coding, detect_info)
  /* Parse emacs-mule multibyte sequence at SRC and return the decoded
     character.  If CMP_STATUS indicates that we must expect MSEQ or
     RULE described above, decode it and return the negative value of
-   the deocded character or rule.  If an invalid byte is found, return
+   the decoded character or rule.  If an invalid byte is found, return
     -1.  If SRC is too short, return -2.  */
  
  int
@@ -2068,7 +2078,7 @@ emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status)
    const unsigned char *src_end = coding->source + coding->src_bytes;
    const unsigned char *src_base = src;
    int multibytep = coding->src_multibyte;
-  struct charset *charset;
+  int charset_id;
    unsigned code;
    int c;
    int consumed_chars = 0;
@@ -2078,7 +2088,7 @@ emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status)
    if (c < 0)
      {
        c = -c;
-      charset = emacs_mule_charset[0];
+      charset_id = emacs_mule_charset[0];
      }
    else
      {
@@ -2114,7 +2124,7 @@ emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status)
        switch (emacs_mule_bytes[c])
         {
         case 2:
-         if (! (charset = emacs_mule_charset[c]))
+         if ((charset_id = emacs_mule_charset[c]) < 0)
             goto invalid_code;
           ONE_MORE_BYTE (c);
           if (c < 0xA0)
@@ -2127,7 +2137,7 @@ emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status)
               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
             {
               ONE_MORE_BYTE (c);
-             if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
+             if (c < 0xA0 || (charset_id = emacs_mule_charset[c]) < 0)
                 goto invalid_code;
               ONE_MORE_BYTE (c);
               if (c < 0xA0)
@@ -2136,7 +2146,7 @@ emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status)
             }
           else
             {
-             if (! (charset = emacs_mule_charset[c]))
+             if ((charset_id = emacs_mule_charset[c]) < 0)
                 goto invalid_code;
               ONE_MORE_BYTE (c);
               if (c < 0xA0)
@@ -2151,7 +2161,7 @@ emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status)
  
         case 4:
           ONE_MORE_BYTE (c);
-         if (c < 0 || ! (charset = emacs_mule_charset[c]))
+         if (c < 0 || (charset_id = emacs_mule_charset[c]) < 0)
             goto invalid_code;
           ONE_MORE_BYTE (c);
           if (c < 0xA0)
@@ -2165,21 +2175,21 @@ emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status)
  
         case 1:
           code = c;
-         charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
-                                    ? charset_ascii : charset_eight_bit);
+         charset_id = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit;
           break;
  
         default:
           abort ();
         }
-      c = DECODE_CHAR (charset, code);
+      CODING_DECODE_CHAR (coding, src, src_base, src_end,
+                         CHARSET_FROM_ID (charset_id), code, c);
        if (c < 0)
         goto invalid_code;
      }
    *nbytes = src - src_base;
    *nchars = consumed_chars;
    if (id)
-    *id = charset->id;
+    *id = charset_id;
    return (mseq_found ? -c : c);
  
   no_more_source:
@@ -2445,8 +2455,8 @@ decode_coding_emacs_mule (coding)
    const unsigned char *src_end = coding->source + coding->src_bytes;
    const unsigned char *src_base;
    int *charbuf = coding->charbuf + coding->charbuf_used;
-  /* We may produce two annocations (charset and composition) in one
-     loop and one more charset annocation at the end.  */
+  /* We may produce two annotations (charset and composition) in one
+     loop and one more charset annotation at the end.  */
    int *charbuf_end
      = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
    int consumed_chars = 0, consumed_chars_base;
@@ -2519,9 +2529,23 @@ decode_coding_emacs_mule (coding)
        else
         {
           int nchars, nbytes;
+         /* emacs_mule_char can load a charset map from a file, which
+            allocates a large structure and might cause buffer text
+            to be relocated as a result.  Thus, we need to remember
+            the original pointer to buffer text, and fix up all
+            related pointers after the call.  */
+         const unsigned char *orig = coding->source;
+         EMACS_INT offset;
  
           c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
                                cmp_status);
+         offset = coding->source - orig;
+         if (offset)
+           {
+             src += offset;
+             src_base += offset;
+             src_end += offset;
+           }
           if (c < 0)
             {
               if (c == -1)
@@ -2535,7 +2559,7 @@ decode_coding_emacs_mule (coding)
             cmp_status->ncomps -= nchars;
         }
  
-      /* Now if C >= 0, we found a normally encoded characer, if C <
+      /* Now if C >= 0, we found a normally encoded character, if C <
          0, we found an old-style composition component character or
          rule.  */
  
@@ -3048,7 +3072,7 @@ setup_iso_safe_charsets (attrs)
  
  
  /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
-   Check if a text is encoded in one of ISO-2022 based codig systems.
+   Check if a text is encoded in one of ISO-2022 based coding systems.
     If it is, return 1, else return 0.  */
  
  static int
@@ -3238,9 +3262,13 @@ detect_coding_iso_2022 (coding, detect_info)
                   int i = 1;
                   while (src < src_end)
                     {
+                     src_base = src;
                       ONE_MORE_BYTE (c);
                       if (c < 0xA0)
-                       break;
+                       {
+                         src = src_base;
+                         break;
+                       }
                       i++;
                     }
  
@@ -3456,7 +3484,7 @@ finish_composition (charbuf, cmp_status)
    return new_chars;
  }
  
-/* If characers are under composition, finish the composition.  */
+/* If characters are under composition, finish the composition.  */
  #define MAYBE_FINISH_COMPOSITION()                             \
    do {                                                         \
      if (cmp_status->state != COMPOSING_NO)                     \
@@ -3563,8 +3591,8 @@ decode_coding_iso_2022 (coding)
    const unsigned char *src_end = coding->source + coding->src_bytes;
    const unsigned char *src_base;
    int *charbuf = coding->charbuf + coding->charbuf_used;
-  /* We may produce two annocations (charset and composition) in one
-     loop and one more charset annocation at the end.  */
+  /* We may produce two annotations (charset and composition) in one
+     loop and one more charset annotation at the end.  */
    int *charbuf_end
      = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
    int consumed_chars = 0, consumed_chars_base;
@@ -3600,7 +3628,7 @@ decode_coding_iso_2022 (coding)
  
    while (1)
      {
-      int c1, c2;
+      int c1, c2, c3;
  
        src_base = src;
        consumed_chars_base = consumed_chars;
@@ -3725,6 +3753,8 @@ decode_coding_iso_2022 (coding)
           continue;
  
         case ISO_single_shift_2_7:
+         if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
+           goto invalid_code;
         case ISO_single_shift_2:
           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
             goto invalid_code;
@@ -3860,11 +3890,11 @@ decode_coding_iso_2022 (coding)
               continue;
  
             case '[':           /* specification of direction */
-             if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
+             if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
                 goto invalid_code;
               /* For the moment, nested direction is not supported.
                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
-                left-to-right, and nozero means right-to-left.  */
+                left-to-right, and nonzero means right-to-left.  */
               ONE_MORE_BYTE (c1);
               switch (c1)
                 {
@@ -3905,7 +3935,7 @@ decode_coding_iso_2022 (coding)
                   int size;
  
                   ONE_MORE_BYTE (dim);
-                 if (dim < 0 || dim > 4)
+                 if (dim < '0' || dim > '4')
                     goto invalid_code;
                   ONE_MORE_BYTE (M);
                   if (M < 128)
@@ -3984,26 +4014,28 @@ decode_coding_iso_2022 (coding)
         }
  
        /* Now we know CHARSET and 1st position code C1 of a character.
-         Produce a decoded character while getting 2nd position code
-         C2 if necessary.  */
-      c1 &= 0x7F;
+         Produce a decoded character while getting 2nd and 3rd
+         position codes C2, C3 if necessary.  */
        if (CHARSET_DIMENSION (charset) > 1)
         {
           ONE_MORE_BYTE (c2);
-         if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
+         if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
+             || ((c1 & 0x80) != (c2 & 0x80)))
             /* C2 is not in a valid range.  */
             goto invalid_code;
-         c1 = (c1 << 8) | (c2 & 0x7F);
-         if (CHARSET_DIMENSION (charset) > 2)
+         if (CHARSET_DIMENSION (charset) == 2)
+           c1 = (c1 << 8) | c2;
+         else
             {
-             ONE_MORE_BYTE (c2);
-             if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
-               /* C2 is not in a valid range.  */
+             ONE_MORE_BYTE (c3);
+             if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
+                 || ((c1 & 0x80) != (c3 & 0x80)))
+               /* C3 is not in a valid range.  */
                 goto invalid_code;
-             c1 = (c1 << 8) | (c2 & 0x7F);
+             c1 = (c1 << 16) | (c2 << 8) | c2;
             }
         }
-
+      c1 &= 0x7F7F7F;
        CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
        if (c < 0)
         {
@@ -4496,7 +4528,10 @@ encode_coding_iso_2022 (coding)
    charset_list = CODING_ATTR_CHARSET_LIST (attrs);
    coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
  
-  ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
+  ascii_compatible
+    = (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
+       && ! (CODING_ISO_FLAGS (coding) & (CODING_ISO_FLAG_DESIGNATION
+                                         | CODING_ISO_FLAG_LOCKING_SHIFT)));
  
    while (charbuf < charbuf_end)
      {
@@ -4669,6 +4704,12 @@ detect_coding_sjis (coding, detect_info)
    int consumed_chars = 0;
    int found = 0;
    int c;
+  Lisp_Object attrs, charset_list;
+  int max_first_byte_of_2_byte_code;
+
+  CODING_GET_INFO (coding, attrs, charset_list);
+  max_first_byte_of_2_byte_code
+    = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
  
    detect_info->checked |= CATEGORY_MASK_SJIS;
    /* A coding system of this category is always ASCII compatible.  */
@@ -4680,7 +4721,8 @@ detect_coding_sjis (coding, detect_info)
        ONE_MORE_BYTE (c);
        if (c < 0x80)
         continue;
-      if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
+      if ((c >= 0x81 && c <= 0x9F)
+         || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
         {
           ONE_MORE_BYTE (c);
           if (c < 0x40 || c == 0x7F || c > 0xFC)
@@ -4765,7 +4807,7 @@ decode_coding_sjis (coding)
    const unsigned char *src_end = coding->source + coding->src_bytes;
    const unsigned char *src_base;
    int *charbuf = coding->charbuf + coding->charbuf_used;
-  /* We may produce one charset annocation in one loop and one more at
+  /* We may produce one charset annotation in one loop and one more at
       the end.  */
    int *charbuf_end
      = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
@@ -4884,7 +4926,7 @@ decode_coding_big5 (coding)
    const unsigned char *src_end = coding->source + coding->src_bytes;
    const unsigned char *src_base;
    int *charbuf = coding->charbuf + coding->charbuf_used;
-  /* We may produce one charset annocation in one loop and one more at
+  /* We may produce one charset annotation in one loop and one more at
       the end.  */
    int *charbuf_end
      = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
@@ -5053,7 +5095,8 @@ encode_coding_sjis (coding)
               int c1, c2;
  
               c1 = code >> 8;
-             if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
+             if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
+                 || c1 == 0x28
                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
                 {
                   JIS_TO_SJIS2 (code);
@@ -5203,62 +5246,52 @@ decode_coding_ccl (coding)
    int *charbuf_end = coding->charbuf + coding->charbuf_size;
    int consumed_chars = 0;
    int multibytep = coding->src_multibyte;
-  struct ccl_program ccl;
+  struct ccl_program *ccl = &coding->spec.ccl->ccl;
    int source_charbuf[1024];
-  int source_byteidx[1024];
+  int source_byteidx[1025];
    Lisp_Object attrs, charset_list;
  
    CODING_GET_INFO (coding, attrs, charset_list);
-  setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
  
-  while (src < src_end)
+  while (1)
      {
        const unsigned char *p = src;
-      int *source, *source_end;
        int i = 0;
  
        if (multibytep)
-       while (i < 1024 && p < src_end)
-         {
-           source_byteidx[i] = p - src;
-           source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
-         }
+       {
+         while (i < 1024 && p < src_end)
+           {
+             source_byteidx[i] = p - src;
+             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
+           }
+         source_byteidx[i] = p - src;
+       }
        else
         while (i < 1024 && p < src_end)
           source_charbuf[i++] = *p++;
  
        if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
-       ccl.last_block = 1;
-
-      source = source_charbuf;
-      source_end = source + i;
-      while (source < source_end)
-       {
-         ccl_driver (&ccl, source, charbuf,
-                     source_end - source, charbuf_end - charbuf,
-                     charset_list);
-         source += ccl.consumed;
-         charbuf += ccl.produced;
-         if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
-           break;
-       }
-      if (source < source_end)
-       src += source_byteidx[source - source_charbuf];
+       ccl->last_block = 1;
+      ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
+                 charset_list);
+      charbuf += ccl->produced;
+      if (multibytep)
+       src += source_byteidx[ccl->consumed];
        else
-       src = p;
-      consumed_chars += source - source_charbuf;
-
-      if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
-         && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
+       src += ccl->consumed;
+      consumed_chars += ccl->consumed;
+      if (p == src_end || ccl->status != CCL_STAT_SUSPEND_BY_SRC)
         break;
      }
  
-  switch (ccl.status)
+  switch (ccl->status)
      {
      case CCL_STAT_SUSPEND_BY_SRC:
        record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
        break;
      case CCL_STAT_SUSPEND_BY_DST:
+      record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
        break;
      case CCL_STAT_QUIT:
      case CCL_STAT_INVALID_CMD:
@@ -5277,7 +5310,7 @@ static int
  encode_coding_ccl (coding)
       struct coding_system *coding;
  {
-  struct ccl_program ccl;
+  struct ccl_program *ccl = &coding->spec.ccl->ccl;
    int multibytep = coding->dst_multibyte;
    int *charbuf = coding->charbuf;
    int *charbuf_end = charbuf + coding->charbuf_used;
@@ -5288,35 +5321,34 @@ encode_coding_ccl (coding)
    Lisp_Object attrs, charset_list;
  
    CODING_GET_INFO (coding, attrs, charset_list);
-  setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
-
-  ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
-  ccl.dst_multibyte = coding->dst_multibyte;
+  if (coding->consumed_char == coding->src_chars
+      && coding->mode & CODING_MODE_LAST_BLOCK)
+    ccl->last_block = 1;
  
    while (charbuf < charbuf_end)
      {
-      ccl_driver (&ccl, charbuf, destination_charbuf,
+      ccl_driver (ccl, charbuf, destination_charbuf,
                   charbuf_end - charbuf, 1024, charset_list);
        if (multibytep)
         {
-         ASSURE_DESTINATION (ccl.produced * 2);
-         for (i = 0; i < ccl.produced; i++)
+         ASSURE_DESTINATION (ccl->produced * 2);
+         for (i = 0; i < ccl->produced; i++)
             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
         }
        else
         {
-         ASSURE_DESTINATION (ccl.produced);
-         for (i = 0; i < ccl.produced; i++)
+         ASSURE_DESTINATION (ccl->produced);
+         for (i = 0; i < ccl->produced; i++)
             *dst++ = destination_charbuf[i] & 0xFF;
-         produced_chars += ccl.produced;
+         produced_chars += ccl->produced;
         }
-      charbuf += ccl.consumed;
-      if (ccl.status == CCL_STAT_QUIT
-         || ccl.status == CCL_STAT_INVALID_CMD)
+      charbuf += ccl->consumed;
+      if (ccl->status == CCL_STAT_QUIT
+         || ccl->status == CCL_STAT_INVALID_CMD)
         break;
      }
  
-  switch (ccl.status)
+  switch (ccl->status)
      {
      case CCL_STAT_SUSPEND_BY_SRC:
        record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
@@ -5560,7 +5592,7 @@ decode_coding_charset (coding)
    const unsigned char *src_end = coding->source + coding->src_bytes;
    const unsigned char *src_base;
    int *charbuf = coding->charbuf + coding->charbuf_used;
-  /* We may produce one charset annocation in one loop and one more at
+  /* We may produce one charset annotation in one loop and one more at
       the end.  */
    int *charbuf_end
      = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
@@ -5791,6 +5823,7 @@ setup_coding_system (coding_system, coding)
    coding->max_charset_id = SCHARS (val) - 1;
    coding->safe_charsets = SDATA (val);
    coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
+  coding->carryover_bytes = 0;
  
    coding_type = CODING_ATTR_TYPE (attrs);
    if (EQ (coding_type, Qundecided))
@@ -6040,10 +6073,9 @@ raw_text_coding_system (coding_system)
  }
  
  
-/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
-   does, return one of the subsidiary that has the same eol-spec as
-   PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
-   inherit end-of-line format from the system's setting
+/* If CODING_SYSTEM doesn't specify end-of-line format, return its
+   subsidiary that has the same eol-spec as PARENT (if PARENT is not
+   nil and specifies end-of-line format) or as the system's setting
     (system_eol_type).  */
  
  Lisp_Object
@@ -6066,6 +6098,8 @@ coding_inherit_eol_type (coding_system, parent)
  
           parent_spec = CODING_SYSTEM_SPEC (parent);
           parent_eol_type = AREF (parent_spec, 2);
+         if (VECTORP (parent_eol_type))
+           parent_eol_type = system_eol_type;      
         }
        else
         parent_eol_type = system_eol_type;
@@ -6079,6 +6113,46 @@ coding_inherit_eol_type (coding_system, parent)
    return coding_system;
  }
  
+
+/* Return CODING_SYSTEM itself if it decides both text-conversion and
+   eol-conversion.  Otherwise take each undecided part from the first
+   candidate that decides it, and return the combined coding system.  */
+
+Lisp_Object
+complement_process_encoding_system (coding_system)
+     Lisp_Object coding_system;
+{
+  Lisp_Object coding_base = Qnil, eol_base = Qnil;
+  Lisp_Object spec, attrs;
+  int i;
+
+  for (i = 0; i < 3; i++)  /* Candidate 0 is CODING_SYSTEM itself.  */
+    {
+      if (i == 1)  /* 1st fallback: cdr of the process default.  */
+       coding_system = CDR_SAFE (Vdefault_process_coding_system);
+      else if (i == 2)  /* 2nd fallback: preferred_coding_system ().  */
+       coding_system = preferred_coding_system ();
+      spec = CODING_SYSTEM_SPEC (coding_system);
+      if (NILP (spec))
+       continue;  /* No spec for this candidate; try the next.  */
+      attrs = AREF (spec, 0);
+      if (NILP (coding_base) && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
+       coding_base = CODING_ATTR_BASE_NAME (attrs);  /* Type decided.  */
+      if (NILP (eol_base) && ! VECTORP (AREF (spec, 2)))
+       eol_base = coding_system;  /* Presumably eol format decided.  */
+      if (! NILP (coding_base) && ! NILP (eol_base))
+       break;
+    }
+
+  if (i > 0)
+    /* CODING_SYSTEM alone left something undecided (we got past
+       candidate 0), so build the result from the parts found.
+       NOTE(review): coding_base may still be nil here -- confirm.  */
+    coding_system = coding_inherit_eol_type (coding_base, eol_base);
+  return coding_system;
+}
+
+
  /* Emacs has a mechanism to automatically detect a coding system if it
     is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
     it's impossible to distinguish some coding systems accurately
@@ -6129,14 +6203,14 @@ coding_inherit_eol_type (coding_system, parent)
     o coding-category-iso-7-else
  
         The category for a coding system which has the same code range
-       as ISO2022 of 7-bit environemnt but uses locking shift or
+       as ISO2022 of 7-bit environment but uses locking shift or
         single shift functions.  Assigned the coding-system (Lisp
         symbol) `iso-2022-7bit-lock' by default.
  
     o coding-category-iso-8-else
  
         The category for a coding system which has the same code range
-       as ISO2022 of 8-bit environemnt but uses locking shift or
+       as ISO2022 of 8-bit environment but uses locking shift or
         single shift functions.  Assigned the coding-system (Lisp
         symbol) `iso-2022-8bit-ss2' by default.
  
@@ -7099,6 +7173,7 @@ decode_coding (coding)
    Lisp_Object attrs;
    Lisp_Object undo_list;
    Lisp_Object translation_table;
+  struct ccl_spec cclspec;
    int carryover;
    int i;
  
@@ -7131,6 +7206,11 @@ decode_coding (coding)
    translation_table = get_translation_table (attrs, 0, NULL);
  
    carryover = 0;
+  if (coding->decoder == decode_coding_ccl)
+    {
+      coding->spec.ccl = &cclspec;
+      setup_ccl_program (&cclspec.ccl, CODING_CCL_DECODER (coding));
+    }
    do
      {
        EMACS_INT pos = coding->dst_pos + coding->produced_char;
@@ -7147,9 +7227,10 @@ decode_coding (coding)
         coding->charbuf[i]
           = coding->charbuf[coding->charbuf_used - carryover + i];
      }
-  while (coding->consumed < coding->src_bytes
-        && (coding->result == CODING_RESULT_SUCCESS
-            || coding->result == CODING_RESULT_INVALID_SRC));
+  while (coding->result == CODING_RESULT_INSUFFICIENT_DST
+        || (coding->consumed < coding->src_bytes
+            && (coding->result == CODING_RESULT_SUCCESS
+                || coding->result == CODING_RESULT_INVALID_SRC)));
  
    if (carryover > 0)
      {
@@ -7258,7 +7339,7 @@ handle_composition_annotation (pos, limit, coding, buf, stop)
               components = COMPOSITION_COMPONENTS (prop);
               if (VECTORP (components))
                 {
-                 len = XVECTOR (components)->size;
+                 len = XVECTOR_SIZE (components);
                   for (i = 0; i < len; i++)
                     *buf++ = XINT (AREF (components, i));
                 }
@@ -7399,7 +7480,8 @@ consume_chars (coding, translation_table, max_lookup)
         {
           EMACS_INT bytes;
  
-         if (coding->encoder == encode_coding_raw_text)
+         if (coding->encoder == encode_coding_raw_text
+             || coding->encoder == encode_coding_ccl)
             c = *src++, pos++;
           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
@@ -7498,6 +7580,7 @@ encode_coding (coding)
    Lisp_Object attrs;
    Lisp_Object translation_table;
    int max_lookup;
+  struct ccl_spec cclspec;
  
    attrs = CODING_ID_ATTRS (coding->id);
    if (coding->encoder == encode_coding_raw_text)
@@ -7519,6 +7602,11 @@ encode_coding (coding)
  
    ALLOC_CONVERSION_WORK_AREA (coding);
  
+  if (coding->encoder == encode_coding_ccl)
+    {
+      coding->spec.ccl = &cclspec;
+      setup_ccl_program (&cclspec.ccl, CODING_CCL_ENCODER (coding));
+    }
    do {
      coding_set_source (coding);
      consume_chars (coding, translation_table, max_lookup);
@@ -7547,7 +7635,7 @@ static Lisp_Object Vcode_conversion_reused_workbuf;
  static int reused_workbuf_in_use;
  
  
-/* Return a working buffer of code convesion.  MULTIBYTE specifies the
+/* Return a working buffer of code conversion.  MULTIBYTE specifies the
     multibyteness of returning buffer.  */
  
  static Lisp_Object
@@ -7852,7 +7940,7 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
           if (! destination)
             {
               record_conversion_result (coding,
-                                       CODING_RESULT_INSUFFICIENT_DST);
+                                       CODING_RESULT_INSUFFICIENT_MEM);
               unbind_to (count, Qnil);
               return;
             }
@@ -8210,7 +8298,7 @@ function `define-coding-system'.  */)
  \f
  /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
     HIGHEST is nonzero, return the coding system of the highest
-   priority among the detected coding systems.  Otherwize return a
+   priority among the detected coding systems.  Otherwise return a
     list of detected coding systems sorted by their priorities.  If
     MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
     multibyte form but contains only ASCII and eight-bit chars.
@@ -8638,7 +8726,7 @@ DEFUN ("find-coding-systems-region-internal",
    EMACS_INT start_byte, end_byte;
    const unsigned char *p, *pbeg, *pend;
    int c;
-  Lisp_Object tail, elt;
+  Lisp_Object tail, elt, work_table;
  
    if (STRINGP (start))
      {
@@ -8696,6 +8784,7 @@ DEFUN ("find-coding-systems-region-internal",
    while (p < pend && ASCII_BYTE_P (*p)) p++;
    while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
  
+  work_table = Fmake_char_table (Qnil, Qnil);
    while (p < pend)
      {
        if (ASCII_BYTE_P (*p))
@@ -8703,6 +8792,9 @@ DEFUN ("find-coding-systems-region-internal",
        else
         {
           c = STRING_CHAR_ADVANCE (p);
+         if (!NILP (char_table_ref (work_table, c)))
+           /* This character was already checked.  Ignore it.  */
+           continue;
  
           charset_map_loaded = 0;
           for (tail = coding_attrs_list; CONSP (tail);)
@@ -8734,6 +8826,7 @@ DEFUN ("find-coding-systems-region-internal",
               p = pbeg + p_offset;
               pend = pbeg + pend_offset;
             }
+         char_table_set (work_table, c, Qt);
         }
      }
  
@@ -9330,7 +9423,7 @@ DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_intern
    setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
    /* We had better not send unsafe characters to terminal.  */
    terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
-  /* Characer composition should be disabled.  */
+  /* Character composition should be disabled.  */
    terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
    terminal_coding->src_multibyte = 1;
    terminal_coding->dst_multibyte = 0;
@@ -9347,7 +9440,7 @@ DEFUN ("set-safe-terminal-coding-system-internal",
    CHECK_SYMBOL (coding_system);
    setup_coding_system (Fcheck_coding_system (coding_system),
                        &safe_terminal_coding);
-  /* Characer composition should be disabled.  */
+  /* Character composition should be disabled.  */
    safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
    safe_terminal_coding.src_multibyte = 1;
    safe_terminal_coding.dst_multibyte = 0;
@@ -9379,9 +9472,12 @@ DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_intern
  {
    struct terminal *t = get_terminal (terminal, 1);
    CHECK_SYMBOL (coding_system);
-  setup_coding_system (Fcheck_coding_system (coding_system),
-                      TERMINAL_KEYBOARD_CODING (t));
-  /* Characer composition should be disabled.  */
+  if (NILP (coding_system))
+    coding_system = Qno_conversion;
+  else
+    Fcheck_coding_system (coding_system);
+  setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
+  /* Character composition should be disabled.  */
    TERMINAL_KEYBOARD_CODING (t)->common_flags
      &= ~CODING_ANNOTATE_COMPOSITION_MASK;
    return Qnil;
@@ -9601,7 +9697,7 @@ HIGHESTP non-nil means just return the highest priority one.  */)
    return Fnreverse (val);
  }
  
-static char *suffixes[] = { "-unix", "-dos", "-mac" };
+static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
  
  static Lisp_Object
  make_subsidiaries (base)
@@ -9758,7 +9854,7 @@ usage: (define-coding-system-internal ...)  */)
  
          If Nth element is a list of charset IDs, N is the first byte
          of one of them.  The list is sorted by dimensions of the
-        charsets.  A charset of smaller dimension comes firtst. */
+        charsets.  A charset of smaller dimension comes first. */
        val = Fmake_vector (make_number (256), Qnil);
  
        for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
@@ -10395,7 +10491,7 @@ syms_of_coding ()
    Vcode_conversion_reused_workbuf = Qnil;
  
    staticpro (&Vcode_conversion_workbuf_name);
-  Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
+  Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*");
  
    reused_workbuf_in_use = 0;
  
@@ -10456,14 +10552,14 @@ syms_of_coding ()
  
    DEFSYM (Qcoding_system_error, "coding-system-error");
    Fput (Qcoding_system_error, Qerror_conditions,
-       Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
+       pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil)));
    Fput (Qcoding_system_error, Qerror_message,
-       build_string ("Invalid coding system"));
+       make_pure_c_string ("Invalid coding system"));
  
    /* Intern this now in case it isn't already done.
       Setting this variable twice is harmless.
       But don't staticpro it here--that is done in alloc.c.  */
-  Qchar_table_extra_slots = intern ("char-table-extra-slots");
+  Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
  
    DEFSYM (Qtranslation_table, "translation-table");
    Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
@@ -10489,48 +10585,48 @@ syms_of_coding ()
    staticpro (&Vcoding_category_table);
    /* Followings are target of code detection.  */
    ASET (Vcoding_category_table, coding_category_iso_7,
-       intern ("coding-category-iso-7"));
+       intern_c_string ("coding-category-iso-7"));
    ASET (Vcoding_category_table, coding_category_iso_7_tight,
-       intern ("coding-category-iso-7-tight"));
+       intern_c_string ("coding-category-iso-7-tight"));
    ASET (Vcoding_category_table, coding_category_iso_8_1,
-       intern ("coding-category-iso-8-1"));
+       intern_c_string ("coding-category-iso-8-1"));
    ASET (Vcoding_category_table, coding_category_iso_8_2,
-       intern ("coding-category-iso-8-2"));
+       intern_c_string ("coding-category-iso-8-2"));
    ASET (Vcoding_category_table, coding_category_iso_7_else,
-       intern ("coding-category-iso-7-else"));
+       intern_c_string ("coding-category-iso-7-else"));
    ASET (Vcoding_category_table, coding_category_iso_8_else,
-       intern ("coding-category-iso-8-else"));
+       intern_c_string ("coding-category-iso-8-else"));
    ASET (Vcoding_category_table, coding_category_utf_8_auto,
-       intern ("coding-category-utf-8-auto"));
+       intern_c_string ("coding-category-utf-8-auto"));
    ASET (Vcoding_category_table, coding_category_utf_8_nosig,
-       intern ("coding-category-utf-8"));
+       intern_c_string ("coding-category-utf-8"));
    ASET (Vcoding_category_table, coding_category_utf_8_sig,
-       intern ("coding-category-utf-8-sig"));
+       intern_c_string ("coding-category-utf-8-sig"));
    ASET (Vcoding_category_table, coding_category_utf_16_be,
-       intern ("coding-category-utf-16-be"));
+       intern_c_string ("coding-category-utf-16-be"));
    ASET (Vcoding_category_table, coding_category_utf_16_auto,
-       intern ("coding-category-utf-16-auto"));
+       intern_c_string ("coding-category-utf-16-auto"));
    ASET (Vcoding_category_table, coding_category_utf_16_le,
-       intern ("coding-category-utf-16-le"));
+       intern_c_string ("coding-category-utf-16-le"));
    ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
-       intern ("coding-category-utf-16-be-nosig"));
+       intern_c_string ("coding-category-utf-16-be-nosig"));
    ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
-       intern ("coding-category-utf-16-le-nosig"));
+       intern_c_string ("coding-category-utf-16-le-nosig"));
    ASET (Vcoding_category_table, coding_category_charset,
-       intern ("coding-category-charset"));
+       intern_c_string ("coding-category-charset"));
    ASET (Vcoding_category_table, coding_category_sjis,
-       intern ("coding-category-sjis"));
+       intern_c_string ("coding-category-sjis"));
    ASET (Vcoding_category_table, coding_category_big5,
-       intern ("coding-category-big5"));
+       intern_c_string ("coding-category-big5"));
    ASET (Vcoding_category_table, coding_category_ccl,
-       intern ("coding-category-ccl"));
+       intern_c_string ("coding-category-ccl"));
    ASET (Vcoding_category_table, coding_category_emacs_mule,
-       intern ("coding-category-emacs-mule"));
+       intern_c_string ("coding-category-emacs-mule"));
    /* Followings are NOT target of code detection.  */
    ASET (Vcoding_category_table, coding_category_raw_text,
-       intern ("coding-category-raw-text"));
+       intern_c_string ("coding-category-raw-text"));
    ASET (Vcoding_category_table, coding_category_undecided,
-       intern ("coding-category-undecided"));
+       intern_c_string ("coding-category-undecided"));
  
    DEFSYM (Qinsufficient_source, "insufficient-source");
    DEFSYM (Qinconsistent_eol, "inconsistent-eol");
@@ -10731,22 +10827,22 @@ Also used for decoding keyboard input on X Window system.  */);
    DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
                doc: /*
  *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
-  eol_mnemonic_unix = build_string (":");
+  eol_mnemonic_unix = make_pure_c_string (":");
  
    DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
                doc: /*
  *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
-  eol_mnemonic_dos = build_string ("\\");
+  eol_mnemonic_dos = make_pure_c_string ("\\");
  
    DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
                doc: /*
  *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
-  eol_mnemonic_mac = build_string ("/");
+  eol_mnemonic_mac = make_pure_c_string ("/");
  
    DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
                doc: /*
  *String displayed in mode line when end-of-line format is not yet determined.  */);
-  eol_mnemonic_undecided = build_string (":");
+  eol_mnemonic_undecided = make_pure_c_string (":");
  
    DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
                doc: /*
@@ -10871,25 +10967,25 @@ internal character representation.  */);
      for (i = 0; i < coding_arg_max; i++)
        args[i] = Qnil;
  
-    plist[0] = intern (":name");
+    plist[0] = intern_c_string (":name");
      plist[1] = args[coding_arg_name] = Qno_conversion;
-    plist[2] = intern (":mnemonic");
+    plist[2] = intern_c_string (":mnemonic");
      plist[3] = args[coding_arg_mnemonic] = make_number ('=');
-    plist[4] = intern (":coding-type");
+    plist[4] = intern_c_string (":coding-type");
      plist[5] = args[coding_arg_coding_type] = Qraw_text;
-    plist[6] = intern (":ascii-compatible-p");
+    plist[6] = intern_c_string (":ascii-compatible-p");
      plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
-    plist[8] = intern (":default-char");
+    plist[8] = intern_c_string (":default-char");
      plist[9] = args[coding_arg_default_char] = make_number (0);
-    plist[10] = intern (":for-unibyte");
+    plist[10] = intern_c_string (":for-unibyte");
      plist[11] = args[coding_arg_for_unibyte] = Qt;
-    plist[12] = intern (":docstring");
-    plist[13] = build_string ("Do no conversion.\n\
+    plist[12] = intern_c_string (":docstring");
+    plist[13] = make_pure_c_string ("Do no conversion.\n\
  \n\
  When you visit a file with this coding, the file is read into a\n\
  unibyte buffer as is, thus each byte of a file is treated as a\n\
  character.");
-    plist[14] = intern (":eol-type");
+    plist[14] = intern_c_string (":eol-type");
      plist[15] = args[coding_arg_eol_type] = Qunix;
      args[coding_arg_plist] = Flist (16, plist);
      Fdefine_coding_system_internal (coding_arg_max, args);
@@ -10899,10 +10995,10 @@ character.");
      plist[5] = args[coding_arg_coding_type] = Qundecided;
      /* This is already set.
         plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
-    plist[8] = intern (":charset-list");
+    plist[8] = intern_c_string (":charset-list");
      plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
      plist[11] = args[coding_arg_for_unibyte] = Qnil;
-    plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
+    plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
      plist[15] = args[coding_arg_eol_type] = Qnil;
      args[coding_arg_plist] = Flist (16, plist);
      Fdefine_coding_system_internal (coding_arg_max, args);