X-Git-Url: http://git.hcoop.net/bpt/emacs.git/blobdiff_plain/ba3189039adc8ec5eba5ed3e21d42019a4616b7c..e0d9c3c9a26ba2982595ec2ec4a1167ee7e39ddb:/src/coding.c

diff --git a/src/coding.c b/src/coding.c
index e4b9238599..5dbaf96840 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -1202,7 +1202,7 @@ detect_coding_utf_8 (struct coding_system *coding,
   bool multibytep = coding->src_multibyte;
   ptrdiff_t consumed_chars = 0;
   bool bom_found = 0;
-  int nchars = coding->head_ascii;
+  ptrdiff_t nchars = coding->head_ascii;
   int eol_seen = coding->eol_seen;
 
   detect_info->checked |= CATEGORY_MASK_UTF_8;
@@ -1300,6 +1300,7 @@ detect_coding_utf_8 (struct coding_system *coding,
 	   means that we found a valid non-ASCII characters.  */
 	detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_NOSIG;
     }
+  coding->detected_utf8_bytes = src_base - coding->source;
   coding->detected_utf8_chars = nchars;
   return 1;
 }
@@ -1484,7 +1485,7 @@ decode_coding_utf_8 (struct coding_system *coding)
       src = src_base;
       consumed_chars = consumed_chars_base;
       ONE_MORE_BYTE (c);
-      *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+      *charbuf++ = ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
       coding->errors++;
     }
 
@@ -1548,8 +1549,8 @@ encode_coding_utf_8 (struct coding_system *coding)
 	    *dst++ = CHAR_TO_BYTE8 (c);
 	  else
 	    CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
-	  produced_chars++;
 	}
+      produced_chars = dst - (coding->destination + coding->produced);
     }
   record_conversion_result (coding, CODING_RESULT_SUCCESS);
   coding->produced_char += produced_chars;
@@ -1724,7 +1725,7 @@ decode_coding_utf_16 (struct coding_system *coding)
 	ONE_MORE_BYTE (c2);
       if (c2 < 0)
 	{
-	  *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
+	  *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
 	  *charbuf++ = -c2;
 	  continue;
 	}
@@ -2013,7 +2014,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
   int charset_ID;
   unsigned code;
   int c;
-  int consumed_chars = 0;
+  ptrdiff_t consumed_chars = 0;
   bool mseq_found = 0;
 
   ONE_MORE_BYTE (c);
@@ -2107,7 +2108,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
 
 	case 1:
 	  code = c;
-	  charset_ID = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit;
+	  charset_ID = ASCII_CHAR_P (code) ? charset_ascii : charset_eight_bit;
 	  break;
 
 	default:
@@ -2595,7 +2596,7 @@ decode_coding_emacs_mule (struct coding_system *coding)
       src = src_base;
       consumed_chars = consumed_chars_base;
       ONE_MORE_BYTE (c);
-      *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+      *charbuf++ = ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
       char_offset++;
       coding->errors++;
     }
@@ -3190,7 +3191,7 @@ detect_coding_iso_2022 (struct coding_system *coding,
 	      if (! single_shifting
 		  && ! (rejected & CATEGORY_MASK_ISO_8_2))
 		{
-		  int len = 1;
+		  ptrdiff_t len = 1;
 		  while (src < src_end)
 		    {
 		      src_base = src;
@@ -3572,7 +3573,7 @@ decode_coding_iso_2022 (struct coding_system *coding)
 
       if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
 	{
-	  *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
+	  *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
 	  char_offset++;
 	  CODING_ISO_EXTSEGMENT_LEN (coding)--;
 	  continue;
@@ -3599,7 +3600,7 @@ decode_coding_iso_2022 (struct coding_system *coding)
 	    }
 	  else
 	    {
-	      *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
+	      *charbuf++ = ASCII_CHAR_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
 	      char_offset++;
 	    }
 	  continue;
@@ -3973,7 +3974,7 @@ decode_coding_iso_2022 (struct coding_system *coding)
 	  MAYBE_FINISH_COMPOSITION ();
 	  for (; src_base < src; src_base++, char_offset++)
 	    {
-	      if (ASCII_BYTE_P (*src_base))
+	      if (ASCII_CHAR_P (*src_base))
 		*charbuf++ = *src_base;
 	      else
 		*charbuf++ = BYTE8_TO_CHAR (*src_base);
@@ -4003,7 +4004,7 @@ decode_coding_iso_2022 (struct coding_system *coding)
       src = src_base;
       consumed_chars = consumed_chars_base;
       ONE_MORE_BYTE (c);
-      *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+      *charbuf++ = c < 0 ? -c : ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
       char_offset++;
       coding->errors++;
       /* Reset the invocation and designation status to the safest
@@ -4456,7 +4457,7 @@ encode_coding_iso_2022 (struct coding_system *coding)
 	{
 	  /* We have to produce designation sequences if any now.  */
 	  unsigned char desig_buf[16];
-	  int nbytes;
+	  ptrdiff_t nbytes;
 	  ptrdiff_t offset;
 
 	  charset_map_loaded = 0;
@@ -5639,7 +5640,7 @@ decode_coding_charset (struct coding_system *coding)
       src = src_base;
       consumed_chars = consumed_chars_base;
       ONE_MORE_BYTE (c);
-      *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+      *charbuf++ = c < 0 ? -c : ASCII_CHAR_P (c) ? c : BYTE8_TO_CHAR (c);
       char_offset++;
       coding->errors++;
     }
@@ -6211,7 +6212,7 @@ static Lisp_Object adjust_coding_eol_type (struct coding_system *coding,
    EOL_SEEN_LF, EOL_SEEN_CR, and EOL_SEEN_CRLF, but the value is
    reliable only when all the source bytes are ASCII.  */
 
-static int
+static ptrdiff_t
 check_ascii (struct coding_system *coding)
 {
   const unsigned char *src, *end;
@@ -6283,12 +6284,12 @@ check_ascii (struct coding_system *coding)
    the value is reliable only when all the source bytes are valid
    UTF-8.  */
 
-static int
+static ptrdiff_t
 check_utf_8 (struct coding_system *coding)
 {
   const unsigned char *src, *end;
   int eol_seen;
-  int nchars = coding->head_ascii;
+  ptrdiff_t nchars = coding->head_ascii;
 
   if (coding->head_ascii < 0)
     check_ascii (coding);
@@ -7264,13 +7265,16 @@ produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos)
 		      coding->dst_object);
 }
 
+#define MAX_CHARBUF_SIZE 0x4000
+#define MIN_CHARBUF_SIZE 0x10
 
-#define CHARBUF_SIZE 0x4000
-
-#define ALLOC_CONVERSION_WORK_AREA(coding)				\
-  do {									\
-    coding->charbuf = SAFE_ALLOCA (CHARBUF_SIZE * sizeof (int));	\
-    coding->charbuf_size = CHARBUF_SIZE;				\
+#define ALLOC_CONVERSION_WORK_AREA(coding, size)		\
+  do {								\
+    int units = ((size) > MAX_CHARBUF_SIZE ? MAX_CHARBUF_SIZE	\
+		 : (size) < MIN_CHARBUF_SIZE ? MIN_CHARBUF_SIZE	\
+		 : size);					\
+    coding->charbuf = SAFE_ALLOCA ((units) * sizeof (int));	\
+    coding->charbuf_size = (units);				\
   } while (0)
 
 
@@ -7372,7 +7376,7 @@ decode_coding (struct coding_system *coding)
   record_conversion_result (coding, CODING_RESULT_SUCCESS);
   coding->errors = 0;
 
-  ALLOC_CONVERSION_WORK_AREA (coding);
+  ALLOC_CONVERSION_WORK_AREA (coding, coding->src_bytes);
 
   attrs = CODING_ID_ATTRS (coding->id);
   translation_table = get_translation_table (attrs, 0, NULL);
@@ -7414,7 +7418,7 @@ decode_coding (struct coding_system *coding)
   coding->carryover_bytes = 0;
   if (coding->consumed < coding->src_bytes)
     {
-      int nbytes = coding->src_bytes - coding->consumed;
+      ptrdiff_t nbytes = coding->src_bytes - coding->consumed;
       const unsigned char *src;
 
       coding_set_source (coding);
@@ -7768,7 +7772,7 @@ encode_coding (struct coding_system *coding)
   record_conversion_result (coding, CODING_RESULT_SUCCESS);
   coding->errors = 0;
 
-  ALLOC_CONVERSION_WORK_AREA (coding);
+  ALLOC_CONVERSION_WORK_AREA (coding, coding->src_chars);
 
   if (coding->encoder == encode_coding_ccl)
     {
@@ -7890,7 +7894,7 @@ decode_coding_gap (struct coding_system *coding,
   coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
 
   coding->head_ascii = -1;
-  coding->detected_utf8_chars = -1;
+  coding->detected_utf8_bytes = coding->detected_utf8_chars = -1;
   coding->eol_seen = EOL_SEEN_NONE;
   if (CODING_REQUIRE_DETECTION (coding))
     detect_coding (coding);
@@ -7907,7 +7911,8 @@ decode_coding_gap (struct coding_system *coding,
       if (chars != bytes)
 	{
 	  /* There exists a non-ASCII byte.  */
-	  if (EQ (CODING_ATTR_TYPE (attrs), Qutf_8))
+	  if (EQ (CODING_ATTR_TYPE (attrs), Qutf_8)
+	      && coding->detected_utf8_bytes == coding->src_bytes)
 	    {
 	      if (coding->detected_utf8_chars >= 0)
 		chars = coding->detected_utf8_chars;
@@ -8441,11 +8446,11 @@ from_unicode (Lisp_Object str)
 }
 
 Lisp_Object
-from_unicode_buffer (const wchar_t* wstr)
+from_unicode_buffer (const wchar_t *wstr)
 {
     return from_unicode (
         make_unibyte_string (
-            (char*) wstr,
+            (char *) wstr,
             /* we get one of the two final 0 bytes for free. */
             1 + sizeof (wchar_t) * wcslen (wstr)));
 }
@@ -9029,13 +9034,13 @@ DEFUN ("find-coding-systems-region-internal",
     p = pbeg = BYTE_POS_ADDR (start_byte);
   pend = p + (end_byte - start_byte);
 
-  while (p < pend && ASCII_BYTE_P (*p)) p++;
-  while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
+  while (p < pend && ASCII_CHAR_P (*p)) p++;
+  while (p < pend && ASCII_CHAR_P (*(pend - 1))) pend--;
 
   work_table = Fmake_char_table (Qnil, Qnil);
   while (p < pend)
     {
-      if (ASCII_BYTE_P (*p))
+      if (ASCII_CHAR_P (*p))
 	p++;
       else
 	{
@@ -9089,8 +9094,7 @@ DEFUN ("find-coding-systems-region-internal",
 
 DEFUN ("unencodable-char-position", Funencodable_char_position,
        Sunencodable_char_position, 3, 5, 0,
-       doc: /*
-Return position of first un-encodable character in a region.
+       doc: /* Return position of first un-encodable character in a region.
 START and END specify the region and CODING-SYSTEM specifies the
 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
 
@@ -9100,8 +9104,9 @@ list of positions.
 
 If optional 5th argument STRING is non-nil, it is a string to search
 for un-encodable characters.  In that case, START and END are indexes
-to the string.  */)
-  (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object count, Lisp_Object string)
+to the string and treated as in `substring'.  */)
+  (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system,
+   Lisp_Object count, Lisp_Object string)
 {
   EMACS_INT n;
   struct coding_system coding;
@@ -9138,12 +9143,7 @@ to the string.  */)
   else
     {
       CHECK_STRING (string);
-      CHECK_NATNUM (start);
-      CHECK_NATNUM (end);
-      if (! (XINT (start) <= XINT (end) && XINT (end) <= SCHARS (string)))
-	args_out_of_range_3 (string, start, end);
-      from = XINT (start);
-      to = XINT (end);
+      validate_subarray (string, start, end, SCHARS (string), &from, &to);
       if (! STRING_MULTIBYTE (string))
 	return Qnil;
       p = SDATA (string) + string_char_to_byte (string, from);
@@ -9167,7 +9167,7 @@ to the string.  */)
       int c;
 
       if (ascii_compatible)
-	while (p < stop && ASCII_BYTE_P (*p))
+	while (p < stop && ASCII_CHAR_P (*p))
 	  p++, from++;
       if (p >= stop)
 	{
@@ -9283,12 +9283,12 @@ is nil.  */)
     p = pbeg = BYTE_POS_ADDR (start_byte);
   pend = p + (end_byte - start_byte);
 
-  while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
-  while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
+  while (p < pend && ASCII_CHAR_P (*p)) p++, pos++;
+  while (p < pend && ASCII_CHAR_P (*(pend - 1))) pend--;
 
   while (p < pend)
     {
-      if (ASCII_BYTE_P (*p))
+      if (ASCII_CHAR_P (*p))
 	p++;
       else
 	{
@@ -9596,7 +9596,7 @@ Return the corresponding character.  */)
   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
   attrs = AREF (spec, 0);
 
-  if (ASCII_BYTE_P (ch)
+  if (ASCII_CHAR_P (ch)
       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
     return code;
 
@@ -9677,7 +9677,7 @@ Return the corresponding character.  */)
   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
   attrs = AREF (spec, 0);
 
-  if (ASCII_BYTE_P (ch)
+  if (ASCII_CHAR_P (ch)
       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
     return code;