From: Paul Eggert <eggert@cs.ucla.edu>
Date: Thu, 21 Apr 2011 06:03:09 +0000 (-0700)
Subject: Treat large integers as floats in the Lisp reader and in string-to-number.
X-Git-Url: http://git.hcoop.net/bpt/emacs.git/commitdiff_plain/452f4150134e4ba7bbd2bad9ce87d19c200505de?hp=6703b2e490339a624bb83c9543f1e51ede26b52b

Treat large integers as floats in the Lisp reader and in string-to-number.
---

diff --git a/src/ChangeLog b/src/ChangeLog
index 7eaa153f79..2b9978f3d6 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,32 @@
+2011-04-21  Paul Eggert  <eggert@cs.ucla.edu>
+
+	Make the Lisp reader and string-to-number more consistent.
+	* data.c (atof): Remove decl; no longer used or needed.
+	(digit_to_number): Move to lread.c.
+	(Fstring_to_number): Use new string_to_number function, to be
+	consistent with how the Lisp reader treats infinities and NaNs.
+	Do not assume that floating-point numbers represent EMACS_INT
+	without losing information; this is not true on most 64-bit hosts.
+	Avoid double-rounding errors, by insisting on integers when
+	parsing non-base-10 numbers, as the documentation specifies.
+	* lisp.h (string_to_number): New decl, replacing ...
+	(isfloat_string): Remove.
+	* lread.c (read1): Do not accept +. and -. as integers; this
+	appears to have been a coding error.  Similarly, do not accept
+	strings like +-1e0 as floating point numbers.  Report integer
+	overflow only when the base is not 10, since in that case there
+	is no simple and reliable way to continue.
+	Break out the floating-point parsing into a new
+	function string_to_number, so that Fstring_to_number parses
+	floating point numbers consistently with the Lisp reader.
+	(digit_to_number): Moved here from data.c.  Make it static inline.
+	(E_CHAR, EXP_INT): Remove, replacing with ...
+	(E_EXP): New macro, to solve the "1.0e+" problem mentioned below.
+	(string_to_number): New function, replacing isfloat_string.
+	This function checks for valid syntax and produces the resulting
+	Lisp float number too.  Rework it so that string-to-number
+	no longer mishandles examples like "1.0e+".
+
 2011-04-20  Paul Eggert  <eggert@cs.ucla.edu>
 
 	* textprop.c (set_text_properties_1): Rewrite for clarity,
@@ -15,29 +44,6 @@
 	* alloc.c (overrun_check_malloc, overrun_check_realloc): Now static.
 	(overrun_check_free): Likewise.
 
-	Make the Lisp reader and string-to-float more consistent.
-	* data.c (atof): Remove decl; no longer used or needed.
-	(Fstring_to_number): Use new string_to_float function, to be
-	consistent with how the Lisp reader treats infinities and NaNs.
-	Do not assume that floating-point numbers represent EMACS_INT
-	without losing information; this is not true on most 64-bit hosts.
-	Avoid double-rounding errors, by insisting on integers when
-	parsing non-base-10 numbers, as the documentation specifies.
-	Report integer overflow instead of silently converting to
-	integers.
-	* lisp.h (string_to_float): New decl, replacing ...
-	(isfloat_string): Remove.
-	* lread.c (read1): Do not accept +. and -. as integers; this
-	appears to have been a coding error.  Similarly, do not accept
-	strings like +-1e0 as floating point numbers.  Do not report
-	overflow for some integer overflows and not others; instead,
-	report them all.  Break out the floating-point parsing into a new
-	function string_to_float, so that Fstring_to_number parses
-	floating point numbers consistently with the Lisp reader.
-	(string_to_float): New function, replacing isfloat_string.
-	This function checks for valid syntax and produces the resulting
-	Lisp float number too.
-
 	* alloc.c (SDATA_SIZE) [!GC_CHECK_STRING_BYTES]: Avoid runtime check
 	in the common case where SDATA_DATA_OFFSET is a multiple of Emacs
 	word size.
diff --git a/src/data.c b/src/data.c
index 486816cac7..4e81c80d0e 100644
--- a/src/data.c
+++ b/src/data.c
@@ -2374,26 +2374,6 @@ NUMBER may be an integer or a floating point number.  */)
   return build_string (buffer);
 }
 
-INLINE static int
-digit_to_number (int character, int base)
-{
-  int digit;
-
-  if (character >= '0' && character <= '9')
-    digit = character - '0';
-  else if (character >= 'a' && character <= 'z')
-    digit = character - 'a' + 10;
-  else if (character >= 'A' && character <= 'Z')
-    digit = character - 'A' + 10;
-  else
-    return -1;
-
-  if (digit >= base)
-    return -1;
-  else
-    return digit;
-}
-
 DEFUN ("string-to-number", Fstring_to_number, Sstring_to_number, 1, 2, 0,
        doc: /* Parse STRING as a decimal number and return the number.
 This parses both integers and floating point numbers.
@@ -2406,7 +2386,7 @@ If the base used is not 10, STRING is always parsed as integer.  */)
 {
   register char *p;
   register int b;
-  EMACS_INT n;
+  Lisp_Object val;
 
   CHECK_STRING (string);
 
@@ -2420,25 +2400,13 @@ If the base used is not 10, STRING is always parsed as integer.  */)
 	xsignal1 (Qargs_out_of_range, base);
     }
 
-  /* Skip any whitespace at the front of the number.  Typically strtol does
-     this anyway, so we might as well be consistent.  */
   p = SSDATA (string);
   while (*p == ' ' || *p == '\t')
     p++;
 
-  if (b == 10)
-    {
-      Lisp_Object val = string_to_float (p, 1);
-      if (FLOATP (val))
-	return val;
-    }
-
-  n = strtol (p, NULL, b);
-  if (FIXNUM_OVERFLOW_P (n))
-    xsignal (Qoverflow_error, list1 (string));
-  return make_number (n);
+  val = string_to_number (p, b, 1);
+  return NILP (val) ? make_number (0) : val;
 }
-
 
 enum arithop
   {
diff --git a/src/lisp.h b/src/lisp.h
index 8d333a3999..5bace90e53 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -2782,7 +2782,7 @@ extern Lisp_Object oblookup (Lisp_Object, const char *, EMACS_INT, EMACS_INT);
   } while (0)
 extern int openp (Lisp_Object, Lisp_Object, Lisp_Object,
                   Lisp_Object *, Lisp_Object);
-Lisp_Object string_to_float (char const *, int);
+Lisp_Object string_to_number (char const *, int, int);
 extern void map_obarray (Lisp_Object, void (*) (Lisp_Object, Lisp_Object),
                          Lisp_Object);
 extern void dir_warning (const char *, Lisp_Object);
diff --git a/src/lread.c b/src/lread.c
index a872929e08..390c57d167 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -3005,32 +3005,8 @@ read1 (register Lisp_Object readcharfun, int *pch, int first_in_list)
 
 	if (!quoted && !uninterned_symbol)
 	  {
-	    register char *p1;
-	    Lisp_Object result;
-	    p1 = read_buffer;
-	    if (*p1 == '+' || *p1 == '-') p1++;
-	    /* Is it an integer? */
-	    if ('0' <= *p1 && *p1 <= '9')
-	      {
-		do
-		  p1++;
-		while ('0' <= *p1 && *p1 <= '9');
-
-		/* Integers can have trailing decimal points.  */
-		p1 += (*p1 == '.');
-		if (p1 == p)
-		  {
-		    /* It is an integer. */
-		    EMACS_INT n = strtol (read_buffer, NULL, 10);
-		    if (FIXNUM_OVERFLOW_P (n))
-		      xsignal (Qoverflow_error,
-			       list1 (build_string (read_buffer)));
-		    return make_number (n);
-		  }
-	      }
-
-	    result = string_to_float (read_buffer, 0);
-	    if (FLOATP (result))
+	    Lisp_Object result = string_to_number (read_buffer, 10, 0);
+	    if (! NILP (result))
 	      return result;
 	  }
 	{
@@ -3189,23 +3165,44 @@ substitute_in_interval (INTERVAL interval, Lisp_Object arg)
 }
 
 
+static inline int
+digit_to_number (int character, int base)
+{
+  int digit;
+
+  if ('0' <= character && character <= '9')
+    digit = character - '0';
+  else if ('a' <= character && character <= 'z')
+    digit = character - 'a' + 10;
+  else if ('A' <= character && character <= 'Z')
+    digit = character - 'A' + 10;
+  else
+    return -1;
+
+  return digit < base ? digit : -1;
+}
+
 #define LEAD_INT 1
 #define DOT_CHAR 2
 #define TRAIL_INT 4
-#define E_CHAR 8
-#define EXP_INT 16
+#define E_EXP 16
 
 
-/* Convert CP to a floating point number.  Return a non-float value if CP does
-   not have valid floating point syntax.  If IGNORE_TRAILING is nonzero,
-   consider just the longest prefix of CP that has valid floating point
-   syntax.  */
+/* Convert STRING to a number, assuming base BASE.  Return a fixnum if STRING
+   has integer syntax and fits in a fixnum, else return the nearest float if
+   STRING has either floating point or integer syntax and BASE is 10, else
+   return nil.  If IGNORE_TRAILING is nonzero, consider just the longest prefix
+   of STRING that has valid numeric syntax.  Signal an overflow if BASE is not
+   10 and the number has integer syntax but does not fit.  */
 
 Lisp_Object
-string_to_float (char const *cp, int ignore_trailing)
+string_to_number (char const *string, int base, int ignore_trailing)
 {
   int state;
-  const char *start = cp;
+  char const *cp = string;
+  int leading_digit;
+  int float_syntax = 0;
+  double value = 0;
 
   /* Compute NaN and infinities using a variable, to cope with compilers that
      think they are smarter than we are.  */
@@ -3216,88 +3213,137 @@ string_to_float (char const *cp, int ignore_trailing)
      atof ("-0.0") drops the sign.  */
   int negative = *cp == '-';
 
-  double value = 0;
+  int signedp = negative || *cp == '+';
+  cp += signedp;
 
   state = 0;
-  if (negative || *cp == '+')
-    cp++;
 
-  if (*cp >= '0' && *cp <= '9')
+  leading_digit = digit_to_number (*cp, base);
+  if (0 <= leading_digit)
     {
       state |= LEAD_INT;
-      while (*cp >= '0' && *cp <= '9')
-	cp++;
+      do
+	++cp;
+      while (0 <= digit_to_number (*cp, base));
     }
+
   if (*cp == '.')
     {
       state |= DOT_CHAR;
       cp++;
     }
-  if (*cp >= '0' && *cp <= '9')
-    {
-      state |= TRAIL_INT;
-      while (*cp >= '0' && *cp <= '9')
-	cp++;
-    }
-  if (*cp == 'e' || *cp == 'E')
-    {
-      state |= E_CHAR;
-      cp++;
-      if (*cp == '+' || *cp == '-')
-	cp++;
-    }
 
-  if (*cp >= '0' && *cp <= '9')
+  if (base == 10)
     {
-      state |= EXP_INT;
-      while (*cp >= '0' && *cp <= '9')
-	cp++;
-    }
-  else if (cp == start)
-    ;
-  else if (cp[-1] == '+' && cp[0] == 'I' && cp[1] == 'N' && cp[2] == 'F')
-    {
-      state |= EXP_INT;
-      cp += 3;
-      value = 1.0 / zero;
+      if ('0' <= *cp && *cp <= '9')
+	{
+	  state |= TRAIL_INT;
+	  do
+	    cp++;
+	  while ('0' <= *cp && *cp <= '9');
+	}
+      if (*cp == 'e' || *cp == 'E')
+	{
+	  char const *ecp = cp;
+	  cp++;
+	  if (*cp == '+' || *cp == '-')
+	    cp++;
+	  if ('0' <= *cp && *cp <= '9')
+	    {
+	      state |= E_EXP;
+	      do
+		cp++;
+	      while ('0' <= *cp && *cp <= '9');
+	    }
+	  else if (cp[-1] == '+'
+		   && cp[0] == 'I' && cp[1] == 'N' && cp[2] == 'F')
+	    {
+	      state |= E_EXP;
+	      cp += 3;
+	      value = 1.0 / zero;
+	    }
+	  else if (cp[-1] == '+'
+		   && cp[0] == 'N' && cp[1] == 'a' && cp[2] == 'N')
+	    {
+	      state |= E_EXP;
+	      cp += 3;
+	      value = zero / zero;
+
+	      /* If that made a "negative" NaN, negate it.  */
+	      {
+		int i;
+		union { double d; char c[sizeof (double)]; }
+		  u_data, u_minus_zero;
+		u_data.d = value;
+		u_minus_zero.d = -0.0;
+		for (i = 0; i < sizeof (double); i++)
+		  if (u_data.c[i] & u_minus_zero.c[i])
+		    {
+		      value = -value;
+		      break;
+		    }
+	      }
+	      /* Now VALUE is a positive NaN.  */
+	    }
+	  else
+	    cp = ecp;
+	}
+
+      float_syntax = ((state & (DOT_CHAR|TRAIL_INT)) == (DOT_CHAR|TRAIL_INT)
+		      || state == (LEAD_INT|E_EXP));
     }
-  else if (cp[-1] == '+' && cp[0] == 'N' && cp[1] == 'a' && cp[2] == 'N')
-    {
-      state |= EXP_INT;
-      cp += 3;
-      value = zero / zero;
 
-      /* If that made a "negative" NaN, negate it.  */
-      {
-	int i;
-	union { double d; char c[sizeof (double)]; } u_data, u_minus_zero;
+  /* Return nil if the number uses invalid syntax.  If IGNORE_TRAILING, accept
+     any prefix that matches.  Otherwise, the entire string must match.  */
+  if (! (ignore_trailing
+	 ? ((state & LEAD_INT) != 0 || float_syntax)
+	 : (!*cp && ((state & ~DOT_CHAR) == LEAD_INT || float_syntax))))
+    return Qnil;
 
-	u_data.d = value;
-	u_minus_zero.d = - 0.0;
-	for (i = 0; i < sizeof (double); i++)
-	  if (u_data.c[i] & u_minus_zero.c[i])
+  /* If the number does not use float syntax, and fits into a fixnum, return
+     the fixnum.  */
+  if (0 <= leading_digit && ! float_syntax)
+    {
+      /* Convert string to EMACS_INT.  Do not use strtol, to avoid assuming
+	 that EMACS_INT is no wider than 'long', and because when BASE is 16
+	 strtol might accept numbers like "0x1" that are not allowed here.  */
+      EMACS_INT n = leading_digit;
+      EMACS_INT abs_bound =
+	(negative ? -MOST_NEGATIVE_FIXNUM : MOST_POSITIVE_FIXNUM);
+      EMACS_INT abs_bound_over_base = abs_bound / base;
+
+      for (cp = string + signedp + 1; ; cp++)
+	{
+	  int d = digit_to_number (*cp, base);
+	  if (d < 0)
 	    {
-	      value = - value;
+	      if (n <= abs_bound)
+		return make_number (negative ? -n : n);
 	      break;
 	    }
-      }
-      /* Now VALUE is a positive NaN.  */
-    }
+	  if (abs_bound_over_base < n)
+	    break;
+	  n = base * n + d;
+	}
 
-  if (! (state == (LEAD_INT|DOT_CHAR|TRAIL_INT)
-	 || state == (DOT_CHAR|TRAIL_INT)
-	 || state == (LEAD_INT|E_CHAR|EXP_INT)
-	 || state == (LEAD_INT|DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT)
-	 || state == (DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT)))
-    return make_number (0); /* Any non-float value will do.  */
+      /* Unfortunately there's no simple and reliable way to convert
+	 non-base-10 to floating point.  */
+      if (base != 10)
+	xsignal (Qoverflow_error, list1 (build_string (string)));
+    }
 
+  /* Either the number uses float syntax, or it does not fit into a fixnum.
+     Convert it from string to floating point, unless the value is already
+     known because it is an infinity or a NaN.  */
   if (! value)
-    value = atof (start + negative);
+    value = atof (string + signedp);
+
   if (negative)
-    value = - value;
+    value = -value;
   return make_float (value);
 }
 
+
 
 static Lisp_Object
 read_vector (Lisp_Object readcharfun, int bytecodeflag)