Treat large integers as floats in the Lisp reader and in string-to-number.

author Paul Eggert <eggert@cs.ucla.edu>

Thu, 21 Apr 2011 06:03:09 +0000 (23:03 -0700)

committer Paul Eggert <eggert@cs.ucla.edu>

Thu, 21 Apr 2011 06:03:09 +0000 (23:03 -0700)
author Paul Eggert <eggert@cs.ucla.edu>
Thu, 21 Apr 2011 06:03:09 +0000 (23:03 -0700)
committer Paul Eggert <eggert@cs.ucla.edu>
Thu, 21 Apr 2011 06:03:09 +0000 (23:03 -0700)
diff --git a/src/ChangeLog b/src/ChangeLog

index 7eaa153..2b9978f 100644 (file)
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,32 @@
+2011-04-21  Paul Eggert  <eggert@cs.ucla.edu>
+
+       Make the Lisp reader and string-to-float more consistent.
+       * data.c (atof): Remove decl; no longer used or needed.
+       (digit_to_number): Move to lread.c.
+       (Fstring_to_number): Use new string_to_number function, to be
+       consistent with how the Lisp reader treats infinities and NaNs.
+       Do not assume that floating-point numbers represent EMACS_INT
+       without losing information; this is not true on most 64-bit hosts.
+       Avoid double-rounding errors, by insisting on integers when
+       parsing non-base-10 numbers, as the documentation specifies.
+       * lisp.h (string_to_number): New decl, replacing ...
+       (isfloat_string): Remove.
+       * lread.c (read1): Do not accept +. and -. as integers; this
+       appears to have been a coding error.  Similarly, do not accept
+       strings like +-1e0 as floating point numbers.  Do not report
+       overflow for integer overflows unless the base is not 10 which
+       means we have no simple and reliable way to continue.
+       Break out the floating-point parsing into a new
+       function string_to_number, so that Fstring_to_number parses
+       floating point numbers consistently with the Lisp reader.
+       (digit_to_number): Moved here from data.c.  Make it static inline.
+       (E_CHAR, EXP_INT): Remove, replacing with ...
+       (E_EXP): New macro, to solve the "1.0e+" problem mentioned below.
+       (string_to_number): New function, replacing isfloat_string.
+       This function checks for valid syntax and produces the resulting
+       Lisp float number too.  Rework it so that string-to-number
+       no longer mishandles examples like "1.0e+".
+
  2011-04-20  Paul Eggert  <eggert@cs.ucla.edu>
  
         * textprop.c (set_text_properties_1): Rewrite for clarity,
@@ -15,29 +44,6 @@
         * alloc.c (overrun_check_malloc, overrun_check_realloc): Now static.
         (overrun_check_free): Likewise.
  
-       Make the Lisp reader and string-to-float more consistent.
-       * data.c (atof): Remove decl; no longer used or needed.
-       (Fstring_to_number): Use new string_to_float function, to be
-       consistent with how the Lisp reader treats infinities and NaNs.
-       Do not assume that floating-point numbers represent EMACS_INT
-       without losing information; this is not true on most 64-bit hosts.
-       Avoid double-rounding errors, by insisting on integers when
-       parsing non-base-10 numbers, as the documentation specifies.
-       Report integer overflow instead of silently converting to
-       integers.
-       * lisp.h (string_to_float): New decl, replacing ...
-       (isfloat_string): Remove.
-       * lread.c (read1): Do not accept +. and -. as integers; this
-       appears to have been a coding error.  Similarly, do not accept
-       strings like +-1e0 as floating point numbers.  Do not report
-       overflow for some integer overflows and not others; instead,
-       report them all.  Break out the floating-point parsing into a new
-       function string_to_float, so that Fstring_to_number parses
-       floating point numbers consistently with the Lisp reader.
-       (string_to_float): New function, replacing isfloat_string.
-       This function checks for valid syntax and produces the resulting
-       Lisp float number too.
-
         * alloc.c (SDATA_SIZE) [!GC_CHECK_STRING_BYTES]: Avoid runtime check
         in the common case where SDATA_DATA_OFFSET is a multiple of Emacs
         word size.
diff --git a/src/data.c b/src/data.c

index 486816c..4e81c80 100644 (file)
--- a/src/data.c
+++ b/src/data.c
@@ -2374,26 +2374,6 @@ NUMBER may be an integer or a floating point number.  */)
    return build_string (buffer);
  }
  
-INLINE static int
-digit_to_number (int character, int base)
-{
-  int digit;
-
-  if (character >= '0' && character <= '9')
-    digit = character - '0';
-  else if (character >= 'a' && character <= 'z')
-    digit = character - 'a' + 10;
-  else if (character >= 'A' && character <= 'Z')
-    digit = character - 'A' + 10;
-  else
-    return -1;
-
-  if (digit >= base)
-    return -1;
-  else
-    return digit;
-}
-
  DEFUN ("string-to-number", Fstring_to_number, Sstring_to_number, 1, 2, 0,
         doc: /* Parse STRING as a decimal number and return the number.
  This parses both integers and floating point numbers.
@@ -2406,7 +2386,7 @@ If the base used is not 10, STRING is always parsed as integer.  */)
  {
    register char *p;
    register int b;
-  EMACS_INT n;
+  Lisp_Object val;
  
    CHECK_STRING (string);
  
@@ -2420,25 +2400,13 @@ If the base used is not 10, STRING is always parsed as integer.  */)
         xsignal1 (Qargs_out_of_range, base);
      }
  
-  /* Skip any whitespace at the front of the number.  Typically strtol does
-     this anyway, so we might as well be consistent.  */
    p = SSDATA (string);
    while (*p == ' ' || *p == '\t')
      p++;
  
-  if (b == 10)
-    {
-      Lisp_Object val = string_to_float (p, 1);
-      if (FLOATP (val))
-       return val;
-    }
-
-  n = strtol (p, NULL, b);
-  if (FIXNUM_OVERFLOW_P (n))
-    xsignal (Qoverflow_error, list1 (string));
-  return make_number (n);
+  val = string_to_number (p, b, 1);
+  return NILP (val) ? make_number (0) : val;
  }
-
  \f
  enum arithop
    {
diff --git a/src/lisp.h b/src/lisp.h

index 8d333a3..5bace90 100644 (file)
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -2782,7 +2782,7 @@ extern Lisp_Object oblookup (Lisp_Object, const char *, EMACS_INT, EMACS_INT);
    } while (0)
  extern int openp (Lisp_Object, Lisp_Object, Lisp_Object,
                    Lisp_Object *, Lisp_Object);
-Lisp_Object string_to_float (char const *, int);
+Lisp_Object string_to_number (char const *, int, int);
  extern void map_obarray (Lisp_Object, void (*) (Lisp_Object, Lisp_Object),
                           Lisp_Object);
  extern void dir_warning (const char *, Lisp_Object);
diff --git a/src/lread.c b/src/lread.c

index a872929..390c57d 100644 (file)
--- a/src/lread.c
+++ b/src/lread.c
@@ -3005,32 +3005,8 @@ read1 (register Lisp_Object readcharfun, int *pch, int first_in_list)
  
         if (!quoted && !uninterned_symbol)
           {
-           register char *p1;
-           Lisp_Object result;
-           p1 = read_buffer;
-           if (*p1 == '+' || *p1 == '-') p1++;
-           /* Is it an integer? */
-           if ('0' <= *p1 && *p1 <= '9')
-             {
-               do
-                 p1++;
-               while ('0' <= *p1 && *p1 <= '9');
-
-               /* Integers can have trailing decimal points.  */
-               p1 += (*p1 == '.');
-               if (p1 == p)
-                 {
-                   /* It is an integer. */
-                   EMACS_INT n = strtol (read_buffer, NULL, 10);
-                   if (FIXNUM_OVERFLOW_P (n))
-                     xsignal (Qoverflow_error,
-                              list1 (build_string (read_buffer)));
-                   return make_number (n);
-                 }
-             }
-
-           result = string_to_float (read_buffer, 0);
-           if (FLOATP (result))
+           Lisp_Object result = string_to_number (read_buffer, 10, 0);
+           if (! NILP (result))
               return result;
           }
         {
@@ -3189,23 +3165,44 @@ substitute_in_interval (INTERVAL interval, Lisp_Object arg)
  }
  
  \f
+static inline int
+digit_to_number (int character, int base)
+{
+  int digit;
+
+  if ('0' <= character && character <= '9')
+    digit = character - '0';
+  else if ('a' <= character && character <= 'z')
+    digit = character - 'a' + 10;
+  else if ('A' <= character && character <= 'Z')
+    digit = character - 'A' + 10;
+  else
+    return -1;
+
+  return digit < base ? digit : -1;
+}
+
  #define LEAD_INT 1
  #define DOT_CHAR 2
  #define TRAIL_INT 4
-#define E_CHAR 8
-#define EXP_INT 16
+#define E_EXP 16
  
  
-/* Convert CP to a floating point number.  Return a non-float value if CP does
-   not have valid floating point syntax.  If IGNORE_TRAILING is nonzero,
-   consider just the longest prefix of CP that has valid floating point
-   syntax.  */
+/* Convert STRING to a number, assuming base BASE.  Return a fixnum if CP has
+   integer syntax and fits in a fixnum, else return the nearest float if CP has
+   either floating point or integer syntax and BASE is 10, else return nil.  If
+   IGNORE_TRAILING is nonzero, consider just the longest prefix of CP that has
+   valid floating point syntax.  Signal an overflow if BASE is not 10 and the
+   number has integer syntax but does not fit.  */
  
  Lisp_Object
-string_to_float (char const *cp, int ignore_trailing)
+string_to_number (char const *string, int base, int ignore_trailing)
  {
    int state;
-  const char *start = cp;
+  char const *cp = string;
+  int leading_digit;
+  int float_syntax = 0;
+  double value = 0;
  
    /* Compute NaN and infinities using a variable, to cope with compilers that
       think they are smarter than we are.  */
@@ -3216,88 +3213,137 @@ string_to_float (char const *cp, int ignore_trailing)
       atof ("-0.0") drops the sign.  */
    int negative = *cp == '-';
  
-  double value = 0;
+  int signedp = negative || *cp == '+';
+  cp += signedp;
  
    state = 0;
-  if (negative || *cp == '+')
-    cp++;
  
-  if (*cp >= '0' && *cp <= '9')
+  leading_digit = digit_to_number (*cp, base);
+  if (0 <= leading_digit)
      {
        state |= LEAD_INT;
-      while (*cp >= '0' && *cp <= '9')
-       cp++;
+      do
+       ++cp;
+      while (0 <= digit_to_number (*cp, base));
      }
+
    if (*cp == '.')
      {
        state |= DOT_CHAR;
        cp++;
      }
-  if (*cp >= '0' && *cp <= '9')
-    {
-      state |= TRAIL_INT;
-      while (*cp >= '0' && *cp <= '9')
-       cp++;
-    }
-  if (*cp == 'e' || *cp == 'E')
-    {
-      state |= E_CHAR;
-      cp++;
-      if (*cp == '+' || *cp == '-')
-       cp++;
-    }
  
-  if (*cp >= '0' && *cp <= '9')
+  if (base == 10)
      {
-      state |= EXP_INT;
-      while (*cp >= '0' && *cp <= '9')
-       cp++;
-    }
-  else if (cp == start)
-    ;
-  else if (cp[-1] == '+' && cp[0] == 'I' && cp[1] == 'N' && cp[2] == 'F')
-    {
-      state |= EXP_INT;
-      cp += 3;
-      value = 1.0 / zero;
+      if ('0' <= *cp && *cp <= '9')
+       {
+         state |= TRAIL_INT;
+         do
+           cp++;
+         while ('0' <= *cp && *cp <= '9');
+       }
+      if (*cp == 'e' || *cp == 'E')
+       {
+         char const *ecp = cp;
+         cp++;
+         if (*cp == '+' || *cp == '-')
+           cp++;
+         if ('0' <= *cp && *cp <= '9')
+           {
+             state |= E_EXP;
+             do
+               cp++;
+             while ('0' <= *cp && *cp <= '9');
+           }
+         else if (cp[-1] == '+'
+                  && cp[0] == 'I' && cp[1] == 'N' && cp[2] == 'F')
+           {
+             state |= E_EXP;
+             cp += 3;
+             value = 1.0 / zero;
+           }
+         else if (cp[-1] == '+'
+                  && cp[0] == 'N' && cp[1] == 'a' && cp[2] == 'N')
+           {
+             state |= E_EXP;
+             cp += 3;
+             value = zero / zero;
+
+             /* If that made a "negative" NaN, negate it.  */
+             {
+               int i;
+               union { double d; char c[sizeof (double)]; }
+                 u_data, u_minus_zero;
+               u_data.d = value;
+               u_minus_zero.d = -0.0;
+               for (i = 0; i < sizeof (double); i++)
+                 if (u_data.c[i] & u_minus_zero.c[i])
+                   {
+                     value = -value;
+                     break;
+                   }
+             }
+             /* Now VALUE is a positive NaN.  */
+           }
+         else
+           cp = ecp;
+       }
+
+      float_syntax = ((state & (DOT_CHAR|TRAIL_INT)) == (DOT_CHAR|TRAIL_INT)
+                     || state == (LEAD_INT|E_EXP));
      }
-  else if (cp[-1] == '+' && cp[0] == 'N' && cp[1] == 'a' && cp[2] == 'N')
-    {
-      state |= EXP_INT;
-      cp += 3;
-      value = zero / zero;
  
-      /* If that made a "negative" NaN, negate it.  */
-      {
-       int i;
-       union { double d; char c[sizeof (double)]; } u_data, u_minus_zero;
+  /* Return nil if the number uses invalid syntax.  If IGNORE_TRAILING, accept
+     any prefix that matches.  Otherwise, the entire string must match.  */
+  if (! (ignore_trailing
+        ? ((state & LEAD_INT) != 0 || float_syntax)
+        : (!*cp && ((state & ~DOT_CHAR) == LEAD_INT || float_syntax))))
+    return Qnil;
  
-       u_data.d = value;
-       u_minus_zero.d = - 0.0;
-       for (i = 0; i < sizeof (double); i++)
-         if (u_data.c[i] & u_minus_zero.c[i])
+  /* If the number does not use float syntax, and fits into a fixnum, return
+     the fixnum.  */
+  if (0 <= leading_digit && ! float_syntax)
+    {
+      /* Convert string to EMACS_INT.  Do not use strtol, to avoid assuming
+        that EMACS_INT is no wider than 'long', and because when BASE is 16
+        strtol might accept numbers like "0x1" that are not allowed here.  */
+      EMACS_INT n = leading_digit;
+      EMACS_INT abs_bound =
+       (negative ? -MOST_NEGATIVE_FIXNUM : MOST_POSITIVE_FIXNUM);
+      EMACS_INT abs_bound_over_base = abs_bound / base;
+
+      for (cp = string + signedp + 1; ; cp++)
+       {
+         int d = digit_to_number (*cp, base);
+         if (d < 0)
             {
-             value = - value;
+             if (n <= abs_bound)
+               return make_number (negative ? -n : n);
               break;
             }
-      }
-      /* Now VALUE is a positive NaN.  */
-    }
+         if (abs_bound_over_base < n)
+           break;
+         n = base * n + d;
+       }
  
-  if (! (state == (LEAD_INT|DOT_CHAR|TRAIL_INT)
-        || state == (DOT_CHAR|TRAIL_INT)
-        || state == (LEAD_INT|E_CHAR|EXP_INT)
-        || state == (LEAD_INT|DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT)
-        || state == (DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT)))
-    return make_number (0); /* Any non-float value will do.  */
+      /* Unfortunately there's no simple and reliable way to convert
+        non-base-10 to floating point.  */
+      if (base != 10)
+       xsignal (Qoverflow_error, list1 (build_string (string)));
+    }
  
+  /* Either the number uses float syntax, or it does not fit into a fixnum.
+     Convert it from string to floating point, unless the value is already
+     known because it is an infinity or a NAN.  */
    if (! value)
-    value = atof (start + negative);
+    value = atof (string + signedp);
+
    if (negative)
-    value = - value;
+    value = -value;
    return make_float (value);
  }
  
+
  \f
  static Lisp_Object
  read_vector (Lisp_Object readcharfun, int bytecodeflag)
author	Paul Eggert <eggert@cs.ucla.edu>
	Thu, 21 Apr 2011 06:03:09 +0000 (23:03 -0700)
committer	Paul Eggert <eggert@cs.ucla.edu>
	Thu, 21 Apr 2011 06:03:09 +0000 (23:03 -0700)
src/ChangeLog		patch \| blob \| blame \| history
src/data.c		patch \| blob \| blame \| history
src/lisp.h		patch \| blob \| blame \| history
src/lread.c		patch \| blob \| blame \| history