From: Paul Eggert Date: Thu, 21 Apr 2011 06:03:09 +0000 (-0700) Subject: Treat large integers as floats in the Lisp reader and in string-to-number. X-Git-Url: http://git.hcoop.net/bpt/emacs.git/commitdiff_plain/452f4150134e4ba7bbd2bad9ce87d19c200505de?hp=6703b2e490339a624bb83c9543f1e51ede26b52b Treat large integers as floats in the Lisp reader and in string-to-number. --- diff --git a/src/ChangeLog b/src/ChangeLog index 7eaa153f79..2b9978f3d6 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,32 @@ +2011-04-21 Paul Eggert + + Make the Lisp reader and string-to-float more consistent. + * data.c (atof): Remove decl; no longer used or needed. + (digit_to_number): Move to lread.c. + (Fstring_to_number): Use new string_to_number function, to be + consistent with how the Lisp reader treats infinities and NaNs. + Do not assume that floating-point numbers represent EMACS_INT + without losing information; this is not true on most 64-bit hosts. + Avoid double-rounding errors, by insisting on integers when + parsing non-base-10 numbers, as the documentation specifies. + * lisp.h (string_to_number): New decl, replacing ... + (isfloat_string): Remove. + * lread.c (read1): Do not accept +. and -. as integers; this + appears to have been a coding error. Similarly, do not accept + strings like +-1e0 as floating point numbers. Do not report + overflow for integer overflows unless the base is not 10 which + means we have no simple and reliable way to continue. + Break out the floating-point parsing into a new + function string_to_number, so that Fstring_to_number parses + floating point numbers consistently with the Lisp reader. + (digit_to_number): Moved here from data.c. Make it static inline. + (E_CHAR, EXP_INT): Remove, replacing with ... + (E_EXP): New macro, to solve the "1.0e+" problem mentioned below. + (string_to_number): New function, replacing isfloat_string. + This function checks for valid syntax and produces the resulting + Lisp float number too. 
Rework it so that string-to-number + no longer mishandles examples like "1.0e+". + 2011-04-20 Paul Eggert * textprop.c (set_text_properties_1): Rewrite for clarity, @@ -15,29 +44,6 @@ * alloc.c (overrun_check_malloc, overrun_check_realloc): Now static. (overrun_check_free): Likewise. - Make the Lisp reader and string-to-float more consistent. - * data.c (atof): Remove decl; no longer used or needed. - (Fstring_to_number): Use new string_to_float function, to be - consistent with how the Lisp reader treats infinities and NaNs. - Do not assume that floating-point numbers represent EMACS_INT - without losing information; this is not true on most 64-bit hosts. - Avoid double-rounding errors, by insisting on integers when - parsing non-base-10 numbers, as the documentation specifies. - Report integer overflow instead of silently converting to - integers. - * lisp.h (string_to_float): New decl, replacing ... - (isfloat_string): Remove. - * lread.c (read1): Do not accept +. and -. as integers; this - appears to have been a coding error. Similarly, do not accept - strings like +-1e0 as floating point numbers. Do not report - overflow for some integer overflows and not others; instead, - report them all. Break out the floating-point parsing into a new - function string_to_float, so that Fstring_to_number parses - floating point numbers consistently with the Lisp reader. - (string_to_float): New function, replacing isfloat_string. - This function checks for valid syntax and produces the resulting - Lisp float number too. - * alloc.c (SDATA_SIZE) [!GC_CHECK_STRING_BYTES]: Avoid runtime check in the common case where SDATA_DATA_OFFSET is a multiple of Emacs word size. diff --git a/src/data.c b/src/data.c index 486816cac7..4e81c80d0e 100644 --- a/src/data.c +++ b/src/data.c @@ -2374,26 +2374,6 @@ NUMBER may be an integer or a floating point number. 
*/) return build_string (buffer); } -INLINE static int -digit_to_number (int character, int base) -{ - int digit; - - if (character >= '0' && character <= '9') - digit = character - '0'; - else if (character >= 'a' && character <= 'z') - digit = character - 'a' + 10; - else if (character >= 'A' && character <= 'Z') - digit = character - 'A' + 10; - else - return -1; - - if (digit >= base) - return -1; - else - return digit; -} - DEFUN ("string-to-number", Fstring_to_number, Sstring_to_number, 1, 2, 0, doc: /* Parse STRING as a decimal number and return the number. This parses both integers and floating point numbers. @@ -2406,7 +2386,7 @@ If the base used is not 10, STRING is always parsed as integer. */) { register char *p; register int b; - EMACS_INT n; + Lisp_Object val; CHECK_STRING (string); @@ -2420,25 +2400,13 @@ If the base used is not 10, STRING is always parsed as integer. */) xsignal1 (Qargs_out_of_range, base); } - /* Skip any whitespace at the front of the number. Typically strtol does - this anyway, so we might as well be consistent. */ p = SSDATA (string); while (*p == ' ' || *p == '\t') p++; - if (b == 10) - { - Lisp_Object val = string_to_float (p, 1); - if (FLOATP (val)) - return val; - } - - n = strtol (p, NULL, b); - if (FIXNUM_OVERFLOW_P (n)) - xsignal (Qoverflow_error, list1 (string)); - return make_number (n); + val = string_to_number (p, b, 1); + return NILP (val) ? 
make_number (0) : val; } - enum arithop { diff --git a/src/lisp.h b/src/lisp.h index 8d333a3999..5bace90e53 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -2782,7 +2782,7 @@ extern Lisp_Object oblookup (Lisp_Object, const char *, EMACS_INT, EMACS_INT); } while (0) extern int openp (Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object *, Lisp_Object); -Lisp_Object string_to_float (char const *, int); +Lisp_Object string_to_number (char const *, int, int); extern void map_obarray (Lisp_Object, void (*) (Lisp_Object, Lisp_Object), Lisp_Object); extern void dir_warning (const char *, Lisp_Object); diff --git a/src/lread.c b/src/lread.c index a872929e08..390c57d167 100644 --- a/src/lread.c +++ b/src/lread.c @@ -3005,32 +3005,8 @@ read1 (register Lisp_Object readcharfun, int *pch, int first_in_list) if (!quoted && !uninterned_symbol) { - register char *p1; - Lisp_Object result; - p1 = read_buffer; - if (*p1 == '+' || *p1 == '-') p1++; - /* Is it an integer? */ - if ('0' <= *p1 && *p1 <= '9') - { - do - p1++; - while ('0' <= *p1 && *p1 <= '9'); - - /* Integers can have trailing decimal points. */ - p1 += (*p1 == '.'); - if (p1 == p) - { - /* It is an integer. */ - EMACS_INT n = strtol (read_buffer, NULL, 10); - if (FIXNUM_OVERFLOW_P (n)) - xsignal (Qoverflow_error, - list1 (build_string (read_buffer))); - return make_number (n); - } - } - - result = string_to_float (read_buffer, 0); - if (FLOATP (result)) + Lisp_Object result = string_to_number (read_buffer, 10, 0); + if (! NILP (result)) return result; } { @@ -3189,23 +3165,44 @@ substitute_in_interval (INTERVAL interval, Lisp_Object arg) } +static inline int +digit_to_number (int character, int base) +{ + int digit; + + if ('0' <= character && character <= '9') + digit = character - '0'; + else if ('a' <= character && character <= 'z') + digit = character - 'a' + 10; + else if ('A' <= character && character <= 'Z') + digit = character - 'A' + 10; + else + return -1; + + return digit < base ? 
digit : -1; +} + #define LEAD_INT 1 #define DOT_CHAR 2 #define TRAIL_INT 4 -#define E_CHAR 8 -#define EXP_INT 16 +#define E_EXP 16 -/* Convert CP to a floating point number. Return a non-float value if CP does - not have valid floating point syntax. If IGNORE_TRAILING is nonzero, - consider just the longest prefix of CP that has valid floating point - syntax. */ +/* Convert STRING to a number, assuming base BASE. Return a fixnum if STRING has + integer syntax and fits in a fixnum, else return the nearest float if STRING has + either floating point or integer syntax and BASE is 10, else return nil. If + IGNORE_TRAILING is nonzero, consider just the longest prefix of STRING that has + valid floating point syntax. Signal an overflow if BASE is not 10 and the + number has integer syntax but does not fit. */ Lisp_Object -string_to_float (char const *cp, int ignore_trailing) +string_to_number (char const *string, int base, int ignore_trailing) { int state; - const char *start = cp; + char const *cp = string; + int leading_digit; + int float_syntax = 0; + double value = 0; /* Compute NaN and infinities using a variable, to cope with compilers that think they are smarter than we are. */ @@ -3216,88 +3213,137 @@ string_to_float (char const *cp, int ignore_trailing) atof ("-0.0") drops the sign. 
*/ int negative = *cp == '-'; - double value = 0; + int signedp = negative || *cp == '+'; + cp += signedp; state = 0; - if (negative || *cp == '+') - cp++; - if (*cp >= '0' && *cp <= '9') + leading_digit = digit_to_number (*cp, base); + if (0 <= leading_digit) { state |= LEAD_INT; - while (*cp >= '0' && *cp <= '9') - cp++; + do + ++cp; + while (0 <= digit_to_number (*cp, base)); } + if (*cp == '.') { state |= DOT_CHAR; cp++; } - if (*cp >= '0' && *cp <= '9') - { - state |= TRAIL_INT; - while (*cp >= '0' && *cp <= '9') - cp++; - } - if (*cp == 'e' || *cp == 'E') - { - state |= E_CHAR; - cp++; - if (*cp == '+' || *cp == '-') - cp++; - } - if (*cp >= '0' && *cp <= '9') + if (base == 10) { - state |= EXP_INT; - while (*cp >= '0' && *cp <= '9') - cp++; - } - else if (cp == start) - ; - else if (cp[-1] == '+' && cp[0] == 'I' && cp[1] == 'N' && cp[2] == 'F') - { - state |= EXP_INT; - cp += 3; - value = 1.0 / zero; + if ('0' <= *cp && *cp <= '9') + { + state |= TRAIL_INT; + do + cp++; + while ('0' <= *cp && *cp <= '9'); + } + if (*cp == 'e' || *cp == 'E') + { + char const *ecp = cp; + cp++; + if (*cp == '+' || *cp == '-') + cp++; + if ('0' <= *cp && *cp <= '9') + { + state |= E_EXP; + do + cp++; + while ('0' <= *cp && *cp <= '9'); + } + else if (cp[-1] == '+' + && cp[0] == 'I' && cp[1] == 'N' && cp[2] == 'F') + { + state |= E_EXP; + cp += 3; + value = 1.0 / zero; + } + else if (cp[-1] == '+' + && cp[0] == 'N' && cp[1] == 'a' && cp[2] == 'N') + { + state |= E_EXP; + cp += 3; + value = zero / zero; + + /* If that made a "negative" NaN, negate it. */ + { + int i; + union { double d; char c[sizeof (double)]; } + u_data, u_minus_zero; + u_data.d = value; + u_minus_zero.d = -0.0; + for (i = 0; i < sizeof (double); i++) + if (u_data.c[i] & u_minus_zero.c[i]) + { + value = -value; + break; + } + } + /* Now VALUE is a positive NaN. 
*/ + } + else + cp = ecp; + } + + float_syntax = ((state & (DOT_CHAR|TRAIL_INT)) == (DOT_CHAR|TRAIL_INT) + || state == (LEAD_INT|E_EXP)); } - else if (cp[-1] == '+' && cp[0] == 'N' && cp[1] == 'a' && cp[2] == 'N') - { - state |= EXP_INT; - cp += 3; - value = zero / zero; - /* If that made a "negative" NaN, negate it. */ - { - int i; - union { double d; char c[sizeof (double)]; } u_data, u_minus_zero; + /* Return nil if the number uses invalid syntax. If IGNORE_TRAILING, accept + any prefix that matches. Otherwise, the entire string must match. */ + if (! (ignore_trailing + ? ((state & LEAD_INT) != 0 || float_syntax) + : (!*cp && ((state & ~DOT_CHAR) == LEAD_INT || float_syntax)))) + return Qnil; - u_data.d = value; - u_minus_zero.d = - 0.0; - for (i = 0; i < sizeof (double); i++) - if (u_data.c[i] & u_minus_zero.c[i]) + /* If the number does not use float syntax, and fits into a fixnum, return + the fixnum. */ + if (0 <= leading_digit && ! float_syntax) + { + /* Convert string to EMACS_INT. Do not use strtol, to avoid assuming + that EMACS_INT is no wider than 'long', and because when BASE is 16 + strtol might accept numbers like "0x1" that are not allowed here. */ + EMACS_INT n = leading_digit; + EMACS_INT abs_bound = + (negative ? -MOST_NEGATIVE_FIXNUM : MOST_POSITIVE_FIXNUM); + EMACS_INT abs_bound_over_base = abs_bound / base; + + for (cp = string + signedp + 1; ; cp++) + { + int d = digit_to_number (*cp, base); + if (d < 0) { - value = - value; + if (n <= abs_bound) + return make_number (negative ? -n : n); break; } - } - /* Now VALUE is a positive NaN. */ - } + if (abs_bound_over_base < n) + break; + n = base * n + d; + } - if (! (state == (LEAD_INT|DOT_CHAR|TRAIL_INT) - || state == (DOT_CHAR|TRAIL_INT) - || state == (LEAD_INT|E_CHAR|EXP_INT) - || state == (LEAD_INT|DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT) - || state == (DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT))) - return make_number (0); /* Any non-float value will do. 
*/ + /* Unfortunately there's no simple and reliable way to convert + non-base-10 to floating point. */ + if (base != 10) + xsignal (Qoverflow_error, list1 (build_string (string))); + } + /* Either the number uses float syntax, or it does not fit into a fixnum. + Convert it from string to floating point, unless the value is already + known because it is an infinity or a NAN. */ if (! value) - value = atof (start + negative); + value = atof (string + signedp); + if (negative) - value = - value; + value = -value; return make_float (value); } + static Lisp_Object read_vector (Lisp_Object readcharfun, int bytecodeflag)