From 3cc5a5328c43317b12a7163c4e1c0a56d85b93ce Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Thu, 7 Jul 2011 17:51:25 -0700 Subject: [PATCH] Improve hashing quality when configured --with-wide-int. * fns.c (hash_string): New function, taken from sxhash_string. Do not discard information about ASCII character case; this discarding is no longer needed. (sxhash_string): Use it. Change sig to match it. Caller changed. * lisp.h: Declare it. * lread.c (hash_string): Remove, since we now use fns.c's version. The fns.c version returns a wider integer if --with-wide-int is specified, so this should help the quality of the hashing a bit. --- src/ChangeLog | 12 ++++++++++++ src/fns.c | 26 +++++++++++++++++--------- src/lisp.h | 1 + src/lread.c | 19 ------------------- 4 files changed, 30 insertions(+), 28 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 0265828c60..aaf87deb9a 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,15 @@ +2011-07-08 Paul Eggert + + Improve hashing quality when configured --with-wide-int. + * fns.c (hash_string): New function, taken from sxhash_string. + Do not discard information about ASCII character case; this + discarding is no longer needed. + (sxhash_string): Use it. Change sig to match it. Caller changed. + * lisp.h: Declare it. + * lread.c (hash_string): Remove, since we now use fns.c's version. + The fns.c version returns a wider integer if --with-wide-int is + specified, so this should help the quality of the hashing a bit. + 2011-07-07 Paul Eggert * emacs.c: Integer overflow minor fix. diff --git a/src/fns.c b/src/fns.c index 0ca731ed33..9c9d19fe26 100644 --- a/src/fns.c +++ b/src/fns.c @@ -4098,25 +4098,33 @@ sweep_weak_hash_tables (void) #define SXHASH_REDUCE(X) \ ((((X) ^ (X) >> (BITS_PER_EMACS_INT - FIXNUM_BITS))) & INTMASK) -/* Return a hash for string PTR which has length LEN. The hash - code returned is guaranteed to fit in a Lisp integer. */ +/* Return a hash for string PTR which has length LEN. 
The hash value + can be any EMACS_UINT value. */ -static EMACS_UINT -sxhash_string (unsigned char *ptr, EMACS_INT len) +EMACS_UINT +hash_string (char const *ptr, ptrdiff_t len) { - unsigned char *p = ptr; - unsigned char *end = p + len; + char const *p = ptr; + char const *end = p + len; unsigned char c; EMACS_UINT hash = 0; while (p != end) { c = *p++; - if (c >= 0140) - c -= 40; hash = SXHASH_COMBINE (hash, c); } + return hash; +} + +/* Return a hash for string PTR which has length LEN. The hash + code returned is guaranteed to fit in a Lisp integer. */ + +static EMACS_UINT +sxhash_string (char const *ptr, ptrdiff_t len) +{ + EMACS_UINT hash = hash_string (ptr, len); return SXHASH_REDUCE (hash); } @@ -4231,7 +4239,7 @@ sxhash (Lisp_Object obj, int depth) /* Fall through. */ case Lisp_String: - hash = sxhash_string (SDATA (obj), SCHARS (obj)); + hash = sxhash_string (SSDATA (obj), SBYTES (obj)); break; /* This can be everything from a vector to an overlay. */ diff --git a/src/lisp.h b/src/lisp.h index 257c204e3b..1e141dbb5d 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -2557,6 +2557,7 @@ extern void sweep_weak_hash_tables (void); extern Lisp_Object Qcursor_in_echo_area; extern Lisp_Object Qstring_lessp; extern Lisp_Object QCsize, QCtest, QCweakness, Qequal, Qeq, Qeql; +EMACS_UINT hash_string (char const *, ptrdiff_t); EMACS_UINT sxhash (Lisp_Object, int); Lisp_Object make_hash_table (Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, diff --git a/src/lread.c b/src/lread.c index a9b69a1977..6a97be2be4 100644 --- a/src/lread.c +++ b/src/lread.c @@ -3647,8 +3647,6 @@ static Lisp_Object initial_obarray; static size_t oblookup_last_bucket_number; -static size_t hash_string (const char *ptr, size_t len); - /* Get an error if OBARRAY is not an obarray. If it is one, return it. 
*/ @@ -3891,23 +3889,6 @@ oblookup (Lisp_Object obarray, register const char *ptr, EMACS_INT size, EMACS_I XSETINT (tem, hash); return tem; } - -static size_t -hash_string (const char *ptr, size_t len) -{ - register const char *p = ptr; - register const char *end = p + len; - register unsigned char c; - register size_t hash = 0; - - while (p != end) - { - c = *p++; - if (c >= 0140) c -= 40; - hash = (hash << 3) + (hash >> (CHAR_BIT * sizeof hash - 4)) + c; - } - return hash; -} void map_obarray (Lisp_Object obarray, void (*fn) (Lisp_Object, Lisp_Object), Lisp_Object arg) -- 2.20.1