1 /* Copyright (C) 1995, 1996, 1997, 2000, 2001, 2003, 2004, 2006, 2008,
2 * 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
33 #include "libguile/_scm.h"
34 #include "libguile/chars.h"
35 #include "libguile/ports.h"
36 #include "libguile/strings.h"
37 #include "libguile/symbols.h"
38 #include "libguile/vectors.h"
40 #include "libguile/validate.h"
41 #include "libguile/hash.h"
45 extern double floor();
49 /* This hash function is originally from
50 http://burtleburtle.net/bob/c/lookup3.c by Bob Jenkins, May 2006,
51 Public Domain. No warranty. */
53 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
56 a -= c; a ^= rot(c, 4); c += b; \
57 b -= a; b ^= rot(a, 6); a += c; \
58 c -= b; c ^= rot(b, 8); b += a; \
59 a -= c; a ^= rot(c,16); c += b; \
60 b -= a; b ^= rot(a,19); a += c; \
61 c -= b; c ^= rot(b, 4); b += a; \
64 #define final(a,b,c) \
66 c ^= b; c -= rot(b,14); \
67 a ^= c; a -= rot(c,11); \
68 b ^= a; b -= rot(a,25); \
69 c ^= b; c -= rot(b,16); \
70 a ^= c; a -= rot(c,4); \
71 b ^= a; b -= rot(a,14); \
72 c ^= b; c -= rot(b,24); \
75 #define JENKINS_LOOKUP3_HASHWORD2(k, length, ret) \
77 scm_t_uint32 a, b, c; \
79 /* Set up the internal state. */ \
80 a = b = c = 0xdeadbeef + ((scm_t_uint32)(length<<2)) + 47; \
82 /* Handle most of the key. */ \
93 /* Handle the last 3 elements. */ \
94 switch(length) /* All the case statements fall through. */ \
100 case 0: /* case 0: nothing left to add */ \
104 if (sizeof (ret) == 8) \
105 ret = (((unsigned long) c) << 32) | b; \
112 narrow_string_hash (const scm_t_uint8
*str
, size_t len
)
115 JENKINS_LOOKUP3_HASHWORD2 (str
, len
, ret
);
116 ret
>>= 2; /* Ensure that it fits in a fixnum. */
121 wide_string_hash (const scm_t_wchar
*str
, size_t len
)
124 JENKINS_LOOKUP3_HASHWORD2 (str
, len
, ret
);
125 ret
>>= 2; /* Ensure that it fits in a fixnum. */
130 scm_string_hash (const unsigned char *str
, size_t len
)
132 return narrow_string_hash (str
, len
);
136 scm_i_string_hash (SCM str
)
138 size_t len
= scm_i_string_length (str
);
140 if (scm_i_is_narrow_string (str
))
141 return narrow_string_hash ((const scm_t_uint8
*) scm_i_string_chars (str
),
144 return wide_string_hash (scm_i_string_wide_chars (str
), len
);
148 scm_i_locale_string_hash (const char *str
, size_t len
)
150 return scm_i_string_hash (scm_from_locale_stringn (str
, len
));
154 scm_i_latin1_string_hash (const char *str
, size_t len
)
156 if (len
== (size_t) -1)
159 return narrow_string_hash ((const scm_t_uint8
*) str
, len
);
162 /* A tricky optimization, but probably worth it. */
164 scm_i_utf8_string_hash (const char *str
, size_t len
)
166 const scm_t_uint8
*end
, *ustr
= (const scm_t_uint8
*) str
;
169 /* The length of the string in characters. This name corresponds to
170 Jenkins' original name. */
173 scm_t_uint32 a
, b
, c
, u32
;
175 if (len
== (size_t) -1)
180 if (u8_check (ustr
, len
) != NULL
)
181 /* Invalid UTF-8; punt. */
182 return scm_i_string_hash (scm_from_utf8_stringn (str
, len
));
184 length
= u8_strnlen (ustr
, len
);
186 /* Set up the internal state. */
187 a
= b
= c
= 0xdeadbeef + ((scm_t_uint32
)(length
<<2)) + 47;
189 /* Handle most of the key. */
192 ustr
+= u8_mbtouc_unsafe (&u32
, ustr
, end
- ustr
);
194 ustr
+= u8_mbtouc_unsafe (&u32
, ustr
, end
- ustr
);
196 ustr
+= u8_mbtouc_unsafe (&u32
, ustr
, end
- ustr
);
202 /* Handle the last 3 elements's. */
203 ustr
+= u8_mbtouc_unsafe (&u32
, ustr
, end
- ustr
);
207 ustr
+= u8_mbtouc_unsafe (&u32
, ustr
, end
- ustr
);
211 ustr
+= u8_mbtouc_unsafe (&u32
, ustr
, end
- ustr
);
218 if (sizeof (unsigned long) == 8)
219 ret
= (((unsigned long) c
) << 32) | b
;
223 ret
>>= 2; /* Ensure that it fits in a fixnum. */
227 static unsigned long scm_raw_ihashq (scm_t_bits key
);
228 static unsigned long scm_raw_ihash (SCM obj
, size_t depth
);
230 /* Return the hash of struct OBJ. Traverse OBJ's fields to compute the
231 result, unless DEPTH is zero. Assumes that OBJ is a struct. */
233 scm_i_struct_hash (SCM obj
, size_t depth
)
237 size_t struct_size
, field_num
;
240 layout
= SCM_STRUCT_LAYOUT (obj
);
241 struct_size
= scm_i_symbol_length (layout
) / 2;
242 data
= SCM_STRUCT_DATA (obj
);
244 hash
= scm_raw_ihashq (SCM_UNPACK (SCM_STRUCT_VTABLE (obj
)));
246 for (field_num
= 0; field_num
< struct_size
; field_num
++)
250 protection
= scm_i_symbol_ref (layout
, field_num
* 2 + 1);
251 if (protection
!= 'h' && protection
!= 'o')
254 type
= scm_i_symbol_ref (layout
, field_num
* 2);
258 hash
^= scm_raw_ihash (SCM_PACK (data
[field_num
]),
262 hash
^= scm_raw_ihashq (data
[field_num
]);
265 /* Ignore 's' fields. */;
270 /* FIXME: Tail elements should be taken into account. */
275 /* Thomas Wang's integer hasher, from
276 http://www.cris.com/~Ttwang/tech/inthash.htm. */
278 scm_raw_ihashq (scm_t_bits key
)
280 if (sizeof (key
) < 8)
282 key
= (key
^ 61) ^ (key
>> 16);
283 key
= key
+ (key
<< 3);
284 key
= key
^ (key
>> 4);
285 key
= key
* 0x27d4eb2d;
286 key
= key
^ (key
>> 15);
290 key
= (~key
) + (key
<< 21); // key = (key << 21) - key - 1;
291 key
= key
^ (key
>> 24);
292 key
= (key
+ (key
<< 3)) + (key
<< 8); // key * 265
293 key
= key
^ (key
>> 14);
294 key
= (key
+ (key
<< 2)) + (key
<< 4); // key * 21
295 key
= key
^ (key
>> 28);
296 key
= key
+ (key
<< 31);
298 key
>>= 2; /* Ensure that it fits in a fixnum. */
302 /* `depth' is used to limit recursion. */
304 scm_raw_ihash (SCM obj
, size_t depth
)
307 return scm_raw_ihashq (SCM_UNPACK (obj
));
309 switch (SCM_TYP7(obj
))
311 /* FIXME: do better for structs, variables, ... Also the hashes
312 are currently associative, which ain't the right thing. */
314 return scm_raw_ihashq (SCM_TYP16 (obj
));
316 if (scm_is_integer (obj
))
318 SCM n
= SCM_I_MAKINUM (SCM_MOST_POSITIVE_FIXNUM
);
319 if (scm_is_inexact (obj
))
320 obj
= scm_inexact_to_exact (obj
);
321 return scm_raw_ihashq (scm_to_ulong (scm_modulo (obj
, n
)));
324 return scm_i_string_hash (scm_number_to_string (obj
, scm_from_int (10)));
326 return scm_i_string_hash (obj
);
328 return scm_i_symbol_hash (obj
);
329 case scm_tc7_pointer
:
330 return scm_raw_ihashq ((scm_t_uintptr
) SCM_POINTER_VALUE (obj
));
334 size_t len
= SCM_SIMPLE_VECTOR_LENGTH (obj
);
335 size_t i
= depth
/ 2;
336 unsigned long h
= scm_raw_ihashq (SCM_CELL_WORD_0 (obj
));
339 h
^= scm_raw_ihash (scm_c_vector_ref (obj
, h
% len
), i
);
342 case scm_tcs_cons_imcar
:
343 case scm_tcs_cons_nimcar
:
345 return (scm_raw_ihash (SCM_CAR (obj
), depth
/ 2)
346 ^ scm_raw_ihash (SCM_CDR (obj
), depth
/ 2));
348 return scm_raw_ihashq (scm_tc3_cons
);
350 return scm_i_struct_hash (obj
, depth
);
352 return scm_raw_ihashq (SCM_CELL_WORD_0 (obj
));
360 scm_ihashq (SCM obj
, unsigned long n
)
362 return scm_raw_ihashq (SCM_UNPACK (obj
)) % n
;
366 SCM_DEFINE (scm_hashq
, "hashq", 2, 0, 0,
368 "Determine a hash value for @var{key} that is suitable for\n"
369 "lookups in a hashtable of size @var{size}, where @code{eq?} is\n"
370 "used as the equality predicate. The function returns an\n"
371 "integer in the range 0 to @var{size} - 1. Note that\n"
372 "@code{hashq} may use internal addresses. Thus two calls to\n"
373 "hashq where the keys are @code{eq?} are not guaranteed to\n"
374 "deliver the same value if the key object gets garbage collected\n"
375 "in between. This can happen, for example with symbols:\n"
376 "@code{(hashq 'foo n) (gc) (hashq 'foo n)} may produce two\n"
377 "different values, since @code{foo} will be garbage collected.")
378 #define FUNC_NAME s_scm_hashq
380 unsigned long sz
= scm_to_unsigned_integer (size
, 1, ULONG_MAX
);
381 return scm_from_ulong (scm_ihashq (key
, sz
));
390 scm_ihashv (SCM obj
, unsigned long n
)
393 return scm_raw_ihash (obj
, 10) % n
;
395 return scm_raw_ihashq (SCM_UNPACK (obj
)) % n
;
399 SCM_DEFINE (scm_hashv
, "hashv", 2, 0, 0,
401 "Determine a hash value for @var{key} that is suitable for\n"
402 "lookups in a hashtable of size @var{size}, where @code{eqv?} is\n"
403 "used as the equality predicate. The function returns an\n"
404 "integer in the range 0 to @var{size} - 1. Note that\n"
405 "@code{(hashv key)} may use internal addresses. Thus two calls\n"
406 "to hashv where the keys are @code{eqv?} are not guaranteed to\n"
407 "deliver the same value if the key object gets garbage collected\n"
408 "in between. This can happen, for example with symbols:\n"
409 "@code{(hashv 'foo n) (gc) (hashv 'foo n)} may produce two\n"
410 "different values, since @code{foo} will be garbage collected.")
411 #define FUNC_NAME s_scm_hashv
413 unsigned long sz
= scm_to_unsigned_integer (size
, 1, ULONG_MAX
);
414 return scm_from_ulong (scm_ihashv (key
, sz
));
423 scm_ihash (SCM obj
, unsigned long n
)
425 return (unsigned long) scm_raw_ihash (obj
, 10) % n
;
428 SCM_DEFINE (scm_hash
, "hash", 2, 0, 0,
430 "Determine a hash value for @var{key} that is suitable for\n"
431 "lookups in a hashtable of size @var{size}, where @code{equal?}\n"
432 "is used as the equality predicate. The function returns an\n"
433 "integer in the range 0 to @var{size} - 1.")
434 #define FUNC_NAME s_scm_hash
436 unsigned long sz
= scm_to_unsigned_integer (size
, 1, ULONG_MAX
);
437 return scm_from_ulong (scm_ihash (key
, sz
));
448 #include "libguile/hash.x"