Commit | Line | Data |
---|---|---|
8ac870de LC |
1 | /* Copyright (C) 1995, 1996, 1997, 2000, 2001, 2003, 2004, 2006, 2008, |
2 | * 2009, 2010, 2011, 2012 Free Software Foundation, Inc. | |
0f2d19dd | 3 | * |
73be1d9e | 4 | * This library is free software; you can redistribute it and/or |
53befeb7 NJ |
5 | * modify it under the terms of the GNU Lesser General Public License |
6 | * as published by the Free Software Foundation; either version 3 of | |
7 | * the License, or (at your option) any later version. | |
0f2d19dd | 8 | * |
53befeb7 NJ |
9 | * This library is distributed in the hope that it will be useful, but |
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
73be1d9e MV |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | * Lesser General Public License for more details. | |
0f2d19dd | 13 | * |
73be1d9e MV |
14 | * You should have received a copy of the GNU Lesser General Public |
15 | * License along with this library; if not, write to the Free Software | |
53befeb7 NJ |
16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
17 | * 02110-1301 USA | |
73be1d9e | 18 | */ |
1bbd0b84 | 19 | |
1bbd0b84 | 20 | |
0f2d19dd | 21 | \f |
dbb605f5 LC |
22 | #ifdef HAVE_CONFIG_H |
23 | # include <config.h> | |
24 | #endif | |
0f2d19dd | 25 | |
62241538 AW |
26 | #ifdef HAVE_WCHAR_H |
27 | #include <wchar.h> | |
28 | #endif | |
29 | ||
10483f9e | 30 | #include <math.h> |
62241538 AW |
31 | #include <unistr.h> |
32 | ||
a0599745 MD |
33 | #include "libguile/_scm.h" |
34 | #include "libguile/chars.h" | |
35 | #include "libguile/ports.h" | |
a002f1a2 DH |
36 | #include "libguile/strings.h" |
37 | #include "libguile/symbols.h" | |
a0599745 | 38 | #include "libguile/vectors.h" |
0f2d19dd | 39 | |
a0599745 MD |
40 | #include "libguile/validate.h" |
41 | #include "libguile/hash.h" | |
0f2d19dd JB |
42 | \f |
43 | ||
44 | #ifndef floor | |
45 | extern double floor(); | |
46 | #endif | |
47 | ||
1cc91f1b | 48 | |
1a04d29d AW |
49 | /* This hash function is originally from |
50 | http://burtleburtle.net/bob/c/lookup3.c by Bob Jenkins, May 2006, | |
51 | Public Domain. No warranty. */ | |
52 | ||
53 | #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) | |
54 | #define mix(a,b,c) \ | |
55 | { \ | |
56 | a -= c; a ^= rot(c, 4); c += b; \ | |
57 | b -= a; b ^= rot(a, 6); a += c; \ | |
58 | c -= b; c ^= rot(b, 8); b += a; \ | |
59 | a -= c; a ^= rot(c,16); c += b; \ | |
60 | b -= a; b ^= rot(a,19); a += c; \ | |
61 | c -= b; c ^= rot(b, 4); b += a; \ | |
62 | } | |
63 | ||
64 | #define final(a,b,c) \ | |
65 | { \ | |
66 | c ^= b; c -= rot(b,14); \ | |
67 | a ^= c; a -= rot(c,11); \ | |
68 | b ^= a; b -= rot(a,25); \ | |
69 | c ^= b; c -= rot(b,16); \ | |
70 | a ^= c; a -= rot(c,4); \ | |
71 | b ^= a; b -= rot(a,14); \ | |
72 | c ^= b; c -= rot(b,24); \ | |
73 | } | |
74 | ||
75 | #define JENKINS_LOOKUP3_HASHWORD2(k, length, ret) \ | |
76 | do { \ | |
77 | scm_t_uint32 a, b, c; \ | |
78 | \ | |
79 | /* Set up the internal state. */ \ | |
80 | a = b = c = 0xdeadbeef + ((scm_t_uint32)(length<<2)) + 47; \ | |
81 | \ | |
82 | /* Handle most of the key. */ \ | |
83 | while (length > 3) \ | |
84 | { \ | |
85 | a += k[0]; \ | |
86 | b += k[1]; \ | |
87 | c += k[2]; \ | |
88 | mix (a, b, c); \ | |
89 | length -= 3; \ | |
90 | k += 3; \ | |
91 | } \ | |
92 | \ | |
93 | /* Handle the last 3 elements. */ \ | |
94 | switch(length) /* All the case statements fall through. */ \ | |
95 | { \ | |
96 | case 3 : c += k[2]; \ | |
97 | case 2 : b += k[1]; \ | |
98 | case 1 : a += k[0]; \ | |
99 | final (a, b, c); \ | |
100 | case 0: /* case 0: nothing left to add */ \ | |
101 | break; \ | |
102 | } \ | |
103 | \ | |
104 | if (sizeof (ret) == 8) \ | |
105 | ret = (((unsigned long) c) << 32) | b; \ | |
106 | else \ | |
107 | ret = c; \ | |
108 | } while (0) | |
109 | ||
110 | ||
111 | static unsigned long | |
112 | narrow_string_hash (const scm_t_uint8 *str, size_t len) | |
113 | { | |
114 | unsigned long ret; | |
115 | JENKINS_LOOKUP3_HASHWORD2 (str, len, ret); | |
116 | ret >>= 2; /* Ensure that it fits in a fixnum. */ | |
117 | return ret; | |
118 | } | |
119 | ||
120 | static unsigned long | |
121 | wide_string_hash (const scm_t_wchar *str, size_t len) | |
122 | { | |
123 | unsigned long ret; | |
124 | JENKINS_LOOKUP3_HASHWORD2 (str, len, ret); | |
125 | ret >>= 2; /* Ensure that it fits in a fixnum. */ | |
126 | return ret; | |
127 | } | |
128 | ||
c014a02e | 129 | unsigned long |
1be6b49c | 130 | scm_string_hash (const unsigned char *str, size_t len) |
ba393257 | 131 | { |
1a04d29d | 132 | return narrow_string_hash (str, len); |
ba393257 DH |
133 | } |
134 | ||
e23106d5 MG |
135 | unsigned long |
136 | scm_i_string_hash (SCM str) | |
137 | { | |
138 | size_t len = scm_i_string_length (str); | |
e23106d5 | 139 | |
1a04d29d AW |
140 | if (scm_i_is_narrow_string (str)) |
141 | return narrow_string_hash ((const scm_t_uint8 *) scm_i_string_chars (str), | |
142 | len); | |
143 | else | |
144 | return wide_string_hash (scm_i_string_wide_chars (str), len); | |
e23106d5 MG |
145 | } |
146 | ||
62241538 AW |
147 | unsigned long |
148 | scm_i_locale_string_hash (const char *str, size_t len) | |
149 | { | |
62241538 | 150 | return scm_i_string_hash (scm_from_locale_stringn (str, len)); |
62241538 AW |
151 | } |
152 | ||
153 | unsigned long | |
154 | scm_i_latin1_string_hash (const char *str, size_t len) | |
155 | { | |
62241538 AW |
156 | if (len == (size_t) -1) |
157 | len = strlen (str); | |
158 | ||
1a04d29d | 159 | return narrow_string_hash ((const scm_t_uint8 *) str, len); |
62241538 AW |
160 | } |
161 | ||
1a04d29d | 162 | /* A tricky optimization, but probably worth it. */ |
62241538 AW |
163 | unsigned long |
164 | scm_i_utf8_string_hash (const char *str, size_t len) | |
165 | { | |
1a04d29d AW |
166 | const scm_t_uint8 *end, *ustr = (const scm_t_uint8 *) str; |
167 | unsigned long ret; | |
168 | ||
169 | /* The length of the string in characters. This name corresponds to | |
170 | Jenkins' original name. */ | |
171 | size_t length; | |
172 | ||
173 | scm_t_uint32 a, b, c, u32; | |
174 | ||
62241538 AW |
175 | if (len == (size_t) -1) |
176 | len = strlen (str); | |
177 | ||
1a04d29d AW |
178 | end = ustr + len; |
179 | ||
180 | if (u8_check (ustr, len) != NULL) | |
181 | /* Invalid UTF-8; punt. */ | |
182 | return scm_i_string_hash (scm_from_utf8_stringn (str, len)); | |
183 | ||
184 | length = u8_strnlen (ustr, len); | |
185 | ||
186 | /* Set up the internal state. */ | |
187 | a = b = c = 0xdeadbeef + ((scm_t_uint32)(length<<2)) + 47; | |
188 | ||
189 | /* Handle most of the key. */ | |
190 | while (length > 3) | |
62241538 | 191 | { |
1a04d29d AW |
192 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); |
193 | a += u32; | |
194 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); | |
195 | b += u32; | |
196 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); | |
197 | c += u32; | |
198 | mix (a, b, c); | |
199 | length -= 3; | |
62241538 AW |
200 | } |
201 | ||
1a04d29d AW |
202 | /* Handle the last 3 elements's. */ |
203 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); | |
204 | a += u32; | |
205 | if (--length) | |
206 | { | |
207 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); | |
208 | b += u32; | |
209 | if (--length) | |
210 | { | |
211 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); | |
212 | c += u32; | |
213 | } | |
214 | } | |
215 | ||
216 | final (a, b, c); | |
217 | ||
218 | if (sizeof (unsigned long) == 8) | |
219 | ret = (((unsigned long) c) << 32) | b; | |
220 | else | |
221 | ret = c; | |
222 | ||
223 | ret >>= 2; /* Ensure that it fits in a fixnum. */ | |
224 | return ret; | |
62241538 AW |
225 | } |
226 | ||
fa980bcc MW |
227 | static unsigned long scm_raw_ihashq (scm_t_bits key); |
228 | static unsigned long scm_raw_ihash (SCM obj, size_t depth); | |
229 | ||
230 | /* Return the hash of struct OBJ. Traverse OBJ's fields to compute the | |
231 | result, unless DEPTH is zero. Assumes that OBJ is a struct. */ | |
232 | static unsigned long | |
233 | scm_i_struct_hash (SCM obj, size_t depth) | |
234 | { | |
235 | SCM layout; | |
236 | scm_t_bits *data; | |
237 | size_t struct_size, field_num; | |
238 | unsigned long hash; | |
239 | ||
240 | layout = SCM_STRUCT_LAYOUT (obj); | |
241 | struct_size = scm_i_symbol_length (layout) / 2; | |
242 | data = SCM_STRUCT_DATA (obj); | |
243 | ||
244 | hash = scm_raw_ihashq (SCM_UNPACK (SCM_STRUCT_VTABLE (obj))); | |
245 | if (depth > 0) | |
246 | for (field_num = 0; field_num < struct_size; field_num++) | |
247 | { | |
248 | int protection; | |
249 | ||
250 | protection = scm_i_symbol_ref (layout, field_num * 2 + 1); | |
251 | if (protection != 'h' && protection != 'o') | |
252 | { | |
253 | int type; | |
254 | type = scm_i_symbol_ref (layout, field_num * 2); | |
255 | switch (type) | |
256 | { | |
257 | case 'p': | |
258 | hash ^= scm_raw_ihash (SCM_PACK (data[field_num]), | |
259 | depth / 2); | |
260 | break; | |
261 | case 'u': | |
262 | hash ^= scm_raw_ihashq (data[field_num]); | |
263 | break; | |
264 | default: | |
265 | /* Ignore 's' fields. */; | |
266 | } | |
267 | } | |
268 | } | |
ba393257 | 269 | |
fa980bcc | 270 | /* FIXME: Tail elements should be taken into account. */ |
dba97178 | 271 | |
fa980bcc MW |
272 | return hash; |
273 | } | |
ba393257 | 274 | |
71f89dd7 AW |
275 | /* Thomas Wang's integer hasher, from |
276 | http://www.cris.com/~Ttwang/tech/inthash.htm. */ | |
277 | static unsigned long | |
278 | scm_raw_ihashq (scm_t_bits key) | |
279 | { | |
280 | if (sizeof (key) < 8) | |
281 | { | |
282 | key = (key ^ 61) ^ (key >> 16); | |
283 | key = key + (key << 3); | |
284 | key = key ^ (key >> 4); | |
285 | key = key * 0x27d4eb2d; | |
286 | key = key ^ (key >> 15); | |
287 | } | |
288 | else | |
289 | { | |
290 | key = (~key) + (key << 21); // key = (key << 21) - key - 1; | |
291 | key = key ^ (key >> 24); | |
292 | key = (key + (key << 3)) + (key << 8); // key * 265 | |
293 | key = key ^ (key >> 14); | |
294 | key = (key + (key << 2)) + (key << 4); // key * 21 | |
295 | key = key ^ (key >> 28); | |
296 | key = key + (key << 31); | |
297 | } | |
298 | key >>= 2; /* Ensure that it fits in a fixnum. */ | |
299 | return key; | |
300 | } | |
301 | ||
9d013330 | 302 | /* `depth' is used to limit recursion. */ |
d1d1c5de | 303 | static unsigned long |
9d013330 | 304 | scm_raw_ihash (SCM obj, size_t depth) |
0f2d19dd | 305 | { |
9d013330 AW |
306 | if (SCM_IMP (obj)) |
307 | return scm_raw_ihashq (SCM_UNPACK (obj)); | |
308 | ||
309 | switch (SCM_TYP7(obj)) | |
310 | { | |
311 | /* FIXME: do better for structs, variables, ... Also the hashes | |
312 | are currently associative, which ain't the right thing. */ | |
0f2d19dd | 313 | case scm_tc7_smob: |
9d013330 | 314 | return scm_raw_ihashq (SCM_TYP16 (obj)); |
534c55a9 | 315 | case scm_tc7_number: |
9d013330 AW |
316 | if (scm_is_integer (obj)) |
317 | { | |
318 | SCM n = SCM_I_MAKINUM (SCM_MOST_POSITIVE_FIXNUM); | |
319 | if (scm_is_inexact (obj)) | |
320 | obj = scm_inexact_to_exact (obj); | |
321 | return scm_raw_ihashq (scm_to_ulong (scm_modulo (obj, n))); | |
322 | } | |
323 | else | |
324 | return scm_i_string_hash (scm_number_to_string (obj, scm_from_int (10))); | |
0f2d19dd | 325 | case scm_tc7_string: |
9d013330 | 326 | return scm_i_string_hash (obj); |
28b06554 | 327 | case scm_tc7_symbol: |
9d013330 | 328 | return scm_i_symbol_hash (obj); |
3854d5fd | 329 | case scm_tc7_pointer: |
9d013330 | 330 | return scm_raw_ihashq ((scm_t_uintptr) SCM_POINTER_VALUE (obj)); |
0f2d19dd JB |
331 | case scm_tc7_wvect: |
332 | case scm_tc7_vector: | |
333 | { | |
4057a3e0 | 334 | size_t len = SCM_SIMPLE_VECTOR_LENGTH (obj); |
9d013330 AW |
335 | size_t i = depth / 2; |
336 | unsigned long h = scm_raw_ihashq (SCM_CELL_WORD_0 (obj)); | |
549333ef AW |
337 | if (len) |
338 | while (i--) | |
339 | h ^= scm_raw_ihash (scm_c_vector_ref (obj, h % len), i); | |
9d013330 | 340 | return h; |
0f2d19dd | 341 | } |
94a5efac GB |
342 | case scm_tcs_cons_imcar: |
343 | case scm_tcs_cons_nimcar: | |
9d013330 AW |
344 | if (depth) |
345 | return (scm_raw_ihash (SCM_CAR (obj), depth / 2) | |
346 | ^ scm_raw_ihash (SCM_CDR (obj), depth / 2)); | |
347 | else | |
348 | return scm_raw_ihashq (scm_tc3_cons); | |
fa980bcc MW |
349 | case scm_tcs_struct: |
350 | return scm_i_struct_hash (obj, depth); | |
9d013330 AW |
351 | default: |
352 | return scm_raw_ihashq (SCM_CELL_WORD_0 (obj)); | |
0f2d19dd | 353 | } |
0f2d19dd JB |
354 | } |
355 | ||
356 | ||
357 | \f | |
358 | ||
c014a02e ML |
359 | unsigned long |
360 | scm_ihashq (SCM obj, unsigned long n) | |
0f2d19dd | 361 | { |
71f89dd7 | 362 | return scm_raw_ihashq (SCM_UNPACK (obj)) % n; |
0f2d19dd JB |
363 | } |
364 | ||
365 | ||
3b3b36dd | 366 | SCM_DEFINE (scm_hashq, "hashq", 2, 0, 0, |
94a5efac | 367 | (SCM key, SCM size), |
5352393c MG |
368 | "Determine a hash value for @var{key} that is suitable for\n" |
369 | "lookups in a hashtable of size @var{size}, where @code{eq?} is\n" | |
370 | "used as the equality predicate. The function returns an\n" | |
371 | "integer in the range 0 to @var{size} - 1. Note that\n" | |
372 | "@code{hashq} may use internal addresses. Thus two calls to\n" | |
373 | "hashq where the keys are @code{eq?} are not guaranteed to\n" | |
374 | "deliver the same value if the key object gets garbage collected\n" | |
375 | "in between. This can happen, for example with symbols:\n" | |
376 | "@code{(hashq 'foo n) (gc) (hashq 'foo n)} may produce two\n" | |
377 | "different values, since @code{foo} will be garbage collected.") | |
1bbd0b84 | 378 | #define FUNC_NAME s_scm_hashq |
0f2d19dd | 379 | { |
a55c2b68 MV |
380 | unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX); |
381 | return scm_from_ulong (scm_ihashq (key, sz)); | |
0f2d19dd | 382 | } |
1bbd0b84 | 383 | #undef FUNC_NAME |
0f2d19dd JB |
384 | |
385 | ||
386 | \f | |
387 | ||
1cc91f1b | 388 | |
c014a02e ML |
389 | unsigned long |
390 | scm_ihashv (SCM obj, unsigned long n) | |
0f2d19dd | 391 | { |
0c95b57d | 392 | if (SCM_NUMP(obj)) |
9d013330 | 393 | return scm_raw_ihash (obj, 10) % n; |
0f2d19dd | 394 | else |
71f89dd7 | 395 | return scm_raw_ihashq (SCM_UNPACK (obj)) % n; |
0f2d19dd JB |
396 | } |
397 | ||
398 | ||
3b3b36dd | 399 | SCM_DEFINE (scm_hashv, "hashv", 2, 0, 0, |
94a5efac | 400 | (SCM key, SCM size), |
5352393c MG |
401 | "Determine a hash value for @var{key} that is suitable for\n" |
402 | "lookups in a hashtable of size @var{size}, where @code{eqv?} is\n" | |
403 | "used as the equality predicate. The function returns an\n" | |
404 | "integer in the range 0 to @var{size} - 1. Note that\n" | |
405 | "@code{(hashv key)} may use internal addresses. Thus two calls\n" | |
406 | "to hashv where the keys are @code{eqv?} are not guaranteed to\n" | |
407 | "deliver the same value if the key object gets garbage collected\n" | |
408 | "in between. This can happen, for example with symbols:\n" | |
409 | "@code{(hashv 'foo n) (gc) (hashv 'foo n)} may produce two\n" | |
410 | "different values, since @code{foo} will be garbage collected.") | |
1bbd0b84 | 411 | #define FUNC_NAME s_scm_hashv |
0f2d19dd | 412 | { |
a55c2b68 MV |
413 | unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX); |
414 | return scm_from_ulong (scm_ihashv (key, sz)); | |
0f2d19dd | 415 | } |
1bbd0b84 | 416 | #undef FUNC_NAME |
0f2d19dd JB |
417 | |
418 | ||
419 | \f | |
420 | ||
1cc91f1b | 421 | |
c014a02e ML |
422 | unsigned long |
423 | scm_ihash (SCM obj, unsigned long n) | |
0f2d19dd | 424 | { |
9d013330 | 425 | return (unsigned long) scm_raw_ihash (obj, 10) % n; |
0f2d19dd JB |
426 | } |
427 | ||
3b3b36dd | 428 | SCM_DEFINE (scm_hash, "hash", 2, 0, 0, |
94a5efac | 429 | (SCM key, SCM size), |
5352393c MG |
430 | "Determine a hash value for @var{key} that is suitable for\n" |
431 | "lookups in a hashtable of size @var{size}, where @code{equal?}\n" | |
432 | "is used as the equality predicate. The function returns an\n" | |
433 | "integer in the range 0 to @var{size} - 1.") | |
1bbd0b84 | 434 | #define FUNC_NAME s_scm_hash |
0f2d19dd | 435 | { |
a55c2b68 MV |
436 | unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX); |
437 | return scm_from_ulong (scm_ihash (key, sz)); | |
0f2d19dd | 438 | } |
1bbd0b84 | 439 | #undef FUNC_NAME |
0f2d19dd JB |
440 | |
441 | ||
442 | \f | |
443 | ||
1cc91f1b | 444 | |
0f2d19dd JB |
445 | void |
446 | scm_init_hash () | |
0f2d19dd | 447 | { |
a0599745 | 448 | #include "libguile/hash.x" |
0f2d19dd JB |
449 | } |
450 | ||
89e00824 ML |
451 | |
452 | /* | |
453 | Local Variables: | |
454 | c-file-style: "gnu" | |
455 | End: | |
456 | */ |