Commit | Line | Data |
---|---|---|
8ac870de | 1 | /* Copyright (C) 1995, 1996, 1997, 2000, 2001, 2003, 2004, 2006, 2008, |
894d0b89 | 2 | * 2009, 2010, 2011, 2012, 2014, 2015 Free Software Foundation, Inc. |
8ac870de | 3 | * |
73be1d9e | 4 | * This library is free software; you can redistribute it and/or |
53befeb7 NJ |
5 | * modify it under the terms of the GNU Lesser General Public License |
6 | * as published by the Free Software Foundation; either version 3 of | |
7 | * the License, or (at your option) any later version. | |
0f2d19dd | 8 | * |
53befeb7 NJ |
9 | * This library is distributed in the hope that it will be useful, but |
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
73be1d9e MV |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | * Lesser General Public License for more details. | |
0f2d19dd | 13 | * |
73be1d9e MV |
14 | * You should have received a copy of the GNU Lesser General Public |
15 | * License along with this library; if not, write to the Free Software | |
53befeb7 NJ |
16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
17 | * 02110-1301 USA | |
73be1d9e | 18 | */ |
1bbd0b84 | 19 | |
1bbd0b84 | 20 | |
0f2d19dd | 21 | \f |
dbb605f5 LC |
22 | #ifdef HAVE_CONFIG_H |
23 | # include <config.h> | |
24 | #endif | |
0f2d19dd | 25 | |
62241538 AW |
26 | #ifdef HAVE_WCHAR_H |
27 | #include <wchar.h> | |
28 | #endif | |
29 | ||
10483f9e | 30 | #include <math.h> |
62241538 AW |
31 | #include <unistr.h> |
32 | ||
a0599745 MD |
33 | #include "libguile/_scm.h" |
34 | #include "libguile/chars.h" | |
35 | #include "libguile/ports.h" | |
a002f1a2 DH |
36 | #include "libguile/strings.h" |
37 | #include "libguile/symbols.h" | |
a0599745 | 38 | #include "libguile/vectors.h" |
0f2d19dd | 39 | |
a0599745 MD |
40 | #include "libguile/validate.h" |
41 | #include "libguile/hash.h" | |
0f2d19dd JB |
42 | \f |
43 | ||
44 | #ifndef floor | |
45 | extern double floor(); | |
46 | #endif | |
47 | ||
1cc91f1b | 48 | |
1a04d29d AW |
49 | /* This hash function is originally from |
50 | http://burtleburtle.net/bob/c/lookup3.c by Bob Jenkins, May 2006, | |
51 | Public Domain. No warranty. */ | |
52 | ||
53 | #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) | |
54 | #define mix(a,b,c) \ | |
55 | { \ | |
56 | a -= c; a ^= rot(c, 4); c += b; \ | |
57 | b -= a; b ^= rot(a, 6); a += c; \ | |
58 | c -= b; c ^= rot(b, 8); b += a; \ | |
59 | a -= c; a ^= rot(c,16); c += b; \ | |
60 | b -= a; b ^= rot(a,19); a += c; \ | |
61 | c -= b; c ^= rot(b, 4); b += a; \ | |
62 | } | |
63 | ||
64 | #define final(a,b,c) \ | |
65 | { \ | |
66 | c ^= b; c -= rot(b,14); \ | |
67 | a ^= c; a -= rot(c,11); \ | |
68 | b ^= a; b -= rot(a,25); \ | |
69 | c ^= b; c -= rot(b,16); \ | |
70 | a ^= c; a -= rot(c,4); \ | |
71 | b ^= a; b -= rot(a,14); \ | |
72 | c ^= b; c -= rot(b,24); \ | |
73 | } | |
74 | ||
75 | #define JENKINS_LOOKUP3_HASHWORD2(k, length, ret) \ | |
76 | do { \ | |
77 | scm_t_uint32 a, b, c; \ | |
78 | \ | |
79 | /* Set up the internal state. */ \ | |
80 | a = b = c = 0xdeadbeef + ((scm_t_uint32)(length<<2)) + 47; \ | |
81 | \ | |
82 | /* Handle most of the key. */ \ | |
83 | while (length > 3) \ | |
84 | { \ | |
85 | a += k[0]; \ | |
86 | b += k[1]; \ | |
87 | c += k[2]; \ | |
88 | mix (a, b, c); \ | |
89 | length -= 3; \ | |
90 | k += 3; \ | |
91 | } \ | |
92 | \ | |
93 | /* Handle the last 3 elements. */ \ | |
94 | switch(length) /* All the case statements fall through. */ \ | |
95 | { \ | |
96 | case 3 : c += k[2]; \ | |
97 | case 2 : b += k[1]; \ | |
98 | case 1 : a += k[0]; \ | |
99 | final (a, b, c); \ | |
100 | case 0: /* case 0: nothing left to add */ \ | |
101 | break; \ | |
102 | } \ | |
103 | \ | |
104 | if (sizeof (ret) == 8) \ | |
105 | ret = (((unsigned long) c) << 32) | b; \ | |
106 | else \ | |
107 | ret = c; \ | |
108 | } while (0) | |
109 | ||
110 | ||
111 | static unsigned long | |
112 | narrow_string_hash (const scm_t_uint8 *str, size_t len) | |
113 | { | |
114 | unsigned long ret; | |
115 | JENKINS_LOOKUP3_HASHWORD2 (str, len, ret); | |
116 | ret >>= 2; /* Ensure that it fits in a fixnum. */ | |
117 | return ret; | |
118 | } | |
119 | ||
120 | static unsigned long | |
121 | wide_string_hash (const scm_t_wchar *str, size_t len) | |
122 | { | |
123 | unsigned long ret; | |
124 | JENKINS_LOOKUP3_HASHWORD2 (str, len, ret); | |
125 | ret >>= 2; /* Ensure that it fits in a fixnum. */ | |
126 | return ret; | |
127 | } | |
128 | ||
01a301d1 | 129 | unsigned long |
e23106d5 MG |
130 | scm_i_string_hash (SCM str) |
131 | { | |
132 | size_t len = scm_i_string_length (str); | |
e23106d5 | 133 | |
1a04d29d AW |
134 | if (scm_i_is_narrow_string (str)) |
135 | return narrow_string_hash ((const scm_t_uint8 *) scm_i_string_chars (str), | |
136 | len); | |
137 | else | |
138 | return wide_string_hash (scm_i_string_wide_chars (str), len); | |
e23106d5 MG |
139 | } |
140 | ||
62241538 AW |
141 | unsigned long |
142 | scm_i_locale_string_hash (const char *str, size_t len) | |
143 | { | |
62241538 | 144 | return scm_i_string_hash (scm_from_locale_stringn (str, len)); |
62241538 AW |
145 | } |
146 | ||
147 | unsigned long | |
148 | scm_i_latin1_string_hash (const char *str, size_t len) | |
149 | { | |
62241538 AW |
150 | if (len == (size_t) -1) |
151 | len = strlen (str); | |
152 | ||
1a04d29d | 153 | return narrow_string_hash ((const scm_t_uint8 *) str, len); |
62241538 AW |
154 | } |
155 | ||
1a04d29d | 156 | /* A tricky optimization, but probably worth it. */ |
62241538 AW |
157 | unsigned long |
158 | scm_i_utf8_string_hash (const char *str, size_t len) | |
159 | { | |
1a04d29d AW |
160 | const scm_t_uint8 *end, *ustr = (const scm_t_uint8 *) str; |
161 | unsigned long ret; | |
162 | ||
163 | /* The length of the string in characters. This name corresponds to | |
164 | Jenkins' original name. */ | |
165 | size_t length; | |
166 | ||
167 | scm_t_uint32 a, b, c, u32; | |
168 | ||
62241538 AW |
169 | if (len == (size_t) -1) |
170 | len = strlen (str); | |
171 | ||
1a04d29d AW |
172 | end = ustr + len; |
173 | ||
174 | if (u8_check (ustr, len) != NULL) | |
175 | /* Invalid UTF-8; punt. */ | |
176 | return scm_i_string_hash (scm_from_utf8_stringn (str, len)); | |
177 | ||
178 | length = u8_strnlen (ustr, len); | |
179 | ||
180 | /* Set up the internal state. */ | |
181 | a = b = c = 0xdeadbeef + ((scm_t_uint32)(length<<2)) + 47; | |
182 | ||
183 | /* Handle most of the key. */ | |
184 | while (length > 3) | |
62241538 | 185 | { |
1a04d29d AW |
186 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); |
187 | a += u32; | |
188 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); | |
189 | b += u32; | |
190 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); | |
191 | c += u32; | |
192 | mix (a, b, c); | |
193 | length -= 3; | |
62241538 AW |
194 | } |
195 | ||
1a04d29d AW |
196 | /* Handle the last 3 elements's. */ |
197 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); | |
198 | a += u32; | |
199 | if (--length) | |
200 | { | |
201 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); | |
202 | b += u32; | |
203 | if (--length) | |
204 | { | |
205 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); | |
206 | c += u32; | |
207 | } | |
208 | } | |
209 | ||
210 | final (a, b, c); | |
211 | ||
212 | if (sizeof (unsigned long) == 8) | |
213 | ret = (((unsigned long) c) << 32) | b; | |
214 | else | |
215 | ret = c; | |
216 | ||
217 | ret >>= 2; /* Ensure that it fits in a fixnum. */ | |
218 | return ret; | |
62241538 AW |
219 | } |
220 | ||
fa980bcc MW |
221 | static unsigned long scm_raw_ihashq (scm_t_bits key); |
222 | static unsigned long scm_raw_ihash (SCM obj, size_t depth); | |
223 | ||
224 | /* Return the hash of struct OBJ. Traverse OBJ's fields to compute the | |
225 | result, unless DEPTH is zero. Assumes that OBJ is a struct. */ | |
226 | static unsigned long | |
227 | scm_i_struct_hash (SCM obj, size_t depth) | |
228 | { | |
229 | SCM layout; | |
230 | scm_t_bits *data; | |
231 | size_t struct_size, field_num; | |
232 | unsigned long hash; | |
233 | ||
234 | layout = SCM_STRUCT_LAYOUT (obj); | |
235 | struct_size = scm_i_symbol_length (layout) / 2; | |
236 | data = SCM_STRUCT_DATA (obj); | |
237 | ||
238 | hash = scm_raw_ihashq (SCM_UNPACK (SCM_STRUCT_VTABLE (obj))); | |
239 | if (depth > 0) | |
240 | for (field_num = 0; field_num < struct_size; field_num++) | |
241 | { | |
242 | int protection; | |
243 | ||
244 | protection = scm_i_symbol_ref (layout, field_num * 2 + 1); | |
245 | if (protection != 'h' && protection != 'o') | |
246 | { | |
247 | int type; | |
248 | type = scm_i_symbol_ref (layout, field_num * 2); | |
249 | switch (type) | |
250 | { | |
251 | case 'p': | |
252 | hash ^= scm_raw_ihash (SCM_PACK (data[field_num]), | |
253 | depth / 2); | |
254 | break; | |
255 | case 'u': | |
256 | hash ^= scm_raw_ihashq (data[field_num]); | |
257 | break; | |
258 | default: | |
259 | /* Ignore 's' fields. */; | |
260 | } | |
261 | } | |
262 | } | |
ba393257 | 263 | |
fa980bcc | 264 | /* FIXME: Tail elements should be taken into account. */ |
dba97178 | 265 | |
fa980bcc MW |
266 | return hash; |
267 | } | |
ba393257 | 268 | |
71f89dd7 AW |
269 | /* Thomas Wang's integer hasher, from |
270 | http://www.cris.com/~Ttwang/tech/inthash.htm. */ | |
271 | static unsigned long | |
272 | scm_raw_ihashq (scm_t_bits key) | |
273 | { | |
274 | if (sizeof (key) < 8) | |
275 | { | |
276 | key = (key ^ 61) ^ (key >> 16); | |
277 | key = key + (key << 3); | |
278 | key = key ^ (key >> 4); | |
279 | key = key * 0x27d4eb2d; | |
280 | key = key ^ (key >> 15); | |
281 | } | |
282 | else | |
283 | { | |
284 | key = (~key) + (key << 21); // key = (key << 21) - key - 1; | |
285 | key = key ^ (key >> 24); | |
286 | key = (key + (key << 3)) + (key << 8); // key * 265 | |
287 | key = key ^ (key >> 14); | |
288 | key = (key + (key << 2)) + (key << 4); // key * 21 | |
289 | key = key ^ (key >> 28); | |
290 | key = key + (key << 31); | |
291 | } | |
292 | key >>= 2; /* Ensure that it fits in a fixnum. */ | |
293 | return key; | |
294 | } | |
295 | ||
9d013330 | 296 | /* `depth' is used to limit recursion. */ |
d1d1c5de | 297 | static unsigned long |
9d013330 | 298 | scm_raw_ihash (SCM obj, size_t depth) |
0f2d19dd | 299 | { |
9d013330 AW |
300 | if (SCM_IMP (obj)) |
301 | return scm_raw_ihashq (SCM_UNPACK (obj)); | |
302 | ||
303 | switch (SCM_TYP7(obj)) | |
304 | { | |
305 | /* FIXME: do better for structs, variables, ... Also the hashes | |
306 | are currently associative, which ain't the right thing. */ | |
0f2d19dd | 307 | case scm_tc7_smob: |
9d013330 | 308 | return scm_raw_ihashq (SCM_TYP16 (obj)); |
534c55a9 | 309 | case scm_tc7_number: |
9d013330 AW |
310 | if (scm_is_integer (obj)) |
311 | { | |
312 | SCM n = SCM_I_MAKINUM (SCM_MOST_POSITIVE_FIXNUM); | |
313 | if (scm_is_inexact (obj)) | |
314 | obj = scm_inexact_to_exact (obj); | |
315 | return scm_raw_ihashq (scm_to_ulong (scm_modulo (obj, n))); | |
316 | } | |
317 | else | |
318 | return scm_i_string_hash (scm_number_to_string (obj, scm_from_int (10))); | |
0f2d19dd | 319 | case scm_tc7_string: |
9d013330 | 320 | return scm_i_string_hash (obj); |
28b06554 | 321 | case scm_tc7_symbol: |
9d013330 | 322 | return scm_i_symbol_hash (obj); |
3854d5fd | 323 | case scm_tc7_pointer: |
9d013330 | 324 | return scm_raw_ihashq ((scm_t_uintptr) SCM_POINTER_VALUE (obj)); |
0f2d19dd JB |
325 | case scm_tc7_wvect: |
326 | case scm_tc7_vector: | |
327 | { | |
4057a3e0 | 328 | size_t len = SCM_SIMPLE_VECTOR_LENGTH (obj); |
9d013330 AW |
329 | size_t i = depth / 2; |
330 | unsigned long h = scm_raw_ihashq (SCM_CELL_WORD_0 (obj)); | |
549333ef AW |
331 | if (len) |
332 | while (i--) | |
333 | h ^= scm_raw_ihash (scm_c_vector_ref (obj, h % len), i); | |
9d013330 | 334 | return h; |
0f2d19dd | 335 | } |
94a5efac GB |
336 | case scm_tcs_cons_imcar: |
337 | case scm_tcs_cons_nimcar: | |
9d013330 AW |
338 | if (depth) |
339 | return (scm_raw_ihash (SCM_CAR (obj), depth / 2) | |
340 | ^ scm_raw_ihash (SCM_CDR (obj), depth / 2)); | |
341 | else | |
342 | return scm_raw_ihashq (scm_tc3_cons); | |
fa980bcc MW |
343 | case scm_tcs_struct: |
344 | return scm_i_struct_hash (obj, depth); | |
9d013330 AW |
345 | default: |
346 | return scm_raw_ihashq (SCM_CELL_WORD_0 (obj)); | |
0f2d19dd | 347 | } |
0f2d19dd JB |
348 | } |
349 | ||
350 | ||
351 | \f | |
352 | ||
c014a02e ML |
353 | unsigned long |
354 | scm_ihashq (SCM obj, unsigned long n) | |
0f2d19dd | 355 | { |
71f89dd7 | 356 | return scm_raw_ihashq (SCM_UNPACK (obj)) % n; |
0f2d19dd JB |
357 | } |
358 | ||
359 | ||
3b3b36dd | 360 | SCM_DEFINE (scm_hashq, "hashq", 2, 0, 0, |
94a5efac | 361 | (SCM key, SCM size), |
5352393c MG |
362 | "Determine a hash value for @var{key} that is suitable for\n" |
363 | "lookups in a hashtable of size @var{size}, where @code{eq?} is\n" | |
364 | "used as the equality predicate. The function returns an\n" | |
365 | "integer in the range 0 to @var{size} - 1. Note that\n" | |
366 | "@code{hashq} may use internal addresses. Thus two calls to\n" | |
367 | "hashq where the keys are @code{eq?} are not guaranteed to\n" | |
368 | "deliver the same value if the key object gets garbage collected\n" | |
369 | "in between. This can happen, for example with symbols:\n" | |
370 | "@code{(hashq 'foo n) (gc) (hashq 'foo n)} may produce two\n" | |
371 | "different values, since @code{foo} will be garbage collected.") | |
1bbd0b84 | 372 | #define FUNC_NAME s_scm_hashq |
0f2d19dd | 373 | { |
a55c2b68 MV |
374 | unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX); |
375 | return scm_from_ulong (scm_ihashq (key, sz)); | |
0f2d19dd | 376 | } |
1bbd0b84 | 377 | #undef FUNC_NAME |
0f2d19dd JB |
378 | |
379 | ||
380 | \f | |
381 | ||
1cc91f1b | 382 | |
c014a02e ML |
383 | unsigned long |
384 | scm_ihashv (SCM obj, unsigned long n) | |
0f2d19dd | 385 | { |
0c95b57d | 386 | if (SCM_NUMP(obj)) |
9d013330 | 387 | return scm_raw_ihash (obj, 10) % n; |
0f2d19dd | 388 | else |
71f89dd7 | 389 | return scm_raw_ihashq (SCM_UNPACK (obj)) % n; |
0f2d19dd JB |
390 | } |
391 | ||
392 | ||
3b3b36dd | 393 | SCM_DEFINE (scm_hashv, "hashv", 2, 0, 0, |
94a5efac | 394 | (SCM key, SCM size), |
5352393c MG |
395 | "Determine a hash value for @var{key} that is suitable for\n" |
396 | "lookups in a hashtable of size @var{size}, where @code{eqv?} is\n" | |
397 | "used as the equality predicate. The function returns an\n" | |
398 | "integer in the range 0 to @var{size} - 1. Note that\n" | |
399 | "@code{(hashv key)} may use internal addresses. Thus two calls\n" | |
400 | "to hashv where the keys are @code{eqv?} are not guaranteed to\n" | |
401 | "deliver the same value if the key object gets garbage collected\n" | |
402 | "in between. This can happen, for example with symbols:\n" | |
403 | "@code{(hashv 'foo n) (gc) (hashv 'foo n)} may produce two\n" | |
404 | "different values, since @code{foo} will be garbage collected.") | |
1bbd0b84 | 405 | #define FUNC_NAME s_scm_hashv |
0f2d19dd | 406 | { |
a55c2b68 MV |
407 | unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX); |
408 | return scm_from_ulong (scm_ihashv (key, sz)); | |
0f2d19dd | 409 | } |
1bbd0b84 | 410 | #undef FUNC_NAME |
0f2d19dd JB |
411 | |
412 | ||
413 | \f | |
414 | ||
1cc91f1b | 415 | |
c014a02e ML |
416 | unsigned long |
417 | scm_ihash (SCM obj, unsigned long n) | |
0f2d19dd | 418 | { |
9d013330 | 419 | return (unsigned long) scm_raw_ihash (obj, 10) % n; |
0f2d19dd JB |
420 | } |
421 | ||
3b3b36dd | 422 | SCM_DEFINE (scm_hash, "hash", 2, 0, 0, |
94a5efac | 423 | (SCM key, SCM size), |
5352393c MG |
424 | "Determine a hash value for @var{key} that is suitable for\n" |
425 | "lookups in a hashtable of size @var{size}, where @code{equal?}\n" | |
426 | "is used as the equality predicate. The function returns an\n" | |
427 | "integer in the range 0 to @var{size} - 1.") | |
1bbd0b84 | 428 | #define FUNC_NAME s_scm_hash |
0f2d19dd | 429 | { |
a55c2b68 MV |
430 | unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX); |
431 | return scm_from_ulong (scm_ihash (key, sz)); | |
0f2d19dd | 432 | } |
1bbd0b84 | 433 | #undef FUNC_NAME |
0f2d19dd JB |
434 | |
435 | ||
436 | \f | |
437 | ||
1cc91f1b | 438 | |
0f2d19dd JB |
439 | void |
440 | scm_init_hash () | |
0f2d19dd | 441 | { |
a0599745 | 442 | #include "libguile/hash.x" |
0f2d19dd JB |
443 | } |
444 | ||
89e00824 ML |
445 | |
446 | /* | |
447 | Local Variables: | |
448 | c-file-style: "gnu" | |
449 | End: | |
450 | */ |