Commit | Line | Data |
---|---|---|
247a56fa | 1 | /* Copyright (C) 1995,1996,1997, 2000, 2001, 2003, 2004, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. |
0f2d19dd | 2 | * |
73be1d9e | 3 | * This library is free software; you can redistribute it and/or |
53befeb7 NJ |
4 | * modify it under the terms of the GNU Lesser General Public License |
5 | * as published by the Free Software Foundation; either version 3 of | |
6 | * the License, or (at your option) any later version. | |
0f2d19dd | 7 | * |
53befeb7 NJ |
8 | * This library is distributed in the hope that it will be useful, but |
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
73be1d9e MV |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
11 | * Lesser General Public License for more details. | |
0f2d19dd | 12 | * |
73be1d9e MV |
13 | * You should have received a copy of the GNU Lesser General Public |
14 | * License along with this library; if not, write to the Free Software | |
53befeb7 NJ |
15 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
16 | * 02110-1301 USA | |
73be1d9e | 17 | */ |
1bbd0b84 | 18 | |
1bbd0b84 | 19 | |
0f2d19dd | 20 | \f |
dbb605f5 LC |
21 | #ifdef HAVE_CONFIG_H |
22 | # include <config.h> | |
23 | #endif | |
0f2d19dd | 24 | |
62241538 AW |
25 | #ifdef HAVE_WCHAR_H |
26 | #include <wchar.h> | |
27 | #endif | |
28 | ||
10483f9e | 29 | #include <math.h> |
62241538 AW |
30 | #include <unistr.h> |
31 | ||
a0599745 MD |
32 | #include "libguile/_scm.h" |
33 | #include "libguile/chars.h" | |
34 | #include "libguile/ports.h" | |
a002f1a2 DH |
35 | #include "libguile/strings.h" |
36 | #include "libguile/symbols.h" | |
a0599745 | 37 | #include "libguile/vectors.h" |
0f2d19dd | 38 | |
a0599745 MD |
39 | #include "libguile/validate.h" |
40 | #include "libguile/hash.h" | |
0f2d19dd JB |
41 | \f |
42 | ||
43 | #ifndef floor | |
44 | extern double floor(); | |
45 | #endif | |
46 | ||
1cc91f1b | 47 | |
1a04d29d AW |
48 | /* This hash function is originally from |
49 | http://burtleburtle.net/bob/c/lookup3.c by Bob Jenkins, May 2006, | |
50 | Public Domain. No warranty. */ | |
51 | ||
52 | #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) | |
53 | #define mix(a,b,c) \ | |
54 | { \ | |
55 | a -= c; a ^= rot(c, 4); c += b; \ | |
56 | b -= a; b ^= rot(a, 6); a += c; \ | |
57 | c -= b; c ^= rot(b, 8); b += a; \ | |
58 | a -= c; a ^= rot(c,16); c += b; \ | |
59 | b -= a; b ^= rot(a,19); a += c; \ | |
60 | c -= b; c ^= rot(b, 4); b += a; \ | |
61 | } | |
62 | ||
63 | #define final(a,b,c) \ | |
64 | { \ | |
65 | c ^= b; c -= rot(b,14); \ | |
66 | a ^= c; a -= rot(c,11); \ | |
67 | b ^= a; b -= rot(a,25); \ | |
68 | c ^= b; c -= rot(b,16); \ | |
69 | a ^= c; a -= rot(c,4); \ | |
70 | b ^= a; b -= rot(a,14); \ | |
71 | c ^= b; c -= rot(b,24); \ | |
72 | } | |
73 | ||
74 | #define JENKINS_LOOKUP3_HASHWORD2(k, length, ret) \ | |
75 | do { \ | |
76 | scm_t_uint32 a, b, c; \ | |
77 | \ | |
78 | /* Set up the internal state. */ \ | |
79 | a = b = c = 0xdeadbeef + ((scm_t_uint32)(length<<2)) + 47; \ | |
80 | \ | |
81 | /* Handle most of the key. */ \ | |
82 | while (length > 3) \ | |
83 | { \ | |
84 | a += k[0]; \ | |
85 | b += k[1]; \ | |
86 | c += k[2]; \ | |
87 | mix (a, b, c); \ | |
88 | length -= 3; \ | |
89 | k += 3; \ | |
90 | } \ | |
91 | \ | |
92 | /* Handle the last 3 elements. */ \ | |
93 | switch(length) /* All the case statements fall through. */ \ | |
94 | { \ | |
95 | case 3 : c += k[2]; \ | |
96 | case 2 : b += k[1]; \ | |
97 | case 1 : a += k[0]; \ | |
98 | final (a, b, c); \ | |
99 | case 0: /* case 0: nothing left to add */ \ | |
100 | break; \ | |
101 | } \ | |
102 | \ | |
103 | if (sizeof (ret) == 8) \ | |
104 | ret = (((unsigned long) c) << 32) | b; \ | |
105 | else \ | |
106 | ret = c; \ | |
107 | } while (0) | |
108 | ||
109 | ||
110 | static unsigned long | |
111 | narrow_string_hash (const scm_t_uint8 *str, size_t len) | |
112 | { | |
113 | unsigned long ret; | |
114 | JENKINS_LOOKUP3_HASHWORD2 (str, len, ret); | |
115 | ret >>= 2; /* Ensure that it fits in a fixnum. */ | |
116 | return ret; | |
117 | } | |
118 | ||
119 | static unsigned long | |
120 | wide_string_hash (const scm_t_wchar *str, size_t len) | |
121 | { | |
122 | unsigned long ret; | |
123 | JENKINS_LOOKUP3_HASHWORD2 (str, len, ret); | |
124 | ret >>= 2; /* Ensure that it fits in a fixnum. */ | |
125 | return ret; | |
126 | } | |
127 | ||
c014a02e | 128 | unsigned long |
1be6b49c | 129 | scm_string_hash (const unsigned char *str, size_t len) |
ba393257 | 130 | { |
1a04d29d | 131 | return narrow_string_hash (str, len); |
ba393257 DH |
132 | } |
133 | ||
e23106d5 MG |
134 | unsigned long |
135 | scm_i_string_hash (SCM str) | |
136 | { | |
137 | size_t len = scm_i_string_length (str); | |
e23106d5 | 138 | |
1a04d29d AW |
139 | if (scm_i_is_narrow_string (str)) |
140 | return narrow_string_hash ((const scm_t_uint8 *) scm_i_string_chars (str), | |
141 | len); | |
142 | else | |
143 | return wide_string_hash (scm_i_string_wide_chars (str), len); | |
e23106d5 MG |
144 | } |
145 | ||
62241538 AW |
146 | unsigned long |
147 | scm_i_locale_string_hash (const char *str, size_t len) | |
148 | { | |
62241538 | 149 | return scm_i_string_hash (scm_from_locale_stringn (str, len)); |
62241538 AW |
150 | } |
151 | ||
152 | unsigned long | |
153 | scm_i_latin1_string_hash (const char *str, size_t len) | |
154 | { | |
62241538 AW |
155 | if (len == (size_t) -1) |
156 | len = strlen (str); | |
157 | ||
1a04d29d | 158 | return narrow_string_hash ((const scm_t_uint8 *) str, len); |
62241538 AW |
159 | } |
160 | ||
1a04d29d | 161 | /* A tricky optimization, but probably worth it. */ |
62241538 AW |
162 | unsigned long |
163 | scm_i_utf8_string_hash (const char *str, size_t len) | |
164 | { | |
1a04d29d AW |
165 | const scm_t_uint8 *end, *ustr = (const scm_t_uint8 *) str; |
166 | unsigned long ret; | |
167 | ||
168 | /* The length of the string in characters. This name corresponds to | |
169 | Jenkins' original name. */ | |
170 | size_t length; | |
171 | ||
172 | scm_t_uint32 a, b, c, u32; | |
173 | ||
62241538 AW |
174 | if (len == (size_t) -1) |
175 | len = strlen (str); | |
176 | ||
1a04d29d AW |
177 | end = ustr + len; |
178 | ||
179 | if (u8_check (ustr, len) != NULL) | |
180 | /* Invalid UTF-8; punt. */ | |
181 | return scm_i_string_hash (scm_from_utf8_stringn (str, len)); | |
182 | ||
183 | length = u8_strnlen (ustr, len); | |
184 | ||
185 | /* Set up the internal state. */ | |
186 | a = b = c = 0xdeadbeef + ((scm_t_uint32)(length<<2)) + 47; | |
187 | ||
188 | /* Handle most of the key. */ | |
189 | while (length > 3) | |
62241538 | 190 | { |
1a04d29d AW |
191 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); |
192 | a += u32; | |
193 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); | |
194 | b += u32; | |
195 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); | |
196 | c += u32; | |
197 | mix (a, b, c); | |
198 | length -= 3; | |
62241538 AW |
199 | } |
200 | ||
1a04d29d AW |
201 | /* Handle the last 3 elements's. */ |
202 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); | |
203 | a += u32; | |
204 | if (--length) | |
205 | { | |
206 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); | |
207 | b += u32; | |
208 | if (--length) | |
209 | { | |
210 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); | |
211 | c += u32; | |
212 | } | |
213 | } | |
214 | ||
215 | final (a, b, c); | |
216 | ||
217 | if (sizeof (unsigned long) == 8) | |
218 | ret = (((unsigned long) c) << 32) | b; | |
219 | else | |
220 | ret = c; | |
221 | ||
222 | ret >>= 2; /* Ensure that it fits in a fixnum. */ | |
223 | return ret; | |
62241538 AW |
224 | } |
225 | ||
ba393257 | 226 | |
71f89dd7 AW |
227 | /* Thomas Wang's integer hasher, from |
228 | http://www.cris.com/~Ttwang/tech/inthash.htm. */ | |
229 | static unsigned long | |
230 | scm_raw_ihashq (scm_t_bits key) | |
231 | { | |
232 | if (sizeof (key) < 8) | |
233 | { | |
234 | key = (key ^ 61) ^ (key >> 16); | |
235 | key = key + (key << 3); | |
236 | key = key ^ (key >> 4); | |
237 | key = key * 0x27d4eb2d; | |
238 | key = key ^ (key >> 15); | |
239 | } | |
240 | else | |
241 | { | |
242 | key = (~key) + (key << 21); // key = (key << 21) - key - 1; | |
243 | key = key ^ (key >> 24); | |
244 | key = (key + (key << 3)) + (key << 8); // key * 265 | |
245 | key = key ^ (key >> 14); | |
246 | key = (key + (key << 2)) + (key << 4); // key * 21 | |
247 | key = key ^ (key >> 28); | |
248 | key = key + (key << 31); | |
249 | } | |
250 | key >>= 2; /* Ensure that it fits in a fixnum. */ | |
251 | return key; | |
252 | } | |
253 | ||
9d013330 | 254 | /* `depth' is used to limit recursion. */ |
d1d1c5de | 255 | static unsigned long |
9d013330 | 256 | scm_raw_ihash (SCM obj, size_t depth) |
0f2d19dd | 257 | { |
9d013330 AW |
258 | if (SCM_IMP (obj)) |
259 | return scm_raw_ihashq (SCM_UNPACK (obj)); | |
260 | ||
261 | switch (SCM_TYP7(obj)) | |
262 | { | |
263 | /* FIXME: do better for structs, variables, ... Also the hashes | |
264 | are currently associative, which ain't the right thing. */ | |
0f2d19dd | 265 | case scm_tc7_smob: |
9d013330 | 266 | return scm_raw_ihashq (SCM_TYP16 (obj)); |
534c55a9 | 267 | case scm_tc7_number: |
9d013330 AW |
268 | if (scm_is_integer (obj)) |
269 | { | |
270 | SCM n = SCM_I_MAKINUM (SCM_MOST_POSITIVE_FIXNUM); | |
271 | if (scm_is_inexact (obj)) | |
272 | obj = scm_inexact_to_exact (obj); | |
273 | return scm_raw_ihashq (scm_to_ulong (scm_modulo (obj, n))); | |
274 | } | |
275 | else | |
276 | return scm_i_string_hash (scm_number_to_string (obj, scm_from_int (10))); | |
0f2d19dd | 277 | case scm_tc7_string: |
9d013330 | 278 | return scm_i_string_hash (obj); |
28b06554 | 279 | case scm_tc7_symbol: |
9d013330 | 280 | return scm_i_symbol_hash (obj); |
3854d5fd | 281 | case scm_tc7_pointer: |
9d013330 | 282 | return scm_raw_ihashq ((scm_t_uintptr) SCM_POINTER_VALUE (obj)); |
0f2d19dd JB |
283 | case scm_tc7_wvect: |
284 | case scm_tc7_vector: | |
285 | { | |
4057a3e0 | 286 | size_t len = SCM_SIMPLE_VECTOR_LENGTH (obj); |
9d013330 AW |
287 | size_t i = depth / 2; |
288 | unsigned long h = scm_raw_ihashq (SCM_CELL_WORD_0 (obj)); | |
549333ef AW |
289 | if (len) |
290 | while (i--) | |
291 | h ^= scm_raw_ihash (scm_c_vector_ref (obj, h % len), i); | |
9d013330 | 292 | return h; |
0f2d19dd | 293 | } |
94a5efac GB |
294 | case scm_tcs_cons_imcar: |
295 | case scm_tcs_cons_nimcar: | |
9d013330 AW |
296 | if (depth) |
297 | return (scm_raw_ihash (SCM_CAR (obj), depth / 2) | |
298 | ^ scm_raw_ihash (SCM_CDR (obj), depth / 2)); | |
299 | else | |
300 | return scm_raw_ihashq (scm_tc3_cons); | |
301 | default: | |
302 | return scm_raw_ihashq (SCM_CELL_WORD_0 (obj)); | |
0f2d19dd | 303 | } |
0f2d19dd JB |
304 | } |
305 | ||
306 | ||
307 | \f | |
308 | ||
c014a02e ML |
309 | unsigned long |
310 | scm_ihashq (SCM obj, unsigned long n) | |
0f2d19dd | 311 | { |
71f89dd7 | 312 | return scm_raw_ihashq (SCM_UNPACK (obj)) % n; |
0f2d19dd JB |
313 | } |
314 | ||
315 | ||
3b3b36dd | 316 | SCM_DEFINE (scm_hashq, "hashq", 2, 0, 0, |
94a5efac | 317 | (SCM key, SCM size), |
5352393c MG |
318 | "Determine a hash value for @var{key} that is suitable for\n" |
319 | "lookups in a hashtable of size @var{size}, where @code{eq?} is\n" | |
320 | "used as the equality predicate. The function returns an\n" | |
321 | "integer in the range 0 to @var{size} - 1. Note that\n" | |
322 | "@code{hashq} may use internal addresses. Thus two calls to\n" | |
323 | "hashq where the keys are @code{eq?} are not guaranteed to\n" | |
324 | "deliver the same value if the key object gets garbage collected\n" | |
325 | "in between. This can happen, for example with symbols:\n" | |
326 | "@code{(hashq 'foo n) (gc) (hashq 'foo n)} may produce two\n" | |
327 | "different values, since @code{foo} will be garbage collected.") | |
1bbd0b84 | 328 | #define FUNC_NAME s_scm_hashq |
0f2d19dd | 329 | { |
a55c2b68 MV |
330 | unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX); |
331 | return scm_from_ulong (scm_ihashq (key, sz)); | |
0f2d19dd | 332 | } |
1bbd0b84 | 333 | #undef FUNC_NAME |
0f2d19dd JB |
334 | |
335 | ||
336 | \f | |
337 | ||
1cc91f1b | 338 | |
c014a02e ML |
339 | unsigned long |
340 | scm_ihashv (SCM obj, unsigned long n) | |
0f2d19dd | 341 | { |
0c95b57d | 342 | if (SCM_NUMP(obj)) |
9d013330 | 343 | return scm_raw_ihash (obj, 10) % n; |
0f2d19dd | 344 | else |
71f89dd7 | 345 | return scm_raw_ihashq (SCM_UNPACK (obj)) % n; |
0f2d19dd JB |
346 | } |
347 | ||
348 | ||
3b3b36dd | 349 | SCM_DEFINE (scm_hashv, "hashv", 2, 0, 0, |
94a5efac | 350 | (SCM key, SCM size), |
5352393c MG |
351 | "Determine a hash value for @var{key} that is suitable for\n" |
352 | "lookups in a hashtable of size @var{size}, where @code{eqv?} is\n" | |
353 | "used as the equality predicate. The function returns an\n" | |
354 | "integer in the range 0 to @var{size} - 1. Note that\n" | |
355 | "@code{(hashv key)} may use internal addresses. Thus two calls\n" | |
356 | "to hashv where the keys are @code{eqv?} are not guaranteed to\n" | |
357 | "deliver the same value if the key object gets garbage collected\n" | |
358 | "in between. This can happen, for example with symbols:\n" | |
359 | "@code{(hashv 'foo n) (gc) (hashv 'foo n)} may produce two\n" | |
360 | "different values, since @code{foo} will be garbage collected.") | |
1bbd0b84 | 361 | #define FUNC_NAME s_scm_hashv |
0f2d19dd | 362 | { |
a55c2b68 MV |
363 | unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX); |
364 | return scm_from_ulong (scm_ihashv (key, sz)); | |
0f2d19dd | 365 | } |
1bbd0b84 | 366 | #undef FUNC_NAME |
0f2d19dd JB |
367 | |
368 | ||
369 | \f | |
370 | ||
1cc91f1b | 371 | |
c014a02e ML |
372 | unsigned long |
373 | scm_ihash (SCM obj, unsigned long n) | |
0f2d19dd | 374 | { |
9d013330 | 375 | return (unsigned long) scm_raw_ihash (obj, 10) % n; |
0f2d19dd JB |
376 | } |
377 | ||
3b3b36dd | 378 | SCM_DEFINE (scm_hash, "hash", 2, 0, 0, |
94a5efac | 379 | (SCM key, SCM size), |
5352393c MG |
380 | "Determine a hash value for @var{key} that is suitable for\n" |
381 | "lookups in a hashtable of size @var{size}, where @code{equal?}\n" | |
382 | "is used as the equality predicate. The function returns an\n" | |
383 | "integer in the range 0 to @var{size} - 1.") | |
1bbd0b84 | 384 | #define FUNC_NAME s_scm_hash |
0f2d19dd | 385 | { |
a55c2b68 MV |
386 | unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX); |
387 | return scm_from_ulong (scm_ihash (key, sz)); | |
0f2d19dd | 388 | } |
1bbd0b84 | 389 | #undef FUNC_NAME |
0f2d19dd JB |
390 | |
391 | ||
392 | \f | |
393 | ||
1cc91f1b | 394 | |
0f2d19dd JB |
395 | void |
396 | scm_init_hash () | |
0f2d19dd | 397 | { |
a0599745 | 398 | #include "libguile/hash.x" |
0f2d19dd JB |
399 | } |
400 | ||
89e00824 ML |
401 | |
402 | /* | |
403 | Local Variables: | |
404 | c-file-style: "gnu" | |
405 | End: | |
406 | */ |