Commit | Line | Data |
---|---|---|
247a56fa | 1 | /* Copyright (C) 1995,1996,1997, 2000, 2001, 2003, 2004, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. |
0f2d19dd | 2 | * |
73be1d9e | 3 | * This library is free software; you can redistribute it and/or |
53befeb7 NJ |
4 | * modify it under the terms of the GNU Lesser General Public License |
5 | * as published by the Free Software Foundation; either version 3 of | |
6 | * the License, or (at your option) any later version. | |
0f2d19dd | 7 | * |
53befeb7 NJ |
8 | * This library is distributed in the hope that it will be useful, but |
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
73be1d9e MV |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
11 | * Lesser General Public License for more details. | |
0f2d19dd | 12 | * |
73be1d9e MV |
13 | * You should have received a copy of the GNU Lesser General Public |
14 | * License along with this library; if not, write to the Free Software | |
53befeb7 NJ |
15 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
16 | * 02110-1301 USA | |
73be1d9e | 17 | */ |
1bbd0b84 | 18 | |
1bbd0b84 | 19 | |
0f2d19dd | 20 | \f |
dbb605f5 LC |
21 | #ifdef HAVE_CONFIG_H |
22 | # include <config.h> | |
23 | #endif | |
0f2d19dd | 24 | |
62241538 AW |
25 | #ifdef HAVE_WCHAR_H |
26 | #include <wchar.h> | |
27 | #endif | |
28 | ||
10483f9e | 29 | #include <math.h> |
62241538 AW |
30 | #include <unistr.h> |
31 | ||
a0599745 MD |
32 | #include "libguile/_scm.h" |
33 | #include "libguile/chars.h" | |
34 | #include "libguile/ports.h" | |
a002f1a2 DH |
35 | #include "libguile/strings.h" |
36 | #include "libguile/symbols.h" | |
a0599745 | 37 | #include "libguile/vectors.h" |
0f2d19dd | 38 | |
a0599745 MD |
39 | #include "libguile/validate.h" |
40 | #include "libguile/hash.h" | |
0f2d19dd JB |
41 | \f |
42 | ||
43 | #ifndef floor | |
44 | extern double floor(); | |
45 | #endif | |
46 | ||
1cc91f1b | 47 | |
1a04d29d AW |
48 | /* This hash function is originally from |
49 | http://burtleburtle.net/bob/c/lookup3.c by Bob Jenkins, May 2006, | |
50 | Public Domain. No warranty. */ | |
51 | ||
52 | #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) | |
53 | #define mix(a,b,c) \ | |
54 | { \ | |
55 | a -= c; a ^= rot(c, 4); c += b; \ | |
56 | b -= a; b ^= rot(a, 6); a += c; \ | |
57 | c -= b; c ^= rot(b, 8); b += a; \ | |
58 | a -= c; a ^= rot(c,16); c += b; \ | |
59 | b -= a; b ^= rot(a,19); a += c; \ | |
60 | c -= b; c ^= rot(b, 4); b += a; \ | |
61 | } | |
62 | ||
63 | #define final(a,b,c) \ | |
64 | { \ | |
65 | c ^= b; c -= rot(b,14); \ | |
66 | a ^= c; a -= rot(c,11); \ | |
67 | b ^= a; b -= rot(a,25); \ | |
68 | c ^= b; c -= rot(b,16); \ | |
69 | a ^= c; a -= rot(c,4); \ | |
70 | b ^= a; b -= rot(a,14); \ | |
71 | c ^= b; c -= rot(b,24); \ | |
72 | } | |
73 | ||
74 | #define JENKINS_LOOKUP3_HASHWORD2(k, length, ret) \ | |
75 | do { \ | |
76 | scm_t_uint32 a, b, c; \ | |
77 | \ | |
78 | /* Set up the internal state. */ \ | |
79 | a = b = c = 0xdeadbeef + ((scm_t_uint32)(length<<2)) + 47; \ | |
80 | \ | |
81 | /* Handle most of the key. */ \ | |
82 | while (length > 3) \ | |
83 | { \ | |
84 | a += k[0]; \ | |
85 | b += k[1]; \ | |
86 | c += k[2]; \ | |
87 | mix (a, b, c); \ | |
88 | length -= 3; \ | |
89 | k += 3; \ | |
90 | } \ | |
91 | \ | |
92 | /* Handle the last 3 elements. */ \ | |
93 | switch(length) /* All the case statements fall through. */ \ | |
94 | { \ | |
95 | case 3 : c += k[2]; \ | |
96 | case 2 : b += k[1]; \ | |
97 | case 1 : a += k[0]; \ | |
98 | final (a, b, c); \ | |
99 | case 0: /* case 0: nothing left to add */ \ | |
100 | break; \ | |
101 | } \ | |
102 | \ | |
103 | if (sizeof (ret) == 8) \ | |
104 | ret = (((unsigned long) c) << 32) | b; \ | |
105 | else \ | |
106 | ret = c; \ | |
107 | } while (0) | |
108 | ||
109 | ||
110 | static unsigned long | |
111 | narrow_string_hash (const scm_t_uint8 *str, size_t len) | |
112 | { | |
113 | unsigned long ret; | |
114 | JENKINS_LOOKUP3_HASHWORD2 (str, len, ret); | |
115 | ret >>= 2; /* Ensure that it fits in a fixnum. */ | |
116 | return ret; | |
117 | } | |
118 | ||
119 | static unsigned long | |
120 | wide_string_hash (const scm_t_wchar *str, size_t len) | |
121 | { | |
122 | unsigned long ret; | |
123 | JENKINS_LOOKUP3_HASHWORD2 (str, len, ret); | |
124 | ret >>= 2; /* Ensure that it fits in a fixnum. */ | |
125 | return ret; | |
126 | } | |
127 | ||
c014a02e | 128 | unsigned long |
1be6b49c | 129 | scm_string_hash (const unsigned char *str, size_t len) |
ba393257 | 130 | { |
1a04d29d | 131 | return narrow_string_hash (str, len); |
ba393257 DH |
132 | } |
133 | ||
e23106d5 MG |
134 | unsigned long |
135 | scm_i_string_hash (SCM str) | |
136 | { | |
137 | size_t len = scm_i_string_length (str); | |
e23106d5 | 138 | |
1a04d29d AW |
139 | if (scm_i_is_narrow_string (str)) |
140 | return narrow_string_hash ((const scm_t_uint8 *) scm_i_string_chars (str), | |
141 | len); | |
142 | else | |
143 | return wide_string_hash (scm_i_string_wide_chars (str), len); | |
e23106d5 MG |
144 | } |
145 | ||
62241538 AW |
146 | unsigned long |
147 | scm_i_locale_string_hash (const char *str, size_t len) | |
148 | { | |
62241538 | 149 | return scm_i_string_hash (scm_from_locale_stringn (str, len)); |
62241538 AW |
150 | } |
151 | ||
152 | unsigned long | |
153 | scm_i_latin1_string_hash (const char *str, size_t len) | |
154 | { | |
62241538 AW |
155 | if (len == (size_t) -1) |
156 | len = strlen (str); | |
157 | ||
1a04d29d | 158 | return narrow_string_hash ((const scm_t_uint8 *) str, len); |
62241538 AW |
159 | } |
160 | ||
1a04d29d | 161 | /* A tricky optimization, but probably worth it. */ |
62241538 AW |
162 | unsigned long |
163 | scm_i_utf8_string_hash (const char *str, size_t len) | |
164 | { | |
1a04d29d AW |
165 | const scm_t_uint8 *end, *ustr = (const scm_t_uint8 *) str; |
166 | unsigned long ret; | |
167 | ||
168 | /* The length of the string in characters. This name corresponds to | |
169 | Jenkins' original name. */ | |
170 | size_t length; | |
171 | ||
172 | scm_t_uint32 a, b, c, u32; | |
173 | ||
62241538 AW |
174 | if (len == (size_t) -1) |
175 | len = strlen (str); | |
176 | ||
1a04d29d AW |
177 | end = ustr + len; |
178 | ||
179 | if (u8_check (ustr, len) != NULL) | |
180 | /* Invalid UTF-8; punt. */ | |
181 | return scm_i_string_hash (scm_from_utf8_stringn (str, len)); | |
182 | ||
183 | length = u8_strnlen (ustr, len); | |
184 | ||
185 | /* Set up the internal state. */ | |
186 | a = b = c = 0xdeadbeef + ((scm_t_uint32)(length<<2)) + 47; | |
187 | ||
188 | /* Handle most of the key. */ | |
189 | while (length > 3) | |
62241538 | 190 | { |
1a04d29d AW |
191 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); |
192 | a += u32; | |
193 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); | |
194 | b += u32; | |
195 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); | |
196 | c += u32; | |
197 | mix (a, b, c); | |
198 | length -= 3; | |
62241538 AW |
199 | } |
200 | ||
1a04d29d AW |
201 | /* Handle the last 3 elements's. */ |
202 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); | |
203 | a += u32; | |
204 | if (--length) | |
205 | { | |
206 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); | |
207 | b += u32; | |
208 | if (--length) | |
209 | { | |
210 | ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); | |
211 | c += u32; | |
212 | } | |
213 | } | |
214 | ||
215 | final (a, b, c); | |
216 | ||
217 | if (sizeof (unsigned long) == 8) | |
218 | ret = (((unsigned long) c) << 32) | b; | |
219 | else | |
220 | ret = c; | |
221 | ||
222 | ret >>= 2; /* Ensure that it fits in a fixnum. */ | |
223 | return ret; | |
62241538 AW |
224 | } |
225 | ||
ba393257 | 226 | |
dba97178 DH |
227 | /* Dirk:FIXME:: why downcase for characters? (2x: scm_hasher, scm_ihashv) */ |
228 | /* Dirk:FIXME:: scm_hasher could be made static. */ | |
229 | ||
230 | ||
c014a02e ML |
231 | unsigned long |
232 | scm_hasher(SCM obj, unsigned long n, size_t d) | |
0f2d19dd | 233 | { |
dba97178 DH |
234 | switch (SCM_ITAG3 (obj)) { |
235 | case scm_tc3_int_1: | |
236 | case scm_tc3_int_2: | |
e11e83f3 | 237 | return SCM_I_INUM(obj) % n; /* SCM_INUMP(obj) */ |
dba97178 DH |
238 | case scm_tc3_imm24: |
239 | if (SCM_CHARP(obj)) | |
84fad130 | 240 | return (unsigned)(scm_c_downcase(SCM_CHAR(obj))) % n; |
dba97178 | 241 | switch (SCM_UNPACK (obj)) { |
210c0325 | 242 | case SCM_EOL_BITS: |
94a5efac GB |
243 | d = 256; |
244 | break; | |
210c0325 | 245 | case SCM_BOOL_T_BITS: |
94a5efac GB |
246 | d = 257; |
247 | break; | |
210c0325 | 248 | case SCM_BOOL_F_BITS: |
94a5efac GB |
249 | d = 258; |
250 | break; | |
210c0325 | 251 | case SCM_EOF_VAL_BITS: |
94a5efac GB |
252 | d = 259; |
253 | break; | |
254 | default: | |
255 | d = 263; /* perhaps should be error */ | |
0f2d19dd JB |
256 | } |
257 | return d % n; | |
94a5efac GB |
258 | default: |
259 | return 263 % n; /* perhaps should be error */ | |
dba97178 | 260 | case scm_tc3_cons: |
0f2d19dd | 261 | switch SCM_TYP7(obj) { |
94a5efac GB |
262 | default: |
263 | return 263 % n; | |
0f2d19dd | 264 | case scm_tc7_smob: |
534c55a9 DH |
265 | return 263 % n; |
266 | case scm_tc7_number: | |
1be6b49c | 267 | switch SCM_TYP16 (obj) { |
950cc72b | 268 | case scm_tc16_big: |
e11e83f3 | 269 | return scm_to_ulong (scm_modulo (obj, scm_from_ulong (n))); |
950cc72b MD |
270 | case scm_tc16_real: |
271 | { | |
1be6b49c | 272 | double r = SCM_REAL_VALUE (obj); |
10483f9e | 273 | if (floor (r) == r && !isinf (r) && !isnan (r)) |
e11e83f3 MV |
274 | { |
275 | obj = scm_inexact_to_exact (obj); | |
276 | return scm_to_ulong (scm_modulo (obj, scm_from_ulong (n))); | |
277 | } | |
0f2d19dd | 278 | } |
534c55a9 | 279 | /* Fall through */ |
950cc72b | 280 | case scm_tc16_complex: |
f92e85f7 | 281 | case scm_tc16_fraction: |
e11e83f3 | 282 | obj = scm_number_to_string (obj, scm_from_int (10)); |
534c55a9 | 283 | /* Fall through */ |
0f2d19dd | 284 | } |
534c55a9 | 285 | /* Fall through */ |
0f2d19dd | 286 | case scm_tc7_string: |
8824ac88 | 287 | { |
5a6d139b | 288 | unsigned long hash = |
e23106d5 | 289 | scm_i_string_hash (obj) % n; |
8824ac88 MV |
290 | return hash; |
291 | } | |
28b06554 | 292 | case scm_tc7_symbol: |
cc95e00a | 293 | return scm_i_symbol_hash (obj) % n; |
3854d5fd LC |
294 | case scm_tc7_pointer: |
295 | { | |
296 | /* Pointer objects are typically used to store addresses of heap | |
297 | objects. On most platforms, these are at least 3-byte | |
298 | aligned (on x86_64-*-gnu, `malloc' returns 4-byte aligned | |
299 | addresses), so get rid of the least significant bits. */ | |
300 | scm_t_uintptr significant_bits; | |
301 | ||
302 | significant_bits = (scm_t_uintptr) SCM_POINTER_VALUE (obj) >> 4UL; | |
303 | return (size_t) significant_bits % n; | |
304 | } | |
0f2d19dd JB |
305 | case scm_tc7_wvect: |
306 | case scm_tc7_vector: | |
307 | { | |
4057a3e0 | 308 | size_t len = SCM_SIMPLE_VECTOR_LENGTH (obj); |
1be6b49c | 309 | if (len > 5) |
0f2d19dd | 310 | { |
1be6b49c | 311 | size_t i = d/2; |
c014a02e | 312 | unsigned long h = 1; |
4057a3e0 MV |
313 | while (i--) |
314 | { | |
315 | SCM elt = SCM_SIMPLE_VECTOR_REF (obj, h % len); | |
316 | h = ((h << 8) + (scm_hasher (elt, n, 2))) % n; | |
317 | } | |
0f2d19dd JB |
318 | return h; |
319 | } | |
320 | else | |
321 | { | |
1be6b49c | 322 | size_t i = len; |
c014a02e | 323 | unsigned long h = (n)-1; |
4057a3e0 MV |
324 | while (i--) |
325 | { | |
326 | SCM elt = SCM_SIMPLE_VECTOR_REF (obj, h % len); | |
327 | h = ((h << 8) + (scm_hasher (elt, n, d/len))) % n; | |
328 | } | |
0f2d19dd JB |
329 | return h; |
330 | } | |
331 | } | |
94a5efac GB |
332 | case scm_tcs_cons_imcar: |
333 | case scm_tcs_cons_nimcar: | |
1be6b49c ML |
334 | if (d) return (scm_hasher (SCM_CAR (obj), n, d/2) |
335 | + scm_hasher (SCM_CDR (obj), n, d/2)) % n; | |
0f2d19dd JB |
336 | else return 1; |
337 | case scm_tc7_port: | |
206d3de3 | 338 | return ((SCM_RDNG & SCM_CELL_WORD_0 (obj)) ? 260 : 261) % n; |
cc7005bc | 339 | case scm_tc7_program: |
0f2d19dd JB |
340 | return 262 % n; |
341 | } | |
342 | } | |
343 | } | |
344 | ||
345 | ||
346 | \f | |
347 | ||
1cc91f1b | 348 | |
c014a02e ML |
349 | unsigned long |
350 | scm_ihashq (SCM obj, unsigned long n) | |
0f2d19dd | 351 | { |
54778cd3 | 352 | return (SCM_UNPACK (obj) >> 1) % n; |
0f2d19dd JB |
353 | } |
354 | ||
355 | ||
3b3b36dd | 356 | SCM_DEFINE (scm_hashq, "hashq", 2, 0, 0, |
94a5efac | 357 | (SCM key, SCM size), |
5352393c MG |
358 | "Determine a hash value for @var{key} that is suitable for\n" |
359 | "lookups in a hashtable of size @var{size}, where @code{eq?} is\n" | |
360 | "used as the equality predicate. The function returns an\n" | |
361 | "integer in the range 0 to @var{size} - 1. Note that\n" | |
362 | "@code{hashq} may use internal addresses. Thus two calls to\n" | |
363 | "hashq where the keys are @code{eq?} are not guaranteed to\n" | |
364 | "deliver the same value if the key object gets garbage collected\n" | |
365 | "in between. This can happen, for example with symbols:\n" | |
366 | "@code{(hashq 'foo n) (gc) (hashq 'foo n)} may produce two\n" | |
367 | "different values, since @code{foo} will be garbage collected.") | |
1bbd0b84 | 368 | #define FUNC_NAME s_scm_hashq |
0f2d19dd | 369 | { |
a55c2b68 MV |
370 | unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX); |
371 | return scm_from_ulong (scm_ihashq (key, sz)); | |
0f2d19dd | 372 | } |
1bbd0b84 | 373 | #undef FUNC_NAME |
0f2d19dd JB |
374 | |
375 | ||
376 | \f | |
377 | ||
1cc91f1b | 378 | |
c014a02e ML |
379 | unsigned long |
380 | scm_ihashv (SCM obj, unsigned long n) | |
0f2d19dd | 381 | { |
7866a09b | 382 | if (SCM_CHARP(obj)) |
84fad130 | 383 | return ((unsigned long) (scm_c_downcase (SCM_CHAR (obj)))) % n; /* downcase!?!! */ |
0f2d19dd | 384 | |
0c95b57d | 385 | if (SCM_NUMP(obj)) |
c014a02e | 386 | return (unsigned long) scm_hasher(obj, n, 10); |
0f2d19dd | 387 | else |
54778cd3 | 388 | return SCM_UNPACK (obj) % n; |
0f2d19dd JB |
389 | } |
390 | ||
391 | ||
3b3b36dd | 392 | SCM_DEFINE (scm_hashv, "hashv", 2, 0, 0, |
94a5efac | 393 | (SCM key, SCM size), |
5352393c MG |
394 | "Determine a hash value for @var{key} that is suitable for\n" |
395 | "lookups in a hashtable of size @var{size}, where @code{eqv?} is\n" | |
396 | "used as the equality predicate. The function returns an\n" | |
397 | "integer in the range 0 to @var{size} - 1. Note that\n" | |
398 | "@code{(hashv key)} may use internal addresses. Thus two calls\n" | |
399 | "to hashv where the keys are @code{eqv?} are not guaranteed to\n" | |
400 | "deliver the same value if the key object gets garbage collected\n" | |
401 | "in between. This can happen, for example with symbols:\n" | |
402 | "@code{(hashv 'foo n) (gc) (hashv 'foo n)} may produce two\n" | |
403 | "different values, since @code{foo} will be garbage collected.") | |
1bbd0b84 | 404 | #define FUNC_NAME s_scm_hashv |
0f2d19dd | 405 | { |
a55c2b68 MV |
406 | unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX); |
407 | return scm_from_ulong (scm_ihashv (key, sz)); | |
0f2d19dd | 408 | } |
1bbd0b84 | 409 | #undef FUNC_NAME |
0f2d19dd JB |
410 | |
411 | ||
412 | \f | |
413 | ||
1cc91f1b | 414 | |
c014a02e ML |
415 | unsigned long |
416 | scm_ihash (SCM obj, unsigned long n) | |
0f2d19dd | 417 | { |
c014a02e | 418 | return (unsigned long) scm_hasher (obj, n, 10); |
0f2d19dd JB |
419 | } |
420 | ||
3b3b36dd | 421 | SCM_DEFINE (scm_hash, "hash", 2, 0, 0, |
94a5efac | 422 | (SCM key, SCM size), |
5352393c MG |
423 | "Determine a hash value for @var{key} that is suitable for\n" |
424 | "lookups in a hashtable of size @var{size}, where @code{equal?}\n" | |
425 | "is used as the equality predicate. The function returns an\n" | |
426 | "integer in the range 0 to @var{size} - 1.") | |
1bbd0b84 | 427 | #define FUNC_NAME s_scm_hash |
0f2d19dd | 428 | { |
a55c2b68 MV |
429 | unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX); |
430 | return scm_from_ulong (scm_ihash (key, sz)); | |
0f2d19dd | 431 | } |
1bbd0b84 | 432 | #undef FUNC_NAME |
0f2d19dd JB |
433 | |
434 | ||
435 | \f | |
436 | ||
1cc91f1b | 437 | |
0f2d19dd JB |
438 | void |
439 | scm_init_hash () | |
0f2d19dd | 440 | { |
a0599745 | 441 | #include "libguile/hash.x" |
0f2d19dd JB |
442 | } |
443 | ||
89e00824 ML |
444 | |
445 | /* | |
446 | Local Variables: | |
447 | c-file-style: "gnu" | |
448 | End: | |
449 | */ |