Assignment conversion in the interpreter
[bpt/guile.git] / libguile / hash.c
CommitLineData
8ac870de
LC
1/* Copyright (C) 1995, 1996, 1997, 2000, 2001, 2003, 2004, 2006, 2008,
2 * 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
3 *
73be1d9e 4 * This library is free software; you can redistribute it and/or
53befeb7
NJ
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
0f2d19dd 8 *
53befeb7
NJ
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
0f2d19dd 13 *
73be1d9e
MV
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
53befeb7
NJ
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
73be1d9e 18 */
1bbd0b84 19
1bbd0b84 20
0f2d19dd 21\f
dbb605f5
LC
22#ifdef HAVE_CONFIG_H
23# include <config.h>
24#endif
0f2d19dd 25
62241538
AW
26#ifdef HAVE_WCHAR_H
27#include <wchar.h>
28#endif
29
10483f9e 30#include <math.h>
62241538
AW
31#include <unistr.h>
32
a0599745
MD
33#include "libguile/_scm.h"
34#include "libguile/chars.h"
35#include "libguile/ports.h"
a002f1a2
DH
36#include "libguile/strings.h"
37#include "libguile/symbols.h"
a0599745 38#include "libguile/vectors.h"
0f2d19dd 39
a0599745
MD
40#include "libguile/validate.h"
41#include "libguile/hash.h"
0f2d19dd
JB
42\f
43
44#ifndef floor
45extern double floor();
46#endif
47
1cc91f1b 48
1a04d29d
AW
49/* This hash function is originally from
50 http://burtleburtle.net/bob/c/lookup3.c by Bob Jenkins, May 2006,
51 Public Domain. No warranty. */
52
53#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
54#define mix(a,b,c) \
55{ \
56 a -= c; a ^= rot(c, 4); c += b; \
57 b -= a; b ^= rot(a, 6); a += c; \
58 c -= b; c ^= rot(b, 8); b += a; \
59 a -= c; a ^= rot(c,16); c += b; \
60 b -= a; b ^= rot(a,19); a += c; \
61 c -= b; c ^= rot(b, 4); b += a; \
62}
63
64#define final(a,b,c) \
65{ \
66 c ^= b; c -= rot(b,14); \
67 a ^= c; a -= rot(c,11); \
68 b ^= a; b -= rot(a,25); \
69 c ^= b; c -= rot(b,16); \
70 a ^= c; a -= rot(c,4); \
71 b ^= a; b -= rot(a,14); \
72 c ^= b; c -= rot(b,24); \
73}
74
75#define JENKINS_LOOKUP3_HASHWORD2(k, length, ret) \
76 do { \
77 scm_t_uint32 a, b, c; \
78 \
79 /* Set up the internal state. */ \
80 a = b = c = 0xdeadbeef + ((scm_t_uint32)(length<<2)) + 47; \
81 \
82 /* Handle most of the key. */ \
83 while (length > 3) \
84 { \
85 a += k[0]; \
86 b += k[1]; \
87 c += k[2]; \
88 mix (a, b, c); \
89 length -= 3; \
90 k += 3; \
91 } \
92 \
93 /* Handle the last 3 elements. */ \
94 switch(length) /* All the case statements fall through. */ \
95 { \
96 case 3 : c += k[2]; \
97 case 2 : b += k[1]; \
98 case 1 : a += k[0]; \
99 final (a, b, c); \
100 case 0: /* case 0: nothing left to add */ \
101 break; \
102 } \
103 \
104 if (sizeof (ret) == 8) \
105 ret = (((unsigned long) c) << 32) | b; \
106 else \
107 ret = c; \
108 } while (0)
109
110
111static unsigned long
112narrow_string_hash (const scm_t_uint8 *str, size_t len)
113{
114 unsigned long ret;
115 JENKINS_LOOKUP3_HASHWORD2 (str, len, ret);
116 ret >>= 2; /* Ensure that it fits in a fixnum. */
117 return ret;
118}
119
120static unsigned long
121wide_string_hash (const scm_t_wchar *str, size_t len)
122{
123 unsigned long ret;
124 JENKINS_LOOKUP3_HASHWORD2 (str, len, ret);
125 ret >>= 2; /* Ensure that it fits in a fixnum. */
126 return ret;
127}
128
c014a02e 129unsigned long
1be6b49c 130scm_string_hash (const unsigned char *str, size_t len)
ba393257 131{
1a04d29d 132 return narrow_string_hash (str, len);
ba393257
DH
133}
134
e23106d5
MG
135unsigned long
136scm_i_string_hash (SCM str)
137{
138 size_t len = scm_i_string_length (str);
e23106d5 139
1a04d29d
AW
140 if (scm_i_is_narrow_string (str))
141 return narrow_string_hash ((const scm_t_uint8 *) scm_i_string_chars (str),
142 len);
143 else
144 return wide_string_hash (scm_i_string_wide_chars (str), len);
e23106d5
MG
145}
146
62241538
AW
147unsigned long
148scm_i_locale_string_hash (const char *str, size_t len)
149{
62241538 150 return scm_i_string_hash (scm_from_locale_stringn (str, len));
62241538
AW
151}
152
153unsigned long
154scm_i_latin1_string_hash (const char *str, size_t len)
155{
62241538
AW
156 if (len == (size_t) -1)
157 len = strlen (str);
158
1a04d29d 159 return narrow_string_hash ((const scm_t_uint8 *) str, len);
62241538
AW
160}
161
1a04d29d 162/* A tricky optimization, but probably worth it. */
62241538
AW
163unsigned long
164scm_i_utf8_string_hash (const char *str, size_t len)
165{
1a04d29d
AW
166 const scm_t_uint8 *end, *ustr = (const scm_t_uint8 *) str;
167 unsigned long ret;
168
169 /* The length of the string in characters. This name corresponds to
170 Jenkins' original name. */
171 size_t length;
172
173 scm_t_uint32 a, b, c, u32;
174
62241538
AW
175 if (len == (size_t) -1)
176 len = strlen (str);
177
1a04d29d
AW
178 end = ustr + len;
179
180 if (u8_check (ustr, len) != NULL)
181 /* Invalid UTF-8; punt. */
182 return scm_i_string_hash (scm_from_utf8_stringn (str, len));
183
184 length = u8_strnlen (ustr, len);
185
186 /* Set up the internal state. */
187 a = b = c = 0xdeadbeef + ((scm_t_uint32)(length<<2)) + 47;
188
189 /* Handle most of the key. */
190 while (length > 3)
62241538 191 {
1a04d29d
AW
192 ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
193 a += u32;
194 ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
195 b += u32;
196 ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
197 c += u32;
198 mix (a, b, c);
199 length -= 3;
62241538
AW
200 }
201
1a04d29d
AW
202 /* Handle the last 3 elements's. */
203 ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
204 a += u32;
205 if (--length)
206 {
207 ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
208 b += u32;
209 if (--length)
210 {
211 ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
212 c += u32;
213 }
214 }
215
216 final (a, b, c);
217
218 if (sizeof (unsigned long) == 8)
219 ret = (((unsigned long) c) << 32) | b;
220 else
221 ret = c;
222
223 ret >>= 2; /* Ensure that it fits in a fixnum. */
224 return ret;
62241538
AW
225}
226
fa980bcc
MW
227static unsigned long scm_raw_ihashq (scm_t_bits key);
228static unsigned long scm_raw_ihash (SCM obj, size_t depth);
229
230/* Return the hash of struct OBJ. Traverse OBJ's fields to compute the
231 result, unless DEPTH is zero. Assumes that OBJ is a struct. */
232static unsigned long
233scm_i_struct_hash (SCM obj, size_t depth)
234{
235 SCM layout;
236 scm_t_bits *data;
237 size_t struct_size, field_num;
238 unsigned long hash;
239
240 layout = SCM_STRUCT_LAYOUT (obj);
241 struct_size = scm_i_symbol_length (layout) / 2;
242 data = SCM_STRUCT_DATA (obj);
243
244 hash = scm_raw_ihashq (SCM_UNPACK (SCM_STRUCT_VTABLE (obj)));
245 if (depth > 0)
246 for (field_num = 0; field_num < struct_size; field_num++)
247 {
248 int protection;
249
250 protection = scm_i_symbol_ref (layout, field_num * 2 + 1);
251 if (protection != 'h' && protection != 'o')
252 {
253 int type;
254 type = scm_i_symbol_ref (layout, field_num * 2);
255 switch (type)
256 {
257 case 'p':
258 hash ^= scm_raw_ihash (SCM_PACK (data[field_num]),
259 depth / 2);
260 break;
261 case 'u':
262 hash ^= scm_raw_ihashq (data[field_num]);
263 break;
264 default:
265 /* Ignore 's' fields. */;
266 }
267 }
268 }
ba393257 269
fa980bcc 270 /* FIXME: Tail elements should be taken into account. */
dba97178 271
fa980bcc
MW
272 return hash;
273}
ba393257 274
71f89dd7
AW
275/* Thomas Wang's integer hasher, from
276 http://www.cris.com/~Ttwang/tech/inthash.htm. */
277static unsigned long
278scm_raw_ihashq (scm_t_bits key)
279{
280 if (sizeof (key) < 8)
281 {
282 key = (key ^ 61) ^ (key >> 16);
283 key = key + (key << 3);
284 key = key ^ (key >> 4);
285 key = key * 0x27d4eb2d;
286 key = key ^ (key >> 15);
287 }
288 else
289 {
290 key = (~key) + (key << 21); // key = (key << 21) - key - 1;
291 key = key ^ (key >> 24);
292 key = (key + (key << 3)) + (key << 8); // key * 265
293 key = key ^ (key >> 14);
294 key = (key + (key << 2)) + (key << 4); // key * 21
295 key = key ^ (key >> 28);
296 key = key + (key << 31);
297 }
298 key >>= 2; /* Ensure that it fits in a fixnum. */
299 return key;
300}
301
9d013330 302/* `depth' is used to limit recursion. */
d1d1c5de 303static unsigned long
9d013330 304scm_raw_ihash (SCM obj, size_t depth)
0f2d19dd 305{
9d013330
AW
306 if (SCM_IMP (obj))
307 return scm_raw_ihashq (SCM_UNPACK (obj));
308
309 switch (SCM_TYP7(obj))
310 {
311 /* FIXME: do better for structs, variables, ... Also the hashes
312 are currently associative, which ain't the right thing. */
0f2d19dd 313 case scm_tc7_smob:
9d013330 314 return scm_raw_ihashq (SCM_TYP16 (obj));
534c55a9 315 case scm_tc7_number:
9d013330
AW
316 if (scm_is_integer (obj))
317 {
318 SCM n = SCM_I_MAKINUM (SCM_MOST_POSITIVE_FIXNUM);
319 if (scm_is_inexact (obj))
320 obj = scm_inexact_to_exact (obj);
321 return scm_raw_ihashq (scm_to_ulong (scm_modulo (obj, n)));
322 }
323 else
324 return scm_i_string_hash (scm_number_to_string (obj, scm_from_int (10)));
0f2d19dd 325 case scm_tc7_string:
9d013330 326 return scm_i_string_hash (obj);
28b06554 327 case scm_tc7_symbol:
9d013330 328 return scm_i_symbol_hash (obj);
3854d5fd 329 case scm_tc7_pointer:
9d013330 330 return scm_raw_ihashq ((scm_t_uintptr) SCM_POINTER_VALUE (obj));
0f2d19dd
JB
331 case scm_tc7_wvect:
332 case scm_tc7_vector:
333 {
4057a3e0 334 size_t len = SCM_SIMPLE_VECTOR_LENGTH (obj);
9d013330
AW
335 size_t i = depth / 2;
336 unsigned long h = scm_raw_ihashq (SCM_CELL_WORD_0 (obj));
549333ef
AW
337 if (len)
338 while (i--)
339 h ^= scm_raw_ihash (scm_c_vector_ref (obj, h % len), i);
9d013330 340 return h;
0f2d19dd 341 }
94a5efac
GB
342 case scm_tcs_cons_imcar:
343 case scm_tcs_cons_nimcar:
9d013330
AW
344 if (depth)
345 return (scm_raw_ihash (SCM_CAR (obj), depth / 2)
346 ^ scm_raw_ihash (SCM_CDR (obj), depth / 2));
347 else
348 return scm_raw_ihashq (scm_tc3_cons);
fa980bcc
MW
349 case scm_tcs_struct:
350 return scm_i_struct_hash (obj, depth);
9d013330
AW
351 default:
352 return scm_raw_ihashq (SCM_CELL_WORD_0 (obj));
0f2d19dd 353 }
0f2d19dd
JB
354}
355
356
357\f
358
c014a02e
ML
359unsigned long
360scm_ihashq (SCM obj, unsigned long n)
0f2d19dd 361{
71f89dd7 362 return scm_raw_ihashq (SCM_UNPACK (obj)) % n;
0f2d19dd
JB
363}
364
365
3b3b36dd 366SCM_DEFINE (scm_hashq, "hashq", 2, 0, 0,
94a5efac 367 (SCM key, SCM size),
5352393c
MG
368 "Determine a hash value for @var{key} that is suitable for\n"
369 "lookups in a hashtable of size @var{size}, where @code{eq?} is\n"
370 "used as the equality predicate. The function returns an\n"
371 "integer in the range 0 to @var{size} - 1. Note that\n"
372 "@code{hashq} may use internal addresses. Thus two calls to\n"
373 "hashq where the keys are @code{eq?} are not guaranteed to\n"
374 "deliver the same value if the key object gets garbage collected\n"
375 "in between. This can happen, for example with symbols:\n"
376 "@code{(hashq 'foo n) (gc) (hashq 'foo n)} may produce two\n"
377 "different values, since @code{foo} will be garbage collected.")
1bbd0b84 378#define FUNC_NAME s_scm_hashq
0f2d19dd 379{
a55c2b68
MV
380 unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX);
381 return scm_from_ulong (scm_ihashq (key, sz));
0f2d19dd 382}
1bbd0b84 383#undef FUNC_NAME
0f2d19dd
JB
384
385
386\f
387
1cc91f1b 388
c014a02e
ML
389unsigned long
390scm_ihashv (SCM obj, unsigned long n)
0f2d19dd 391{
0c95b57d 392 if (SCM_NUMP(obj))
9d013330 393 return scm_raw_ihash (obj, 10) % n;
0f2d19dd 394 else
71f89dd7 395 return scm_raw_ihashq (SCM_UNPACK (obj)) % n;
0f2d19dd
JB
396}
397
398
3b3b36dd 399SCM_DEFINE (scm_hashv, "hashv", 2, 0, 0,
94a5efac 400 (SCM key, SCM size),
5352393c
MG
401 "Determine a hash value for @var{key} that is suitable for\n"
402 "lookups in a hashtable of size @var{size}, where @code{eqv?} is\n"
403 "used as the equality predicate. The function returns an\n"
404 "integer in the range 0 to @var{size} - 1. Note that\n"
405 "@code{(hashv key)} may use internal addresses. Thus two calls\n"
406 "to hashv where the keys are @code{eqv?} are not guaranteed to\n"
407 "deliver the same value if the key object gets garbage collected\n"
408 "in between. This can happen, for example with symbols:\n"
409 "@code{(hashv 'foo n) (gc) (hashv 'foo n)} may produce two\n"
410 "different values, since @code{foo} will be garbage collected.")
1bbd0b84 411#define FUNC_NAME s_scm_hashv
0f2d19dd 412{
a55c2b68
MV
413 unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX);
414 return scm_from_ulong (scm_ihashv (key, sz));
0f2d19dd 415}
1bbd0b84 416#undef FUNC_NAME
0f2d19dd
JB
417
418
419\f
420
1cc91f1b 421
c014a02e
ML
422unsigned long
423scm_ihash (SCM obj, unsigned long n)
0f2d19dd 424{
9d013330 425 return (unsigned long) scm_raw_ihash (obj, 10) % n;
0f2d19dd
JB
426}
427
3b3b36dd 428SCM_DEFINE (scm_hash, "hash", 2, 0, 0,
94a5efac 429 (SCM key, SCM size),
5352393c
MG
430 "Determine a hash value for @var{key} that is suitable for\n"
431 "lookups in a hashtable of size @var{size}, where @code{equal?}\n"
432 "is used as the equality predicate. The function returns an\n"
433 "integer in the range 0 to @var{size} - 1.")
1bbd0b84 434#define FUNC_NAME s_scm_hash
0f2d19dd 435{
a55c2b68
MV
436 unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX);
437 return scm_from_ulong (scm_ihash (key, sz));
0f2d19dd 438}
1bbd0b84 439#undef FUNC_NAME
0f2d19dd
JB
440
441
442\f
443
1cc91f1b 444
0f2d19dd
JB
445void
446scm_init_hash ()
0f2d19dd 447{
a0599745 448#include "libguile/hash.x"
0f2d19dd
JB
449}
450
89e00824
ML
451
452/*
453 Local Variables:
454 c-file-style: "gnu"
455 End:
456*/