symbols.h reindent
[bpt/guile.git] / libguile / hash.c
CommitLineData
247a56fa 1/* Copyright (C) 1995,1996,1997, 2000, 2001, 2003, 2004, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
0f2d19dd 2 *
73be1d9e 3 * This library is free software; you can redistribute it and/or
53befeb7
NJ
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
0f2d19dd 7 *
53befeb7
NJ
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
0f2d19dd 12 *
73be1d9e
MV
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
53befeb7
NJ
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
73be1d9e 17 */
1bbd0b84 18
1bbd0b84 19
0f2d19dd 20\f
dbb605f5
LC
21#ifdef HAVE_CONFIG_H
22# include <config.h>
23#endif
0f2d19dd 24
62241538
AW
25#ifdef HAVE_WCHAR_H
26#include <wchar.h>
27#endif
28
10483f9e 29#include <math.h>
62241538
AW
30#include <unistr.h>
31
a0599745
MD
32#include "libguile/_scm.h"
33#include "libguile/chars.h"
34#include "libguile/ports.h"
a002f1a2
DH
35#include "libguile/strings.h"
36#include "libguile/symbols.h"
a0599745 37#include "libguile/vectors.h"
0f2d19dd 38
a0599745
MD
39#include "libguile/validate.h"
40#include "libguile/hash.h"
0f2d19dd
JB
41\f
42
43#ifndef floor
44extern double floor();
45#endif
46
1cc91f1b 47
1a04d29d
AW
48/* This hash function is originally from
49 http://burtleburtle.net/bob/c/lookup3.c by Bob Jenkins, May 2006,
50 Public Domain. No warranty. */
51
52#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
53#define mix(a,b,c) \
54{ \
55 a -= c; a ^= rot(c, 4); c += b; \
56 b -= a; b ^= rot(a, 6); a += c; \
57 c -= b; c ^= rot(b, 8); b += a; \
58 a -= c; a ^= rot(c,16); c += b; \
59 b -= a; b ^= rot(a,19); a += c; \
60 c -= b; c ^= rot(b, 4); b += a; \
61}
62
63#define final(a,b,c) \
64{ \
65 c ^= b; c -= rot(b,14); \
66 a ^= c; a -= rot(c,11); \
67 b ^= a; b -= rot(a,25); \
68 c ^= b; c -= rot(b,16); \
69 a ^= c; a -= rot(c,4); \
70 b ^= a; b -= rot(a,14); \
71 c ^= b; c -= rot(b,24); \
72}
73
74#define JENKINS_LOOKUP3_HASHWORD2(k, length, ret) \
75 do { \
76 scm_t_uint32 a, b, c; \
77 \
78 /* Set up the internal state. */ \
79 a = b = c = 0xdeadbeef + ((scm_t_uint32)(length<<2)) + 47; \
80 \
81 /* Handle most of the key. */ \
82 while (length > 3) \
83 { \
84 a += k[0]; \
85 b += k[1]; \
86 c += k[2]; \
87 mix (a, b, c); \
88 length -= 3; \
89 k += 3; \
90 } \
91 \
92 /* Handle the last 3 elements. */ \
93 switch(length) /* All the case statements fall through. */ \
94 { \
95 case 3 : c += k[2]; \
96 case 2 : b += k[1]; \
97 case 1 : a += k[0]; \
98 final (a, b, c); \
99 case 0: /* case 0: nothing left to add */ \
100 break; \
101 } \
102 \
103 if (sizeof (ret) == 8) \
104 ret = (((unsigned long) c) << 32) | b; \
105 else \
106 ret = c; \
107 } while (0)
108
109
110static unsigned long
111narrow_string_hash (const scm_t_uint8 *str, size_t len)
112{
113 unsigned long ret;
114 JENKINS_LOOKUP3_HASHWORD2 (str, len, ret);
115 ret >>= 2; /* Ensure that it fits in a fixnum. */
116 return ret;
117}
118
119static unsigned long
120wide_string_hash (const scm_t_wchar *str, size_t len)
121{
122 unsigned long ret;
123 JENKINS_LOOKUP3_HASHWORD2 (str, len, ret);
124 ret >>= 2; /* Ensure that it fits in a fixnum. */
125 return ret;
126}
127
c014a02e 128unsigned long
1be6b49c 129scm_string_hash (const unsigned char *str, size_t len)
ba393257 130{
1a04d29d 131 return narrow_string_hash (str, len);
ba393257
DH
132}
133
e23106d5
MG
134unsigned long
135scm_i_string_hash (SCM str)
136{
137 size_t len = scm_i_string_length (str);
e23106d5 138
1a04d29d
AW
139 if (scm_i_is_narrow_string (str))
140 return narrow_string_hash ((const scm_t_uint8 *) scm_i_string_chars (str),
141 len);
142 else
143 return wide_string_hash (scm_i_string_wide_chars (str), len);
e23106d5
MG
144}
145
62241538
AW
146unsigned long
147scm_i_locale_string_hash (const char *str, size_t len)
148{
62241538 149 return scm_i_string_hash (scm_from_locale_stringn (str, len));
62241538
AW
150}
151
152unsigned long
153scm_i_latin1_string_hash (const char *str, size_t len)
154{
62241538
AW
155 if (len == (size_t) -1)
156 len = strlen (str);
157
1a04d29d 158 return narrow_string_hash ((const scm_t_uint8 *) str, len);
62241538
AW
159}
160
1a04d29d 161/* A tricky optimization, but probably worth it. */
62241538
AW
162unsigned long
163scm_i_utf8_string_hash (const char *str, size_t len)
164{
1a04d29d
AW
165 const scm_t_uint8 *end, *ustr = (const scm_t_uint8 *) str;
166 unsigned long ret;
167
168 /* The length of the string in characters. This name corresponds to
169 Jenkins' original name. */
170 size_t length;
171
172 scm_t_uint32 a, b, c, u32;
173
62241538
AW
174 if (len == (size_t) -1)
175 len = strlen (str);
176
1a04d29d
AW
177 end = ustr + len;
178
179 if (u8_check (ustr, len) != NULL)
180 /* Invalid UTF-8; punt. */
181 return scm_i_string_hash (scm_from_utf8_stringn (str, len));
182
183 length = u8_strnlen (ustr, len);
184
185 /* Set up the internal state. */
186 a = b = c = 0xdeadbeef + ((scm_t_uint32)(length<<2)) + 47;
187
188 /* Handle most of the key. */
189 while (length > 3)
62241538 190 {
1a04d29d
AW
191 ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
192 a += u32;
193 ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
194 b += u32;
195 ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
196 c += u32;
197 mix (a, b, c);
198 length -= 3;
62241538
AW
199 }
200
1a04d29d
AW
201 /* Handle the last 3 elements's. */
202 ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
203 a += u32;
204 if (--length)
205 {
206 ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
207 b += u32;
208 if (--length)
209 {
210 ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
211 c += u32;
212 }
213 }
214
215 final (a, b, c);
216
217 if (sizeof (unsigned long) == 8)
218 ret = (((unsigned long) c) << 32) | b;
219 else
220 ret = c;
221
222 ret >>= 2; /* Ensure that it fits in a fixnum. */
223 return ret;
62241538
AW
224}
225
ba393257 226
dba97178
DH
227/* Dirk:FIXME:: why downcase for characters? (2x: scm_hasher, scm_ihashv) */
228/* Dirk:FIXME:: scm_hasher could be made static. */
229
230
c014a02e
ML
231unsigned long
232scm_hasher(SCM obj, unsigned long n, size_t d)
0f2d19dd 233{
dba97178
DH
234 switch (SCM_ITAG3 (obj)) {
235 case scm_tc3_int_1:
236 case scm_tc3_int_2:
e11e83f3 237 return SCM_I_INUM(obj) % n; /* SCM_INUMP(obj) */
dba97178
DH
238 case scm_tc3_imm24:
239 if (SCM_CHARP(obj))
84fad130 240 return (unsigned)(scm_c_downcase(SCM_CHAR(obj))) % n;
dba97178 241 switch (SCM_UNPACK (obj)) {
210c0325 242 case SCM_EOL_BITS:
94a5efac
GB
243 d = 256;
244 break;
210c0325 245 case SCM_BOOL_T_BITS:
94a5efac
GB
246 d = 257;
247 break;
210c0325 248 case SCM_BOOL_F_BITS:
94a5efac
GB
249 d = 258;
250 break;
210c0325 251 case SCM_EOF_VAL_BITS:
94a5efac
GB
252 d = 259;
253 break;
254 default:
255 d = 263; /* perhaps should be error */
0f2d19dd
JB
256 }
257 return d % n;
94a5efac
GB
258 default:
259 return 263 % n; /* perhaps should be error */
dba97178 260 case scm_tc3_cons:
0f2d19dd 261 switch SCM_TYP7(obj) {
94a5efac
GB
262 default:
263 return 263 % n;
0f2d19dd 264 case scm_tc7_smob:
534c55a9
DH
265 return 263 % n;
266 case scm_tc7_number:
1be6b49c 267 switch SCM_TYP16 (obj) {
950cc72b 268 case scm_tc16_big:
e11e83f3 269 return scm_to_ulong (scm_modulo (obj, scm_from_ulong (n)));
950cc72b
MD
270 case scm_tc16_real:
271 {
1be6b49c 272 double r = SCM_REAL_VALUE (obj);
10483f9e 273 if (floor (r) == r && !isinf (r) && !isnan (r))
e11e83f3
MV
274 {
275 obj = scm_inexact_to_exact (obj);
276 return scm_to_ulong (scm_modulo (obj, scm_from_ulong (n)));
277 }
0f2d19dd 278 }
534c55a9 279 /* Fall through */
950cc72b 280 case scm_tc16_complex:
f92e85f7 281 case scm_tc16_fraction:
e11e83f3 282 obj = scm_number_to_string (obj, scm_from_int (10));
534c55a9 283 /* Fall through */
0f2d19dd 284 }
534c55a9 285 /* Fall through */
0f2d19dd 286 case scm_tc7_string:
8824ac88 287 {
5a6d139b 288 unsigned long hash =
e23106d5 289 scm_i_string_hash (obj) % n;
8824ac88
MV
290 return hash;
291 }
28b06554 292 case scm_tc7_symbol:
cc95e00a 293 return scm_i_symbol_hash (obj) % n;
3854d5fd
LC
294 case scm_tc7_pointer:
295 {
296 /* Pointer objects are typically used to store addresses of heap
297 objects. On most platforms, these are at least 3-byte
298 aligned (on x86_64-*-gnu, `malloc' returns 4-byte aligned
299 addresses), so get rid of the least significant bits. */
300 scm_t_uintptr significant_bits;
301
302 significant_bits = (scm_t_uintptr) SCM_POINTER_VALUE (obj) >> 4UL;
303 return (size_t) significant_bits % n;
304 }
0f2d19dd
JB
305 case scm_tc7_wvect:
306 case scm_tc7_vector:
307 {
4057a3e0 308 size_t len = SCM_SIMPLE_VECTOR_LENGTH (obj);
1be6b49c 309 if (len > 5)
0f2d19dd 310 {
1be6b49c 311 size_t i = d/2;
c014a02e 312 unsigned long h = 1;
4057a3e0
MV
313 while (i--)
314 {
315 SCM elt = SCM_SIMPLE_VECTOR_REF (obj, h % len);
316 h = ((h << 8) + (scm_hasher (elt, n, 2))) % n;
317 }
0f2d19dd
JB
318 return h;
319 }
320 else
321 {
1be6b49c 322 size_t i = len;
c014a02e 323 unsigned long h = (n)-1;
4057a3e0
MV
324 while (i--)
325 {
326 SCM elt = SCM_SIMPLE_VECTOR_REF (obj, h % len);
327 h = ((h << 8) + (scm_hasher (elt, n, d/len))) % n;
328 }
0f2d19dd
JB
329 return h;
330 }
331 }
94a5efac
GB
332 case scm_tcs_cons_imcar:
333 case scm_tcs_cons_nimcar:
1be6b49c
ML
334 if (d) return (scm_hasher (SCM_CAR (obj), n, d/2)
335 + scm_hasher (SCM_CDR (obj), n, d/2)) % n;
0f2d19dd
JB
336 else return 1;
337 case scm_tc7_port:
206d3de3 338 return ((SCM_RDNG & SCM_CELL_WORD_0 (obj)) ? 260 : 261) % n;
cc7005bc 339 case scm_tc7_program:
0f2d19dd
JB
340 return 262 % n;
341 }
342 }
343}
344
345
346\f
347
1cc91f1b 348
c014a02e
ML
349unsigned long
350scm_ihashq (SCM obj, unsigned long n)
0f2d19dd 351{
54778cd3 352 return (SCM_UNPACK (obj) >> 1) % n;
0f2d19dd
JB
353}
354
355
3b3b36dd 356SCM_DEFINE (scm_hashq, "hashq", 2, 0, 0,
94a5efac 357 (SCM key, SCM size),
5352393c
MG
358 "Determine a hash value for @var{key} that is suitable for\n"
359 "lookups in a hashtable of size @var{size}, where @code{eq?} is\n"
360 "used as the equality predicate. The function returns an\n"
361 "integer in the range 0 to @var{size} - 1. Note that\n"
362 "@code{hashq} may use internal addresses. Thus two calls to\n"
363 "hashq where the keys are @code{eq?} are not guaranteed to\n"
364 "deliver the same value if the key object gets garbage collected\n"
365 "in between. This can happen, for example with symbols:\n"
366 "@code{(hashq 'foo n) (gc) (hashq 'foo n)} may produce two\n"
367 "different values, since @code{foo} will be garbage collected.")
1bbd0b84 368#define FUNC_NAME s_scm_hashq
0f2d19dd 369{
a55c2b68
MV
370 unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX);
371 return scm_from_ulong (scm_ihashq (key, sz));
0f2d19dd 372}
1bbd0b84 373#undef FUNC_NAME
0f2d19dd
JB
374
375
376\f
377
1cc91f1b 378
c014a02e
ML
379unsigned long
380scm_ihashv (SCM obj, unsigned long n)
0f2d19dd 381{
7866a09b 382 if (SCM_CHARP(obj))
84fad130 383 return ((unsigned long) (scm_c_downcase (SCM_CHAR (obj)))) % n; /* downcase!?!! */
0f2d19dd 384
0c95b57d 385 if (SCM_NUMP(obj))
c014a02e 386 return (unsigned long) scm_hasher(obj, n, 10);
0f2d19dd 387 else
54778cd3 388 return SCM_UNPACK (obj) % n;
0f2d19dd
JB
389}
390
391
3b3b36dd 392SCM_DEFINE (scm_hashv, "hashv", 2, 0, 0,
94a5efac 393 (SCM key, SCM size),
5352393c
MG
394 "Determine a hash value for @var{key} that is suitable for\n"
395 "lookups in a hashtable of size @var{size}, where @code{eqv?} is\n"
396 "used as the equality predicate. The function returns an\n"
397 "integer in the range 0 to @var{size} - 1. Note that\n"
398 "@code{(hashv key)} may use internal addresses. Thus two calls\n"
399 "to hashv where the keys are @code{eqv?} are not guaranteed to\n"
400 "deliver the same value if the key object gets garbage collected\n"
401 "in between. This can happen, for example with symbols:\n"
402 "@code{(hashv 'foo n) (gc) (hashv 'foo n)} may produce two\n"
403 "different values, since @code{foo} will be garbage collected.")
1bbd0b84 404#define FUNC_NAME s_scm_hashv
0f2d19dd 405{
a55c2b68
MV
406 unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX);
407 return scm_from_ulong (scm_ihashv (key, sz));
0f2d19dd 408}
1bbd0b84 409#undef FUNC_NAME
0f2d19dd
JB
410
411
412\f
413
1cc91f1b 414
c014a02e
ML
415unsigned long
416scm_ihash (SCM obj, unsigned long n)
0f2d19dd 417{
c014a02e 418 return (unsigned long) scm_hasher (obj, n, 10);
0f2d19dd
JB
419}
420
3b3b36dd 421SCM_DEFINE (scm_hash, "hash", 2, 0, 0,
94a5efac 422 (SCM key, SCM size),
5352393c
MG
423 "Determine a hash value for @var{key} that is suitable for\n"
424 "lookups in a hashtable of size @var{size}, where @code{equal?}\n"
425 "is used as the equality predicate. The function returns an\n"
426 "integer in the range 0 to @var{size} - 1.")
1bbd0b84 427#define FUNC_NAME s_scm_hash
0f2d19dd 428{
a55c2b68
MV
429 unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX);
430 return scm_from_ulong (scm_ihash (key, sz));
0f2d19dd 431}
1bbd0b84 432#undef FUNC_NAME
0f2d19dd
JB
433
434
435\f
436
1cc91f1b 437
0f2d19dd
JB
438void
439scm_init_hash ()
0f2d19dd 440{
a0599745 441#include "libguile/hash.x"
0f2d19dd
JB
442}
443
89e00824
ML
444
445/*
446 Local Variables:
447 c-file-style: "gnu"
448 End:
449*/