Update Gnulib to v0.0-6827-g39c3009; use the `dirfd' module.
[bpt/guile.git] / libguile / hash.c
CommitLineData
247a56fa 1/* Copyright (C) 1995,1996,1997, 2000, 2001, 2003, 2004, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
0f2d19dd 2 *
73be1d9e 3 * This library is free software; you can redistribute it and/or
53befeb7
NJ
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
0f2d19dd 7 *
53befeb7
NJ
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
0f2d19dd 12 *
73be1d9e
MV
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
53befeb7
NJ
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
73be1d9e 17 */
1bbd0b84 18
1bbd0b84 19
0f2d19dd 20\f
dbb605f5
LC
21#ifdef HAVE_CONFIG_H
22# include <config.h>
23#endif
0f2d19dd 24
62241538
AW
25#ifdef HAVE_WCHAR_H
26#include <wchar.h>
27#endif
28
10483f9e 29#include <math.h>
62241538
AW
30#include <unistr.h>
31
a0599745
MD
32#include "libguile/_scm.h"
33#include "libguile/chars.h"
34#include "libguile/ports.h"
a002f1a2
DH
35#include "libguile/strings.h"
36#include "libguile/symbols.h"
a0599745 37#include "libguile/vectors.h"
0f2d19dd 38
a0599745
MD
39#include "libguile/validate.h"
40#include "libguile/hash.h"
0f2d19dd
JB
41\f
42
43#ifndef floor
44extern double floor();
45#endif
46
1cc91f1b 47
c014a02e 48unsigned long
1be6b49c 49scm_string_hash (const unsigned char *str, size_t len)
ba393257 50{
b4d59261
MV
51 /* from suggestion at: */
52 /* http://srfi.schemers.org/srfi-13/mail-archive/msg00112.html */
53
54 unsigned long h = 0;
55 while (len-- > 0)
56 h = *str++ + h*37;
57 return h;
ba393257
DH
58}
59
e23106d5
MG
60unsigned long
61scm_i_string_hash (SCM str)
62{
63 size_t len = scm_i_string_length (str);
64 size_t i = 0;
65
66 unsigned long h = 0;
67 while (len-- > 0)
68 h = (unsigned long) scm_i_string_ref (str, i++) + h * 37;
69
70 scm_remember_upto_here_1 (str);
71 return h;
72}
73
62241538
AW
74unsigned long
75scm_i_locale_string_hash (const char *str, size_t len)
76{
77#ifdef HAVE_WCHAR_H
78 mbstate_t state;
79 wchar_t c;
80 size_t byte_idx = 0, nbytes;
81 unsigned long h = 0;
82
83 if (len == (size_t) -1)
84 len = strlen (str);
85
86 while ((nbytes = mbrtowc (&c, str + byte_idx, len - byte_idx, &state)) > 0)
87 {
88 if (nbytes >= (size_t) -2)
89 /* Invalid input string; punt. */
90 return scm_i_string_hash (scm_from_locale_stringn (str, len));
91
92 h = (unsigned long) c + h * 37;
93 byte_idx += nbytes;
94 }
95
96 return h;
97#else
98 return scm_i_string_hash (scm_from_locale_stringn (str, len));
99#endif
100}
101
102unsigned long
103scm_i_latin1_string_hash (const char *str, size_t len)
104{
105 const scm_t_uint8 *ustr = (const scm_t_uint8 *) str;
106 size_t i = 0;
107 unsigned long h = 0;
108
109 if (len == (size_t) -1)
110 len = strlen (str);
111
112 for (; i < len; i++)
113 h = (unsigned long) ustr[i] + h * 37;
114
115 return h;
116}
117
118unsigned long
119scm_i_utf8_string_hash (const char *str, size_t len)
120{
121 const scm_t_uint8 *ustr = (const scm_t_uint8 *) str;
122 size_t byte_idx = 0;
123 unsigned long h = 0;
124
125 if (len == (size_t) -1)
126 len = strlen (str);
127
128 while (byte_idx < len)
129 {
130 ucs4_t c;
131 int nbytes;
132
133 nbytes = u8_mbtouc (&c, ustr + byte_idx, len - byte_idx);
134 if (nbytes == 0)
135 break;
136 else if (nbytes < 0)
137 /* Bad UTF-8; punt. */
138 return scm_i_string_hash (scm_from_utf8_stringn (str, len));
139
140 h = (unsigned long) c + h * 37;
141 byte_idx += nbytes;
142 }
143
144 return h;
145}
146
ba393257 147
dba97178
DH
148/* Dirk:FIXME:: why downcase for characters? (2x: scm_hasher, scm_ihashv) */
149/* Dirk:FIXME:: scm_hasher could be made static. */
150
151
c014a02e
ML
152unsigned long
153scm_hasher(SCM obj, unsigned long n, size_t d)
0f2d19dd 154{
dba97178
DH
155 switch (SCM_ITAG3 (obj)) {
156 case scm_tc3_int_1:
157 case scm_tc3_int_2:
e11e83f3 158 return SCM_I_INUM(obj) % n; /* SCM_INUMP(obj) */
dba97178
DH
159 case scm_tc3_imm24:
160 if (SCM_CHARP(obj))
84fad130 161 return (unsigned)(scm_c_downcase(SCM_CHAR(obj))) % n;
dba97178 162 switch (SCM_UNPACK (obj)) {
210c0325 163 case SCM_EOL_BITS:
94a5efac
GB
164 d = 256;
165 break;
210c0325 166 case SCM_BOOL_T_BITS:
94a5efac
GB
167 d = 257;
168 break;
210c0325 169 case SCM_BOOL_F_BITS:
94a5efac
GB
170 d = 258;
171 break;
210c0325 172 case SCM_EOF_VAL_BITS:
94a5efac
GB
173 d = 259;
174 break;
175 default:
176 d = 263; /* perhaps should be error */
0f2d19dd
JB
177 }
178 return d % n;
94a5efac
GB
179 default:
180 return 263 % n; /* perhaps should be error */
dba97178 181 case scm_tc3_cons:
0f2d19dd 182 switch SCM_TYP7(obj) {
94a5efac
GB
183 default:
184 return 263 % n;
0f2d19dd 185 case scm_tc7_smob:
534c55a9
DH
186 return 263 % n;
187 case scm_tc7_number:
1be6b49c 188 switch SCM_TYP16 (obj) {
950cc72b 189 case scm_tc16_big:
e11e83f3 190 return scm_to_ulong (scm_modulo (obj, scm_from_ulong (n)));
950cc72b
MD
191 case scm_tc16_real:
192 {
1be6b49c 193 double r = SCM_REAL_VALUE (obj);
10483f9e 194 if (floor (r) == r && !isinf (r) && !isnan (r))
e11e83f3
MV
195 {
196 obj = scm_inexact_to_exact (obj);
197 return scm_to_ulong (scm_modulo (obj, scm_from_ulong (n)));
198 }
0f2d19dd 199 }
534c55a9 200 /* Fall through */
950cc72b 201 case scm_tc16_complex:
f92e85f7 202 case scm_tc16_fraction:
e11e83f3 203 obj = scm_number_to_string (obj, scm_from_int (10));
534c55a9 204 /* Fall through */
0f2d19dd 205 }
534c55a9 206 /* Fall through */
0f2d19dd 207 case scm_tc7_string:
8824ac88 208 {
5a6d139b 209 unsigned long hash =
e23106d5 210 scm_i_string_hash (obj) % n;
8824ac88
MV
211 return hash;
212 }
28b06554 213 case scm_tc7_symbol:
cc95e00a 214 return scm_i_symbol_hash (obj) % n;
3854d5fd
LC
215 case scm_tc7_pointer:
216 {
217 /* Pointer objects are typically used to store addresses of heap
218 objects. On most platforms, these are at least 3-byte
219 aligned (on x86_64-*-gnu, `malloc' returns 4-byte aligned
220 addresses), so get rid of the least significant bits. */
221 scm_t_uintptr significant_bits;
222
223 significant_bits = (scm_t_uintptr) SCM_POINTER_VALUE (obj) >> 4UL;
224 return (size_t) significant_bits % n;
225 }
0f2d19dd
JB
226 case scm_tc7_wvect:
227 case scm_tc7_vector:
228 {
4057a3e0 229 size_t len = SCM_SIMPLE_VECTOR_LENGTH (obj);
1be6b49c 230 if (len > 5)
0f2d19dd 231 {
1be6b49c 232 size_t i = d/2;
c014a02e 233 unsigned long h = 1;
4057a3e0
MV
234 while (i--)
235 {
236 SCM elt = SCM_SIMPLE_VECTOR_REF (obj, h % len);
237 h = ((h << 8) + (scm_hasher (elt, n, 2))) % n;
238 }
0f2d19dd
JB
239 return h;
240 }
241 else
242 {
1be6b49c 243 size_t i = len;
c014a02e 244 unsigned long h = (n)-1;
4057a3e0
MV
245 while (i--)
246 {
247 SCM elt = SCM_SIMPLE_VECTOR_REF (obj, h % len);
248 h = ((h << 8) + (scm_hasher (elt, n, d/len))) % n;
249 }
0f2d19dd
JB
250 return h;
251 }
252 }
94a5efac
GB
253 case scm_tcs_cons_imcar:
254 case scm_tcs_cons_nimcar:
1be6b49c
ML
255 if (d) return (scm_hasher (SCM_CAR (obj), n, d/2)
256 + scm_hasher (SCM_CDR (obj), n, d/2)) % n;
0f2d19dd
JB
257 else return 1;
258 case scm_tc7_port:
206d3de3 259 return ((SCM_RDNG & SCM_CELL_WORD_0 (obj)) ? 260 : 261) % n;
cc7005bc 260 case scm_tc7_program:
0f2d19dd
JB
261 return 262 % n;
262 }
263 }
264}
265
266
267\f
268
1cc91f1b 269
c014a02e
ML
270unsigned long
271scm_ihashq (SCM obj, unsigned long n)
0f2d19dd 272{
54778cd3 273 return (SCM_UNPACK (obj) >> 1) % n;
0f2d19dd
JB
274}
275
276
3b3b36dd 277SCM_DEFINE (scm_hashq, "hashq", 2, 0, 0,
94a5efac 278 (SCM key, SCM size),
5352393c
MG
279 "Determine a hash value for @var{key} that is suitable for\n"
280 "lookups in a hashtable of size @var{size}, where @code{eq?} is\n"
281 "used as the equality predicate. The function returns an\n"
282 "integer in the range 0 to @var{size} - 1. Note that\n"
283 "@code{hashq} may use internal addresses. Thus two calls to\n"
284 "hashq where the keys are @code{eq?} are not guaranteed to\n"
285 "deliver the same value if the key object gets garbage collected\n"
286 "in between. This can happen, for example with symbols:\n"
287 "@code{(hashq 'foo n) (gc) (hashq 'foo n)} may produce two\n"
288 "different values, since @code{foo} will be garbage collected.")
1bbd0b84 289#define FUNC_NAME s_scm_hashq
0f2d19dd 290{
a55c2b68
MV
291 unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX);
292 return scm_from_ulong (scm_ihashq (key, sz));
0f2d19dd 293}
1bbd0b84 294#undef FUNC_NAME
0f2d19dd
JB
295
296
297\f
298
1cc91f1b 299
c014a02e
ML
300unsigned long
301scm_ihashv (SCM obj, unsigned long n)
0f2d19dd 302{
7866a09b 303 if (SCM_CHARP(obj))
84fad130 304 return ((unsigned long) (scm_c_downcase (SCM_CHAR (obj)))) % n; /* downcase!?!! */
0f2d19dd 305
0c95b57d 306 if (SCM_NUMP(obj))
c014a02e 307 return (unsigned long) scm_hasher(obj, n, 10);
0f2d19dd 308 else
54778cd3 309 return SCM_UNPACK (obj) % n;
0f2d19dd
JB
310}
311
312
3b3b36dd 313SCM_DEFINE (scm_hashv, "hashv", 2, 0, 0,
94a5efac 314 (SCM key, SCM size),
5352393c
MG
315 "Determine a hash value for @var{key} that is suitable for\n"
316 "lookups in a hashtable of size @var{size}, where @code{eqv?} is\n"
317 "used as the equality predicate. The function returns an\n"
318 "integer in the range 0 to @var{size} - 1. Note that\n"
319 "@code{(hashv key)} may use internal addresses. Thus two calls\n"
320 "to hashv where the keys are @code{eqv?} are not guaranteed to\n"
321 "deliver the same value if the key object gets garbage collected\n"
322 "in between. This can happen, for example with symbols:\n"
323 "@code{(hashv 'foo n) (gc) (hashv 'foo n)} may produce two\n"
324 "different values, since @code{foo} will be garbage collected.")
1bbd0b84 325#define FUNC_NAME s_scm_hashv
0f2d19dd 326{
a55c2b68
MV
327 unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX);
328 return scm_from_ulong (scm_ihashv (key, sz));
0f2d19dd 329}
1bbd0b84 330#undef FUNC_NAME
0f2d19dd
JB
331
332
333\f
334
1cc91f1b 335
c014a02e
ML
336unsigned long
337scm_ihash (SCM obj, unsigned long n)
0f2d19dd 338{
c014a02e 339 return (unsigned long) scm_hasher (obj, n, 10);
0f2d19dd
JB
340}
341
3b3b36dd 342SCM_DEFINE (scm_hash, "hash", 2, 0, 0,
94a5efac 343 (SCM key, SCM size),
5352393c
MG
344 "Determine a hash value for @var{key} that is suitable for\n"
345 "lookups in a hashtable of size @var{size}, where @code{equal?}\n"
346 "is used as the equality predicate. The function returns an\n"
347 "integer in the range 0 to @var{size} - 1.")
1bbd0b84 348#define FUNC_NAME s_scm_hash
0f2d19dd 349{
a55c2b68
MV
350 unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX);
351 return scm_from_ulong (scm_ihash (key, sz));
0f2d19dd 352}
1bbd0b84 353#undef FUNC_NAME
0f2d19dd
JB
354
355
356\f
357
1cc91f1b 358
0f2d19dd
JB
359void
360scm_init_hash ()
0f2d19dd 361{
a0599745 362#include "libguile/hash.x"
0f2d19dd
JB
363}
364
89e00824
ML
365
366/*
367 Local Variables:
368 c-file-style: "gnu"
369 End:
370*/