Commit | Line | Data |
---|---|---|
247a56fa | 1 | /* Copyright (C) 1995,1996,1997, 2000, 2001, 2003, 2004, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. |
0f2d19dd | 2 | * |
73be1d9e | 3 | * This library is free software; you can redistribute it and/or |
53befeb7 NJ |
4 | * modify it under the terms of the GNU Lesser General Public License |
5 | * as published by the Free Software Foundation; either version 3 of | |
6 | * the License, or (at your option) any later version. | |
0f2d19dd | 7 | * |
53befeb7 NJ |
8 | * This library is distributed in the hope that it will be useful, but |
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
73be1d9e MV |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
11 | * Lesser General Public License for more details. | |
0f2d19dd | 12 | * |
73be1d9e MV |
13 | * You should have received a copy of the GNU Lesser General Public |
14 | * License along with this library; if not, write to the Free Software | |
53befeb7 NJ |
15 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
16 | * 02110-1301 USA | |
73be1d9e | 17 | */ |
1bbd0b84 | 18 | |
1bbd0b84 | 19 | |
0f2d19dd | 20 | \f |
dbb605f5 LC |
21 | #ifdef HAVE_CONFIG_H |
22 | # include <config.h> | |
23 | #endif | |
0f2d19dd | 24 | |
62241538 AW |
25 | #ifdef HAVE_WCHAR_H |
26 | #include <wchar.h> | |
27 | #endif | |
28 | ||
10483f9e | 29 | #include <math.h> |
62241538 AW |
30 | #include <unistr.h> |
31 | ||
a0599745 MD |
32 | #include "libguile/_scm.h" |
33 | #include "libguile/chars.h" | |
34 | #include "libguile/ports.h" | |
a002f1a2 DH |
35 | #include "libguile/strings.h" |
36 | #include "libguile/symbols.h" | |
a0599745 | 37 | #include "libguile/vectors.h" |
0f2d19dd | 38 | |
a0599745 MD |
39 | #include "libguile/validate.h" |
40 | #include "libguile/hash.h" | |
0f2d19dd JB |
41 | \f |
42 | ||
43 | #ifndef floor | |
44 | extern double floor(); | |
45 | #endif | |
46 | ||
1cc91f1b | 47 | |
c014a02e | 48 | unsigned long |
1be6b49c | 49 | scm_string_hash (const unsigned char *str, size_t len) |
ba393257 | 50 | { |
b4d59261 MV |
51 | /* from suggestion at: */ |
52 | /* http://srfi.schemers.org/srfi-13/mail-archive/msg00112.html */ | |
53 | ||
54 | unsigned long h = 0; | |
55 | while (len-- > 0) | |
56 | h = *str++ + h*37; | |
57 | return h; | |
ba393257 DH |
58 | } |
59 | ||
e23106d5 MG |
60 | unsigned long |
61 | scm_i_string_hash (SCM str) | |
62 | { | |
63 | size_t len = scm_i_string_length (str); | |
64 | size_t i = 0; | |
65 | ||
66 | unsigned long h = 0; | |
67 | while (len-- > 0) | |
68 | h = (unsigned long) scm_i_string_ref (str, i++) + h * 37; | |
69 | ||
70 | scm_remember_upto_here_1 (str); | |
71 | return h; | |
72 | } | |
73 | ||
62241538 AW |
74 | unsigned long |
75 | scm_i_locale_string_hash (const char *str, size_t len) | |
76 | { | |
77 | #ifdef HAVE_WCHAR_H | |
78 | mbstate_t state; | |
79 | wchar_t c; | |
80 | size_t byte_idx = 0, nbytes; | |
81 | unsigned long h = 0; | |
82 | ||
83 | if (len == (size_t) -1) | |
84 | len = strlen (str); | |
85 | ||
86 | while ((nbytes = mbrtowc (&c, str + byte_idx, len - byte_idx, &state)) > 0) | |
87 | { | |
88 | if (nbytes >= (size_t) -2) | |
89 | /* Invalid input string; punt. */ | |
90 | return scm_i_string_hash (scm_from_locale_stringn (str, len)); | |
91 | ||
92 | h = (unsigned long) c + h * 37; | |
93 | byte_idx += nbytes; | |
94 | } | |
95 | ||
96 | return h; | |
97 | #else | |
98 | return scm_i_string_hash (scm_from_locale_stringn (str, len)); | |
99 | #endif | |
100 | } | |
101 | ||
102 | unsigned long | |
103 | scm_i_latin1_string_hash (const char *str, size_t len) | |
104 | { | |
105 | const scm_t_uint8 *ustr = (const scm_t_uint8 *) str; | |
106 | size_t i = 0; | |
107 | unsigned long h = 0; | |
108 | ||
109 | if (len == (size_t) -1) | |
110 | len = strlen (str); | |
111 | ||
112 | for (; i < len; i++) | |
113 | h = (unsigned long) ustr[i] + h * 37; | |
114 | ||
115 | return h; | |
116 | } | |
117 | ||
118 | unsigned long | |
119 | scm_i_utf8_string_hash (const char *str, size_t len) | |
120 | { | |
121 | const scm_t_uint8 *ustr = (const scm_t_uint8 *) str; | |
122 | size_t byte_idx = 0; | |
123 | unsigned long h = 0; | |
124 | ||
125 | if (len == (size_t) -1) | |
126 | len = strlen (str); | |
127 | ||
128 | while (byte_idx < len) | |
129 | { | |
130 | ucs4_t c; | |
131 | int nbytes; | |
132 | ||
133 | nbytes = u8_mbtouc (&c, ustr + byte_idx, len - byte_idx); | |
134 | if (nbytes == 0) | |
135 | break; | |
136 | else if (nbytes < 0) | |
137 | /* Bad UTF-8; punt. */ | |
138 | return scm_i_string_hash (scm_from_utf8_stringn (str, len)); | |
139 | ||
140 | h = (unsigned long) c + h * 37; | |
141 | byte_idx += nbytes; | |
142 | } | |
143 | ||
144 | return h; | |
145 | } | |
146 | ||
ba393257 | 147 | |
dba97178 DH |
148 | /* Dirk:FIXME:: why downcase for characters? (2x: scm_hasher, scm_ihashv) */ |
149 | /* Dirk:FIXME:: scm_hasher could be made static. */ | |
150 | ||
151 | ||
c014a02e ML |
152 | unsigned long |
153 | scm_hasher(SCM obj, unsigned long n, size_t d) | |
0f2d19dd | 154 | { |
dba97178 DH |
155 | switch (SCM_ITAG3 (obj)) { |
156 | case scm_tc3_int_1: | |
157 | case scm_tc3_int_2: | |
e11e83f3 | 158 | return SCM_I_INUM(obj) % n; /* SCM_INUMP(obj) */ |
dba97178 DH |
159 | case scm_tc3_imm24: |
160 | if (SCM_CHARP(obj)) | |
84fad130 | 161 | return (unsigned)(scm_c_downcase(SCM_CHAR(obj))) % n; |
dba97178 | 162 | switch (SCM_UNPACK (obj)) { |
210c0325 | 163 | case SCM_EOL_BITS: |
94a5efac GB |
164 | d = 256; |
165 | break; | |
210c0325 | 166 | case SCM_BOOL_T_BITS: |
94a5efac GB |
167 | d = 257; |
168 | break; | |
210c0325 | 169 | case SCM_BOOL_F_BITS: |
94a5efac GB |
170 | d = 258; |
171 | break; | |
210c0325 | 172 | case SCM_EOF_VAL_BITS: |
94a5efac GB |
173 | d = 259; |
174 | break; | |
175 | default: | |
176 | d = 263; /* perhaps should be error */ | |
0f2d19dd JB |
177 | } |
178 | return d % n; | |
94a5efac GB |
179 | default: |
180 | return 263 % n; /* perhaps should be error */ | |
dba97178 | 181 | case scm_tc3_cons: |
0f2d19dd | 182 | switch SCM_TYP7(obj) { |
94a5efac GB |
183 | default: |
184 | return 263 % n; | |
0f2d19dd | 185 | case scm_tc7_smob: |
534c55a9 DH |
186 | return 263 % n; |
187 | case scm_tc7_number: | |
1be6b49c | 188 | switch SCM_TYP16 (obj) { |
950cc72b | 189 | case scm_tc16_big: |
e11e83f3 | 190 | return scm_to_ulong (scm_modulo (obj, scm_from_ulong (n))); |
950cc72b MD |
191 | case scm_tc16_real: |
192 | { | |
1be6b49c | 193 | double r = SCM_REAL_VALUE (obj); |
10483f9e | 194 | if (floor (r) == r && !isinf (r) && !isnan (r)) |
e11e83f3 MV |
195 | { |
196 | obj = scm_inexact_to_exact (obj); | |
197 | return scm_to_ulong (scm_modulo (obj, scm_from_ulong (n))); | |
198 | } | |
0f2d19dd | 199 | } |
534c55a9 | 200 | /* Fall through */ |
950cc72b | 201 | case scm_tc16_complex: |
f92e85f7 | 202 | case scm_tc16_fraction: |
e11e83f3 | 203 | obj = scm_number_to_string (obj, scm_from_int (10)); |
534c55a9 | 204 | /* Fall through */ |
0f2d19dd | 205 | } |
534c55a9 | 206 | /* Fall through */ |
0f2d19dd | 207 | case scm_tc7_string: |
8824ac88 | 208 | { |
5a6d139b | 209 | unsigned long hash = |
e23106d5 | 210 | scm_i_string_hash (obj) % n; |
8824ac88 MV |
211 | return hash; |
212 | } | |
28b06554 | 213 | case scm_tc7_symbol: |
cc95e00a | 214 | return scm_i_symbol_hash (obj) % n; |
3854d5fd LC |
215 | case scm_tc7_pointer: |
216 | { | |
217 | /* Pointer objects are typically used to store addresses of heap | |
218 | objects. On most platforms, these are at least 3-byte | |
219 | aligned (on x86_64-*-gnu, `malloc' returns 4-byte aligned | |
220 | addresses), so get rid of the least significant bits. */ | |
221 | scm_t_uintptr significant_bits; | |
222 | ||
223 | significant_bits = (scm_t_uintptr) SCM_POINTER_VALUE (obj) >> 4UL; | |
224 | return (size_t) significant_bits % n; | |
225 | } | |
0f2d19dd JB |
226 | case scm_tc7_wvect: |
227 | case scm_tc7_vector: | |
228 | { | |
4057a3e0 | 229 | size_t len = SCM_SIMPLE_VECTOR_LENGTH (obj); |
1be6b49c | 230 | if (len > 5) |
0f2d19dd | 231 | { |
1be6b49c | 232 | size_t i = d/2; |
c014a02e | 233 | unsigned long h = 1; |
4057a3e0 MV |
234 | while (i--) |
235 | { | |
236 | SCM elt = SCM_SIMPLE_VECTOR_REF (obj, h % len); | |
237 | h = ((h << 8) + (scm_hasher (elt, n, 2))) % n; | |
238 | } | |
0f2d19dd JB |
239 | return h; |
240 | } | |
241 | else | |
242 | { | |
1be6b49c | 243 | size_t i = len; |
c014a02e | 244 | unsigned long h = (n)-1; |
4057a3e0 MV |
245 | while (i--) |
246 | { | |
247 | SCM elt = SCM_SIMPLE_VECTOR_REF (obj, h % len); | |
248 | h = ((h << 8) + (scm_hasher (elt, n, d/len))) % n; | |
249 | } | |
0f2d19dd JB |
250 | return h; |
251 | } | |
252 | } | |
94a5efac GB |
253 | case scm_tcs_cons_imcar: |
254 | case scm_tcs_cons_nimcar: | |
1be6b49c ML |
255 | if (d) return (scm_hasher (SCM_CAR (obj), n, d/2) |
256 | + scm_hasher (SCM_CDR (obj), n, d/2)) % n; | |
0f2d19dd JB |
257 | else return 1; |
258 | case scm_tc7_port: | |
206d3de3 | 259 | return ((SCM_RDNG & SCM_CELL_WORD_0 (obj)) ? 260 : 261) % n; |
cc7005bc | 260 | case scm_tc7_program: |
0f2d19dd JB |
261 | return 262 % n; |
262 | } | |
263 | } | |
264 | } | |
265 | ||
266 | ||
267 | \f | |
268 | ||
1cc91f1b | 269 | |
c014a02e ML |
270 | unsigned long |
271 | scm_ihashq (SCM obj, unsigned long n) | |
0f2d19dd | 272 | { |
54778cd3 | 273 | return (SCM_UNPACK (obj) >> 1) % n; |
0f2d19dd JB |
274 | } |
275 | ||
276 | ||
3b3b36dd | 277 | SCM_DEFINE (scm_hashq, "hashq", 2, 0, 0, |
94a5efac | 278 | (SCM key, SCM size), |
5352393c MG |
279 | "Determine a hash value for @var{key} that is suitable for\n" |
280 | "lookups in a hashtable of size @var{size}, where @code{eq?} is\n" | |
281 | "used as the equality predicate. The function returns an\n" | |
282 | "integer in the range 0 to @var{size} - 1. Note that\n" | |
283 | "@code{hashq} may use internal addresses. Thus two calls to\n" | |
284 | "hashq where the keys are @code{eq?} are not guaranteed to\n" | |
285 | "deliver the same value if the key object gets garbage collected\n" | |
286 | "in between. This can happen, for example with symbols:\n" | |
287 | "@code{(hashq 'foo n) (gc) (hashq 'foo n)} may produce two\n" | |
288 | "different values, since @code{foo} will be garbage collected.") | |
1bbd0b84 | 289 | #define FUNC_NAME s_scm_hashq |
0f2d19dd | 290 | { |
a55c2b68 MV |
291 | unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX); |
292 | return scm_from_ulong (scm_ihashq (key, sz)); | |
0f2d19dd | 293 | } |
1bbd0b84 | 294 | #undef FUNC_NAME |
0f2d19dd JB |
295 | |
296 | ||
297 | \f | |
298 | ||
1cc91f1b | 299 | |
c014a02e ML |
300 | unsigned long |
301 | scm_ihashv (SCM obj, unsigned long n) | |
0f2d19dd | 302 | { |
7866a09b | 303 | if (SCM_CHARP(obj)) |
84fad130 | 304 | return ((unsigned long) (scm_c_downcase (SCM_CHAR (obj)))) % n; /* downcase!?!! */ |
0f2d19dd | 305 | |
0c95b57d | 306 | if (SCM_NUMP(obj)) |
c014a02e | 307 | return (unsigned long) scm_hasher(obj, n, 10); |
0f2d19dd | 308 | else |
54778cd3 | 309 | return SCM_UNPACK (obj) % n; |
0f2d19dd JB |
310 | } |
311 | ||
312 | ||
3b3b36dd | 313 | SCM_DEFINE (scm_hashv, "hashv", 2, 0, 0, |
94a5efac | 314 | (SCM key, SCM size), |
5352393c MG |
315 | "Determine a hash value for @var{key} that is suitable for\n" |
316 | "lookups in a hashtable of size @var{size}, where @code{eqv?} is\n" | |
317 | "used as the equality predicate. The function returns an\n" | |
318 | "integer in the range 0 to @var{size} - 1. Note that\n" | |
319 | "@code{(hashv key)} may use internal addresses. Thus two calls\n" | |
320 | "to hashv where the keys are @code{eqv?} are not guaranteed to\n" | |
321 | "deliver the same value if the key object gets garbage collected\n" | |
322 | "in between. This can happen, for example with symbols:\n" | |
323 | "@code{(hashv 'foo n) (gc) (hashv 'foo n)} may produce two\n" | |
324 | "different values, since @code{foo} will be garbage collected.") | |
1bbd0b84 | 325 | #define FUNC_NAME s_scm_hashv |
0f2d19dd | 326 | { |
a55c2b68 MV |
327 | unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX); |
328 | return scm_from_ulong (scm_ihashv (key, sz)); | |
0f2d19dd | 329 | } |
1bbd0b84 | 330 | #undef FUNC_NAME |
0f2d19dd JB |
331 | |
332 | ||
333 | \f | |
334 | ||
1cc91f1b | 335 | |
c014a02e ML |
336 | unsigned long |
337 | scm_ihash (SCM obj, unsigned long n) | |
0f2d19dd | 338 | { |
c014a02e | 339 | return (unsigned long) scm_hasher (obj, n, 10); |
0f2d19dd JB |
340 | } |
341 | ||
3b3b36dd | 342 | SCM_DEFINE (scm_hash, "hash", 2, 0, 0, |
94a5efac | 343 | (SCM key, SCM size), |
5352393c MG |
344 | "Determine a hash value for @var{key} that is suitable for\n" |
345 | "lookups in a hashtable of size @var{size}, where @code{equal?}\n" | |
346 | "is used as the equality predicate. The function returns an\n" | |
347 | "integer in the range 0 to @var{size} - 1.") | |
1bbd0b84 | 348 | #define FUNC_NAME s_scm_hash |
0f2d19dd | 349 | { |
a55c2b68 MV |
350 | unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX); |
351 | return scm_from_ulong (scm_ihash (key, sz)); | |
0f2d19dd | 352 | } |
1bbd0b84 | 353 | #undef FUNC_NAME |
0f2d19dd JB |
354 | |
355 | ||
356 | \f | |
357 | ||
1cc91f1b | 358 | |
0f2d19dd JB |
359 | void |
360 | scm_init_hash () | |
0f2d19dd | 361 | { |
a0599745 | 362 | #include "libguile/hash.x" |
0f2d19dd JB |
363 | } |
364 | ||
89e00824 ML |
365 | |
366 | /* | |
367 | Local Variables: | |
368 | c-file-style: "gnu" | |
369 | End: | |
370 | */ |