Merge branch 'master' into boehm-demers-weiser-gc
[bpt/guile.git] / libguile / symbols.c
1 /* Copyright (C) 1995,1996,1997,1998,2000,2001, 2003, 2004, 2006, 2009 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
17 */
18
19
20 \f
21 #ifdef HAVE_CONFIG_H
22 # include <config.h>
23 #endif
24
25 #include "libguile/_scm.h"
26 #include "libguile/chars.h"
27 #include "libguile/eval.h"
28 #include "libguile/hash.h"
29 #include "libguile/smob.h"
30 #include "libguile/variable.h"
31 #include "libguile/alist.h"
32 #include "libguile/fluids.h"
33 #include "libguile/strings.h"
34 #include "libguile/vectors.h"
35 #include "libguile/hashtab.h"
36 #include "libguile/weaks.h"
37 #include "libguile/modules.h"
38 #include "libguile/read.h"
39 #include "libguile/srfi-13.h"
40
41 #include "libguile/validate.h"
42 #include "libguile/symbols.h"
43
44 #include "libguile/private-options.h"
45
46
47 #ifdef HAVE_STRING_H
48 #include <string.h>
49 #endif
50
51 \f
52
53 static SCM symbols;
54
55 #ifdef GUILE_DEBUG
56 SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0,
57 (),
58 "Return the system symbol obarray.")
59 #define FUNC_NAME s_scm_sys_symbols
60 {
61 return symbols;
62 }
63 #undef FUNC_NAME
64 #endif
65
66 \f
67
68 /* {Symbols}
69 */
70
71 /* In order to optimize reading speed, this function breaks part of
72 * the hashtable abstraction. The optimizations are:
73 *
74 * 1. The argument string can be compared directly to symbol objects
75 * without first creating an SCM string object. (This would have
76 * been necessary if we had used the hashtable API in hashtab.h.)
77 *
78 * 2. We can use the raw hash value stored in scm_i_symbol_hash (sym)
79 * to speed up lookup.
80 *
81 * Both optimizations might be possible without breaking the
82 * abstraction if the API in hashtab.c is improved.
83 */
84
85 unsigned long
86 scm_i_hash_symbol (SCM obj, unsigned long n, void *closure)
87 {
88 return scm_i_symbol_hash (obj) % n;
89 }
90
91 static SCM
92 lookup_interned_symbol (const char *name, size_t len,
93 unsigned long raw_hash)
94 {
95 /* Try to find the symbol in the symbols table */
96 SCM result = SCM_BOOL_F;
97 SCM bucket, elt, previous_elt;
98 unsigned long hash = raw_hash % SCM_HASHTABLE_N_BUCKETS (symbols);
99
100 bucket = SCM_HASHTABLE_BUCKET (symbols, hash);
101 for (elt = bucket, previous_elt = SCM_BOOL_F;
102 !scm_is_null (elt);
103 previous_elt = elt, elt = SCM_CDR (elt))
104 {
105 SCM pair, sym;
106
107 pair = SCM_CAR (elt);
108 if (!scm_is_pair (pair))
109 abort ();
110
111 if (SCM_WEAK_PAIR_CAR_DELETED_P (pair))
112 {
113 /* PAIR is a weak pair whose key got nullified: remove it from
114 BUCKET. */
115 /* FIXME: Since this is done lazily, i.e., only when a new symbol
116 is to be inserted in a bucket containing deleted symbols, the
117 number of items in the hash table may remain erroneous for some
118 time, thus precluding proper rehashing. */
119 if (previous_elt != SCM_BOOL_F)
120 SCM_SETCDR (previous_elt, SCM_CDR (elt));
121 else
122 bucket = SCM_CDR (elt);
123
124 SCM_HASHTABLE_DECREMENT (symbols);
125 continue;
126 }
127
128 sym = SCM_CAR (pair);
129
130 if (scm_i_symbol_hash (sym) == raw_hash
131 && scm_i_symbol_length (sym) == len)
132 {
133 const char *chrs = scm_i_symbol_chars (sym);
134 size_t i = len;
135
136 while (i != 0)
137 {
138 --i;
139 if (name[i] != chrs[i])
140 goto next_symbol;
141 }
142
143 /* We found it. */
144 result = sym;
145 break;
146 }
147 next_symbol:
148 ;
149 }
150
151 if (SCM_HASHTABLE_N_ITEMS (symbols) < SCM_HASHTABLE_LOWER (symbols))
152 /* We removed many symbols in this pass so trigger a rehashing. */
153 scm_i_rehash (symbols, scm_i_hash_symbol, 0, "lookup_interned_symbol");
154
155 return result;
156 }
157
158 /* Intern SYMBOL, an uninterned symbol. */
159 static void
160 intern_symbol (SCM symbol)
161 {
162 SCM slot, cell;
163 unsigned long hash;
164
165 hash = scm_i_symbol_hash (symbol) % SCM_HASHTABLE_N_BUCKETS (symbols);
166 slot = SCM_HASHTABLE_BUCKET (symbols, hash);
167 cell = scm_cons (symbol, SCM_UNDEFINED);
168
169 SCM_SET_HASHTABLE_BUCKET (symbols, hash, scm_cons (cell, slot));
170 SCM_HASHTABLE_INCREMENT (symbols);
171
172 if (SCM_HASHTABLE_N_ITEMS (symbols) > SCM_HASHTABLE_UPPER (symbols))
173 scm_i_rehash (symbols, scm_i_hash_symbol, 0, "intern_symbol");
174 }
175
176 static SCM
177 scm_i_c_mem2symbol (const char *name, size_t len)
178 {
179 SCM symbol;
180 size_t raw_hash = scm_string_hash ((const unsigned char *) name, len);
181
182 symbol = lookup_interned_symbol (name, len, raw_hash);
183 if (scm_is_false (symbol))
184 {
185 /* The symbol was not found, create it. */
186 symbol = scm_i_c_make_symbol (name, len, 0, raw_hash,
187 scm_cons (SCM_BOOL_F, SCM_EOL));
188 intern_symbol (symbol);
189 }
190
191 return symbol;
192 }
193
194 static SCM
195 scm_i_mem2symbol (SCM str)
196 {
197 SCM symbol;
198 const char *name = scm_i_string_chars (str);
199 size_t len = scm_i_string_length (str);
200 size_t raw_hash = scm_string_hash ((const unsigned char *) name, len);
201
202 symbol = lookup_interned_symbol (name, len, raw_hash);
203 if (scm_is_false (symbol))
204 {
205 /* The symbol was not found, create it. */
206 symbol = scm_i_make_symbol (str, 0, raw_hash,
207 scm_cons (SCM_BOOL_F, SCM_EOL));
208 intern_symbol (symbol);
209 }
210
211 return symbol;
212 }
213
214
215 static SCM
216 scm_i_mem2uninterned_symbol (SCM str)
217 {
218 const char *name = scm_i_string_chars (str);
219 size_t len = scm_i_string_length (str);
220 size_t raw_hash = scm_string_hash ((const unsigned char *) name, len);
221
222 return scm_i_make_symbol (str, SCM_I_F_SYMBOL_UNINTERNED,
223 raw_hash, scm_cons (SCM_BOOL_F, SCM_EOL));
224 }
225
226 SCM_DEFINE (scm_symbol_p, "symbol?", 1, 0, 0,
227 (SCM obj),
228 "Return @code{#t} if @var{obj} is a symbol, otherwise return\n"
229 "@code{#f}.")
230 #define FUNC_NAME s_scm_symbol_p
231 {
232 return scm_from_bool (scm_is_symbol (obj));
233 }
234 #undef FUNC_NAME
235
236 SCM_DEFINE (scm_symbol_interned_p, "symbol-interned?", 1, 0, 0,
237 (SCM symbol),
238 "Return @code{#t} if @var{symbol} is interned, otherwise return\n"
239 "@code{#f}.")
240 #define FUNC_NAME s_scm_symbol_interned_p
241 {
242 SCM_VALIDATE_SYMBOL (1, symbol);
243 return scm_from_bool (scm_i_symbol_is_interned (symbol));
244 }
245 #undef FUNC_NAME
246
247 SCM_DEFINE (scm_make_symbol, "make-symbol", 1, 0, 0,
248 (SCM name),
249 "Return a new uninterned symbol with the name @var{name}. "
250 "The returned symbol is guaranteed to be unique and future "
251 "calls to @code{string->symbol} will not return it.")
252 #define FUNC_NAME s_scm_make_symbol
253 {
254 SCM_VALIDATE_STRING (1, name);
255 return scm_i_mem2uninterned_symbol (name);
256 }
257 #undef FUNC_NAME
258
259 SCM_DEFINE (scm_symbol_to_string, "symbol->string", 1, 0, 0,
260 (SCM s),
261 "Return the name of @var{symbol} as a string. If the symbol was\n"
262 "part of an object returned as the value of a literal expression\n"
263 "(section @pxref{Literal expressions,,,r5rs, The Revised^5\n"
264 "Report on Scheme}) or by a call to the @code{read} procedure,\n"
265 "and its name contains alphabetic characters, then the string\n"
266 "returned will contain characters in the implementation's\n"
267 "preferred standard case---some implementations will prefer\n"
268 "upper case, others lower case. If the symbol was returned by\n"
269 "@code{string->symbol}, the case of characters in the string\n"
270 "returned will be the same as the case in the string that was\n"
271 "passed to @code{string->symbol}. It is an error to apply\n"
272 "mutation procedures like @code{string-set!} to strings returned\n"
273 "by this procedure.\n"
274 "\n"
275 "The following examples assume that the implementation's\n"
276 "standard case is lower case:\n"
277 "\n"
278 "@lisp\n"
279 "(symbol->string 'flying-fish) @result{} \"flying-fish\"\n"
280 "(symbol->string 'Martin) @result{} \"martin\"\n"
281 "(symbol->string\n"
282 " (string->symbol \"Malvina\")) @result{} \"Malvina\"\n"
283 "@end lisp")
284 #define FUNC_NAME s_scm_symbol_to_string
285 {
286 SCM_VALIDATE_SYMBOL (1, s);
287 return scm_i_symbol_substring (s, 0, scm_i_symbol_length (s));
288 }
289 #undef FUNC_NAME
290
291
292 SCM_DEFINE (scm_string_to_symbol, "string->symbol", 1, 0, 0,
293 (SCM string),
294 "Return the symbol whose name is @var{string}. This procedure\n"
295 "can create symbols with names containing special characters or\n"
296 "letters in the non-standard case, but it is usually a bad idea\n"
297 "to create such symbols because in some implementations of\n"
298 "Scheme they cannot be read as themselves. See\n"
299 "@code{symbol->string}.\n"
300 "\n"
301 "The following examples assume that the implementation's\n"
302 "standard case is lower case:\n"
303 "\n"
304 "@lisp\n"
305 "(eq? 'mISSISSIppi 'mississippi) @result{} #t\n"
306 "(string->symbol \"mISSISSIppi\") @result{} @r{the symbol with name \"mISSISSIppi\"}\n"
307 "(eq? 'bitBlt (string->symbol \"bitBlt\")) @result{} #f\n"
308 "(eq? 'JollyWog\n"
309 " (string->symbol (symbol->string 'JollyWog))) @result{} #t\n"
310 "(string=? \"K. Harper, M.D.\"\n"
311 " (symbol->string\n"
312 " (string->symbol \"K. Harper, M.D.\"))) @result{}#t\n"
313 "@end lisp")
314 #define FUNC_NAME s_scm_string_to_symbol
315 {
316 SCM_VALIDATE_STRING (1, string);
317 return scm_i_mem2symbol (string);
318 }
319 #undef FUNC_NAME
320
321 SCM_DEFINE (scm_string_ci_to_symbol, "string-ci->symbol", 1, 0, 0,
322 (SCM str),
323 "Return the symbol whose name is @var{str}. @var{str} is\n"
324 "converted to lowercase before the conversion is done, if Guile\n"
325 "is currently reading symbols case-insensitively.")
326 #define FUNC_NAME s_scm_string_ci_to_symbol
327 {
328 return scm_string_to_symbol (SCM_CASE_INSENSITIVE_P
329 ? scm_string_downcase(str)
330 : str);
331 }
332 #undef FUNC_NAME
333
334 #define MAX_PREFIX_LENGTH 30
335
336 SCM_DEFINE (scm_gensym, "gensym", 0, 1, 0,
337 (SCM prefix),
338 "Create a new symbol with a name constructed from a prefix and\n"
339 "a counter value. The string @var{prefix} can be specified as\n"
340 "an optional argument. Default prefix is @code{ g}. The counter\n"
341 "is increased by 1 at each call. There is no provision for\n"
342 "resetting the counter.")
343 #define FUNC_NAME s_scm_gensym
344 {
345 static int gensym_counter = 0;
346
347 SCM suffix, name;
348 int n, n_digits;
349 char buf[SCM_INTBUFLEN];
350
351 if (SCM_UNBNDP (prefix))
352 prefix = scm_from_locale_string (" g");
353
354 /* mutex in case another thread looks and incs at the exact same moment */
355 scm_i_scm_pthread_mutex_lock (&scm_i_misc_mutex);
356 n = gensym_counter++;
357 scm_i_pthread_mutex_unlock (&scm_i_misc_mutex);
358
359 n_digits = scm_iint2str (n, 10, buf);
360 suffix = scm_from_locale_stringn (buf, n_digits);
361 name = scm_string_append (scm_list_2 (prefix, suffix));
362 return scm_string_to_symbol (name);
363 }
364 #undef FUNC_NAME
365
366 SCM_DEFINE (scm_symbol_hash, "symbol-hash", 1, 0, 0,
367 (SCM symbol),
368 "Return a hash value for @var{symbol}.")
369 #define FUNC_NAME s_scm_symbol_hash
370 {
371 SCM_VALIDATE_SYMBOL (1, symbol);
372 return scm_from_ulong (scm_i_symbol_hash (symbol));
373 }
374 #undef FUNC_NAME
375
376 SCM_DEFINE (scm_symbol_fref, "symbol-fref", 1, 0, 0,
377 (SCM s),
378 "Return the contents of @var{symbol}'s @dfn{function slot}.")
379 #define FUNC_NAME s_scm_symbol_fref
380 {
381 SCM_VALIDATE_SYMBOL (1, s);
382 return SCM_CAR (SCM_CELL_OBJECT_3 (s));
383 }
384 #undef FUNC_NAME
385
386
387 SCM_DEFINE (scm_symbol_pref, "symbol-pref", 1, 0, 0,
388 (SCM s),
389 "Return the @dfn{property list} currently associated with @var{symbol}.")
390 #define FUNC_NAME s_scm_symbol_pref
391 {
392 SCM_VALIDATE_SYMBOL (1, s);
393 return SCM_CDR (SCM_CELL_OBJECT_3 (s));
394 }
395 #undef FUNC_NAME
396
397
398 SCM_DEFINE (scm_symbol_fset_x, "symbol-fset!", 2, 0, 0,
399 (SCM s, SCM val),
400 "Change the binding of @var{symbol}'s function slot.")
401 #define FUNC_NAME s_scm_symbol_fset_x
402 {
403 SCM_VALIDATE_SYMBOL (1, s);
404 SCM_SETCAR (SCM_CELL_OBJECT_3 (s), val);
405 return SCM_UNSPECIFIED;
406 }
407 #undef FUNC_NAME
408
409
410 SCM_DEFINE (scm_symbol_pset_x, "symbol-pset!", 2, 0, 0,
411 (SCM s, SCM val),
412 "Change the binding of @var{symbol}'s property slot.")
413 #define FUNC_NAME s_scm_symbol_pset_x
414 {
415 SCM_VALIDATE_SYMBOL (1, s);
416 SCM_SETCDR (SCM_CELL_OBJECT_3 (s), val);
417 return SCM_UNSPECIFIED;
418 }
419 #undef FUNC_NAME
420
421 SCM
422 scm_from_locale_symbol (const char *sym)
423 {
424 return scm_i_c_mem2symbol (sym, strlen (sym));
425 }
426
427 SCM
428 scm_from_locale_symboln (const char *sym, size_t len)
429 {
430 return scm_i_c_mem2symbol (sym, len);
431 }
432
433 SCM
434 scm_take_locale_symboln (char *sym, size_t len)
435 {
436 SCM res;
437 unsigned long raw_hash;
438
439 if (len == (size_t)-1)
440 len = strlen (sym);
441 else
442 {
443 /* Ensure STR is null terminated. A realloc for 1 extra byte should
444 often be satisfied from the alignment padding after the block, with
445 no actual data movement. */
446 sym = scm_realloc (sym, len+1);
447 sym[len] = '\0';
448 }
449
450 raw_hash = scm_string_hash ((unsigned char *)sym, len);
451 res = lookup_interned_symbol (sym, len, raw_hash);
452 if (scm_is_false (res))
453 {
454 res = scm_i_c_take_symbol (sym, len, 0, raw_hash,
455 scm_cons (SCM_BOOL_F, SCM_EOL));
456 intern_symbol (res);
457 }
458 else
459 free (sym);
460
461 return res;
462 }
463
464 SCM
465 scm_take_locale_symbol (char *sym)
466 {
467 return scm_take_locale_symboln (sym, (size_t)-1);
468 }
469
470 void
471 scm_symbols_prehistory ()
472 {
473 symbols = scm_make_weak_key_hash_table (scm_from_int (2139));
474 }
475
476
477 void
478 scm_init_symbols ()
479 {
480 #include "libguile/symbols.x"
481 }
482
483 /*
484 Local Variables:
485 c-file-style: "gnu"
486 End:
487 */