multiple obarrays
[bpt/guile.git] / libguile / symbols.c
1 /* Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2003, 2004,
2 * 2006, 2009, 2011, 2013 Free Software Foundation, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
18 */
19
20
21 \f
22 #ifdef HAVE_CONFIG_H
23 # include <config.h>
24 #endif
25
26 #include <unistr.h>
27
28 #include "libguile/_scm.h"
29 #include "libguile/chars.h"
30 #include "libguile/eval.h"
31 #include "libguile/hash.h"
32 #include "libguile/smob.h"
33 #include "libguile/variable.h"
34 #include "libguile/alist.h"
35 #include "libguile/fluids.h"
36 #include "libguile/strings.h"
37 #include "libguile/vectors.h"
38 #include "libguile/weak-set.h"
39 #include "libguile/modules.h"
40 #include "libguile/read.h"
41 #include "libguile/srfi-13.h"
42
43 #include "libguile/validate.h"
44 #include "libguile/symbols.h"
45
46 #include "libguile/private-options.h"
47
48
49 #ifdef HAVE_STRING_H
50 #include <string.h>
51 #endif
52
53 \f
54
55 static SCM symbols;
56
57 #ifdef GUILE_DEBUG
58 SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0,
59 (),
60 "Return the system symbol obarray.")
61 #define FUNC_NAME s_scm_sys_symbols
62 {
63 return symbols;
64 }
65 #undef FUNC_NAME
66 #endif
67
68 \f
69
70 /* {Symbols}
71 */
72
73 unsigned long
74 scm_i_hash_symbol (SCM obj, unsigned long n, void *closure)
75 {
76 return scm_i_symbol_hash (obj) % n;
77 }
78
79 struct string_lookup_data
80 {
81 SCM string;
82 unsigned long string_hash;
83 };
84
85 static int
86 string_lookup_predicate_fn (SCM sym, void *closure)
87 {
88 struct string_lookup_data *data = closure;
89
90 if (scm_i_symbol_hash (sym) == data->string_hash
91 && scm_i_symbol_length (sym) == scm_i_string_length (data->string))
92 {
93 size_t n = scm_i_symbol_length (sym);
94 while (n--)
95 if (scm_i_symbol_ref (sym, n) != scm_i_string_ref (data->string, n))
96 return 0;
97 return 1;
98 }
99 else
100 return 0;
101 }
102
103 static SCM
104 lookup_interned_symbol (SCM name, unsigned long raw_hash, SCM obarray)
105 {
106 struct string_lookup_data data;
107
108 data.string = name;
109 data.string_hash = raw_hash;
110
111 return scm_c_weak_set_lookup (obarray, raw_hash,
112 string_lookup_predicate_fn,
113 &data, SCM_BOOL_F);
114 }
115
116 struct latin1_lookup_data
117 {
118 const char *str;
119 size_t len;
120 unsigned long string_hash;
121 };
122
123 static int
124 latin1_lookup_predicate_fn (SCM sym, void *closure)
125 {
126 struct latin1_lookup_data *data = closure;
127
128 return scm_i_symbol_hash (sym) == data->string_hash
129 && scm_i_is_narrow_symbol (sym)
130 && scm_i_symbol_length (sym) == data->len
131 && strncmp (scm_i_symbol_chars (sym), data->str, data->len) == 0;
132 }
133
134 static SCM
135 lookup_interned_latin1_symbol (const char *str, size_t len,
136 unsigned long raw_hash,
137 SCM obarray)
138 {
139 struct latin1_lookup_data data;
140
141 data.str = str;
142 data.len = len;
143 data.string_hash = raw_hash;
144
145 return scm_c_weak_set_lookup (obarray, raw_hash,
146 latin1_lookup_predicate_fn,
147 &data, SCM_BOOL_F);
148 }
149
150 struct utf8_lookup_data
151 {
152 const char *str;
153 size_t len;
154 unsigned long string_hash;
155 };
156
157 static int
158 utf8_string_equals_wide_string (const scm_t_uint8 *narrow, size_t nlen,
159 const scm_t_wchar *wide, size_t wlen)
160 {
161 size_t byte_idx = 0, char_idx = 0;
162
163 while (byte_idx < nlen && char_idx < wlen)
164 {
165 ucs4_t c;
166 int nbytes;
167
168 nbytes = u8_mbtouc (&c, narrow + byte_idx, nlen - byte_idx);
169 if (nbytes == 0)
170 break;
171 else if (c == 0xfffd)
172 /* Bad UTF-8. */
173 return 0;
174 else if (c != wide[char_idx])
175 return 0;
176
177 byte_idx += nbytes;
178 char_idx++;
179 }
180
181 return byte_idx == nlen && char_idx == wlen;
182 }
183
184 static int
185 utf8_lookup_predicate_fn (SCM sym, void *closure)
186 {
187 struct utf8_lookup_data *data = closure;
188
189 if (scm_i_symbol_hash (sym) != data->string_hash)
190 return 0;
191
192 if (scm_i_is_narrow_symbol (sym))
193 return (scm_i_symbol_length (sym) == data->len
194 && strncmp (scm_i_symbol_chars (sym), data->str, data->len) == 0);
195 else
196 return utf8_string_equals_wide_string ((const scm_t_uint8 *) data->str,
197 data->len,
198 scm_i_symbol_wide_chars (sym),
199 scm_i_symbol_length (sym));
200 }
201
202 static SCM
203 lookup_interned_utf8_symbol (const char *str, size_t len,
204 unsigned long raw_hash,
205 SCM obarray)
206 {
207 struct utf8_lookup_data data;
208
209 data.str = str;
210 data.len = len;
211 data.string_hash = raw_hash;
212
213 return scm_c_weak_set_lookup (obarray, raw_hash,
214 utf8_lookup_predicate_fn,
215 &data, SCM_BOOL_F);
216 }
217
218 static int
219 symbol_lookup_predicate_fn (SCM sym, void *closure)
220 {
221 SCM other = SCM_PACK_POINTER (closure);
222
223 if (scm_i_symbol_hash (sym) == scm_i_symbol_hash (other)
224 && scm_i_symbol_length (sym) == scm_i_symbol_length (other))
225 {
226 if (scm_i_is_narrow_symbol (sym))
227 return scm_i_is_narrow_symbol (other)
228 && (strncmp (scm_i_symbol_chars (sym),
229 scm_i_symbol_chars (other),
230 scm_i_symbol_length (other)) == 0);
231 else
232 return scm_is_true
233 (scm_string_equal_p (scm_symbol_to_string (sym),
234 scm_symbol_to_string (other)));
235 }
236 return 0;
237 }
238
239 static SCM
240 scm_i_str2symbol (SCM str, SCM obarray)
241 {
242 SCM symbol;
243 size_t raw_hash = scm_i_string_hash (str);
244
245 symbol = lookup_interned_symbol (str, raw_hash, obarray);
246 if (scm_is_true (symbol))
247 return symbol;
248 else
249 {
250 /* The symbol was not found, create it. */
251 symbol = scm_i_make_symbol (str, 0, raw_hash,
252 scm_cons (SCM_BOOL_F, SCM_EOL));
253
254 /* Might return a different symbol, if another one was interned at
255 the same time. */
256 return scm_c_weak_set_add_x (obarray, raw_hash,
257 symbol_lookup_predicate_fn,
258 SCM_UNPACK_POINTER (symbol), symbol);
259 }
260 }
261
262
263 static SCM
264 scm_i_str2uninterned_symbol (SCM str)
265 {
266 size_t raw_hash = scm_i_string_hash (str);
267
268 return scm_i_make_symbol (str, SCM_I_F_SYMBOL_UNINTERNED,
269 raw_hash, scm_cons (SCM_BOOL_F, SCM_EOL));
270 }
271
272 SCM_DEFINE (scm_symbol_p, "symbol?", 1, 0, 0,
273 (SCM obj),
274 "Return @code{#t} if @var{obj} is a symbol, otherwise return\n"
275 "@code{#f}.")
276 #define FUNC_NAME s_scm_symbol_p
277 {
278 return scm_from_bool (scm_is_symbol (obj));
279 }
280 #undef FUNC_NAME
281
282 SCM_DEFINE (scm_symbol_interned_p, "symbol-interned?", 1, 0, 0,
283 (SCM symbol),
284 "Return @code{#t} if @var{symbol} is interned, otherwise return\n"
285 "@code{#f}.")
286 #define FUNC_NAME s_scm_symbol_interned_p
287 {
288 SCM_VALIDATE_SYMBOL (1, symbol);
289 return scm_from_bool (scm_i_symbol_is_interned (symbol));
290 }
291 #undef FUNC_NAME
292
293 SCM_DEFINE (scm_make_symbol, "make-symbol", 1, 0, 0,
294 (SCM name),
295 "Return a new uninterned symbol with the name @var{name}. "
296 "The returned symbol is guaranteed to be unique and future "
297 "calls to @code{string->symbol} will not return it.")
298 #define FUNC_NAME s_scm_make_symbol
299 {
300 SCM_VALIDATE_STRING (1, name);
301 return scm_i_str2uninterned_symbol (name);
302 }
303 #undef FUNC_NAME
304
305 SCM_DEFINE (scm_symbol_to_string, "symbol->string", 1, 0, 0,
306 (SCM s),
307 "Return the name of @var{symbol} as a string. If the symbol was\n"
308 "part of an object returned as the value of a literal expression\n"
309 "(section @pxref{Literal expressions,,,r5rs, The Revised^5\n"
310 "Report on Scheme}) or by a call to the @code{read} procedure,\n"
311 "and its name contains alphabetic characters, then the string\n"
312 "returned will contain characters in the implementation's\n"
313 "preferred standard case---some implementations will prefer\n"
314 "upper case, others lower case. If the symbol was returned by\n"
315 "@code{string->symbol}, the case of characters in the string\n"
316 "returned will be the same as the case in the string that was\n"
317 "passed to @code{string->symbol}. It is an error to apply\n"
318 "mutation procedures like @code{string-set!} to strings returned\n"
319 "by this procedure.\n"
320 "\n"
321 "The following examples assume that the implementation's\n"
322 "standard case is lower case:\n"
323 "\n"
324 "@lisp\n"
325 "(symbol->string 'flying-fish) @result{} \"flying-fish\"\n"
326 "(symbol->string 'Martin) @result{} \"martin\"\n"
327 "(symbol->string\n"
328 " (string->symbol \"Malvina\")) @result{} \"Malvina\"\n"
329 "@end lisp")
330 #define FUNC_NAME s_scm_symbol_to_string
331 {
332 SCM_VALIDATE_SYMBOL (1, s);
333 return scm_i_symbol_substring (s, 0, scm_i_symbol_length (s));
334 }
335 #undef FUNC_NAME
336
337
338 SCM_DEFINE (scm_string_to_symbol, "string->symbol", 1, 0, 0,
339 (SCM string),
340 "Return the symbol whose name is @var{string}. This procedure\n"
341 "can create symbols with names containing special characters or\n"
342 "letters in the non-standard case, but it is usually a bad idea\n"
343 "to create such symbols because in some implementations of\n"
344 "Scheme they cannot be read as themselves. See\n"
345 "@code{symbol->string}.\n"
346 "\n"
347 "The following examples assume that the implementation's\n"
348 "standard case is lower case:\n"
349 "\n"
350 "@lisp\n"
351 "(eq? 'mISSISSIppi 'mississippi) @result{} #t\n"
352 "(string->symbol \"mISSISSIppi\") @result{} @r{the symbol with name \"mISSISSIppi\"}\n"
353 "(eq? 'bitBlt (string->symbol \"bitBlt\")) @result{} #f\n"
354 "(eq? 'JollyWog\n"
355 " (string->symbol (symbol->string 'JollyWog))) @result{} #t\n"
356 "(string=? \"K. Harper, M.D.\"\n"
357 " (symbol->string\n"
358 " (string->symbol \"K. Harper, M.D.\"))) @result{}#t\n"
359 "@end lisp")
360 #define FUNC_NAME s_scm_string_to_symbol
361 {
362 SCM_VALIDATE_STRING (1, string);
363 return scm_i_str2symbol (string, symbols);
364 }
365 #undef FUNC_NAME
366
367 SCM_DEFINE (scm_string_ci_to_symbol, "string-ci->symbol", 1, 0, 0,
368 (SCM str),
369 "Return the symbol whose name is @var{str}. @var{str} is\n"
370 "converted to lowercase before the conversion is done, if Guile\n"
371 "is currently reading symbols case-insensitively.")
372 #define FUNC_NAME s_scm_string_ci_to_symbol
373 {
374 return scm_string_to_symbol (SCM_CASE_INSENSITIVE_P
375 ? scm_string_downcase(str)
376 : str);
377 }
378 #undef FUNC_NAME
379
380 SCM_DEFINE (scm_make_obarray, "make-obarray", 0, 0, 0,
381 (void),
382 "Return a fresh obarray.")
383 #define FUNC_NAME s_scm_make_obarray
384 {
385 return scm_c_make_weak_set (0);
386 }
387 #undef FUNC_NAME
388
389 SCM_DEFINE (scm_find_symbol, "find-symbol", 1, 1, 0,
390 (SCM string, SCM obarray),
391 "Return the symbol named @var{string} if it is present in\n"
392 "@var{obarray}. Return false otherwise.")
393 #define FUNC_NAME s_scm_find_symbol
394 {
395 if (SCM_UNBNDP (obarray))
396 obarray = symbols;
397
398 return lookup_interned_symbol (string,
399 scm_i_string_hash (string),
400 obarray);
401 }
402 #undef FUNC_NAME
403
404 SCM_DEFINE (scm_intern, "intern", 1, 1, 0,
405 (SCM string, SCM obarray),
406 "Intern @var{string} in @var{obarray}.")
407 #define FUNC_NAME s_scm_intern
408 {
409 if (SCM_UNBNDP (obarray))
410 obarray = symbols;
411
412 SCM_VALIDATE_STRING (1, string);
413 return scm_i_str2symbol (string, obarray);
414 }
415 #undef FUNC_NAME
416
417 SCM_DEFINE (scm_unintern, "unintern", 1, 1, 0,
418 (SCM symbol, SCM obarray),
419 "Unintern @var{symbol} from @var{obarray}.")
420 #define FUNC_NAME s_scm_unintern
421 {
422 if (SCM_UNBNDP (obarray))
423 obarray = symbols;
424
425 scm_weak_set_remove_x (obarray, symbol);
426 return SCM_UNSPECIFIED;
427 }
428 #undef FUNC_NAME
429
430 SCM_DEFINE (scm_obarray_for_each, "obarray-for-each", 1, 1, 0,
431 (SCM proc, SCM obarray),
432 "")
433 #define FUNC_NAME s_scm_obarray_for_each
434 {
435 if (SCM_UNBNDP (obarray))
436 obarray = symbols;
437
438 scm_weak_set_for_each (proc, obarray);
439 return SCM_UNSPECIFIED;
440 }
441 #undef FUNC_NAME
442
443 /* The default prefix for `gensym'd symbols. */
444 static SCM default_gensym_prefix;
445
446 #define MAX_PREFIX_LENGTH 30
447
448 SCM_DEFINE (scm_gensym, "gensym", 0, 1, 0,
449 (SCM prefix),
450 "Create a new symbol with a name constructed from a prefix and\n"
451 "a counter value. The string @var{prefix} can be specified as\n"
452 "an optional argument. Default prefix is @code{ g}. The counter\n"
453 "is increased by 1 at each call. There is no provision for\n"
454 "resetting the counter.")
455 #define FUNC_NAME s_scm_gensym
456 {
457 static int gensym_counter = 0;
458
459 SCM suffix, name;
460 int n, n_digits;
461 char buf[SCM_INTBUFLEN];
462
463 if (SCM_UNBNDP (prefix))
464 prefix = default_gensym_prefix;
465
466 /* mutex in case another thread looks and incs at the exact same moment */
467 scm_i_scm_pthread_mutex_lock (&scm_i_misc_mutex);
468 n = gensym_counter++;
469 scm_i_pthread_mutex_unlock (&scm_i_misc_mutex);
470
471 n_digits = scm_iint2str (n, 10, buf);
472 suffix = scm_from_latin1_stringn (buf, n_digits);
473 name = scm_string_append (scm_list_2 (prefix, suffix));
474 return scm_string_to_symbol (name);
475 }
476 #undef FUNC_NAME
477
478 SCM_DEFINE (scm_symbol_hash, "symbol-hash", 1, 0, 0,
479 (SCM symbol),
480 "Return a hash value for @var{symbol}.")
481 #define FUNC_NAME s_scm_symbol_hash
482 {
483 SCM_VALIDATE_SYMBOL (1, symbol);
484 return scm_from_ulong (scm_i_symbol_hash (symbol));
485 }
486 #undef FUNC_NAME
487
488 SCM_DEFINE (scm_symbol_fref, "symbol-fref", 1, 0, 0,
489 (SCM s),
490 "Return the contents of the symbol @var{s}'s @dfn{function slot}.")
491 #define FUNC_NAME s_scm_symbol_fref
492 {
493 SCM_VALIDATE_SYMBOL (1, s);
494 return SCM_CAR (SCM_CELL_OBJECT_3 (s));
495 }
496 #undef FUNC_NAME
497
498
499 SCM_DEFINE (scm_symbol_pref, "symbol-pref", 1, 0, 0,
500 (SCM s),
501 "Return the @dfn{property list} currently associated with the\n"
502 "symbol @var{s}.")
503 #define FUNC_NAME s_scm_symbol_pref
504 {
505 SCM_VALIDATE_SYMBOL (1, s);
506 return SCM_CDR (SCM_CELL_OBJECT_3 (s));
507 }
508 #undef FUNC_NAME
509
510
511 SCM_DEFINE (scm_symbol_fset_x, "symbol-fset!", 2, 0, 0,
512 (SCM s, SCM val),
513 "Change the binding of the symbol @var{s}'s function slot.")
514 #define FUNC_NAME s_scm_symbol_fset_x
515 {
516 SCM_VALIDATE_SYMBOL (1, s);
517 SCM_SETCAR (SCM_CELL_OBJECT_3 (s), val);
518 return SCM_UNSPECIFIED;
519 }
520 #undef FUNC_NAME
521
522
523 SCM_DEFINE (scm_symbol_pset_x, "symbol-pset!", 2, 0, 0,
524 (SCM s, SCM val),
525 "Change the binding of the symbol @var{s}'s property slot.")
526 #define FUNC_NAME s_scm_symbol_pset_x
527 {
528 SCM_VALIDATE_SYMBOL (1, s);
529 SCM_SETCDR (SCM_CELL_OBJECT_3 (s), val);
530 return SCM_UNSPECIFIED;
531 }
532 #undef FUNC_NAME
533
534 SCM
535 scm_from_locale_symbol (const char *sym)
536 {
537 return scm_from_locale_symboln (sym, -1);
538 }
539
540 SCM
541 scm_from_locale_symboln (const char *sym, size_t len)
542 {
543 SCM str = scm_from_locale_stringn (sym, len);
544 return scm_i_str2symbol (str, symbols);
545 }
546
547 SCM
548 scm_take_locale_symboln (char *sym, size_t len)
549 {
550 SCM str;
551
552 str = scm_take_locale_stringn (sym, len);
553 return scm_i_str2symbol (str, symbols);
554 }
555
556 SCM
557 scm_take_locale_symbol (char *sym)
558 {
559 return scm_take_locale_symboln (sym, (size_t)-1);
560 }
561
562 SCM
563 scm_from_latin1_symbol (const char *sym)
564 {
565 return scm_from_latin1_symboln (sym, -1);
566 }
567
568 SCM
569 scm_from_latin1_symboln (const char *sym, size_t len)
570 {
571 unsigned long hash;
572 SCM ret;
573
574 if (len == (size_t) -1)
575 len = strlen (sym);
576 hash = scm_i_latin1_string_hash (sym, len);
577
578 ret = lookup_interned_latin1_symbol (sym, len, hash, symbols);
579 if (scm_is_false (ret))
580 {
581 SCM str = scm_from_latin1_stringn (sym, len);
582 ret = scm_i_str2symbol (str, symbols);
583 }
584
585 return ret;
586 }
587
588 SCM
589 scm_from_utf8_symbol (const char *sym)
590 {
591 return scm_from_utf8_symboln (sym, -1);
592 }
593
594 SCM
595 scm_from_utf8_symboln (const char *sym, size_t len)
596 {
597 unsigned long hash;
598 SCM ret;
599
600 if (len == (size_t) -1)
601 len = strlen (sym);
602 hash = scm_i_utf8_string_hash (sym, len);
603
604 ret = lookup_interned_utf8_symbol (sym, len, hash, symbols);
605 if (scm_is_false (ret))
606 {
607 SCM str = scm_from_utf8_stringn (sym, len);
608 ret = scm_i_str2symbol (str, symbols);
609 }
610
611 return ret;
612 }
613
614 void
615 scm_symbols_prehistory ()
616 {
617 symbols = scm_c_make_weak_set (5000);
618 }
619
620
621 void
622 scm_init_symbols ()
623 {
624 #include "libguile/symbols.x"
625
626 default_gensym_prefix = scm_from_latin1_string (" g");
627 }
628
629 /*
630 Local Variables:
631 c-file-style: "gnu"
632 End:
633 */