Merge commit '3d51e57cfb0404db568a6adfde2a346d3fd9907e'
[bpt/guile.git] / libguile / symbols.c
1 /* Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2003, 2004,
2 * 2006, 2009, 2011 Free Software Foundation, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
18 */
19
20
21 \f
22 #ifdef HAVE_CONFIG_H
23 # include <config.h>
24 #endif
25
26 #include <unistr.h>
27
28 #include "libguile/_scm.h"
29 #include "libguile/chars.h"
30 #include "libguile/eval.h"
31 #include "libguile/hash.h"
32 #include "libguile/smob.h"
33 #include "libguile/variable.h"
34 #include "libguile/alist.h"
35 #include "libguile/fluids.h"
36 #include "libguile/threads.h"
37 #include "libguile/strings.h"
38 #include "libguile/vectors.h"
39 #include "libguile/weak-set.h"
40 #include "libguile/modules.h"
41 #include "libguile/read.h"
42 #include "libguile/srfi-13.h"
43
44 #include "libguile/validate.h"
45 #include "libguile/symbols.h"
46
47 #include "libguile/private-options.h"
48
49
50 #ifdef HAVE_STRING_H
51 #include <string.h>
52 #endif
53
54 \f
55
56 static SCM symbols;
57
58 #ifdef GUILE_DEBUG
59 SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0,
60 (),
61 "Return the system symbol obarray.")
62 #define FUNC_NAME s_scm_sys_symbols
63 {
64 return symbols;
65 }
66 #undef FUNC_NAME
67 #endif
68
69 \f
70
71 /* {Symbols}
72 */
73
74 unsigned long
75 scm_i_hash_symbol (SCM obj, unsigned long n, void *closure)
76 {
77 return scm_i_symbol_hash (obj) % n;
78 }
79
80 struct string_lookup_data
81 {
82 SCM string;
83 unsigned long string_hash;
84 };
85
86 static int
87 string_lookup_predicate_fn (SCM sym, void *closure)
88 {
89 struct string_lookup_data *data = closure;
90
91 if (scm_i_symbol_hash (sym) == data->string_hash
92 && scm_i_symbol_length (sym) == scm_i_string_length (data->string))
93 {
94 size_t n = scm_i_symbol_length (sym);
95 while (n--)
96 if (scm_i_symbol_ref (sym, n) != scm_i_string_ref (data->string, n))
97 return 0;
98 return 1;
99 }
100 else
101 return 0;
102 }
103
104 static SCM
105 lookup_interned_symbol (SCM name, unsigned long raw_hash)
106 {
107 struct string_lookup_data data;
108
109 data.string = name;
110 data.string_hash = raw_hash;
111
112 return scm_c_weak_set_lookup (symbols, raw_hash,
113 string_lookup_predicate_fn,
114 &data, SCM_BOOL_F);
115 }
116
117 struct latin1_lookup_data
118 {
119 const char *str;
120 size_t len;
121 unsigned long string_hash;
122 };
123
124 static int
125 latin1_lookup_predicate_fn (SCM sym, void *closure)
126 {
127 struct latin1_lookup_data *data = closure;
128
129 return scm_i_symbol_hash (sym) == data->string_hash
130 && scm_i_is_narrow_symbol (sym)
131 && scm_i_symbol_length (sym) == data->len
132 && strncmp (scm_i_symbol_chars (sym), data->str, data->len) == 0;
133 }
134
135 static SCM
136 lookup_interned_latin1_symbol (const char *str, size_t len,
137 unsigned long raw_hash)
138 {
139 struct latin1_lookup_data data;
140
141 data.str = str;
142 data.len = len;
143 data.string_hash = raw_hash;
144
145 return scm_c_weak_set_lookup (symbols, raw_hash,
146 latin1_lookup_predicate_fn,
147 &data, SCM_BOOL_F);
148 }
149
150 struct utf8_lookup_data
151 {
152 const char *str;
153 size_t len;
154 unsigned long string_hash;
155 };
156
157 static int
158 utf8_string_equals_wide_string (const scm_t_uint8 *narrow, size_t nlen,
159 const scm_t_wchar *wide, size_t wlen)
160 {
161 size_t byte_idx = 0, char_idx = 0;
162
163 while (byte_idx < nlen && char_idx < wlen)
164 {
165 ucs4_t c;
166 int nbytes;
167
168 nbytes = u8_mbtouc (&c, narrow + byte_idx, nlen - byte_idx);
169 if (nbytes == 0)
170 break;
171 else if (nbytes < 0)
172 /* Bad UTF-8. */
173 return 0;
174 else if (c != wide[char_idx])
175 return 0;
176
177 byte_idx += nbytes;
178 char_idx++;
179 }
180
181 return byte_idx == nlen && char_idx == wlen;
182 }
183
184 static int
185 utf8_lookup_predicate_fn (SCM sym, void *closure)
186 {
187 struct utf8_lookup_data *data = closure;
188
189 if (scm_i_symbol_hash (sym) != data->string_hash)
190 return 0;
191
192 if (scm_i_is_narrow_symbol (sym))
193 return (scm_i_symbol_length (sym) == data->len
194 && strncmp (scm_i_symbol_chars (sym), data->str, data->len) == 0);
195 else
196 return utf8_string_equals_wide_string ((const scm_t_uint8 *) data->str,
197 data->len,
198 scm_i_symbol_wide_chars (sym),
199 scm_i_symbol_length (sym));
200 }
201
202 static SCM
203 lookup_interned_utf8_symbol (const char *str, size_t len,
204 unsigned long raw_hash)
205 {
206 struct utf8_lookup_data data;
207
208 data.str = str;
209 data.len = len;
210 data.string_hash = raw_hash;
211
212 return scm_c_weak_set_lookup (symbols, raw_hash,
213 utf8_lookup_predicate_fn,
214 &data, SCM_BOOL_F);
215 }
216
217 static int
218 symbol_lookup_predicate_fn (SCM sym, void *closure)
219 {
220 SCM other = SCM_PACK_POINTER (closure);
221
222 if (scm_i_symbol_hash (sym) == scm_i_symbol_hash (other)
223 && scm_i_symbol_length (sym) == scm_i_symbol_length (other))
224 {
225 if (scm_i_is_narrow_symbol (sym))
226 return scm_i_is_narrow_symbol (other)
227 && (strncmp (scm_i_symbol_chars (sym),
228 scm_i_symbol_chars (other),
229 scm_i_symbol_length (other)) == 0);
230 else
231 return scm_is_true
232 (scm_string_equal_p (scm_symbol_to_string (sym),
233 scm_symbol_to_string (other)));
234 }
235 return 0;
236 }
237
238 static SCM
239 scm_i_str2symbol (SCM str)
240 {
241 SCM symbol;
242 size_t raw_hash = scm_i_string_hash (str);
243
244 symbol = lookup_interned_symbol (str, raw_hash);
245 if (scm_is_true (symbol))
246 return symbol;
247 else
248 {
249 /* The symbol was not found, create it. */
250 symbol = scm_i_make_symbol (str, 0, raw_hash,
251 scm_cons (SCM_BOOL_F, SCM_EOL));
252
253 /* Might return a different symbol, if another one was interned at
254 the same time. */
255 return scm_c_weak_set_add_x (symbols, raw_hash,
256 symbol_lookup_predicate_fn,
257 SCM_UNPACK_POINTER (symbol), symbol);
258 }
259 }
260
261
262 static SCM
263 scm_i_str2uninterned_symbol (SCM str)
264 {
265 size_t raw_hash = scm_i_string_hash (str);
266
267 return scm_i_make_symbol (str, SCM_I_F_SYMBOL_UNINTERNED,
268 raw_hash, scm_cons (SCM_BOOL_F, SCM_EOL));
269 }
270
271 SCM_DEFINE (scm_symbol_p, "symbol?", 1, 0, 0,
272 (SCM obj),
273 "Return @code{#t} if @var{obj} is a symbol, otherwise return\n"
274 "@code{#f}.")
275 #define FUNC_NAME s_scm_symbol_p
276 {
277 return scm_from_bool (scm_is_symbol (obj));
278 }
279 #undef FUNC_NAME
280
281 SCM_DEFINE (scm_symbol_interned_p, "symbol-interned?", 1, 0, 0,
282 (SCM symbol),
283 "Return @code{#t} if @var{symbol} is interned, otherwise return\n"
284 "@code{#f}.")
285 #define FUNC_NAME s_scm_symbol_interned_p
286 {
287 SCM_VALIDATE_SYMBOL (1, symbol);
288 return scm_from_bool (scm_i_symbol_is_interned (symbol));
289 }
290 #undef FUNC_NAME
291
292 SCM_DEFINE (scm_make_symbol, "make-symbol", 1, 0, 0,
293 (SCM name),
294 "Return a new uninterned symbol with the name @var{name}. "
295 "The returned symbol is guaranteed to be unique and future "
296 "calls to @code{string->symbol} will not return it.")
297 #define FUNC_NAME s_scm_make_symbol
298 {
299 SCM_VALIDATE_STRING (1, name);
300 return scm_i_str2uninterned_symbol (name);
301 }
302 #undef FUNC_NAME
303
304 SCM_DEFINE (scm_symbol_to_string, "symbol->string", 1, 0, 0,
305 (SCM s),
306 "Return the name of @var{symbol} as a string. If the symbol was\n"
307 "part of an object returned as the value of a literal expression\n"
308 "(section @pxref{Literal expressions,,,r5rs, The Revised^5\n"
309 "Report on Scheme}) or by a call to the @code{read} procedure,\n"
310 "and its name contains alphabetic characters, then the string\n"
311 "returned will contain characters in the implementation's\n"
312 "preferred standard case---some implementations will prefer\n"
313 "upper case, others lower case. If the symbol was returned by\n"
314 "@code{string->symbol}, the case of characters in the string\n"
315 "returned will be the same as the case in the string that was\n"
316 "passed to @code{string->symbol}. It is an error to apply\n"
317 "mutation procedures like @code{string-set!} to strings returned\n"
318 "by this procedure.\n"
319 "\n"
320 "The following examples assume that the implementation's\n"
321 "standard case is lower case:\n"
322 "\n"
323 "@lisp\n"
324 "(symbol->string 'flying-fish) @result{} \"flying-fish\"\n"
325 "(symbol->string 'Martin) @result{} \"martin\"\n"
326 "(symbol->string\n"
327 " (string->symbol \"Malvina\")) @result{} \"Malvina\"\n"
328 "@end lisp")
329 #define FUNC_NAME s_scm_symbol_to_string
330 {
331 SCM_VALIDATE_SYMBOL (1, s);
332 return scm_i_symbol_substring (s, 0, scm_i_symbol_length (s));
333 }
334 #undef FUNC_NAME
335
336
337 SCM_DEFINE (scm_string_to_symbol, "string->symbol", 1, 0, 0,
338 (SCM string),
339 "Return the symbol whose name is @var{string}. This procedure\n"
340 "can create symbols with names containing special characters or\n"
341 "letters in the non-standard case, but it is usually a bad idea\n"
342 "to create such symbols because in some implementations of\n"
343 "Scheme they cannot be read as themselves. See\n"
344 "@code{symbol->string}.\n"
345 "\n"
346 "The following examples assume that the implementation's\n"
347 "standard case is lower case:\n"
348 "\n"
349 "@lisp\n"
350 "(eq? 'mISSISSIppi 'mississippi) @result{} #t\n"
351 "(string->symbol \"mISSISSIppi\") @result{} @r{the symbol with name \"mISSISSIppi\"}\n"
352 "(eq? 'bitBlt (string->symbol \"bitBlt\")) @result{} #f\n"
353 "(eq? 'JollyWog\n"
354 " (string->symbol (symbol->string 'JollyWog))) @result{} #t\n"
355 "(string=? \"K. Harper, M.D.\"\n"
356 " (symbol->string\n"
357 " (string->symbol \"K. Harper, M.D.\"))) @result{}#t\n"
358 "@end lisp")
359 #define FUNC_NAME s_scm_string_to_symbol
360 {
361 SCM_VALIDATE_STRING (1, string);
362 return scm_i_str2symbol (string);
363 }
364 #undef FUNC_NAME
365
366 SCM_DEFINE (scm_string_ci_to_symbol, "string-ci->symbol", 1, 0, 0,
367 (SCM str),
368 "Return the symbol whose name is @var{str}. @var{str} is\n"
369 "converted to lowercase before the conversion is done, if Guile\n"
370 "is currently reading symbols case-insensitively.")
371 #define FUNC_NAME s_scm_string_ci_to_symbol
372 {
373 return scm_string_to_symbol (SCM_CASE_INSENSITIVE_P
374 ? scm_string_downcase(str)
375 : str);
376 }
377 #undef FUNC_NAME
378
379 /* The default prefix for `gensym'd symbols. */
380 static SCM default_gensym_prefix;
381
382 #define GENSYM_LENGTH 22 /* bytes */
383 #define GENSYM_RADIX_BITS 6
384 #define GENSYM_RADIX (1 << (GENSYM_RADIX_BITS))
385
386 SCM_DEFINE (scm_gensym, "gensym", 0, 1, 0,
387 (SCM prefix),
388 "Create a new symbol with a name constructed from a prefix and\n"
389 "a counter value. The string @var{prefix} can be specified as\n"
390 "an optional argument. Default prefix is @code{ g}. The counter\n"
391 "is increased by 1 at each call. There is no provision for\n"
392 "resetting the counter.")
393 #define FUNC_NAME s_scm_gensym
394 {
395 static const char base64[GENSYM_RADIX] =
396 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789$@";
397 static const char base4[4] = "_.-~";
398
399 unsigned char *digit_buf = SCM_I_CURRENT_THREAD->gensym_counter;
400 char char_buf[GENSYM_LENGTH];
401 SCM suffix, name;
402 int i;
403
404 if (SCM_UNBNDP (prefix))
405 prefix = default_gensym_prefix;
406
407 if (SCM_UNLIKELY (digit_buf == NULL))
408 {
409 /* This is the first time gensym has been called in this thread.
410 Allocate and randomize our new thread-local gensym counter */
411 digit_buf = (unsigned char *)
412 scm_gc_malloc_pointerless (GENSYM_LENGTH, "gensym-counter");
413 scm_i_random_bytes_from_platform (digit_buf, GENSYM_LENGTH);
414 for (i = (GENSYM_LENGTH - 1); i >= 0; --i)
415 digit_buf[i] &= (GENSYM_RADIX - 1);
416 SCM_I_CURRENT_THREAD->gensym_counter = digit_buf;
417 }
418
419 /* Increment our thread-local gensym_counter. */
420 for (i = (GENSYM_LENGTH - 1); i >= 0; --i)
421 {
422 if (SCM_LIKELY (++(digit_buf[i]) < GENSYM_RADIX))
423 break;
424 else
425 digit_buf[i] = 0;
426 }
427
428 /* Encode digit_buf as base64, except for the first character where we
429 use the sparse glyphs "_.-~" (base 4) to provide some visual
430 separation between the prefix and the dense base64 block. */
431 for (i = (GENSYM_LENGTH - 1); i > 0; --i)
432 char_buf[i] = base64[digit_buf[i]];
433 char_buf[0] = base4[digit_buf[0] & 3];
434
435 suffix = scm_from_latin1_stringn (char_buf, GENSYM_LENGTH);
436 name = scm_string_append (scm_list_2 (prefix, suffix));
437 return scm_string_to_symbol (name);
438 }
439 #undef FUNC_NAME
440
441 SCM_DEFINE (scm_symbol_hash, "symbol-hash", 1, 0, 0,
442 (SCM symbol),
443 "Return a hash value for @var{symbol}.")
444 #define FUNC_NAME s_scm_symbol_hash
445 {
446 SCM_VALIDATE_SYMBOL (1, symbol);
447 return scm_from_ulong (scm_i_symbol_hash (symbol));
448 }
449 #undef FUNC_NAME
450
451 SCM_DEFINE (scm_symbol_fref, "symbol-fref", 1, 0, 0,
452 (SCM s),
453 "Return the contents of @var{symbol}'s @dfn{function slot}.")
454 #define FUNC_NAME s_scm_symbol_fref
455 {
456 SCM_VALIDATE_SYMBOL (1, s);
457 return SCM_CAR (SCM_CELL_OBJECT_3 (s));
458 }
459 #undef FUNC_NAME
460
461
462 SCM_DEFINE (scm_symbol_pref, "symbol-pref", 1, 0, 0,
463 (SCM s),
464 "Return the @dfn{property list} currently associated with @var{symbol}.")
465 #define FUNC_NAME s_scm_symbol_pref
466 {
467 SCM_VALIDATE_SYMBOL (1, s);
468 return SCM_CDR (SCM_CELL_OBJECT_3 (s));
469 }
470 #undef FUNC_NAME
471
472
473 SCM_DEFINE (scm_symbol_fset_x, "symbol-fset!", 2, 0, 0,
474 (SCM s, SCM val),
475 "Change the binding of @var{symbol}'s function slot.")
476 #define FUNC_NAME s_scm_symbol_fset_x
477 {
478 SCM_VALIDATE_SYMBOL (1, s);
479 SCM_SETCAR (SCM_CELL_OBJECT_3 (s), val);
480 return SCM_UNSPECIFIED;
481 }
482 #undef FUNC_NAME
483
484
485 SCM_DEFINE (scm_symbol_pset_x, "symbol-pset!", 2, 0, 0,
486 (SCM s, SCM val),
487 "Change the binding of @var{symbol}'s property slot.")
488 #define FUNC_NAME s_scm_symbol_pset_x
489 {
490 SCM_VALIDATE_SYMBOL (1, s);
491 SCM_SETCDR (SCM_CELL_OBJECT_3 (s), val);
492 return SCM_UNSPECIFIED;
493 }
494 #undef FUNC_NAME
495
496 SCM
497 scm_from_locale_symbol (const char *sym)
498 {
499 return scm_from_locale_symboln (sym, -1);
500 }
501
502 SCM
503 scm_from_locale_symboln (const char *sym, size_t len)
504 {
505 SCM str = scm_from_locale_stringn (sym, len);
506 return scm_i_str2symbol (str);
507 }
508
509 SCM
510 scm_take_locale_symboln (char *sym, size_t len)
511 {
512 SCM str;
513
514 str = scm_take_locale_stringn (sym, len);
515 return scm_i_str2symbol (str);
516 }
517
518 SCM
519 scm_take_locale_symbol (char *sym)
520 {
521 return scm_take_locale_symboln (sym, (size_t)-1);
522 }
523
524 SCM
525 scm_from_latin1_symbol (const char *sym)
526 {
527 return scm_from_latin1_symboln (sym, -1);
528 }
529
530 SCM
531 scm_from_latin1_symboln (const char *sym, size_t len)
532 {
533 unsigned long hash;
534 SCM ret;
535
536 if (len == (size_t) -1)
537 len = strlen (sym);
538 hash = scm_i_latin1_string_hash (sym, len);
539
540 ret = lookup_interned_latin1_symbol (sym, len, hash);
541 if (scm_is_false (ret))
542 {
543 SCM str = scm_from_latin1_stringn (sym, len);
544 ret = scm_i_str2symbol (str);
545 }
546
547 return ret;
548 }
549
550 SCM
551 scm_from_utf8_symbol (const char *sym)
552 {
553 return scm_from_utf8_symboln (sym, -1);
554 }
555
556 SCM
557 scm_from_utf8_symboln (const char *sym, size_t len)
558 {
559 unsigned long hash;
560 SCM ret;
561
562 if (len == (size_t) -1)
563 len = strlen (sym);
564 hash = scm_i_utf8_string_hash (sym, len);
565
566 ret = lookup_interned_utf8_symbol (sym, len, hash);
567 if (scm_is_false (ret))
568 {
569 SCM str = scm_from_utf8_stringn (sym, len);
570 ret = scm_i_str2symbol (str);
571 }
572
573 return ret;
574 }
575
576 void
577 scm_symbols_prehistory ()
578 {
579 symbols = scm_c_make_weak_set (5000);
580 }
581
582
583 void
584 scm_init_symbols ()
585 {
586 #include "libguile/symbols.x"
587
588 default_gensym_prefix = scm_from_latin1_string (" g");
589 }
590
591 /*
592 Local Variables:
593 c-file-style: "gnu"
594 End:
595 */