/* Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2003, 2004,
 *   2006, 2009, 2011 Free Software Foundation, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 3 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301 USA
 */
#include "libguile/_scm.h"
#include "libguile/chars.h"
#include "libguile/eval.h"
#include "libguile/hash.h"
#include "libguile/smob.h"
#include "libguile/variable.h"
#include "libguile/alist.h"
#include "libguile/fluids.h"
#include "libguile/threads.h"
#include "libguile/strings.h"
#include "libguile/vectors.h"
#include "libguile/weak-set.h"
#include "libguile/modules.h"
#include "libguile/read.h"
#include "libguile/srfi-13.h"

#include "libguile/validate.h"
#include "libguile/symbols.h"

#include "libguile/private-options.h"

/* memcmp, strlen */
#include <string.h>
/* Scheme primitive `%symbols': takes no required, optional or rest
   arguments, and is documented as returning the system symbol obarray.
   NOTE(review): the argument list, the function body and the matching
   `#undef FUNC_NAME' are not present in this listing; presumably the
   body returns the file-level `symbols' set -- confirm against the
   complete file.  */
59 SCM_DEFINE (scm_sys_symbols
, "%symbols", 0, 0, 0,
61 "Return the system symbol obarray.")
62 #define FUNC_NAME s_scm_sys_symbols
75 scm_i_hash_symbol (SCM obj
, unsigned long n
, void *closure
)
77 return scm_i_symbol_hash (obj
) % n
;
80 struct string_lookup_data
83 unsigned long string_hash
;
87 string_lookup_predicate_fn (SCM sym
, void *closure
)
89 struct string_lookup_data
*data
= closure
;
91 if (scm_i_symbol_hash (sym
) == data
->string_hash
92 && scm_i_symbol_length (sym
) == scm_i_string_length (data
->string
))
94 size_t n
= scm_i_symbol_length (sym
);
96 if (scm_i_symbol_ref (sym
, n
) != scm_i_string_ref (data
->string
, n
))
105 lookup_interned_symbol (SCM name
, unsigned long raw_hash
)
107 struct string_lookup_data data
;
110 data
.string_hash
= raw_hash
;
112 return scm_c_weak_set_lookup (symbols
, raw_hash
,
113 string_lookup_predicate_fn
,
/* Closure handed to latin1_lookup_predicate_fn: a Latin-1 byte
   buffer, its length, and its raw hash.  */
struct latin1_lookup_data
{
  const char *str;              /* Latin-1 bytes of the name */
  size_t len;                   /* number of bytes at STR */
  unsigned long string_hash;    /* raw hash of the name */
};
125 latin1_lookup_predicate_fn (SCM sym
, void *closure
)
127 struct latin1_lookup_data
*data
= closure
;
129 return scm_i_symbol_hash (sym
) == data
->string_hash
130 && scm_i_is_narrow_symbol (sym
)
131 && scm_i_symbol_length (sym
) == data
->len
132 && strncmp (scm_i_symbol_chars (sym
), data
->str
, data
->len
) == 0;
136 lookup_interned_latin1_symbol (const char *str
, size_t len
,
137 unsigned long raw_hash
)
139 struct latin1_lookup_data data
;
143 data
.string_hash
= raw_hash
;
145 return scm_c_weak_set_lookup (symbols
, raw_hash
,
146 latin1_lookup_predicate_fn
,
/* Closure handed to utf8_lookup_predicate_fn: a UTF-8 byte buffer,
   its length in bytes, and the raw hash of the name it encodes.  */
struct utf8_lookup_data
{
  const char *str;              /* UTF-8 bytes of the name */
  size_t len;                   /* number of bytes at STR */
  unsigned long string_hash;    /* raw hash of the name */
};
/* Compare the NLEN bytes of UTF-8 at NARROW with the WLEN wide
   characters at WIDE.  The loop decodes one character at a time from
   NARROW with u8_mbtouc and compares it with WIDE[char_idx]; the final
   result is nonzero only if both inputs were consumed completely.
   NOTE(review): this listing lacks the declarations of `c' and
   `nbytes', the branch(es) preceding the `else if' below, the
   mismatch action, and the statements that advance byte_idx and
   char_idx -- confirm against the complete file before editing.  */
158 utf8_string_equals_wide_string (const scm_t_uint8
*narrow
, size_t nlen
,
159 const scm_t_wchar
*wide
, size_t wlen
)
161 size_t byte_idx
= 0, char_idx
= 0;
163 while (byte_idx
< nlen
&& char_idx
< wlen
)
/* Decode the next character from the not-yet-consumed UTF-8 bytes.  */
168 nbytes
= u8_mbtouc (&c
, narrow
+ byte_idx
, nlen
- byte_idx
);
/* Decoded character differs from the wide character at this index.  */
174 else if (c
!= wide
[char_idx
])
/* Equal only if neither input has anything left over.  */
181 return byte_idx
== nlen
&& char_idx
== wlen
;
185 utf8_lookup_predicate_fn (SCM sym
, void *closure
)
187 struct utf8_lookup_data
*data
= closure
;
189 if (scm_i_symbol_hash (sym
) != data
->string_hash
)
192 if (scm_i_is_narrow_symbol (sym
))
193 return (scm_i_symbol_length (sym
) == data
->len
194 && strncmp (scm_i_symbol_chars (sym
), data
->str
, data
->len
) == 0);
196 return utf8_string_equals_wide_string ((const scm_t_uint8
*) data
->str
,
198 scm_i_symbol_wide_chars (sym
),
199 scm_i_symbol_length (sym
));
203 lookup_interned_utf8_symbol (const char *str
, size_t len
,
204 unsigned long raw_hash
)
206 struct utf8_lookup_data data
;
210 data
.string_hash
= raw_hash
;
212 return scm_c_weak_set_lookup (symbols
, raw_hash
,
213 utf8_lookup_predicate_fn
,
218 symbol_lookup_predicate_fn (SCM sym
, void *closure
)
220 SCM other
= SCM_PACK_POINTER (closure
);
222 if (scm_i_symbol_hash (sym
) == scm_i_symbol_hash (other
)
223 && scm_i_symbol_length (sym
) == scm_i_symbol_length (other
))
225 if (scm_i_is_narrow_symbol (sym
))
226 return scm_i_is_narrow_symbol (other
)
227 && (strncmp (scm_i_symbol_chars (sym
),
228 scm_i_symbol_chars (other
),
229 scm_i_symbol_length (other
)) == 0);
232 (scm_string_equal_p (scm_symbol_to_string (sym
),
233 scm_symbol_to_string (other
)));
239 scm_i_str2symbol (SCM str
)
242 size_t raw_hash
= scm_i_string_hash (str
);
244 symbol
= lookup_interned_symbol (str
, raw_hash
);
245 if (scm_is_true (symbol
))
249 /* The symbol was not found, create it. */
250 symbol
= scm_i_make_symbol (str
, 0, raw_hash
,
251 scm_cons (SCM_BOOL_F
, SCM_EOL
));
253 /* Might return a different symbol, if another one was interned at
255 return scm_c_weak_set_add_x (symbols
, raw_hash
,
256 symbol_lookup_predicate_fn
,
257 SCM_UNPACK_POINTER (symbol
), symbol
);
263 scm_i_str2uninterned_symbol (SCM str
)
265 size_t raw_hash
= scm_i_string_hash (str
);
267 return scm_i_make_symbol (str
, SCM_I_F_SYMBOL_UNINTERNED
,
268 raw_hash
, scm_cons (SCM_BOOL_F
, SCM_EOL
));
/* Scheme primitive `symbol?' (one required argument).  The body
   converts the result of scm_is_symbol on OBJ to a Scheme boolean.
   NOTE(review): the argument list, the end of the docstring, the
   braces and `#undef FUNC_NAME' are not present in this listing.  */
271 SCM_DEFINE (scm_symbol_p
, "symbol?", 1, 0, 0,
273 "Return @code{#t} if @var{obj} is a symbol, otherwise return\n"
275 #define FUNC_NAME s_scm_symbol_p
277 return scm_from_bool (scm_is_symbol (obj
));
/* Scheme primitive `symbol-interned?' (one required argument).  The
   body validates that SYMBOL is a symbol, then converts the result of
   scm_i_symbol_is_interned to a Scheme boolean.
   NOTE(review): the argument list, the end of the docstring, the
   braces and `#undef FUNC_NAME' are not present in this listing.  */
281 SCM_DEFINE (scm_symbol_interned_p
, "symbol-interned?", 1, 0, 0,
283 "Return @code{#t} if @var{symbol} is interned, otherwise return\n"
285 #define FUNC_NAME s_scm_symbol_interned_p
/* Argument 1 must be a symbol.  */
287 SCM_VALIDATE_SYMBOL (1, symbol
);
288 return scm_from_bool (scm_i_symbol_is_interned (symbol
));
292 SCM_DEFINE (scm_make_symbol
, "make-symbol", 1, 0, 0,
294 "Return a new uninterned symbol with the name @var{name}. "
295 "The returned symbol is guaranteed to be unique and future "
296 "calls to @code{string->symbol} will not return it.")
297 #define FUNC_NAME s_scm_make_symbol
299 SCM_VALIDATE_STRING (1, name
);
300 return scm_i_str2uninterned_symbol (name
);
/* Scheme primitive `symbol->string' (one required argument).  The
   body validates that S is a symbol and returns the substring of its
   name spanning index 0 up to the symbol's length.
   NOTE(review): the argument list, several docstring lines (the
   listing numbers skip 319, 322-323, 326 and 328), the braces and
   `#undef FUNC_NAME' are not present in this listing.  */
304 SCM_DEFINE (scm_symbol_to_string
, "symbol->string", 1, 0, 0,
306 "Return the name of @var{symbol} as a string. If the symbol was\n"
307 "part of an object returned as the value of a literal expression\n"
308 "(section @pxref{Literal expressions,,,r5rs, The Revised^5\n"
309 "Report on Scheme}) or by a call to the @code{read} procedure,\n"
310 "and its name contains alphabetic characters, then the string\n"
311 "returned will contain characters in the implementation's\n"
312 "preferred standard case---some implementations will prefer\n"
313 "upper case, others lower case. If the symbol was returned by\n"
314 "@code{string->symbol}, the case of characters in the string\n"
315 "returned will be the same as the case in the string that was\n"
316 "passed to @code{string->symbol}. It is an error to apply\n"
317 "mutation procedures like @code{string-set!} to strings returned\n"
318 "by this procedure.\n"
320 "The following examples assume that the implementation's\n"
321 "standard case is lower case:\n"
324 "(symbol->string 'flying-fish) @result{} \"flying-fish\"\n"
325 "(symbol->string 'Martin) @result{} \"martin\"\n"
327 " (string->symbol \"Malvina\")) @result{} \"Malvina\"\n"
329 #define FUNC_NAME s_scm_symbol_to_string
/* Argument 1 must be a symbol.  */
331 SCM_VALIDATE_SYMBOL (1, s
);
/* The whole name: from index 0 to the symbol's length.  */
332 return scm_i_symbol_substring (s
, 0, scm_i_symbol_length (s
));
/* Scheme primitive `string->symbol' (one required argument).  The
   body validates that STRING is a string and returns the result of
   scm_i_str2symbol on it.
   NOTE(review): the argument list, several docstring lines (the
   listing numbers skip 345, 348-349, 353, 356 and 358), the braces
   and `#undef FUNC_NAME' are not present in this listing.  */
337 SCM_DEFINE (scm_string_to_symbol
, "string->symbol", 1, 0, 0,
339 "Return the symbol whose name is @var{string}. This procedure\n"
340 "can create symbols with names containing special characters or\n"
341 "letters in the non-standard case, but it is usually a bad idea\n"
342 "to create such symbols because in some implementations of\n"
343 "Scheme they cannot be read as themselves. See\n"
344 "@code{symbol->string}.\n"
346 "The following examples assume that the implementation's\n"
347 "standard case is lower case:\n"
350 "(eq? 'mISSISSIppi 'mississippi) @result{} #t\n"
351 "(string->symbol \"mISSISSIppi\") @result{} @r{the symbol with name \"mISSISSIppi\"}\n"
352 "(eq? 'bitBlt (string->symbol \"bitBlt\")) @result{} #f\n"
354 " (string->symbol (symbol->string 'JollyWog))) @result{} #t\n"
355 "(string=? \"K. Harper, M.D.\"\n"
357 " (string->symbol \"K. Harper, M.D.\"))) @result{}#t\n"
359 #define FUNC_NAME s_scm_string_to_symbol
/* Argument 1 must be a string.  */
361 SCM_VALIDATE_STRING (1, string
);
362 return scm_i_str2symbol (string
);
366 SCM_DEFINE (scm_string_ci_to_symbol
, "string-ci->symbol", 1, 0, 0,
368 "Return the symbol whose name is @var{str}. @var{str} is\n"
369 "converted to lowercase before the conversion is done, if Guile\n"
370 "is currently reading symbols case-insensitively.")
371 #define FUNC_NAME s_scm_string_ci_to_symbol
373 return scm_string_to_symbol (SCM_CASE_INSENSITIVE_P
374 ? scm_string_downcase(str
)
379 /* The default prefix for `gensym'd symbols. */
380 static SCM default_gensym_prefix
;
382 #define GENSYM_LENGTH 22 /* bytes */
383 #define GENSYM_RADIX_BITS 6
384 #define GENSYM_RADIX (1 << (GENSYM_RADIX_BITS))
386 SCM_DEFINE (scm_gensym
, "gensym", 0, 1, 0,
388 "Create a new symbol with a name constructed from a prefix and\n"
389 "a counter value. The string @var{prefix} can be specified as\n"
390 "an optional argument. Default prefix is @code{ g}. The counter\n"
391 "is increased by 1 at each call. There is no provision for\n"
392 "resetting the counter.")
393 #define FUNC_NAME s_scm_gensym
395 static const char base64
[GENSYM_RADIX
] =
396 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789$@";
397 static const char base4
[4] = "_.-~";
399 unsigned char *digit_buf
= SCM_I_CURRENT_THREAD
->gensym_counter
;
400 char char_buf
[GENSYM_LENGTH
];
404 if (SCM_UNBNDP (prefix
))
405 prefix
= default_gensym_prefix
;
407 if (SCM_UNLIKELY (digit_buf
== NULL
))
409 /* This is the first time gensym has been called in this thread.
410 Allocate and randomize our new thread-local gensym counter */
411 digit_buf
= (unsigned char *)
412 scm_gc_malloc_pointerless (GENSYM_LENGTH
, "gensym-counter");
413 scm_i_random_bytes_from_platform (digit_buf
, GENSYM_LENGTH
);
414 for (i
= (GENSYM_LENGTH
- 1); i
>= 0; --i
)
415 digit_buf
[i
] &= (GENSYM_RADIX
- 1);
416 SCM_I_CURRENT_THREAD
->gensym_counter
= digit_buf
;
419 /* Increment our thread-local gensym_counter. */
420 for (i
= (GENSYM_LENGTH
- 1); i
>= 0; --i
)
422 if (SCM_LIKELY (++(digit_buf
[i
]) < GENSYM_RADIX
))
428 /* Encode digit_buf as base64, except for the first character where we
429 use the sparse glyphs "_.-~" (base 4) to provide some visual
430 separation between the prefix and the dense base64 block. */
431 for (i
= (GENSYM_LENGTH
- 1); i
> 0; --i
)
432 char_buf
[i
] = base64
[digit_buf
[i
]];
433 char_buf
[0] = base4
[digit_buf
[0] & 3];
435 suffix
= scm_from_latin1_stringn (char_buf
, GENSYM_LENGTH
);
436 name
= scm_string_append (scm_list_2 (prefix
, suffix
));
437 return scm_string_to_symbol (name
);
441 SCM_DEFINE (scm_symbol_hash
, "symbol-hash", 1, 0, 0,
443 "Return a hash value for @var{symbol}.")
444 #define FUNC_NAME s_scm_symbol_hash
446 SCM_VALIDATE_SYMBOL (1, symbol
);
447 return scm_from_ulong (scm_i_symbol_hash (symbol
));
451 SCM_DEFINE (scm_symbol_fref
, "symbol-fref", 1, 0, 0,
453 "Return the contents of @var{symbol}'s @dfn{function slot}.")
454 #define FUNC_NAME s_scm_symbol_fref
456 SCM_VALIDATE_SYMBOL (1, s
);
457 return SCM_CAR (SCM_CELL_OBJECT_3 (s
));
462 SCM_DEFINE (scm_symbol_pref
, "symbol-pref", 1, 0, 0,
464 "Return the @dfn{property list} currently associated with @var{symbol}.")
465 #define FUNC_NAME s_scm_symbol_pref
467 SCM_VALIDATE_SYMBOL (1, s
);
468 return SCM_CDR (SCM_CELL_OBJECT_3 (s
));
473 SCM_DEFINE (scm_symbol_fset_x
, "symbol-fset!", 2, 0, 0,
475 "Change the binding of @var{symbol}'s function slot.")
476 #define FUNC_NAME s_scm_symbol_fset_x
478 SCM_VALIDATE_SYMBOL (1, s
);
479 SCM_SETCAR (SCM_CELL_OBJECT_3 (s
), val
);
480 return SCM_UNSPECIFIED
;
485 SCM_DEFINE (scm_symbol_pset_x
, "symbol-pset!", 2, 0, 0,
487 "Change the binding of @var{symbol}'s property slot.")
488 #define FUNC_NAME s_scm_symbol_pset_x
490 SCM_VALIDATE_SYMBOL (1, s
);
491 SCM_SETCDR (SCM_CELL_OBJECT_3 (s
), val
);
492 return SCM_UNSPECIFIED
;
497 scm_from_locale_symbol (const char *sym
)
499 return scm_from_locale_symboln (sym
, -1);
503 scm_from_locale_symboln (const char *sym
, size_t len
)
505 SCM str
= scm_from_locale_stringn (sym
, len
);
506 return scm_i_str2symbol (str
);
510 scm_take_locale_symboln (char *sym
, size_t len
)
514 str
= scm_take_locale_stringn (sym
, len
);
515 return scm_i_str2symbol (str
);
519 scm_take_locale_symbol (char *sym
)
521 return scm_take_locale_symboln (sym
, (size_t)-1);
525 scm_from_latin1_symbol (const char *sym
)
527 return scm_from_latin1_symboln (sym
, -1);
531 scm_from_latin1_symboln (const char *sym
, size_t len
)
536 if (len
== (size_t) -1)
538 hash
= scm_i_latin1_string_hash (sym
, len
);
540 ret
= lookup_interned_latin1_symbol (sym
, len
, hash
);
541 if (scm_is_false (ret
))
543 SCM str
= scm_from_latin1_stringn (sym
, len
);
544 ret
= scm_i_str2symbol (str
);
551 scm_from_utf8_symbol (const char *sym
)
553 return scm_from_utf8_symboln (sym
, -1);
557 scm_from_utf8_symboln (const char *sym
, size_t len
)
562 if (len
== (size_t) -1)
564 hash
= scm_i_utf8_string_hash (sym
, len
);
566 ret
= lookup_interned_utf8_symbol (sym
, len
, hash
);
567 if (scm_is_false (ret
))
569 SCM str
= scm_from_utf8_stringn (sym
, len
);
570 ret
= scm_i_str2symbol (str
);
577 scm_symbols_prehistory ()
579 symbols
= scm_c_make_weak_set (5000);
/* NOTE(review): these two statements are the tail of a definition
   whose header and braces are not present in this listing; presumably
   the module's initialization routine -- confirm against the complete
   file.  The included file is presumably the snarfed output for this
   module's SCM_DEFINE forms -- confirm.  */
586 #include "libguile/symbols.x"
/* Create the string " g" that gensym uses when called without a
   prefix.  */
588 default_gensym_prefix
= scm_from_latin1_string (" g");