Merge remote-tracking branch 'origin/stable-2.0'
[bpt/guile.git] / libguile / symbols.c
1 /* Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2003, 2004,
2 * 2006, 2009, 2011 Free Software Foundation, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
18 */
19
20
21 \f
22 #ifdef HAVE_CONFIG_H
23 # include <config.h>
24 #endif
25
26 #include <unistr.h>
27
28 #include "libguile/_scm.h"
29 #include "libguile/chars.h"
30 #include "libguile/eval.h"
31 #include "libguile/hash.h"
32 #include "libguile/smob.h"
33 #include "libguile/variable.h"
34 #include "libguile/alist.h"
35 #include "libguile/fluids.h"
36 #include "libguile/strings.h"
37 #include "libguile/vectors.h"
38 #include "libguile/weak-set.h"
39 #include "libguile/modules.h"
40 #include "libguile/read.h"
41 #include "libguile/srfi-13.h"
42
43 #include "libguile/validate.h"
44 #include "libguile/symbols.h"
45
46 #include "libguile/private-options.h"
47
48
49 #ifdef HAVE_STRING_H
50 #include <string.h>
51 #endif
52
53 \f
54
55 static SCM symbols;
56
#ifdef GUILE_DEBUG
/* Debugging aid, only compiled when GUILE_DEBUG is defined: hand the
   table of interned symbols back to Scheme.  */
SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0,
            (),
            "Return the system symbol obarray.")
#define FUNC_NAME s_scm_sys_symbols
{
  SCM obarray = symbols;

  return obarray;
}
#undef FUNC_NAME
#endif /* GUILE_DEBUG */
67
68 \f
69
70 /* {Symbols}
71 */
72
73 unsigned long
74 scm_i_hash_symbol (SCM obj, unsigned long n, void *closure)
75 {
76 return scm_i_symbol_hash (obj) % n;
77 }
78
79 struct string_lookup_data
80 {
81 SCM string;
82 unsigned long string_hash;
83 };
84
85 static int
86 string_lookup_predicate_fn (SCM sym, void *closure)
87 {
88 struct string_lookup_data *data = closure;
89
90 if (scm_i_symbol_hash (sym) == data->string_hash
91 && scm_i_symbol_length (sym) == scm_i_string_length (data->string))
92 {
93 size_t n = scm_i_symbol_length (sym);
94 while (n--)
95 if (scm_i_symbol_ref (sym, n) != scm_i_string_ref (data->string, n))
96 return 0;
97 return 1;
98 }
99 else
100 return 0;
101 }
102
103 static SCM
104 lookup_interned_symbol (SCM name, unsigned long raw_hash)
105 {
106 struct string_lookup_data data;
107
108 data.string = name;
109 data.string_hash = raw_hash;
110
111 return scm_c_weak_set_lookup (symbols, raw_hash,
112 string_lookup_predicate_fn,
113 &data, SCM_BOOL_F);
114 }
115
116 struct latin1_lookup_data
117 {
118 const char *str;
119 size_t len;
120 unsigned long string_hash;
121 };
122
123 static int
124 latin1_lookup_predicate_fn (SCM sym, void *closure)
125 {
126 struct latin1_lookup_data *data = closure;
127
128 return scm_i_symbol_hash (sym) == data->string_hash
129 && scm_i_is_narrow_symbol (sym)
130 && scm_i_symbol_length (sym) == data->len
131 && strncmp (scm_i_symbol_chars (sym), data->str, data->len) == 0;
132 }
133
134 static SCM
135 lookup_interned_latin1_symbol (const char *str, size_t len,
136 unsigned long raw_hash)
137 {
138 struct latin1_lookup_data data;
139
140 data.str = str;
141 data.len = len;
142 data.string_hash = raw_hash;
143
144 return scm_c_weak_set_lookup (symbols, raw_hash,
145 latin1_lookup_predicate_fn,
146 &data, SCM_BOOL_F);
147 }
148
149 struct utf8_lookup_data
150 {
151 const char *str;
152 size_t len;
153 unsigned long string_hash;
154 };
155
156 static int
157 utf8_string_equals_wide_string (const scm_t_uint8 *narrow, size_t nlen,
158 const scm_t_wchar *wide, size_t wlen)
159 {
160 size_t byte_idx = 0, char_idx = 0;
161
162 while (byte_idx < nlen && char_idx < wlen)
163 {
164 ucs4_t c;
165 int nbytes;
166
167 nbytes = u8_mbtouc (&c, narrow + byte_idx, nlen - byte_idx);
168 if (nbytes == 0)
169 break;
170 else if (nbytes < 0)
171 /* Bad UTF-8. */
172 return 0;
173 else if (c != wide[char_idx])
174 return 0;
175
176 byte_idx += nbytes;
177 char_idx++;
178 }
179
180 return byte_idx == nlen && char_idx == wlen;
181 }
182
183 static int
184 utf8_lookup_predicate_fn (SCM sym, void *closure)
185 {
186 struct utf8_lookup_data *data = closure;
187
188 if (scm_i_symbol_hash (sym) != data->string_hash)
189 return 0;
190
191 if (scm_i_is_narrow_symbol (sym))
192 return (scm_i_symbol_length (sym) == data->len
193 && strncmp (scm_i_symbol_chars (sym), data->str, data->len) == 0);
194 else
195 return utf8_string_equals_wide_string ((const scm_t_uint8 *) data->str,
196 data->len,
197 scm_i_symbol_wide_chars (sym),
198 scm_i_symbol_length (sym));
199 }
200
201 static SCM
202 lookup_interned_utf8_symbol (const char *str, size_t len,
203 unsigned long raw_hash)
204 {
205 struct utf8_lookup_data data;
206
207 data.str = str;
208 data.len = len;
209 data.string_hash = raw_hash;
210
211 return scm_c_weak_set_lookup (symbols, raw_hash,
212 utf8_lookup_predicate_fn,
213 &data, SCM_BOOL_F);
214 }
215
216 static int
217 symbol_lookup_predicate_fn (SCM sym, void *closure)
218 {
219 SCM other = SCM_PACK_POINTER (closure);
220
221 if (scm_i_symbol_hash (sym) == scm_i_symbol_hash (other)
222 && scm_i_symbol_length (sym) == scm_i_symbol_length (other))
223 {
224 if (scm_i_is_narrow_symbol (sym))
225 return scm_i_is_narrow_symbol (other)
226 && (strncmp (scm_i_symbol_chars (sym),
227 scm_i_symbol_chars (other),
228 scm_i_symbol_length (other)) == 0);
229 else
230 return scm_is_true
231 (scm_string_equal_p (scm_symbol_to_string (sym),
232 scm_symbol_to_string (other)));
233 }
234 return 0;
235 }
236
237 static SCM
238 scm_i_str2symbol (SCM str)
239 {
240 SCM symbol;
241 size_t raw_hash = scm_i_string_hash (str);
242
243 symbol = lookup_interned_symbol (str, raw_hash);
244 if (scm_is_true (symbol))
245 return symbol;
246 else
247 {
248 /* The symbol was not found, create it. */
249 symbol = scm_i_make_symbol (str, 0, raw_hash,
250 scm_cons (SCM_BOOL_F, SCM_EOL));
251
252 /* Might return a different symbol, if another one was interned at
253 the same time. */
254 return scm_c_weak_set_add_x (symbols, raw_hash,
255 symbol_lookup_predicate_fn,
256 SCM_UNPACK_POINTER (symbol), symbol);
257 }
258 }
259
260
261 static SCM
262 scm_i_str2uninterned_symbol (SCM str)
263 {
264 size_t raw_hash = scm_i_string_hash (str);
265
266 return scm_i_make_symbol (str, SCM_I_F_SYMBOL_UNINTERNED,
267 raw_hash, scm_cons (SCM_BOOL_F, SCM_EOL));
268 }
269
270 SCM_DEFINE (scm_symbol_p, "symbol?", 1, 0, 0,
271 (SCM obj),
272 "Return @code{#t} if @var{obj} is a symbol, otherwise return\n"
273 "@code{#f}.")
274 #define FUNC_NAME s_scm_symbol_p
275 {
276 return scm_from_bool (scm_is_symbol (obj));
277 }
278 #undef FUNC_NAME
279
280 SCM_DEFINE (scm_symbol_interned_p, "symbol-interned?", 1, 0, 0,
281 (SCM symbol),
282 "Return @code{#t} if @var{symbol} is interned, otherwise return\n"
283 "@code{#f}.")
284 #define FUNC_NAME s_scm_symbol_interned_p
285 {
286 SCM_VALIDATE_SYMBOL (1, symbol);
287 return scm_from_bool (scm_i_symbol_is_interned (symbol));
288 }
289 #undef FUNC_NAME
290
291 SCM_DEFINE (scm_make_symbol, "make-symbol", 1, 0, 0,
292 (SCM name),
293 "Return a new uninterned symbol with the name @var{name}. "
294 "The returned symbol is guaranteed to be unique and future "
295 "calls to @code{string->symbol} will not return it.")
296 #define FUNC_NAME s_scm_make_symbol
297 {
298 SCM_VALIDATE_STRING (1, name);
299 return scm_i_str2uninterned_symbol (name);
300 }
301 #undef FUNC_NAME
302
303 SCM_DEFINE (scm_symbol_to_string, "symbol->string", 1, 0, 0,
304 (SCM s),
305 "Return the name of @var{symbol} as a string. If the symbol was\n"
306 "part of an object returned as the value of a literal expression\n"
307 "(section @pxref{Literal expressions,,,r5rs, The Revised^5\n"
308 "Report on Scheme}) or by a call to the @code{read} procedure,\n"
309 "and its name contains alphabetic characters, then the string\n"
310 "returned will contain characters in the implementation's\n"
311 "preferred standard case---some implementations will prefer\n"
312 "upper case, others lower case. If the symbol was returned by\n"
313 "@code{string->symbol}, the case of characters in the string\n"
314 "returned will be the same as the case in the string that was\n"
315 "passed to @code{string->symbol}. It is an error to apply\n"
316 "mutation procedures like @code{string-set!} to strings returned\n"
317 "by this procedure.\n"
318 "\n"
319 "The following examples assume that the implementation's\n"
320 "standard case is lower case:\n"
321 "\n"
322 "@lisp\n"
323 "(symbol->string 'flying-fish) @result{} \"flying-fish\"\n"
324 "(symbol->string 'Martin) @result{} \"martin\"\n"
325 "(symbol->string\n"
326 " (string->symbol \"Malvina\")) @result{} \"Malvina\"\n"
327 "@end lisp")
328 #define FUNC_NAME s_scm_symbol_to_string
329 {
330 SCM_VALIDATE_SYMBOL (1, s);
331 return scm_i_symbol_substring (s, 0, scm_i_symbol_length (s));
332 }
333 #undef FUNC_NAME
334
335
336 SCM_DEFINE (scm_string_to_symbol, "string->symbol", 1, 0, 0,
337 (SCM string),
338 "Return the symbol whose name is @var{string}. This procedure\n"
339 "can create symbols with names containing special characters or\n"
340 "letters in the non-standard case, but it is usually a bad idea\n"
341 "to create such symbols because in some implementations of\n"
342 "Scheme they cannot be read as themselves. See\n"
343 "@code{symbol->string}.\n"
344 "\n"
345 "The following examples assume that the implementation's\n"
346 "standard case is lower case:\n"
347 "\n"
348 "@lisp\n"
349 "(eq? 'mISSISSIppi 'mississippi) @result{} #t\n"
350 "(string->symbol \"mISSISSIppi\") @result{} @r{the symbol with name \"mISSISSIppi\"}\n"
351 "(eq? 'bitBlt (string->symbol \"bitBlt\")) @result{} #f\n"
352 "(eq? 'JollyWog\n"
353 " (string->symbol (symbol->string 'JollyWog))) @result{} #t\n"
354 "(string=? \"K. Harper, M.D.\"\n"
355 " (symbol->string\n"
356 " (string->symbol \"K. Harper, M.D.\"))) @result{}#t\n"
357 "@end lisp")
358 #define FUNC_NAME s_scm_string_to_symbol
359 {
360 SCM_VALIDATE_STRING (1, string);
361 return scm_i_str2symbol (string);
362 }
363 #undef FUNC_NAME
364
365 SCM_DEFINE (scm_string_ci_to_symbol, "string-ci->symbol", 1, 0, 0,
366 (SCM str),
367 "Return the symbol whose name is @var{str}. @var{str} is\n"
368 "converted to lowercase before the conversion is done, if Guile\n"
369 "is currently reading symbols case-insensitively.")
370 #define FUNC_NAME s_scm_string_ci_to_symbol
371 {
372 return scm_string_to_symbol (SCM_CASE_INSENSITIVE_P
373 ? scm_string_downcase(str)
374 : str);
375 }
376 #undef FUNC_NAME
377
378 /* The default prefix for `gensym'd symbols. */
379 static SCM default_gensym_prefix;
380
381 #define MAX_PREFIX_LENGTH 30
382
383 SCM_DEFINE (scm_gensym, "gensym", 0, 1, 0,
384 (SCM prefix),
385 "Create a new symbol with a name constructed from a prefix and\n"
386 "a counter value. The string @var{prefix} can be specified as\n"
387 "an optional argument. Default prefix is @code{ g}. The counter\n"
388 "is increased by 1 at each call. There is no provision for\n"
389 "resetting the counter.")
390 #define FUNC_NAME s_scm_gensym
391 {
392 static int gensym_counter = 0;
393
394 SCM suffix, name;
395 int n, n_digits;
396 char buf[SCM_INTBUFLEN];
397
398 if (SCM_UNBNDP (prefix))
399 prefix = default_gensym_prefix;
400
401 /* mutex in case another thread looks and incs at the exact same moment */
402 scm_i_scm_pthread_mutex_lock (&scm_i_misc_mutex);
403 n = gensym_counter++;
404 scm_i_pthread_mutex_unlock (&scm_i_misc_mutex);
405
406 n_digits = scm_iint2str (n, 10, buf);
407 suffix = scm_from_latin1_stringn (buf, n_digits);
408 name = scm_string_append (scm_list_2 (prefix, suffix));
409 return scm_string_to_symbol (name);
410 }
411 #undef FUNC_NAME
412
413 SCM_DEFINE (scm_symbol_hash, "symbol-hash", 1, 0, 0,
414 (SCM symbol),
415 "Return a hash value for @var{symbol}.")
416 #define FUNC_NAME s_scm_symbol_hash
417 {
418 SCM_VALIDATE_SYMBOL (1, symbol);
419 return scm_from_ulong (scm_i_symbol_hash (symbol));
420 }
421 #undef FUNC_NAME
422
423 SCM_DEFINE (scm_symbol_fref, "symbol-fref", 1, 0, 0,
424 (SCM s),
425 "Return the contents of the symbol @var{s}'s @dfn{function slot}.")
426 #define FUNC_NAME s_scm_symbol_fref
427 {
428 SCM_VALIDATE_SYMBOL (1, s);
429 return SCM_CAR (SCM_CELL_OBJECT_3 (s));
430 }
431 #undef FUNC_NAME
432
433
434 SCM_DEFINE (scm_symbol_pref, "symbol-pref", 1, 0, 0,
435 (SCM s),
436 "Return the @dfn{property list} currently associated with the\n"
437 "symbol @var{s}.")
438 #define FUNC_NAME s_scm_symbol_pref
439 {
440 SCM_VALIDATE_SYMBOL (1, s);
441 return SCM_CDR (SCM_CELL_OBJECT_3 (s));
442 }
443 #undef FUNC_NAME
444
445
446 SCM_DEFINE (scm_symbol_fset_x, "symbol-fset!", 2, 0, 0,
447 (SCM s, SCM val),
448 "Change the binding of the symbol @var{s}'s function slot.")
449 #define FUNC_NAME s_scm_symbol_fset_x
450 {
451 SCM_VALIDATE_SYMBOL (1, s);
452 SCM_SETCAR (SCM_CELL_OBJECT_3 (s), val);
453 return SCM_UNSPECIFIED;
454 }
455 #undef FUNC_NAME
456
457
458 SCM_DEFINE (scm_symbol_pset_x, "symbol-pset!", 2, 0, 0,
459 (SCM s, SCM val),
460 "Change the binding of the symbol @var{s}'s property slot.")
461 #define FUNC_NAME s_scm_symbol_pset_x
462 {
463 SCM_VALIDATE_SYMBOL (1, s);
464 SCM_SETCDR (SCM_CELL_OBJECT_3 (s), val);
465 return SCM_UNSPECIFIED;
466 }
467 #undef FUNC_NAME
468
469 SCM
470 scm_from_locale_symbol (const char *sym)
471 {
472 return scm_from_locale_symboln (sym, -1);
473 }
474
475 SCM
476 scm_from_locale_symboln (const char *sym, size_t len)
477 {
478 SCM str = scm_from_locale_stringn (sym, len);
479 return scm_i_str2symbol (str);
480 }
481
482 SCM
483 scm_take_locale_symboln (char *sym, size_t len)
484 {
485 SCM str;
486
487 str = scm_take_locale_stringn (sym, len);
488 return scm_i_str2symbol (str);
489 }
490
491 SCM
492 scm_take_locale_symbol (char *sym)
493 {
494 return scm_take_locale_symboln (sym, (size_t)-1);
495 }
496
497 SCM
498 scm_from_latin1_symbol (const char *sym)
499 {
500 return scm_from_latin1_symboln (sym, -1);
501 }
502
503 SCM
504 scm_from_latin1_symboln (const char *sym, size_t len)
505 {
506 unsigned long hash;
507 SCM ret;
508
509 if (len == (size_t) -1)
510 len = strlen (sym);
511 hash = scm_i_latin1_string_hash (sym, len);
512
513 ret = lookup_interned_latin1_symbol (sym, len, hash);
514 if (scm_is_false (ret))
515 {
516 SCM str = scm_from_latin1_stringn (sym, len);
517 ret = scm_i_str2symbol (str);
518 }
519
520 return ret;
521 }
522
523 SCM
524 scm_from_utf8_symbol (const char *sym)
525 {
526 return scm_from_utf8_symboln (sym, -1);
527 }
528
529 SCM
530 scm_from_utf8_symboln (const char *sym, size_t len)
531 {
532 unsigned long hash;
533 SCM ret;
534
535 if (len == (size_t) -1)
536 len = strlen (sym);
537 hash = scm_i_utf8_string_hash (sym, len);
538
539 ret = lookup_interned_utf8_symbol (sym, len, hash);
540 if (scm_is_false (ret))
541 {
542 SCM str = scm_from_utf8_stringn (sym, len);
543 ret = scm_i_str2symbol (str);
544 }
545
546 return ret;
547 }
548
549 void
550 scm_symbols_prehistory ()
551 {
552 symbols = scm_c_make_weak_set (5000);
553 }
554
555
556 void
557 scm_init_symbols ()
558 {
559 #include "libguile/symbols.x"
560
561 default_gensym_prefix = scm_from_latin1_string (" g");
562 }
563
564 /*
565 Local Variables:
566 c-file-style: "gnu"
567 End:
568 */