Simplify the interpreter for trivial inits and no letrec
[bpt/guile.git] / libguile / symbols.c
CommitLineData
/* Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2003, 2004,
 *   2006, 2009, 2011, 2013 Free Software Foundation, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 3 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301 USA
 */
1bbd0b84 19
1bbd0b84 20
0f2d19dd 21\f
dbb605f5 22#ifdef HAVE_CONFIG_H
cf007485
RB
23# include <config.h>
24#endif
0f2d19dd 25
f80d15c5
AW
26#include <unistr.h>
27
a0599745
MD
28#include "libguile/_scm.h"
29#include "libguile/chars.h"
30#include "libguile/eval.h"
ba393257 31#include "libguile/hash.h"
fb43bf74 32#include "libguile/smob.h"
a0599745
MD
33#include "libguile/variable.h"
34#include "libguile/alist.h"
7e73eaee 35#include "libguile/fluids.h"
a0599745
MD
36#include "libguile/strings.h"
37#include "libguile/vectors.h"
7887be7d 38#include "libguile/weak-set.h"
eb8db440 39#include "libguile/modules.h"
1206efbe
MV
40#include "libguile/read.h"
41#include "libguile/srfi-13.h"
a0599745
MD
42
43#include "libguile/validate.h"
44#include "libguile/symbols.h"
0f2d19dd 45
22fc179a
HWN
46#include "libguile/private-options.h"
47
48
95b88819
GH
49#ifdef HAVE_STRING_H
50#include <string.h>
51#endif
52
0f2d19dd
JB
53\f
54
0f979f3f
DH
55static SCM symbols;
56
a4c91488
MD
57#ifdef GUILE_DEBUG
58SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0,
59 (),
60 "Return the system symbol obarray.")
61#define FUNC_NAME s_scm_sys_symbols
62{
63 return symbols;
64}
65#undef FUNC_NAME
66#endif
67
0f979f3f
DH
68\f
69
0f2d19dd
JB
70/* {Symbols}
71 */
72
c35738c1
MD
73unsigned long
74scm_i_hash_symbol (SCM obj, unsigned long n, void *closure)
75{
3ee86942 76 return scm_i_symbol_hash (obj) % n;
c35738c1 77}
1cc91f1b 78
e0c83bf5
AW
79struct string_lookup_data
80{
17072fd2 81 SCM string;
e0c83bf5
AW
82 unsigned long string_hash;
83};
84
17072fd2
AW
85static int
86string_lookup_predicate_fn (SCM sym, void *closure)
e0c83bf5
AW
87{
88 struct string_lookup_data *data = closure;
89
17072fd2
AW
90 if (scm_i_symbol_hash (sym) == data->string_hash
91 && scm_i_symbol_length (sym) == scm_i_string_length (data->string))
fd0a5bbc 92 {
17072fd2
AW
93 size_t n = scm_i_symbol_length (sym);
94 while (n--)
95 if (scm_i_symbol_ref (sym, n) != scm_i_string_ref (data->string, n))
96 return 0;
97 return 1;
fd0a5bbc 98 }
17072fd2
AW
99 else
100 return 0;
e0c83bf5 101}
488b10b5 102
e0c83bf5
AW
103static SCM
104lookup_interned_symbol (SCM name, unsigned long raw_hash)
105{
106 struct string_lookup_data data;
e0c83bf5 107
17072fd2 108 data.string = name;
e0c83bf5
AW
109 data.string_hash = raw_hash;
110
7887be7d
AW
111 return scm_c_weak_set_lookup (symbols, raw_hash,
112 string_lookup_predicate_fn,
113 &data, SCM_BOOL_F);
fd0a5bbc 114}
3ee86942 115
30c282bf
AW
/* Closure passed to latin1_lookup_predicate_fn when searching the
   symbol table for a symbol named by a Latin-1 byte buffer.  */
struct latin1_lookup_data
{
  /* The bytes of the wanted name; LEN of them are compared.  */
  const char *str;
  /* Number of bytes in STR.  */
  size_t len;
  /* Raw hash of the name, compared against each candidate's hash.  */
  unsigned long string_hash;
};
122
123static int
124latin1_lookup_predicate_fn (SCM sym, void *closure)
125{
126 struct latin1_lookup_data *data = closure;
127
128 return scm_i_symbol_hash (sym) == data->string_hash
129 && scm_i_is_narrow_symbol (sym)
130 && scm_i_symbol_length (sym) == data->len
131 && strncmp (scm_i_symbol_chars (sym), data->str, data->len) == 0;
132}
133
134static SCM
135lookup_interned_latin1_symbol (const char *str, size_t len,
136 unsigned long raw_hash)
137{
138 struct latin1_lookup_data data;
30c282bf
AW
139
140 data.str = str;
141 data.len = len;
142 data.string_hash = raw_hash;
143
7887be7d
AW
144 return scm_c_weak_set_lookup (symbols, raw_hash,
145 latin1_lookup_predicate_fn,
146 &data, SCM_BOOL_F);
30c282bf
AW
147}
148
f80d15c5
AW
/* Closure passed to utf8_lookup_predicate_fn when searching the symbol
   table for a symbol named by a UTF-8 byte buffer.  */
struct utf8_lookup_data
{
  /* The UTF-8 encoded bytes of the wanted name.  */
  const char *str;
  /* Number of bytes (not characters) in STR.  */
  size_t len;
  /* Raw hash of the name, compared against each candidate's hash.  */
  unsigned long string_hash;
};
155
156static int
157utf8_string_equals_wide_string (const scm_t_uint8 *narrow, size_t nlen,
158 const scm_t_wchar *wide, size_t wlen)
159{
160 size_t byte_idx = 0, char_idx = 0;
161
162 while (byte_idx < nlen && char_idx < wlen)
163 {
164 ucs4_t c;
165 int nbytes;
166
167 nbytes = u8_mbtouc (&c, narrow + byte_idx, nlen - byte_idx);
168 if (nbytes == 0)
169 break;
8c76a897 170 else if (c == 0xfffd)
f80d15c5
AW
171 /* Bad UTF-8. */
172 return 0;
173 else if (c != wide[char_idx])
174 return 0;
175
176 byte_idx += nbytes;
177 char_idx++;
178 }
179
180 return byte_idx == nlen && char_idx == wlen;
181}
182
183static int
184utf8_lookup_predicate_fn (SCM sym, void *closure)
185{
186 struct utf8_lookup_data *data = closure;
187
188 if (scm_i_symbol_hash (sym) != data->string_hash)
189 return 0;
190
191 if (scm_i_is_narrow_symbol (sym))
192 return (scm_i_symbol_length (sym) == data->len
193 && strncmp (scm_i_symbol_chars (sym), data->str, data->len) == 0);
194 else
195 return utf8_string_equals_wide_string ((const scm_t_uint8 *) data->str,
196 data->len,
197 scm_i_symbol_wide_chars (sym),
198 scm_i_symbol_length (sym));
199}
200
201static SCM
202lookup_interned_utf8_symbol (const char *str, size_t len,
203 unsigned long raw_hash)
204{
205 struct utf8_lookup_data data;
206
207 data.str = str;
208 data.len = len;
209 data.string_hash = raw_hash;
210
211 return scm_c_weak_set_lookup (symbols, raw_hash,
212 utf8_lookup_predicate_fn,
213 &data, SCM_BOOL_F);
214}
215
7887be7d
AW
216static int
217symbol_lookup_predicate_fn (SCM sym, void *closure)
e0c83bf5 218{
21041372 219 SCM other = SCM_PACK_POINTER (closure);
e0c83bf5 220
7887be7d
AW
221 if (scm_i_symbol_hash (sym) == scm_i_symbol_hash (other)
222 && scm_i_symbol_length (sym) == scm_i_symbol_length (other))
e0c83bf5 223 {
7887be7d
AW
224 if (scm_i_is_narrow_symbol (sym))
225 return scm_i_is_narrow_symbol (other)
226 && (strncmp (scm_i_symbol_chars (sym),
227 scm_i_symbol_chars (other),
228 scm_i_symbol_length (other)) == 0);
229 else
230 return scm_is_true
231 (scm_string_equal_p (scm_symbol_to_string (sym),
232 scm_symbol_to_string (other)));
e0c83bf5 233 }
7887be7d 234 return 0;
e0c83bf5 235}
7887be7d 236
fd0a5bbc 237static SCM
e23106d5 238scm_i_str2symbol (SCM str)
fd0a5bbc
HWN
239{
240 SCM symbol;
e23106d5 241 size_t raw_hash = scm_i_string_hash (str);
b52e071b 242
e23106d5 243 symbol = lookup_interned_symbol (str, raw_hash);
e0c83bf5
AW
244 if (scm_is_true (symbol))
245 return symbol;
246 else
05588a1a
LC
247 {
248 /* The symbol was not found, create it. */
249 symbol = scm_i_make_symbol (str, 0, raw_hash,
250 scm_cons (SCM_BOOL_F, SCM_EOL));
7887be7d
AW
251
252 /* Might return a different symbol, if another one was interned at
253 the same time. */
254 return scm_c_weak_set_add_x (symbols, raw_hash,
255 symbol_lookup_predicate_fn,
21041372 256 SCM_UNPACK_POINTER (symbol), symbol);
05588a1a 257 }
b52e071b
DH
258}
259
fd0a5bbc 260
3ee86942 261static SCM
e23106d5 262scm_i_str2uninterned_symbol (SCM str)
ac48757b 263{
e23106d5 264 size_t raw_hash = scm_i_string_hash (str);
3ee86942 265
6869328b
MV
266 return scm_i_make_symbol (str, SCM_I_F_SYMBOL_UNINTERNED,
267 raw_hash, scm_cons (SCM_BOOL_F, SCM_EOL));
b52e071b
DH
268}
269
3b3b36dd 270SCM_DEFINE (scm_symbol_p, "symbol?", 1, 0, 0,
8e93e199 271 (SCM obj),
1e6808ea
MG
272 "Return @code{#t} if @var{obj} is a symbol, otherwise return\n"
273 "@code{#f}.")
1bbd0b84 274#define FUNC_NAME s_scm_symbol_p
0f2d19dd 275{
3ee86942 276 return scm_from_bool (scm_is_symbol (obj));
0f2d19dd 277}
1bbd0b84 278#undef FUNC_NAME
0f2d19dd 279
ac48757b
MV
280SCM_DEFINE (scm_symbol_interned_p, "symbol-interned?", 1, 0, 0,
281 (SCM symbol),
282 "Return @code{#t} if @var{symbol} is interned, otherwise return\n"
283 "@code{#f}.")
284#define FUNC_NAME s_scm_symbol_interned_p
285{
286 SCM_VALIDATE_SYMBOL (1, symbol);
3ee86942 287 return scm_from_bool (scm_i_symbol_is_interned (symbol));
ac48757b
MV
288}
289#undef FUNC_NAME
290
291SCM_DEFINE (scm_make_symbol, "make-symbol", 1, 0, 0,
292 (SCM name),
293 "Return a new uninterned symbol with the name @var{name}. "
294 "The returned symbol is guaranteed to be unique and future "
d58d5bfc 295 "calls to @code{string->symbol} will not return it.")
ac48757b
MV
296#define FUNC_NAME s_scm_make_symbol
297{
ac48757b 298 SCM_VALIDATE_STRING (1, name);
e23106d5 299 return scm_i_str2uninterned_symbol (name);
ac48757b
MV
300}
301#undef FUNC_NAME
302
3b3b36dd 303SCM_DEFINE (scm_symbol_to_string, "symbol->string", 1, 0, 0,
1bbd0b84 304 (SCM s),
1e6808ea
MG
305 "Return the name of @var{symbol} as a string. If the symbol was\n"
306 "part of an object returned as the value of a literal expression\n"
7a095584 307 "(section @pxref{Literal expressions,,,r5rs, The Revised^5\n"
1e6808ea
MG
308 "Report on Scheme}) or by a call to the @code{read} procedure,\n"
309 "and its name contains alphabetic characters, then the string\n"
310 "returned will contain characters in the implementation's\n"
311 "preferred standard case---some implementations will prefer\n"
312 "upper case, others lower case. If the symbol was returned by\n"
313 "@code{string->symbol}, the case of characters in the string\n"
314 "returned will be the same as the case in the string that was\n"
315 "passed to @code{string->symbol}. It is an error to apply\n"
316 "mutation procedures like @code{string-set!} to strings returned\n"
317 "by this procedure.\n"
318 "\n"
942e5b91 319 "The following examples assume that the implementation's\n"
1e6808ea
MG
320 "standard case is lower case:\n"
321 "\n"
942e5b91 322 "@lisp\n"
1e6808ea
MG
323 "(symbol->string 'flying-fish) @result{} \"flying-fish\"\n"
324 "(symbol->string 'Martin) @result{} \"martin\"\n"
5ffe9968 325 "(symbol->string\n"
942e5b91
MG
326 " (string->symbol \"Malvina\")) @result{} \"Malvina\"\n"
327 "@end lisp")
1bbd0b84 328#define FUNC_NAME s_scm_symbol_to_string
0f2d19dd 329{
28b06554 330 SCM_VALIDATE_SYMBOL (1, s);
3ee86942 331 return scm_i_symbol_substring (s, 0, scm_i_symbol_length (s));
0f2d19dd 332}
1bbd0b84 333#undef FUNC_NAME
0f2d19dd
JB
334
335
3b3b36dd 336SCM_DEFINE (scm_string_to_symbol, "string->symbol", 1, 0, 0,
1e6808ea
MG
337 (SCM string),
338 "Return the symbol whose name is @var{string}. This procedure\n"
942e5b91
MG
339 "can create symbols with names containing special characters or\n"
340 "letters in the non-standard case, but it is usually a bad idea\n"
1e6808ea
MG
341 "to create such symbols because in some implementations of\n"
342 "Scheme they cannot be read as themselves. See\n"
343 "@code{symbol->string}.\n"
344 "\n"
942e5b91 345 "The following examples assume that the implementation's\n"
1e6808ea
MG
346 "standard case is lower case:\n"
347 "\n"
942e5b91
MG
348 "@lisp\n"
349 "(eq? 'mISSISSIppi 'mississippi) @result{} #t\n"
350 "(string->symbol \"mISSISSIppi\") @result{} @r{the symbol with name \"mISSISSIppi\"}\n"
351 "(eq? 'bitBlt (string->symbol \"bitBlt\")) @result{} #f\n"
352 "(eq? 'JollyWog\n"
353 " (string->symbol (symbol->string 'JollyWog))) @result{} #t\n"
354 "(string=? \"K. Harper, M.D.\"\n"
355 " (symbol->string\n"
356 " (string->symbol \"K. Harper, M.D.\"))) @result{}#t\n"
357 "@end lisp")
1bbd0b84 358#define FUNC_NAME s_scm_string_to_symbol
0f2d19dd 359{
1e6808ea 360 SCM_VALIDATE_STRING (1, string);
e23106d5 361 return scm_i_str2symbol (string);
0f2d19dd 362}
1bbd0b84 363#undef FUNC_NAME
0f2d19dd 364
1206efbe
MV
365SCM_DEFINE (scm_string_ci_to_symbol, "string-ci->symbol", 1, 0, 0,
366 (SCM str),
367 "Return the symbol whose name is @var{str}. @var{str} is\n"
368 "converted to lowercase before the conversion is done, if Guile\n"
369 "is currently reading symbols case-insensitively.")
370#define FUNC_NAME s_scm_string_ci_to_symbol
371{
372 return scm_string_to_symbol (SCM_CASE_INSENSITIVE_P
373 ? scm_string_downcase(str)
374 : str);
375}
376#undef FUNC_NAME
377
ceed7709
LC
378/* The default prefix for `gensym'd symbols. */
379static SCM default_gensym_prefix;
380
4496c9c1 381#define MAX_PREFIX_LENGTH 30
0f2d19dd 382
86d31dfe
MV
383SCM_DEFINE (scm_gensym, "gensym", 0, 1, 0,
384 (SCM prefix),
385 "Create a new symbol with a name constructed from a prefix and\n"
386 "a counter value. The string @var{prefix} can be specified as\n"
68dc153d 387 "an optional argument. Default prefix is @code{ g}. The counter\n"
86d31dfe
MV
388 "is increased by 1 at each call. There is no provision for\n"
389 "resetting the counter.")
390#define FUNC_NAME s_scm_gensym
0f2d19dd 391{
4496c9c1
AW
392 static int gensym_counter = 0;
393
3ee86942 394 SCM suffix, name;
4496c9c1
AW
395 int n, n_digits;
396 char buf[SCM_INTBUFLEN];
7426a638 397
86d31dfe 398 if (SCM_UNBNDP (prefix))
ceed7709
LC
399 prefix = default_gensym_prefix;
400
4496c9c1
AW
401 /* mutex in case another thread looks and incs at the exact same moment */
402 scm_i_scm_pthread_mutex_lock (&scm_i_misc_mutex);
403 n = gensym_counter++;
404 scm_i_pthread_mutex_unlock (&scm_i_misc_mutex);
3ee86942 405
4496c9c1
AW
406 n_digits = scm_iint2str (n, 10, buf);
407 suffix = scm_from_latin1_stringn (buf, n_digits);
3ee86942
MV
408 name = scm_string_append (scm_list_2 (prefix, suffix));
409 return scm_string_to_symbol (name);
0f2d19dd 410}
1bbd0b84 411#undef FUNC_NAME
0f2d19dd 412
86d31dfe
MV
413SCM_DEFINE (scm_symbol_hash, "symbol-hash", 1, 0, 0,
414 (SCM symbol),
415 "Return a hash value for @var{symbol}.")
416#define FUNC_NAME s_scm_symbol_hash
0f2d19dd 417{
86d31dfe 418 SCM_VALIDATE_SYMBOL (1, symbol);
3ee86942 419 return scm_from_ulong (scm_i_symbol_hash (symbol));
0f2d19dd 420}
1bbd0b84 421#undef FUNC_NAME
0f2d19dd 422
3b3b36dd 423SCM_DEFINE (scm_symbol_fref, "symbol-fref", 1, 0, 0,
1bbd0b84 424 (SCM s),
b7e64f8b 425 "Return the contents of the symbol @var{s}'s @dfn{function slot}.")
1bbd0b84 426#define FUNC_NAME s_scm_symbol_fref
0f2d19dd 427{
34d19ef6 428 SCM_VALIDATE_SYMBOL (1, s);
3ee86942 429 return SCM_CAR (SCM_CELL_OBJECT_3 (s));
0f2d19dd 430}
1bbd0b84 431#undef FUNC_NAME
0f2d19dd
JB
432
433
3b3b36dd 434SCM_DEFINE (scm_symbol_pref, "symbol-pref", 1, 0, 0,
1bbd0b84 435 (SCM s),
b7e64f8b
BT
436 "Return the @dfn{property list} currently associated with the\n"
437 "symbol @var{s}.")
1bbd0b84 438#define FUNC_NAME s_scm_symbol_pref
0f2d19dd 439{
34d19ef6 440 SCM_VALIDATE_SYMBOL (1, s);
3ee86942 441 return SCM_CDR (SCM_CELL_OBJECT_3 (s));
0f2d19dd 442}
1bbd0b84 443#undef FUNC_NAME
0f2d19dd
JB
444
445
3b3b36dd 446SCM_DEFINE (scm_symbol_fset_x, "symbol-fset!", 2, 0, 0,
1bbd0b84 447 (SCM s, SCM val),
b7e64f8b 448 "Change the binding of the symbol @var{s}'s function slot.")
1bbd0b84 449#define FUNC_NAME s_scm_symbol_fset_x
0f2d19dd 450{
34d19ef6 451 SCM_VALIDATE_SYMBOL (1, s);
3ee86942 452 SCM_SETCAR (SCM_CELL_OBJECT_3 (s), val);
0f2d19dd
JB
453 return SCM_UNSPECIFIED;
454}
1bbd0b84 455#undef FUNC_NAME
0f2d19dd
JB
456
457
3b3b36dd 458SCM_DEFINE (scm_symbol_pset_x, "symbol-pset!", 2, 0, 0,
1bbd0b84 459 (SCM s, SCM val),
b7e64f8b 460 "Change the binding of the symbol @var{s}'s property slot.")
1bbd0b84 461#define FUNC_NAME s_scm_symbol_pset_x
0f2d19dd 462{
34d19ef6 463 SCM_VALIDATE_SYMBOL (1, s);
3ee86942 464 SCM_SETCDR (SCM_CELL_OBJECT_3 (s), val);
0f2d19dd
JB
465 return SCM_UNSPECIFIED;
466}
1bbd0b84 467#undef FUNC_NAME
0f2d19dd 468
3ee86942
MV
469SCM
470scm_from_locale_symbol (const char *sym)
af68e5e5 471{
e23106d5 472 return scm_from_locale_symboln (sym, -1);
af68e5e5 473}
af68e5e5 474
3ee86942
MV
475SCM
476scm_from_locale_symboln (const char *sym, size_t len)
477{
e23106d5
MG
478 SCM str = scm_from_locale_stringn (sym, len);
479 return scm_i_str2symbol (str);
fd0a5bbc
HWN
480}
481
482SCM
483scm_take_locale_symboln (char *sym, size_t len)
484{
e23106d5 485 SCM str;
fd0a5bbc 486
e23106d5
MG
487 str = scm_take_locale_stringn (sym, len);
488 return scm_i_str2symbol (str);
fd0a5bbc
HWN
489}
490
491SCM
492scm_take_locale_symbol (char *sym)
493{
494 return scm_take_locale_symboln (sym, (size_t)-1);
3ee86942 495}
af68e5e5 496
ad5cbc47
AW
497SCM
498scm_from_latin1_symbol (const char *sym)
499{
500 return scm_from_latin1_symboln (sym, -1);
501}
502
503SCM
504scm_from_latin1_symboln (const char *sym, size_t len)
505{
30c282bf
AW
506 unsigned long hash;
507 SCM ret;
508
509 if (len == (size_t) -1)
510 len = strlen (sym);
511 hash = scm_i_latin1_string_hash (sym, len);
512
513 ret = lookup_interned_latin1_symbol (sym, len, hash);
514 if (scm_is_false (ret))
515 {
516 SCM str = scm_from_latin1_stringn (sym, len);
517 ret = scm_i_str2symbol (str);
518 }
519
520 return ret;
ad5cbc47
AW
521}
522
523SCM
524scm_from_utf8_symbol (const char *sym)
525{
526 return scm_from_utf8_symboln (sym, -1);
527}
528
529SCM
530scm_from_utf8_symboln (const char *sym, size_t len)
531{
f80d15c5
AW
532 unsigned long hash;
533 SCM ret;
534
535 if (len == (size_t) -1)
536 len = strlen (sym);
537 hash = scm_i_utf8_string_hash (sym, len);
538
539 ret = lookup_interned_utf8_symbol (sym, len, hash);
540 if (scm_is_false (ret))
541 {
542 SCM str = scm_from_utf8_stringn (sym, len);
543 ret = scm_i_str2symbol (str);
544 }
545
546 return ret;
ad5cbc47
AW
547}
548
0f979f3f
DH
549void
550scm_symbols_prehistory ()
551{
7887be7d 552 symbols = scm_c_make_weak_set (5000);
0f979f3f
DH
553}
554
555
0f2d19dd
JB
556void
557scm_init_symbols ()
0f2d19dd 558{
a0599745 559#include "libguile/symbols.x"
ceed7709
LC
560
561 default_gensym_prefix = scm_from_latin1_string (" g");
0f2d19dd 562}
89e00824
ML
563
564/*
565 Local Variables:
566 c-file-style: "gnu"
567 End:
568*/