add scm_{to,from}_{utf8,latin1}_string{n,}
[bpt/guile.git] / libguile / symbols.c
CommitLineData
05588a1a 1/* Copyright (C) 1995,1996,1997,1998,2000,2001, 2003, 2004, 2006, 2009 Free Software Foundation, Inc.
0f2d19dd 2 *
73be1d9e 3 * This library is free software; you can redistribute it and/or
53befeb7
NJ
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
0f2d19dd 7 *
53befeb7
NJ
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
0f2d19dd 12 *
73be1d9e
MV
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
53befeb7
NJ
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
73be1d9e 17 */
1bbd0b84 18
1bbd0b84 19
0f2d19dd 20\f
dbb605f5 21#ifdef HAVE_CONFIG_H
cf007485
RB
22# include <config.h>
23#endif
0f2d19dd 24
a0599745
MD
25#include "libguile/_scm.h"
26#include "libguile/chars.h"
27#include "libguile/eval.h"
ba393257 28#include "libguile/hash.h"
fb43bf74 29#include "libguile/smob.h"
a0599745
MD
30#include "libguile/variable.h"
31#include "libguile/alist.h"
7e73eaee 32#include "libguile/fluids.h"
a0599745
MD
33#include "libguile/strings.h"
34#include "libguile/vectors.h"
00ffa0e7 35#include "libguile/hashtab.h"
a0599745 36#include "libguile/weaks.h"
eb8db440 37#include "libguile/modules.h"
1206efbe
MV
38#include "libguile/read.h"
39#include "libguile/srfi-13.h"
a0599745
MD
40
41#include "libguile/validate.h"
42#include "libguile/symbols.h"
0f2d19dd 43
22fc179a
HWN
44#include "libguile/private-options.h"
45
46
95b88819
GH
47#ifdef HAVE_STRING_H
48#include <string.h>
49#endif
50
0f2d19dd
JB
51\f
52
0f979f3f
DH
53static SCM symbols;
54
a4c91488
MD
#ifdef GUILE_DEBUG
/* Debug-only primitive: hand the file-level symbol table object back to
   Scheme unchanged.  Only compiled when GUILE_DEBUG is defined.  */
SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0,
            (),
            "Return the system symbol obarray.")
#define FUNC_NAME s_scm_sys_symbols
{
  SCM obarray = symbols;

  return obarray;
}
#undef FUNC_NAME
#endif
65
0f979f3f
DH
66\f
67
0f2d19dd
JB
68/* {Symbols}
69 */
70
c35738c1
MD
71/* In order to optimize reading speed, this function breaks part of
72 * the hashtable abstraction. The optimizations are:
73 *
74 * 1. The argument string can be compared directly to symbol objects
75 * without first creating an SCM string object. (This would have
76 * been necessary if we had used the hashtable API in hashtab.h.)
77 *
3ee86942 78 * 2. We can use the raw hash value stored in scm_i_symbol_hash (sym)
c35738c1
MD
79 * to speed up lookup.
80 *
81 * Both optimizations might be possible without breaking the
82 * abstraction if the API in hashtab.c is improved.
83 */
84
85unsigned long
86scm_i_hash_symbol (SCM obj, unsigned long n, void *closure)
87{
3ee86942 88 return scm_i_symbol_hash (obj) % n;
c35738c1 89}
1cc91f1b 90
3ee86942 91static SCM
e23106d5 92lookup_interned_symbol (SCM name, unsigned long raw_hash)
b52e071b 93{
fd0a5bbc 94 /* Try to find the symbol in the symbols table */
488b10b5
LC
95 SCM result = SCM_BOOL_F;
96 SCM bucket, elt, previous_elt;
7af53150 97 size_t len;
fd0a5bbc
HWN
98 unsigned long hash = raw_hash % SCM_HASHTABLE_N_BUCKETS (symbols);
99
7af53150 100 len = scm_i_string_length (name);
488b10b5 101 bucket = SCM_HASHTABLE_BUCKET (symbols, hash);
7af53150 102
488b10b5
LC
103 for (elt = bucket, previous_elt = SCM_BOOL_F;
104 !scm_is_null (elt);
105 previous_elt = elt, elt = SCM_CDR (elt))
fd0a5bbc 106 {
3a2de079
LC
107 SCM pair, sym;
108
488b10b5 109 pair = SCM_CAR (elt);
3a2de079
LC
110 if (!scm_is_pair (pair))
111 abort ();
488b10b5
LC
112
113 if (SCM_WEAK_PAIR_CAR_DELETED_P (pair))
114 {
115 /* PAIR is a weak pair whose key got nullified: remove it from
116 BUCKET. */
117 /* FIXME: Since this is done lazily, i.e., only when a new symbol
118 is to be inserted in a bucket containing deleted symbols, the
119 number of items in the hash table may remain erroneous for some
120 time, thus precluding proper rehashing. */
121 if (previous_elt != SCM_BOOL_F)
122 SCM_SETCDR (previous_elt, SCM_CDR (elt));
123 else
124 bucket = SCM_CDR (elt);
125
126 SCM_HASHTABLE_DECREMENT (symbols);
127 continue;
128 }
3a2de079
LC
129
130 sym = SCM_CAR (pair);
131
fd0a5bbc
HWN
132 if (scm_i_symbol_hash (sym) == raw_hash
133 && scm_i_symbol_length (sym) == len)
134 {
e23106d5
MG
135 size_t i = len;
136
137 /* Slightly faster path for comparing narrow to narrow. */
138 if (scm_i_is_narrow_string (name) && scm_i_is_narrow_symbol (sym))
139 {
140 const char *chrs = scm_i_symbol_chars (sym);
141 const char *str = scm_i_string_chars (name);
142
143 while (i != 0)
144 {
145 --i;
146 if (str[i] != chrs[i])
147 goto next_symbol;
148 }
149 }
150 else
151 {
152 /* Somewhat slower path for comparing narrow to wide or
153 wide to wide. */
154 while (i != 0)
155 {
156 --i;
157 if (scm_i_string_ref (name, i) != scm_i_symbol_ref (sym, i))
158 goto next_symbol;
159 }
160 }
fd0a5bbc 161
488b10b5
LC
162 /* We found it. */
163 result = sym;
164 break;
fd0a5bbc
HWN
165 }
166 next_symbol:
167 ;
168 }
169
488b10b5
LC
170 if (SCM_HASHTABLE_N_ITEMS (symbols) < SCM_HASHTABLE_LOWER (symbols))
171 /* We removed many symbols in this pass so trigger a rehashing. */
172 scm_i_rehash (symbols, scm_i_hash_symbol, 0, "lookup_interned_symbol");
173
174 return result;
fd0a5bbc 175}
3ee86942 176
05588a1a
LC
177/* Intern SYMBOL, an uninterned symbol. */
178static void
179intern_symbol (SCM symbol)
180{
181 SCM slot, cell;
182 unsigned long hash;
183
184 hash = scm_i_symbol_hash (symbol) % SCM_HASHTABLE_N_BUCKETS (symbols);
185 slot = SCM_HASHTABLE_BUCKET (symbols, hash);
186 cell = scm_cons (symbol, SCM_UNDEFINED);
187
188 SCM_SET_HASHTABLE_BUCKET (symbols, hash, scm_cons (cell, slot));
189 SCM_HASHTABLE_INCREMENT (symbols);
190
191 if (SCM_HASHTABLE_N_ITEMS (symbols) > SCM_HASHTABLE_UPPER (symbols))
192 scm_i_rehash (symbols, scm_i_hash_symbol, 0, "intern_symbol");
193}
194
fd0a5bbc 195static SCM
e23106d5 196scm_i_str2symbol (SCM str)
fd0a5bbc
HWN
197{
198 SCM symbol;
e23106d5 199 size_t raw_hash = scm_i_string_hash (str);
b52e071b 200
e23106d5 201 symbol = lookup_interned_symbol (str, raw_hash);
05588a1a
LC
202 if (scm_is_false (symbol))
203 {
204 /* The symbol was not found, create it. */
205 symbol = scm_i_make_symbol (str, 0, raw_hash,
206 scm_cons (SCM_BOOL_F, SCM_EOL));
207 intern_symbol (symbol);
208 }
b52e071b 209
05588a1a 210 return symbol;
b52e071b
DH
211}
212
fd0a5bbc 213
3ee86942 214static SCM
e23106d5 215scm_i_str2uninterned_symbol (SCM str)
ac48757b 216{
e23106d5 217 size_t raw_hash = scm_i_string_hash (str);
3ee86942 218
6869328b
MV
219 return scm_i_make_symbol (str, SCM_I_F_SYMBOL_UNINTERNED,
220 raw_hash, scm_cons (SCM_BOOL_F, SCM_EOL));
b52e071b
DH
221}
222
3b3b36dd 223SCM_DEFINE (scm_symbol_p, "symbol?", 1, 0, 0,
8e93e199 224 (SCM obj),
1e6808ea
MG
225 "Return @code{#t} if @var{obj} is a symbol, otherwise return\n"
226 "@code{#f}.")
1bbd0b84 227#define FUNC_NAME s_scm_symbol_p
0f2d19dd 228{
3ee86942 229 return scm_from_bool (scm_is_symbol (obj));
0f2d19dd 230}
1bbd0b84 231#undef FUNC_NAME
0f2d19dd 232
ac48757b
MV
233SCM_DEFINE (scm_symbol_interned_p, "symbol-interned?", 1, 0, 0,
234 (SCM symbol),
235 "Return @code{#t} if @var{symbol} is interned, otherwise return\n"
236 "@code{#f}.")
237#define FUNC_NAME s_scm_symbol_interned_p
238{
239 SCM_VALIDATE_SYMBOL (1, symbol);
3ee86942 240 return scm_from_bool (scm_i_symbol_is_interned (symbol));
ac48757b
MV
241}
242#undef FUNC_NAME
243
244SCM_DEFINE (scm_make_symbol, "make-symbol", 1, 0, 0,
245 (SCM name),
246 "Return a new uninterned symbol with the name @var{name}. "
247 "The returned symbol is guaranteed to be unique and future "
d58d5bfc 248 "calls to @code{string->symbol} will not return it.")
ac48757b
MV
249#define FUNC_NAME s_scm_make_symbol
250{
ac48757b 251 SCM_VALIDATE_STRING (1, name);
e23106d5 252 return scm_i_str2uninterned_symbol (name);
ac48757b
MV
253}
254#undef FUNC_NAME
255
3b3b36dd 256SCM_DEFINE (scm_symbol_to_string, "symbol->string", 1, 0, 0,
1bbd0b84 257 (SCM s),
1e6808ea
MG
258 "Return the name of @var{symbol} as a string. If the symbol was\n"
259 "part of an object returned as the value of a literal expression\n"
7a095584 260 "(section @pxref{Literal expressions,,,r5rs, The Revised^5\n"
1e6808ea
MG
261 "Report on Scheme}) or by a call to the @code{read} procedure,\n"
262 "and its name contains alphabetic characters, then the string\n"
263 "returned will contain characters in the implementation's\n"
264 "preferred standard case---some implementations will prefer\n"
265 "upper case, others lower case. If the symbol was returned by\n"
266 "@code{string->symbol}, the case of characters in the string\n"
267 "returned will be the same as the case in the string that was\n"
268 "passed to @code{string->symbol}. It is an error to apply\n"
269 "mutation procedures like @code{string-set!} to strings returned\n"
270 "by this procedure.\n"
271 "\n"
942e5b91 272 "The following examples assume that the implementation's\n"
1e6808ea
MG
273 "standard case is lower case:\n"
274 "\n"
942e5b91 275 "@lisp\n"
1e6808ea
MG
276 "(symbol->string 'flying-fish) @result{} \"flying-fish\"\n"
277 "(symbol->string 'Martin) @result{} \"martin\"\n"
5ffe9968 278 "(symbol->string\n"
942e5b91
MG
279 " (string->symbol \"Malvina\")) @result{} \"Malvina\"\n"
280 "@end lisp")
1bbd0b84 281#define FUNC_NAME s_scm_symbol_to_string
0f2d19dd 282{
28b06554 283 SCM_VALIDATE_SYMBOL (1, s);
3ee86942 284 return scm_i_symbol_substring (s, 0, scm_i_symbol_length (s));
0f2d19dd 285}
1bbd0b84 286#undef FUNC_NAME
0f2d19dd
JB
287
288
3b3b36dd 289SCM_DEFINE (scm_string_to_symbol, "string->symbol", 1, 0, 0,
1e6808ea
MG
290 (SCM string),
291 "Return the symbol whose name is @var{string}. This procedure\n"
942e5b91
MG
292 "can create symbols with names containing special characters or\n"
293 "letters in the non-standard case, but it is usually a bad idea\n"
1e6808ea
MG
294 "to create such symbols because in some implementations of\n"
295 "Scheme they cannot be read as themselves. See\n"
296 "@code{symbol->string}.\n"
297 "\n"
942e5b91 298 "The following examples assume that the implementation's\n"
1e6808ea
MG
299 "standard case is lower case:\n"
300 "\n"
942e5b91
MG
301 "@lisp\n"
302 "(eq? 'mISSISSIppi 'mississippi) @result{} #t\n"
303 "(string->symbol \"mISSISSIppi\") @result{} @r{the symbol with name \"mISSISSIppi\"}\n"
304 "(eq? 'bitBlt (string->symbol \"bitBlt\")) @result{} #f\n"
305 "(eq? 'JollyWog\n"
306 " (string->symbol (symbol->string 'JollyWog))) @result{} #t\n"
307 "(string=? \"K. Harper, M.D.\"\n"
308 " (symbol->string\n"
309 " (string->symbol \"K. Harper, M.D.\"))) @result{}#t\n"
310 "@end lisp")
1bbd0b84 311#define FUNC_NAME s_scm_string_to_symbol
0f2d19dd 312{
1e6808ea 313 SCM_VALIDATE_STRING (1, string);
e23106d5 314 return scm_i_str2symbol (string);
0f2d19dd 315}
1bbd0b84 316#undef FUNC_NAME
0f2d19dd 317
1206efbe
MV
318SCM_DEFINE (scm_string_ci_to_symbol, "string-ci->symbol", 1, 0, 0,
319 (SCM str),
320 "Return the symbol whose name is @var{str}. @var{str} is\n"
321 "converted to lowercase before the conversion is done, if Guile\n"
322 "is currently reading symbols case-insensitively.")
323#define FUNC_NAME s_scm_string_ci_to_symbol
324{
325 return scm_string_to_symbol (SCM_CASE_INSENSITIVE_P
326 ? scm_string_downcase(str)
327 : str);
328}
329#undef FUNC_NAME
330
86d31dfe 331#define MAX_PREFIX_LENGTH 30
0f2d19dd 332
86d31dfe
MV
333SCM_DEFINE (scm_gensym, "gensym", 0, 1, 0,
334 (SCM prefix),
335 "Create a new symbol with a name constructed from a prefix and\n"
336 "a counter value. The string @var{prefix} can be specified as\n"
68dc153d 337 "an optional argument. Default prefix is @code{ g}. The counter\n"
86d31dfe
MV
338 "is increased by 1 at each call. There is no provision for\n"
339 "resetting the counter.")
340#define FUNC_NAME s_scm_gensym
0f2d19dd 341{
7426a638 342 static int gensym_counter = 0;
3ee86942
MV
343
344 SCM suffix, name;
345 int n, n_digits;
346 char buf[SCM_INTBUFLEN];
7426a638 347
86d31dfe 348 if (SCM_UNBNDP (prefix))
3ee86942
MV
349 prefix = scm_from_locale_string (" g");
350
351 /* mutex in case another thread looks and incs at the exact same moment */
9de87eea 352 scm_i_scm_pthread_mutex_lock (&scm_i_misc_mutex);
3ee86942 353 n = gensym_counter++;
9de87eea 354 scm_i_pthread_mutex_unlock (&scm_i_misc_mutex);
3ee86942
MV
355
356 n_digits = scm_iint2str (n, 10, buf);
357 suffix = scm_from_locale_stringn (buf, n_digits);
358 name = scm_string_append (scm_list_2 (prefix, suffix));
359 return scm_string_to_symbol (name);
0f2d19dd 360}
1bbd0b84 361#undef FUNC_NAME
0f2d19dd 362
86d31dfe
MV
363SCM_DEFINE (scm_symbol_hash, "symbol-hash", 1, 0, 0,
364 (SCM symbol),
365 "Return a hash value for @var{symbol}.")
366#define FUNC_NAME s_scm_symbol_hash
0f2d19dd 367{
86d31dfe 368 SCM_VALIDATE_SYMBOL (1, symbol);
3ee86942 369 return scm_from_ulong (scm_i_symbol_hash (symbol));
0f2d19dd 370}
1bbd0b84 371#undef FUNC_NAME
0f2d19dd 372
3b3b36dd 373SCM_DEFINE (scm_symbol_fref, "symbol-fref", 1, 0, 0,
1bbd0b84 374 (SCM s),
b380b885 375 "Return the contents of @var{symbol}'s @dfn{function slot}.")
1bbd0b84 376#define FUNC_NAME s_scm_symbol_fref
0f2d19dd 377{
34d19ef6 378 SCM_VALIDATE_SYMBOL (1, s);
3ee86942 379 return SCM_CAR (SCM_CELL_OBJECT_3 (s));
0f2d19dd 380}
1bbd0b84 381#undef FUNC_NAME
0f2d19dd
JB
382
383
3b3b36dd 384SCM_DEFINE (scm_symbol_pref, "symbol-pref", 1, 0, 0,
1bbd0b84 385 (SCM s),
b380b885 386 "Return the @dfn{property list} currently associated with @var{symbol}.")
1bbd0b84 387#define FUNC_NAME s_scm_symbol_pref
0f2d19dd 388{
34d19ef6 389 SCM_VALIDATE_SYMBOL (1, s);
3ee86942 390 return SCM_CDR (SCM_CELL_OBJECT_3 (s));
0f2d19dd 391}
1bbd0b84 392#undef FUNC_NAME
0f2d19dd
JB
393
394
3b3b36dd 395SCM_DEFINE (scm_symbol_fset_x, "symbol-fset!", 2, 0, 0,
1bbd0b84 396 (SCM s, SCM val),
b380b885 397 "Change the binding of @var{symbol}'s function slot.")
1bbd0b84 398#define FUNC_NAME s_scm_symbol_fset_x
0f2d19dd 399{
34d19ef6 400 SCM_VALIDATE_SYMBOL (1, s);
3ee86942 401 SCM_SETCAR (SCM_CELL_OBJECT_3 (s), val);
0f2d19dd
JB
402 return SCM_UNSPECIFIED;
403}
1bbd0b84 404#undef FUNC_NAME
0f2d19dd
JB
405
406
3b3b36dd 407SCM_DEFINE (scm_symbol_pset_x, "symbol-pset!", 2, 0, 0,
1bbd0b84 408 (SCM s, SCM val),
b380b885 409 "Change the binding of @var{symbol}'s property slot.")
1bbd0b84 410#define FUNC_NAME s_scm_symbol_pset_x
0f2d19dd 411{
34d19ef6 412 SCM_VALIDATE_SYMBOL (1, s);
3ee86942 413 SCM_SETCDR (SCM_CELL_OBJECT_3 (s), val);
0f2d19dd
JB
414 return SCM_UNSPECIFIED;
415}
1bbd0b84 416#undef FUNC_NAME
0f2d19dd 417
3ee86942
MV
418SCM
419scm_from_locale_symbol (const char *sym)
af68e5e5 420{
e23106d5 421 return scm_from_locale_symboln (sym, -1);
af68e5e5 422}
af68e5e5 423
3ee86942
MV
424SCM
425scm_from_locale_symboln (const char *sym, size_t len)
426{
e23106d5
MG
427 SCM str = scm_from_locale_stringn (sym, len);
428 return scm_i_str2symbol (str);
fd0a5bbc
HWN
429}
430
431SCM
432scm_take_locale_symboln (char *sym, size_t len)
433{
e23106d5 434 SCM str;
fd0a5bbc 435
e23106d5
MG
436 str = scm_take_locale_stringn (sym, len);
437 return scm_i_str2symbol (str);
fd0a5bbc
HWN
438}
439
440SCM
441scm_take_locale_symbol (char *sym)
442{
443 return scm_take_locale_symboln (sym, (size_t)-1);
3ee86942 444}
af68e5e5 445
0f979f3f
DH
446void
447scm_symbols_prehistory ()
448{
e11e83f3 449 symbols = scm_make_weak_key_hash_table (scm_from_int (2139));
0f979f3f
DH
450}
451
452
0f2d19dd
JB
453void
454scm_init_symbols ()
0f2d19dd 455{
a0599745 456#include "libguile/symbols.x"
0f2d19dd 457}
89e00824
ML
458
459/*
460 Local Variables:
461 c-file-style: "gnu"
462 End:
463*/