Add initial support for wide symbols
[bpt/guile.git] / libguile / symbols.c
1 /* Copyright (C) 1995,1996,1997,1998,2000,2001, 2003, 2004, 2006, 2009 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
17 */
18
19
20 \f
21 #ifdef HAVE_CONFIG_H
22 # include <config.h>
23 #endif
24
25 #include "libguile/_scm.h"
26 #include "libguile/chars.h"
27 #include "libguile/eval.h"
28 #include "libguile/hash.h"
29 #include "libguile/smob.h"
30 #include "libguile/variable.h"
31 #include "libguile/alist.h"
32 #include "libguile/fluids.h"
33 #include "libguile/strings.h"
34 #include "libguile/vectors.h"
35 #include "libguile/hashtab.h"
36 #include "libguile/weaks.h"
37 #include "libguile/modules.h"
38 #include "libguile/read.h"
39 #include "libguile/srfi-13.h"
40
41 #include "libguile/validate.h"
42 #include "libguile/symbols.h"
43
44 #include "libguile/private-options.h"
45
46
47 #ifdef HAVE_STRING_H
48 #include <string.h>
49 #endif
50
51 \f
52
/* The table of interned symbols.  It is created as a weak-key hash
   table in scm_symbols_prehistory, searched by lookup_interned_symbol
   and extended by intern_symbol.  */
static SCM symbols;
54
#ifdef GUILE_DEBUG
/* Only compiled when GUILE_DEBUG is defined: hands the file-local
   symbol table object itself back to the caller.  */
SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0,
            (),
            "Return the system symbol obarray.")
#define FUNC_NAME s_scm_sys_symbols
{
  SCM obarray = symbols;

  return obarray;
}
#undef FUNC_NAME
#endif /* GUILE_DEBUG */
65
66 \f
67
68 /* {Symbols}
69 */
70
/* In order to optimize reading speed, lookup_interned_symbol (below)
 * breaks part of the hashtable abstraction.  The optimizations are:
 *
 * 1. The argument string can be compared directly to symbol objects
 *    without first creating an SCM string object.  (This would have
 *    been necessary if we had used the hashtable API in hashtab.h.)
 *
 * 2. We can use the raw hash value stored in scm_i_symbol_hash (sym)
 *    to speed up lookup.
 *
 * Both optimizations might be possible without breaking the
 * abstraction if the API in hashtab.c is improved.
 */
84
85 unsigned long
86 scm_i_hash_symbol (SCM obj, unsigned long n, void *closure)
87 {
88 return scm_i_symbol_hash (obj) % n;
89 }
90
91 static SCM
92 lookup_interned_symbol (SCM name, unsigned long raw_hash)
93 {
94 /* Try to find the symbol in the symbols table */
95 SCM l;
96 size_t len = scm_i_string_length (name);
97 unsigned long hash = raw_hash % SCM_HASHTABLE_N_BUCKETS (symbols);
98
99 for (l = SCM_HASHTABLE_BUCKET (symbols, hash);
100 !scm_is_null (l);
101 l = SCM_CDR (l))
102 {
103 SCM sym = SCM_CAAR (l);
104 if (scm_i_symbol_hash (sym) == raw_hash
105 && scm_i_symbol_length (sym) == len)
106 {
107 size_t i = len;
108
109 /* Slightly faster path for comparing narrow to narrow. */
110 if (scm_i_is_narrow_string (name) && scm_i_is_narrow_symbol (sym))
111 {
112 const char *chrs = scm_i_symbol_chars (sym);
113 const char *str = scm_i_string_chars (name);
114
115 while (i != 0)
116 {
117 --i;
118 if (str[i] != chrs[i])
119 goto next_symbol;
120 }
121 }
122 else
123 {
124 /* Somewhat slower path for comparing narrow to wide or
125 wide to wide. */
126 while (i != 0)
127 {
128 --i;
129 if (scm_i_string_ref (name, i) != scm_i_symbol_ref (sym, i))
130 goto next_symbol;
131 }
132 }
133
134 return sym;
135 }
136 next_symbol:
137 ;
138 }
139
140 return SCM_BOOL_F;
141 }
142
143 /* Intern SYMBOL, an uninterned symbol. */
144 static void
145 intern_symbol (SCM symbol)
146 {
147 SCM slot, cell;
148 unsigned long hash;
149
150 hash = scm_i_symbol_hash (symbol) % SCM_HASHTABLE_N_BUCKETS (symbols);
151 slot = SCM_HASHTABLE_BUCKET (symbols, hash);
152 cell = scm_cons (symbol, SCM_UNDEFINED);
153
154 SCM_SET_HASHTABLE_BUCKET (symbols, hash, scm_cons (cell, slot));
155 SCM_HASHTABLE_INCREMENT (symbols);
156
157 if (SCM_HASHTABLE_N_ITEMS (symbols) > SCM_HASHTABLE_UPPER (symbols))
158 scm_i_rehash (symbols, scm_i_hash_symbol, 0, "intern_symbol");
159 }
160
161 static SCM
162 scm_i_str2symbol (SCM str)
163 {
164 SCM symbol;
165 size_t raw_hash = scm_i_string_hash (str);
166
167 symbol = lookup_interned_symbol (str, raw_hash);
168 if (scm_is_false (symbol))
169 {
170 /* The symbol was not found, create it. */
171 symbol = scm_i_make_symbol (str, 0, raw_hash,
172 scm_cons (SCM_BOOL_F, SCM_EOL));
173 intern_symbol (symbol);
174 }
175
176 return symbol;
177 }
178
179
180 static SCM
181 scm_i_str2uninterned_symbol (SCM str)
182 {
183 size_t raw_hash = scm_i_string_hash (str);
184
185 return scm_i_make_symbol (str, SCM_I_F_SYMBOL_UNINTERNED,
186 raw_hash, scm_cons (SCM_BOOL_F, SCM_EOL));
187 }
188
189 SCM_DEFINE (scm_symbol_p, "symbol?", 1, 0, 0,
190 (SCM obj),
191 "Return @code{#t} if @var{obj} is a symbol, otherwise return\n"
192 "@code{#f}.")
193 #define FUNC_NAME s_scm_symbol_p
194 {
195 return scm_from_bool (scm_is_symbol (obj));
196 }
197 #undef FUNC_NAME
198
199 SCM_DEFINE (scm_symbol_interned_p, "symbol-interned?", 1, 0, 0,
200 (SCM symbol),
201 "Return @code{#t} if @var{symbol} is interned, otherwise return\n"
202 "@code{#f}.")
203 #define FUNC_NAME s_scm_symbol_interned_p
204 {
205 SCM_VALIDATE_SYMBOL (1, symbol);
206 return scm_from_bool (scm_i_symbol_is_interned (symbol));
207 }
208 #undef FUNC_NAME
209
210 SCM_DEFINE (scm_make_symbol, "make-symbol", 1, 0, 0,
211 (SCM name),
212 "Return a new uninterned symbol with the name @var{name}. "
213 "The returned symbol is guaranteed to be unique and future "
214 "calls to @code{string->symbol} will not return it.")
215 #define FUNC_NAME s_scm_make_symbol
216 {
217 SCM_VALIDATE_STRING (1, name);
218 return scm_i_str2uninterned_symbol (name);
219 }
220 #undef FUNC_NAME
221
222 SCM_DEFINE (scm_symbol_to_string, "symbol->string", 1, 0, 0,
223 (SCM s),
224 "Return the name of @var{symbol} as a string. If the symbol was\n"
225 "part of an object returned as the value of a literal expression\n"
226 "(section @pxref{Literal expressions,,,r5rs, The Revised^5\n"
227 "Report on Scheme}) or by a call to the @code{read} procedure,\n"
228 "and its name contains alphabetic characters, then the string\n"
229 "returned will contain characters in the implementation's\n"
230 "preferred standard case---some implementations will prefer\n"
231 "upper case, others lower case. If the symbol was returned by\n"
232 "@code{string->symbol}, the case of characters in the string\n"
233 "returned will be the same as the case in the string that was\n"
234 "passed to @code{string->symbol}. It is an error to apply\n"
235 "mutation procedures like @code{string-set!} to strings returned\n"
236 "by this procedure.\n"
237 "\n"
238 "The following examples assume that the implementation's\n"
239 "standard case is lower case:\n"
240 "\n"
241 "@lisp\n"
242 "(symbol->string 'flying-fish) @result{} \"flying-fish\"\n"
243 "(symbol->string 'Martin) @result{} \"martin\"\n"
244 "(symbol->string\n"
245 " (string->symbol \"Malvina\")) @result{} \"Malvina\"\n"
246 "@end lisp")
247 #define FUNC_NAME s_scm_symbol_to_string
248 {
249 SCM_VALIDATE_SYMBOL (1, s);
250 return scm_i_symbol_substring (s, 0, scm_i_symbol_length (s));
251 }
252 #undef FUNC_NAME
253
254
255 SCM_DEFINE (scm_string_to_symbol, "string->symbol", 1, 0, 0,
256 (SCM string),
257 "Return the symbol whose name is @var{string}. This procedure\n"
258 "can create symbols with names containing special characters or\n"
259 "letters in the non-standard case, but it is usually a bad idea\n"
260 "to create such symbols because in some implementations of\n"
261 "Scheme they cannot be read as themselves. See\n"
262 "@code{symbol->string}.\n"
263 "\n"
264 "The following examples assume that the implementation's\n"
265 "standard case is lower case:\n"
266 "\n"
267 "@lisp\n"
268 "(eq? 'mISSISSIppi 'mississippi) @result{} #t\n"
269 "(string->symbol \"mISSISSIppi\") @result{} @r{the symbol with name \"mISSISSIppi\"}\n"
270 "(eq? 'bitBlt (string->symbol \"bitBlt\")) @result{} #f\n"
271 "(eq? 'JollyWog\n"
272 " (string->symbol (symbol->string 'JollyWog))) @result{} #t\n"
273 "(string=? \"K. Harper, M.D.\"\n"
274 " (symbol->string\n"
275 " (string->symbol \"K. Harper, M.D.\"))) @result{}#t\n"
276 "@end lisp")
277 #define FUNC_NAME s_scm_string_to_symbol
278 {
279 SCM_VALIDATE_STRING (1, string);
280 return scm_i_str2symbol (string);
281 }
282 #undef FUNC_NAME
283
284 SCM_DEFINE (scm_string_ci_to_symbol, "string-ci->symbol", 1, 0, 0,
285 (SCM str),
286 "Return the symbol whose name is @var{str}. @var{str} is\n"
287 "converted to lowercase before the conversion is done, if Guile\n"
288 "is currently reading symbols case-insensitively.")
289 #define FUNC_NAME s_scm_string_ci_to_symbol
290 {
291 return scm_string_to_symbol (SCM_CASE_INSENSITIVE_P
292 ? scm_string_downcase(str)
293 : str);
294 }
295 #undef FUNC_NAME
296
297 #define MAX_PREFIX_LENGTH 30
298
299 SCM_DEFINE (scm_gensym, "gensym", 0, 1, 0,
300 (SCM prefix),
301 "Create a new symbol with a name constructed from a prefix and\n"
302 "a counter value. The string @var{prefix} can be specified as\n"
303 "an optional argument. Default prefix is @code{ g}. The counter\n"
304 "is increased by 1 at each call. There is no provision for\n"
305 "resetting the counter.")
306 #define FUNC_NAME s_scm_gensym
307 {
308 static int gensym_counter = 0;
309
310 SCM suffix, name;
311 int n, n_digits;
312 char buf[SCM_INTBUFLEN];
313
314 if (SCM_UNBNDP (prefix))
315 prefix = scm_from_locale_string (" g");
316
317 /* mutex in case another thread looks and incs at the exact same moment */
318 scm_i_scm_pthread_mutex_lock (&scm_i_misc_mutex);
319 n = gensym_counter++;
320 scm_i_pthread_mutex_unlock (&scm_i_misc_mutex);
321
322 n_digits = scm_iint2str (n, 10, buf);
323 suffix = scm_from_locale_stringn (buf, n_digits);
324 name = scm_string_append (scm_list_2 (prefix, suffix));
325 return scm_string_to_symbol (name);
326 }
327 #undef FUNC_NAME
328
329 SCM_DEFINE (scm_symbol_hash, "symbol-hash", 1, 0, 0,
330 (SCM symbol),
331 "Return a hash value for @var{symbol}.")
332 #define FUNC_NAME s_scm_symbol_hash
333 {
334 SCM_VALIDATE_SYMBOL (1, symbol);
335 return scm_from_ulong (scm_i_symbol_hash (symbol));
336 }
337 #undef FUNC_NAME
338
339 SCM_DEFINE (scm_symbol_fref, "symbol-fref", 1, 0, 0,
340 (SCM s),
341 "Return the contents of @var{symbol}'s @dfn{function slot}.")
342 #define FUNC_NAME s_scm_symbol_fref
343 {
344 SCM_VALIDATE_SYMBOL (1, s);
345 return SCM_CAR (SCM_CELL_OBJECT_3 (s));
346 }
347 #undef FUNC_NAME
348
349
350 SCM_DEFINE (scm_symbol_pref, "symbol-pref", 1, 0, 0,
351 (SCM s),
352 "Return the @dfn{property list} currently associated with @var{symbol}.")
353 #define FUNC_NAME s_scm_symbol_pref
354 {
355 SCM_VALIDATE_SYMBOL (1, s);
356 return SCM_CDR (SCM_CELL_OBJECT_3 (s));
357 }
358 #undef FUNC_NAME
359
360
361 SCM_DEFINE (scm_symbol_fset_x, "symbol-fset!", 2, 0, 0,
362 (SCM s, SCM val),
363 "Change the binding of @var{symbol}'s function slot.")
364 #define FUNC_NAME s_scm_symbol_fset_x
365 {
366 SCM_VALIDATE_SYMBOL (1, s);
367 SCM_SETCAR (SCM_CELL_OBJECT_3 (s), val);
368 return SCM_UNSPECIFIED;
369 }
370 #undef FUNC_NAME
371
372
373 SCM_DEFINE (scm_symbol_pset_x, "symbol-pset!", 2, 0, 0,
374 (SCM s, SCM val),
375 "Change the binding of @var{symbol}'s property slot.")
376 #define FUNC_NAME s_scm_symbol_pset_x
377 {
378 SCM_VALIDATE_SYMBOL (1, s);
379 SCM_SETCDR (SCM_CELL_OBJECT_3 (s), val);
380 return SCM_UNSPECIFIED;
381 }
382 #undef FUNC_NAME
383
384 SCM
385 scm_from_locale_symbol (const char *sym)
386 {
387 return scm_from_locale_symboln (sym, -1);
388 }
389
390 SCM
391 scm_from_locale_symboln (const char *sym, size_t len)
392 {
393 SCM str = scm_from_locale_stringn (sym, len);
394 return scm_i_str2symbol (str);
395 }
396
397 SCM
398 scm_take_locale_symboln (char *sym, size_t len)
399 {
400 SCM str;
401
402 str = scm_take_locale_stringn (sym, len);
403 return scm_i_str2symbol (str);
404 }
405
406 SCM
407 scm_take_locale_symbol (char *sym)
408 {
409 return scm_take_locale_symboln (sym, (size_t)-1);
410 }
411
412 void
413 scm_symbols_prehistory ()
414 {
415 symbols = scm_make_weak_key_hash_table (scm_from_int (2139));
416 scm_permanent_object (symbols);
417 }
418
419
420 void
421 scm_init_symbols ()
422 {
423 #include "libguile/symbols.x"
424 }
425
426 /*
427 Local Variables:
428 c-file-style: "gnu"
429 End:
430 */