-/* Copyright (C) 1995,1996,1997,1998,2000,2001, 2003, 2004, 2006 Free Software Foundation, Inc.
- *
+/* Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2003, 2004,
+ * 2006, 2009, 2011, 2013 Free Software Foundation, Inc.
+ *
* This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 3 of
+ * the License, or (at your option) any later version.
*
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA
*/
\f
-#if HAVE_CONFIG_H
+#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
+#include <unistr.h>
+
#include "libguile/_scm.h"
#include "libguile/chars.h"
#include "libguile/eval.h"
#include "libguile/fluids.h"
#include "libguile/strings.h"
#include "libguile/vectors.h"
-#include "libguile/hashtab.h"
-#include "libguile/weaks.h"
+#include "libguile/weak-set.h"
#include "libguile/modules.h"
#include "libguile/read.h"
#include "libguile/srfi-13.h"
#include "libguile/validate.h"
#include "libguile/symbols.h"
+#include "libguile/private-options.h"
+
+
#ifdef HAVE_STRING_H
#include <string.h>
#endif
/* {Symbols}
*/
-/* In order to optimize reading speed, this function breaks part of
- * the hashtable abstraction. The optimizations are:
- *
- * 1. The argument string can be compared directly to symbol objects
- * without first creating an SCM string object. (This would have
- * been necessary if we had used the hashtable API in hashtab.h.)
- *
- * 2. We can use the raw hash value stored in scm_i_symbol_hash (sym)
- * to speed up lookup.
- *
- * Both optimizations might be possible without breaking the
- * abstraction if the API in hashtab.c is improved.
- */
-
unsigned long /* Return OBJ's stored symbol hash reduced modulo N. */
scm_i_hash_symbol (SCM obj, unsigned long n, void *closure) /* CLOSURE is not read by the body. */
{
return scm_i_symbol_hash (obj) % n;
}
-static SCM
-lookup_interned_symbol (const char *name, size_t len,
- unsigned long raw_hash)
+struct string_lookup_data /* Closure handed to string_lookup_predicate_fn. */
+{
+ SCM string; /* String whose length and characters are compared against each candidate symbol. */
+ unsigned long string_hash; /* Raw hash that a candidate's scm_i_symbol_hash must equal. */
+};
+
+static int /* Predicate passed to scm_c_weak_set_lookup: nonzero iff SYM has the same hash, length and characters as the string in CLOSURE. */
+string_lookup_predicate_fn (SCM sym, void *closure) /* CLOSURE points to a struct string_lookup_data. */
{
- /* Try to find the symbol in the symbols table */
- SCM l;
- unsigned long hash = raw_hash % SCM_HASHTABLE_N_BUCKETS (symbols);
+ struct string_lookup_data *data = closure;
- for (l = SCM_HASHTABLE_BUCKET (symbols, hash);
- !scm_is_null (l);
- l = SCM_CDR (l))
+ if (scm_i_symbol_hash (sym) == data->string_hash /* Reject on hash and length before the per-character scan. */
+ && scm_i_symbol_length (sym) == scm_i_string_length (data->string))
{
- SCM pair, sym;
-
- pair = SCM_CAR (l);
- if (!scm_is_pair (pair))
- abort ();
- if (SCM_UNPACK (SCM_CAR (pair)) == NULL)
- /* Weak pointer. Ignore it. */
- /* FIXME: Should we as well remove it, as in `scm_fixup_weak_alist'? */
- continue;
-
- sym = SCM_CAR (pair);
-
- if (scm_i_symbol_hash (sym) == raw_hash
- && scm_i_symbol_length (sym) == len)
- {
- const char *chrs = scm_i_symbol_chars (sym);
- size_t i = len;
-
- while (i != 0)
- {
- --i;
- if (name[i] != chrs[i])
- goto next_symbol;
- }
-
- return sym;
- }
- next_symbol:
- ;
+ size_t n = scm_i_symbol_length (sym);
+ while (n--) /* Walk from the last character down to index 0. */
+ if (scm_i_symbol_ref (sym, n) != scm_i_string_ref (data->string, n))
+ return 0;
+ return 1; /* Every character matched (also reached when the length is 0). */
}
+ else
+ return 0;
+}
+
+static SCM /* Look the string NAME up in the `symbols' weak set using RAW_HASH. */
+lookup_interned_symbol (SCM name, unsigned long raw_hash)
+{
+ struct string_lookup_data data;
- return SCM_BOOL_F;
+ data.string = name;
+ data.string_hash = raw_hash;
+
+ return scm_c_weak_set_lookup (symbols, raw_hash, /* SCM_BOOL_F is the last argument; presumably the not-found result, since the caller tests with scm_is_true -- confirm. */
+ string_lookup_predicate_fn,
+ &data, SCM_BOOL_F);
+}
+
+struct latin1_lookup_data /* Closure handed to latin1_lookup_predicate_fn. */
+{
+ const char *str; /* Buffer compared byte-for-byte against a narrow symbol's characters. */
+ size_t len; /* Number of bytes of STR to compare; must equal the symbol's length. */
+ unsigned long string_hash; /* Raw hash that a candidate's scm_i_symbol_hash must equal. */
+};
+
+/* Weak-set predicate used by lookup_interned_latin1_symbol.  CLOSURE
+   points to a struct latin1_lookup_data.  Return nonzero iff SYM is a
+   narrow symbol whose hash, length and bytes all equal those of the
+   buffer described by CLOSURE.  */
+static int
+latin1_lookup_predicate_fn (SCM sym, void *closure)
+{
+ struct latin1_lookup_data *data = closure;
+
+ /* The hash, narrowness and length tests run first, so by the time
+    the bytes are compared both buffers are known to hold exactly
+    data->len bytes.  memcmp is used rather than strncmp because
+    strncmp stops at the first NUL byte and would treat two names
+    that differ only after an embedded NUL as equal.  */
+ return scm_i_symbol_hash (sym) == data->string_hash
+ && scm_i_is_narrow_symbol (sym)
+ && scm_i_symbol_length (sym) == data->len
+ && memcmp (scm_i_symbol_chars (sym), data->str, data->len) == 0;
}
static SCM
-scm_i_c_mem2symbol (const char *name, size_t len)
+lookup_interned_latin1_symbol (const char *str, size_t len, /* Look up an interned narrow symbol matching the LEN-byte buffer STR. */
+ unsigned long raw_hash) /* RAW_HASH is supplied by the caller; it is not recomputed here. */
{
- SCM symbol;
- size_t raw_hash = scm_string_hash ((const unsigned char *) name, len);
- size_t hash = raw_hash % SCM_HASHTABLE_N_BUCKETS (symbols);
+ struct latin1_lookup_data data;
- symbol = lookup_interned_symbol (name, len, raw_hash);
- if (symbol != SCM_BOOL_F)
- return symbol;
+ data.str = str;
+ data.len = len;
+ data.string_hash = raw_hash;
+
+ return scm_c_weak_set_lookup (symbols, raw_hash, /* SCM_BOOL_F is the last argument; the caller tests the result with scm_is_false. */
+ latin1_lookup_predicate_fn,
+ &data, SCM_BOOL_F);
+}
- {
- /* The symbol was not found - create it. */
- SCM symbol = scm_i_c_make_symbol (name, len, 0, raw_hash,
- scm_cons (SCM_BOOL_F, SCM_EOL));
+struct utf8_lookup_data /* Closure handed to utf8_lookup_predicate_fn. */
+{
+ const char *str; /* Buffer that is decoded with u8_mbtouc when compared against a wide symbol. */
+ size_t len; /* Length of STR in bytes. */
+ unsigned long string_hash; /* Raw hash that a candidate's scm_i_symbol_hash must equal. */
+};
+
+static int /* Nonzero iff the NLEN bytes at NARROW decode to exactly the WLEN code points at WIDE. */
+utf8_string_equals_wide_string (const scm_t_uint8 *narrow, size_t nlen,
+ const scm_t_wchar *wide, size_t wlen)
+{
+ size_t byte_idx = 0, char_idx = 0; /* Cursor into NARROW (bytes) and into WIDE (code points). */
+
+ while (byte_idx < nlen && char_idx < wlen)
+ {
+ ucs4_t c;
+ int nbytes;
+
+ nbytes = u8_mbtouc (&c, narrow + byte_idx, nlen - byte_idx); /* Decode one character from the remaining bytes. */
+ if (nbytes == 0)
+ break;
+ else if (c == 0xfffd) /* NOTE(review): a genuine U+FFFD in NARROW is rejected here too -- confirm this is intended. */
+ /* Bad UTF-8. */
+ return 0;
+ else if (c != wide[char_idx])
+ return 0;
+
+ byte_idx += nbytes;
+ char_idx++;
+ }
- SCM slot = SCM_HASHTABLE_BUCKET (symbols, hash);
- SCM cell = scm_cons (symbol, SCM_UNDEFINED);
- SCM_SET_HASHTABLE_BUCKET (symbols, hash, scm_cons (cell, slot));
- SCM_HASHTABLE_INCREMENT (symbols);
- if (SCM_HASHTABLE_N_ITEMS (symbols) > SCM_HASHTABLE_UPPER (symbols))
- scm_i_rehash (symbols, scm_i_hash_symbol, 0, "scm_mem2symbol");
+ return byte_idx == nlen && char_idx == wlen; /* Equal only if both inputs were consumed completely. */
+}
- return symbol;
- }
+/* Weak-set predicate used by lookup_interned_utf8_symbol.  CLOSURE
+   points to a struct utf8_lookup_data.  Return nonzero iff SYM has the
+   hash recorded in CLOSURE and its characters match the buffer
+   described there.  */
+static int
+utf8_lookup_predicate_fn (SCM sym, void *closure)
+{
+ struct utf8_lookup_data *data = closure;
+
+ if (scm_i_symbol_hash (sym) != data->string_hash)
+ return 0;
+
+ if (scm_i_is_narrow_symbol (sym))
+ /* The length test runs first, so both buffers hold data->len
+    bytes.  memcmp is used rather than strncmp because strncmp stops
+    at the first NUL byte and would treat names that differ only
+    after an embedded NUL as equal.
+    NOTE(review): this compares the symbol's narrow bytes directly
+    with the input bytes, which presumably coincide only for ASCII
+    content -- confirm that other cases cannot match by accident.  */
+ return (scm_i_symbol_length (sym) == data->len
+ && memcmp (scm_i_symbol_chars (sym), data->str, data->len) == 0);
+ else
+ /* Wide symbol: decode the buffer and compare code point by code
+    point.  */
+ return utf8_string_equals_wide_string ((const scm_t_uint8 *) data->str,
+ data->len,
+ scm_i_symbol_wide_chars (sym),
+ scm_i_symbol_length (sym));
}
static SCM
-scm_i_mem2symbol (SCM str)
+lookup_interned_utf8_symbol (const char *str, size_t len, /* Look up an interned symbol matching the LEN-byte buffer STR. */
+ unsigned long raw_hash) /* RAW_HASH is supplied by the caller; it is not recomputed here. */
{
- SCM symbol;
- const char *name = scm_i_string_chars (str);
- size_t len = scm_i_string_length (str);
- size_t raw_hash = scm_string_hash ((const unsigned char *) name, len);
- size_t hash = raw_hash % SCM_HASHTABLE_N_BUCKETS (symbols);
+ struct utf8_lookup_data data;
- symbol = lookup_interned_symbol (name, len, raw_hash);
- if (symbol != SCM_BOOL_F)
- return symbol;
+ data.str = str;
+ data.len = len;
+ data.string_hash = raw_hash;
+
+ return scm_c_weak_set_lookup (symbols, raw_hash, /* SCM_BOOL_F is the last argument; the caller tests the result with scm_is_false. */
+ utf8_lookup_predicate_fn,
+ &data, SCM_BOOL_F);
+}
- {
- /* The symbol was not found - create it. */
- SCM symbol = scm_i_make_symbol (str, 0, raw_hash,
- scm_cons (SCM_BOOL_F, SCM_EOL));
+/* Weak-set predicate used when adding a freshly made symbol to
+   `symbols'.  CLOSURE is that symbol, packed as a pointer.  Return
+   nonzero iff SYM has the same hash, length and characters as it.  */
+static int
+symbol_lookup_predicate_fn (SCM sym, void *closure)
+{
+ SCM other = SCM_PACK_POINTER (closure);
- SCM slot = SCM_HASHTABLE_BUCKET (symbols, hash);
- SCM cell = scm_cons (symbol, SCM_UNDEFINED);
- SCM_SET_HASHTABLE_BUCKET (symbols, hash, scm_cons (cell, slot));
- SCM_HASHTABLE_INCREMENT (symbols);
- if (SCM_HASHTABLE_N_ITEMS (symbols) > SCM_HASHTABLE_UPPER (symbols))
- scm_i_rehash (symbols, scm_i_hash_symbol, 0, "scm_mem2symbol");
+ if (scm_i_symbol_hash (sym) == scm_i_symbol_hash (other)
+ && scm_i_symbol_length (sym) == scm_i_symbol_length (other))
+ {
+ if (scm_i_is_narrow_symbol (sym))
+ /* Both symbols have the same length here, so memcmp may read
+    that many bytes from each.  memcmp is used rather than strncmp
+    because strncmp stops at the first NUL byte and would treat
+    names that differ only after an embedded NUL as equal.  */
+ return scm_i_is_narrow_symbol (other)
+ && (memcmp (scm_i_symbol_chars (sym),
+ scm_i_symbol_chars (other),
+ scm_i_symbol_length (other)) == 0);
+ else
+ /* SYM is wide: compare through the string API.  */
+ return scm_is_true
+ (scm_string_equal_p (scm_symbol_to_string (sym),
+ scm_symbol_to_string (other)));
+ }
+ return 0;
+}
+
+static SCM /* Return the interned symbol named by the string STR, creating and registering it when the lookup fails. */
+scm_i_str2symbol (SCM str)
+{
+ SCM symbol;
+ size_t raw_hash = scm_i_string_hash (str); /* Held as size_t here, then passed on as the unsigned long hash argument. */
+ symbol = lookup_interned_symbol (str, raw_hash);
+ if (scm_is_true (symbol))
return symbol;
- }
+ else
+ {
+ /* The symbol was not found, create it. */
+ symbol = scm_i_make_symbol (str, 0, raw_hash,
+ scm_cons (SCM_BOOL_F, SCM_EOL));
+
+ /* Might return a different symbol, if another one was interned at
+ the same time. */
+ return scm_c_weak_set_add_x (symbols, raw_hash, /* The new symbol is passed both as the predicate closure and as the object to add. */
+ symbol_lookup_predicate_fn,
+ SCM_UNPACK_POINTER (symbol), symbol);
+ }
}
static SCM
-scm_i_mem2uninterned_symbol (SCM str)
+scm_i_str2uninterned_symbol (SCM str)
{
- const char *name = scm_i_string_chars (str);
- size_t len = scm_i_string_length (str);
- size_t raw_hash = scm_string_hash ((const unsigned char *) name, len);
+ size_t raw_hash = scm_i_string_hash (str);
return scm_i_make_symbol (str, SCM_I_F_SYMBOL_UNINTERNED,
raw_hash, scm_cons (SCM_BOOL_F, SCM_EOL));
#define FUNC_NAME s_scm_make_symbol
{
SCM_VALIDATE_STRING (1, name);
- return scm_i_mem2uninterned_symbol (name);
+ return scm_i_str2uninterned_symbol (name);
}
#undef FUNC_NAME
#define FUNC_NAME s_scm_string_to_symbol
{
SCM_VALIDATE_STRING (1, string);
- return scm_i_mem2symbol (string);
+ return scm_i_str2symbol (string);
}
#undef FUNC_NAME
}
#undef FUNC_NAME
+/* The default prefix for `gensym'd symbols. */
+static SCM default_gensym_prefix; /* Assigned in scm_init_symbols; used by gensym when its PREFIX argument is unbound. */
+
#define MAX_PREFIX_LENGTH 30
SCM_DEFINE (scm_gensym, "gensym", 0, 1, 0,
char buf[SCM_INTBUFLEN];
if (SCM_UNBNDP (prefix))
- prefix = scm_from_locale_string (" g");
-
+ prefix = default_gensym_prefix;
+
/* mutex in case another thread looks and incs at the exact same moment */
scm_i_scm_pthread_mutex_lock (&scm_i_misc_mutex);
n = gensym_counter++;
scm_i_pthread_mutex_unlock (&scm_i_misc_mutex);
n_digits = scm_iint2str (n, 10, buf);
- suffix = scm_from_locale_stringn (buf, n_digits);
+ suffix = scm_from_latin1_stringn (buf, n_digits);
name = scm_string_append (scm_list_2 (prefix, suffix));
return scm_string_to_symbol (name);
}
SCM_DEFINE (scm_symbol_fref, "symbol-fref", 1, 0, 0,
(SCM s),
- "Return the contents of @var{symbol}'s @dfn{function slot}.")
+ "Return the contents of the symbol @var{s}'s @dfn{function slot}.")
#define FUNC_NAME s_scm_symbol_fref
{
SCM_VALIDATE_SYMBOL (1, s);
SCM_DEFINE (scm_symbol_pref, "symbol-pref", 1, 0, 0,
(SCM s),
- "Return the @dfn{property list} currently associated with @var{symbol}.")
+ "Return the @dfn{property list} currently associated with the\n"
+ "symbol @var{s}.")
#define FUNC_NAME s_scm_symbol_pref
{
SCM_VALIDATE_SYMBOL (1, s);
SCM_DEFINE (scm_symbol_fset_x, "symbol-fset!", 2, 0, 0,
(SCM s, SCM val),
- "Change the binding of @var{symbol}'s function slot.")
+ "Change the binding of the symbol @var{s}'s function slot.")
#define FUNC_NAME s_scm_symbol_fset_x
{
SCM_VALIDATE_SYMBOL (1, s);
SCM_DEFINE (scm_symbol_pset_x, "symbol-pset!", 2, 0, 0,
(SCM s, SCM val),
- "Change the binding of @var{symbol}'s property slot.")
+ "Change the binding of the symbol @var{s}'s property slot.")
#define FUNC_NAME s_scm_symbol_pset_x
{
SCM_VALIDATE_SYMBOL (1, s);
SCM
scm_from_locale_symbol (const char *sym)
{
- return scm_i_c_mem2symbol (sym, strlen (sym));
+ return scm_from_locale_symboln (sym, -1); /* -1 converts to (size_t) -1 for the LEN parameter; presumably "use the NUL-terminated length" -- confirm against scm_from_locale_stringn. */
}
SCM
scm_from_locale_symboln (const char *sym, size_t len)
{
- return scm_i_c_mem2symbol (sym, len);
+ SCM str = scm_from_locale_stringn (sym, len); /* Build a string object from SYM/LEN first. */
+ return scm_i_str2symbol (str); /* Then intern through the common string path. */
}
SCM
scm_take_locale_symboln (char *sym, size_t len)
{
- SCM res;
- unsigned long raw_hash;
+ SCM str;
+
+ str = scm_take_locale_stringn (sym, len); /* SYM is handed to scm_take_locale_stringn; presumably ownership transfers there -- confirm. */
+ return scm_i_str2symbol (str); /* Intern through the common string path. */
+}
- if (len == (size_t)-1)
+SCM
+scm_take_locale_symbol (char *sym)
+{
+ return scm_take_locale_symboln (sym, (size_t)-1); /* Delegates with LEN == (size_t) -1; presumably "use the NUL-terminated length" -- confirm against scm_take_locale_stringn. */
+}
+
+SCM
+scm_from_latin1_symbol (const char *sym)
+{
+ return scm_from_latin1_symboln (sym, -1); /* -1 converts to (size_t) -1, which scm_from_latin1_symboln replaces with strlen (sym). */
+}
+
+SCM
+scm_from_latin1_symboln (const char *sym, size_t len) /* Return the symbol named by the LEN bytes at SYM, looking it up before creating any string object. */
+{
+ unsigned long hash;
+ SCM ret;
+
+ if (len == (size_t) -1) /* (size_t) -1 means: take the length from the NUL terminator. */
len = strlen (sym);
- else
- {
- /* Ensure STR is null terminated. A realloc for 1 extra byte should
- often be satisfied from the alignment padding after the block, with
- no actual data movement. */
- sym = scm_realloc (sym, len+1);
- sym[len] = '\0';
- }
+ hash = scm_i_latin1_string_hash (sym, len);
- raw_hash = scm_string_hash ((unsigned char *)sym, len);
- res = lookup_interned_symbol (sym, len, raw_hash);
- if (res != SCM_BOOL_F)
+ ret = lookup_interned_latin1_symbol (sym, len, hash); /* First try: compare the raw buffer against already interned symbols. */
+ if (scm_is_false (ret))
{
- free (sym);
- return res;
+ SCM str = scm_from_latin1_stringn (sym, len); /* Lookup failed: build a string and intern through the common string path. */
+ ret = scm_i_str2symbol (str);
}
- res = scm_i_c_take_symbol (sym, len, 0, raw_hash,
- scm_cons (SCM_BOOL_F, SCM_EOL));
+ return ret;
+}
- return res;
+SCM
+scm_from_utf8_symbol (const char *sym)
+{
+ return scm_from_utf8_symboln (sym, -1); /* -1 converts to (size_t) -1, which scm_from_utf8_symboln replaces with strlen (sym). */
}
SCM
-scm_take_locale_symbol (char *sym)
+scm_from_utf8_symboln (const char *sym, size_t len) /* Return the symbol named by the LEN bytes at SYM, looking it up before creating any string object. */
{
- return scm_take_locale_symboln (sym, (size_t)-1);
+ unsigned long hash;
+ SCM ret;
+
+ if (len == (size_t) -1) /* (size_t) -1 means: take the length from the NUL terminator. */
+ len = strlen (sym);
+ hash = scm_i_utf8_string_hash (sym, len);
+
+ ret = lookup_interned_utf8_symbol (sym, len, hash); /* First try: compare the raw buffer against already interned symbols. */
+ if (scm_is_false (ret))
+ {
+ SCM str = scm_from_utf8_stringn (sym, len); /* Lookup failed: build a string and intern through the common string path. */
+ ret = scm_i_str2symbol (str);
+ }
+
+ return ret;
}
void
scm_symbols_prehistory ()
{
- symbols = scm_make_weak_key_hash_table (scm_from_int (2139));
- scm_permanent_object (symbols);
+ symbols = scm_c_make_weak_set (5000); /* Creates the `symbols' weak set; 5000 is presumably an initial size hint -- confirm against scm_c_make_weak_set. */
}
scm_init_symbols ()
{
#include "libguile/symbols.x"
+
+ default_gensym_prefix = scm_from_latin1_string (" g");
}
/*