/* srfi-14.c --- SRFI-14 procedures for Guile
*
- * Copyright (C) 2001, 2004, 2006, 2007 Free Software Foundation, Inc.
+ * Copyright (C) 2001, 2004, 2006, 2007, 2009, 2011 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
/* Include the pre-computed standard charset data. */
#include "libguile/srfi-14.i.c"
+scm_t_char_range cs_full_ranges[] = {
+ {0x0000, SCM_CODEPOINT_SURROGATE_START - 1}
+ ,
+ {SCM_CODEPOINT_SURROGATE_END + 1, SCM_CODEPOINT_MAX}
+};
+
+scm_t_char_set cs_full = {
+ 2,
+ cs_full_ranges
+};
+
+
#define SCM_CHARSET_DATA(charset) ((scm_t_char_set *) SCM_SMOB_DATA (charset))
#define SCM_CHARSET_SET(cs, idx) \
return 1;
}
-
-/* Smob free hook for character sets. */
-static size_t
-charset_free (SCM charset)
-{
- scm_t_char_set *cs;
- size_t len = 0;
-
- cs = SCM_CHARSET_DATA (charset);
- if (cs != NULL)
- len = cs->len;
- if (len > 0)
- scm_gc_free (cs->ranges, sizeof (scm_t_char_range) * len,
- "character-set");
-
- cs->ranges = NULL;
- cs->len = 0;
-
- scm_gc_free (cs, sizeof (scm_t_char_set), "character-set");
-
- scm_remember_upto_here_1 (charset);
-
- return 0;
-}
-
-
/* Smob print hook for character sets cursors. */
static int
charset_cursor_print (SCM cursor, SCM port,
return 1;
}
-/* Smob free hook for character sets. */
-static size_t
-charset_cursor_free (SCM charset)
-{
- scm_t_char_set_cursor *cur;
-
- cur = (scm_t_char_set_cursor *) SCM_SMOB_DATA (charset);
- scm_gc_free (cur, sizeof (scm_t_char_set_cursor), "charset-cursor");
- scm_remember_upto_here_1 (charset);
-
- return 0;
-}
-
/* Create a new, empty character set. */
static SCM
SCM_DEFINE (scm_char_set_leq, "char-set<=", 0, 0, 1,
(SCM char_sets),
- "Return @code{#t} if every character set @var{cs}i is a subset\n"
- "of character set @var{cs}i+1.")
+ "Return @code{#t} if every character set @var{char_set}i is a subset\n"
+ "of character set @var{char_set}i+1.")
#define FUNC_NAME s_scm_char_set_leq
{
int argnum = 1;
(SCM cs, SCM bound),
"Compute a hash value for the character set @var{cs}. If\n"
"@var{bound} is given and non-zero, it restricts the\n"
- "returned value to the range 0 @dots{} @var{bound - 1}.")
+ "returned value to the range 0 @dots{} @var{bound} - 1.")
#define FUNC_NAME s_scm_char_set_hash
{
const unsigned long default_bnd = 871;
clower = scm_to_size_t (lower);
cupper = scm_to_size_t (upper) - 1;
- SCM_ASSERT_RANGE (1, lower, clower >= 0);
- SCM_ASSERT_RANGE (2, upper, cupper >= 0);
SCM_ASSERT_RANGE (2, upper, cupper >= clower);
if (!SCM_UNBNDP (error))
{
cs = make_char_set (FUNC_NAME);
else
{
- SCM_VALIDATE_SMOB (4, base_cs, charset);
+ SCM_VALIDATE_SMOB (3, base_cs, charset);
if (reuse)
cs = base_cs;
else
"If @var{error} is a true value, an error is signalled if the\n"
"specified range contains characters which are not valid\n"
"Unicode code points. If @var{error} is @code{#f},\n"
- "these characters are silently left out of the resultung\n"
+ "these characters are silently left out of the resulting\n"
"character set.\n"
"\n"
"The characters in @var{base_cs} are added to the result, if\n"
"If @var{error} is a true value, an error is signalled if the\n"
"specified range contains characters which are not contained in\n"
"the implemented character range. If @var{error} is @code{#f},\n"
- "these characters are silently left out of the resultung\n"
+ "these characters are silently left out of the resulting\n"
"character set.\n"
"\n"
"The characters are added to @var{base_cs} and @var{base_cs} is\n"
count = scm_to_int (scm_char_set_size (cs));
if (wide)
- result = scm_i_make_wide_string (count, &wbuf);
+ result = scm_i_make_wide_string (count, &wbuf, 0);
else
- result = scm_i_make_string (count, &buf);
+ result = scm_i_make_string (count, &buf, 0);
for (k = 0; k < cs_data->len; k++)
for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
SCM scm_char_set_blank;
SCM scm_char_set_ascii;
SCM scm_char_set_empty;
+SCM scm_char_set_designated;
SCM scm_char_set_full;
SCM_NEWSMOB (cs, scm_tc16_charset, p);
scm_c_define (name, cs);
- return scm_permanent_object (cs);
+ return cs;
}
-#ifdef SCM_CHARSET_DEBUG
-SCM_DEFINE (scm_debug_char_set, "debug-char-set", 1, 0, 0,
- (SCM charset),
- "Print out the internal C structure of @var{charset}.\n")
-#define FUNC_NAME s_scm_debug_char_set
-{
- int i;
- scm_t_char_set *cs = SCM_CHARSET_DATA (charset);
- fprintf (stderr, "cs %p\n", cs);
- fprintf (stderr, "len %d\n", cs->len);
- fprintf (stderr, "arr %p\n", cs->ranges);
+SCM_DEFINE (scm_sys_char_set_dump, "%char-set-dump", 1, 0, 0, (SCM charset),
+ "Returns an association list containing debugging information\n"
+ "for @var{charset}. The association list has the following entries."
+ "@table @code\n"
+ "@item char-set\n"
+ "The char-set itself.\n"
+ "@item len\n"
+ "The number of character ranges the char-set contains\n"
+ "@item ranges\n"
+ "A list of lists where each sublist a range of code points\n"
+ "and their associated characters"
+ "@end table")
+#define FUNC_NAME s_scm_sys_char_set_dump
+{
+ SCM e1, e2, e3;
+ SCM ranges = SCM_EOL, elt;
+ size_t i;
+ scm_t_char_set *cs;
+ char codepoint_string_lo[9], codepoint_string_hi[9];
+
+ SCM_VALIDATE_SMOB (1, charset, charset);
+ cs = SCM_CHARSET_DATA (charset);
+
+ e1 = scm_cons (scm_from_latin1_symbol ("char-set"),
+ charset);
+ e2 = scm_cons (scm_from_latin1_symbol ("n"),
+ scm_from_size_t (cs->len));
+
for (i = 0; i < cs->len; i++)
{
- if (cs->ranges[i].lo == cs->ranges[i].hi)
- fprintf (stderr, "%04x\n", cs->ranges[i].lo);
+ if (cs->ranges[i].lo > 0xFFFF)
+ sprintf (codepoint_string_lo, "U+%06x", cs->ranges[i].lo);
else
- fprintf (stderr, "%04x..%04x\t[%d]\n",
- cs->ranges[i].lo,
- cs->ranges[i].hi, cs->ranges[i].hi - cs->ranges[i].lo + 1);
+ sprintf (codepoint_string_lo, "U+%04x", cs->ranges[i].lo);
+ if (cs->ranges[i].hi > 0xFFFF)
+ sprintf (codepoint_string_hi, "U+%06x", cs->ranges[i].hi);
+ else
+ sprintf (codepoint_string_hi, "U+%04x", cs->ranges[i].hi);
+
+ elt = scm_list_4 (SCM_MAKE_CHAR (cs->ranges[i].lo),
+ SCM_MAKE_CHAR (cs->ranges[i].hi),
+ scm_from_locale_string (codepoint_string_lo),
+ scm_from_locale_string (codepoint_string_hi));
+ ranges = scm_append (scm_list_2 (ranges,
+ scm_list_1 (elt)));
}
- printf ("\n");
- return SCM_UNSPECIFIED;
+ e3 = scm_cons (scm_from_latin1_symbol ("ranges"),
+ ranges);
+
+ return scm_list_3 (e1, e2, e3);
}
#undef FUNC_NAME
-#endif /* SCM_CHARSET_DEBUG */
+
\f
scm_init_srfi_14 (void)
{
scm_tc16_charset = scm_make_smob_type ("character-set", 0);
- scm_set_smob_free (scm_tc16_charset, charset_free);
scm_set_smob_print (scm_tc16_charset, charset_print);
scm_tc16_charset_cursor = scm_make_smob_type ("char-set-cursor", 0);
- scm_set_smob_free (scm_tc16_charset_cursor, charset_cursor_free);
scm_set_smob_print (scm_tc16_charset_cursor, charset_cursor_print);
scm_char_set_upper_case =
scm_char_set_blank = define_charset ("char-set:blank", &cs_blank);
scm_char_set_ascii = define_charset ("char-set:ascii", &cs_ascii);
scm_char_set_empty = define_charset ("char-set:empty", &cs_empty);
+ scm_char_set_designated = define_charset ("char-set:designated", &cs_designated);
scm_char_set_full = define_charset ("char-set:full", &cs_full);
#include "libguile/srfi-14.x"