1 /* srfi-14.c --- SRFI-14 procedures for Guile
3 * Copyright (C) 2001, 2004 Free Software Foundation, Inc.
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include "libguile/srfi-14.h"
/* Add character code IDX to the character set smob CS by setting the
   corresponding bit in the word array returned by SCM_SMOB_DATA.

   The word is accessed as `unsigned long' and the mask is built from
   `1UL': with a signed `1L', selecting the topmost bit of a word would
   left-shift a 1 into the sign bit, which is undefined behaviour in C.
   The stored bit pattern is the same as before.

   IDX is evaluated twice; do not pass an expression with side
   effects.  */
#define SCM_CHARSET_SET(cs, idx)					\
  (((unsigned long *) SCM_SMOB_DATA (cs))[(idx) / SCM_BITS_PER_LONG] |=	\
   (1UL << ((idx) % SCM_BITS_PER_LONG)))

/* Size of one character set bit vector: SCM_CHARSET_SIZE bits,
   expressed in bytes (8 bits each) and in `long' words.  */
#define BYTES_PER_CHARSET (SCM_CHARSET_SIZE / 8)
#define LONGS_PER_CHARSET (SCM_CHARSET_SIZE / SCM_BITS_PER_LONG)
/* Smob type tag for character set objects.  It starts out as zero;
   scm_init_srfi_14 stores the tag returned by scm_make_smob_type
   here.  */
int scm_tc16_charset = 0;
40 /* Smob print hook for character sets. */
42 charset_print (SCM charset
, SCM port
, scm_print_state
*pstate SCM_UNUSED
)
47 scm_puts ("#<charset {", port
);
48 for (i
= 0; i
< SCM_CHARSET_SIZE
; i
++)
49 if (SCM_CHARSET_GET (charset
, i
))
55 scm_write (SCM_MAKE_CHAR (i
), port
);
57 scm_puts ("}>", port
);
62 /* Smob free hook for character sets. */
64 charset_free (SCM charset
)
66 return scm_smob_free (charset
);
70 /* Create a new, empty character set. */
72 make_char_set (const char * func_name
)
76 p
= scm_gc_malloc (BYTES_PER_CHARSET
, "character-set");
77 memset (p
, 0, BYTES_PER_CHARSET
);
78 SCM_RETURN_NEWSMOB (scm_tc16_charset
, p
);
82 SCM_DEFINE (scm_char_set_p
, "char-set?", 1, 0, 0,
84 "Return @code{#t} if @var{obj} is a character set, @code{#f}\n"
86 #define FUNC_NAME s_scm_char_set_p
88 return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_charset
, obj
));
93 SCM_DEFINE (scm_char_set_eq
, "char-set=", 0, 0, 1,
95 "Return @code{#t} if all given character sets are equal.")
96 #define FUNC_NAME s_scm_char_set_eq
99 long *cs1_data
= NULL
;
101 SCM_VALIDATE_REST_ARGUMENT (char_sets
);
103 while (!SCM_NULLP (char_sets
))
105 SCM csi
= SCM_CAR (char_sets
);
108 SCM_VALIDATE_SMOB (argnum
, csi
, charset
);
110 csi_data
= (long *) SCM_SMOB_DATA (csi
);
111 if (cs1_data
== NULL
)
113 else if (memcmp (cs1_data
, csi_data
, BYTES_PER_CHARSET
) != 0)
115 char_sets
= SCM_CDR (char_sets
);
122 SCM_DEFINE (scm_char_set_leq
, "char-set<=", 0, 0, 1,
124 "Return @code{#t} if every character set @var{cs}i is a subset\n"
125 "of character set @var{cs}i+1.")
126 #define FUNC_NAME s_scm_char_set_leq
129 long *prev_data
= NULL
;
131 SCM_VALIDATE_REST_ARGUMENT (char_sets
);
133 while (!SCM_NULLP (char_sets
))
135 SCM csi
= SCM_CAR (char_sets
);
138 SCM_VALIDATE_SMOB (argnum
, csi
, charset
);
140 csi_data
= (long *) SCM_SMOB_DATA (csi
);
145 for (k
= 0; k
< LONGS_PER_CHARSET
; k
++)
147 if ((prev_data
[k
] & csi_data
[k
]) != prev_data
[k
])
151 prev_data
= csi_data
;
152 char_sets
= SCM_CDR (char_sets
);
159 SCM_DEFINE (scm_char_set_hash
, "char-set-hash", 1, 1, 0,
161 "Compute a hash value for the character set @var{cs}. If\n"
162 "@var{bound} is given and non-zero, it restricts the\n"
163 "returned value to the range 0 @dots{} @var{bound - 1}.")
164 #define FUNC_NAME s_scm_char_set_hash
166 const unsigned long default_bnd
= 871;
169 unsigned long val
= 0;
172 SCM_VALIDATE_SMOB (1, cs
, charset
);
174 if (SCM_UNBNDP (bound
))
178 bnd
= scm_to_ulong (bound
);
183 p
= (long *) SCM_SMOB_DATA (cs
);
184 for (k
= 0; k
< LONGS_PER_CHARSET
; k
++)
187 val
= p
[k
] + (val
<< 1);
189 return scm_from_ulong (val
% bnd
);
194 SCM_DEFINE (scm_char_set_cursor
, "char-set-cursor", 1, 0, 0,
196 "Return a cursor into the character set @var{cs}.")
197 #define FUNC_NAME s_scm_char_set_cursor
201 SCM_VALIDATE_SMOB (1, cs
, charset
);
202 for (idx
= 0; idx
< SCM_CHARSET_SIZE
; idx
++)
204 if (SCM_CHARSET_GET (cs
, idx
))
207 return SCM_I_MAKINUM (idx
);
212 SCM_DEFINE (scm_char_set_ref
, "char-set-ref", 2, 0, 0,
213 (SCM cs
, SCM cursor
),
214 "Return the character at the current cursor position\n"
215 "@var{cursor} in the character set @var{cs}. It is an error to\n"
216 "pass a cursor for which @code{end-of-char-set?} returns true.")
217 #define FUNC_NAME s_scm_char_set_ref
219 size_t ccursor
= scm_to_size_t (cursor
);
220 SCM_VALIDATE_SMOB (1, cs
, charset
);
222 if (ccursor
>= SCM_CHARSET_SIZE
|| !SCM_CHARSET_GET (cs
, ccursor
))
223 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor
));
224 return SCM_MAKE_CHAR (ccursor
);
229 SCM_DEFINE (scm_char_set_cursor_next
, "char-set-cursor-next", 2, 0, 0,
230 (SCM cs
, SCM cursor
),
231 "Advance the character set cursor @var{cursor} to the next\n"
232 "character in the character set @var{cs}. It is an error if the\n"
233 "cursor given satisfies @code{end-of-char-set?}.")
234 #define FUNC_NAME s_scm_char_set_cursor_next
236 size_t ccursor
= scm_to_size_t (cursor
);
237 SCM_VALIDATE_SMOB (1, cs
, charset
);
239 if (ccursor
>= SCM_CHARSET_SIZE
|| !SCM_CHARSET_GET (cs
, ccursor
))
240 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor
));
241 for (ccursor
++; ccursor
< SCM_CHARSET_SIZE
; ccursor
++)
243 if (SCM_CHARSET_GET (cs
, ccursor
))
246 return SCM_I_MAKINUM (ccursor
);
251 SCM_DEFINE (scm_end_of_char_set_p
, "end-of-char-set?", 1, 0, 0,
253 "Return @code{#t} if @var{cursor} has reached the end of a\n"
254 "character set, @code{#f} otherwise.")
255 #define FUNC_NAME s_scm_end_of_char_set_p
257 size_t ccursor
= scm_to_size_t (cursor
);
258 return scm_from_bool (ccursor
>= SCM_CHARSET_SIZE
);
263 SCM_DEFINE (scm_char_set_fold
, "char-set-fold", 3, 0, 0,
264 (SCM kons
, SCM knil
, SCM cs
),
265 "Fold the procedure @var{kons} over the character set @var{cs},\n"
266 "initializing it with @var{knil}.")
267 #define FUNC_NAME s_scm_char_set_fold
271 SCM_VALIDATE_PROC (1, kons
);
272 SCM_VALIDATE_SMOB (3, cs
, charset
);
274 for (k
= 0; k
< SCM_CHARSET_SIZE
; k
++)
275 if (SCM_CHARSET_GET (cs
, k
))
277 knil
= scm_call_2 (kons
, SCM_MAKE_CHAR (k
), knil
);
284 SCM_DEFINE (scm_char_set_unfold
, "char-set-unfold", 4, 1, 0,
285 (SCM p
, SCM f
, SCM g
, SCM seed
, SCM base_cs
),
286 "This is a fundamental constructor for character sets.\n"
288 "@item @var{g} is used to generate a series of ``seed'' values\n"
289 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
290 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
291 "@item @var{p} tells us when to stop -- when it returns true\n"
292 "when applied to one of the seed values.\n"
293 "@item @var{f} maps each seed value to a character. These\n"
294 "characters are added to the base character set @var{base_cs} to\n"
295 "form the result; @var{base_cs} defaults to the empty set.\n"
297 #define FUNC_NAME s_scm_char_set_unfold
301 SCM_VALIDATE_PROC (1, p
);
302 SCM_VALIDATE_PROC (2, f
);
303 SCM_VALIDATE_PROC (3, g
);
304 if (!SCM_UNBNDP (base_cs
))
306 SCM_VALIDATE_SMOB (5, base_cs
, charset
);
307 result
= scm_char_set_copy (base_cs
);
310 result
= make_char_set (FUNC_NAME
);
312 tmp
= scm_call_1 (p
, seed
);
313 while (scm_is_false (tmp
))
315 SCM ch
= scm_call_1 (f
, seed
);
317 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f
));
318 SCM_CHARSET_SET (result
, SCM_CHAR (ch
));
320 seed
= scm_call_1 (g
, seed
);
321 tmp
= scm_call_1 (p
, seed
);
328 SCM_DEFINE (scm_char_set_unfold_x
, "char-set-unfold!", 5, 0, 0,
329 (SCM p
, SCM f
, SCM g
, SCM seed
, SCM base_cs
),
330 "This is a fundamental constructor for character sets.\n"
332 "@item @var{g} is used to generate a series of ``seed'' values\n"
333 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
334 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
335 "@item @var{p} tells us when to stop -- when it returns true\n"
336 "when applied to one of the seed values.\n"
337 "@item @var{f} maps each seed value to a character. These\n"
338 "characters are added to the base character set @var{base_cs} to\n"
339 "form the result; @var{base_cs} defaults to the empty set.\n"
341 #define FUNC_NAME s_scm_char_set_unfold_x
345 SCM_VALIDATE_PROC (1, p
);
346 SCM_VALIDATE_PROC (2, f
);
347 SCM_VALIDATE_PROC (3, g
);
348 SCM_VALIDATE_SMOB (5, base_cs
, charset
);
350 tmp
= scm_call_1 (p
, seed
);
351 while (scm_is_false (tmp
))
353 SCM ch
= scm_call_1 (f
, seed
);
355 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f
));
356 SCM_CHARSET_SET (base_cs
, SCM_CHAR (ch
));
358 seed
= scm_call_1 (g
, seed
);
359 tmp
= scm_call_1 (p
, seed
);
366 SCM_DEFINE (scm_char_set_for_each
, "char-set-for-each", 2, 0, 0,
368 "Apply @var{proc} to every character in the character set\n"
369 "@var{cs}. The return value is not specified.")
370 #define FUNC_NAME s_scm_char_set_for_each
374 SCM_VALIDATE_PROC (1, proc
);
375 SCM_VALIDATE_SMOB (2, cs
, charset
);
377 for (k
= 0; k
< SCM_CHARSET_SIZE
; k
++)
378 if (SCM_CHARSET_GET (cs
, k
))
379 scm_call_1 (proc
, SCM_MAKE_CHAR (k
));
380 return SCM_UNSPECIFIED
;
385 SCM_DEFINE (scm_char_set_map
, "char-set-map", 2, 0, 0,
387 "Map the procedure @var{proc} over every character in @var{cs}.\n"
388 "@var{proc} must be a character -> character procedure.")
389 #define FUNC_NAME s_scm_char_set_map
394 SCM_VALIDATE_PROC (1, proc
);
395 SCM_VALIDATE_SMOB (2, cs
, charset
);
397 result
= make_char_set (FUNC_NAME
);
398 for (k
= 0; k
< SCM_CHARSET_SIZE
; k
++)
399 if (SCM_CHARSET_GET (cs
, k
))
401 SCM ch
= scm_call_1 (proc
, SCM_MAKE_CHAR (k
));
403 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (proc
));
404 SCM_CHARSET_SET (result
, SCM_CHAR (ch
));
411 SCM_DEFINE (scm_char_set_copy
, "char-set-copy", 1, 0, 0,
413 "Return a newly allocated character set containing all\n"
414 "characters in @var{cs}.")
415 #define FUNC_NAME s_scm_char_set_copy
421 SCM_VALIDATE_SMOB (1, cs
, charset
);
422 ret
= make_char_set (FUNC_NAME
);
423 p1
= (long *) SCM_SMOB_DATA (cs
);
424 p2
= (long *) SCM_SMOB_DATA (ret
);
425 for (k
= 0; k
< LONGS_PER_CHARSET
; k
++)
432 SCM_DEFINE (scm_char_set
, "char-set", 0, 0, 1,
434 "Return a character set containing all given characters.")
435 #define FUNC_NAME s_scm_char_set
441 SCM_VALIDATE_REST_ARGUMENT (rest
);
442 cs
= make_char_set (FUNC_NAME
);
443 p
= (long *) SCM_SMOB_DATA (cs
);
444 while (!SCM_NULLP (rest
))
448 SCM_VALIDATE_CHAR_COPY (argnum
, SCM_CAR (rest
), c
);
450 rest
= SCM_CDR (rest
);
451 p
[c
/ SCM_BITS_PER_LONG
] |= 1L << (c
% SCM_BITS_PER_LONG
);
458 SCM_DEFINE (scm_list_to_char_set
, "list->char-set", 1, 1, 0,
459 (SCM list
, SCM base_cs
),
460 "Convert the character list @var{list} to a character set. If\n"
461 "the character set @var{base_cs} is given, the character in this\n"
462 "set are also included in the result.")
463 #define FUNC_NAME s_scm_list_to_char_set
468 SCM_VALIDATE_LIST (1, list
);
469 if (SCM_UNBNDP (base_cs
))
470 cs
= make_char_set (FUNC_NAME
);
473 SCM_VALIDATE_SMOB (2, base_cs
, charset
);
474 cs
= scm_char_set_copy (base_cs
);
476 p
= (long *) SCM_SMOB_DATA (cs
);
477 while (!SCM_NULLP (list
))
479 SCM chr
= SCM_CAR (list
);
482 SCM_VALIDATE_CHAR_COPY (0, chr
, c
);
483 list
= SCM_CDR (list
);
485 p
[c
/ SCM_BITS_PER_LONG
] |= 1L << (c
% SCM_BITS_PER_LONG
);
492 SCM_DEFINE (scm_list_to_char_set_x
, "list->char-set!", 2, 0, 0,
493 (SCM list
, SCM base_cs
),
494 "Convert the character list @var{list} to a character set. The\n"
495 "characters are added to @var{base_cs} and @var{base_cs} is\n"
497 #define FUNC_NAME s_scm_list_to_char_set_x
501 SCM_VALIDATE_LIST (1, list
);
502 SCM_VALIDATE_SMOB (2, base_cs
, charset
);
503 p
= (long *) SCM_SMOB_DATA (base_cs
);
504 while (!SCM_NULLP (list
))
506 SCM chr
= SCM_CAR (list
);
509 SCM_VALIDATE_CHAR_COPY (0, chr
, c
);
510 list
= SCM_CDR (list
);
512 p
[c
/ SCM_BITS_PER_LONG
] |= 1L << (c
% SCM_BITS_PER_LONG
);
519 SCM_DEFINE (scm_string_to_char_set
, "string->char-set", 1, 1, 0,
520 (SCM str
, SCM base_cs
),
521 "Convert the string @var{str} to a character set. If the\n"
522 "character set @var{base_cs} is given, the characters in this\n"
523 "set are also included in the result.")
524 #define FUNC_NAME s_scm_string_to_char_set
531 SCM_VALIDATE_STRING (1, str
);
532 if (SCM_UNBNDP (base_cs
))
533 cs
= make_char_set (FUNC_NAME
);
536 SCM_VALIDATE_SMOB (2, base_cs
, charset
);
537 cs
= scm_char_set_copy (base_cs
);
539 p
= (long *) SCM_SMOB_DATA (cs
);
540 s
= scm_i_string_chars (str
);
541 len
= scm_i_string_length (str
);
545 p
[c
/ SCM_BITS_PER_LONG
] |= 1L << (c
% SCM_BITS_PER_LONG
);
547 scm_remember_upto_here_1 (str
);
553 SCM_DEFINE (scm_string_to_char_set_x
, "string->char-set!", 2, 0, 0,
554 (SCM str
, SCM base_cs
),
555 "Convert the string @var{str} to a character set. The\n"
556 "characters from the string are added to @var{base_cs}, and\n"
557 "@var{base_cs} is returned.")
558 #define FUNC_NAME s_scm_string_to_char_set_x
564 SCM_VALIDATE_STRING (1, str
);
565 SCM_VALIDATE_SMOB (2, base_cs
, charset
);
566 p
= (long *) SCM_SMOB_DATA (base_cs
);
567 s
= scm_i_string_chars (str
);
568 len
= scm_i_string_length (str
);
572 p
[c
/ SCM_BITS_PER_LONG
] |= 1L << (c
% SCM_BITS_PER_LONG
);
574 scm_remember_upto_here_1 (str
);
580 SCM_DEFINE (scm_char_set_filter
, "char-set-filter", 2, 1, 0,
581 (SCM pred
, SCM cs
, SCM base_cs
),
582 "Return a character set containing every character from @var{cs}\n"
583 "so that it satisfies @var{pred}. If provided, the characters\n"
584 "from @var{base_cs} are added to the result.")
585 #define FUNC_NAME s_scm_char_set_filter
591 SCM_VALIDATE_PROC (1, pred
);
592 SCM_VALIDATE_SMOB (2, cs
, charset
);
593 if (!SCM_UNBNDP (base_cs
))
595 SCM_VALIDATE_SMOB (3, base_cs
, charset
);
596 ret
= scm_char_set_copy (base_cs
);
599 ret
= make_char_set (FUNC_NAME
);
600 p
= (long *) SCM_SMOB_DATA (ret
);
601 for (k
= 0; k
< SCM_CHARSET_SIZE
; k
++)
603 if (SCM_CHARSET_GET (cs
, k
))
605 SCM res
= scm_call_1 (pred
, SCM_MAKE_CHAR (k
));
607 if (scm_is_true (res
))
608 p
[k
/ SCM_BITS_PER_LONG
] |= 1L << (k
% SCM_BITS_PER_LONG
);
616 SCM_DEFINE (scm_char_set_filter_x
, "char-set-filter!", 3, 0, 0,
617 (SCM pred
, SCM cs
, SCM base_cs
),
618 "Return a character set containing every character from @var{cs}\n"
619 "so that it satisfies @var{pred}. The characters are added to\n"
620 "@var{base_cs} and @var{base_cs} is returned.")
621 #define FUNC_NAME s_scm_char_set_filter_x
626 SCM_VALIDATE_PROC (1, pred
);
627 SCM_VALIDATE_SMOB (2, cs
, charset
);
628 SCM_VALIDATE_SMOB (3, base_cs
, charset
);
629 p
= (long *) SCM_SMOB_DATA (base_cs
);
630 for (k
= 0; k
< SCM_CHARSET_SIZE
; k
++)
632 if (SCM_CHARSET_GET (cs
, k
))
634 SCM res
= scm_call_1 (pred
, SCM_MAKE_CHAR (k
));
636 if (scm_is_true (res
))
637 p
[k
/ SCM_BITS_PER_LONG
] |= 1L << (k
% SCM_BITS_PER_LONG
);
645 SCM_DEFINE (scm_ucs_range_to_char_set
, "ucs-range->char-set", 2, 2, 0,
646 (SCM lower
, SCM upper
, SCM error
, SCM base_cs
),
647 "Return a character set containing all characters whose\n"
648 "character codes lie in the half-open range\n"
649 "[@var{lower},@var{upper}).\n"
651 "If @var{error} is a true value, an error is signalled if the\n"
652 "specified range contains characters which are not contained in\n"
653 "the implemented character range. If @var{error} is @code{#f},\n"
654 "these characters are silently left out of the resultung\n"
657 "The characters in @var{base_cs} are added to the result, if\n"
659 #define FUNC_NAME s_scm_ucs_range_to_char_set
662 size_t clower
, cupper
;
665 clower
= scm_to_size_t (lower
);
666 cupper
= scm_to_size_t (upper
);
667 SCM_ASSERT_RANGE (2, upper
, cupper
>= clower
);
668 if (!SCM_UNBNDP (error
))
670 if (scm_is_true (error
))
672 SCM_ASSERT_RANGE (1, lower
, clower
<= SCM_CHARSET_SIZE
);
673 SCM_ASSERT_RANGE (2, upper
, cupper
<= SCM_CHARSET_SIZE
);
676 if (clower
> SCM_CHARSET_SIZE
)
677 clower
= SCM_CHARSET_SIZE
;
678 if (cupper
> SCM_CHARSET_SIZE
)
679 cupper
= SCM_CHARSET_SIZE
;
680 if (SCM_UNBNDP (base_cs
))
681 cs
= make_char_set (FUNC_NAME
);
684 SCM_VALIDATE_SMOB (4, base_cs
, charset
);
685 cs
= scm_char_set_copy (base_cs
);
687 p
= (long *) SCM_SMOB_DATA (cs
);
688 while (clower
< cupper
)
690 p
[clower
/ SCM_BITS_PER_LONG
] |= 1L << (clower
% SCM_BITS_PER_LONG
);
698 SCM_DEFINE (scm_ucs_range_to_char_set_x
, "ucs-range->char-set!", 4, 0, 0,
699 (SCM lower
, SCM upper
, SCM error
, SCM base_cs
),
700 "Return a character set containing all characters whose\n"
701 "character codes lie in the half-open range\n"
702 "[@var{lower},@var{upper}).\n"
704 "If @var{error} is a true value, an error is signalled if the\n"
705 "specified range contains characters which are not contained in\n"
706 "the implemented character range. If @var{error} is @code{#f},\n"
707 "these characters are silently left out of the resultung\n"
710 "The characters are added to @var{base_cs} and @var{base_cs} is\n"
712 #define FUNC_NAME s_scm_ucs_range_to_char_set_x
714 size_t clower
, cupper
;
717 clower
= scm_to_size_t (lower
);
718 cupper
= scm_to_size_t (upper
);
719 SCM_ASSERT_RANGE (2, upper
, cupper
>= clower
);
720 if (scm_is_true (error
))
722 SCM_ASSERT_RANGE (1, lower
, clower
<= SCM_CHARSET_SIZE
);
723 SCM_ASSERT_RANGE (2, upper
, cupper
<= SCM_CHARSET_SIZE
);
725 if (clower
> SCM_CHARSET_SIZE
)
726 clower
= SCM_CHARSET_SIZE
;
727 if (cupper
> SCM_CHARSET_SIZE
)
728 cupper
= SCM_CHARSET_SIZE
;
729 p
= (long *) SCM_SMOB_DATA (base_cs
);
730 while (clower
< cupper
)
732 p
[clower
/ SCM_BITS_PER_LONG
] |= 1L << (clower
% SCM_BITS_PER_LONG
);
739 SCM_DEFINE (scm_to_char_set
, "->char-set", 1, 0, 0,
741 "Coerces x into a char-set. @var{x} may be a string, character or char-set. A string is converted to the set of its constituent characters; a character is converted to a singleton set; a char-set is returned as-is.")
742 #define FUNC_NAME s_scm_to_char_set
744 if (scm_is_string (x
))
745 return scm_string_to_char_set (x
, SCM_UNDEFINED
);
746 else if (SCM_CHARP (x
))
747 return scm_char_set (scm_list_1 (x
));
748 else if (SCM_SMOB_PREDICATE (scm_tc16_charset
, x
))
751 scm_wrong_type_arg (NULL
, 0, x
);
755 SCM_DEFINE (scm_char_set_size
, "char-set-size", 1, 0, 0,
757 "Return the number of elements in character set @var{cs}.")
758 #define FUNC_NAME s_scm_char_set_size
762 SCM_VALIDATE_SMOB (1, cs
, charset
);
763 for (k
= 0; k
< SCM_CHARSET_SIZE
; k
++)
764 if (SCM_CHARSET_GET (cs
, k
))
766 return SCM_I_MAKINUM (count
);
771 SCM_DEFINE (scm_char_set_count
, "char-set-count", 2, 0, 0,
773 "Return the number of the elements int the character set\n"
774 "@var{cs} which satisfy the predicate @var{pred}.")
775 #define FUNC_NAME s_scm_char_set_count
779 SCM_VALIDATE_PROC (1, pred
);
780 SCM_VALIDATE_SMOB (2, cs
, charset
);
782 for (k
= 0; k
< SCM_CHARSET_SIZE
; k
++)
783 if (SCM_CHARSET_GET (cs
, k
))
785 SCM res
= scm_call_1 (pred
, SCM_MAKE_CHAR (k
));
786 if (scm_is_true (res
))
789 return SCM_I_MAKINUM (count
);
794 SCM_DEFINE (scm_char_set_to_list
, "char-set->list", 1, 0, 0,
796 "Return a list containing the elements of the character set\n"
798 #define FUNC_NAME s_scm_char_set_to_list
801 SCM result
= SCM_EOL
;
803 SCM_VALIDATE_SMOB (1, cs
, charset
);
804 for (k
= SCM_CHARSET_SIZE
; k
> 0; k
--)
805 if (SCM_CHARSET_GET (cs
, k
- 1))
806 result
= scm_cons (SCM_MAKE_CHAR (k
- 1), result
);
812 SCM_DEFINE (scm_char_set_to_string
, "char-set->string", 1, 0, 0,
814 "Return a string containing the elements of the character set\n"
815 "@var{cs}. The order in which the characters are placed in the\n"
816 "string is not defined.")
817 #define FUNC_NAME s_scm_char_set_to_string
825 SCM_VALIDATE_SMOB (1, cs
, charset
);
826 for (k
= 0; k
< SCM_CHARSET_SIZE
; k
++)
827 if (SCM_CHARSET_GET (cs
, k
))
829 result
= scm_i_make_string (count
, &p
);
830 for (k
= 0; k
< SCM_CHARSET_SIZE
; k
++)
831 if (SCM_CHARSET_GET (cs
, k
))
838 SCM_DEFINE (scm_char_set_contains_p
, "char-set-contains?", 2, 0, 0,
840 "Return @code{#t} iff the character @var{ch} is contained in the\n"
841 "character set @var{cs}.")
842 #define FUNC_NAME s_scm_char_set_contains_p
844 SCM_VALIDATE_SMOB (1, cs
, charset
);
845 SCM_VALIDATE_CHAR (2, ch
);
846 return scm_from_bool (SCM_CHARSET_GET (cs
, SCM_CHAR (ch
)));
851 SCM_DEFINE (scm_char_set_every
, "char-set-every", 2, 0, 0,
853 "Return a true value if every character in the character set\n"
854 "@var{cs} satisfies the predicate @var{pred}.")
855 #define FUNC_NAME s_scm_char_set_every
858 SCM res
= SCM_BOOL_T
;
860 SCM_VALIDATE_PROC (1, pred
);
861 SCM_VALIDATE_SMOB (2, cs
, charset
);
863 for (k
= 0; k
< SCM_CHARSET_SIZE
; k
++)
864 if (SCM_CHARSET_GET (cs
, k
))
866 res
= scm_call_1 (pred
, SCM_MAKE_CHAR (k
));
867 if (scm_is_false (res
))
875 SCM_DEFINE (scm_char_set_any
, "char-set-any", 2, 0, 0,
877 "Return a true value if any character in the character set\n"
878 "@var{cs} satisfies the predicate @var{pred}.")
879 #define FUNC_NAME s_scm_char_set_any
883 SCM_VALIDATE_PROC (1, pred
);
884 SCM_VALIDATE_SMOB (2, cs
, charset
);
886 for (k
= 0; k
< SCM_CHARSET_SIZE
; k
++)
887 if (SCM_CHARSET_GET (cs
, k
))
889 SCM res
= scm_call_1 (pred
, SCM_MAKE_CHAR (k
));
890 if (scm_is_true (res
))
898 SCM_DEFINE (scm_char_set_adjoin
, "char-set-adjoin", 1, 0, 1,
900 "Add all character arguments to the first argument, which must\n"
901 "be a character set.")
902 #define FUNC_NAME s_scm_char_set_adjoin
906 SCM_VALIDATE_SMOB (1, cs
, charset
);
907 SCM_VALIDATE_REST_ARGUMENT (rest
);
908 cs
= scm_char_set_copy (cs
);
910 p
= (long *) SCM_SMOB_DATA (cs
);
911 while (!SCM_NULLP (rest
))
913 SCM chr
= SCM_CAR (rest
);
916 SCM_VALIDATE_CHAR_COPY (1, chr
, c
);
917 rest
= SCM_CDR (rest
);
919 p
[c
/ SCM_BITS_PER_LONG
] |= 1L << (c
% SCM_BITS_PER_LONG
);
926 SCM_DEFINE (scm_char_set_delete
, "char-set-delete", 1, 0, 1,
928 "Delete all character arguments from the first argument, which\n"
929 "must be a character set.")
930 #define FUNC_NAME s_scm_char_set_delete
934 SCM_VALIDATE_SMOB (1, cs
, charset
);
935 SCM_VALIDATE_REST_ARGUMENT (rest
);
936 cs
= scm_char_set_copy (cs
);
938 p
= (long *) SCM_SMOB_DATA (cs
);
939 while (!SCM_NULLP (rest
))
941 SCM chr
= SCM_CAR (rest
);
944 SCM_VALIDATE_CHAR_COPY (1, chr
, c
);
945 rest
= SCM_CDR (rest
);
947 p
[c
/ SCM_BITS_PER_LONG
] &= ~(1L << (c
% SCM_BITS_PER_LONG
));
954 SCM_DEFINE (scm_char_set_adjoin_x
, "char-set-adjoin!", 1, 0, 1,
956 "Add all character arguments to the first argument, which must\n"
957 "be a character set.")
958 #define FUNC_NAME s_scm_char_set_adjoin_x
962 SCM_VALIDATE_SMOB (1, cs
, charset
);
963 SCM_VALIDATE_REST_ARGUMENT (rest
);
965 p
= (long *) SCM_SMOB_DATA (cs
);
966 while (!SCM_NULLP (rest
))
968 SCM chr
= SCM_CAR (rest
);
971 SCM_VALIDATE_CHAR_COPY (1, chr
, c
);
972 rest
= SCM_CDR (rest
);
974 p
[c
/ SCM_BITS_PER_LONG
] |= 1L << (c
% SCM_BITS_PER_LONG
);
981 SCM_DEFINE (scm_char_set_delete_x
, "char-set-delete!", 1, 0, 1,
983 "Delete all character arguments from the first argument, which\n"
984 "must be a character set.")
985 #define FUNC_NAME s_scm_char_set_delete_x
989 SCM_VALIDATE_SMOB (1, cs
, charset
);
990 SCM_VALIDATE_REST_ARGUMENT (rest
);
992 p
= (long *) SCM_SMOB_DATA (cs
);
993 while (!SCM_NULLP (rest
))
995 SCM chr
= SCM_CAR (rest
);
998 SCM_VALIDATE_CHAR_COPY (1, chr
, c
);
999 rest
= SCM_CDR (rest
);
1001 p
[c
/ SCM_BITS_PER_LONG
] &= ~(1L << (c
% SCM_BITS_PER_LONG
));
1008 SCM_DEFINE (scm_char_set_complement
, "char-set-complement", 1, 0, 0,
1010 "Return the complement of the character set @var{cs}.")
1011 #define FUNC_NAME s_scm_char_set_complement
1017 SCM_VALIDATE_SMOB (1, cs
, charset
);
1019 res
= make_char_set (FUNC_NAME
);
1020 p
= (long *) SCM_SMOB_DATA (res
);
1021 q
= (long *) SCM_SMOB_DATA (cs
);
1022 for (k
= 0; k
< LONGS_PER_CHARSET
; k
++)
1029 SCM_DEFINE (scm_char_set_union
, "char-set-union", 0, 0, 1,
1031 "Return the union of all argument character sets.")
1032 #define FUNC_NAME s_scm_char_set_union
1038 SCM_VALIDATE_REST_ARGUMENT (rest
);
1040 res
= make_char_set (FUNC_NAME
);
1041 p
= (long *) SCM_SMOB_DATA (res
);
1042 while (!SCM_NULLP (rest
))
1045 SCM cs
= SCM_CAR (rest
);
1046 SCM_VALIDATE_SMOB (c
, cs
, charset
);
1048 rest
= SCM_CDR (rest
);
1050 for (k
= 0; k
< LONGS_PER_CHARSET
; k
++)
1051 p
[k
] |= ((long *) SCM_SMOB_DATA (cs
))[k
];
1058 SCM_DEFINE (scm_char_set_intersection
, "char-set-intersection", 0, 0, 1,
1060 "Return the intersection of all argument character sets.")
1061 #define FUNC_NAME s_scm_char_set_intersection
1065 SCM_VALIDATE_REST_ARGUMENT (rest
);
1067 if (SCM_NULLP (rest
))
1068 res
= make_char_set (FUNC_NAME
);
1074 res
= scm_char_set_copy (SCM_CAR (rest
));
1075 p
= (long *) SCM_SMOB_DATA (res
);
1076 rest
= SCM_CDR (rest
);
1078 while (SCM_CONSP (rest
))
1081 SCM cs
= SCM_CAR (rest
);
1084 SCM_VALIDATE_SMOB (argnum
, cs
, charset
);
1086 cs_data
= (long *) SCM_SMOB_DATA (cs
);
1087 rest
= SCM_CDR (rest
);
1088 for (k
= 0; k
< LONGS_PER_CHARSET
; k
++)
1098 SCM_DEFINE (scm_char_set_difference
, "char-set-difference", 1, 0, 1,
1099 (SCM cs1
, SCM rest
),
1100 "Return the difference of all argument character sets.")
1101 #define FUNC_NAME s_scm_char_set_difference
1107 SCM_VALIDATE_SMOB (1, cs1
, charset
);
1108 SCM_VALIDATE_REST_ARGUMENT (rest
);
1110 res
= scm_char_set_copy (cs1
);
1111 p
= (long *) SCM_SMOB_DATA (res
);
1112 while (!SCM_NULLP (rest
))
1115 SCM cs
= SCM_CAR (rest
);
1116 SCM_VALIDATE_SMOB (c
, cs
, charset
);
1118 rest
= SCM_CDR (rest
);
1120 for (k
= 0; k
< LONGS_PER_CHARSET
; k
++)
1121 p
[k
] &= ~((long *) SCM_SMOB_DATA (cs
))[k
];
1128 SCM_DEFINE (scm_char_set_xor
, "char-set-xor", 0, 0, 1,
1130 "Return the exclusive-or of all argument character sets.")
1131 #define FUNC_NAME s_scm_char_set_xor
1135 SCM_VALIDATE_REST_ARGUMENT (rest
);
1137 if (SCM_NULLP (rest
))
1138 res
= make_char_set (FUNC_NAME
);
1144 res
= scm_char_set_copy (SCM_CAR (rest
));
1145 p
= (long *) SCM_SMOB_DATA (res
);
1146 rest
= SCM_CDR (rest
);
1148 while (SCM_CONSP (rest
))
1150 SCM cs
= SCM_CAR (rest
);
1154 SCM_VALIDATE_SMOB (argnum
, cs
, charset
);
1156 cs_data
= (long *) SCM_SMOB_DATA (cs
);
1157 rest
= SCM_CDR (rest
);
1159 for (k
= 0; k
< LONGS_PER_CHARSET
; k
++)
1168 SCM_DEFINE (scm_char_set_diff_plus_intersection
, "char-set-diff+intersection", 1, 0, 1,
1169 (SCM cs1
, SCM rest
),
1170 "Return the difference and the intersection of all argument\n"
1172 #define FUNC_NAME s_scm_char_set_diff_plus_intersection
1178 SCM_VALIDATE_SMOB (1, cs1
, charset
);
1179 SCM_VALIDATE_REST_ARGUMENT (rest
);
1181 res1
= scm_char_set_copy (cs1
);
1182 res2
= make_char_set (FUNC_NAME
);
1183 p
= (long *) SCM_SMOB_DATA (res1
);
1184 q
= (long *) SCM_SMOB_DATA (res2
);
1185 while (!SCM_NULLP (rest
))
1188 SCM cs
= SCM_CAR (rest
);
1191 SCM_VALIDATE_SMOB (c
, cs
, charset
);
1193 r
= (long *) SCM_SMOB_DATA (cs
);
1195 for (k
= 0; k
< LONGS_PER_CHARSET
; k
++)
1197 q
[k
] |= p
[k
] & r
[k
];
1200 rest
= SCM_CDR (rest
);
1202 return scm_values (scm_list_2 (res1
, res2
));
1207 SCM_DEFINE (scm_char_set_complement_x
, "char-set-complement!", 1, 0, 0,
1209 "Return the complement of the character set @var{cs}.")
1210 #define FUNC_NAME s_scm_char_set_complement_x
1215 SCM_VALIDATE_SMOB (1, cs
, charset
);
1216 p
= (long *) SCM_SMOB_DATA (cs
);
1217 for (k
= 0; k
< LONGS_PER_CHARSET
; k
++)
1224 SCM_DEFINE (scm_char_set_union_x
, "char-set-union!", 1, 0, 1,
1225 (SCM cs1
, SCM rest
),
1226 "Return the union of all argument character sets.")
1227 #define FUNC_NAME s_scm_char_set_union_x
1232 SCM_VALIDATE_SMOB (1, cs1
, charset
);
1233 SCM_VALIDATE_REST_ARGUMENT (rest
);
1235 p
= (long *) SCM_SMOB_DATA (cs1
);
1236 while (!SCM_NULLP (rest
))
1239 SCM cs
= SCM_CAR (rest
);
1240 SCM_VALIDATE_SMOB (c
, cs
, charset
);
1242 rest
= SCM_CDR (rest
);
1244 for (k
= 0; k
< LONGS_PER_CHARSET
; k
++)
1245 p
[k
] |= ((long *) SCM_SMOB_DATA (cs
))[k
];
1252 SCM_DEFINE (scm_char_set_intersection_x
, "char-set-intersection!", 1, 0, 1,
1253 (SCM cs1
, SCM rest
),
1254 "Return the intersection of all argument character sets.")
1255 #define FUNC_NAME s_scm_char_set_intersection_x
1260 SCM_VALIDATE_SMOB (1, cs1
, charset
);
1261 SCM_VALIDATE_REST_ARGUMENT (rest
);
1263 p
= (long *) SCM_SMOB_DATA (cs1
);
1264 while (!SCM_NULLP (rest
))
1267 SCM cs
= SCM_CAR (rest
);
1268 SCM_VALIDATE_SMOB (c
, cs
, charset
);
1270 rest
= SCM_CDR (rest
);
1272 for (k
= 0; k
< LONGS_PER_CHARSET
; k
++)
1273 p
[k
] &= ((long *) SCM_SMOB_DATA (cs
))[k
];
1280 SCM_DEFINE (scm_char_set_difference_x
, "char-set-difference!", 1, 0, 1,
1281 (SCM cs1
, SCM rest
),
1282 "Return the difference of all argument character sets.")
1283 #define FUNC_NAME s_scm_char_set_difference_x
1288 SCM_VALIDATE_SMOB (1, cs1
, charset
);
1289 SCM_VALIDATE_REST_ARGUMENT (rest
);
1291 p
= (long *) SCM_SMOB_DATA (cs1
);
1292 while (!SCM_NULLP (rest
))
1295 SCM cs
= SCM_CAR (rest
);
1296 SCM_VALIDATE_SMOB (c
, cs
, charset
);
1298 rest
= SCM_CDR (rest
);
1300 for (k
= 0; k
< LONGS_PER_CHARSET
; k
++)
1301 p
[k
] &= ~((long *) SCM_SMOB_DATA (cs
))[k
];
1308 SCM_DEFINE (scm_char_set_xor_x
, "char-set-xor!", 1, 0, 1,
1309 (SCM cs1
, SCM rest
),
1310 "Return the exclusive-or of all argument character sets.")
1311 #define FUNC_NAME s_scm_char_set_xor_x
/* A side-effecting variant should presumably give results that are
   consistent with the non-destructive one:
     (define a (char-set #\a))
     (char-set-xor a a a)  -> char set #\a
     (char-set-xor! a a a) -> char set #\a
*/
1318 return scm_char_set_xor (scm_cons (cs1
, rest
));
1321 /* this would give (char-set-xor! a a a) -> empty char set. */
1325 SCM_VALIDATE_SMOB (1, cs1
, charset
);
1326 SCM_VALIDATE_REST_ARGUMENT (rest
);
1328 p
= (long *) SCM_SMOB_DATA (cs1
);
1329 while (!SCM_NULLP (rest
))
1332 SCM cs
= SCM_CAR (rest
);
1333 SCM_VALIDATE_SMOB (c
, cs
, charset
);
1335 rest
= SCM_CDR (rest
);
1337 for (k
= 0; k
< LONGS_PER_CHARSET
; k
++)
1338 p
[k
] ^= ((long *) SCM_SMOB_DATA (cs
))[k
];
1346 SCM_DEFINE (scm_char_set_diff_plus_intersection_x
, "char-set-diff+intersection!", 2, 0, 1,
1347 (SCM cs1
, SCM cs2
, SCM rest
),
1348 "Return the difference and the intersection of all argument\n"
1350 #define FUNC_NAME s_scm_char_set_diff_plus_intersection_x
1356 SCM_VALIDATE_SMOB (1, cs1
, charset
);
1357 SCM_VALIDATE_SMOB (2, cs2
, charset
);
1358 SCM_VALIDATE_REST_ARGUMENT (rest
);
1360 p
= (long *) SCM_SMOB_DATA (cs1
);
1361 q
= (long *) SCM_SMOB_DATA (cs2
);
/* (char-set-diff+intersection! a a ...): the two results can't share
   storage, but we know the answer without checking for further
   arguments.  */
1367 return scm_values (scm_list_2 (make_char_set (FUNC_NAME
), cs1
));
1369 for (k
= 0; k
< LONGS_PER_CHARSET
; k
++)
1376 while (!SCM_NULLP (rest
))
1378 SCM cs
= SCM_CAR (rest
);
1381 SCM_VALIDATE_SMOB (c
, cs
, charset
);
1383 r
= (long *) SCM_SMOB_DATA (cs
);
1385 for (k
= 0; k
< LONGS_PER_CHARSET
; k
++)
1387 q
[k
] |= p
[k
] & r
[k
];
1390 rest
= SCM_CDR (rest
);
1392 return scm_values (scm_list_2 (cs1
, cs2
));
1396 SCM scm_char_set_lower_case
;
1397 SCM scm_char_set_upper_case
;
1398 SCM scm_char_set_title_case
;
1399 SCM scm_char_set_letter
;
1400 SCM scm_char_set_digit
;
1401 SCM scm_char_set_letter_and_digit
;
1402 SCM scm_char_set_graphic
;
1403 SCM scm_char_set_printing
;
1404 SCM scm_char_set_whitespace
;
1405 SCM scm_char_set_iso_control
;
1406 SCM scm_char_set_punctuation
;
1407 SCM scm_char_set_symbol
;
1408 SCM scm_char_set_hex_digit
;
1409 SCM scm_char_set_blank
;
1410 SCM scm_char_set_ascii
;
1411 SCM scm_char_set_empty
;
1412 SCM scm_char_set_full
;
1415 make_predset (int (*pred
) (int))
1418 SCM cs
= make_char_set (NULL
);
1419 for (ch
= 0; ch
< 256; ch
++)
1421 SCM_CHARSET_SET (cs
, ch
);
1426 define_predset (const char *name
, int (*pred
) (int))
1428 SCM cs
= make_predset (pred
);
1429 scm_c_define (name
, cs
);
1430 return scm_permanent_object (cs
);
1434 make_strset (const char *str
)
1436 SCM cs
= make_char_set (NULL
);
1439 SCM_CHARSET_SET (cs
, *str
);
1446 define_strset (const char *name
, const char *str
)
1448 SCM cs
= make_strset (str
);
1449 scm_c_define (name
, cs
);
1450 return scm_permanent_object (cs
);
/* Character predicate that accepts nothing: returns 0 for every
   character code CH.  scm_init_srfi_14 passes it to define_predset
   for char-set:title-case and char-set:empty.  */
static int
false (int ch)
{
  (void) ch;			/* The argument is deliberately ignored.  */
  return 0;
}
/* Character predicate that accepts everything: returns 1 for every
   character code CH.  scm_init_srfi_14 passes it to define_predset
   for char-set:full.  */
static int
true (int ch)
{
  (void) ch;			/* The argument is deliberately ignored.  */
  return 1;
}
1457 scm_init_srfi_14 (void)
1459 scm_tc16_charset
= scm_make_smob_type ("character-set",
1461 scm_set_smob_free (scm_tc16_charset
, charset_free
);
1462 scm_set_smob_print (scm_tc16_charset
, charset_print
);
1464 scm_char_set_upper_case
= define_predset ("char-set:upper-case", isupper
);
1465 scm_char_set_lower_case
= define_predset ("char-set:lower-case", islower
);
1466 scm_char_set_title_case
= define_predset ("char-set:title-case", false);
1467 scm_char_set_letter
= define_predset ("char-set:letter", isalpha
);
1468 scm_char_set_digit
= define_predset ("char-set:digit", isdigit
);
1469 scm_char_set_letter_and_digit
= define_predset ("char-set:letter+digit",
1471 scm_char_set_graphic
= define_predset ("char-set:graphic", isgraph
);
1472 scm_char_set_printing
= define_predset ("char-set:printing", isprint
);
1473 scm_char_set_whitespace
= define_predset ("char-set:whitespace", isspace
);
1474 scm_char_set_iso_control
= define_predset ("char-set:iso-control", iscntrl
);
1475 scm_char_set_punctuation
= define_predset ("char-set:punctuation", ispunct
);
1476 scm_char_set_symbol
= define_strset ("char-set:symbol", "$+<=>^`|~");
1477 scm_char_set_hex_digit
= define_predset ("char-set:hex-digit", isxdigit
);
1478 scm_char_set_blank
= define_strset ("char-set:blank", " \t");
1479 scm_char_set_ascii
= define_predset ("char-set:ascii", isascii
);
1480 scm_char_set_empty
= define_predset ("char-set:empty", false);
1481 scm_char_set_full
= define_predset ("char-set:full", true);
1483 #include "libguile/srfi-14.x"
1486 /* End of srfi-14.c. */