1 /* srfi-14.c --- SRFI-14 procedures for Guile
3 * Copyright (C) 2001, 2004, 2006, 2007, 2009 Free Software Foundation, Inc.
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public License
7 * as published by the Free Software Foundation; either version 3 of
8 * the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
30 #include "libguile/srfi-14.h"
31 #include "libguile/strings.h"
32 #include "libguile/chars.h"
34 /* Include the pre-computed standard charset data. */
35 #include "libguile/srfi-14.i.c"
37 scm_t_char_range cs_full_ranges
[] = {
38 {0x0000, SCM_CODEPOINT_SURROGATE_START
- 1}
40 {SCM_CODEPOINT_SURROGATE_END
+ 1, SCM_CODEPOINT_MAX
}
43 scm_t_char_set cs_full
= {
49 #define SCM_CHARSET_DATA(charset) ((scm_t_char_set *) SCM_SMOB_DATA (charset))
51 #define SCM_CHARSET_SET(cs, idx) \
52 scm_i_charset_set (SCM_CHARSET_DATA (cs), idx)
54 #define SCM_CHARSET_UNSET(cs, idx) \
55 scm_i_charset_unset (SCM_CHARSET_DATA (cs), idx)
57 /* Smob type code for character sets. */
58 int scm_tc16_charset
= 0;
59 int scm_tc16_charset_cursor
= 0;
61 /* True if N exists in charset CS. */
63 scm_i_charset_get (scm_t_char_set
*cs
, scm_t_wchar n
)
70 if (cs
->ranges
[i
].lo
<= n
&& n
<= cs
->ranges
[i
].hi
)
78 /* Put N into charset CS. */
80 scm_i_charset_set (scm_t_char_set
*cs
, scm_t_wchar n
)
90 /* Already in this range */
91 if (cs
->ranges
[i
].lo
<= n
&& n
<= cs
->ranges
[i
].hi
)
96 if (n
== cs
->ranges
[i
].lo
- 1)
98 /* This char is one below the current range. */
99 if (i
> 0 && cs
->ranges
[i
- 1].hi
+ 1 == n
)
101 /* It is also one above the previous range. */
102 /* This is an impossible condition: in the previous
103 iteration, the test for 'one above the current range'
104 should already have inserted the character here. */
109 /* Expand the range down by one. */
110 cs
->ranges
[i
].lo
= n
;
114 else if (n
== cs
->ranges
[i
].hi
+ 1)
116 /* This char is one above the current range. */
117 if (i
< len
- 1 && cs
->ranges
[i
+ 1].lo
- 1 == n
)
119 /* It is also one below the next range, so combine them. */
120 cs
->ranges
[i
].hi
= cs
->ranges
[i
+ 1].hi
;
122 memmove (cs
->ranges
+ (i
+ 1), cs
->ranges
+ (i
+ 2),
123 sizeof (scm_t_char_range
) * (len
- i
- 2));
124 cs
->ranges
= scm_gc_realloc (cs
->ranges
,
125 sizeof (scm_t_char_range
) * len
,
126 sizeof (scm_t_char_range
) * (len
-
134 /* Expand the range up by one. */
135 cs
->ranges
[i
].hi
= n
;
139 else if (n
< cs
->ranges
[i
].lo
- 1)
141 /* This is a new range below the current one. */
142 cs
->ranges
= scm_gc_realloc (cs
->ranges
,
143 sizeof (scm_t_char_range
) * len
,
144 sizeof (scm_t_char_range
) * (len
+ 1),
146 memmove (cs
->ranges
+ (i
+ 1), cs
->ranges
+ i
,
147 sizeof (scm_t_char_range
) * (len
- i
));
148 cs
->ranges
[i
].lo
= n
;
149 cs
->ranges
[i
].hi
= n
;
157 /* This is a new range above all previous ranges. */
160 cs
->ranges
= scm_gc_malloc (sizeof (scm_t_char_range
), "character-set");
164 cs
->ranges
= scm_gc_realloc (cs
->ranges
,
165 sizeof (scm_t_char_range
) * len
,
166 sizeof (scm_t_char_range
) * (len
+ 1),
169 cs
->ranges
[len
].lo
= n
;
170 cs
->ranges
[len
].hi
= n
;
176 /* Put LO to HI inclusive into charset CS. */
178 scm_i_charset_set_range (scm_t_char_set
*cs
, scm_t_wchar lo
, scm_t_wchar hi
)
185 /* Already in this range */
186 if (cs
->ranges
[i
].lo
<= lo
&& cs
->ranges
[i
].hi
>= hi
)
192 if (cs
->ranges
[i
].lo
- 1 > hi
)
194 /* Add a new range below the current one. */
195 cs
->ranges
= scm_gc_realloc (cs
->ranges
,
196 sizeof (scm_t_char_range
) * cs
->len
,
197 sizeof (scm_t_char_range
) * (cs
->len
+ 1),
199 memmove (cs
->ranges
+ (i
+ 1), cs
->ranges
+ i
,
200 sizeof (scm_t_char_range
) * (cs
->len
- i
));
201 cs
->ranges
[i
].lo
= lo
;
202 cs
->ranges
[i
].hi
= hi
;
207 /* cur: +---+ or +---+ or +---+
208 new: +---+ +---+ +---+
210 if (cs
->ranges
[i
].lo
> lo
211 && (cs
->ranges
[i
].lo
- 1 <= hi
&& cs
->ranges
[i
].hi
>= hi
))
213 cs
->ranges
[i
].lo
= lo
;
217 /* cur: +---+ or +---+ or +---+
218 new: +---+ +---+ +---+
220 else if (cs
->ranges
[i
].hi
+ 1 >= lo
&& cs
->ranges
[i
].hi
< hi
)
222 if (cs
->ranges
[i
].lo
> lo
)
223 cs
->ranges
[i
].lo
= lo
;
224 if (cs
->ranges
[i
].hi
< hi
)
225 cs
->ranges
[i
].hi
= hi
;
226 while (i
< cs
->len
- 1)
231 if (cs
->ranges
[i
+ 1].lo
- 1 > hi
)
234 /* cur: --+ +---+ or --+ +---+ or --+ +--+
235 new: -----+ ------+ ---------+
237 /* Combine this range with the previous one. */
238 if (cs
->ranges
[i
+ 1].hi
> hi
)
239 cs
->ranges
[i
].hi
= cs
->ranges
[i
+ 1].hi
;
241 memmove (cs
->ranges
+ i
+ 1, cs
->ranges
+ i
+ 2,
242 sizeof (scm_t_char_range
) * (cs
->len
- i
- 2));
243 cs
->ranges
= scm_gc_realloc (cs
->ranges
,
244 sizeof (scm_t_char_range
) * cs
->len
,
245 sizeof (scm_t_char_range
) * (cs
->len
- 1),
254 /* This is a new range above all previous ranges. */
257 cs
->ranges
= scm_gc_malloc (sizeof (scm_t_char_range
), "character-set");
261 cs
->ranges
= scm_gc_realloc (cs
->ranges
,
262 sizeof (scm_t_char_range
) * cs
->len
,
263 sizeof (scm_t_char_range
) * (cs
->len
+ 1),
267 cs
->ranges
[cs
->len
- 1].lo
= lo
;
268 cs
->ranges
[cs
->len
- 1].hi
= hi
;
273 /* If N is in charset CS, remove it. */
275 scm_i_charset_unset (scm_t_char_set
*cs
, scm_t_wchar n
)
285 if (n
< cs
->ranges
[i
].lo
)
286 /* Not in this set. */
289 if (n
== cs
->ranges
[i
].lo
&& n
== cs
->ranges
[i
].hi
)
291 /* Remove this one-character range. */
294 scm_gc_free (cs
->ranges
,
295 sizeof (scm_t_char_range
) * cs
->len
,
301 else if (i
< len
- 1)
303 memmove (cs
->ranges
+ i
, cs
->ranges
+ (i
+ 1),
304 sizeof (scm_t_char_range
) * (len
- i
- 1));
305 cs
->ranges
= scm_gc_realloc (cs
->ranges
,
306 sizeof (scm_t_char_range
) * len
,
307 sizeof (scm_t_char_range
) * (len
-
313 else if (i
== len
- 1)
315 cs
->ranges
= scm_gc_realloc (cs
->ranges
,
316 sizeof (scm_t_char_range
) * len
,
317 sizeof (scm_t_char_range
) * (len
-
324 else if (n
== cs
->ranges
[i
].lo
)
326 /* Shrink this range from the left. */
327 cs
->ranges
[i
].lo
= n
+ 1;
330 else if (n
== cs
->ranges
[i
].hi
)
332 /* Shrink this range from the right. */
333 cs
->ranges
[i
].hi
= n
- 1;
336 else if (n
> cs
->ranges
[i
].lo
&& n
< cs
->ranges
[i
].hi
)
338 /* Split this range into two pieces. */
339 cs
->ranges
= scm_gc_realloc (cs
->ranges
,
340 sizeof (scm_t_char_range
) * len
,
341 sizeof (scm_t_char_range
) * (len
+ 1),
344 memmove (cs
->ranges
+ (i
+ 2), cs
->ranges
+ (i
+ 1),
345 sizeof (scm_t_char_range
) * (len
- i
- 1));
346 cs
->ranges
[i
+ 1].hi
= cs
->ranges
[i
].hi
;
347 cs
->ranges
[i
+ 1].lo
= n
+ 1;
348 cs
->ranges
[i
].hi
= n
- 1;
356 /* This value is above all ranges, so do nothing here. */
361 charsets_equal (scm_t_char_set
*a
, scm_t_char_set
*b
)
363 if (a
->len
!= b
->len
)
366 if (memcmp (a
->ranges
, b
->ranges
, sizeof (scm_t_char_range
) * a
->len
) != 0)
372 /* Return true if every character in A is also in B. */
374 charsets_leq (scm_t_char_set
*a
, scm_t_char_set
*b
)
377 scm_t_wchar alo
, ahi
;
385 alo
= a
->ranges
[i
].lo
;
386 ahi
= a
->ranges
[i
].hi
;
387 while (b
->ranges
[j
].hi
< alo
)
394 if (alo
< b
->ranges
[j
].lo
|| ahi
> b
->ranges
[j
].hi
)
402 /* Merge B into A. */
404 charsets_union (scm_t_char_set
*a
, scm_t_char_set
*b
)
407 scm_t_wchar blo
, bhi
;
415 a
->ranges
= scm_gc_malloc (sizeof (scm_t_char_range
) * b
->len
,
417 memcpy (a
->ranges
, b
->ranges
, sizeof (scm_t_char_range
) * b
->len
);
423 blo
= b
->ranges
[i
].lo
;
424 bhi
= b
->ranges
[i
].hi
;
425 scm_i_charset_set_range (a
, blo
, bhi
);
433 /* Remove elements not both in A and B from A. */
435 charsets_intersection (scm_t_char_set
*a
, scm_t_char_set
*b
)
438 scm_t_wchar blo
, bhi
, n
;
446 scm_gc_free (a
->ranges
, sizeof (scm_t_char_range
) * a
->len
,
452 c
= (scm_t_char_set
*) scm_malloc (sizeof (scm_t_char_set
));
458 blo
= b
->ranges
[i
].lo
;
459 bhi
= b
->ranges
[i
].hi
;
460 for (n
= blo
; n
<= bhi
; n
++)
461 if (scm_i_charset_get (a
, n
))
462 scm_i_charset_set (c
, n
);
465 scm_gc_free (a
->ranges
, sizeof (scm_t_char_range
) * a
->len
,
470 a
->ranges
= c
->ranges
;
477 #define SCM_ADD_RANGE(low, high) \
479 p->ranges[idx].lo = (low); \
480 p->ranges[idx++].hi = (high); \
482 #define SCM_ADD_RANGE_SKIP_SURROGATES(low, high) \
484 p->ranges[idx].lo = (low); \
485 p->ranges[idx++].hi = SCM_CODEPOINT_SURROGATE_START - 1; \
486 p->ranges[idx].lo = SCM_CODEPOINT_SURROGATE_END + 1; \
487 p->ranges[idx++].hi = (high); \
492 /* Make P the compelement of Q. */
494 charsets_complement (scm_t_char_set
*p
, scm_t_char_set
*q
)
501 /* Fill with all valid codepoints. */
503 p
->ranges
= scm_gc_malloc (sizeof (scm_t_char_range
) * 2,
505 SCM_ADD_RANGE_SKIP_SURROGATES (0, SCM_CODEPOINT_MAX
);
510 scm_gc_free (p
->ranges
, sizeof (scm_t_char_set
) * p
->len
,
513 /* Count the number of ranges needed for the output. */
515 if (q
->ranges
[0].lo
> 0)
517 if (q
->ranges
[q
->len
- 1].hi
< SCM_CODEPOINT_MAX
)
521 (scm_t_char_range
*) scm_gc_malloc (sizeof (scm_t_char_range
) * p
->len
,
523 if (q
->ranges
[0].lo
> 0)
525 if (q
->ranges
[0].lo
> SCM_CODEPOINT_SURROGATE_END
)
526 SCM_ADD_RANGE_SKIP_SURROGATES (0, q
->ranges
[0].lo
- 1);
528 SCM_ADD_RANGE (0, q
->ranges
[0].lo
- 1);
530 for (k
= 1; k
< q
->len
; k
++)
532 if (q
->ranges
[k
- 1].hi
< SCM_CODEPOINT_SURROGATE_START
533 && q
->ranges
[k
].lo
- 1 > SCM_CODEPOINT_SURROGATE_END
)
534 SCM_ADD_RANGE_SKIP_SURROGATES (q
->ranges
[k
- 1].hi
+ 1, q
->ranges
[k
].lo
- 1);
536 SCM_ADD_RANGE (q
->ranges
[k
- 1].hi
+ 1, q
->ranges
[k
].lo
- 1);
538 if (q
->ranges
[q
->len
- 1].hi
< SCM_CODEPOINT_MAX
)
540 if (q
->ranges
[q
->len
- 1].hi
< SCM_CODEPOINT_SURROGATE_START
)
541 SCM_ADD_RANGE_SKIP_SURROGATES (q
->ranges
[q
->len
- 1].hi
+ 1, SCM_CODEPOINT_MAX
);
543 SCM_ADD_RANGE (q
->ranges
[q
->len
- 1].hi
+ 1, SCM_CODEPOINT_MAX
);
548 #undef SCM_ADD_RANGE_SKIP_SURROGATES
550 /* Replace A with elements only found in one of A or B. */
552 charsets_xor (scm_t_char_set
*a
, scm_t_char_set
*b
)
555 scm_t_wchar blo
, bhi
, n
;
565 (scm_t_char_range
*) scm_gc_malloc (sizeof (scm_t_char_range
) *
566 b
->len
, "character-set");
568 memcpy (a
->ranges
, b
->ranges
, sizeof (scm_t_char_range
) * a
->len
);
574 blo
= b
->ranges
[i
].lo
;
575 bhi
= b
->ranges
[i
].hi
;
576 for (n
= blo
; n
<= bhi
; n
++)
578 if (scm_i_charset_get (a
, n
))
579 scm_i_charset_unset (a
, n
);
581 scm_i_charset_set (a
, n
);
589 /* Smob print hook for character sets. */
591 charset_print (SCM charset
, SCM port
, scm_print_state
*pstate SCM_UNUSED
)
596 const size_t max_ranges_to_print
= 50;
598 p
= SCM_CHARSET_DATA (charset
);
600 scm_puts ("#<charset {", port
);
601 for (i
= 0; i
< p
->len
; i
++)
606 scm_puts (" ", port
);
607 scm_write (SCM_MAKE_CHAR (p
->ranges
[i
].lo
), port
);
608 if (p
->ranges
[i
].lo
!= p
->ranges
[i
].hi
)
610 scm_puts ("..", port
);
611 scm_write (SCM_MAKE_CHAR (p
->ranges
[i
].hi
), port
);
613 if (i
>= max_ranges_to_print
)
615 /* Too many to print here. Quit early. */
616 scm_puts (" ...", port
);
620 scm_puts ("}>", port
);
624 /* Smob print hook for character sets cursors. */
626 charset_cursor_print (SCM cursor
, SCM port
,
627 scm_print_state
*pstate SCM_UNUSED
)
629 scm_t_char_set_cursor
*cur
;
631 cur
= (scm_t_char_set_cursor
*) SCM_SMOB_DATA (cursor
);
633 scm_puts ("#<charset-cursor ", port
);
634 if (cur
->range
== (size_t) (-1))
635 scm_puts ("(empty)", port
);
638 scm_write (scm_from_size_t (cur
->range
), port
);
639 scm_puts (":", port
);
640 scm_write (scm_from_int32 (cur
->n
), port
);
642 scm_puts (">", port
);
647 /* Create a new, empty character set. */
649 make_char_set (const char *func_name
)
653 p
= scm_gc_malloc (sizeof (scm_t_char_set
), "character-set");
654 memset (p
, 0, sizeof (scm_t_char_set
));
655 SCM_RETURN_NEWSMOB (scm_tc16_charset
, p
);
659 SCM_DEFINE (scm_char_set_p
, "char-set?", 1, 0, 0,
661 "Return @code{#t} if @var{obj} is a character set, @code{#f}\n"
663 #define FUNC_NAME s_scm_char_set_p
665 return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_charset
, obj
));
670 SCM_DEFINE (scm_char_set_eq
, "char-set=", 0, 0, 1,
672 "Return @code{#t} if all given character sets are equal.")
673 #define FUNC_NAME s_scm_char_set_eq
676 scm_t_char_set
*cs1_data
= NULL
;
678 SCM_VALIDATE_REST_ARGUMENT (char_sets
);
680 while (!scm_is_null (char_sets
))
682 SCM csi
= SCM_CAR (char_sets
);
683 scm_t_char_set
*csi_data
;
685 SCM_VALIDATE_SMOB (argnum
, csi
, charset
);
687 csi_data
= SCM_CHARSET_DATA (csi
);
688 if (cs1_data
== NULL
)
690 else if (!charsets_equal (cs1_data
, csi_data
))
692 char_sets
= SCM_CDR (char_sets
);
699 SCM_DEFINE (scm_char_set_leq
, "char-set<=", 0, 0, 1,
701 "Return @code{#t} if every character set @var{cs}i is a subset\n"
702 "of character set @var{cs}i+1.")
703 #define FUNC_NAME s_scm_char_set_leq
706 scm_t_char_set
*prev_data
= NULL
;
708 SCM_VALIDATE_REST_ARGUMENT (char_sets
);
710 while (!scm_is_null (char_sets
))
712 SCM csi
= SCM_CAR (char_sets
);
713 scm_t_char_set
*csi_data
;
715 SCM_VALIDATE_SMOB (argnum
, csi
, charset
);
717 csi_data
= SCM_CHARSET_DATA (csi
);
720 if (!charsets_leq (prev_data
, csi_data
))
723 prev_data
= csi_data
;
724 char_sets
= SCM_CDR (char_sets
);
731 SCM_DEFINE (scm_char_set_hash
, "char-set-hash", 1, 1, 0,
733 "Compute a hash value for the character set @var{cs}. If\n"
734 "@var{bound} is given and non-zero, it restricts the\n"
735 "returned value to the range 0 @dots{} @var{bound - 1}.")
736 #define FUNC_NAME s_scm_char_set_hash
738 const unsigned long default_bnd
= 871;
741 unsigned long val
= 0;
745 SCM_VALIDATE_SMOB (1, cs
, charset
);
747 if (SCM_UNBNDP (bound
))
751 bnd
= scm_to_ulong (bound
);
756 p
= SCM_CHARSET_DATA (cs
);
757 for (k
= 0; k
< p
->len
; k
++)
759 for (c
= p
->ranges
[k
].lo
; c
<= p
->ranges
[k
].hi
; c
++)
760 val
= c
+ (val
<< 1);
762 return scm_from_ulong (val
% bnd
);
767 SCM_DEFINE (scm_char_set_cursor
, "char-set-cursor", 1, 0, 0,
768 (SCM cs
), "Return a cursor into the character set @var{cs}.")
769 #define FUNC_NAME s_scm_char_set_cursor
771 scm_t_char_set
*cs_data
;
772 scm_t_char_set_cursor
*cur_data
;
774 SCM_VALIDATE_SMOB (1, cs
, charset
);
775 cs_data
= SCM_CHARSET_DATA (cs
);
777 (scm_t_char_set_cursor
*) scm_gc_malloc (sizeof (scm_t_char_set_cursor
),
779 if (cs_data
->len
== 0)
781 cur_data
->range
= (size_t) (-1);
787 cur_data
->n
= cs_data
->ranges
[0].lo
;
789 SCM_RETURN_NEWSMOB (scm_tc16_charset_cursor
, cur_data
);
794 SCM_DEFINE (scm_char_set_ref
, "char-set-ref", 2, 0, 0,
795 (SCM cs
, SCM cursor
),
796 "Return the character at the current cursor position\n"
797 "@var{cursor} in the character set @var{cs}. It is an error to\n"
798 "pass a cursor for which @code{end-of-char-set?} returns true.")
799 #define FUNC_NAME s_scm_char_set_ref
801 scm_t_char_set
*cs_data
;
802 scm_t_char_set_cursor
*cur_data
;
805 SCM_VALIDATE_SMOB (1, cs
, charset
);
806 SCM_VALIDATE_SMOB (2, cursor
, charset_cursor
);
808 cs_data
= SCM_CHARSET_DATA (cs
);
809 cur_data
= (scm_t_char_set_cursor
*) SCM_SMOB_DATA (cursor
);
811 /* Validate that this cursor is still true. */
813 if (i
== (size_t) (-1)
815 || cur_data
->n
< cs_data
->ranges
[i
].lo
816 || cur_data
->n
> cs_data
->ranges
[i
].hi
)
817 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor
));
818 return SCM_MAKE_CHAR (cur_data
->n
);
823 SCM_DEFINE (scm_char_set_cursor_next
, "char-set-cursor-next", 2, 0, 0,
824 (SCM cs
, SCM cursor
),
825 "Advance the character set cursor @var{cursor} to the next\n"
826 "character in the character set @var{cs}. It is an error if the\n"
827 "cursor given satisfies @code{end-of-char-set?}.")
828 #define FUNC_NAME s_scm_char_set_cursor_next
830 scm_t_char_set
*cs_data
;
831 scm_t_char_set_cursor
*cur_data
;
834 SCM_VALIDATE_SMOB (1, cs
, charset
);
835 SCM_VALIDATE_SMOB (2, cursor
, charset_cursor
);
837 cs_data
= SCM_CHARSET_DATA (cs
);
838 cur_data
= (scm_t_char_set_cursor
*) SCM_SMOB_DATA (cursor
);
840 /* Validate that this cursor is still true. */
842 if (i
== (size_t) (-1)
844 || cur_data
->n
< cs_data
->ranges
[i
].lo
845 || cur_data
->n
> cs_data
->ranges
[i
].hi
)
846 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor
));
847 /* Increment the cursor. */
848 if (cur_data
->n
== cs_data
->ranges
[i
].hi
)
850 if (i
+ 1 < cs_data
->len
)
852 cur_data
->range
= i
+ 1;
853 cur_data
->n
= cs_data
->ranges
[i
+ 1].lo
;
857 /* This is the end of the road. */
858 cur_data
->range
= (size_t) (-1);
864 cur_data
->n
= cur_data
->n
+ 1;
872 SCM_DEFINE (scm_end_of_char_set_p
, "end-of-char-set?", 1, 0, 0,
874 "Return @code{#t} if @var{cursor} has reached the end of a\n"
875 "character set, @code{#f} otherwise.")
876 #define FUNC_NAME s_scm_end_of_char_set_p
878 scm_t_char_set_cursor
*cur_data
;
879 SCM_VALIDATE_SMOB (1, cursor
, charset_cursor
);
881 cur_data
= (scm_t_char_set_cursor
*) SCM_SMOB_DATA (cursor
);
882 if (cur_data
->range
== (size_t) (-1))
890 SCM_DEFINE (scm_char_set_fold
, "char-set-fold", 3, 0, 0,
891 (SCM kons
, SCM knil
, SCM cs
),
892 "Fold the procedure @var{kons} over the character set @var{cs},\n"
893 "initializing it with @var{knil}.")
894 #define FUNC_NAME s_scm_char_set_fold
896 scm_t_char_set
*cs_data
;
900 SCM_VALIDATE_PROC (1, kons
);
901 SCM_VALIDATE_SMOB (3, cs
, charset
);
903 cs_data
= SCM_CHARSET_DATA (cs
);
905 if (cs_data
->len
== 0)
908 for (k
= 0; k
< cs_data
->len
; k
++)
909 for (n
= cs_data
->ranges
[k
].lo
; n
<= cs_data
->ranges
[k
].hi
; n
++)
911 knil
= scm_call_2 (kons
, SCM_MAKE_CHAR (n
), knil
);
918 SCM_DEFINE (scm_char_set_unfold
, "char-set-unfold", 4, 1, 0,
919 (SCM p
, SCM f
, SCM g
, SCM seed
, SCM base_cs
),
920 "This is a fundamental constructor for character sets.\n"
922 "@item @var{g} is used to generate a series of ``seed'' values\n"
923 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
924 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
925 "@item @var{p} tells us when to stop -- when it returns true\n"
926 "when applied to one of the seed values.\n"
927 "@item @var{f} maps each seed value to a character. These\n"
928 "characters are added to the base character set @var{base_cs} to\n"
929 "form the result; @var{base_cs} defaults to the empty set.\n"
931 #define FUNC_NAME s_scm_char_set_unfold
935 SCM_VALIDATE_PROC (1, p
);
936 SCM_VALIDATE_PROC (2, f
);
937 SCM_VALIDATE_PROC (3, g
);
938 if (!SCM_UNBNDP (base_cs
))
940 SCM_VALIDATE_SMOB (5, base_cs
, charset
);
941 result
= scm_char_set_copy (base_cs
);
944 result
= make_char_set (FUNC_NAME
);
946 tmp
= scm_call_1 (p
, seed
);
947 while (scm_is_false (tmp
))
949 SCM ch
= scm_call_1 (f
, seed
);
951 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f
));
952 SCM_CHARSET_SET (result
, SCM_CHAR (ch
));
954 seed
= scm_call_1 (g
, seed
);
955 tmp
= scm_call_1 (p
, seed
);
962 SCM_DEFINE (scm_char_set_unfold_x
, "char-set-unfold!", 5, 0, 0,
963 (SCM p
, SCM f
, SCM g
, SCM seed
, SCM base_cs
),
964 "This is a fundamental constructor for character sets.\n"
966 "@item @var{g} is used to generate a series of ``seed'' values\n"
967 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
968 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
969 "@item @var{p} tells us when to stop -- when it returns true\n"
970 "when applied to one of the seed values.\n"
971 "@item @var{f} maps each seed value to a character. These\n"
972 "characters are added to the base character set @var{base_cs} to\n"
973 "form the result; @var{base_cs} defaults to the empty set.\n"
975 #define FUNC_NAME s_scm_char_set_unfold_x
979 SCM_VALIDATE_PROC (1, p
);
980 SCM_VALIDATE_PROC (2, f
);
981 SCM_VALIDATE_PROC (3, g
);
982 SCM_VALIDATE_SMOB (5, base_cs
, charset
);
984 tmp
= scm_call_1 (p
, seed
);
985 while (scm_is_false (tmp
))
987 SCM ch
= scm_call_1 (f
, seed
);
989 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f
));
990 SCM_CHARSET_SET (base_cs
, SCM_CHAR (ch
));
992 seed
= scm_call_1 (g
, seed
);
993 tmp
= scm_call_1 (p
, seed
);
1000 SCM_DEFINE (scm_char_set_for_each
, "char-set-for-each", 2, 0, 0,
1002 "Apply @var{proc} to every character in the character set\n"
1003 "@var{cs}. The return value is not specified.")
1004 #define FUNC_NAME s_scm_char_set_for_each
1006 scm_t_char_set
*cs_data
;
1010 SCM_VALIDATE_PROC (1, proc
);
1011 SCM_VALIDATE_SMOB (2, cs
, charset
);
1013 cs_data
= SCM_CHARSET_DATA (cs
);
1015 if (cs_data
->len
== 0)
1016 return SCM_UNSPECIFIED
;
1018 for (k
= 0; k
< cs_data
->len
; k
++)
1019 for (n
= cs_data
->ranges
[k
].lo
; n
<= cs_data
->ranges
[k
].hi
; n
++)
1021 scm_call_1 (proc
, SCM_MAKE_CHAR (n
));
1024 return SCM_UNSPECIFIED
;
1029 SCM_DEFINE (scm_char_set_map
, "char-set-map", 2, 0, 0,
1031 "Map the procedure @var{proc} over every character in @var{cs}.\n"
1032 "@var{proc} must be a character -> character procedure.")
1033 #define FUNC_NAME s_scm_char_set_map
1037 scm_t_char_set
*cs_data
;
1040 SCM_VALIDATE_PROC (1, proc
);
1041 SCM_VALIDATE_SMOB (2, cs
, charset
);
1043 result
= make_char_set (FUNC_NAME
);
1044 cs_data
= SCM_CHARSET_DATA (cs
);
1046 if (cs_data
->len
== 0)
1049 for (k
= 0; k
< cs_data
->len
; k
++)
1050 for (n
= cs_data
->ranges
[k
].lo
; n
<= cs_data
->ranges
[k
].hi
; n
++)
1052 SCM ch
= scm_call_1 (proc
, SCM_MAKE_CHAR (n
));
1053 if (!SCM_CHARP (ch
))
1054 SCM_MISC_ERROR ("procedure ~S returned non-char",
1056 SCM_CHARSET_SET (result
, SCM_CHAR (ch
));
1063 SCM_DEFINE (scm_char_set_copy
, "char-set-copy", 1, 0, 0,
1065 "Return a newly allocated character set containing all\n"
1066 "characters in @var{cs}.")
1067 #define FUNC_NAME s_scm_char_set_copy
1070 scm_t_char_set
*p1
, *p2
;
1072 SCM_VALIDATE_SMOB (1, cs
, charset
);
1073 ret
= make_char_set (FUNC_NAME
);
1074 p1
= SCM_CHARSET_DATA (cs
);
1075 p2
= SCM_CHARSET_DATA (ret
);
1082 p2
->ranges
= scm_gc_malloc (sizeof (scm_t_char_range
) * p1
->len
,
1084 memcpy (p2
->ranges
, p1
->ranges
, sizeof (scm_t_char_range
) * p1
->len
);
1092 SCM_DEFINE (scm_char_set
, "char-set", 0, 0, 1,
1094 "Return a character set containing all given characters.")
1095 #define FUNC_NAME s_scm_char_set
1100 SCM_VALIDATE_REST_ARGUMENT (rest
);
1101 cs
= make_char_set (FUNC_NAME
);
1102 while (!scm_is_null (rest
))
1106 SCM_VALIDATE_CHAR_COPY (argnum
, SCM_CAR (rest
), c
);
1108 rest
= SCM_CDR (rest
);
1109 SCM_CHARSET_SET (cs
, c
);
1116 SCM_DEFINE (scm_list_to_char_set
, "list->char-set", 1, 1, 0,
1117 (SCM list
, SCM base_cs
),
1118 "Convert the character list @var{list} to a character set. If\n"
1119 "the character set @var{base_cs} is given, the character in this\n"
1120 "set are also included in the result.")
1121 #define FUNC_NAME s_scm_list_to_char_set
1125 SCM_VALIDATE_LIST (1, list
);
1126 if (SCM_UNBNDP (base_cs
))
1127 cs
= make_char_set (FUNC_NAME
);
1130 SCM_VALIDATE_SMOB (2, base_cs
, charset
);
1131 cs
= scm_char_set_copy (base_cs
);
1133 while (!scm_is_null (list
))
1135 SCM chr
= SCM_CAR (list
);
1138 SCM_VALIDATE_CHAR_COPY (0, chr
, c
);
1139 list
= SCM_CDR (list
);
1142 SCM_CHARSET_SET (cs
, c
);
1149 SCM_DEFINE (scm_list_to_char_set_x
, "list->char-set!", 2, 0, 0,
1150 (SCM list
, SCM base_cs
),
1151 "Convert the character list @var{list} to a character set. The\n"
1152 "characters are added to @var{base_cs} and @var{base_cs} is\n"
1154 #define FUNC_NAME s_scm_list_to_char_set_x
1156 SCM_VALIDATE_LIST (1, list
);
1157 SCM_VALIDATE_SMOB (2, base_cs
, charset
);
1158 while (!scm_is_null (list
))
1160 SCM chr
= SCM_CAR (list
);
1163 SCM_VALIDATE_CHAR_COPY (0, chr
, c
);
1164 list
= SCM_CDR (list
);
1166 SCM_CHARSET_SET (base_cs
, c
);
1173 SCM_DEFINE (scm_string_to_char_set
, "string->char-set", 1, 1, 0,
1174 (SCM str
, SCM base_cs
),
1175 "Convert the string @var{str} to a character set. If the\n"
1176 "character set @var{base_cs} is given, the characters in this\n"
1177 "set are also included in the result.")
1178 #define FUNC_NAME s_scm_string_to_char_set
1183 SCM_VALIDATE_STRING (1, str
);
1184 if (SCM_UNBNDP (base_cs
))
1185 cs
= make_char_set (FUNC_NAME
);
1188 SCM_VALIDATE_SMOB (2, base_cs
, charset
);
1189 cs
= scm_char_set_copy (base_cs
);
1191 len
= scm_i_string_length (str
);
1194 scm_t_wchar c
= scm_i_string_ref (str
, k
++);
1195 SCM_CHARSET_SET (cs
, c
);
1197 scm_remember_upto_here_1 (str
);
1203 SCM_DEFINE (scm_string_to_char_set_x
, "string->char-set!", 2, 0, 0,
1204 (SCM str
, SCM base_cs
),
1205 "Convert the string @var{str} to a character set. The\n"
1206 "characters from the string are added to @var{base_cs}, and\n"
1207 "@var{base_cs} is returned.")
1208 #define FUNC_NAME s_scm_string_to_char_set_x
1212 SCM_VALIDATE_STRING (1, str
);
1213 SCM_VALIDATE_SMOB (2, base_cs
, charset
);
1214 len
= scm_i_string_length (str
);
1217 scm_t_wchar c
= scm_i_string_ref (str
, k
++);
1218 SCM_CHARSET_SET (base_cs
, c
);
1220 scm_remember_upto_here_1 (str
);
1226 SCM_DEFINE (scm_char_set_filter
, "char-set-filter", 2, 1, 0,
1227 (SCM pred
, SCM cs
, SCM base_cs
),
1228 "Return a character set containing every character from @var{cs}\n"
1229 "so that it satisfies @var{pred}. If provided, the characters\n"
1230 "from @var{base_cs} are added to the result.")
1231 #define FUNC_NAME s_scm_char_set_filter
1238 SCM_VALIDATE_PROC (1, pred
);
1239 SCM_VALIDATE_SMOB (2, cs
, charset
);
1240 if (!SCM_UNBNDP (base_cs
))
1242 SCM_VALIDATE_SMOB (3, base_cs
, charset
);
1243 ret
= scm_char_set_copy (base_cs
);
1246 ret
= make_char_set (FUNC_NAME
);
1248 p
= SCM_CHARSET_DATA (cs
);
1253 for (k
= 0; k
< p
->len
; k
++)
1254 for (n
= p
->ranges
[k
].lo
; n
<= p
->ranges
[k
].hi
; n
++)
1256 SCM res
= scm_call_1 (pred
, SCM_MAKE_CHAR (n
));
1258 if (scm_is_true (res
))
1259 SCM_CHARSET_SET (ret
, n
);
1266 SCM_DEFINE (scm_char_set_filter_x
, "char-set-filter!", 3, 0, 0,
1267 (SCM pred
, SCM cs
, SCM base_cs
),
1268 "Return a character set containing every character from @var{cs}\n"
1269 "so that it satisfies @var{pred}. The characters are added to\n"
1270 "@var{base_cs} and @var{base_cs} is returned.")
1271 #define FUNC_NAME s_scm_char_set_filter_x
1277 SCM_VALIDATE_PROC (1, pred
);
1278 SCM_VALIDATE_SMOB (2, cs
, charset
);
1279 SCM_VALIDATE_SMOB (3, base_cs
, charset
);
1280 p
= SCM_CHARSET_DATA (cs
);
1284 for (k
= 0; k
< p
->len
; k
++)
1285 for (n
= p
->ranges
[k
].lo
; n
<= p
->ranges
[k
].hi
; n
++)
1287 SCM res
= scm_call_1 (pred
, SCM_MAKE_CHAR (n
));
1289 if (scm_is_true (res
))
1290 SCM_CHARSET_SET (base_cs
, n
);
1297 /* Return a character set containing all the characters from [LOWER,UPPER),
1298 giving range errors if ERROR, adding chars from BASE_CS, and recycling
1299 BASE_CS if REUSE is true. */
1301 scm_i_ucs_range_to_char_set (const char *FUNC_NAME
, SCM lower
, SCM upper
,
1302 SCM error
, SCM base_cs
, int reuse
)
1305 size_t clower
, cupper
;
1307 clower
= scm_to_size_t (lower
);
1308 cupper
= scm_to_size_t (upper
) - 1;
1309 SCM_ASSERT_RANGE (2, upper
, cupper
>= clower
);
1310 if (!SCM_UNBNDP (error
))
1312 if (scm_is_true (error
))
1314 SCM_ASSERT_RANGE (1, lower
, SCM_IS_UNICODE_CHAR (clower
));
1315 SCM_ASSERT_RANGE (2, upper
, SCM_IS_UNICODE_CHAR (cupper
));
1316 if (clower
< SCM_CODEPOINT_SURROGATE_START
1317 && cupper
> SCM_CODEPOINT_SURROGATE_END
)
1318 scm_error(scm_out_of_range_key
,
1319 FUNC_NAME
, "invalid range - contains surrogate characters: ~S to ~S",
1320 scm_list_2 (lower
, upper
), scm_list_1 (upper
));
1324 if (SCM_UNBNDP (base_cs
))
1325 cs
= make_char_set (FUNC_NAME
);
1328 SCM_VALIDATE_SMOB (3, base_cs
, charset
);
1332 cs
= scm_char_set_copy (base_cs
);
1335 if ((clower
>= SCM_CODEPOINT_SURROGATE_START
&& clower
<= SCM_CODEPOINT_SURROGATE_END
)
1336 && (cupper
>= SCM_CODEPOINT_SURROGATE_START
&& cupper
<= SCM_CODEPOINT_SURROGATE_END
))
1339 if (clower
> SCM_CODEPOINT_MAX
)
1340 clower
= SCM_CODEPOINT_MAX
;
1341 if (clower
>= SCM_CODEPOINT_SURROGATE_START
&& clower
<= SCM_CODEPOINT_SURROGATE_END
)
1342 clower
= SCM_CODEPOINT_SURROGATE_END
+ 1;
1343 if (cupper
> SCM_CODEPOINT_MAX
)
1344 cupper
= SCM_CODEPOINT_MAX
;
1345 if (cupper
>= SCM_CODEPOINT_SURROGATE_START
&& cupper
<= SCM_CODEPOINT_SURROGATE_END
)
1346 cupper
= SCM_CODEPOINT_SURROGATE_START
- 1;
1347 if (clower
< SCM_CODEPOINT_SURROGATE_START
&& cupper
> SCM_CODEPOINT_SURROGATE_END
)
1349 scm_i_charset_set_range (SCM_CHARSET_DATA (cs
), clower
, SCM_CODEPOINT_SURROGATE_START
- 1);
1350 scm_i_charset_set_range (SCM_CHARSET_DATA (cs
), SCM_CODEPOINT_SURROGATE_END
+ 1, cupper
);
1353 scm_i_charset_set_range (SCM_CHARSET_DATA (cs
), clower
, cupper
);
1357 SCM_DEFINE (scm_ucs_range_to_char_set
, "ucs-range->char-set", 2, 2, 0,
1358 (SCM lower
, SCM upper
, SCM error
, SCM base_cs
),
1359 "Return a character set containing all characters whose\n"
1360 "character codes lie in the half-open range\n"
1361 "[@var{lower},@var{upper}).\n"
1363 "If @var{error} is a true value, an error is signalled if the\n"
1364 "specified range contains characters which are not valid\n"
1365 "Unicode code points. If @var{error} is @code{#f},\n"
1366 "these characters are silently left out of the resultung\n"
1369 "The characters in @var{base_cs} are added to the result, if\n"
1371 #define FUNC_NAME s_scm_ucs_range_to_char_set
1373 return scm_i_ucs_range_to_char_set (FUNC_NAME
, lower
, upper
,
1379 SCM_DEFINE (scm_ucs_range_to_char_set_x
, "ucs-range->char-set!", 4, 0, 0,
1380 (SCM lower
, SCM upper
, SCM error
, SCM base_cs
),
1381 "Return a character set containing all characters whose\n"
1382 "character codes lie in the half-open range\n"
1383 "[@var{lower},@var{upper}).\n"
1385 "If @var{error} is a true value, an error is signalled if the\n"
1386 "specified range contains characters which are not contained in\n"
1387 "the implemented character range. If @var{error} is @code{#f},\n"
1388 "these characters are silently left out of the resultung\n"
1391 "The characters are added to @var{base_cs} and @var{base_cs} is\n"
1393 #define FUNC_NAME s_scm_ucs_range_to_char_set_x
1395 SCM_VALIDATE_SMOB (4, base_cs
, charset
);
1396 return scm_i_ucs_range_to_char_set (FUNC_NAME
, lower
, upper
,
1401 SCM_DEFINE (scm_to_char_set
, "->char-set", 1, 0, 0,
1403 "Coerces x into a char-set. @var{x} may be a string, character or char-set. A string is converted to the set of its constituent characters; a character is converted to a singleton set; a char-set is returned as-is.")
1404 #define FUNC_NAME s_scm_to_char_set
1406 if (scm_is_string (x
))
1407 return scm_string_to_char_set (x
, SCM_UNDEFINED
);
1408 else if (SCM_CHARP (x
))
1409 return scm_char_set (scm_list_1 (x
));
1410 else if (SCM_SMOB_PREDICATE (scm_tc16_charset
, x
))
1413 scm_wrong_type_arg (NULL
, 0, x
);
1417 SCM_DEFINE (scm_char_set_size
, "char-set-size", 1, 0, 0,
1419 "Return the number of elements in character set @var{cs}.")
1420 #define FUNC_NAME s_scm_char_set_size
1423 scm_t_char_set
*cs_data
;
1425 SCM_VALIDATE_SMOB (1, cs
, charset
);
1426 cs_data
= SCM_CHARSET_DATA (cs
);
1428 if (cs_data
->len
== 0)
1429 return scm_from_int (0);
1431 for (k
= 0; k
< cs_data
->len
; k
++)
1432 count
+= cs_data
->ranges
[k
].hi
- cs_data
->ranges
[k
].lo
+ 1;
1434 return scm_from_int (count
);
1439 SCM_DEFINE (scm_char_set_count
, "char-set-count", 2, 0, 0,
1441 "Return the number of the elements int the character set\n"
1442 "@var{cs} which satisfy the predicate @var{pred}.")
1443 #define FUNC_NAME s_scm_char_set_count
1447 scm_t_char_set
*cs_data
;
1449 SCM_VALIDATE_PROC (1, pred
);
1450 SCM_VALIDATE_SMOB (2, cs
, charset
);
1451 cs_data
= SCM_CHARSET_DATA (cs
);
1452 if (cs_data
->len
== 0)
1453 return scm_from_int (0);
1455 for (k
= 0; k
< cs_data
->len
; k
++)
1456 for (n
= cs_data
->ranges
[k
].lo
; n
<= cs_data
->ranges
[k
].hi
; n
++)
1458 SCM res
= scm_call_1 (pred
, SCM_MAKE_CHAR (n
));
1459 if (scm_is_true (res
))
1462 return SCM_I_MAKINUM (count
);
1467 SCM_DEFINE (scm_char_set_to_list
, "char-set->list", 1, 0, 0,
1469 "Return a list containing the elements of the character set\n"
1471 #define FUNC_NAME s_scm_char_set_to_list
1475 SCM result
= SCM_EOL
;
1478 SCM_VALIDATE_SMOB (1, cs
, charset
);
1479 p
= SCM_CHARSET_DATA (cs
);
1483 for (k
= p
->len
- 1; k
>= 0; k
--)
1484 for (n
= p
->ranges
[k
].hi
; n
>= p
->ranges
[k
].lo
; n
--)
1485 result
= scm_cons (SCM_MAKE_CHAR (n
), result
);
1491 SCM_DEFINE (scm_char_set_to_string
, "char-set->string", 1, 0, 0,
1493 "Return a string containing the elements of the character set\n"
1494 "@var{cs}. The order in which the characters are placed in the\n"
1495 "string is not defined.")
1496 #define FUNC_NAME s_scm_char_set_to_string
1504 scm_t_char_set
*cs_data
;
1508 SCM_VALIDATE_SMOB (1, cs
, charset
);
1509 cs_data
= SCM_CHARSET_DATA (cs
);
1510 if (cs_data
->len
== 0)
1513 if (cs_data
->ranges
[cs_data
->len
- 1].hi
> 255)
1516 count
= scm_to_int (scm_char_set_size (cs
));
1518 result
= scm_i_make_wide_string (count
, &wbuf
);
1520 result
= scm_i_make_string (count
, &buf
);
1522 for (k
= 0; k
< cs_data
->len
; k
++)
1523 for (n
= cs_data
->ranges
[k
].lo
; n
<= cs_data
->ranges
[k
].hi
; n
++)
1535 SCM_DEFINE (scm_char_set_contains_p
, "char-set-contains?", 2, 0, 0,
1537 "Return @code{#t} iff the character @var{ch} is contained in the\n"
1538 "character set @var{cs}.")
1539 #define FUNC_NAME s_scm_char_set_contains_p
1541 SCM_VALIDATE_SMOB (1, cs
, charset
);
1542 SCM_VALIDATE_CHAR (2, ch
);
1543 return scm_from_bool (SCM_CHARSET_GET (cs
, SCM_CHAR (ch
)));
1548 SCM_DEFINE (scm_char_set_every
, "char-set-every", 2, 0, 0,
1550 "Return a true value if every character in the character set\n"
1551 "@var{cs} satisfies the predicate @var{pred}.")
1552 #define FUNC_NAME s_scm_char_set_every
1556 SCM res
= SCM_BOOL_T
;
1557 scm_t_char_set
*cs_data
;
1559 SCM_VALIDATE_PROC (1, pred
);
1560 SCM_VALIDATE_SMOB (2, cs
, charset
);
1562 cs_data
= SCM_CHARSET_DATA (cs
);
1563 if (cs_data
->len
== 0)
1566 for (k
= 0; k
< cs_data
->len
; k
++)
1567 for (n
= cs_data
->ranges
[k
].lo
; n
<= cs_data
->ranges
[k
].hi
; n
++)
1569 res
= scm_call_1 (pred
, SCM_MAKE_CHAR (n
));
1570 if (scm_is_false (res
))
1578 SCM_DEFINE (scm_char_set_any
, "char-set-any", 2, 0, 0,
1580 "Return a true value if any character in the character set\n"
1581 "@var{cs} satisfies the predicate @var{pred}.")
1582 #define FUNC_NAME s_scm_char_set_any
1586 scm_t_char_set
*cs_data
;
1588 SCM_VALIDATE_PROC (1, pred
);
1589 SCM_VALIDATE_SMOB (2, cs
, charset
);
1591 cs_data
= SCM_CHARSET_DATA (cs
);
1592 if (cs_data
->len
== 0)
1595 for (k
= 0; k
< cs_data
->len
; k
++)
1596 for (n
= cs_data
->ranges
[k
].lo
; n
<= cs_data
->ranges
[k
].hi
; n
++)
1598 SCM res
= scm_call_1 (pred
, SCM_MAKE_CHAR (n
));
1599 if (scm_is_true (res
))
1607 SCM_DEFINE (scm_char_set_adjoin
, "char-set-adjoin", 1, 0, 1,
1609 "Add all character arguments to the first argument, which must\n"
1610 "be a character set.")
1611 #define FUNC_NAME s_scm_char_set_adjoin
1613 SCM_VALIDATE_SMOB (1, cs
, charset
);
1614 SCM_VALIDATE_REST_ARGUMENT (rest
);
1615 cs
= scm_char_set_copy (cs
);
1617 while (!scm_is_null (rest
))
1619 SCM chr
= SCM_CAR (rest
);
1622 SCM_VALIDATE_CHAR_COPY (1, chr
, c
);
1623 rest
= SCM_CDR (rest
);
1625 SCM_CHARSET_SET (cs
, c
);
1632 SCM_DEFINE (scm_char_set_delete
, "char-set-delete", 1, 0, 1,
1634 "Delete all character arguments from the first argument, which\n"
1635 "must be a character set.")
1636 #define FUNC_NAME s_scm_char_set_delete
1638 SCM_VALIDATE_SMOB (1, cs
, charset
);
1639 SCM_VALIDATE_REST_ARGUMENT (rest
);
1640 cs
= scm_char_set_copy (cs
);
1642 while (!scm_is_null (rest
))
1644 SCM chr
= SCM_CAR (rest
);
1647 SCM_VALIDATE_CHAR_COPY (1, chr
, c
);
1648 rest
= SCM_CDR (rest
);
1650 SCM_CHARSET_UNSET (cs
, c
);
1657 SCM_DEFINE (scm_char_set_adjoin_x
, "char-set-adjoin!", 1, 0, 1,
1659 "Add all character arguments to the first argument, which must\n"
1660 "be a character set.")
1661 #define FUNC_NAME s_scm_char_set_adjoin_x
1663 SCM_VALIDATE_SMOB (1, cs
, charset
);
1664 SCM_VALIDATE_REST_ARGUMENT (rest
);
1666 while (!scm_is_null (rest
))
1668 SCM chr
= SCM_CAR (rest
);
1671 SCM_VALIDATE_CHAR_COPY (1, chr
, c
);
1672 rest
= SCM_CDR (rest
);
1674 SCM_CHARSET_SET (cs
, c
);
1681 SCM_DEFINE (scm_char_set_delete_x
, "char-set-delete!", 1, 0, 1,
1683 "Delete all character arguments from the first argument, which\n"
1684 "must be a character set.")
1685 #define FUNC_NAME s_scm_char_set_delete_x
1687 SCM_VALIDATE_SMOB (1, cs
, charset
);
1688 SCM_VALIDATE_REST_ARGUMENT (rest
);
1690 while (!scm_is_null (rest
))
1692 SCM chr
= SCM_CAR (rest
);
1695 SCM_VALIDATE_CHAR_COPY (1, chr
, c
);
1696 rest
= SCM_CDR (rest
);
1698 SCM_CHARSET_UNSET (cs
, c
);
1705 SCM_DEFINE (scm_char_set_complement
, "char-set-complement", 1, 0, 0,
1706 (SCM cs
), "Return the complement of the character set @var{cs}.")
1707 #define FUNC_NAME s_scm_char_set_complement
1710 scm_t_char_set
*p
, *q
;
1712 SCM_VALIDATE_SMOB (1, cs
, charset
);
1714 res
= make_char_set (FUNC_NAME
);
1715 p
= SCM_CHARSET_DATA (res
);
1716 q
= SCM_CHARSET_DATA (cs
);
1718 charsets_complement (p
, q
);
1724 SCM_DEFINE (scm_char_set_union
, "char-set-union", 0, 0, 1,
1726 "Return the union of all argument character sets.")
1727 #define FUNC_NAME s_scm_char_set_union
1733 SCM_VALIDATE_REST_ARGUMENT (rest
);
1735 res
= make_char_set (FUNC_NAME
);
1736 p
= SCM_CHARSET_DATA (res
);
1737 while (!scm_is_null (rest
))
1739 SCM cs
= SCM_CAR (rest
);
1740 SCM_VALIDATE_SMOB (c
, cs
, charset
);
1742 rest
= SCM_CDR (rest
);
1745 charsets_union (p
, (scm_t_char_set
*) SCM_SMOB_DATA (cs
));
1752 SCM_DEFINE (scm_char_set_intersection
, "char-set-intersection", 0, 0, 1,
1754 "Return the intersection of all argument character sets.")
1755 #define FUNC_NAME s_scm_char_set_intersection
1759 SCM_VALIDATE_REST_ARGUMENT (rest
);
1761 if (scm_is_null (rest
))
1762 res
= make_char_set (FUNC_NAME
);
1768 res
= scm_char_set_copy (SCM_CAR (rest
));
1769 p
= SCM_CHARSET_DATA (res
);
1770 rest
= SCM_CDR (rest
);
1772 while (scm_is_pair (rest
))
1774 SCM cs
= SCM_CAR (rest
);
1775 scm_t_char_set
*cs_data
;
1777 SCM_VALIDATE_SMOB (argnum
, cs
, charset
);
1779 cs_data
= SCM_CHARSET_DATA (cs
);
1780 rest
= SCM_CDR (rest
);
1781 charsets_intersection (p
, cs_data
);
1790 SCM_DEFINE (scm_char_set_difference
, "char-set-difference", 1, 0, 1,
1791 (SCM cs1
, SCM rest
),
1792 "Return the difference of all argument character sets.")
1793 #define FUNC_NAME s_scm_char_set_difference
1797 scm_t_char_set
*p
, *q
;
1799 SCM_VALIDATE_SMOB (1, cs1
, charset
);
1800 SCM_VALIDATE_REST_ARGUMENT (rest
);
1802 res
= scm_char_set_copy (cs1
);
1803 p
= SCM_CHARSET_DATA (res
);
1804 compl = make_char_set (FUNC_NAME
);
1805 q
= SCM_CHARSET_DATA (compl);
1806 while (!scm_is_null (rest
))
1808 SCM cs
= SCM_CAR (rest
);
1809 SCM_VALIDATE_SMOB (c
, cs
, charset
);
1811 rest
= SCM_CDR (rest
);
1813 charsets_complement (q
, SCM_CHARSET_DATA (cs
));
1814 charsets_intersection (p
, q
);
1821 SCM_DEFINE (scm_char_set_xor
, "char-set-xor", 0, 0, 1,
1823 "Return the exclusive-or of all argument character sets.")
1824 #define FUNC_NAME s_scm_char_set_xor
1828 SCM_VALIDATE_REST_ARGUMENT (rest
);
1830 if (scm_is_null (rest
))
1831 res
= make_char_set (FUNC_NAME
);
1837 res
= scm_char_set_copy (SCM_CAR (rest
));
1838 p
= SCM_CHARSET_DATA (res
);
1839 rest
= SCM_CDR (rest
);
1841 while (scm_is_pair (rest
))
1843 SCM cs
= SCM_CAR (rest
);
1844 scm_t_char_set
*cs_data
;
1846 SCM_VALIDATE_SMOB (argnum
, cs
, charset
);
1848 cs_data
= SCM_CHARSET_DATA (cs
);
1849 rest
= SCM_CDR (rest
);
1851 charsets_xor (p
, cs_data
);
1859 SCM_DEFINE (scm_char_set_diff_plus_intersection
, "char-set-diff+intersection", 1, 0, 1,
1860 (SCM cs1
, SCM rest
),
1861 "Return the difference and the intersection of all argument\n"
1863 #define FUNC_NAME s_scm_char_set_diff_plus_intersection
1867 scm_t_char_set
*p
, *q
;
1869 SCM_VALIDATE_SMOB (1, cs1
, charset
);
1870 SCM_VALIDATE_REST_ARGUMENT (rest
);
1872 res1
= scm_char_set_copy (cs1
);
1873 res2
= make_char_set (FUNC_NAME
);
1874 p
= SCM_CHARSET_DATA (res1
);
1875 q
= SCM_CHARSET_DATA (res2
);
1876 while (!scm_is_null (rest
))
1878 SCM cs
= SCM_CAR (rest
);
1881 SCM_VALIDATE_SMOB (c
, cs
, charset
);
1883 r
= SCM_CHARSET_DATA (cs
);
1885 charsets_union (q
, r
);
1886 charsets_intersection (p
, r
);
1887 rest
= SCM_CDR (rest
);
1889 return scm_values (scm_list_2 (res1
, res2
));
1894 SCM_DEFINE (scm_char_set_complement_x
, "char-set-complement!", 1, 0, 0,
1895 (SCM cs
), "Return the complement of the character set @var{cs}.")
1896 #define FUNC_NAME s_scm_char_set_complement_x
1898 SCM_VALIDATE_SMOB (1, cs
, charset
);
1899 cs
= scm_char_set_complement (cs
);
1905 SCM_DEFINE (scm_char_set_union_x
, "char-set-union!", 1, 0, 1,
1906 (SCM cs1
, SCM rest
),
1907 "Return the union of all argument character sets.")
1908 #define FUNC_NAME s_scm_char_set_union_x
1910 SCM_VALIDATE_SMOB (1, cs1
, charset
);
1911 SCM_VALIDATE_REST_ARGUMENT (rest
);
1913 cs1
= scm_char_set_union (scm_cons (cs1
, rest
));
1919 SCM_DEFINE (scm_char_set_intersection_x
, "char-set-intersection!", 1, 0, 1,
1920 (SCM cs1
, SCM rest
),
1921 "Return the intersection of all argument character sets.")
1922 #define FUNC_NAME s_scm_char_set_intersection_x
1924 SCM_VALIDATE_SMOB (1, cs1
, charset
);
1925 SCM_VALIDATE_REST_ARGUMENT (rest
);
1927 cs1
= scm_char_set_intersection (scm_cons (cs1
, rest
));
1933 SCM_DEFINE (scm_char_set_difference_x
, "char-set-difference!", 1, 0, 1,
1934 (SCM cs1
, SCM rest
),
1935 "Return the difference of all argument character sets.")
1936 #define FUNC_NAME s_scm_char_set_difference_x
1938 SCM_VALIDATE_SMOB (1, cs1
, charset
);
1939 SCM_VALIDATE_REST_ARGUMENT (rest
);
1941 cs1
= scm_char_set_difference (cs1
, rest
);
1947 SCM_DEFINE (scm_char_set_xor_x
, "char-set-xor!", 1, 0, 1,
1948 (SCM cs1
, SCM rest
),
1949 "Return the exclusive-or of all argument character sets.")
1950 #define FUNC_NAME s_scm_char_set_xor_x
1952 /* a side-effecting variant should presumably give consistent results:
1953 (define a (char-set #\a))
1954 (char-set-xor a a a) -> char set #\a
1955 (char-set-xor! a a a) -> char set #\a
1957 cs1
= scm_char_set_xor (scm_cons (cs1
, rest
));
1963 SCM_DEFINE (scm_char_set_diff_plus_intersection_x
,
1964 "char-set-diff+intersection!", 2, 0, 1, (SCM cs1
, SCM cs2
,
1966 "Return the difference and the intersection of all argument\n"
1968 #define FUNC_NAME s_scm_char_set_diff_plus_intersection_x
1970 SCM diff
, intersect
;
1972 diff
= scm_char_set_difference (cs1
, scm_cons (cs2
, rest
));
1974 scm_char_set_intersection (scm_cons (cs1
, scm_cons (cs2
, rest
)));
1977 return scm_values (scm_list_2 (cs1
, cs2
));
1983 /* Standard character sets. */
1985 SCM scm_char_set_lower_case
;
1986 SCM scm_char_set_upper_case
;
1987 SCM scm_char_set_title_case
;
1988 SCM scm_char_set_letter
;
1989 SCM scm_char_set_digit
;
1990 SCM scm_char_set_letter_and_digit
;
1991 SCM scm_char_set_graphic
;
1992 SCM scm_char_set_printing
;
1993 SCM scm_char_set_whitespace
;
1994 SCM scm_char_set_iso_control
;
1995 SCM scm_char_set_punctuation
;
1996 SCM scm_char_set_symbol
;
1997 SCM scm_char_set_hex_digit
;
1998 SCM scm_char_set_blank
;
1999 SCM scm_char_set_ascii
;
2000 SCM scm_char_set_empty
;
2001 SCM scm_char_set_designated
;
2002 SCM scm_char_set_full
;
2005 /* Create an empty character set and return it after binding it to NAME. */
2007 define_charset (const char *name
, const scm_t_char_set
*p
)
2011 SCM_NEWSMOB (cs
, scm_tc16_charset
, p
);
2012 scm_c_define (name
, cs
);
2016 SCM_DEFINE (scm_sys_char_set_dump
, "%char-set-dump", 1, 0, 0, (SCM charset
),
2017 "Returns an association list containing debugging information\n"
2018 "for @var{charset}. The association list has the following entries."
2021 "The char-set itself.\n"
2023 "The number of character ranges the char-set contains\n"
2025 "A list of lists where each sublist a range of code points\n"
2026 "and their associated characters"
2028 #define FUNC_NAME s_scm_sys_char_set_dump
2031 SCM ranges
= SCM_EOL
, elt
;
2034 char codepoint_string_lo
[9], codepoint_string_hi
[9];
2036 SCM_VALIDATE_SMOB (1, charset
, charset
);
2037 cs
= SCM_CHARSET_DATA (charset
);
2039 e1
= scm_cons (scm_from_locale_symbol ("char-set"),
2041 e2
= scm_cons (scm_from_locale_symbol ("n"),
2042 scm_from_size_t (cs
->len
));
2044 for (i
= 0; i
< cs
->len
; i
++)
2046 if (cs
->ranges
[i
].lo
> 0xFFFF)
2047 sprintf (codepoint_string_lo
, "U+%06x", cs
->ranges
[i
].lo
);
2049 sprintf (codepoint_string_lo
, "U+%04x", cs
->ranges
[i
].lo
);
2050 if (cs
->ranges
[i
].hi
> 0xFFFF)
2051 sprintf (codepoint_string_hi
, "U+%06x", cs
->ranges
[i
].hi
);
2053 sprintf (codepoint_string_hi
, "U+%04x", cs
->ranges
[i
].hi
);
2055 elt
= scm_list_4 (SCM_MAKE_CHAR (cs
->ranges
[i
].lo
),
2056 SCM_MAKE_CHAR (cs
->ranges
[i
].hi
),
2057 scm_from_locale_string (codepoint_string_lo
),
2058 scm_from_locale_string (codepoint_string_hi
));
2059 ranges
= scm_append (scm_list_2 (ranges
,
2062 e3
= scm_cons (scm_from_locale_symbol ("ranges"),
2065 return scm_list_3 (e1
, e2
, e3
);
2073 scm_init_srfi_14 (void)
2075 scm_tc16_charset
= scm_make_smob_type ("character-set", 0);
2076 scm_set_smob_print (scm_tc16_charset
, charset_print
);
2078 scm_tc16_charset_cursor
= scm_make_smob_type ("char-set-cursor", 0);
2079 scm_set_smob_print (scm_tc16_charset_cursor
, charset_cursor_print
);
2081 scm_char_set_upper_case
=
2082 define_charset ("char-set:upper-case", &cs_upper_case
);
2083 scm_char_set_lower_case
=
2084 define_charset ("char-set:lower-case", &cs_lower_case
);
2085 scm_char_set_title_case
=
2086 define_charset ("char-set:title-case", &cs_title_case
);
2087 scm_char_set_letter
= define_charset ("char-set:letter", &cs_letter
);
2088 scm_char_set_digit
= define_charset ("char-set:digit", &cs_digit
);
2089 scm_char_set_letter_and_digit
=
2090 define_charset ("char-set:letter+digit", &cs_letter_plus_digit
);
2091 scm_char_set_graphic
= define_charset ("char-set:graphic", &cs_graphic
);
2092 scm_char_set_printing
= define_charset ("char-set:printing", &cs_printing
);
2093 scm_char_set_whitespace
=
2094 define_charset ("char-set:whitespace", &cs_whitespace
);
2095 scm_char_set_iso_control
=
2096 define_charset ("char-set:iso-control", &cs_iso_control
);
2097 scm_char_set_punctuation
=
2098 define_charset ("char-set:punctuation", &cs_punctuation
);
2099 scm_char_set_symbol
= define_charset ("char-set:symbol", &cs_symbol
);
2100 scm_char_set_hex_digit
=
2101 define_charset ("char-set:hex-digit", &cs_hex_digit
);
2102 scm_char_set_blank
= define_charset ("char-set:blank", &cs_blank
);
2103 scm_char_set_ascii
= define_charset ("char-set:ascii", &cs_ascii
);
2104 scm_char_set_empty
= define_charset ("char-set:empty", &cs_empty
);
2105 scm_char_set_designated
= define_charset ("char-set:designated", &cs_designated
);
2106 scm_char_set_full
= define_charset ("char-set:full", &cs_full
);
2108 #include "libguile/srfi-14.x"
2111 /* End of srfi-14.c. */