fe56270080d6ddc121d8279f6e1a694c345b20df
[bpt/guile.git] / libguile / srfi-14.c
1 /* srfi-14.c --- SRFI-14 procedures for Guile
2 *
3 * Copyright (C) 2001, 2004, 2006, 2007 Free Software Foundation, Inc.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public License
7 * as published by the Free Software Foundation; either version 3 of
8 * the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301 USA
19 */
20
21 #ifdef HAVE_CONFIG_H
22 # include <config.h>
23 #endif
24
25
26 #include <string.h>
27 #include <unictype.h>
28
29 #include "libguile.h"
30 #include "libguile/srfi-14.h"
31 #include "libguile/strings.h"
32
33 /* Include the pre-computed standard charset data. */
34 #include "libguile/srfi-14.i.c"
35
36 #define SCM_CHARSET_DATA(charset) ((scm_t_char_set *) SCM_SMOB_DATA (charset))
37
38 #define SCM_CHARSET_SET(cs, idx) \
39 scm_i_charset_set (SCM_CHARSET_DATA (cs), idx)
40
41 #define SCM_CHARSET_UNSET(cs, idx) \
42 scm_i_charset_unset (SCM_CHARSET_DATA (cs), idx)
43
44 /* Smob type code for character sets. */
45 int scm_tc16_charset = 0;
46 int scm_tc16_charset_cursor = 0;
47
48 /* True if N exists in charset CS. */
49 int
50 scm_i_charset_get (scm_t_char_set *cs, scm_t_wchar n)
51 {
52 size_t i;
53
54 i = 0;
55 while (i < cs->len)
56 {
57 if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi)
58 return 1;
59 i++;
60 }
61
62 return 0;
63 }
64
65 /* Put N into charset CS. */
66 void
67 scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n)
68 {
69 size_t i;
70 size_t len;
71
72 len = cs->len;
73
74 i = 0;
75 while (i < len)
76 {
77 /* Already in this range */
78 if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi)
79 {
80 return;
81 }
82
83 if (n == cs->ranges[i].lo - 1)
84 {
85 /* This char is one below the current range. */
86 if (i > 0 && cs->ranges[i - 1].hi + 1 == n)
87 {
88 /* It is also one above the previous range, so combine them. */
89 cs->ranges[i - 1].hi = cs->ranges[i].hi;
90 if (i < len - 1)
91 memmove (cs->ranges + i, cs->ranges + (i + 1),
92 sizeof (scm_t_char_range) * (len - i - 1));
93 cs->ranges = scm_gc_realloc (cs->ranges,
94 sizeof (scm_t_char_range) * len,
95 sizeof (scm_t_char_range) * (len -
96 1),
97 "character-set");
98 cs->len = len - 1;
99 return;
100 }
101 else
102 {
103 /* Expand the range down by one. */
104 cs->ranges[i].lo = n;
105 return;
106 }
107 }
108 else if (n == cs->ranges[i].hi + 1)
109 {
110 /* This char is one above the current range. */
111 if (i < len - 1 && cs->ranges[i + 1].lo - 1 == n)
112 {
113 /* It is also one below the next range, so combine them. */
114 cs->ranges[i].hi = cs->ranges[i + 1].hi;
115 if (i < len - 2)
116 memmove (cs->ranges + (i + 1), cs->ranges + (i + 2),
117 sizeof (scm_t_char_range) * (len - i - 2));
118 cs->ranges = scm_gc_realloc (cs->ranges,
119 sizeof (scm_t_char_range) * len,
120 sizeof (scm_t_char_range) * (len -
121 1),
122 "character-set");
123 cs->len = len - 1;
124 return;
125 }
126 else
127 {
128 /* Expand the range up by one. */
129 cs->ranges[i].hi = n;
130 return;
131 }
132 }
133 else if (n < cs->ranges[i].lo - 1)
134 {
135 /* This is a new range below the current one. */
136 cs->ranges = scm_gc_realloc (cs->ranges,
137 sizeof (scm_t_char_range) * len,
138 sizeof (scm_t_char_range) * (len + 1),
139 "character-set");
140 memmove (cs->ranges + (i + 1), cs->ranges + i,
141 sizeof (scm_t_char_range) * (len - i));
142 cs->ranges[i].lo = n;
143 cs->ranges[i].hi = n;
144 cs->len = len + 1;
145 return;
146 }
147
148 i++;
149 }
150
151 /* This is a new range above all previous ranges. */
152 if (len == 0)
153 {
154 cs->ranges = scm_gc_malloc (sizeof (scm_t_char_range), "character-set");
155 }
156 else
157 {
158 cs->ranges = scm_gc_realloc (cs->ranges,
159 sizeof (scm_t_char_range) * len,
160 sizeof (scm_t_char_range) * (len + 1),
161 "character-set");
162 }
163 cs->ranges[len].lo = n;
164 cs->ranges[len].hi = n;
165 cs->len = len + 1;
166
167 return;
168 }
169
170 /* If N is in charset CS, remove it. */
171 void
172 scm_i_charset_unset (scm_t_char_set *cs, scm_t_wchar n)
173 {
174 size_t i;
175 size_t len;
176
177 len = cs->len;
178
179 i = 0;
180 while (i < len)
181 {
182 if (n < cs->ranges[i].lo)
183 /* Not in this set. */
184 return;
185
186 if (n == cs->ranges[i].lo && n == cs->ranges[i].hi)
187 {
188 /* Remove this one-character range. */
189 if (len == 1)
190 {
191 scm_gc_free (cs->ranges,
192 sizeof (scm_t_char_range) * cs->len,
193 "character-set");
194 cs->ranges = NULL;
195 cs->len = 0;
196 return;
197 }
198 else if (i < len - 1)
199 {
200 memmove (cs->ranges + i, cs->ranges + (i + 1),
201 sizeof (scm_t_char_range) * (len - i - 1));
202 cs->ranges = scm_gc_realloc (cs->ranges,
203 sizeof (scm_t_char_range) * len,
204 sizeof (scm_t_char_range) * (len -
205 1),
206 "character-set");
207 cs->len = len - 1;
208 return;
209 }
210 else if (i == len - 1)
211 {
212 cs->ranges = scm_gc_realloc (cs->ranges,
213 sizeof (scm_t_char_range) * len,
214 sizeof (scm_t_char_range) * (len -
215 1),
216 "character-set");
217 cs->len = len - 1;
218 return;
219 }
220 }
221 else if (n == cs->ranges[i].lo)
222 {
223 /* Shrink this range from the left. */
224 cs->ranges[i].lo = n + 1;
225 return;
226 }
227 else if (n == cs->ranges[i].hi)
228 {
229 /* Shrink this range from the right. */
230 cs->ranges[i].hi = n - 1;
231 return;
232 }
233 else if (n > cs->ranges[i].lo && n < cs->ranges[i].hi)
234 {
235 /* Split this range into two pieces. */
236 cs->ranges = scm_gc_realloc (cs->ranges,
237 sizeof (scm_t_char_range) * len,
238 sizeof (scm_t_char_range) * (len + 1),
239 "character-set");
240 if (i < len - 1)
241 memmove (cs->ranges + (i + 2), cs->ranges + (i + 1),
242 sizeof (scm_t_char_range) * (len - i - 1));
243 cs->ranges[i + 1].hi = cs->ranges[i].hi;
244 cs->ranges[i + 1].lo = n + 1;
245 cs->ranges[i].hi = n - 1;
246 cs->len = len + 1;
247 return;
248 }
249
250 i++;
251 }
252
253 /* This value is above all ranges, so do nothing here. */
254 return;
255 }
256
257 static int
258 charsets_equal (scm_t_char_set *a, scm_t_char_set *b)
259 {
260 if (a->len != b->len)
261 return 0;
262
263 if (memcmp (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len) != 0)
264 return 0;
265
266 return 1;
267 }
268
269 /* Return true if every character in A is also in B. */
270 static int
271 charsets_leq (scm_t_char_set *a, scm_t_char_set *b)
272 {
273 size_t i = 0, j = 0;
274 scm_t_wchar alo, ahi;
275
276 if (a->len == 0)
277 return 1;
278 if (b->len == 0)
279 return 0;
280 while (i < a->len)
281 {
282 alo = a->ranges[i].lo;
283 ahi = a->ranges[i].hi;
284 while (b->ranges[j].hi < alo)
285 {
286 if (j < b->len - 1)
287 j++;
288 else
289 return 0;
290 }
291 if (alo < b->ranges[j].lo || ahi > b->ranges[j].hi)
292 return 0;
293 i++;
294 }
295
296 return 1;
297 }
298
299 /* Merge B into A. */
300 static void
301 charsets_union (scm_t_char_set *a, scm_t_char_set *b)
302 {
303 size_t i = 0;
304 scm_t_wchar blo, bhi, n;
305
306 if (b->len == 0)
307 return;
308
309 if (a->len == 0)
310 {
311 a->len = b->len;
312 a->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * b->len,
313 "character-set");
314 memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * b->len);
315 return;
316 }
317
318 /* This needs optimization. */
319 while (i < b->len)
320 {
321 blo = b->ranges[i].lo;
322 bhi = b->ranges[i].hi;
323 for (n = blo; n <= bhi; n++)
324 scm_i_charset_set (a, n);
325
326 i++;
327 }
328
329 return;
330 }
331
332 /* Remove elements not both in A and B from A. */
333 static void
334 charsets_intersection (scm_t_char_set *a, scm_t_char_set *b)
335 {
336 size_t i = 0;
337 scm_t_wchar blo, bhi, n;
338 scm_t_char_set *c;
339
340 if (a->len == 0)
341 return;
342
343 if (b->len == 0)
344 {
345 scm_gc_free (a->ranges, sizeof (scm_t_char_range) * a->len,
346 "character-set");
347 a->len = 0;
348 return;
349 }
350
351 c = (scm_t_char_set *) scm_malloc (sizeof (scm_t_char_set));
352 c->len = 0;
353 c->ranges = NULL;
354
355 while (i < b->len)
356 {
357 blo = b->ranges[i].lo;
358 bhi = b->ranges[i].hi;
359 for (n = blo; n <= bhi; n++)
360 if (scm_i_charset_get (a, n))
361 scm_i_charset_set (c, n);
362 i++;
363 }
364 scm_gc_free (a->ranges, sizeof (scm_t_char_range) * a->len,
365 "character-set");
366
367 a->len = c->len;
368 if (c->len != 0)
369 a->ranges = c->ranges;
370 else
371 a->ranges = NULL;
372 free (c);
373 return;
374 }
375
376 /* Make P the compelement of Q. */
377 static void
378 charsets_complement (scm_t_char_set *p, scm_t_char_set *q)
379 {
380 int k, idx;
381
382 if (q->len == 0)
383 {
384 /* Fill with all valid codepoints. */
385 p->len = 2;
386 p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2,
387 "character-set");
388 p->ranges[0].lo = 0;
389 p->ranges[0].hi = 0xd7ff;
390 p->ranges[1].lo = 0xe000;
391 p->ranges[1].hi = SCM_CODEPOINT_MAX;
392 return;
393 }
394
395 if (p->len > 0)
396 scm_gc_free (p->ranges, sizeof (scm_t_char_set) * p->len,
397 "character-set");
398
399 p->len = 0;
400 if (q->ranges[0].lo > 0)
401 p->len++;
402 if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
403 p->len++;
404 p->len += q->len - 1;
405 p->ranges =
406 (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) * p->len,
407 "character-set");
408 idx = 0;
409 if (q->ranges[0].lo > 0)
410 {
411 p->ranges[idx].lo = 0;
412 p->ranges[idx++].hi = q->ranges[0].lo - 1;
413 }
414 for (k = 1; k < q->len; k++)
415 {
416 p->ranges[idx].lo = q->ranges[k - 1].hi + 1;
417 p->ranges[idx++].hi = q->ranges[k].lo - 1;
418 }
419 if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
420 {
421 p->ranges[idx].lo = q->ranges[q->len - 1].hi + 1;
422 p->ranges[idx].hi = SCM_CODEPOINT_MAX;
423 }
424 return;
425 }
426
427 /* Replace A with elements only found in one of A or B. */
428 static void
429 charsets_xor (scm_t_char_set *a, scm_t_char_set *b)
430 {
431 size_t i = 0;
432 scm_t_wchar blo, bhi, n;
433
434 if (b->len == 0)
435 {
436 return;
437 }
438
439 if (a->len == 0)
440 {
441 a->ranges =
442 (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) *
443 b->len, "character-set");
444 a->len = b->len;
445 memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len);
446 return;
447 }
448
449 while (i < b->len)
450 {
451 blo = b->ranges[i].lo;
452 bhi = b->ranges[i].hi;
453 for (n = blo; n <= bhi; n++)
454 {
455 if (scm_i_charset_get (a, n))
456 scm_i_charset_unset (a, n);
457 else
458 scm_i_charset_set (a, n);
459 }
460
461 i++;
462 }
463 return;
464 }
465
466 /* Smob print hook for character sets. */
467 static int
468 charset_print (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED)
469 {
470 size_t i;
471 int first = 1;
472 scm_t_char_set *p;
473 const size_t max_ranges_to_print = 50;
474
475 p = SCM_CHARSET_DATA (charset);
476
477 scm_puts ("#<charset {", port);
478 for (i = 0; i < p->len; i++)
479 {
480 if (first)
481 first = 0;
482 else
483 scm_puts (" ", port);
484 scm_write (SCM_MAKE_CHAR (p->ranges[i].lo), port);
485 if (p->ranges[i].lo != p->ranges[i].hi)
486 {
487 scm_puts ("..", port);
488 scm_write (SCM_MAKE_CHAR (p->ranges[i].hi), port);
489 }
490 if (i >= max_ranges_to_print)
491 {
492 /* Too many to print here. Quit early. */
493 scm_puts (" ...", port);
494 break;
495 }
496 }
497 scm_puts ("}>", port);
498 return 1;
499 }
500
501
502 /* Smob free hook for character sets. */
503 static size_t
504 charset_free (SCM charset)
505 {
506 scm_t_char_set *cs;
507 size_t len = 0;
508
509 cs = SCM_CHARSET_DATA (charset);
510 if (cs != NULL)
511 len = cs->len;
512 if (len > 0)
513 scm_gc_free (cs->ranges, sizeof (scm_t_char_range) * len,
514 "character-set");
515
516 cs->ranges = NULL;
517 cs->len = 0;
518
519 scm_gc_free (cs, sizeof (scm_t_char_set), "character-set");
520
521 scm_remember_upto_here_1 (charset);
522
523 return 0;
524 }
525
526
527 /* Smob print hook for character sets cursors. */
528 static int
529 charset_cursor_print (SCM cursor, SCM port,
530 scm_print_state *pstate SCM_UNUSED)
531 {
532 scm_t_char_set_cursor *cur;
533
534 cur = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
535
536 scm_puts ("#<charset-cursor ", port);
537 if (cur->range == (size_t) (-1))
538 scm_puts ("(empty)", port);
539 else
540 {
541 scm_write (scm_from_size_t (cur->range), port);
542 scm_puts (":", port);
543 scm_write (scm_from_int32 (cur->n), port);
544 }
545 scm_puts (">", port);
546 return 1;
547 }
548
549 /* Smob free hook for character sets. */
550 static size_t
551 charset_cursor_free (SCM charset)
552 {
553 scm_t_char_set_cursor *cur;
554
555 cur = (scm_t_char_set_cursor *) SCM_SMOB_DATA (charset);
556 scm_gc_free (cur, sizeof (scm_t_char_set_cursor), "charset-cursor");
557 scm_remember_upto_here_1 (charset);
558
559 return 0;
560 }
561
562
563 /* Create a new, empty character set. */
564 static SCM
565 make_char_set (const char *func_name)
566 {
567 scm_t_char_set *p;
568
569 p = scm_gc_malloc (sizeof (scm_t_char_set), "character-set");
570 memset (p, 0, sizeof (scm_t_char_set));
571 SCM_RETURN_NEWSMOB (scm_tc16_charset, p);
572 }
573
574
575 SCM_DEFINE (scm_char_set_p, "char-set?", 1, 0, 0,
576 (SCM obj),
577 "Return @code{#t} if @var{obj} is a character set, @code{#f}\n"
578 "otherwise.")
579 #define FUNC_NAME s_scm_char_set_p
580 {
581 return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_charset, obj));
582 }
583 #undef FUNC_NAME
584
585
586 SCM_DEFINE (scm_char_set_eq, "char-set=", 0, 0, 1,
587 (SCM char_sets),
588 "Return @code{#t} if all given character sets are equal.")
589 #define FUNC_NAME s_scm_char_set_eq
590 {
591 int argnum = 1;
592 scm_t_char_set *cs1_data = NULL;
593
594 SCM_VALIDATE_REST_ARGUMENT (char_sets);
595
596 while (!scm_is_null (char_sets))
597 {
598 SCM csi = SCM_CAR (char_sets);
599 scm_t_char_set *csi_data;
600
601 SCM_VALIDATE_SMOB (argnum, csi, charset);
602 argnum++;
603 csi_data = SCM_CHARSET_DATA (csi);
604 if (cs1_data == NULL)
605 cs1_data = csi_data;
606 else if (!charsets_equal (cs1_data, csi_data))
607 return SCM_BOOL_F;
608 char_sets = SCM_CDR (char_sets);
609 }
610 return SCM_BOOL_T;
611 }
612 #undef FUNC_NAME
613
614
615 SCM_DEFINE (scm_char_set_leq, "char-set<=", 0, 0, 1,
616 (SCM char_sets),
617 "Return @code{#t} if every character set @var{cs}i is a subset\n"
618 "of character set @var{cs}i+1.")
619 #define FUNC_NAME s_scm_char_set_leq
620 {
621 int argnum = 1;
622 scm_t_char_set *prev_data = NULL;
623
624 SCM_VALIDATE_REST_ARGUMENT (char_sets);
625
626 while (!scm_is_null (char_sets))
627 {
628 SCM csi = SCM_CAR (char_sets);
629 scm_t_char_set *csi_data;
630
631 SCM_VALIDATE_SMOB (argnum, csi, charset);
632 argnum++;
633 csi_data = SCM_CHARSET_DATA (csi);
634 if (prev_data)
635 {
636 if (!charsets_leq (prev_data, csi_data))
637 return SCM_BOOL_F;
638 }
639 prev_data = csi_data;
640 char_sets = SCM_CDR (char_sets);
641 }
642 return SCM_BOOL_T;
643 }
644 #undef FUNC_NAME
645
646
647 SCM_DEFINE (scm_char_set_hash, "char-set-hash", 1, 1, 0,
648 (SCM cs, SCM bound),
649 "Compute a hash value for the character set @var{cs}. If\n"
650 "@var{bound} is given and non-zero, it restricts the\n"
651 "returned value to the range 0 @dots{} @var{bound - 1}.")
652 #define FUNC_NAME s_scm_char_set_hash
653 {
654 const unsigned long default_bnd = 871;
655 unsigned long bnd;
656 scm_t_char_set *p;
657 unsigned long val = 0;
658 int k;
659 scm_t_wchar c;
660
661 SCM_VALIDATE_SMOB (1, cs, charset);
662
663 if (SCM_UNBNDP (bound))
664 bnd = default_bnd;
665 else
666 {
667 bnd = scm_to_ulong (bound);
668 if (bnd == 0)
669 bnd = default_bnd;
670 }
671
672 p = SCM_CHARSET_DATA (cs);
673 for (k = 0; k < p->len; k++)
674 {
675 for (c = p->ranges[k].lo; c <= p->ranges[k].hi; c++)
676 val = c + (val << 1);
677 }
678 return scm_from_ulong (val % bnd);
679 }
680 #undef FUNC_NAME
681
682
683 SCM_DEFINE (scm_char_set_cursor, "char-set-cursor", 1, 0, 0,
684 (SCM cs), "Return a cursor into the character set @var{cs}.")
685 #define FUNC_NAME s_scm_char_set_cursor
686 {
687 scm_t_char_set *cs_data;
688 scm_t_char_set_cursor *cur_data;
689
690 SCM_VALIDATE_SMOB (1, cs, charset);
691 cs_data = SCM_CHARSET_DATA (cs);
692 cur_data =
693 (scm_t_char_set_cursor *) scm_gc_malloc (sizeof (scm_t_char_set_cursor),
694 "charset-cursor");
695 if (cs_data->len == 0)
696 {
697 cur_data->range = (size_t) (-1);
698 cur_data->n = 0;
699 }
700 else
701 {
702 cur_data->range = 0;
703 cur_data->n = cs_data->ranges[0].lo;
704 }
705 SCM_RETURN_NEWSMOB (scm_tc16_charset_cursor, cur_data);
706 }
707
708 #undef FUNC_NAME
709
710
711 SCM_DEFINE (scm_char_set_ref, "char-set-ref", 2, 0, 0,
712 (SCM cs, SCM cursor),
713 "Return the character at the current cursor position\n"
714 "@var{cursor} in the character set @var{cs}. It is an error to\n"
715 "pass a cursor for which @code{end-of-char-set?} returns true.")
716 #define FUNC_NAME s_scm_char_set_ref
717 {
718 scm_t_char_set *cs_data;
719 scm_t_char_set_cursor *cur_data;
720 size_t i;
721
722 SCM_VALIDATE_SMOB (1, cs, charset);
723 SCM_VALIDATE_SMOB (2, cursor, charset_cursor);
724
725 cs_data = SCM_CHARSET_DATA (cs);
726 cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
727
728 /* Validate that this cursor is still true. */
729 i = cur_data->range;
730 if (i == (size_t) (-1)
731 || i >= cs_data->len
732 || cur_data->n < cs_data->ranges[i].lo
733 || cur_data->n > cs_data->ranges[i].hi)
734 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
735 return SCM_MAKE_CHAR (cur_data->n);
736 }
737
738 #undef FUNC_NAME
739
740
741 SCM_DEFINE (scm_char_set_cursor_next, "char-set-cursor-next", 2, 0, 0,
742 (SCM cs, SCM cursor),
743 "Advance the character set cursor @var{cursor} to the next\n"
744 "character in the character set @var{cs}. It is an error if the\n"
745 "cursor given satisfies @code{end-of-char-set?}.")
746 #define FUNC_NAME s_scm_char_set_cursor_next
747 {
748 scm_t_char_set *cs_data;
749 scm_t_char_set_cursor *cur_data;
750 size_t i;
751
752 SCM_VALIDATE_SMOB (1, cs, charset);
753 SCM_VALIDATE_SMOB (2, cursor, charset_cursor);
754
755 cs_data = SCM_CHARSET_DATA (cs);
756 cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
757
758 /* Validate that this cursor is still true. */
759 i = cur_data->range;
760 if (i == (size_t) (-1)
761 || i >= cs_data->len
762 || cur_data->n < cs_data->ranges[i].lo
763 || cur_data->n > cs_data->ranges[i].hi)
764 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
765 /* Increment the cursor. */
766 if (cur_data->n == cs_data->ranges[i].hi)
767 {
768 if (i + 1 < cs_data->len)
769 {
770 cur_data->range = i + 1;
771 cur_data->n = cs_data->ranges[i + 1].lo;
772 }
773 else
774 {
775 /* This is the end of the road. */
776 cur_data->range = (size_t) (-1);
777 cur_data->n = 0;
778 }
779 }
780 else
781 {
782 cur_data->n = cur_data->n + 1;
783 }
784
785 return cursor;
786 }
787
788 #undef FUNC_NAME
789
790
791 SCM_DEFINE (scm_end_of_char_set_p, "end-of-char-set?", 1, 0, 0,
792 (SCM cursor),
793 "Return @code{#t} if @var{cursor} has reached the end of a\n"
794 "character set, @code{#f} otherwise.")
795 #define FUNC_NAME s_scm_end_of_char_set_p
796 {
797 scm_t_char_set_cursor *cur_data;
798 SCM_VALIDATE_SMOB (1, cursor, charset_cursor);
799
800 cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
801 if (cur_data->range == (size_t) (-1))
802 return SCM_BOOL_T;
803
804 return SCM_BOOL_F;
805 }
806
807 #undef FUNC_NAME
808
809
810 SCM_DEFINE (scm_char_set_fold, "char-set-fold", 3, 0, 0,
811 (SCM kons, SCM knil, SCM cs),
812 "Fold the procedure @var{kons} over the character set @var{cs},\n"
813 "initializing it with @var{knil}.")
814 #define FUNC_NAME s_scm_char_set_fold
815 {
816 scm_t_char_set *cs_data;
817 int k;
818 scm_t_wchar n;
819
820 SCM_VALIDATE_PROC (1, kons);
821 SCM_VALIDATE_SMOB (3, cs, charset);
822
823 cs_data = SCM_CHARSET_DATA (cs);
824
825 if (cs_data->len == 0)
826 return knil;
827
828 for (k = 0; k < cs_data->len; k++)
829 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
830 {
831 knil = scm_call_2 (kons, SCM_MAKE_CHAR (n), knil);
832 }
833 return knil;
834 }
835 #undef FUNC_NAME
836
837
838 SCM_DEFINE (scm_char_set_unfold, "char-set-unfold", 4, 1, 0,
839 (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
840 "This is a fundamental constructor for character sets.\n"
841 "@itemize @bullet\n"
842 "@item @var{g} is used to generate a series of ``seed'' values\n"
843 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
844 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
845 "@item @var{p} tells us when to stop -- when it returns true\n"
846 "when applied to one of the seed values.\n"
847 "@item @var{f} maps each seed value to a character. These\n"
848 "characters are added to the base character set @var{base_cs} to\n"
849 "form the result; @var{base_cs} defaults to the empty set.\n"
850 "@end itemize")
851 #define FUNC_NAME s_scm_char_set_unfold
852 {
853 SCM result, tmp;
854
855 SCM_VALIDATE_PROC (1, p);
856 SCM_VALIDATE_PROC (2, f);
857 SCM_VALIDATE_PROC (3, g);
858 if (!SCM_UNBNDP (base_cs))
859 {
860 SCM_VALIDATE_SMOB (5, base_cs, charset);
861 result = scm_char_set_copy (base_cs);
862 }
863 else
864 result = make_char_set (FUNC_NAME);
865
866 tmp = scm_call_1 (p, seed);
867 while (scm_is_false (tmp))
868 {
869 SCM ch = scm_call_1 (f, seed);
870 if (!SCM_CHARP (ch))
871 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
872 SCM_CHARSET_SET (result, SCM_CHAR (ch));
873
874 seed = scm_call_1 (g, seed);
875 tmp = scm_call_1 (p, seed);
876 }
877 return result;
878 }
879 #undef FUNC_NAME
880
881
882 SCM_DEFINE (scm_char_set_unfold_x, "char-set-unfold!", 5, 0, 0,
883 (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
884 "This is a fundamental constructor for character sets.\n"
885 "@itemize @bullet\n"
886 "@item @var{g} is used to generate a series of ``seed'' values\n"
887 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
888 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
889 "@item @var{p} tells us when to stop -- when it returns true\n"
890 "when applied to one of the seed values.\n"
891 "@item @var{f} maps each seed value to a character. These\n"
892 "characters are added to the base character set @var{base_cs} to\n"
893 "form the result; @var{base_cs} defaults to the empty set.\n"
894 "@end itemize")
895 #define FUNC_NAME s_scm_char_set_unfold_x
896 {
897 SCM tmp;
898
899 SCM_VALIDATE_PROC (1, p);
900 SCM_VALIDATE_PROC (2, f);
901 SCM_VALIDATE_PROC (3, g);
902 SCM_VALIDATE_SMOB (5, base_cs, charset);
903
904 tmp = scm_call_1 (p, seed);
905 while (scm_is_false (tmp))
906 {
907 SCM ch = scm_call_1 (f, seed);
908 if (!SCM_CHARP (ch))
909 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
910 SCM_CHARSET_SET (base_cs, SCM_CHAR (ch));
911
912 seed = scm_call_1 (g, seed);
913 tmp = scm_call_1 (p, seed);
914 }
915 return base_cs;
916 }
917 #undef FUNC_NAME
918
919
920 SCM_DEFINE (scm_char_set_for_each, "char-set-for-each", 2, 0, 0,
921 (SCM proc, SCM cs),
922 "Apply @var{proc} to every character in the character set\n"
923 "@var{cs}. The return value is not specified.")
924 #define FUNC_NAME s_scm_char_set_for_each
925 {
926 scm_t_char_set *cs_data;
927 int k;
928 scm_t_wchar n;
929
930 SCM_VALIDATE_PROC (1, proc);
931 SCM_VALIDATE_SMOB (2, cs, charset);
932
933 cs_data = SCM_CHARSET_DATA (cs);
934
935 if (cs_data->len == 0)
936 return SCM_UNSPECIFIED;
937
938 for (k = 0; k < cs_data->len; k++)
939 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
940 {
941 scm_call_1 (proc, SCM_MAKE_CHAR (n));
942 }
943
944 return SCM_UNSPECIFIED;
945 }
946
947 #undef FUNC_NAME
948
949
950 SCM_DEFINE (scm_char_set_map, "char-set-map", 2, 0, 0,
951 (SCM proc, SCM cs),
952 "Map the procedure @var{proc} over every character in @var{cs}.\n"
953 "@var{proc} must be a character -> character procedure.")
954 #define FUNC_NAME s_scm_char_set_map
955 {
956 SCM result;
957 int k;
958 scm_t_char_set *cs_data;
959 scm_t_wchar n;
960
961 SCM_VALIDATE_PROC (1, proc);
962 SCM_VALIDATE_SMOB (2, cs, charset);
963
964 result = make_char_set (FUNC_NAME);
965 cs_data = SCM_CHARSET_DATA (cs);
966
967 if (cs_data->len == 0)
968 return result;
969
970 for (k = 0; k < cs_data->len; k++)
971 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
972 {
973 SCM ch = scm_call_1 (proc, SCM_MAKE_CHAR (n));
974 if (!SCM_CHARP (ch))
975 SCM_MISC_ERROR ("procedure ~S returned non-char",
976 scm_list_1 (proc));
977 SCM_CHARSET_SET (result, SCM_CHAR (ch));
978 }
979 return result;
980 }
981 #undef FUNC_NAME
982
983
984 SCM_DEFINE (scm_char_set_copy, "char-set-copy", 1, 0, 0,
985 (SCM cs),
986 "Return a newly allocated character set containing all\n"
987 "characters in @var{cs}.")
988 #define FUNC_NAME s_scm_char_set_copy
989 {
990 SCM ret;
991 scm_t_char_set *p1, *p2;
992
993 SCM_VALIDATE_SMOB (1, cs, charset);
994 ret = make_char_set (FUNC_NAME);
995 p1 = SCM_CHARSET_DATA (cs);
996 p2 = SCM_CHARSET_DATA (ret);
997 p2->len = p1->len;
998
999 if (p1->len == 0)
1000 p2->ranges = NULL;
1001 else
1002 {
1003 p2->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * p1->len,
1004 "character-set");
1005 memcpy (p2->ranges, p1->ranges, sizeof (scm_t_char_range) * p1->len);
1006 }
1007
1008 return ret;
1009 }
1010
1011 #undef FUNC_NAME
1012
1013
1014 SCM_DEFINE (scm_char_set, "char-set", 0, 0, 1,
1015 (SCM rest),
1016 "Return a character set containing all given characters.")
1017 #define FUNC_NAME s_scm_char_set
1018 {
1019 SCM cs;
1020 int argnum = 1;
1021
1022 SCM_VALIDATE_REST_ARGUMENT (rest);
1023 cs = make_char_set (FUNC_NAME);
1024 while (!scm_is_null (rest))
1025 {
1026 scm_t_wchar c;
1027
1028 SCM_VALIDATE_CHAR_COPY (argnum, SCM_CAR (rest), c);
1029 argnum++;
1030 rest = SCM_CDR (rest);
1031 SCM_CHARSET_SET (cs, c);
1032 }
1033 return cs;
1034 }
1035 #undef FUNC_NAME
1036
1037
1038 SCM_DEFINE (scm_list_to_char_set, "list->char-set", 1, 1, 0,
1039 (SCM list, SCM base_cs),
1040 "Convert the character list @var{list} to a character set. If\n"
1041 "the character set @var{base_cs} is given, the character in this\n"
1042 "set are also included in the result.")
1043 #define FUNC_NAME s_scm_list_to_char_set
1044 {
1045 SCM cs;
1046
1047 SCM_VALIDATE_LIST (1, list);
1048 if (SCM_UNBNDP (base_cs))
1049 cs = make_char_set (FUNC_NAME);
1050 else
1051 {
1052 SCM_VALIDATE_SMOB (2, base_cs, charset);
1053 cs = scm_char_set_copy (base_cs);
1054 }
1055 while (!scm_is_null (list))
1056 {
1057 SCM chr = SCM_CAR (list);
1058 scm_t_wchar c;
1059
1060 SCM_VALIDATE_CHAR_COPY (0, chr, c);
1061 list = SCM_CDR (list);
1062
1063
1064 SCM_CHARSET_SET (cs, c);
1065 }
1066 return cs;
1067 }
1068 #undef FUNC_NAME
1069
1070
1071 SCM_DEFINE (scm_list_to_char_set_x, "list->char-set!", 2, 0, 0,
1072 (SCM list, SCM base_cs),
1073 "Convert the character list @var{list} to a character set. The\n"
1074 "characters are added to @var{base_cs} and @var{base_cs} is\n"
1075 "returned.")
1076 #define FUNC_NAME s_scm_list_to_char_set_x
1077 {
1078 SCM_VALIDATE_LIST (1, list);
1079 SCM_VALIDATE_SMOB (2, base_cs, charset);
1080 while (!scm_is_null (list))
1081 {
1082 SCM chr = SCM_CAR (list);
1083 scm_t_wchar c;
1084
1085 SCM_VALIDATE_CHAR_COPY (0, chr, c);
1086 list = SCM_CDR (list);
1087
1088 SCM_CHARSET_SET (base_cs, c);
1089 }
1090 return base_cs;
1091 }
1092 #undef FUNC_NAME
1093
1094
1095 SCM_DEFINE (scm_string_to_char_set, "string->char-set", 1, 1, 0,
1096 (SCM str, SCM base_cs),
1097 "Convert the string @var{str} to a character set. If the\n"
1098 "character set @var{base_cs} is given, the characters in this\n"
1099 "set are also included in the result.")
1100 #define FUNC_NAME s_scm_string_to_char_set
1101 {
1102 SCM cs;
1103 size_t k = 0, len;
1104
1105 SCM_VALIDATE_STRING (1, str);
1106 if (SCM_UNBNDP (base_cs))
1107 cs = make_char_set (FUNC_NAME);
1108 else
1109 {
1110 SCM_VALIDATE_SMOB (2, base_cs, charset);
1111 cs = scm_char_set_copy (base_cs);
1112 }
1113 len = scm_i_string_length (str);
1114 while (k < len)
1115 {
1116 scm_t_wchar c = scm_i_string_ref (str, k++);
1117 SCM_CHARSET_SET (cs, c);
1118 }
1119 scm_remember_upto_here_1 (str);
1120 return cs;
1121 }
1122 #undef FUNC_NAME
1123
1124
1125 SCM_DEFINE (scm_string_to_char_set_x, "string->char-set!", 2, 0, 0,
1126 (SCM str, SCM base_cs),
1127 "Convert the string @var{str} to a character set. The\n"
1128 "characters from the string are added to @var{base_cs}, and\n"
1129 "@var{base_cs} is returned.")
1130 #define FUNC_NAME s_scm_string_to_char_set_x
1131 {
1132 size_t k = 0, len;
1133
1134 SCM_VALIDATE_STRING (1, str);
1135 SCM_VALIDATE_SMOB (2, base_cs, charset);
1136 len = scm_i_string_length (str);
1137 while (k < len)
1138 {
1139 scm_t_wchar c = scm_i_string_ref (str, k++);
1140 SCM_CHARSET_SET (base_cs, c);
1141 }
1142 scm_remember_upto_here_1 (str);
1143 return base_cs;
1144 }
1145 #undef FUNC_NAME
1146
1147
1148 SCM_DEFINE (scm_char_set_filter, "char-set-filter", 2, 1, 0,
1149 (SCM pred, SCM cs, SCM base_cs),
1150 "Return a character set containing every character from @var{cs}\n"
1151 "so that it satisfies @var{pred}. If provided, the characters\n"
1152 "from @var{base_cs} are added to the result.")
1153 #define FUNC_NAME s_scm_char_set_filter
1154 {
1155 SCM ret;
1156 int k;
1157 scm_t_wchar n;
1158 scm_t_char_set *p;
1159
1160 SCM_VALIDATE_PROC (1, pred);
1161 SCM_VALIDATE_SMOB (2, cs, charset);
1162 if (!SCM_UNBNDP (base_cs))
1163 {
1164 SCM_VALIDATE_SMOB (3, base_cs, charset);
1165 ret = scm_char_set_copy (base_cs);
1166 }
1167 else
1168 ret = make_char_set (FUNC_NAME);
1169
1170 p = SCM_CHARSET_DATA (cs);
1171
1172 if (p->len == 0)
1173 return ret;
1174
1175 for (k = 0; k < p->len; k++)
1176 for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++)
1177 {
1178 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1179
1180 if (scm_is_true (res))
1181 SCM_CHARSET_SET (ret, n);
1182 }
1183 return ret;
1184 }
1185
1186 #undef FUNC_NAME
1187
1188
1189 SCM_DEFINE (scm_char_set_filter_x, "char-set-filter!", 3, 0, 0,
1190 (SCM pred, SCM cs, SCM base_cs),
1191 "Return a character set containing every character from @var{cs}\n"
1192 "so that it satisfies @var{pred}. The characters are added to\n"
1193 "@var{base_cs} and @var{base_cs} is returned.")
1194 #define FUNC_NAME s_scm_char_set_filter_x
1195 {
1196 int k;
1197 scm_t_wchar n;
1198 scm_t_char_set *p;
1199
1200 SCM_VALIDATE_PROC (1, pred);
1201 SCM_VALIDATE_SMOB (2, cs, charset);
1202 SCM_VALIDATE_SMOB (3, base_cs, charset);
1203 p = SCM_CHARSET_DATA (cs);
1204 if (p->len == 0)
1205 return base_cs;
1206
1207 for (k = 0; k < p->len; k++)
1208 for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++)
1209 {
1210 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
1211
1212 if (scm_is_true (res))
1213 SCM_CHARSET_SET (base_cs, n);
1214 }
1215 return base_cs;
1216 }
1217
1218 #undef FUNC_NAME
1219
1220
1221 SCM_DEFINE (scm_ucs_range_to_char_set, "ucs-range->char-set", 2, 2, 0,
1222 (SCM lower, SCM upper, SCM error, SCM base_cs),
1223 "Return a character set containing all characters whose\n"
1224 "character codes lie in the half-open range\n"
1225 "[@var{lower},@var{upper}).\n"
1226 "\n"
1227 "If @var{error} is a true value, an error is signalled if the\n"
1228 "specified range contains characters which are not contained in\n"
1229 "the implemented character range. If @var{error} is @code{#f},\n"
1230 "these characters are silently left out of the resultung\n"
1231 "character set.\n"
1232 "\n"
1233 "The characters in @var{base_cs} are added to the result, if\n"
1234 "given.")
1235 #define FUNC_NAME s_scm_ucs_range_to_char_set
1236 {
1237 SCM cs;
1238 size_t clower, cupper;
1239
1240 clower = scm_to_size_t (lower);
1241 cupper = scm_to_size_t (upper);
1242 SCM_ASSERT_RANGE (2, upper, cupper >= clower);
1243 if (!SCM_UNBNDP (error))
1244 {
1245 if (scm_is_true (error))
1246 {
1247 SCM_ASSERT_RANGE (1, lower, SCM_IS_UNICODE_CHAR (clower));
1248 SCM_ASSERT_RANGE (2, upper, SCM_IS_UNICODE_CHAR (cupper));
1249 }
1250 }
1251 if (clower > 0x10FFFF)
1252 clower = 0x10FFFF;
1253 if (cupper > 0x10FFFF)
1254 cupper = 0x10FFFF;
1255 if (SCM_UNBNDP (base_cs))
1256 cs = make_char_set (FUNC_NAME);
1257 else
1258 {
1259 SCM_VALIDATE_SMOB (4, base_cs, charset);
1260 cs = scm_char_set_copy (base_cs);
1261 }
1262 /* It not be difficult to write a more optimized version of the
1263 following. */
1264 while (clower < cupper)
1265 {
1266 SCM_CHARSET_SET (cs, clower);
1267 clower++;
1268 }
1269 return cs;
1270 }
1271 #undef FUNC_NAME
1272
1273
1274 SCM_DEFINE (scm_ucs_range_to_char_set_x, "ucs-range->char-set!", 4, 0, 0,
1275 (SCM lower, SCM upper, SCM error, SCM base_cs),
1276 "Return a character set containing all characters whose\n"
1277 "character codes lie in the half-open range\n"
1278 "[@var{lower},@var{upper}).\n"
1279 "\n"
1280 "If @var{error} is a true value, an error is signalled if the\n"
1281 "specified range contains characters which are not contained in\n"
1282 "the implemented character range. If @var{error} is @code{#f},\n"
1283 "these characters are silently left out of the resultung\n"
1284 "character set.\n"
1285 "\n"
1286 "The characters are added to @var{base_cs} and @var{base_cs} is\n"
1287 "returned.")
1288 #define FUNC_NAME s_scm_ucs_range_to_char_set_x
1289 {
1290 size_t clower, cupper;
1291
1292 clower = scm_to_size_t (lower);
1293 cupper = scm_to_size_t (upper);
1294 SCM_ASSERT_RANGE (2, upper, cupper >= clower);
1295 if (scm_is_true (error))
1296 {
1297 SCM_ASSERT_RANGE (1, lower, SCM_IS_UNICODE_CHAR (clower));
1298 SCM_ASSERT_RANGE (2, upper, SCM_IS_UNICODE_CHAR (cupper));
1299 }
1300 if (clower > SCM_CODEPOINT_MAX)
1301 clower = SCM_CODEPOINT_MAX;
1302 if (cupper > SCM_CODEPOINT_MAX)
1303 cupper = SCM_CODEPOINT_MAX;
1304
1305 while (clower < cupper)
1306 {
1307 if (SCM_IS_UNICODE_CHAR (clower))
1308 SCM_CHARSET_SET (base_cs, clower);
1309 clower++;
1310 }
1311 return base_cs;
1312 }
1313 #undef FUNC_NAME
1314
1315 SCM_DEFINE (scm_to_char_set, "->char-set", 1, 0, 0,
1316 (SCM x),
1317 "Coerces x into a char-set. @var{x} may be a string, character or char-set. A string is converted to the set of its constituent characters; a character is converted to a singleton set; a char-set is returned as-is.")
1318 #define FUNC_NAME s_scm_to_char_set
1319 {
1320 if (scm_is_string (x))
1321 return scm_string_to_char_set (x, SCM_UNDEFINED);
1322 else if (SCM_CHARP (x))
1323 return scm_char_set (scm_list_1 (x));
1324 else if (SCM_SMOB_PREDICATE (scm_tc16_charset, x))
1325 return x;
1326 else
1327 scm_wrong_type_arg (NULL, 0, x);
1328 }
1329 #undef FUNC_NAME
1330
1331 SCM_DEFINE (scm_char_set_size, "char-set-size", 1, 0, 0,
1332 (SCM cs),
1333 "Return the number of elements in character set @var{cs}.")
1334 #define FUNC_NAME s_scm_char_set_size
1335 {
1336 int k, count = 0;
1337 scm_t_char_set *cs_data;
1338
1339 SCM_VALIDATE_SMOB (1, cs, charset);
1340 cs_data = SCM_CHARSET_DATA (cs);
1341
1342 if (cs_data->len == 0)
1343 return scm_from_int (0);
1344
1345 for (k = 0; k < cs_data->len; k++)
1346 count += cs_data->ranges[k].hi - cs_data->ranges[k].lo + 1;
1347
1348 return scm_from_int (count);
1349 }
1350
1351 #undef FUNC_NAME
1352
1353
1354 SCM_DEFINE (scm_char_set_count, "char-set-count", 2, 0, 0,
1355 (SCM pred, SCM cs),
1356 "Return the number of the elements int the character set\n"
1357 "@var{cs} which satisfy the predicate @var{pred}.")
1358 #define FUNC_NAME s_scm_char_set_count
1359 {
1360 int k, count = 0;
1361 scm_t_wchar n;
1362 scm_t_char_set *cs_data;
1363
1364 SCM_VALIDATE_PROC (1, pred);
1365 SCM_VALIDATE_SMOB (2, cs, charset);
1366 cs_data = SCM_CHARSET_DATA (cs);
1367 if (cs_data->len == 0)
1368 return scm_from_int (0);
1369
1370 for (k = 0; k < cs_data->len; k++)
1371 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
1372 {
1373 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1374 if (scm_is_true (res))
1375 count++;
1376 }
1377 return SCM_I_MAKINUM (count);
1378 }
1379 #undef FUNC_NAME
1380
1381
1382 SCM_DEFINE (scm_char_set_to_list, "char-set->list", 1, 0, 0,
1383 (SCM cs),
1384 "Return a list containing the elements of the character set\n"
1385 "@var{cs}.")
1386 #define FUNC_NAME s_scm_char_set_to_list
1387 {
1388 int k;
1389 scm_t_wchar n;
1390 SCM result = SCM_EOL;
1391 scm_t_char_set *p;
1392
1393 SCM_VALIDATE_SMOB (1, cs, charset);
1394 p = SCM_CHARSET_DATA (cs);
1395 if (p->len == 0)
1396 return SCM_EOL;
1397
1398 for (k = p->len - 1; k >= 0; k--)
1399 for (n = p->ranges[k].hi; n >= p->ranges[k].lo; n--)
1400 result = scm_cons (SCM_MAKE_CHAR (n), result);
1401 return result;
1402 }
1403
1404 #undef FUNC_NAME
1405
1406
1407 SCM_DEFINE (scm_char_set_to_string, "char-set->string", 1, 0, 0,
1408 (SCM cs),
1409 "Return a string containing the elements of the character set\n"
1410 "@var{cs}. The order in which the characters are placed in the\n"
1411 "string is not defined.")
1412 #define FUNC_NAME s_scm_char_set_to_string
1413 {
1414 int k;
1415 int count = 0;
1416 int idx = 0;
1417 int wide = 0;
1418 SCM result;
1419 scm_t_wchar n;
1420 scm_t_char_set *cs_data;
1421 char *buf;
1422 scm_t_wchar *wbuf;
1423
1424 SCM_VALIDATE_SMOB (1, cs, charset);
1425 cs_data = SCM_CHARSET_DATA (cs);
1426 if (cs_data->len == 0)
1427 return scm_nullstr;
1428
1429 if (cs_data->ranges[cs_data->len - 1].hi > 255)
1430 wide = 1;
1431
1432 count = scm_to_int (scm_char_set_size (cs));
1433 if (wide)
1434 result = scm_i_make_wide_string (count, &wbuf);
1435 else
1436 result = scm_i_make_string (count, &buf);
1437
1438 for (k = 0; k < cs_data->len; k++)
1439 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
1440 {
1441 if (wide)
1442 wbuf[idx++] = n;
1443 else
1444 buf[idx++] = n;
1445 }
1446 return result;
1447 }
1448
1449 #undef FUNC_NAME
1450
1451
1452 SCM_DEFINE (scm_char_set_contains_p, "char-set-contains?", 2, 0, 0,
1453 (SCM cs, SCM ch),
1454 "Return @code{#t} iff the character @var{ch} is contained in the\n"
1455 "character set @var{cs}.")
1456 #define FUNC_NAME s_scm_char_set_contains_p
1457 {
1458 SCM_VALIDATE_SMOB (1, cs, charset);
1459 SCM_VALIDATE_CHAR (2, ch);
1460 return scm_from_bool (SCM_CHARSET_GET (cs, SCM_CHAR (ch)));
1461 }
1462 #undef FUNC_NAME
1463
1464
1465 SCM_DEFINE (scm_char_set_every, "char-set-every", 2, 0, 0,
1466 (SCM pred, SCM cs),
1467 "Return a true value if every character in the character set\n"
1468 "@var{cs} satisfies the predicate @var{pred}.")
1469 #define FUNC_NAME s_scm_char_set_every
1470 {
1471 int k;
1472 scm_t_wchar n;
1473 SCM res = SCM_BOOL_T;
1474 scm_t_char_set *cs_data;
1475
1476 SCM_VALIDATE_PROC (1, pred);
1477 SCM_VALIDATE_SMOB (2, cs, charset);
1478
1479 cs_data = SCM_CHARSET_DATA (cs);
1480 if (cs_data->len == 0)
1481 return SCM_BOOL_T;
1482
1483 for (k = 0; k < cs_data->len; k++)
1484 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
1485 {
1486 res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1487 if (scm_is_false (res))
1488 return res;
1489 }
1490 return SCM_BOOL_T;
1491 }
1492
1493 #undef FUNC_NAME
1494
1495
1496 SCM_DEFINE (scm_char_set_any, "char-set-any", 2, 0, 0,
1497 (SCM pred, SCM cs),
1498 "Return a true value if any character in the character set\n"
1499 "@var{cs} satisfies the predicate @var{pred}.")
1500 #define FUNC_NAME s_scm_char_set_any
1501 {
1502 int k;
1503 scm_t_wchar n;
1504 scm_t_char_set *cs_data;
1505
1506 SCM_VALIDATE_PROC (1, pred);
1507 SCM_VALIDATE_SMOB (2, cs, charset);
1508
1509 cs_data = (scm_t_char_set *) cs;
1510
1511 for (k = 0; k < cs_data->len; k++)
1512 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
1513 {
1514 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1515 if (scm_is_true (res))
1516 return res;
1517 }
1518 return SCM_BOOL_F;
1519 }
1520 #undef FUNC_NAME
1521
1522
1523 SCM_DEFINE (scm_char_set_adjoin, "char-set-adjoin", 1, 0, 1,
1524 (SCM cs, SCM rest),
1525 "Add all character arguments to the first argument, which must\n"
1526 "be a character set.")
1527 #define FUNC_NAME s_scm_char_set_adjoin
1528 {
1529 SCM_VALIDATE_SMOB (1, cs, charset);
1530 SCM_VALIDATE_REST_ARGUMENT (rest);
1531 cs = scm_char_set_copy (cs);
1532
1533 while (!scm_is_null (rest))
1534 {
1535 SCM chr = SCM_CAR (rest);
1536 scm_t_wchar c;
1537
1538 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1539 rest = SCM_CDR (rest);
1540
1541 SCM_CHARSET_SET (cs, c);
1542 }
1543 return cs;
1544 }
1545 #undef FUNC_NAME
1546
1547
1548 SCM_DEFINE (scm_char_set_delete, "char-set-delete", 1, 0, 1,
1549 (SCM cs, SCM rest),
1550 "Delete all character arguments from the first argument, which\n"
1551 "must be a character set.")
1552 #define FUNC_NAME s_scm_char_set_delete
1553 {
1554 SCM_VALIDATE_SMOB (1, cs, charset);
1555 SCM_VALIDATE_REST_ARGUMENT (rest);
1556 cs = scm_char_set_copy (cs);
1557
1558 while (!scm_is_null (rest))
1559 {
1560 SCM chr = SCM_CAR (rest);
1561 scm_t_wchar c;
1562
1563 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1564 rest = SCM_CDR (rest);
1565
1566 SCM_CHARSET_UNSET (cs, c);
1567 }
1568 return cs;
1569 }
1570 #undef FUNC_NAME
1571
1572
1573 SCM_DEFINE (scm_char_set_adjoin_x, "char-set-adjoin!", 1, 0, 1,
1574 (SCM cs, SCM rest),
1575 "Add all character arguments to the first argument, which must\n"
1576 "be a character set.")
1577 #define FUNC_NAME s_scm_char_set_adjoin_x
1578 {
1579 SCM_VALIDATE_SMOB (1, cs, charset);
1580 SCM_VALIDATE_REST_ARGUMENT (rest);
1581
1582 while (!scm_is_null (rest))
1583 {
1584 SCM chr = SCM_CAR (rest);
1585 scm_t_wchar c;
1586
1587 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1588 rest = SCM_CDR (rest);
1589
1590 SCM_CHARSET_SET (cs, c);
1591 }
1592 return cs;
1593 }
1594 #undef FUNC_NAME
1595
1596
1597 SCM_DEFINE (scm_char_set_delete_x, "char-set-delete!", 1, 0, 1,
1598 (SCM cs, SCM rest),
1599 "Delete all character arguments from the first argument, which\n"
1600 "must be a character set.")
1601 #define FUNC_NAME s_scm_char_set_delete_x
1602 {
1603 SCM_VALIDATE_SMOB (1, cs, charset);
1604 SCM_VALIDATE_REST_ARGUMENT (rest);
1605
1606 while (!scm_is_null (rest))
1607 {
1608 SCM chr = SCM_CAR (rest);
1609 scm_t_wchar c;
1610
1611 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1612 rest = SCM_CDR (rest);
1613
1614 SCM_CHARSET_UNSET (cs, c);
1615 }
1616 return cs;
1617 }
1618 #undef FUNC_NAME
1619
1620
1621 SCM_DEFINE (scm_char_set_complement, "char-set-complement", 1, 0, 0,
1622 (SCM cs), "Return the complement of the character set @var{cs}.")
1623 #define FUNC_NAME s_scm_char_set_complement
1624 {
1625 SCM res;
1626 scm_t_char_set *p, *q;
1627
1628 SCM_VALIDATE_SMOB (1, cs, charset);
1629
1630 res = make_char_set (FUNC_NAME);
1631 p = SCM_CHARSET_DATA (res);
1632 q = SCM_CHARSET_DATA (cs);
1633
1634 charsets_complement (p, q);
1635 return res;
1636 }
1637
1638 #undef FUNC_NAME
1639
1640
1641 SCM_DEFINE (scm_char_set_union, "char-set-union", 0, 0, 1,
1642 (SCM rest),
1643 "Return the union of all argument character sets.")
1644 #define FUNC_NAME s_scm_char_set_union
1645 {
1646 int c = 1;
1647 SCM res;
1648 scm_t_char_set *p;
1649
1650 SCM_VALIDATE_REST_ARGUMENT (rest);
1651
1652 res = make_char_set (FUNC_NAME);
1653 p = SCM_CHARSET_DATA (res);
1654 while (!scm_is_null (rest))
1655 {
1656 SCM cs = SCM_CAR (rest);
1657 SCM_VALIDATE_SMOB (c, cs, charset);
1658 c++;
1659 rest = SCM_CDR (rest);
1660
1661
1662 charsets_union (p, (scm_t_char_set *) SCM_SMOB_DATA (cs));
1663 }
1664 return res;
1665 }
1666 #undef FUNC_NAME
1667
1668
1669 SCM_DEFINE (scm_char_set_intersection, "char-set-intersection", 0, 0, 1,
1670 (SCM rest),
1671 "Return the intersection of all argument character sets.")
1672 #define FUNC_NAME s_scm_char_set_intersection
1673 {
1674 SCM res;
1675
1676 SCM_VALIDATE_REST_ARGUMENT (rest);
1677
1678 if (scm_is_null (rest))
1679 res = make_char_set (FUNC_NAME);
1680 else
1681 {
1682 scm_t_char_set *p;
1683 int argnum = 2;
1684
1685 res = scm_char_set_copy (SCM_CAR (rest));
1686 p = SCM_CHARSET_DATA (res);
1687 rest = SCM_CDR (rest);
1688
1689 while (scm_is_pair (rest))
1690 {
1691 SCM cs = SCM_CAR (rest);
1692 scm_t_char_set *cs_data;
1693
1694 SCM_VALIDATE_SMOB (argnum, cs, charset);
1695 argnum++;
1696 cs_data = SCM_CHARSET_DATA (cs);
1697 rest = SCM_CDR (rest);
1698 charsets_intersection (p, cs_data);
1699 }
1700 }
1701
1702 return res;
1703 }
1704 #undef FUNC_NAME
1705
1706
1707 SCM_DEFINE (scm_char_set_difference, "char-set-difference", 1, 0, 1,
1708 (SCM cs1, SCM rest),
1709 "Return the difference of all argument character sets.")
1710 #define FUNC_NAME s_scm_char_set_difference
1711 {
1712 int c = 2;
1713 SCM res, compl;
1714 scm_t_char_set *p, *q;
1715
1716 SCM_VALIDATE_SMOB (1, cs1, charset);
1717 SCM_VALIDATE_REST_ARGUMENT (rest);
1718
1719 res = scm_char_set_copy (cs1);
1720 p = SCM_CHARSET_DATA (res);
1721 compl = make_char_set (FUNC_NAME);
1722 q = SCM_CHARSET_DATA (compl);
1723 while (!scm_is_null (rest))
1724 {
1725 SCM cs = SCM_CAR (rest);
1726 SCM_VALIDATE_SMOB (c, cs, charset);
1727 c++;
1728 rest = SCM_CDR (rest);
1729
1730 charsets_complement (q, SCM_CHARSET_DATA (cs));
1731 charsets_intersection (p, q);
1732 }
1733 return res;
1734 }
1735 #undef FUNC_NAME
1736
1737
1738 SCM_DEFINE (scm_char_set_xor, "char-set-xor", 0, 0, 1,
1739 (SCM rest),
1740 "Return the exclusive-or of all argument character sets.")
1741 #define FUNC_NAME s_scm_char_set_xor
1742 {
1743 SCM res;
1744
1745 SCM_VALIDATE_REST_ARGUMENT (rest);
1746
1747 if (scm_is_null (rest))
1748 res = make_char_set (FUNC_NAME);
1749 else
1750 {
1751 int argnum = 2;
1752 scm_t_char_set *p;
1753
1754 res = scm_char_set_copy (SCM_CAR (rest));
1755 p = SCM_CHARSET_DATA (res);
1756 rest = SCM_CDR (rest);
1757
1758 while (scm_is_pair (rest))
1759 {
1760 SCM cs = SCM_CAR (rest);
1761 scm_t_char_set *cs_data;
1762
1763 SCM_VALIDATE_SMOB (argnum, cs, charset);
1764 argnum++;
1765 cs_data = SCM_CHARSET_DATA (cs);
1766 rest = SCM_CDR (rest);
1767
1768 charsets_xor (p, cs_data);
1769 }
1770 }
1771 return res;
1772 }
1773 #undef FUNC_NAME
1774
1775
1776 SCM_DEFINE (scm_char_set_diff_plus_intersection, "char-set-diff+intersection", 1, 0, 1,
1777 (SCM cs1, SCM rest),
1778 "Return the difference and the intersection of all argument\n"
1779 "character sets.")
1780 #define FUNC_NAME s_scm_char_set_diff_plus_intersection
1781 {
1782 int c = 2;
1783 SCM res1, res2;
1784 scm_t_char_set *p, *q;
1785
1786 SCM_VALIDATE_SMOB (1, cs1, charset);
1787 SCM_VALIDATE_REST_ARGUMENT (rest);
1788
1789 res1 = scm_char_set_copy (cs1);
1790 res2 = make_char_set (FUNC_NAME);
1791 p = SCM_CHARSET_DATA (res1);
1792 q = SCM_CHARSET_DATA (res2);
1793 while (!scm_is_null (rest))
1794 {
1795 SCM cs = SCM_CAR (rest);
1796 scm_t_char_set *r;
1797
1798 SCM_VALIDATE_SMOB (c, cs, charset);
1799 c++;
1800 r = SCM_CHARSET_DATA (cs);
1801
1802 charsets_union (q, r);
1803 charsets_intersection (p, r);
1804 rest = SCM_CDR (rest);
1805 }
1806 return scm_values (scm_list_2 (res1, res2));
1807 }
1808 #undef FUNC_NAME
1809
1810
1811 SCM_DEFINE (scm_char_set_complement_x, "char-set-complement!", 1, 0, 0,
1812 (SCM cs), "Return the complement of the character set @var{cs}.")
1813 #define FUNC_NAME s_scm_char_set_complement_x
1814 {
1815 SCM_VALIDATE_SMOB (1, cs, charset);
1816 cs = scm_char_set_complement (cs);
1817 return cs;
1818 }
1819
1820 #undef FUNC_NAME
1821
1822
1823 SCM_DEFINE (scm_char_set_union_x, "char-set-union!", 1, 0, 1,
1824 (SCM cs1, SCM rest),
1825 "Return the union of all argument character sets.")
1826 #define FUNC_NAME s_scm_char_set_union_x
1827 {
1828 SCM_VALIDATE_SMOB (1, cs1, charset);
1829 SCM_VALIDATE_REST_ARGUMENT (rest);
1830
1831 cs1 = scm_char_set_union (scm_cons (cs1, rest));
1832 return cs1;
1833 }
1834
1835 #undef FUNC_NAME
1836
1837
1838 SCM_DEFINE (scm_char_set_intersection_x, "char-set-intersection!", 1, 0, 1,
1839 (SCM cs1, SCM rest),
1840 "Return the intersection of all argument character sets.")
1841 #define FUNC_NAME s_scm_char_set_intersection_x
1842 {
1843 SCM_VALIDATE_SMOB (1, cs1, charset);
1844 SCM_VALIDATE_REST_ARGUMENT (rest);
1845
1846 cs1 = scm_char_set_intersection (scm_cons (cs1, rest));
1847 return cs1;
1848 }
1849
1850 #undef FUNC_NAME
1851
1852
1853 SCM_DEFINE (scm_char_set_difference_x, "char-set-difference!", 1, 0, 1,
1854 (SCM cs1, SCM rest),
1855 "Return the difference of all argument character sets.")
1856 #define FUNC_NAME s_scm_char_set_difference_x
1857 {
1858 SCM_VALIDATE_SMOB (1, cs1, charset);
1859 SCM_VALIDATE_REST_ARGUMENT (rest);
1860
1861 cs1 = scm_char_set_difference (cs1, rest);
1862 return cs1;
1863 }
1864
1865 #undef FUNC_NAME
1866
1867
1868 SCM_DEFINE (scm_char_set_xor_x, "char-set-xor!", 1, 0, 1,
1869 (SCM cs1, SCM rest),
1870 "Return the exclusive-or of all argument character sets.")
1871 #define FUNC_NAME s_scm_char_set_xor_x
1872 {
1873 /* a side-effecting variant should presumably give consistent results:
1874 (define a (char-set #\a))
1875 (char-set-xor a a a) -> char set #\a
1876 (char-set-xor! a a a) -> char set #\a
1877 */
1878 return scm_char_set_xor (scm_cons (cs1, rest));
1879 }
1880
1881 #undef FUNC_NAME
1882
1883
1884 SCM_DEFINE (scm_char_set_diff_plus_intersection_x,
1885 "char-set-diff+intersection!", 2, 0, 1, (SCM cs1, SCM cs2,
1886 SCM rest),
1887 "Return the difference and the intersection of all argument\n"
1888 "character sets.")
1889 #define FUNC_NAME s_scm_char_set_diff_plus_intersection_x
1890 {
1891 SCM diff, intersect;
1892
1893 diff = scm_char_set_difference (cs1, scm_cons (cs2, rest));
1894 intersect =
1895 scm_char_set_intersection (scm_cons (cs1, scm_cons (cs2, rest)));
1896 cs1 = diff;
1897 cs2 = intersect;
1898 return scm_values (scm_list_2 (cs1, cs2));
1899 }
1900
1901 #undef FUNC_NAME
1902 \f
1903
1904 /* Standard character sets. */
1905
1906 SCM scm_char_set_lower_case;
1907 SCM scm_char_set_upper_case;
1908 SCM scm_char_set_title_case;
1909 SCM scm_char_set_letter;
1910 SCM scm_char_set_digit;
1911 SCM scm_char_set_letter_and_digit;
1912 SCM scm_char_set_graphic;
1913 SCM scm_char_set_printing;
1914 SCM scm_char_set_whitespace;
1915 SCM scm_char_set_iso_control;
1916 SCM scm_char_set_punctuation;
1917 SCM scm_char_set_symbol;
1918 SCM scm_char_set_hex_digit;
1919 SCM scm_char_set_blank;
1920 SCM scm_char_set_ascii;
1921 SCM scm_char_set_empty;
1922 SCM scm_char_set_full;
1923
1924
1925 /* Create an empty character set and return it after binding it to NAME. */
1926 static inline SCM
1927 define_charset (const char *name, const scm_t_char_set *p)
1928 {
1929 SCM cs;
1930
1931 SCM_NEWSMOB (cs, scm_tc16_charset, p);
1932 scm_c_define (name, cs);
1933 return scm_permanent_object (cs);
1934 }
1935
1936 #ifdef SCM_CHARSET_DEBUG
1937 SCM_DEFINE (scm_debug_char_set, "debug-char-set", 1, 0, 0,
1938 (SCM charset),
1939 "Print out the internal C structure of @var{charset}.\n")
1940 #define FUNC_NAME s_debug_char_set
1941 {
1942 int i;
1943 scm_t_char_set *cs = SCM_CHARSET_DATA (charset);
1944 fprintf (stderr, "cs %p\n", cs);
1945 fprintf (stderr, "len %d\n", cs->len);
1946 fprintf (stderr, "arr %p\n", cs->ranges);
1947 for (i = 0; i < cs->len; i++)
1948 {
1949 if (cs->ranges[i].lo == cs->ranges[i].hi)
1950 fprintf (stderr, "%04x\n", cs->ranges[i].lo);
1951 else
1952 fprintf (stderr, "%04x..%04x\t[%d]\n",
1953 cs->ranges[i].lo,
1954 cs->ranges[i].hi, cs->ranges[i].hi - cs->ranges[i].lo + 1);
1955 }
1956 printf ("\n");
1957 return SCM_UNSPECIFIED;
1958 }
1959
1960 #undef FUNC_NAME
1961 #endif
1962 \f
1963
1964
1965 void
1966 scm_init_srfi_14 (void)
1967 {
1968 scm_tc16_charset = scm_make_smob_type ("character-set", 0);
1969 scm_set_smob_free (scm_tc16_charset, charset_free);
1970 scm_set_smob_print (scm_tc16_charset, charset_print);
1971
1972 scm_tc16_charset_cursor = scm_make_smob_type ("char-set-cursor", 0);
1973 scm_set_smob_free (scm_tc16_charset_cursor, charset_cursor_free);
1974 scm_set_smob_print (scm_tc16_charset_cursor, charset_cursor_print);
1975
1976 scm_char_set_upper_case =
1977 define_charset ("char-set:upper-case", &cs_upper_case);
1978 scm_char_set_lower_case =
1979 define_charset ("char-set:lower-case", &cs_lower_case);
1980 scm_char_set_title_case =
1981 define_charset ("char-set:title-case", &cs_title_case);
1982 scm_char_set_letter = define_charset ("char-set:letter", &cs_letter);
1983 scm_char_set_digit = define_charset ("char-set:digit", &cs_digit);
1984 scm_char_set_letter_and_digit =
1985 define_charset ("char-set:letter+digit", &cs_letter_plus_digit);
1986 scm_char_set_graphic = define_charset ("char-set:graphic", &cs_graphic);
1987 scm_char_set_printing = define_charset ("char-set:printing", &cs_printing);
1988 scm_char_set_whitespace =
1989 define_charset ("char-set:whitespace", &cs_whitespace);
1990 scm_char_set_iso_control =
1991 define_charset ("char-set:iso-control", &cs_iso_control);
1992 scm_char_set_punctuation =
1993 define_charset ("char-set:punctuation", &cs_punctuation);
1994 scm_char_set_symbol = define_charset ("char-set:symbol", &cs_symbol);
1995 scm_char_set_hex_digit =
1996 define_charset ("char-set:hex-digit", &cs_hex_digit);
1997 scm_char_set_blank = define_charset ("char-set:blank", &cs_blank);
1998 scm_char_set_ascii = define_charset ("char-set:ascii", &cs_ascii);
1999 scm_char_set_empty = define_charset ("char-set:empty", &cs_empty);
2000 scm_char_set_full = define_charset ("char-set:full", &cs_full);
2001
2002 #include "libguile/srfi-14.x"
2003 }
2004
2005 /* End of srfi-14.c. */