Fix infinite loop in expander
[bpt/guile.git] / libguile / srfi-14.c
1 /* srfi-14.c --- SRFI-14 procedures for Guile
2 *
3 * Copyright (C) 2001, 2004, 2006, 2007, 2009, 2011 Free Software Foundation, Inc.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public License
7 * as published by the Free Software Foundation; either version 3 of
8 * the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301 USA
19 */
20
21 #ifdef HAVE_CONFIG_H
22 # include <config.h>
23 #endif
24
25
26 #include <string.h>
27 #include <unictype.h>
28
29 #include "libguile.h"
30 #include "libguile/srfi-14.h"
31 #include "libguile/strings.h"
32 #include "libguile/chars.h"
33
34 /* Include the pre-computed standard charset data. */
35 #include "libguile/srfi-14.i.c"
36
37 scm_t_char_range cs_full_ranges[] = {
38 {0x0000, SCM_CODEPOINT_SURROGATE_START - 1}
39 ,
40 {SCM_CODEPOINT_SURROGATE_END + 1, SCM_CODEPOINT_MAX}
41 };
42
43 scm_t_char_set cs_full = {
44 2,
45 cs_full_ranges
46 };
47
48
49 #define SCM_CHARSET_DATA(charset) ((scm_t_char_set *) SCM_SMOB_DATA (charset))
50
51 #define SCM_CHARSET_SET(cs, idx) \
52 scm_i_charset_set (SCM_CHARSET_DATA (cs), idx)
53
54 #define SCM_CHARSET_UNSET(cs, idx) \
55 scm_i_charset_unset (SCM_CHARSET_DATA (cs), idx)
56
57 /* Smob type code for character sets. */
58 int scm_tc16_charset = 0;
59 int scm_tc16_charset_cursor = 0;
60
61 /* True if N exists in charset CS. */
62 int
63 scm_i_charset_get (scm_t_char_set *cs, scm_t_wchar n)
64 {
65 size_t i;
66
67 i = 0;
68 while (i < cs->len)
69 {
70 if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi)
71 return 1;
72 i++;
73 }
74
75 return 0;
76 }
77
78 /* Put N into charset CS. */
79 void
80 scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n)
81 {
82 size_t i;
83 size_t len;
84
85 len = cs->len;
86
87 i = 0;
88 while (i < len)
89 {
90 /* Already in this range */
91 if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi)
92 {
93 return;
94 }
95
96 if (n == cs->ranges[i].lo - 1)
97 {
98 /* This char is one below the current range. */
99 if (i > 0 && cs->ranges[i - 1].hi + 1 == n)
100 {
101 /* It is also one above the previous range. */
102 /* This is an impossible condition: in the previous
103 iteration, the test for 'one above the current range'
104 should already have inserted the character here. */
105 abort ();
106 }
107 else
108 {
109 /* Expand the range down by one. */
110 cs->ranges[i].lo = n;
111 return;
112 }
113 }
114 else if (n == cs->ranges[i].hi + 1)
115 {
116 /* This char is one above the current range. */
117 if (i < len - 1 && cs->ranges[i + 1].lo - 1 == n)
118 {
119 /* It is also one below the next range, so combine them. */
120 cs->ranges[i].hi = cs->ranges[i + 1].hi;
121 if (i < len - 2)
122 memmove (cs->ranges + (i + 1), cs->ranges + (i + 2),
123 sizeof (scm_t_char_range) * (len - i - 2));
124 cs->ranges = scm_gc_realloc (cs->ranges,
125 sizeof (scm_t_char_range) * len,
126 sizeof (scm_t_char_range) * (len -
127 1),
128 "character-set");
129 cs->len = len - 1;
130 return;
131 }
132 else
133 {
134 /* Expand the range up by one. */
135 cs->ranges[i].hi = n;
136 return;
137 }
138 }
139 else if (n < cs->ranges[i].lo - 1)
140 {
141 /* This is a new range below the current one. */
142 cs->ranges = scm_gc_realloc (cs->ranges,
143 sizeof (scm_t_char_range) * len,
144 sizeof (scm_t_char_range) * (len + 1),
145 "character-set");
146 memmove (cs->ranges + (i + 1), cs->ranges + i,
147 sizeof (scm_t_char_range) * (len - i));
148 cs->ranges[i].lo = n;
149 cs->ranges[i].hi = n;
150 cs->len = len + 1;
151 return;
152 }
153
154 i++;
155 }
156
157 /* This is a new range above all previous ranges. */
158 if (len == 0)
159 {
160 cs->ranges = scm_gc_malloc (sizeof (scm_t_char_range), "character-set");
161 }
162 else
163 {
164 cs->ranges = scm_gc_realloc (cs->ranges,
165 sizeof (scm_t_char_range) * len,
166 sizeof (scm_t_char_range) * (len + 1),
167 "character-set");
168 }
169 cs->ranges[len].lo = n;
170 cs->ranges[len].hi = n;
171 cs->len = len + 1;
172
173 return;
174 }
175
176 /* Put LO to HI inclusive into charset CS. */
177 static void
178 scm_i_charset_set_range (scm_t_char_set *cs, scm_t_wchar lo, scm_t_wchar hi)
179 {
180 size_t i;
181
182 i = 0;
183 while (i < cs->len)
184 {
185 /* Already in this range */
186 if (cs->ranges[i].lo <= lo && cs->ranges[i].hi >= hi)
187 return;
188
189 /* cur: +---+
190 new: +---+
191 */
192 if (cs->ranges[i].lo - 1 > hi)
193 {
194 /* Add a new range below the current one. */
195 cs->ranges = scm_gc_realloc (cs->ranges,
196 sizeof (scm_t_char_range) * cs->len,
197 sizeof (scm_t_char_range) * (cs->len + 1),
198 "character-set");
199 memmove (cs->ranges + (i + 1), cs->ranges + i,
200 sizeof (scm_t_char_range) * (cs->len - i));
201 cs->ranges[i].lo = lo;
202 cs->ranges[i].hi = hi;
203 cs->len += 1;
204 return;
205 }
206
207 /* cur: +---+ or +---+ or +---+
208 new: +---+ +---+ +---+
209 */
210 if (cs->ranges[i].lo > lo
211 && (cs->ranges[i].lo - 1 <= hi && cs->ranges[i].hi >= hi))
212 {
213 cs->ranges[i].lo = lo;
214 return;
215 }
216
217 /* cur: +---+ or +---+ or +---+
218 new: +---+ +---+ +---+
219 */
220 else if (cs->ranges[i].hi + 1 >= lo && cs->ranges[i].hi < hi)
221 {
222 if (cs->ranges[i].lo > lo)
223 cs->ranges[i].lo = lo;
224 if (cs->ranges[i].hi < hi)
225 cs->ranges[i].hi = hi;
226 while (i < cs->len - 1)
227 {
228 /* cur: --+ +---+
229 new: -----+
230 */
231 if (cs->ranges[i + 1].lo - 1 > hi)
232 break;
233
234 /* cur: --+ +---+ or --+ +---+ or --+ +--+
235 new: -----+ ------+ ---------+
236 */
237 /* Combine this range with the previous one. */
238 if (cs->ranges[i + 1].hi > hi)
239 cs->ranges[i].hi = cs->ranges[i + 1].hi;
240 if (i + 1 < cs->len)
241 memmove (cs->ranges + i + 1, cs->ranges + i + 2,
242 sizeof (scm_t_char_range) * (cs->len - i - 2));
243 cs->ranges = scm_gc_realloc (cs->ranges,
244 sizeof (scm_t_char_range) * cs->len,
245 sizeof (scm_t_char_range) * (cs->len - 1),
246 "character-set");
247 cs->len -= 1;
248 }
249 return;
250 }
251 i ++;
252 }
253
254 /* This is a new range above all previous ranges. */
255 if (cs->len == 0)
256 {
257 cs->ranges = scm_gc_malloc (sizeof (scm_t_char_range), "character-set");
258 }
259 else
260 {
261 cs->ranges = scm_gc_realloc (cs->ranges,
262 sizeof (scm_t_char_range) * cs->len,
263 sizeof (scm_t_char_range) * (cs->len + 1),
264 "character-set");
265 }
266 cs->len += 1;
267 cs->ranges[cs->len - 1].lo = lo;
268 cs->ranges[cs->len - 1].hi = hi;
269
270 return;
271 }
272
273 /* If N is in charset CS, remove it. */
274 void
275 scm_i_charset_unset (scm_t_char_set *cs, scm_t_wchar n)
276 {
277 size_t i;
278 size_t len;
279
280 len = cs->len;
281
282 i = 0;
283 while (i < len)
284 {
285 if (n < cs->ranges[i].lo)
286 /* Not in this set. */
287 return;
288
289 if (n == cs->ranges[i].lo && n == cs->ranges[i].hi)
290 {
291 /* Remove this one-character range. */
292 if (len == 1)
293 {
294 scm_gc_free (cs->ranges,
295 sizeof (scm_t_char_range) * cs->len,
296 "character-set");
297 cs->ranges = NULL;
298 cs->len = 0;
299 return;
300 }
301 else if (i < len - 1)
302 {
303 memmove (cs->ranges + i, cs->ranges + (i + 1),
304 sizeof (scm_t_char_range) * (len - i - 1));
305 cs->ranges = scm_gc_realloc (cs->ranges,
306 sizeof (scm_t_char_range) * len,
307 sizeof (scm_t_char_range) * (len -
308 1),
309 "character-set");
310 cs->len = len - 1;
311 return;
312 }
313 else if (i == len - 1)
314 {
315 cs->ranges = scm_gc_realloc (cs->ranges,
316 sizeof (scm_t_char_range) * len,
317 sizeof (scm_t_char_range) * (len -
318 1),
319 "character-set");
320 cs->len = len - 1;
321 return;
322 }
323 }
324 else if (n == cs->ranges[i].lo)
325 {
326 /* Shrink this range from the left. */
327 cs->ranges[i].lo = n + 1;
328 return;
329 }
330 else if (n == cs->ranges[i].hi)
331 {
332 /* Shrink this range from the right. */
333 cs->ranges[i].hi = n - 1;
334 return;
335 }
336 else if (n > cs->ranges[i].lo && n < cs->ranges[i].hi)
337 {
338 /* Split this range into two pieces. */
339 cs->ranges = scm_gc_realloc (cs->ranges,
340 sizeof (scm_t_char_range) * len,
341 sizeof (scm_t_char_range) * (len + 1),
342 "character-set");
343 if (i < len - 1)
344 memmove (cs->ranges + (i + 2), cs->ranges + (i + 1),
345 sizeof (scm_t_char_range) * (len - i - 1));
346 cs->ranges[i + 1].hi = cs->ranges[i].hi;
347 cs->ranges[i + 1].lo = n + 1;
348 cs->ranges[i].hi = n - 1;
349 cs->len = len + 1;
350 return;
351 }
352
353 i++;
354 }
355
356 /* This value is above all ranges, so do nothing here. */
357 return;
358 }
359
360 static int
361 charsets_equal (scm_t_char_set *a, scm_t_char_set *b)
362 {
363 if (a->len != b->len)
364 return 0;
365
366 if (memcmp (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len) != 0)
367 return 0;
368
369 return 1;
370 }
371
372 /* Return true if every character in A is also in B. */
373 static int
374 charsets_leq (scm_t_char_set *a, scm_t_char_set *b)
375 {
376 size_t i = 0, j = 0;
377 scm_t_wchar alo, ahi;
378
379 if (a->len == 0)
380 return 1;
381 if (b->len == 0)
382 return 0;
383 while (i < a->len)
384 {
385 alo = a->ranges[i].lo;
386 ahi = a->ranges[i].hi;
387 while (b->ranges[j].hi < alo)
388 {
389 if (j < b->len - 1)
390 j++;
391 else
392 return 0;
393 }
394 if (alo < b->ranges[j].lo || ahi > b->ranges[j].hi)
395 return 0;
396 i++;
397 }
398
399 return 1;
400 }
401
402 /* Merge B into A. */
403 static void
404 charsets_union (scm_t_char_set *a, scm_t_char_set *b)
405 {
406 size_t i = 0;
407 scm_t_wchar blo, bhi;
408
409 if (b->len == 0)
410 return;
411
412 if (a->len == 0)
413 {
414 a->len = b->len;
415 a->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * b->len,
416 "character-set");
417 memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * b->len);
418 return;
419 }
420
421 while (i < b->len)
422 {
423 blo = b->ranges[i].lo;
424 bhi = b->ranges[i].hi;
425 scm_i_charset_set_range (a, blo, bhi);
426
427 i++;
428 }
429
430 return;
431 }
432
433 /* Remove elements not both in A and B from A. */
434 static void
435 charsets_intersection (scm_t_char_set *a, scm_t_char_set *b)
436 {
437 size_t i = 0;
438 scm_t_wchar blo, bhi, n;
439 scm_t_char_set *c;
440
441 if (a->len == 0)
442 return;
443
444 if (b->len == 0)
445 {
446 scm_gc_free (a->ranges, sizeof (scm_t_char_range) * a->len,
447 "character-set");
448 a->len = 0;
449 return;
450 }
451
452 c = (scm_t_char_set *) scm_malloc (sizeof (scm_t_char_set));
453 c->len = 0;
454 c->ranges = NULL;
455
456 while (i < b->len)
457 {
458 blo = b->ranges[i].lo;
459 bhi = b->ranges[i].hi;
460 for (n = blo; n <= bhi; n++)
461 if (scm_i_charset_get (a, n))
462 scm_i_charset_set (c, n);
463 i++;
464 }
465 scm_gc_free (a->ranges, sizeof (scm_t_char_range) * a->len,
466 "character-set");
467
468 a->len = c->len;
469 if (c->len != 0)
470 a->ranges = c->ranges;
471 else
472 a->ranges = NULL;
473 free (c);
474 return;
475 }
476
477 #define SCM_ADD_RANGE(low, high) \
478 do { \
479 p->ranges[idx].lo = (low); \
480 p->ranges[idx++].hi = (high); \
481 } while (0)
482 #define SCM_ADD_RANGE_SKIP_SURROGATES(low, high) \
483 do { \
484 p->ranges[idx].lo = (low); \
485 p->ranges[idx++].hi = SCM_CODEPOINT_SURROGATE_START - 1; \
486 p->ranges[idx].lo = SCM_CODEPOINT_SURROGATE_END + 1; \
487 p->ranges[idx++].hi = (high); \
488 } while (0)
489
490
491
492 /* Make P the compelement of Q. */
493 static void
494 charsets_complement (scm_t_char_set *p, scm_t_char_set *q)
495 {
496 int k, idx;
497
498 idx = 0;
499 if (q->len == 0)
500 {
501 /* Fill with all valid codepoints. */
502 p->len = 2;
503 p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2,
504 "character-set");
505 SCM_ADD_RANGE_SKIP_SURROGATES (0, SCM_CODEPOINT_MAX);
506 return;
507 }
508
509 if (p->len > 0)
510 scm_gc_free (p->ranges, sizeof (scm_t_char_set) * p->len,
511 "character-set");
512
513 /* Count the number of ranges needed for the output. */
514 p->len = 0;
515 if (q->ranges[0].lo > 0)
516 p->len++;
517 if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
518 p->len++;
519 p->len += q->len;
520 p->ranges =
521 (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) * p->len,
522 "character-set");
523 if (q->ranges[0].lo > 0)
524 {
525 if (q->ranges[0].lo > SCM_CODEPOINT_SURROGATE_END)
526 SCM_ADD_RANGE_SKIP_SURROGATES (0, q->ranges[0].lo - 1);
527 else
528 SCM_ADD_RANGE (0, q->ranges[0].lo - 1);
529 }
530 for (k = 1; k < q->len; k++)
531 {
532 if (q->ranges[k - 1].hi < SCM_CODEPOINT_SURROGATE_START
533 && q->ranges[k].lo - 1 > SCM_CODEPOINT_SURROGATE_END)
534 SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1);
535 else
536 SCM_ADD_RANGE (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1);
537 }
538 if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
539 {
540 if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_SURROGATE_START)
541 SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX);
542 else
543 SCM_ADD_RANGE (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX);
544 }
545 return;
546 }
547 #undef SCM_ADD_RANGE
548 #undef SCM_ADD_RANGE_SKIP_SURROGATES
549
550 /* Replace A with elements only found in one of A or B. */
551 static void
552 charsets_xor (scm_t_char_set *a, scm_t_char_set *b)
553 {
554 size_t i = 0;
555 scm_t_wchar blo, bhi, n;
556
557 if (b->len == 0)
558 {
559 return;
560 }
561
562 if (a->len == 0)
563 {
564 a->ranges =
565 (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) *
566 b->len, "character-set");
567 a->len = b->len;
568 memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len);
569 return;
570 }
571
572 while (i < b->len)
573 {
574 blo = b->ranges[i].lo;
575 bhi = b->ranges[i].hi;
576 for (n = blo; n <= bhi; n++)
577 {
578 if (scm_i_charset_get (a, n))
579 scm_i_charset_unset (a, n);
580 else
581 scm_i_charset_set (a, n);
582 }
583
584 i++;
585 }
586 return;
587 }
588
589 /* Smob print hook for character sets. */
590 static int
591 charset_print (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED)
592 {
593 size_t i;
594 int first = 1;
595 scm_t_char_set *p;
596 const size_t max_ranges_to_print = 50;
597
598 p = SCM_CHARSET_DATA (charset);
599
600 scm_puts_unlocked ("#<charset {", port);
601 for (i = 0; i < p->len; i++)
602 {
603 if (first)
604 first = 0;
605 else
606 scm_puts_unlocked (" ", port);
607 scm_write (SCM_MAKE_CHAR (p->ranges[i].lo), port);
608 if (p->ranges[i].lo != p->ranges[i].hi)
609 {
610 scm_puts_unlocked ("..", port);
611 scm_write (SCM_MAKE_CHAR (p->ranges[i].hi), port);
612 }
613 if (i >= max_ranges_to_print)
614 {
615 /* Too many to print here. Quit early. */
616 scm_puts_unlocked (" ...", port);
617 break;
618 }
619 }
620 scm_puts_unlocked ("}>", port);
621 return 1;
622 }
623
624 /* Smob print hook for character sets cursors. */
625 static int
626 charset_cursor_print (SCM cursor, SCM port,
627 scm_print_state *pstate SCM_UNUSED)
628 {
629 scm_t_char_set_cursor *cur;
630
631 cur = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
632
633 scm_puts_unlocked ("#<charset-cursor ", port);
634 if (cur->range == (size_t) (-1))
635 scm_puts_unlocked ("(empty)", port);
636 else
637 {
638 scm_write (scm_from_size_t (cur->range), port);
639 scm_puts_unlocked (":", port);
640 scm_write (scm_from_int32 (cur->n), port);
641 }
642 scm_puts_unlocked (">", port);
643 return 1;
644 }
645
646
647 /* Create a new, empty character set. */
648 static SCM
649 make_char_set (const char *func_name)
650 {
651 scm_t_char_set *p;
652
653 p = scm_gc_malloc (sizeof (scm_t_char_set), "character-set");
654 memset (p, 0, sizeof (scm_t_char_set));
655 SCM_RETURN_NEWSMOB (scm_tc16_charset, p);
656 }
657
658
659 SCM_DEFINE (scm_char_set_p, "char-set?", 1, 0, 0,
660 (SCM obj),
661 "Return @code{#t} if @var{obj} is a character set, @code{#f}\n"
662 "otherwise.")
663 #define FUNC_NAME s_scm_char_set_p
664 {
665 return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_charset, obj));
666 }
667 #undef FUNC_NAME
668
669
670 SCM_DEFINE (scm_char_set_eq, "char-set=", 0, 0, 1,
671 (SCM char_sets),
672 "Return @code{#t} if all given character sets are equal.")
673 #define FUNC_NAME s_scm_char_set_eq
674 {
675 int argnum = 1;
676 scm_t_char_set *cs1_data = NULL;
677
678 SCM_VALIDATE_REST_ARGUMENT (char_sets);
679
680 while (!scm_is_null (char_sets))
681 {
682 SCM csi = SCM_CAR (char_sets);
683 scm_t_char_set *csi_data;
684
685 SCM_VALIDATE_SMOB (argnum, csi, charset);
686 argnum++;
687 csi_data = SCM_CHARSET_DATA (csi);
688 if (cs1_data == NULL)
689 cs1_data = csi_data;
690 else if (!charsets_equal (cs1_data, csi_data))
691 return SCM_BOOL_F;
692 char_sets = SCM_CDR (char_sets);
693 }
694 return SCM_BOOL_T;
695 }
696 #undef FUNC_NAME
697
698
699 SCM_DEFINE (scm_char_set_leq, "char-set<=", 0, 0, 1,
700 (SCM char_sets),
701 "Return @code{#t} if every character set @var{char_set}i is a subset\n"
702 "of character set @var{char_set}i+1.")
703 #define FUNC_NAME s_scm_char_set_leq
704 {
705 int argnum = 1;
706 scm_t_char_set *prev_data = NULL;
707
708 SCM_VALIDATE_REST_ARGUMENT (char_sets);
709
710 while (!scm_is_null (char_sets))
711 {
712 SCM csi = SCM_CAR (char_sets);
713 scm_t_char_set *csi_data;
714
715 SCM_VALIDATE_SMOB (argnum, csi, charset);
716 argnum++;
717 csi_data = SCM_CHARSET_DATA (csi);
718 if (prev_data)
719 {
720 if (!charsets_leq (prev_data, csi_data))
721 return SCM_BOOL_F;
722 }
723 prev_data = csi_data;
724 char_sets = SCM_CDR (char_sets);
725 }
726 return SCM_BOOL_T;
727 }
728 #undef FUNC_NAME
729
730
731 SCM_DEFINE (scm_char_set_hash, "char-set-hash", 1, 1, 0,
732 (SCM cs, SCM bound),
733 "Compute a hash value for the character set @var{cs}. If\n"
734 "@var{bound} is given and non-zero, it restricts the\n"
735 "returned value to the range 0 @dots{} @var{bound} - 1.")
736 #define FUNC_NAME s_scm_char_set_hash
737 {
738 const unsigned long default_bnd = 871;
739 unsigned long bnd;
740 scm_t_char_set *p;
741 unsigned long val = 0;
742 int k;
743 scm_t_wchar c;
744
745 SCM_VALIDATE_SMOB (1, cs, charset);
746
747 if (SCM_UNBNDP (bound))
748 bnd = default_bnd;
749 else
750 {
751 bnd = scm_to_ulong (bound);
752 if (bnd == 0)
753 bnd = default_bnd;
754 }
755
756 p = SCM_CHARSET_DATA (cs);
757 for (k = 0; k < p->len; k++)
758 {
759 for (c = p->ranges[k].lo; c <= p->ranges[k].hi; c++)
760 val = c + (val << 1);
761 }
762 return scm_from_ulong (val % bnd);
763 }
764 #undef FUNC_NAME
765
766
767 SCM_DEFINE (scm_char_set_cursor, "char-set-cursor", 1, 0, 0,
768 (SCM cs), "Return a cursor into the character set @var{cs}.")
769 #define FUNC_NAME s_scm_char_set_cursor
770 {
771 scm_t_char_set *cs_data;
772 scm_t_char_set_cursor *cur_data;
773
774 SCM_VALIDATE_SMOB (1, cs, charset);
775 cs_data = SCM_CHARSET_DATA (cs);
776 cur_data =
777 (scm_t_char_set_cursor *) scm_gc_malloc (sizeof (scm_t_char_set_cursor),
778 "charset-cursor");
779 if (cs_data->len == 0)
780 {
781 cur_data->range = (size_t) (-1);
782 cur_data->n = 0;
783 }
784 else
785 {
786 cur_data->range = 0;
787 cur_data->n = cs_data->ranges[0].lo;
788 }
789 SCM_RETURN_NEWSMOB (scm_tc16_charset_cursor, cur_data);
790 }
791 #undef FUNC_NAME
792
793
794 SCM_DEFINE (scm_char_set_ref, "char-set-ref", 2, 0, 0,
795 (SCM cs, SCM cursor),
796 "Return the character at the current cursor position\n"
797 "@var{cursor} in the character set @var{cs}. It is an error to\n"
798 "pass a cursor for which @code{end-of-char-set?} returns true.")
799 #define FUNC_NAME s_scm_char_set_ref
800 {
801 scm_t_char_set *cs_data;
802 scm_t_char_set_cursor *cur_data;
803 size_t i;
804
805 SCM_VALIDATE_SMOB (1, cs, charset);
806 SCM_VALIDATE_SMOB (2, cursor, charset_cursor);
807
808 cs_data = SCM_CHARSET_DATA (cs);
809 cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
810
811 /* Validate that this cursor is still true. */
812 i = cur_data->range;
813 if (i == (size_t) (-1)
814 || i >= cs_data->len
815 || cur_data->n < cs_data->ranges[i].lo
816 || cur_data->n > cs_data->ranges[i].hi)
817 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
818 return SCM_MAKE_CHAR (cur_data->n);
819 }
820 #undef FUNC_NAME
821
822
823 SCM_DEFINE (scm_char_set_cursor_next, "char-set-cursor-next", 2, 0, 0,
824 (SCM cs, SCM cursor),
825 "Advance the character set cursor @var{cursor} to the next\n"
826 "character in the character set @var{cs}. It is an error if the\n"
827 "cursor given satisfies @code{end-of-char-set?}.")
828 #define FUNC_NAME s_scm_char_set_cursor_next
829 {
830 scm_t_char_set *cs_data;
831 scm_t_char_set_cursor *cur_data;
832 size_t i;
833
834 SCM_VALIDATE_SMOB (1, cs, charset);
835 SCM_VALIDATE_SMOB (2, cursor, charset_cursor);
836
837 cs_data = SCM_CHARSET_DATA (cs);
838 cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
839
840 /* Validate that this cursor is still true. */
841 i = cur_data->range;
842 if (i == (size_t) (-1)
843 || i >= cs_data->len
844 || cur_data->n < cs_data->ranges[i].lo
845 || cur_data->n > cs_data->ranges[i].hi)
846 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
847 /* Increment the cursor. */
848 if (cur_data->n == cs_data->ranges[i].hi)
849 {
850 if (i + 1 < cs_data->len)
851 {
852 cur_data->range = i + 1;
853 cur_data->n = cs_data->ranges[i + 1].lo;
854 }
855 else
856 {
857 /* This is the end of the road. */
858 cur_data->range = (size_t) (-1);
859 cur_data->n = 0;
860 }
861 }
862 else
863 {
864 cur_data->n = cur_data->n + 1;
865 }
866
867 return cursor;
868 }
869 #undef FUNC_NAME
870
871
872 SCM_DEFINE (scm_end_of_char_set_p, "end-of-char-set?", 1, 0, 0,
873 (SCM cursor),
874 "Return @code{#t} if @var{cursor} has reached the end of a\n"
875 "character set, @code{#f} otherwise.")
876 #define FUNC_NAME s_scm_end_of_char_set_p
877 {
878 scm_t_char_set_cursor *cur_data;
879 SCM_VALIDATE_SMOB (1, cursor, charset_cursor);
880
881 cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
882 if (cur_data->range == (size_t) (-1))
883 return SCM_BOOL_T;
884
885 return SCM_BOOL_F;
886 }
887 #undef FUNC_NAME
888
889
890 SCM_DEFINE (scm_char_set_fold, "char-set-fold", 3, 0, 0,
891 (SCM kons, SCM knil, SCM cs),
892 "Fold the procedure @var{kons} over the character set @var{cs},\n"
893 "initializing it with @var{knil}.")
894 #define FUNC_NAME s_scm_char_set_fold
895 {
896 scm_t_char_set *cs_data;
897 int k;
898 scm_t_wchar n;
899
900 SCM_VALIDATE_PROC (1, kons);
901 SCM_VALIDATE_SMOB (3, cs, charset);
902
903 cs_data = SCM_CHARSET_DATA (cs);
904
905 if (cs_data->len == 0)
906 return knil;
907
908 for (k = 0; k < cs_data->len; k++)
909 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
910 {
911 knil = scm_call_2 (kons, SCM_MAKE_CHAR (n), knil);
912 }
913 return knil;
914 }
915 #undef FUNC_NAME
916
917
918 SCM_DEFINE (scm_char_set_unfold, "char-set-unfold", 4, 1, 0,
919 (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
920 "This is a fundamental constructor for character sets.\n"
921 "@itemize @bullet\n"
922 "@item @var{g} is used to generate a series of ``seed'' values\n"
923 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
924 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
925 "@item @var{p} tells us when to stop -- when it returns true\n"
926 "when applied to one of the seed values.\n"
927 "@item @var{f} maps each seed value to a character. These\n"
928 "characters are added to the base character set @var{base_cs} to\n"
929 "form the result; @var{base_cs} defaults to the empty set.\n"
930 "@end itemize")
931 #define FUNC_NAME s_scm_char_set_unfold
932 {
933 SCM result, tmp;
934
935 SCM_VALIDATE_PROC (1, p);
936 SCM_VALIDATE_PROC (2, f);
937 SCM_VALIDATE_PROC (3, g);
938 if (!SCM_UNBNDP (base_cs))
939 {
940 SCM_VALIDATE_SMOB (5, base_cs, charset);
941 result = scm_char_set_copy (base_cs);
942 }
943 else
944 result = make_char_set (FUNC_NAME);
945
946 tmp = scm_call_1 (p, seed);
947 while (scm_is_false (tmp))
948 {
949 SCM ch = scm_call_1 (f, seed);
950 if (!SCM_CHARP (ch))
951 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
952 SCM_CHARSET_SET (result, SCM_CHAR (ch));
953
954 seed = scm_call_1 (g, seed);
955 tmp = scm_call_1 (p, seed);
956 }
957 return result;
958 }
959 #undef FUNC_NAME
960
961
962 SCM_DEFINE (scm_char_set_unfold_x, "char-set-unfold!", 5, 0, 0,
963 (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
964 "This is a fundamental constructor for character sets.\n"
965 "@itemize @bullet\n"
966 "@item @var{g} is used to generate a series of ``seed'' values\n"
967 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
968 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
969 "@item @var{p} tells us when to stop -- when it returns true\n"
970 "when applied to one of the seed values.\n"
971 "@item @var{f} maps each seed value to a character. These\n"
972 "characters are added to the base character set @var{base_cs} to\n"
973 "form the result; @var{base_cs} defaults to the empty set.\n"
974 "@end itemize")
975 #define FUNC_NAME s_scm_char_set_unfold_x
976 {
977 SCM tmp;
978
979 SCM_VALIDATE_PROC (1, p);
980 SCM_VALIDATE_PROC (2, f);
981 SCM_VALIDATE_PROC (3, g);
982 SCM_VALIDATE_SMOB (5, base_cs, charset);
983
984 tmp = scm_call_1 (p, seed);
985 while (scm_is_false (tmp))
986 {
987 SCM ch = scm_call_1 (f, seed);
988 if (!SCM_CHARP (ch))
989 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
990 SCM_CHARSET_SET (base_cs, SCM_CHAR (ch));
991
992 seed = scm_call_1 (g, seed);
993 tmp = scm_call_1 (p, seed);
994 }
995 return base_cs;
996 }
997 #undef FUNC_NAME
998
999
1000 SCM_DEFINE (scm_char_set_for_each, "char-set-for-each", 2, 0, 0,
1001 (SCM proc, SCM cs),
1002 "Apply @var{proc} to every character in the character set\n"
1003 "@var{cs}. The return value is not specified.")
1004 #define FUNC_NAME s_scm_char_set_for_each
1005 {
1006 scm_t_char_set *cs_data;
1007 int k;
1008 scm_t_wchar n;
1009
1010 SCM_VALIDATE_PROC (1, proc);
1011 SCM_VALIDATE_SMOB (2, cs, charset);
1012
1013 cs_data = SCM_CHARSET_DATA (cs);
1014
1015 if (cs_data->len == 0)
1016 return SCM_UNSPECIFIED;
1017
1018 for (k = 0; k < cs_data->len; k++)
1019 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
1020 {
1021 scm_call_1 (proc, SCM_MAKE_CHAR (n));
1022 }
1023
1024 return SCM_UNSPECIFIED;
1025 }
1026 #undef FUNC_NAME
1027
1028
1029 SCM_DEFINE (scm_char_set_map, "char-set-map", 2, 0, 0,
1030 (SCM proc, SCM cs),
1031 "Map the procedure @var{proc} over every character in @var{cs}.\n"
1032 "@var{proc} must be a character -> character procedure.")
1033 #define FUNC_NAME s_scm_char_set_map
1034 {
1035 SCM result;
1036 int k;
1037 scm_t_char_set *cs_data;
1038 scm_t_wchar n;
1039
1040 SCM_VALIDATE_PROC (1, proc);
1041 SCM_VALIDATE_SMOB (2, cs, charset);
1042
1043 result = make_char_set (FUNC_NAME);
1044 cs_data = SCM_CHARSET_DATA (cs);
1045
1046 if (cs_data->len == 0)
1047 return result;
1048
1049 for (k = 0; k < cs_data->len; k++)
1050 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
1051 {
1052 SCM ch = scm_call_1 (proc, SCM_MAKE_CHAR (n));
1053 if (!SCM_CHARP (ch))
1054 SCM_MISC_ERROR ("procedure ~S returned non-char",
1055 scm_list_1 (proc));
1056 SCM_CHARSET_SET (result, SCM_CHAR (ch));
1057 }
1058 return result;
1059 }
1060 #undef FUNC_NAME
1061
1062
1063 SCM_DEFINE (scm_char_set_copy, "char-set-copy", 1, 0, 0,
1064 (SCM cs),
1065 "Return a newly allocated character set containing all\n"
1066 "characters in @var{cs}.")
1067 #define FUNC_NAME s_scm_char_set_copy
1068 {
1069 SCM ret;
1070 scm_t_char_set *p1, *p2;
1071
1072 SCM_VALIDATE_SMOB (1, cs, charset);
1073 ret = make_char_set (FUNC_NAME);
1074 p1 = SCM_CHARSET_DATA (cs);
1075 p2 = SCM_CHARSET_DATA (ret);
1076 p2->len = p1->len;
1077
1078 if (p1->len == 0)
1079 p2->ranges = NULL;
1080 else
1081 {
1082 p2->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * p1->len,
1083 "character-set");
1084 memcpy (p2->ranges, p1->ranges, sizeof (scm_t_char_range) * p1->len);
1085 }
1086
1087 return ret;
1088 }
1089 #undef FUNC_NAME
1090
1091
1092 SCM_DEFINE (scm_char_set, "char-set", 0, 0, 1,
1093 (SCM rest),
1094 "Return a character set containing all given characters.")
1095 #define FUNC_NAME s_scm_char_set
1096 {
1097 SCM cs;
1098 int argnum = 1;
1099
1100 SCM_VALIDATE_REST_ARGUMENT (rest);
1101 cs = make_char_set (FUNC_NAME);
1102 while (!scm_is_null (rest))
1103 {
1104 scm_t_wchar c;
1105
1106 SCM_VALIDATE_CHAR_COPY (argnum, SCM_CAR (rest), c);
1107 argnum++;
1108 rest = SCM_CDR (rest);
1109 SCM_CHARSET_SET (cs, c);
1110 }
1111 return cs;
1112 }
1113 #undef FUNC_NAME
1114
1115
1116 SCM_DEFINE (scm_list_to_char_set, "list->char-set", 1, 1, 0,
1117 (SCM list, SCM base_cs),
1118 "Convert the character list @var{list} to a character set. If\n"
1119 "the character set @var{base_cs} is given, the character in this\n"
1120 "set are also included in the result.")
1121 #define FUNC_NAME s_scm_list_to_char_set
1122 {
1123 SCM cs;
1124
1125 SCM_VALIDATE_LIST (1, list);
1126 if (SCM_UNBNDP (base_cs))
1127 cs = make_char_set (FUNC_NAME);
1128 else
1129 {
1130 SCM_VALIDATE_SMOB (2, base_cs, charset);
1131 cs = scm_char_set_copy (base_cs);
1132 }
1133 while (!scm_is_null (list))
1134 {
1135 SCM chr = SCM_CAR (list);
1136 scm_t_wchar c;
1137
1138 SCM_VALIDATE_CHAR_COPY (0, chr, c);
1139 list = SCM_CDR (list);
1140
1141
1142 SCM_CHARSET_SET (cs, c);
1143 }
1144 return cs;
1145 }
1146 #undef FUNC_NAME
1147
1148
1149 SCM_DEFINE (scm_list_to_char_set_x, "list->char-set!", 2, 0, 0,
1150 (SCM list, SCM base_cs),
1151 "Convert the character list @var{list} to a character set. The\n"
1152 "characters are added to @var{base_cs} and @var{base_cs} is\n"
1153 "returned.")
1154 #define FUNC_NAME s_scm_list_to_char_set_x
1155 {
1156 SCM_VALIDATE_LIST (1, list);
1157 SCM_VALIDATE_SMOB (2, base_cs, charset);
1158 while (!scm_is_null (list))
1159 {
1160 SCM chr = SCM_CAR (list);
1161 scm_t_wchar c;
1162
1163 SCM_VALIDATE_CHAR_COPY (0, chr, c);
1164 list = SCM_CDR (list);
1165
1166 SCM_CHARSET_SET (base_cs, c);
1167 }
1168 return base_cs;
1169 }
1170 #undef FUNC_NAME
1171
1172
1173 SCM_DEFINE (scm_string_to_char_set, "string->char-set", 1, 1, 0,
1174 (SCM str, SCM base_cs),
1175 "Convert the string @var{str} to a character set. If the\n"
1176 "character set @var{base_cs} is given, the characters in this\n"
1177 "set are also included in the result.")
1178 #define FUNC_NAME s_scm_string_to_char_set
1179 {
1180 SCM cs;
1181 size_t k = 0, len;
1182
1183 SCM_VALIDATE_STRING (1, str);
1184 if (SCM_UNBNDP (base_cs))
1185 cs = make_char_set (FUNC_NAME);
1186 else
1187 {
1188 SCM_VALIDATE_SMOB (2, base_cs, charset);
1189 cs = scm_char_set_copy (base_cs);
1190 }
1191 len = scm_i_string_length (str);
1192 while (k < len)
1193 {
1194 scm_t_wchar c = scm_i_string_ref (str, k++);
1195 SCM_CHARSET_SET (cs, c);
1196 }
1197 scm_remember_upto_here_1 (str);
1198 return cs;
1199 }
1200 #undef FUNC_NAME
1201
1202
1203 SCM_DEFINE (scm_string_to_char_set_x, "string->char-set!", 2, 0, 0,
1204 (SCM str, SCM base_cs),
1205 "Convert the string @var{str} to a character set. The\n"
1206 "characters from the string are added to @var{base_cs}, and\n"
1207 "@var{base_cs} is returned.")
1208 #define FUNC_NAME s_scm_string_to_char_set_x
1209 {
1210 size_t k = 0, len;
1211
1212 SCM_VALIDATE_STRING (1, str);
1213 SCM_VALIDATE_SMOB (2, base_cs, charset);
1214 len = scm_i_string_length (str);
1215 while (k < len)
1216 {
1217 scm_t_wchar c = scm_i_string_ref (str, k++);
1218 SCM_CHARSET_SET (base_cs, c);
1219 }
1220 scm_remember_upto_here_1 (str);
1221 return base_cs;
1222 }
1223 #undef FUNC_NAME
1224
1225
1226 SCM_DEFINE (scm_char_set_filter, "char-set-filter", 2, 1, 0,
1227 (SCM pred, SCM cs, SCM base_cs),
1228 "Return a character set containing every character from @var{cs}\n"
1229 "so that it satisfies @var{pred}. If provided, the characters\n"
1230 "from @var{base_cs} are added to the result.")
1231 #define FUNC_NAME s_scm_char_set_filter
1232 {
1233 SCM ret;
1234 int k;
1235 scm_t_wchar n;
1236 scm_t_char_set *p;
1237
1238 SCM_VALIDATE_PROC (1, pred);
1239 SCM_VALIDATE_SMOB (2, cs, charset);
1240 if (!SCM_UNBNDP (base_cs))
1241 {
1242 SCM_VALIDATE_SMOB (3, base_cs, charset);
1243 ret = scm_char_set_copy (base_cs);
1244 }
1245 else
1246 ret = make_char_set (FUNC_NAME);
1247
1248 p = SCM_CHARSET_DATA (cs);
1249
1250 if (p->len == 0)
1251 return ret;
1252
1253 for (k = 0; k < p->len; k++)
1254 for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++)
1255 {
1256 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1257
1258 if (scm_is_true (res))
1259 SCM_CHARSET_SET (ret, n);
1260 }
1261 return ret;
1262 }
1263 #undef FUNC_NAME
1264
1265
1266 SCM_DEFINE (scm_char_set_filter_x, "char-set-filter!", 3, 0, 0,
1267 (SCM pred, SCM cs, SCM base_cs),
1268 "Return a character set containing every character from @var{cs}\n"
1269 "so that it satisfies @var{pred}. The characters are added to\n"
1270 "@var{base_cs} and @var{base_cs} is returned.")
1271 #define FUNC_NAME s_scm_char_set_filter_x
1272 {
1273 int k;
1274 scm_t_wchar n;
1275 scm_t_char_set *p;
1276
1277 SCM_VALIDATE_PROC (1, pred);
1278 SCM_VALIDATE_SMOB (2, cs, charset);
1279 SCM_VALIDATE_SMOB (3, base_cs, charset);
1280 p = SCM_CHARSET_DATA (cs);
1281 if (p->len == 0)
1282 return base_cs;
1283
1284 for (k = 0; k < p->len; k++)
1285 for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++)
1286 {
1287 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1288
1289 if (scm_is_true (res))
1290 SCM_CHARSET_SET (base_cs, n);
1291 }
1292 return base_cs;
1293 }
1294 #undef FUNC_NAME
1295
1296
1297 /* Return a character set containing all the characters from [LOWER,UPPER),
1298 giving range errors if ERROR, adding chars from BASE_CS, and recycling
1299 BASE_CS if REUSE is true. */
1300 static SCM
1301 scm_i_ucs_range_to_char_set (const char *FUNC_NAME, SCM lower, SCM upper,
1302 SCM error, SCM base_cs, int reuse)
1303 {
1304 SCM cs;
1305 size_t clower, cupper;
1306
1307 clower = scm_to_size_t (lower);
1308 cupper = scm_to_size_t (upper) - 1;
1309 SCM_ASSERT_RANGE (2, upper, cupper >= clower);
1310 if (!SCM_UNBNDP (error))
1311 {
1312 if (scm_is_true (error))
1313 {
1314 SCM_ASSERT_RANGE (1, lower, SCM_IS_UNICODE_CHAR (clower));
1315 SCM_ASSERT_RANGE (2, upper, SCM_IS_UNICODE_CHAR (cupper));
1316 if (clower < SCM_CODEPOINT_SURROGATE_START
1317 && cupper > SCM_CODEPOINT_SURROGATE_END)
1318 scm_error(scm_out_of_range_key,
1319 FUNC_NAME, "invalid range - contains surrogate characters: ~S to ~S",
1320 scm_list_2 (lower, upper), scm_list_1 (upper));
1321 }
1322 }
1323
1324 if (SCM_UNBNDP (base_cs))
1325 cs = make_char_set (FUNC_NAME);
1326 else
1327 {
1328 SCM_VALIDATE_SMOB (3, base_cs, charset);
1329 if (reuse)
1330 cs = base_cs;
1331 else
1332 cs = scm_char_set_copy (base_cs);
1333 }
1334
1335 if ((clower >= SCM_CODEPOINT_SURROGATE_START && clower <= SCM_CODEPOINT_SURROGATE_END)
1336 && (cupper >= SCM_CODEPOINT_SURROGATE_START && cupper <= SCM_CODEPOINT_SURROGATE_END))
1337 return cs;
1338
1339 if (clower > SCM_CODEPOINT_MAX)
1340 clower = SCM_CODEPOINT_MAX;
1341 if (clower >= SCM_CODEPOINT_SURROGATE_START && clower <= SCM_CODEPOINT_SURROGATE_END)
1342 clower = SCM_CODEPOINT_SURROGATE_END + 1;
1343 if (cupper > SCM_CODEPOINT_MAX)
1344 cupper = SCM_CODEPOINT_MAX;
1345 if (cupper >= SCM_CODEPOINT_SURROGATE_START && cupper <= SCM_CODEPOINT_SURROGATE_END)
1346 cupper = SCM_CODEPOINT_SURROGATE_START - 1;
1347 if (clower < SCM_CODEPOINT_SURROGATE_START && cupper > SCM_CODEPOINT_SURROGATE_END)
1348 {
1349 scm_i_charset_set_range (SCM_CHARSET_DATA (cs), clower, SCM_CODEPOINT_SURROGATE_START - 1);
1350 scm_i_charset_set_range (SCM_CHARSET_DATA (cs), SCM_CODEPOINT_SURROGATE_END + 1, cupper);
1351 }
1352 else
1353 scm_i_charset_set_range (SCM_CHARSET_DATA (cs), clower, cupper);
1354 return cs;
1355 }
1356
1357 SCM_DEFINE (scm_ucs_range_to_char_set, "ucs-range->char-set", 2, 2, 0,
1358 (SCM lower, SCM upper, SCM error, SCM base_cs),
1359 "Return a character set containing all characters whose\n"
1360 "character codes lie in the half-open range\n"
1361 "[@var{lower},@var{upper}).\n"
1362 "\n"
1363 "If @var{error} is a true value, an error is signalled if the\n"
1364 "specified range contains characters which are not valid\n"
1365 "Unicode code points. If @var{error} is @code{#f},\n"
1366 "these characters are silently left out of the resulting\n"
1367 "character set.\n"
1368 "\n"
1369 "The characters in @var{base_cs} are added to the result, if\n"
1370 "given.")
1371 #define FUNC_NAME s_scm_ucs_range_to_char_set
1372 {
1373 return scm_i_ucs_range_to_char_set (FUNC_NAME, lower, upper,
1374 error, base_cs, 0);
1375 }
1376 #undef FUNC_NAME
1377
1378
1379 SCM_DEFINE (scm_ucs_range_to_char_set_x, "ucs-range->char-set!", 4, 0, 0,
1380 (SCM lower, SCM upper, SCM error, SCM base_cs),
1381 "Return a character set containing all characters whose\n"
1382 "character codes lie in the half-open range\n"
1383 "[@var{lower},@var{upper}).\n"
1384 "\n"
1385 "If @var{error} is a true value, an error is signalled if the\n"
1386 "specified range contains characters which are not contained in\n"
1387 "the implemented character range. If @var{error} is @code{#f},\n"
1388 "these characters are silently left out of the resulting\n"
1389 "character set.\n"
1390 "\n"
1391 "The characters are added to @var{base_cs} and @var{base_cs} is\n"
1392 "returned.")
1393 #define FUNC_NAME s_scm_ucs_range_to_char_set_x
1394 {
1395 SCM_VALIDATE_SMOB (4, base_cs, charset);
1396 return scm_i_ucs_range_to_char_set (FUNC_NAME, lower, upper,
1397 error, base_cs, 1);
1398 }
1399 #undef FUNC_NAME
1400
1401 SCM_DEFINE (scm_to_char_set, "->char-set", 1, 0, 0,
1402 (SCM x),
1403 "Coerces x into a char-set. @var{x} may be a string, character or char-set. A string is converted to the set of its constituent characters; a character is converted to a singleton set; a char-set is returned as-is.")
1404 #define FUNC_NAME s_scm_to_char_set
1405 {
1406 if (scm_is_string (x))
1407 return scm_string_to_char_set (x, SCM_UNDEFINED);
1408 else if (SCM_CHARP (x))
1409 return scm_char_set (scm_list_1 (x));
1410 else if (SCM_SMOB_PREDICATE (scm_tc16_charset, x))
1411 return x;
1412 else
1413 scm_wrong_type_arg (NULL, 0, x);
1414 }
1415 #undef FUNC_NAME
1416
1417 SCM_DEFINE (scm_char_set_size, "char-set-size", 1, 0, 0,
1418 (SCM cs),
1419 "Return the number of elements in character set @var{cs}.")
1420 #define FUNC_NAME s_scm_char_set_size
1421 {
1422 int k, count = 0;
1423 scm_t_char_set *cs_data;
1424
1425 SCM_VALIDATE_SMOB (1, cs, charset);
1426 cs_data = SCM_CHARSET_DATA (cs);
1427
1428 if (cs_data->len == 0)
1429 return scm_from_int (0);
1430
1431 for (k = 0; k < cs_data->len; k++)
1432 count += cs_data->ranges[k].hi - cs_data->ranges[k].lo + 1;
1433
1434 return scm_from_int (count);
1435 }
1436 #undef FUNC_NAME
1437
1438
1439 SCM_DEFINE (scm_char_set_count, "char-set-count", 2, 0, 0,
1440 (SCM pred, SCM cs),
1441 "Return the number of the elements int the character set\n"
1442 "@var{cs} which satisfy the predicate @var{pred}.")
1443 #define FUNC_NAME s_scm_char_set_count
1444 {
1445 int k, count = 0;
1446 scm_t_wchar n;
1447 scm_t_char_set *cs_data;
1448
1449 SCM_VALIDATE_PROC (1, pred);
1450 SCM_VALIDATE_SMOB (2, cs, charset);
1451 cs_data = SCM_CHARSET_DATA (cs);
1452 if (cs_data->len == 0)
1453 return scm_from_int (0);
1454
1455 for (k = 0; k < cs_data->len; k++)
1456 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
1457 {
1458 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1459 if (scm_is_true (res))
1460 count++;
1461 }
1462 return SCM_I_MAKINUM (count);
1463 }
1464 #undef FUNC_NAME
1465
1466
1467 SCM_DEFINE (scm_char_set_to_list, "char-set->list", 1, 0, 0,
1468 (SCM cs),
1469 "Return a list containing the elements of the character set\n"
1470 "@var{cs}.")
1471 #define FUNC_NAME s_scm_char_set_to_list
1472 {
1473 int k;
1474 scm_t_wchar n;
1475 SCM result = SCM_EOL;
1476 scm_t_char_set *p;
1477
1478 SCM_VALIDATE_SMOB (1, cs, charset);
1479 p = SCM_CHARSET_DATA (cs);
1480 if (p->len == 0)
1481 return SCM_EOL;
1482
1483 for (k = p->len - 1; k >= 0; k--)
1484 for (n = p->ranges[k].hi; n >= p->ranges[k].lo; n--)
1485 result = scm_cons (SCM_MAKE_CHAR (n), result);
1486 return result;
1487 }
1488 #undef FUNC_NAME
1489
1490
1491 SCM_DEFINE (scm_char_set_to_string, "char-set->string", 1, 0, 0,
1492 (SCM cs),
1493 "Return a string containing the elements of the character set\n"
1494 "@var{cs}. The order in which the characters are placed in the\n"
1495 "string is not defined.")
1496 #define FUNC_NAME s_scm_char_set_to_string
1497 {
1498 int k;
1499 int count = 0;
1500 int idx = 0;
1501 int wide = 0;
1502 SCM result;
1503 scm_t_wchar n;
1504 scm_t_char_set *cs_data;
1505 char *buf;
1506 scm_t_wchar *wbuf;
1507
1508 SCM_VALIDATE_SMOB (1, cs, charset);
1509 cs_data = SCM_CHARSET_DATA (cs);
1510 if (cs_data->len == 0)
1511 return scm_nullstr;
1512
1513 if (cs_data->ranges[cs_data->len - 1].hi > 255)
1514 wide = 1;
1515
1516 count = scm_to_int (scm_char_set_size (cs));
1517 if (wide)
1518 result = scm_i_make_wide_string (count, &wbuf, 0);
1519 else
1520 result = scm_i_make_string (count, &buf, 0);
1521
1522 for (k = 0; k < cs_data->len; k++)
1523 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
1524 {
1525 if (wide)
1526 wbuf[idx++] = n;
1527 else
1528 buf[idx++] = n;
1529 }
1530 return result;
1531 }
1532 #undef FUNC_NAME
1533
1534
1535 SCM_DEFINE (scm_char_set_contains_p, "char-set-contains?", 2, 0, 0,
1536 (SCM cs, SCM ch),
1537 "Return @code{#t} iff the character @var{ch} is contained in the\n"
1538 "character set @var{cs}.")
1539 #define FUNC_NAME s_scm_char_set_contains_p
1540 {
1541 SCM_VALIDATE_SMOB (1, cs, charset);
1542 SCM_VALIDATE_CHAR (2, ch);
1543 return scm_from_bool (SCM_CHARSET_GET (cs, SCM_CHAR (ch)));
1544 }
1545 #undef FUNC_NAME
1546
1547
1548 SCM_DEFINE (scm_char_set_every, "char-set-every", 2, 0, 0,
1549 (SCM pred, SCM cs),
1550 "Return a true value if every character in the character set\n"
1551 "@var{cs} satisfies the predicate @var{pred}.")
1552 #define FUNC_NAME s_scm_char_set_every
1553 {
1554 int k;
1555 scm_t_wchar n;
1556 SCM res = SCM_BOOL_T;
1557 scm_t_char_set *cs_data;
1558
1559 SCM_VALIDATE_PROC (1, pred);
1560 SCM_VALIDATE_SMOB (2, cs, charset);
1561
1562 cs_data = SCM_CHARSET_DATA (cs);
1563 if (cs_data->len == 0)
1564 return SCM_BOOL_T;
1565
1566 for (k = 0; k < cs_data->len; k++)
1567 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
1568 {
1569 res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1570 if (scm_is_false (res))
1571 return res;
1572 }
1573 return SCM_BOOL_T;
1574 }
1575 #undef FUNC_NAME
1576
1577
1578 SCM_DEFINE (scm_char_set_any, "char-set-any", 2, 0, 0,
1579 (SCM pred, SCM cs),
1580 "Return a true value if any character in the character set\n"
1581 "@var{cs} satisfies the predicate @var{pred}.")
1582 #define FUNC_NAME s_scm_char_set_any
1583 {
1584 int k;
1585 scm_t_wchar n;
1586 scm_t_char_set *cs_data;
1587
1588 SCM_VALIDATE_PROC (1, pred);
1589 SCM_VALIDATE_SMOB (2, cs, charset);
1590
1591 cs_data = SCM_CHARSET_DATA (cs);
1592 if (cs_data->len == 0)
1593 return SCM_BOOL_T;
1594
1595 for (k = 0; k < cs_data->len; k++)
1596 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
1597 {
1598 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1599 if (scm_is_true (res))
1600 return res;
1601 }
1602 return SCM_BOOL_F;
1603 }
1604 #undef FUNC_NAME
1605
1606
1607 SCM_DEFINE (scm_char_set_adjoin, "char-set-adjoin", 1, 0, 1,
1608 (SCM cs, SCM rest),
1609 "Add all character arguments to the first argument, which must\n"
1610 "be a character set.")
1611 #define FUNC_NAME s_scm_char_set_adjoin
1612 {
1613 SCM_VALIDATE_SMOB (1, cs, charset);
1614 SCM_VALIDATE_REST_ARGUMENT (rest);
1615 cs = scm_char_set_copy (cs);
1616
1617 while (!scm_is_null (rest))
1618 {
1619 SCM chr = SCM_CAR (rest);
1620 scm_t_wchar c;
1621
1622 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1623 rest = SCM_CDR (rest);
1624
1625 SCM_CHARSET_SET (cs, c);
1626 }
1627 return cs;
1628 }
1629 #undef FUNC_NAME
1630
1631
1632 SCM_DEFINE (scm_char_set_delete, "char-set-delete", 1, 0, 1,
1633 (SCM cs, SCM rest),
1634 "Delete all character arguments from the first argument, which\n"
1635 "must be a character set.")
1636 #define FUNC_NAME s_scm_char_set_delete
1637 {
1638 SCM_VALIDATE_SMOB (1, cs, charset);
1639 SCM_VALIDATE_REST_ARGUMENT (rest);
1640 cs = scm_char_set_copy (cs);
1641
1642 while (!scm_is_null (rest))
1643 {
1644 SCM chr = SCM_CAR (rest);
1645 scm_t_wchar c;
1646
1647 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1648 rest = SCM_CDR (rest);
1649
1650 SCM_CHARSET_UNSET (cs, c);
1651 }
1652 return cs;
1653 }
1654 #undef FUNC_NAME
1655
1656
1657 SCM_DEFINE (scm_char_set_adjoin_x, "char-set-adjoin!", 1, 0, 1,
1658 (SCM cs, SCM rest),
1659 "Add all character arguments to the first argument, which must\n"
1660 "be a character set.")
1661 #define FUNC_NAME s_scm_char_set_adjoin_x
1662 {
1663 SCM_VALIDATE_SMOB (1, cs, charset);
1664 SCM_VALIDATE_REST_ARGUMENT (rest);
1665
1666 while (!scm_is_null (rest))
1667 {
1668 SCM chr = SCM_CAR (rest);
1669 scm_t_wchar c;
1670
1671 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1672 rest = SCM_CDR (rest);
1673
1674 SCM_CHARSET_SET (cs, c);
1675 }
1676 return cs;
1677 }
1678 #undef FUNC_NAME
1679
1680
1681 SCM_DEFINE (scm_char_set_delete_x, "char-set-delete!", 1, 0, 1,
1682 (SCM cs, SCM rest),
1683 "Delete all character arguments from the first argument, which\n"
1684 "must be a character set.")
1685 #define FUNC_NAME s_scm_char_set_delete_x
1686 {
1687 SCM_VALIDATE_SMOB (1, cs, charset);
1688 SCM_VALIDATE_REST_ARGUMENT (rest);
1689
1690 while (!scm_is_null (rest))
1691 {
1692 SCM chr = SCM_CAR (rest);
1693 scm_t_wchar c;
1694
1695 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1696 rest = SCM_CDR (rest);
1697
1698 SCM_CHARSET_UNSET (cs, c);
1699 }
1700 return cs;
1701 }
1702 #undef FUNC_NAME
1703
1704
1705 SCM_DEFINE (scm_char_set_complement, "char-set-complement", 1, 0, 0,
1706 (SCM cs), "Return the complement of the character set @var{cs}.")
1707 #define FUNC_NAME s_scm_char_set_complement
1708 {
1709 SCM res;
1710 scm_t_char_set *p, *q;
1711
1712 SCM_VALIDATE_SMOB (1, cs, charset);
1713
1714 res = make_char_set (FUNC_NAME);
1715 p = SCM_CHARSET_DATA (res);
1716 q = SCM_CHARSET_DATA (cs);
1717
1718 charsets_complement (p, q);
1719 return res;
1720 }
1721 #undef FUNC_NAME
1722
1723
1724 SCM_DEFINE (scm_char_set_union, "char-set-union", 0, 0, 1,
1725 (SCM rest),
1726 "Return the union of all argument character sets.")
1727 #define FUNC_NAME s_scm_char_set_union
1728 {
1729 int c = 1;
1730 SCM res;
1731 scm_t_char_set *p;
1732
1733 SCM_VALIDATE_REST_ARGUMENT (rest);
1734
1735 res = make_char_set (FUNC_NAME);
1736 p = SCM_CHARSET_DATA (res);
1737 while (!scm_is_null (rest))
1738 {
1739 SCM cs = SCM_CAR (rest);
1740 SCM_VALIDATE_SMOB (c, cs, charset);
1741 c++;
1742 rest = SCM_CDR (rest);
1743
1744
1745 charsets_union (p, (scm_t_char_set *) SCM_SMOB_DATA (cs));
1746 }
1747 return res;
1748 }
1749 #undef FUNC_NAME
1750
1751
1752 SCM_DEFINE (scm_char_set_intersection, "char-set-intersection", 0, 0, 1,
1753 (SCM rest),
1754 "Return the intersection of all argument character sets.")
1755 #define FUNC_NAME s_scm_char_set_intersection
1756 {
1757 SCM res;
1758
1759 SCM_VALIDATE_REST_ARGUMENT (rest);
1760
1761 if (scm_is_null (rest))
1762 res = make_char_set (FUNC_NAME);
1763 else
1764 {
1765 scm_t_char_set *p;
1766 int argnum = 2;
1767
1768 res = scm_char_set_copy (SCM_CAR (rest));
1769 p = SCM_CHARSET_DATA (res);
1770 rest = SCM_CDR (rest);
1771
1772 while (scm_is_pair (rest))
1773 {
1774 SCM cs = SCM_CAR (rest);
1775 scm_t_char_set *cs_data;
1776
1777 SCM_VALIDATE_SMOB (argnum, cs, charset);
1778 argnum++;
1779 cs_data = SCM_CHARSET_DATA (cs);
1780 rest = SCM_CDR (rest);
1781 charsets_intersection (p, cs_data);
1782 }
1783 }
1784
1785 return res;
1786 }
1787 #undef FUNC_NAME
1788
1789
1790 SCM_DEFINE (scm_char_set_difference, "char-set-difference", 1, 0, 1,
1791 (SCM cs1, SCM rest),
1792 "Return the difference of all argument character sets.")
1793 #define FUNC_NAME s_scm_char_set_difference
1794 {
1795 int c = 2;
1796 SCM res, compl;
1797 scm_t_char_set *p, *q;
1798
1799 SCM_VALIDATE_SMOB (1, cs1, charset);
1800 SCM_VALIDATE_REST_ARGUMENT (rest);
1801
1802 res = scm_char_set_copy (cs1);
1803 p = SCM_CHARSET_DATA (res);
1804 compl = make_char_set (FUNC_NAME);
1805 q = SCM_CHARSET_DATA (compl);
1806 while (!scm_is_null (rest))
1807 {
1808 SCM cs = SCM_CAR (rest);
1809 SCM_VALIDATE_SMOB (c, cs, charset);
1810 c++;
1811 rest = SCM_CDR (rest);
1812
1813 charsets_complement (q, SCM_CHARSET_DATA (cs));
1814 charsets_intersection (p, q);
1815 }
1816 return res;
1817 }
1818 #undef FUNC_NAME
1819
1820
1821 SCM_DEFINE (scm_char_set_xor, "char-set-xor", 0, 0, 1,
1822 (SCM rest),
1823 "Return the exclusive-or of all argument character sets.")
1824 #define FUNC_NAME s_scm_char_set_xor
1825 {
1826 SCM res;
1827
1828 SCM_VALIDATE_REST_ARGUMENT (rest);
1829
1830 if (scm_is_null (rest))
1831 res = make_char_set (FUNC_NAME);
1832 else
1833 {
1834 int argnum = 2;
1835 scm_t_char_set *p;
1836
1837 res = scm_char_set_copy (SCM_CAR (rest));
1838 p = SCM_CHARSET_DATA (res);
1839 rest = SCM_CDR (rest);
1840
1841 while (scm_is_pair (rest))
1842 {
1843 SCM cs = SCM_CAR (rest);
1844 scm_t_char_set *cs_data;
1845
1846 SCM_VALIDATE_SMOB (argnum, cs, charset);
1847 argnum++;
1848 cs_data = SCM_CHARSET_DATA (cs);
1849 rest = SCM_CDR (rest);
1850
1851 charsets_xor (p, cs_data);
1852 }
1853 }
1854 return res;
1855 }
1856 #undef FUNC_NAME
1857
1858
1859 SCM_DEFINE (scm_char_set_diff_plus_intersection, "char-set-diff+intersection", 1, 0, 1,
1860 (SCM cs1, SCM rest),
1861 "Return the difference and the intersection of all argument\n"
1862 "character sets.")
1863 #define FUNC_NAME s_scm_char_set_diff_plus_intersection
1864 {
1865 int c = 2;
1866 SCM res1, res2;
1867 scm_t_char_set *p, *q;
1868
1869 SCM_VALIDATE_SMOB (1, cs1, charset);
1870 SCM_VALIDATE_REST_ARGUMENT (rest);
1871
1872 res1 = scm_char_set_copy (cs1);
1873 res2 = make_char_set (FUNC_NAME);
1874 p = SCM_CHARSET_DATA (res1);
1875 q = SCM_CHARSET_DATA (res2);
1876 while (!scm_is_null (rest))
1877 {
1878 SCM cs = SCM_CAR (rest);
1879 scm_t_char_set *r;
1880
1881 SCM_VALIDATE_SMOB (c, cs, charset);
1882 c++;
1883 r = SCM_CHARSET_DATA (cs);
1884
1885 charsets_union (q, r);
1886 charsets_intersection (p, r);
1887 rest = SCM_CDR (rest);
1888 }
1889 return scm_values (scm_list_2 (res1, res2));
1890 }
1891 #undef FUNC_NAME
1892
1893
1894 SCM_DEFINE (scm_char_set_complement_x, "char-set-complement!", 1, 0, 0,
1895 (SCM cs), "Return the complement of the character set @var{cs}.")
1896 #define FUNC_NAME s_scm_char_set_complement_x
1897 {
1898 SCM_VALIDATE_SMOB (1, cs, charset);
1899 cs = scm_char_set_complement (cs);
1900 return cs;
1901 }
1902 #undef FUNC_NAME
1903
1904
1905 SCM_DEFINE (scm_char_set_union_x, "char-set-union!", 1, 0, 1,
1906 (SCM cs1, SCM rest),
1907 "Return the union of all argument character sets.")
1908 #define FUNC_NAME s_scm_char_set_union_x
1909 {
1910 SCM_VALIDATE_SMOB (1, cs1, charset);
1911 SCM_VALIDATE_REST_ARGUMENT (rest);
1912
1913 cs1 = scm_char_set_union (scm_cons (cs1, rest));
1914 return cs1;
1915 }
1916 #undef FUNC_NAME
1917
1918
1919 SCM_DEFINE (scm_char_set_intersection_x, "char-set-intersection!", 1, 0, 1,
1920 (SCM cs1, SCM rest),
1921 "Return the intersection of all argument character sets.")
1922 #define FUNC_NAME s_scm_char_set_intersection_x
1923 {
1924 SCM_VALIDATE_SMOB (1, cs1, charset);
1925 SCM_VALIDATE_REST_ARGUMENT (rest);
1926
1927 cs1 = scm_char_set_intersection (scm_cons (cs1, rest));
1928 return cs1;
1929 }
1930 #undef FUNC_NAME
1931
1932
1933 SCM_DEFINE (scm_char_set_difference_x, "char-set-difference!", 1, 0, 1,
1934 (SCM cs1, SCM rest),
1935 "Return the difference of all argument character sets.")
1936 #define FUNC_NAME s_scm_char_set_difference_x
1937 {
1938 SCM_VALIDATE_SMOB (1, cs1, charset);
1939 SCM_VALIDATE_REST_ARGUMENT (rest);
1940
1941 cs1 = scm_char_set_difference (cs1, rest);
1942 return cs1;
1943 }
1944 #undef FUNC_NAME
1945
1946
1947 SCM_DEFINE (scm_char_set_xor_x, "char-set-xor!", 1, 0, 1,
1948 (SCM cs1, SCM rest),
1949 "Return the exclusive-or of all argument character sets.")
1950 #define FUNC_NAME s_scm_char_set_xor_x
1951 {
1952 /* a side-effecting variant should presumably give consistent results:
1953 (define a (char-set #\a))
1954 (char-set-xor a a a) -> char set #\a
1955 (char-set-xor! a a a) -> char set #\a
1956 */
1957 cs1 = scm_char_set_xor (scm_cons (cs1, rest));
1958 return cs1;
1959 }
1960 #undef FUNC_NAME
1961
1962
1963 SCM_DEFINE (scm_char_set_diff_plus_intersection_x,
1964 "char-set-diff+intersection!", 2, 0, 1, (SCM cs1, SCM cs2,
1965 SCM rest),
1966 "Return the difference and the intersection of all argument\n"
1967 "character sets.")
1968 #define FUNC_NAME s_scm_char_set_diff_plus_intersection_x
1969 {
1970 SCM diff, intersect;
1971
1972 diff = scm_char_set_difference (cs1, scm_cons (cs2, rest));
1973 intersect =
1974 scm_char_set_intersection (scm_cons (cs1, scm_cons (cs2, rest)));
1975 cs1 = diff;
1976 cs2 = intersect;
1977 return scm_values (scm_list_2 (cs1, cs2));
1978 }
1979 #undef FUNC_NAME
1980
1981 \f
1982
1983 /* Standard character sets. */
1984
1985 SCM scm_char_set_lower_case;
1986 SCM scm_char_set_upper_case;
1987 SCM scm_char_set_title_case;
1988 SCM scm_char_set_letter;
1989 SCM scm_char_set_digit;
1990 SCM scm_char_set_letter_and_digit;
1991 SCM scm_char_set_graphic;
1992 SCM scm_char_set_printing;
1993 SCM scm_char_set_whitespace;
1994 SCM scm_char_set_iso_control;
1995 SCM scm_char_set_punctuation;
1996 SCM scm_char_set_symbol;
1997 SCM scm_char_set_hex_digit;
1998 SCM scm_char_set_blank;
1999 SCM scm_char_set_ascii;
2000 SCM scm_char_set_empty;
2001 SCM scm_char_set_designated;
2002 SCM scm_char_set_full;
2003
2004
2005 /* Create an empty character set and return it after binding it to NAME. */
2006 static inline SCM
2007 define_charset (const char *name, const scm_t_char_set *p)
2008 {
2009 SCM cs;
2010
2011 SCM_NEWSMOB (cs, scm_tc16_charset, p);
2012 scm_c_define (name, cs);
2013 return cs;
2014 }
2015
2016 SCM_DEFINE (scm_sys_char_set_dump, "%char-set-dump", 1, 0, 0, (SCM charset),
2017 "Returns an association list containing debugging information\n"
2018 "for @var{charset}. The association list has the following entries."
2019 "@table @code\n"
2020 "@item char-set\n"
2021 "The char-set itself.\n"
2022 "@item len\n"
2023 "The number of character ranges the char-set contains\n"
2024 "@item ranges\n"
2025 "A list of lists where each sublist a range of code points\n"
2026 "and their associated characters"
2027 "@end table")
2028 #define FUNC_NAME s_scm_sys_char_set_dump
2029 {
2030 SCM e1, e2, e3;
2031 SCM ranges = SCM_EOL, elt;
2032 size_t i;
2033 scm_t_char_set *cs;
2034 char codepoint_string_lo[9], codepoint_string_hi[9];
2035
2036 SCM_VALIDATE_SMOB (1, charset, charset);
2037 cs = SCM_CHARSET_DATA (charset);
2038
2039 e1 = scm_cons (scm_from_latin1_symbol ("char-set"),
2040 charset);
2041 e2 = scm_cons (scm_from_latin1_symbol ("n"),
2042 scm_from_size_t (cs->len));
2043
2044 for (i = 0; i < cs->len; i++)
2045 {
2046 if (cs->ranges[i].lo > 0xFFFF)
2047 sprintf (codepoint_string_lo, "U+%06x", cs->ranges[i].lo);
2048 else
2049 sprintf (codepoint_string_lo, "U+%04x", cs->ranges[i].lo);
2050 if (cs->ranges[i].hi > 0xFFFF)
2051 sprintf (codepoint_string_hi, "U+%06x", cs->ranges[i].hi);
2052 else
2053 sprintf (codepoint_string_hi, "U+%04x", cs->ranges[i].hi);
2054
2055 elt = scm_list_4 (SCM_MAKE_CHAR (cs->ranges[i].lo),
2056 SCM_MAKE_CHAR (cs->ranges[i].hi),
2057 scm_from_locale_string (codepoint_string_lo),
2058 scm_from_locale_string (codepoint_string_hi));
2059 ranges = scm_append (scm_list_2 (ranges,
2060 scm_list_1 (elt)));
2061 }
2062 e3 = scm_cons (scm_from_latin1_symbol ("ranges"),
2063 ranges);
2064
2065 return scm_list_3 (e1, e2, e3);
2066 }
2067 #undef FUNC_NAME
2068
2069 \f
2070
2071
2072 void
2073 scm_init_srfi_14 (void)
2074 {
2075 scm_tc16_charset = scm_make_smob_type ("character-set", 0);
2076 scm_set_smob_print (scm_tc16_charset, charset_print);
2077
2078 scm_tc16_charset_cursor = scm_make_smob_type ("char-set-cursor", 0);
2079 scm_set_smob_print (scm_tc16_charset_cursor, charset_cursor_print);
2080
2081 scm_char_set_upper_case =
2082 define_charset ("char-set:upper-case", &cs_upper_case);
2083 scm_char_set_lower_case =
2084 define_charset ("char-set:lower-case", &cs_lower_case);
2085 scm_char_set_title_case =
2086 define_charset ("char-set:title-case", &cs_title_case);
2087 scm_char_set_letter = define_charset ("char-set:letter", &cs_letter);
2088 scm_char_set_digit = define_charset ("char-set:digit", &cs_digit);
2089 scm_char_set_letter_and_digit =
2090 define_charset ("char-set:letter+digit", &cs_letter_plus_digit);
2091 scm_char_set_graphic = define_charset ("char-set:graphic", &cs_graphic);
2092 scm_char_set_printing = define_charset ("char-set:printing", &cs_printing);
2093 scm_char_set_whitespace =
2094 define_charset ("char-set:whitespace", &cs_whitespace);
2095 scm_char_set_iso_control =
2096 define_charset ("char-set:iso-control", &cs_iso_control);
2097 scm_char_set_punctuation =
2098 define_charset ("char-set:punctuation", &cs_punctuation);
2099 scm_char_set_symbol = define_charset ("char-set:symbol", &cs_symbol);
2100 scm_char_set_hex_digit =
2101 define_charset ("char-set:hex-digit", &cs_hex_digit);
2102 scm_char_set_blank = define_charset ("char-set:blank", &cs_blank);
2103 scm_char_set_ascii = define_charset ("char-set:ascii", &cs_ascii);
2104 scm_char_set_empty = define_charset ("char-set:empty", &cs_empty);
2105 scm_char_set_designated = define_charset ("char-set:designated", &cs_designated);
2106 scm_char_set_full = define_charset ("char-set:full", &cs_full);
2107
2108 #include "libguile/srfi-14.x"
2109 }
2110
2111 /* End of srfi-14.c. */