Remove always-true range checks in scm_i_ucs_range_to_char_set
[bpt/guile.git] / libguile / srfi-14.c
1 /* srfi-14.c --- SRFI-14 procedures for Guile
2 *
3 * Copyright (C) 2001, 2004, 2006, 2007 Free Software Foundation, Inc.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public License
7 * as published by the Free Software Foundation; either version 3 of
8 * the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301 USA
19 */
20
21 #ifdef HAVE_CONFIG_H
22 # include <config.h>
23 #endif
24
25
26 #include <string.h>
27 #include <unictype.h>
28
29 #include "libguile.h"
30 #include "libguile/srfi-14.h"
31 #include "libguile/strings.h"
32 #include "libguile/chars.h"
33
34 /* Include the pre-computed standard charset data. */
35 #include "libguile/srfi-14.i.c"
36
37 #define SCM_CHARSET_DATA(charset) ((scm_t_char_set *) SCM_SMOB_DATA (charset))
38
39 #define SCM_CHARSET_SET(cs, idx) \
40 scm_i_charset_set (SCM_CHARSET_DATA (cs), idx)
41
42 #define SCM_CHARSET_UNSET(cs, idx) \
43 scm_i_charset_unset (SCM_CHARSET_DATA (cs), idx)
44
45 /* Smob type code for character sets. */
46 int scm_tc16_charset = 0;
47 int scm_tc16_charset_cursor = 0;
48
49 /* True if N exists in charset CS. */
50 int
51 scm_i_charset_get (scm_t_char_set *cs, scm_t_wchar n)
52 {
53 size_t i;
54
55 i = 0;
56 while (i < cs->len)
57 {
58 if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi)
59 return 1;
60 i++;
61 }
62
63 return 0;
64 }
65
66 /* Put N into charset CS. */
67 void
68 scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n)
69 {
70 size_t i;
71 size_t len;
72
73 len = cs->len;
74
75 i = 0;
76 while (i < len)
77 {
78 /* Already in this range */
79 if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi)
80 {
81 return;
82 }
83
84 if (n == cs->ranges[i].lo - 1)
85 {
86 /* This char is one below the current range. */
87 if (i > 0 && cs->ranges[i - 1].hi + 1 == n)
88 {
89 /* It is also one above the previous range. */
90 /* This is an impossible condition: in the previous
91 iteration, the test for 'one above the current range'
92 should already have inserted the character here. */
93 abort ();
94 }
95 else
96 {
97 /* Expand the range down by one. */
98 cs->ranges[i].lo = n;
99 return;
100 }
101 }
102 else if (n == cs->ranges[i].hi + 1)
103 {
104 /* This char is one above the current range. */
105 if (i < len - 1 && cs->ranges[i + 1].lo - 1 == n)
106 {
107 /* It is also one below the next range, so combine them. */
108 cs->ranges[i].hi = cs->ranges[i + 1].hi;
109 if (i < len - 2)
110 memmove (cs->ranges + (i + 1), cs->ranges + (i + 2),
111 sizeof (scm_t_char_range) * (len - i - 2));
112 cs->ranges = scm_gc_realloc (cs->ranges,
113 sizeof (scm_t_char_range) * len,
114 sizeof (scm_t_char_range) * (len -
115 1),
116 "character-set");
117 cs->len = len - 1;
118 return;
119 }
120 else
121 {
122 /* Expand the range up by one. */
123 cs->ranges[i].hi = n;
124 return;
125 }
126 }
127 else if (n < cs->ranges[i].lo - 1)
128 {
129 /* This is a new range below the current one. */
130 cs->ranges = scm_gc_realloc (cs->ranges,
131 sizeof (scm_t_char_range) * len,
132 sizeof (scm_t_char_range) * (len + 1),
133 "character-set");
134 memmove (cs->ranges + (i + 1), cs->ranges + i,
135 sizeof (scm_t_char_range) * (len - i));
136 cs->ranges[i].lo = n;
137 cs->ranges[i].hi = n;
138 cs->len = len + 1;
139 return;
140 }
141
142 i++;
143 }
144
145 /* This is a new range above all previous ranges. */
146 if (len == 0)
147 {
148 cs->ranges = scm_gc_malloc (sizeof (scm_t_char_range), "character-set");
149 }
150 else
151 {
152 cs->ranges = scm_gc_realloc (cs->ranges,
153 sizeof (scm_t_char_range) * len,
154 sizeof (scm_t_char_range) * (len + 1),
155 "character-set");
156 }
157 cs->ranges[len].lo = n;
158 cs->ranges[len].hi = n;
159 cs->len = len + 1;
160
161 return;
162 }
163
164 /* Put LO to HI inclusive into charset CS. */
165 static void
166 scm_i_charset_set_range (scm_t_char_set *cs, scm_t_wchar lo, scm_t_wchar hi)
167 {
168 size_t i;
169
170 i = 0;
171 while (i < cs->len)
172 {
173 /* Already in this range */
174 if (cs->ranges[i].lo <= lo && cs->ranges[i].hi >= hi)
175 return;
176
177 /* cur: +---+
178 new: +---+
179 */
180 if (cs->ranges[i].lo - 1 > hi)
181 {
182 /* Add a new range below the current one. */
183 cs->ranges = scm_gc_realloc (cs->ranges,
184 sizeof (scm_t_char_range) * cs->len,
185 sizeof (scm_t_char_range) * (cs->len + 1),
186 "character-set");
187 memmove (cs->ranges + (i + 1), cs->ranges + i,
188 sizeof (scm_t_char_range) * (cs->len - i));
189 cs->ranges[i].lo = lo;
190 cs->ranges[i].hi = hi;
191 cs->len += 1;
192 return;
193 }
194
195 /* cur: +---+ or +---+ or +---+
196 new: +---+ +---+ +---+
197 */
198 if (cs->ranges[i].lo > lo
199 && (cs->ranges[i].lo - 1 <= hi && cs->ranges[i].hi >= hi))
200 {
201 cs->ranges[i].lo = lo;
202 return;
203 }
204
205 /* cur: +---+ or +---+ or +---+
206 new: +---+ +---+ +---+
207 */
208 else if (cs->ranges[i].hi + 1 >= lo && cs->ranges[i].hi < hi)
209 {
210 if (cs->ranges[i].lo > lo)
211 cs->ranges[i].lo = lo;
212 if (cs->ranges[i].hi < hi)
213 cs->ranges[i].hi = hi;
214 while (i < cs->len - 1)
215 {
216 /* cur: --+ +---+
217 new: -----+
218 */
219 if (cs->ranges[i + 1].lo - 1 > hi)
220 break;
221
222 /* cur: --+ +---+ or --+ +---+ or --+ +--+
223 new: -----+ ------+ ---------+
224 */
225 /* Combine this range with the previous one. */
226 if (cs->ranges[i + 1].hi > hi)
227 cs->ranges[i].hi = cs->ranges[i + 1].hi;
228 if (i + 1 < cs->len)
229 memmove (cs->ranges + i + 1, cs->ranges + i + 2,
230 sizeof (scm_t_char_range) * (cs->len - i - 2));
231 cs->ranges = scm_gc_realloc (cs->ranges,
232 sizeof (scm_t_char_range) * cs->len,
233 sizeof (scm_t_char_range) * (cs->len - 1),
234 "character-set");
235 cs->len -= 1;
236 }
237 return;
238 }
239 i ++;
240 }
241
242 /* This is a new range above all previous ranges. */
243 if (cs->len == 0)
244 {
245 cs->ranges = scm_gc_malloc (sizeof (scm_t_char_range), "character-set");
246 }
247 else
248 {
249 cs->ranges = scm_gc_realloc (cs->ranges,
250 sizeof (scm_t_char_range) * cs->len,
251 sizeof (scm_t_char_range) * (cs->len + 1),
252 "character-set");
253 }
254 cs->len += 1;
255 cs->ranges[cs->len - 1].lo = lo;
256 cs->ranges[cs->len - 1].hi = hi;
257
258 return;
259 }
260
261 /* If N is in charset CS, remove it. */
262 void
263 scm_i_charset_unset (scm_t_char_set *cs, scm_t_wchar n)
264 {
265 size_t i;
266 size_t len;
267
268 len = cs->len;
269
270 i = 0;
271 while (i < len)
272 {
273 if (n < cs->ranges[i].lo)
274 /* Not in this set. */
275 return;
276
277 if (n == cs->ranges[i].lo && n == cs->ranges[i].hi)
278 {
279 /* Remove this one-character range. */
280 if (len == 1)
281 {
282 scm_gc_free (cs->ranges,
283 sizeof (scm_t_char_range) * cs->len,
284 "character-set");
285 cs->ranges = NULL;
286 cs->len = 0;
287 return;
288 }
289 else if (i < len - 1)
290 {
291 memmove (cs->ranges + i, cs->ranges + (i + 1),
292 sizeof (scm_t_char_range) * (len - i - 1));
293 cs->ranges = scm_gc_realloc (cs->ranges,
294 sizeof (scm_t_char_range) * len,
295 sizeof (scm_t_char_range) * (len -
296 1),
297 "character-set");
298 cs->len = len - 1;
299 return;
300 }
301 else if (i == len - 1)
302 {
303 cs->ranges = scm_gc_realloc (cs->ranges,
304 sizeof (scm_t_char_range) * len,
305 sizeof (scm_t_char_range) * (len -
306 1),
307 "character-set");
308 cs->len = len - 1;
309 return;
310 }
311 }
312 else if (n == cs->ranges[i].lo)
313 {
314 /* Shrink this range from the left. */
315 cs->ranges[i].lo = n + 1;
316 return;
317 }
318 else if (n == cs->ranges[i].hi)
319 {
320 /* Shrink this range from the right. */
321 cs->ranges[i].hi = n - 1;
322 return;
323 }
324 else if (n > cs->ranges[i].lo && n < cs->ranges[i].hi)
325 {
326 /* Split this range into two pieces. */
327 cs->ranges = scm_gc_realloc (cs->ranges,
328 sizeof (scm_t_char_range) * len,
329 sizeof (scm_t_char_range) * (len + 1),
330 "character-set");
331 if (i < len - 1)
332 memmove (cs->ranges + (i + 2), cs->ranges + (i + 1),
333 sizeof (scm_t_char_range) * (len - i - 1));
334 cs->ranges[i + 1].hi = cs->ranges[i].hi;
335 cs->ranges[i + 1].lo = n + 1;
336 cs->ranges[i].hi = n - 1;
337 cs->len = len + 1;
338 return;
339 }
340
341 i++;
342 }
343
344 /* This value is above all ranges, so do nothing here. */
345 return;
346 }
347
348 static int
349 charsets_equal (scm_t_char_set *a, scm_t_char_set *b)
350 {
351 if (a->len != b->len)
352 return 0;
353
354 if (memcmp (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len) != 0)
355 return 0;
356
357 return 1;
358 }
359
360 /* Return true if every character in A is also in B. */
361 static int
362 charsets_leq (scm_t_char_set *a, scm_t_char_set *b)
363 {
364 size_t i = 0, j = 0;
365 scm_t_wchar alo, ahi;
366
367 if (a->len == 0)
368 return 1;
369 if (b->len == 0)
370 return 0;
371 while (i < a->len)
372 {
373 alo = a->ranges[i].lo;
374 ahi = a->ranges[i].hi;
375 while (b->ranges[j].hi < alo)
376 {
377 if (j < b->len - 1)
378 j++;
379 else
380 return 0;
381 }
382 if (alo < b->ranges[j].lo || ahi > b->ranges[j].hi)
383 return 0;
384 i++;
385 }
386
387 return 1;
388 }
389
390 /* Merge B into A. */
391 static void
392 charsets_union (scm_t_char_set *a, scm_t_char_set *b)
393 {
394 size_t i = 0;
395 scm_t_wchar blo, bhi;
396
397 if (b->len == 0)
398 return;
399
400 if (a->len == 0)
401 {
402 a->len = b->len;
403 a->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * b->len,
404 "character-set");
405 memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * b->len);
406 return;
407 }
408
409 while (i < b->len)
410 {
411 blo = b->ranges[i].lo;
412 bhi = b->ranges[i].hi;
413 scm_i_charset_set_range (a, blo, bhi);
414
415 i++;
416 }
417
418 return;
419 }
420
421 /* Remove elements not both in A and B from A. */
422 static void
423 charsets_intersection (scm_t_char_set *a, scm_t_char_set *b)
424 {
425 size_t i = 0;
426 scm_t_wchar blo, bhi, n;
427 scm_t_char_set *c;
428
429 if (a->len == 0)
430 return;
431
432 if (b->len == 0)
433 {
434 scm_gc_free (a->ranges, sizeof (scm_t_char_range) * a->len,
435 "character-set");
436 a->len = 0;
437 return;
438 }
439
440 c = (scm_t_char_set *) scm_malloc (sizeof (scm_t_char_set));
441 c->len = 0;
442 c->ranges = NULL;
443
444 while (i < b->len)
445 {
446 blo = b->ranges[i].lo;
447 bhi = b->ranges[i].hi;
448 for (n = blo; n <= bhi; n++)
449 if (scm_i_charset_get (a, n))
450 scm_i_charset_set (c, n);
451 i++;
452 }
453 scm_gc_free (a->ranges, sizeof (scm_t_char_range) * a->len,
454 "character-set");
455
456 a->len = c->len;
457 if (c->len != 0)
458 a->ranges = c->ranges;
459 else
460 a->ranges = NULL;
461 free (c);
462 return;
463 }
464
465 #define SCM_ADD_RANGE(low, high) \
466 do { \
467 p->ranges[idx].lo = (low); \
468 p->ranges[idx++].hi = (high); \
469 } while (0)
470 #define SCM_ADD_RANGE_SKIP_SURROGATES(low, high) \
471 do { \
472 p->ranges[idx].lo = (low); \
473 p->ranges[idx++].hi = SCM_CODEPOINT_SURROGATE_START - 1; \
474 p->ranges[idx].lo = SCM_CODEPOINT_SURROGATE_END + 1; \
475 p->ranges[idx++].hi = (high); \
476 } while (0)
477
478
479
480 /* Make P the compelement of Q. */
481 static void
482 charsets_complement (scm_t_char_set *p, scm_t_char_set *q)
483 {
484 int k, idx;
485
486 idx = 0;
487 if (q->len == 0)
488 {
489 /* Fill with all valid codepoints. */
490 p->len = 2;
491 p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2,
492 "character-set");
493 SCM_ADD_RANGE_SKIP_SURROGATES (0, SCM_CODEPOINT_MAX);
494 return;
495 }
496
497 if (p->len > 0)
498 scm_gc_free (p->ranges, sizeof (scm_t_char_set) * p->len,
499 "character-set");
500
501 /* Count the number of ranges needed for the output. */
502 p->len = 0;
503 if (q->ranges[0].lo > 0)
504 p->len++;
505 if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
506 p->len++;
507 p->len += q->len;
508 p->ranges =
509 (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) * p->len,
510 "character-set");
511 if (q->ranges[0].lo > 0)
512 {
513 if (q->ranges[0].lo > SCM_CODEPOINT_SURROGATE_END)
514 SCM_ADD_RANGE_SKIP_SURROGATES (0, q->ranges[0].lo - 1);
515 else
516 SCM_ADD_RANGE (0, q->ranges[0].lo - 1);
517 }
518 for (k = 1; k < q->len; k++)
519 {
520 if (q->ranges[k - 1].hi < SCM_CODEPOINT_SURROGATE_START
521 && q->ranges[k].lo - 1 > SCM_CODEPOINT_SURROGATE_END)
522 SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1);
523 else
524 SCM_ADD_RANGE (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1);
525 }
526 if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
527 {
528 if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_SURROGATE_START)
529 SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX);
530 else
531 SCM_ADD_RANGE (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX);
532 }
533 return;
534 }
535 #undef SCM_ADD_RANGE
536 #undef SCM_ADD_RANGE_SKIP_SURROGATES
537
538 /* Replace A with elements only found in one of A or B. */
539 static void
540 charsets_xor (scm_t_char_set *a, scm_t_char_set *b)
541 {
542 size_t i = 0;
543 scm_t_wchar blo, bhi, n;
544
545 if (b->len == 0)
546 {
547 return;
548 }
549
550 if (a->len == 0)
551 {
552 a->ranges =
553 (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) *
554 b->len, "character-set");
555 a->len = b->len;
556 memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len);
557 return;
558 }
559
560 while (i < b->len)
561 {
562 blo = b->ranges[i].lo;
563 bhi = b->ranges[i].hi;
564 for (n = blo; n <= bhi; n++)
565 {
566 if (scm_i_charset_get (a, n))
567 scm_i_charset_unset (a, n);
568 else
569 scm_i_charset_set (a, n);
570 }
571
572 i++;
573 }
574 return;
575 }
576
577 /* Smob print hook for character sets. */
578 static int
579 charset_print (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED)
580 {
581 size_t i;
582 int first = 1;
583 scm_t_char_set *p;
584 const size_t max_ranges_to_print = 50;
585
586 p = SCM_CHARSET_DATA (charset);
587
588 scm_puts ("#<charset {", port);
589 for (i = 0; i < p->len; i++)
590 {
591 if (first)
592 first = 0;
593 else
594 scm_puts (" ", port);
595 scm_write (SCM_MAKE_CHAR (p->ranges[i].lo), port);
596 if (p->ranges[i].lo != p->ranges[i].hi)
597 {
598 scm_puts ("..", port);
599 scm_write (SCM_MAKE_CHAR (p->ranges[i].hi), port);
600 }
601 if (i >= max_ranges_to_print)
602 {
603 /* Too many to print here. Quit early. */
604 scm_puts (" ...", port);
605 break;
606 }
607 }
608 scm_puts ("}>", port);
609 return 1;
610 }
611
612
613 /* Smob free hook for character sets. */
614 static size_t
615 charset_free (SCM charset)
616 {
617 scm_t_char_set *cs;
618 size_t len = 0;
619
620 cs = SCM_CHARSET_DATA (charset);
621 if (cs != NULL)
622 len = cs->len;
623 if (len > 0)
624 scm_gc_free (cs->ranges, sizeof (scm_t_char_range) * len,
625 "character-set");
626
627 cs->ranges = NULL;
628 cs->len = 0;
629
630 scm_gc_free (cs, sizeof (scm_t_char_set), "character-set");
631
632 scm_remember_upto_here_1 (charset);
633
634 return 0;
635 }
636
637
638 /* Smob print hook for character sets cursors. */
639 static int
640 charset_cursor_print (SCM cursor, SCM port,
641 scm_print_state *pstate SCM_UNUSED)
642 {
643 scm_t_char_set_cursor *cur;
644
645 cur = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
646
647 scm_puts ("#<charset-cursor ", port);
648 if (cur->range == (size_t) (-1))
649 scm_puts ("(empty)", port);
650 else
651 {
652 scm_write (scm_from_size_t (cur->range), port);
653 scm_puts (":", port);
654 scm_write (scm_from_int32 (cur->n), port);
655 }
656 scm_puts (">", port);
657 return 1;
658 }
659
660 /* Smob free hook for character sets. */
661 static size_t
662 charset_cursor_free (SCM charset)
663 {
664 scm_t_char_set_cursor *cur;
665
666 cur = (scm_t_char_set_cursor *) SCM_SMOB_DATA (charset);
667 scm_gc_free (cur, sizeof (scm_t_char_set_cursor), "charset-cursor");
668 scm_remember_upto_here_1 (charset);
669
670 return 0;
671 }
672
673
674 /* Create a new, empty character set. */
675 static SCM
676 make_char_set (const char *func_name)
677 {
678 scm_t_char_set *p;
679
680 p = scm_gc_malloc (sizeof (scm_t_char_set), "character-set");
681 memset (p, 0, sizeof (scm_t_char_set));
682 SCM_RETURN_NEWSMOB (scm_tc16_charset, p);
683 }
684
685
686 SCM_DEFINE (scm_char_set_p, "char-set?", 1, 0, 0,
687 (SCM obj),
688 "Return @code{#t} if @var{obj} is a character set, @code{#f}\n"
689 "otherwise.")
690 #define FUNC_NAME s_scm_char_set_p
691 {
692 return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_charset, obj));
693 }
694 #undef FUNC_NAME
695
696
697 SCM_DEFINE (scm_char_set_eq, "char-set=", 0, 0, 1,
698 (SCM char_sets),
699 "Return @code{#t} if all given character sets are equal.")
700 #define FUNC_NAME s_scm_char_set_eq
701 {
702 int argnum = 1;
703 scm_t_char_set *cs1_data = NULL;
704
705 SCM_VALIDATE_REST_ARGUMENT (char_sets);
706
707 while (!scm_is_null (char_sets))
708 {
709 SCM csi = SCM_CAR (char_sets);
710 scm_t_char_set *csi_data;
711
712 SCM_VALIDATE_SMOB (argnum, csi, charset);
713 argnum++;
714 csi_data = SCM_CHARSET_DATA (csi);
715 if (cs1_data == NULL)
716 cs1_data = csi_data;
717 else if (!charsets_equal (cs1_data, csi_data))
718 return SCM_BOOL_F;
719 char_sets = SCM_CDR (char_sets);
720 }
721 return SCM_BOOL_T;
722 }
723 #undef FUNC_NAME
724
725
726 SCM_DEFINE (scm_char_set_leq, "char-set<=", 0, 0, 1,
727 (SCM char_sets),
728 "Return @code{#t} if every character set @var{cs}i is a subset\n"
729 "of character set @var{cs}i+1.")
730 #define FUNC_NAME s_scm_char_set_leq
731 {
732 int argnum = 1;
733 scm_t_char_set *prev_data = NULL;
734
735 SCM_VALIDATE_REST_ARGUMENT (char_sets);
736
737 while (!scm_is_null (char_sets))
738 {
739 SCM csi = SCM_CAR (char_sets);
740 scm_t_char_set *csi_data;
741
742 SCM_VALIDATE_SMOB (argnum, csi, charset);
743 argnum++;
744 csi_data = SCM_CHARSET_DATA (csi);
745 if (prev_data)
746 {
747 if (!charsets_leq (prev_data, csi_data))
748 return SCM_BOOL_F;
749 }
750 prev_data = csi_data;
751 char_sets = SCM_CDR (char_sets);
752 }
753 return SCM_BOOL_T;
754 }
755 #undef FUNC_NAME
756
757
758 SCM_DEFINE (scm_char_set_hash, "char-set-hash", 1, 1, 0,
759 (SCM cs, SCM bound),
760 "Compute a hash value for the character set @var{cs}. If\n"
761 "@var{bound} is given and non-zero, it restricts the\n"
762 "returned value to the range 0 @dots{} @var{bound - 1}.")
763 #define FUNC_NAME s_scm_char_set_hash
764 {
765 const unsigned long default_bnd = 871;
766 unsigned long bnd;
767 scm_t_char_set *p;
768 unsigned long val = 0;
769 int k;
770 scm_t_wchar c;
771
772 SCM_VALIDATE_SMOB (1, cs, charset);
773
774 if (SCM_UNBNDP (bound))
775 bnd = default_bnd;
776 else
777 {
778 bnd = scm_to_ulong (bound);
779 if (bnd == 0)
780 bnd = default_bnd;
781 }
782
783 p = SCM_CHARSET_DATA (cs);
784 for (k = 0; k < p->len; k++)
785 {
786 for (c = p->ranges[k].lo; c <= p->ranges[k].hi; c++)
787 val = c + (val << 1);
788 }
789 return scm_from_ulong (val % bnd);
790 }
791 #undef FUNC_NAME
792
793
794 SCM_DEFINE (scm_char_set_cursor, "char-set-cursor", 1, 0, 0,
795 (SCM cs), "Return a cursor into the character set @var{cs}.")
796 #define FUNC_NAME s_scm_char_set_cursor
797 {
798 scm_t_char_set *cs_data;
799 scm_t_char_set_cursor *cur_data;
800
801 SCM_VALIDATE_SMOB (1, cs, charset);
802 cs_data = SCM_CHARSET_DATA (cs);
803 cur_data =
804 (scm_t_char_set_cursor *) scm_gc_malloc (sizeof (scm_t_char_set_cursor),
805 "charset-cursor");
806 if (cs_data->len == 0)
807 {
808 cur_data->range = (size_t) (-1);
809 cur_data->n = 0;
810 }
811 else
812 {
813 cur_data->range = 0;
814 cur_data->n = cs_data->ranges[0].lo;
815 }
816 SCM_RETURN_NEWSMOB (scm_tc16_charset_cursor, cur_data);
817 }
818 #undef FUNC_NAME
819
820
821 SCM_DEFINE (scm_char_set_ref, "char-set-ref", 2, 0, 0,
822 (SCM cs, SCM cursor),
823 "Return the character at the current cursor position\n"
824 "@var{cursor} in the character set @var{cs}. It is an error to\n"
825 "pass a cursor for which @code{end-of-char-set?} returns true.")
826 #define FUNC_NAME s_scm_char_set_ref
827 {
828 scm_t_char_set *cs_data;
829 scm_t_char_set_cursor *cur_data;
830 size_t i;
831
832 SCM_VALIDATE_SMOB (1, cs, charset);
833 SCM_VALIDATE_SMOB (2, cursor, charset_cursor);
834
835 cs_data = SCM_CHARSET_DATA (cs);
836 cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
837
838 /* Validate that this cursor is still true. */
839 i = cur_data->range;
840 if (i == (size_t) (-1)
841 || i >= cs_data->len
842 || cur_data->n < cs_data->ranges[i].lo
843 || cur_data->n > cs_data->ranges[i].hi)
844 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
845 return SCM_MAKE_CHAR (cur_data->n);
846 }
847 #undef FUNC_NAME
848
849
850 SCM_DEFINE (scm_char_set_cursor_next, "char-set-cursor-next", 2, 0, 0,
851 (SCM cs, SCM cursor),
852 "Advance the character set cursor @var{cursor} to the next\n"
853 "character in the character set @var{cs}. It is an error if the\n"
854 "cursor given satisfies @code{end-of-char-set?}.")
855 #define FUNC_NAME s_scm_char_set_cursor_next
856 {
857 scm_t_char_set *cs_data;
858 scm_t_char_set_cursor *cur_data;
859 size_t i;
860
861 SCM_VALIDATE_SMOB (1, cs, charset);
862 SCM_VALIDATE_SMOB (2, cursor, charset_cursor);
863
864 cs_data = SCM_CHARSET_DATA (cs);
865 cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
866
867 /* Validate that this cursor is still true. */
868 i = cur_data->range;
869 if (i == (size_t) (-1)
870 || i >= cs_data->len
871 || cur_data->n < cs_data->ranges[i].lo
872 || cur_data->n > cs_data->ranges[i].hi)
873 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
874 /* Increment the cursor. */
875 if (cur_data->n == cs_data->ranges[i].hi)
876 {
877 if (i + 1 < cs_data->len)
878 {
879 cur_data->range = i + 1;
880 cur_data->n = cs_data->ranges[i + 1].lo;
881 }
882 else
883 {
884 /* This is the end of the road. */
885 cur_data->range = (size_t) (-1);
886 cur_data->n = 0;
887 }
888 }
889 else
890 {
891 cur_data->n = cur_data->n + 1;
892 }
893
894 return cursor;
895 }
896 #undef FUNC_NAME
897
898
899 SCM_DEFINE (scm_end_of_char_set_p, "end-of-char-set?", 1, 0, 0,
900 (SCM cursor),
901 "Return @code{#t} if @var{cursor} has reached the end of a\n"
902 "character set, @code{#f} otherwise.")
903 #define FUNC_NAME s_scm_end_of_char_set_p
904 {
905 scm_t_char_set_cursor *cur_data;
906 SCM_VALIDATE_SMOB (1, cursor, charset_cursor);
907
908 cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
909 if (cur_data->range == (size_t) (-1))
910 return SCM_BOOL_T;
911
912 return SCM_BOOL_F;
913 }
914 #undef FUNC_NAME
915
916
917 SCM_DEFINE (scm_char_set_fold, "char-set-fold", 3, 0, 0,
918 (SCM kons, SCM knil, SCM cs),
919 "Fold the procedure @var{kons} over the character set @var{cs},\n"
920 "initializing it with @var{knil}.")
921 #define FUNC_NAME s_scm_char_set_fold
922 {
923 scm_t_char_set *cs_data;
924 int k;
925 scm_t_wchar n;
926
927 SCM_VALIDATE_PROC (1, kons);
928 SCM_VALIDATE_SMOB (3, cs, charset);
929
930 cs_data = SCM_CHARSET_DATA (cs);
931
932 if (cs_data->len == 0)
933 return knil;
934
935 for (k = 0; k < cs_data->len; k++)
936 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
937 {
938 knil = scm_call_2 (kons, SCM_MAKE_CHAR (n), knil);
939 }
940 return knil;
941 }
942 #undef FUNC_NAME
943
944
945 SCM_DEFINE (scm_char_set_unfold, "char-set-unfold", 4, 1, 0,
946 (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
947 "This is a fundamental constructor for character sets.\n"
948 "@itemize @bullet\n"
949 "@item @var{g} is used to generate a series of ``seed'' values\n"
950 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
951 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
952 "@item @var{p} tells us when to stop -- when it returns true\n"
953 "when applied to one of the seed values.\n"
954 "@item @var{f} maps each seed value to a character. These\n"
955 "characters are added to the base character set @var{base_cs} to\n"
956 "form the result; @var{base_cs} defaults to the empty set.\n"
957 "@end itemize")
958 #define FUNC_NAME s_scm_char_set_unfold
959 {
960 SCM result, tmp;
961
962 SCM_VALIDATE_PROC (1, p);
963 SCM_VALIDATE_PROC (2, f);
964 SCM_VALIDATE_PROC (3, g);
965 if (!SCM_UNBNDP (base_cs))
966 {
967 SCM_VALIDATE_SMOB (5, base_cs, charset);
968 result = scm_char_set_copy (base_cs);
969 }
970 else
971 result = make_char_set (FUNC_NAME);
972
973 tmp = scm_call_1 (p, seed);
974 while (scm_is_false (tmp))
975 {
976 SCM ch = scm_call_1 (f, seed);
977 if (!SCM_CHARP (ch))
978 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
979 SCM_CHARSET_SET (result, SCM_CHAR (ch));
980
981 seed = scm_call_1 (g, seed);
982 tmp = scm_call_1 (p, seed);
983 }
984 return result;
985 }
986 #undef FUNC_NAME
987
988
989 SCM_DEFINE (scm_char_set_unfold_x, "char-set-unfold!", 5, 0, 0,
990 (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
991 "This is a fundamental constructor for character sets.\n"
992 "@itemize @bullet\n"
993 "@item @var{g} is used to generate a series of ``seed'' values\n"
994 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
995 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
996 "@item @var{p} tells us when to stop -- when it returns true\n"
997 "when applied to one of the seed values.\n"
998 "@item @var{f} maps each seed value to a character. These\n"
999 "characters are added to the base character set @var{base_cs} to\n"
1000 "form the result; @var{base_cs} defaults to the empty set.\n"
1001 "@end itemize")
1002 #define FUNC_NAME s_scm_char_set_unfold_x
1003 {
1004 SCM tmp;
1005
1006 SCM_VALIDATE_PROC (1, p);
1007 SCM_VALIDATE_PROC (2, f);
1008 SCM_VALIDATE_PROC (3, g);
1009 SCM_VALIDATE_SMOB (5, base_cs, charset);
1010
1011 tmp = scm_call_1 (p, seed);
1012 while (scm_is_false (tmp))
1013 {
1014 SCM ch = scm_call_1 (f, seed);
1015 if (!SCM_CHARP (ch))
1016 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
1017 SCM_CHARSET_SET (base_cs, SCM_CHAR (ch));
1018
1019 seed = scm_call_1 (g, seed);
1020 tmp = scm_call_1 (p, seed);
1021 }
1022 return base_cs;
1023 }
1024 #undef FUNC_NAME
1025
1026
1027 SCM_DEFINE (scm_char_set_for_each, "char-set-for-each", 2, 0, 0,
1028 (SCM proc, SCM cs),
1029 "Apply @var{proc} to every character in the character set\n"
1030 "@var{cs}. The return value is not specified.")
1031 #define FUNC_NAME s_scm_char_set_for_each
1032 {
1033 scm_t_char_set *cs_data;
1034 int k;
1035 scm_t_wchar n;
1036
1037 SCM_VALIDATE_PROC (1, proc);
1038 SCM_VALIDATE_SMOB (2, cs, charset);
1039
1040 cs_data = SCM_CHARSET_DATA (cs);
1041
1042 if (cs_data->len == 0)
1043 return SCM_UNSPECIFIED;
1044
1045 for (k = 0; k < cs_data->len; k++)
1046 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
1047 {
1048 scm_call_1 (proc, SCM_MAKE_CHAR (n));
1049 }
1050
1051 return SCM_UNSPECIFIED;
1052 }
1053 #undef FUNC_NAME
1054
1055
1056 SCM_DEFINE (scm_char_set_map, "char-set-map", 2, 0, 0,
1057 (SCM proc, SCM cs),
1058 "Map the procedure @var{proc} over every character in @var{cs}.\n"
1059 "@var{proc} must be a character -> character procedure.")
1060 #define FUNC_NAME s_scm_char_set_map
1061 {
1062 SCM result;
1063 int k;
1064 scm_t_char_set *cs_data;
1065 scm_t_wchar n;
1066
1067 SCM_VALIDATE_PROC (1, proc);
1068 SCM_VALIDATE_SMOB (2, cs, charset);
1069
1070 result = make_char_set (FUNC_NAME);
1071 cs_data = SCM_CHARSET_DATA (cs);
1072
1073 if (cs_data->len == 0)
1074 return result;
1075
1076 for (k = 0; k < cs_data->len; k++)
1077 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
1078 {
1079 SCM ch = scm_call_1 (proc, SCM_MAKE_CHAR (n));
1080 if (!SCM_CHARP (ch))
1081 SCM_MISC_ERROR ("procedure ~S returned non-char",
1082 scm_list_1 (proc));
1083 SCM_CHARSET_SET (result, SCM_CHAR (ch));
1084 }
1085 return result;
1086 }
1087 #undef FUNC_NAME
1088
1089
1090 SCM_DEFINE (scm_char_set_copy, "char-set-copy", 1, 0, 0,
1091 (SCM cs),
1092 "Return a newly allocated character set containing all\n"
1093 "characters in @var{cs}.")
1094 #define FUNC_NAME s_scm_char_set_copy
1095 {
1096 SCM ret;
1097 scm_t_char_set *p1, *p2;
1098
1099 SCM_VALIDATE_SMOB (1, cs, charset);
1100 ret = make_char_set (FUNC_NAME);
1101 p1 = SCM_CHARSET_DATA (cs);
1102 p2 = SCM_CHARSET_DATA (ret);
1103 p2->len = p1->len;
1104
1105 if (p1->len == 0)
1106 p2->ranges = NULL;
1107 else
1108 {
1109 p2->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * p1->len,
1110 "character-set");
1111 memcpy (p2->ranges, p1->ranges, sizeof (scm_t_char_range) * p1->len);
1112 }
1113
1114 return ret;
1115 }
1116 #undef FUNC_NAME
1117
1118
1119 SCM_DEFINE (scm_char_set, "char-set", 0, 0, 1,
1120 (SCM rest),
1121 "Return a character set containing all given characters.")
1122 #define FUNC_NAME s_scm_char_set
1123 {
1124 SCM cs;
1125 int argnum = 1;
1126
1127 SCM_VALIDATE_REST_ARGUMENT (rest);
1128 cs = make_char_set (FUNC_NAME);
1129 while (!scm_is_null (rest))
1130 {
1131 scm_t_wchar c;
1132
1133 SCM_VALIDATE_CHAR_COPY (argnum, SCM_CAR (rest), c);
1134 argnum++;
1135 rest = SCM_CDR (rest);
1136 SCM_CHARSET_SET (cs, c);
1137 }
1138 return cs;
1139 }
1140 #undef FUNC_NAME
1141
1142
1143 SCM_DEFINE (scm_list_to_char_set, "list->char-set", 1, 1, 0,
1144 (SCM list, SCM base_cs),
1145 "Convert the character list @var{list} to a character set. If\n"
1146 "the character set @var{base_cs} is given, the character in this\n"
1147 "set are also included in the result.")
1148 #define FUNC_NAME s_scm_list_to_char_set
1149 {
1150 SCM cs;
1151
1152 SCM_VALIDATE_LIST (1, list);
1153 if (SCM_UNBNDP (base_cs))
1154 cs = make_char_set (FUNC_NAME);
1155 else
1156 {
1157 SCM_VALIDATE_SMOB (2, base_cs, charset);
1158 cs = scm_char_set_copy (base_cs);
1159 }
1160 while (!scm_is_null (list))
1161 {
1162 SCM chr = SCM_CAR (list);
1163 scm_t_wchar c;
1164
1165 SCM_VALIDATE_CHAR_COPY (0, chr, c);
1166 list = SCM_CDR (list);
1167
1168
1169 SCM_CHARSET_SET (cs, c);
1170 }
1171 return cs;
1172 }
1173 #undef FUNC_NAME
1174
1175
1176 SCM_DEFINE (scm_list_to_char_set_x, "list->char-set!", 2, 0, 0,
1177 (SCM list, SCM base_cs),
1178 "Convert the character list @var{list} to a character set. The\n"
1179 "characters are added to @var{base_cs} and @var{base_cs} is\n"
1180 "returned.")
1181 #define FUNC_NAME s_scm_list_to_char_set_x
1182 {
1183 SCM_VALIDATE_LIST (1, list);
1184 SCM_VALIDATE_SMOB (2, base_cs, charset);
1185 while (!scm_is_null (list))
1186 {
1187 SCM chr = SCM_CAR (list);
1188 scm_t_wchar c;
1189
1190 SCM_VALIDATE_CHAR_COPY (0, chr, c);
1191 list = SCM_CDR (list);
1192
1193 SCM_CHARSET_SET (base_cs, c);
1194 }
1195 return base_cs;
1196 }
1197 #undef FUNC_NAME
1198
1199
1200 SCM_DEFINE (scm_string_to_char_set, "string->char-set", 1, 1, 0,
1201 (SCM str, SCM base_cs),
1202 "Convert the string @var{str} to a character set. If the\n"
1203 "character set @var{base_cs} is given, the characters in this\n"
1204 "set are also included in the result.")
1205 #define FUNC_NAME s_scm_string_to_char_set
1206 {
1207 SCM cs;
1208 size_t k = 0, len;
1209
1210 SCM_VALIDATE_STRING (1, str);
1211 if (SCM_UNBNDP (base_cs))
1212 cs = make_char_set (FUNC_NAME);
1213 else
1214 {
1215 SCM_VALIDATE_SMOB (2, base_cs, charset);
1216 cs = scm_char_set_copy (base_cs);
1217 }
1218 len = scm_i_string_length (str);
1219 while (k < len)
1220 {
1221 scm_t_wchar c = scm_i_string_ref (str, k++);
1222 SCM_CHARSET_SET (cs, c);
1223 }
1224 scm_remember_upto_here_1 (str);
1225 return cs;
1226 }
1227 #undef FUNC_NAME
1228
1229
1230 SCM_DEFINE (scm_string_to_char_set_x, "string->char-set!", 2, 0, 0,
1231 (SCM str, SCM base_cs),
1232 "Convert the string @var{str} to a character set. The\n"
1233 "characters from the string are added to @var{base_cs}, and\n"
1234 "@var{base_cs} is returned.")
1235 #define FUNC_NAME s_scm_string_to_char_set_x
1236 {
1237 size_t k = 0, len;
1238
1239 SCM_VALIDATE_STRING (1, str);
1240 SCM_VALIDATE_SMOB (2, base_cs, charset);
1241 len = scm_i_string_length (str);
1242 while (k < len)
1243 {
1244 scm_t_wchar c = scm_i_string_ref (str, k++);
1245 SCM_CHARSET_SET (base_cs, c);
1246 }
1247 scm_remember_upto_here_1 (str);
1248 return base_cs;
1249 }
1250 #undef FUNC_NAME
1251
1252
1253 SCM_DEFINE (scm_char_set_filter, "char-set-filter", 2, 1, 0,
1254 (SCM pred, SCM cs, SCM base_cs),
1255 "Return a character set containing every character from @var{cs}\n"
1256 "so that it satisfies @var{pred}. If provided, the characters\n"
1257 "from @var{base_cs} are added to the result.")
1258 #define FUNC_NAME s_scm_char_set_filter
1259 {
1260 SCM ret;
1261 int k;
1262 scm_t_wchar n;
1263 scm_t_char_set *p;
1264
1265 SCM_VALIDATE_PROC (1, pred);
1266 SCM_VALIDATE_SMOB (2, cs, charset);
1267 if (!SCM_UNBNDP (base_cs))
1268 {
1269 SCM_VALIDATE_SMOB (3, base_cs, charset);
1270 ret = scm_char_set_copy (base_cs);
1271 }
1272 else
1273 ret = make_char_set (FUNC_NAME);
1274
1275 p = SCM_CHARSET_DATA (cs);
1276
1277 if (p->len == 0)
1278 return ret;
1279
1280 for (k = 0; k < p->len; k++)
1281 for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++)
1282 {
1283 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1284
1285 if (scm_is_true (res))
1286 SCM_CHARSET_SET (ret, n);
1287 }
1288 return ret;
1289 }
1290 #undef FUNC_NAME
1291
1292
1293 SCM_DEFINE (scm_char_set_filter_x, "char-set-filter!", 3, 0, 0,
1294 (SCM pred, SCM cs, SCM base_cs),
1295 "Return a character set containing every character from @var{cs}\n"
1296 "so that it satisfies @var{pred}. The characters are added to\n"
1297 "@var{base_cs} and @var{base_cs} is returned.")
1298 #define FUNC_NAME s_scm_char_set_filter_x
1299 {
1300 int k;
1301 scm_t_wchar n;
1302 scm_t_char_set *p;
1303
1304 SCM_VALIDATE_PROC (1, pred);
1305 SCM_VALIDATE_SMOB (2, cs, charset);
1306 SCM_VALIDATE_SMOB (3, base_cs, charset);
1307 p = SCM_CHARSET_DATA (cs);
1308 if (p->len == 0)
1309 return base_cs;
1310
1311 for (k = 0; k < p->len; k++)
1312 for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++)
1313 {
1314 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1315
1316 if (scm_is_true (res))
1317 SCM_CHARSET_SET (base_cs, n);
1318 }
1319 return base_cs;
1320 }
1321 #undef FUNC_NAME
1322
1323
1324 /* Return a character set containing all the characters from [LOWER,UPPER),
1325 giving range errors if ERROR, adding chars from BASE_CS, and recycling
1326 BASE_CS if REUSE is true. */
1327 static SCM
1328 scm_i_ucs_range_to_char_set (const char *FUNC_NAME, SCM lower, SCM upper,
1329 SCM error, SCM base_cs, int reuse)
1330 {
1331 SCM cs;
1332 size_t clower, cupper;
1333
1334 clower = scm_to_size_t (lower);
1335 cupper = scm_to_size_t (upper) - 1;
1336 SCM_ASSERT_RANGE (2, upper, cupper >= clower);
1337 if (!SCM_UNBNDP (error))
1338 {
1339 if (scm_is_true (error))
1340 {
1341 SCM_ASSERT_RANGE (1, lower, SCM_IS_UNICODE_CHAR (clower));
1342 SCM_ASSERT_RANGE (2, upper, SCM_IS_UNICODE_CHAR (cupper));
1343 if (clower < SCM_CODEPOINT_SURROGATE_START
1344 && cupper > SCM_CODEPOINT_SURROGATE_END)
1345 scm_error(scm_out_of_range_key,
1346 FUNC_NAME, "invalid range - contains surrogate characters: ~S to ~S",
1347 scm_list_2 (lower, upper), scm_list_1 (upper));
1348 }
1349 }
1350
1351 if (SCM_UNBNDP (base_cs))
1352 cs = make_char_set (FUNC_NAME);
1353 else
1354 {
1355 SCM_VALIDATE_SMOB (4, base_cs, charset);
1356 if (reuse)
1357 cs = base_cs;
1358 else
1359 cs = scm_char_set_copy (base_cs);
1360 }
1361
1362 if ((clower >= SCM_CODEPOINT_SURROGATE_START && clower <= SCM_CODEPOINT_SURROGATE_END)
1363 && (cupper >= SCM_CODEPOINT_SURROGATE_START && cupper <= SCM_CODEPOINT_SURROGATE_END))
1364 return cs;
1365
1366 if (clower > SCM_CODEPOINT_MAX)
1367 clower = SCM_CODEPOINT_MAX;
1368 if (clower >= SCM_CODEPOINT_SURROGATE_START && clower <= SCM_CODEPOINT_SURROGATE_END)
1369 clower = SCM_CODEPOINT_SURROGATE_END + 1;
1370 if (cupper > SCM_CODEPOINT_MAX)
1371 cupper = SCM_CODEPOINT_MAX;
1372 if (cupper >= SCM_CODEPOINT_SURROGATE_START && cupper <= SCM_CODEPOINT_SURROGATE_END)
1373 cupper = SCM_CODEPOINT_SURROGATE_START - 1;
1374 if (clower < SCM_CODEPOINT_SURROGATE_START && cupper > SCM_CODEPOINT_SURROGATE_END)
1375 {
1376 scm_i_charset_set_range (SCM_CHARSET_DATA (cs), clower, SCM_CODEPOINT_SURROGATE_START - 1);
1377 scm_i_charset_set_range (SCM_CHARSET_DATA (cs), SCM_CODEPOINT_SURROGATE_END + 1, cupper);
1378 }
1379 else
1380 scm_i_charset_set_range (SCM_CHARSET_DATA (cs), clower, cupper);
1381 return cs;
1382 }
1383
1384 SCM_DEFINE (scm_ucs_range_to_char_set, "ucs-range->char-set", 2, 2, 0,
1385 (SCM lower, SCM upper, SCM error, SCM base_cs),
1386 "Return a character set containing all characters whose\n"
1387 "character codes lie in the half-open range\n"
1388 "[@var{lower},@var{upper}).\n"
1389 "\n"
1390 "If @var{error} is a true value, an error is signalled if the\n"
1391 "specified range contains characters which are not valid\n"
1392 "Unicode code points. If @var{error} is @code{#f},\n"
1393 "these characters are silently left out of the resultung\n"
1394 "character set.\n"
1395 "\n"
1396 "The characters in @var{base_cs} are added to the result, if\n"
1397 "given.")
1398 #define FUNC_NAME s_scm_ucs_range_to_char_set
1399 {
1400 return scm_i_ucs_range_to_char_set (FUNC_NAME, lower, upper,
1401 error, base_cs, 0);
1402 }
1403 #undef FUNC_NAME
1404
1405
1406 SCM_DEFINE (scm_ucs_range_to_char_set_x, "ucs-range->char-set!", 4, 0, 0,
1407 (SCM lower, SCM upper, SCM error, SCM base_cs),
1408 "Return a character set containing all characters whose\n"
1409 "character codes lie in the half-open range\n"
1410 "[@var{lower},@var{upper}).\n"
1411 "\n"
1412 "If @var{error} is a true value, an error is signalled if the\n"
1413 "specified range contains characters which are not contained in\n"
1414 "the implemented character range. If @var{error} is @code{#f},\n"
1415 "these characters are silently left out of the resultung\n"
1416 "character set.\n"
1417 "\n"
1418 "The characters are added to @var{base_cs} and @var{base_cs} is\n"
1419 "returned.")
1420 #define FUNC_NAME s_scm_ucs_range_to_char_set_x
1421 {
1422 SCM_VALIDATE_SMOB (4, base_cs, charset);
1423 return scm_i_ucs_range_to_char_set (FUNC_NAME, lower, upper,
1424 error, base_cs, 1);
1425 }
1426 #undef FUNC_NAME
1427
1428 SCM_DEFINE (scm_to_char_set, "->char-set", 1, 0, 0,
1429 (SCM x),
1430 "Coerces x into a char-set. @var{x} may be a string, character or char-set. A string is converted to the set of its constituent characters; a character is converted to a singleton set; a char-set is returned as-is.")
1431 #define FUNC_NAME s_scm_to_char_set
1432 {
1433 if (scm_is_string (x))
1434 return scm_string_to_char_set (x, SCM_UNDEFINED);
1435 else if (SCM_CHARP (x))
1436 return scm_char_set (scm_list_1 (x));
1437 else if (SCM_SMOB_PREDICATE (scm_tc16_charset, x))
1438 return x;
1439 else
1440 scm_wrong_type_arg (NULL, 0, x);
1441 }
1442 #undef FUNC_NAME
1443
1444 SCM_DEFINE (scm_char_set_size, "char-set-size", 1, 0, 0,
1445 (SCM cs),
1446 "Return the number of elements in character set @var{cs}.")
1447 #define FUNC_NAME s_scm_char_set_size
1448 {
1449 int k, count = 0;
1450 scm_t_char_set *cs_data;
1451
1452 SCM_VALIDATE_SMOB (1, cs, charset);
1453 cs_data = SCM_CHARSET_DATA (cs);
1454
1455 if (cs_data->len == 0)
1456 return scm_from_int (0);
1457
1458 for (k = 0; k < cs_data->len; k++)
1459 count += cs_data->ranges[k].hi - cs_data->ranges[k].lo + 1;
1460
1461 return scm_from_int (count);
1462 }
1463 #undef FUNC_NAME
1464
1465
1466 SCM_DEFINE (scm_char_set_count, "char-set-count", 2, 0, 0,
1467 (SCM pred, SCM cs),
1468 "Return the number of the elements int the character set\n"
1469 "@var{cs} which satisfy the predicate @var{pred}.")
1470 #define FUNC_NAME s_scm_char_set_count
1471 {
1472 int k, count = 0;
1473 scm_t_wchar n;
1474 scm_t_char_set *cs_data;
1475
1476 SCM_VALIDATE_PROC (1, pred);
1477 SCM_VALIDATE_SMOB (2, cs, charset);
1478 cs_data = SCM_CHARSET_DATA (cs);
1479 if (cs_data->len == 0)
1480 return scm_from_int (0);
1481
1482 for (k = 0; k < cs_data->len; k++)
1483 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
1484 {
1485 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1486 if (scm_is_true (res))
1487 count++;
1488 }
1489 return SCM_I_MAKINUM (count);
1490 }
1491 #undef FUNC_NAME
1492
1493
1494 SCM_DEFINE (scm_char_set_to_list, "char-set->list", 1, 0, 0,
1495 (SCM cs),
1496 "Return a list containing the elements of the character set\n"
1497 "@var{cs}.")
1498 #define FUNC_NAME s_scm_char_set_to_list
1499 {
1500 int k;
1501 scm_t_wchar n;
1502 SCM result = SCM_EOL;
1503 scm_t_char_set *p;
1504
1505 SCM_VALIDATE_SMOB (1, cs, charset);
1506 p = SCM_CHARSET_DATA (cs);
1507 if (p->len == 0)
1508 return SCM_EOL;
1509
1510 for (k = p->len - 1; k >= 0; k--)
1511 for (n = p->ranges[k].hi; n >= p->ranges[k].lo; n--)
1512 result = scm_cons (SCM_MAKE_CHAR (n), result);
1513 return result;
1514 }
1515 #undef FUNC_NAME
1516
1517
1518 SCM_DEFINE (scm_char_set_to_string, "char-set->string", 1, 0, 0,
1519 (SCM cs),
1520 "Return a string containing the elements of the character set\n"
1521 "@var{cs}. The order in which the characters are placed in the\n"
1522 "string is not defined.")
1523 #define FUNC_NAME s_scm_char_set_to_string
1524 {
1525 int k;
1526 int count = 0;
1527 int idx = 0;
1528 int wide = 0;
1529 SCM result;
1530 scm_t_wchar n;
1531 scm_t_char_set *cs_data;
1532 char *buf;
1533 scm_t_wchar *wbuf;
1534
1535 SCM_VALIDATE_SMOB (1, cs, charset);
1536 cs_data = SCM_CHARSET_DATA (cs);
1537 if (cs_data->len == 0)
1538 return scm_nullstr;
1539
1540 if (cs_data->ranges[cs_data->len - 1].hi > 255)
1541 wide = 1;
1542
1543 count = scm_to_int (scm_char_set_size (cs));
1544 if (wide)
1545 result = scm_i_make_wide_string (count, &wbuf);
1546 else
1547 result = scm_i_make_string (count, &buf);
1548
1549 for (k = 0; k < cs_data->len; k++)
1550 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
1551 {
1552 if (wide)
1553 wbuf[idx++] = n;
1554 else
1555 buf[idx++] = n;
1556 }
1557 return result;
1558 }
1559 #undef FUNC_NAME
1560
1561
1562 SCM_DEFINE (scm_char_set_contains_p, "char-set-contains?", 2, 0, 0,
1563 (SCM cs, SCM ch),
1564 "Return @code{#t} iff the character @var{ch} is contained in the\n"
1565 "character set @var{cs}.")
1566 #define FUNC_NAME s_scm_char_set_contains_p
1567 {
1568 SCM_VALIDATE_SMOB (1, cs, charset);
1569 SCM_VALIDATE_CHAR (2, ch);
1570 return scm_from_bool (SCM_CHARSET_GET (cs, SCM_CHAR (ch)));
1571 }
1572 #undef FUNC_NAME
1573
1574
1575 SCM_DEFINE (scm_char_set_every, "char-set-every", 2, 0, 0,
1576 (SCM pred, SCM cs),
1577 "Return a true value if every character in the character set\n"
1578 "@var{cs} satisfies the predicate @var{pred}.")
1579 #define FUNC_NAME s_scm_char_set_every
1580 {
1581 int k;
1582 scm_t_wchar n;
1583 SCM res = SCM_BOOL_T;
1584 scm_t_char_set *cs_data;
1585
1586 SCM_VALIDATE_PROC (1, pred);
1587 SCM_VALIDATE_SMOB (2, cs, charset);
1588
1589 cs_data = SCM_CHARSET_DATA (cs);
1590 if (cs_data->len == 0)
1591 return SCM_BOOL_T;
1592
1593 for (k = 0; k < cs_data->len; k++)
1594 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
1595 {
1596 res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1597 if (scm_is_false (res))
1598 return res;
1599 }
1600 return SCM_BOOL_T;
1601 }
1602 #undef FUNC_NAME
1603
1604
1605 SCM_DEFINE (scm_char_set_any, "char-set-any", 2, 0, 0,
1606 (SCM pred, SCM cs),
1607 "Return a true value if any character in the character set\n"
1608 "@var{cs} satisfies the predicate @var{pred}.")
1609 #define FUNC_NAME s_scm_char_set_any
1610 {
1611 int k;
1612 scm_t_wchar n;
1613 scm_t_char_set *cs_data;
1614
1615 SCM_VALIDATE_PROC (1, pred);
1616 SCM_VALIDATE_SMOB (2, cs, charset);
1617
1618 cs_data = SCM_CHARSET_DATA (cs);
1619 if (cs_data->len == 0)
1620 return SCM_BOOL_T;
1621
1622 for (k = 0; k < cs_data->len; k++)
1623 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
1624 {
1625 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1626 if (scm_is_true (res))
1627 return res;
1628 }
1629 return SCM_BOOL_F;
1630 }
1631 #undef FUNC_NAME
1632
1633
1634 SCM_DEFINE (scm_char_set_adjoin, "char-set-adjoin", 1, 0, 1,
1635 (SCM cs, SCM rest),
1636 "Add all character arguments to the first argument, which must\n"
1637 "be a character set.")
1638 #define FUNC_NAME s_scm_char_set_adjoin
1639 {
1640 SCM_VALIDATE_SMOB (1, cs, charset);
1641 SCM_VALIDATE_REST_ARGUMENT (rest);
1642 cs = scm_char_set_copy (cs);
1643
1644 while (!scm_is_null (rest))
1645 {
1646 SCM chr = SCM_CAR (rest);
1647 scm_t_wchar c;
1648
1649 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1650 rest = SCM_CDR (rest);
1651
1652 SCM_CHARSET_SET (cs, c);
1653 }
1654 return cs;
1655 }
1656 #undef FUNC_NAME
1657
1658
1659 SCM_DEFINE (scm_char_set_delete, "char-set-delete", 1, 0, 1,
1660 (SCM cs, SCM rest),
1661 "Delete all character arguments from the first argument, which\n"
1662 "must be a character set.")
1663 #define FUNC_NAME s_scm_char_set_delete
1664 {
1665 SCM_VALIDATE_SMOB (1, cs, charset);
1666 SCM_VALIDATE_REST_ARGUMENT (rest);
1667 cs = scm_char_set_copy (cs);
1668
1669 while (!scm_is_null (rest))
1670 {
1671 SCM chr = SCM_CAR (rest);
1672 scm_t_wchar c;
1673
1674 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1675 rest = SCM_CDR (rest);
1676
1677 SCM_CHARSET_UNSET (cs, c);
1678 }
1679 return cs;
1680 }
1681 #undef FUNC_NAME
1682
1683
1684 SCM_DEFINE (scm_char_set_adjoin_x, "char-set-adjoin!", 1, 0, 1,
1685 (SCM cs, SCM rest),
1686 "Add all character arguments to the first argument, which must\n"
1687 "be a character set.")
1688 #define FUNC_NAME s_scm_char_set_adjoin_x
1689 {
1690 SCM_VALIDATE_SMOB (1, cs, charset);
1691 SCM_VALIDATE_REST_ARGUMENT (rest);
1692
1693 while (!scm_is_null (rest))
1694 {
1695 SCM chr = SCM_CAR (rest);
1696 scm_t_wchar c;
1697
1698 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1699 rest = SCM_CDR (rest);
1700
1701 SCM_CHARSET_SET (cs, c);
1702 }
1703 return cs;
1704 }
1705 #undef FUNC_NAME
1706
1707
1708 SCM_DEFINE (scm_char_set_delete_x, "char-set-delete!", 1, 0, 1,
1709 (SCM cs, SCM rest),
1710 "Delete all character arguments from the first argument, which\n"
1711 "must be a character set.")
1712 #define FUNC_NAME s_scm_char_set_delete_x
1713 {
1714 SCM_VALIDATE_SMOB (1, cs, charset);
1715 SCM_VALIDATE_REST_ARGUMENT (rest);
1716
1717 while (!scm_is_null (rest))
1718 {
1719 SCM chr = SCM_CAR (rest);
1720 scm_t_wchar c;
1721
1722 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1723 rest = SCM_CDR (rest);
1724
1725 SCM_CHARSET_UNSET (cs, c);
1726 }
1727 return cs;
1728 }
1729 #undef FUNC_NAME
1730
1731
1732 SCM_DEFINE (scm_char_set_complement, "char-set-complement", 1, 0, 0,
1733 (SCM cs), "Return the complement of the character set @var{cs}.")
1734 #define FUNC_NAME s_scm_char_set_complement
1735 {
1736 SCM res;
1737 scm_t_char_set *p, *q;
1738
1739 SCM_VALIDATE_SMOB (1, cs, charset);
1740
1741 res = make_char_set (FUNC_NAME);
1742 p = SCM_CHARSET_DATA (res);
1743 q = SCM_CHARSET_DATA (cs);
1744
1745 charsets_complement (p, q);
1746 return res;
1747 }
1748 #undef FUNC_NAME
1749
1750
1751 SCM_DEFINE (scm_char_set_union, "char-set-union", 0, 0, 1,
1752 (SCM rest),
1753 "Return the union of all argument character sets.")
1754 #define FUNC_NAME s_scm_char_set_union
1755 {
1756 int c = 1;
1757 SCM res;
1758 scm_t_char_set *p;
1759
1760 SCM_VALIDATE_REST_ARGUMENT (rest);
1761
1762 res = make_char_set (FUNC_NAME);
1763 p = SCM_CHARSET_DATA (res);
1764 while (!scm_is_null (rest))
1765 {
1766 SCM cs = SCM_CAR (rest);
1767 SCM_VALIDATE_SMOB (c, cs, charset);
1768 c++;
1769 rest = SCM_CDR (rest);
1770
1771
1772 charsets_union (p, (scm_t_char_set *) SCM_SMOB_DATA (cs));
1773 }
1774 return res;
1775 }
1776 #undef FUNC_NAME
1777
1778
1779 SCM_DEFINE (scm_char_set_intersection, "char-set-intersection", 0, 0, 1,
1780 (SCM rest),
1781 "Return the intersection of all argument character sets.")
1782 #define FUNC_NAME s_scm_char_set_intersection
1783 {
1784 SCM res;
1785
1786 SCM_VALIDATE_REST_ARGUMENT (rest);
1787
1788 if (scm_is_null (rest))
1789 res = make_char_set (FUNC_NAME);
1790 else
1791 {
1792 scm_t_char_set *p;
1793 int argnum = 2;
1794
1795 res = scm_char_set_copy (SCM_CAR (rest));
1796 p = SCM_CHARSET_DATA (res);
1797 rest = SCM_CDR (rest);
1798
1799 while (scm_is_pair (rest))
1800 {
1801 SCM cs = SCM_CAR (rest);
1802 scm_t_char_set *cs_data;
1803
1804 SCM_VALIDATE_SMOB (argnum, cs, charset);
1805 argnum++;
1806 cs_data = SCM_CHARSET_DATA (cs);
1807 rest = SCM_CDR (rest);
1808 charsets_intersection (p, cs_data);
1809 }
1810 }
1811
1812 return res;
1813 }
1814 #undef FUNC_NAME
1815
1816
1817 SCM_DEFINE (scm_char_set_difference, "char-set-difference", 1, 0, 1,
1818 (SCM cs1, SCM rest),
1819 "Return the difference of all argument character sets.")
1820 #define FUNC_NAME s_scm_char_set_difference
1821 {
1822 int c = 2;
1823 SCM res, compl;
1824 scm_t_char_set *p, *q;
1825
1826 SCM_VALIDATE_SMOB (1, cs1, charset);
1827 SCM_VALIDATE_REST_ARGUMENT (rest);
1828
1829 res = scm_char_set_copy (cs1);
1830 p = SCM_CHARSET_DATA (res);
1831 compl = make_char_set (FUNC_NAME);
1832 q = SCM_CHARSET_DATA (compl);
1833 while (!scm_is_null (rest))
1834 {
1835 SCM cs = SCM_CAR (rest);
1836 SCM_VALIDATE_SMOB (c, cs, charset);
1837 c++;
1838 rest = SCM_CDR (rest);
1839
1840 charsets_complement (q, SCM_CHARSET_DATA (cs));
1841 charsets_intersection (p, q);
1842 }
1843 return res;
1844 }
1845 #undef FUNC_NAME
1846
1847
1848 SCM_DEFINE (scm_char_set_xor, "char-set-xor", 0, 0, 1,
1849 (SCM rest),
1850 "Return the exclusive-or of all argument character sets.")
1851 #define FUNC_NAME s_scm_char_set_xor
1852 {
1853 SCM res;
1854
1855 SCM_VALIDATE_REST_ARGUMENT (rest);
1856
1857 if (scm_is_null (rest))
1858 res = make_char_set (FUNC_NAME);
1859 else
1860 {
1861 int argnum = 2;
1862 scm_t_char_set *p;
1863
1864 res = scm_char_set_copy (SCM_CAR (rest));
1865 p = SCM_CHARSET_DATA (res);
1866 rest = SCM_CDR (rest);
1867
1868 while (scm_is_pair (rest))
1869 {
1870 SCM cs = SCM_CAR (rest);
1871 scm_t_char_set *cs_data;
1872
1873 SCM_VALIDATE_SMOB (argnum, cs, charset);
1874 argnum++;
1875 cs_data = SCM_CHARSET_DATA (cs);
1876 rest = SCM_CDR (rest);
1877
1878 charsets_xor (p, cs_data);
1879 }
1880 }
1881 return res;
1882 }
1883 #undef FUNC_NAME
1884
1885
1886 SCM_DEFINE (scm_char_set_diff_plus_intersection, "char-set-diff+intersection", 1, 0, 1,
1887 (SCM cs1, SCM rest),
1888 "Return the difference and the intersection of all argument\n"
1889 "character sets.")
1890 #define FUNC_NAME s_scm_char_set_diff_plus_intersection
1891 {
1892 int c = 2;
1893 SCM res1, res2;
1894 scm_t_char_set *p, *q;
1895
1896 SCM_VALIDATE_SMOB (1, cs1, charset);
1897 SCM_VALIDATE_REST_ARGUMENT (rest);
1898
1899 res1 = scm_char_set_copy (cs1);
1900 res2 = make_char_set (FUNC_NAME);
1901 p = SCM_CHARSET_DATA (res1);
1902 q = SCM_CHARSET_DATA (res2);
1903 while (!scm_is_null (rest))
1904 {
1905 SCM cs = SCM_CAR (rest);
1906 scm_t_char_set *r;
1907
1908 SCM_VALIDATE_SMOB (c, cs, charset);
1909 c++;
1910 r = SCM_CHARSET_DATA (cs);
1911
1912 charsets_union (q, r);
1913 charsets_intersection (p, r);
1914 rest = SCM_CDR (rest);
1915 }
1916 return scm_values (scm_list_2 (res1, res2));
1917 }
1918 #undef FUNC_NAME
1919
1920
1921 SCM_DEFINE (scm_char_set_complement_x, "char-set-complement!", 1, 0, 0,
1922 (SCM cs), "Return the complement of the character set @var{cs}.")
1923 #define FUNC_NAME s_scm_char_set_complement_x
1924 {
1925 SCM_VALIDATE_SMOB (1, cs, charset);
1926 cs = scm_char_set_complement (cs);
1927 return cs;
1928 }
1929 #undef FUNC_NAME
1930
1931
1932 SCM_DEFINE (scm_char_set_union_x, "char-set-union!", 1, 0, 1,
1933 (SCM cs1, SCM rest),
1934 "Return the union of all argument character sets.")
1935 #define FUNC_NAME s_scm_char_set_union_x
1936 {
1937 SCM_VALIDATE_SMOB (1, cs1, charset);
1938 SCM_VALIDATE_REST_ARGUMENT (rest);
1939
1940 cs1 = scm_char_set_union (scm_cons (cs1, rest));
1941 return cs1;
1942 }
1943 #undef FUNC_NAME
1944
1945
1946 SCM_DEFINE (scm_char_set_intersection_x, "char-set-intersection!", 1, 0, 1,
1947 (SCM cs1, SCM rest),
1948 "Return the intersection of all argument character sets.")
1949 #define FUNC_NAME s_scm_char_set_intersection_x
1950 {
1951 SCM_VALIDATE_SMOB (1, cs1, charset);
1952 SCM_VALIDATE_REST_ARGUMENT (rest);
1953
1954 cs1 = scm_char_set_intersection (scm_cons (cs1, rest));
1955 return cs1;
1956 }
1957 #undef FUNC_NAME
1958
1959
1960 SCM_DEFINE (scm_char_set_difference_x, "char-set-difference!", 1, 0, 1,
1961 (SCM cs1, SCM rest),
1962 "Return the difference of all argument character sets.")
1963 #define FUNC_NAME s_scm_char_set_difference_x
1964 {
1965 SCM_VALIDATE_SMOB (1, cs1, charset);
1966 SCM_VALIDATE_REST_ARGUMENT (rest);
1967
1968 cs1 = scm_char_set_difference (cs1, rest);
1969 return cs1;
1970 }
1971 #undef FUNC_NAME
1972
1973
1974 SCM_DEFINE (scm_char_set_xor_x, "char-set-xor!", 1, 0, 1,
1975 (SCM cs1, SCM rest),
1976 "Return the exclusive-or of all argument character sets.")
1977 #define FUNC_NAME s_scm_char_set_xor_x
1978 {
1979 /* a side-effecting variant should presumably give consistent results:
1980 (define a (char-set #\a))
1981 (char-set-xor a a a) -> char set #\a
1982 (char-set-xor! a a a) -> char set #\a
1983 */
1984 cs1 = scm_char_set_xor (scm_cons (cs1, rest));
1985 return cs1;
1986 }
1987 #undef FUNC_NAME
1988
1989
1990 SCM_DEFINE (scm_char_set_diff_plus_intersection_x,
1991 "char-set-diff+intersection!", 2, 0, 1, (SCM cs1, SCM cs2,
1992 SCM rest),
1993 "Return the difference and the intersection of all argument\n"
1994 "character sets.")
1995 #define FUNC_NAME s_scm_char_set_diff_plus_intersection_x
1996 {
1997 SCM diff, intersect;
1998
1999 diff = scm_char_set_difference (cs1, scm_cons (cs2, rest));
2000 intersect =
2001 scm_char_set_intersection (scm_cons (cs1, scm_cons (cs2, rest)));
2002 cs1 = diff;
2003 cs2 = intersect;
2004 return scm_values (scm_list_2 (cs1, cs2));
2005 }
2006 #undef FUNC_NAME
2007
2008 \f
2009
2010 /* Standard character sets. */
2011
2012 SCM scm_char_set_lower_case;
2013 SCM scm_char_set_upper_case;
2014 SCM scm_char_set_title_case;
2015 SCM scm_char_set_letter;
2016 SCM scm_char_set_digit;
2017 SCM scm_char_set_letter_and_digit;
2018 SCM scm_char_set_graphic;
2019 SCM scm_char_set_printing;
2020 SCM scm_char_set_whitespace;
2021 SCM scm_char_set_iso_control;
2022 SCM scm_char_set_punctuation;
2023 SCM scm_char_set_symbol;
2024 SCM scm_char_set_hex_digit;
2025 SCM scm_char_set_blank;
2026 SCM scm_char_set_ascii;
2027 SCM scm_char_set_empty;
2028 SCM scm_char_set_full;
2029
2030
2031 /* Create an empty character set and return it after binding it to NAME. */
2032 static inline SCM
2033 define_charset (const char *name, const scm_t_char_set *p)
2034 {
2035 SCM cs;
2036
2037 SCM_NEWSMOB (cs, scm_tc16_charset, p);
2038 scm_c_define (name, cs);
2039 return scm_permanent_object (cs);
2040 }
2041
2042 #ifdef SCM_CHARSET_DEBUG
2043 SCM_DEFINE (scm_debug_char_set, "debug-char-set", 1, 0, 0,
2044 (SCM charset),
2045 "Print out the internal C structure of @var{charset}.\n")
2046 #define FUNC_NAME s_scm_debug_char_set
2047 {
2048 int i;
2049 scm_t_char_set *cs = SCM_CHARSET_DATA (charset);
2050 fprintf (stderr, "cs %p\n", cs);
2051 fprintf (stderr, "len %d\n", cs->len);
2052 fprintf (stderr, "arr %p\n", cs->ranges);
2053 for (i = 0; i < cs->len; i++)
2054 {
2055 if (cs->ranges[i].lo == cs->ranges[i].hi)
2056 fprintf (stderr, "%04x\n", cs->ranges[i].lo);
2057 else
2058 fprintf (stderr, "%04x..%04x\t[%d]\n",
2059 cs->ranges[i].lo,
2060 cs->ranges[i].hi, cs->ranges[i].hi - cs->ranges[i].lo + 1);
2061 }
2062 printf ("\n");
2063 return SCM_UNSPECIFIED;
2064 }
2065 #undef FUNC_NAME
2066 #endif /* SCM_CHARSET_DEBUG */
2067 \f
2068
2069
2070 void
2071 scm_init_srfi_14 (void)
2072 {
2073 scm_tc16_charset = scm_make_smob_type ("character-set", 0);
2074 scm_set_smob_free (scm_tc16_charset, charset_free);
2075 scm_set_smob_print (scm_tc16_charset, charset_print);
2076
2077 scm_tc16_charset_cursor = scm_make_smob_type ("char-set-cursor", 0);
2078 scm_set_smob_free (scm_tc16_charset_cursor, charset_cursor_free);
2079 scm_set_smob_print (scm_tc16_charset_cursor, charset_cursor_print);
2080
2081 scm_char_set_upper_case =
2082 define_charset ("char-set:upper-case", &cs_upper_case);
2083 scm_char_set_lower_case =
2084 define_charset ("char-set:lower-case", &cs_lower_case);
2085 scm_char_set_title_case =
2086 define_charset ("char-set:title-case", &cs_title_case);
2087 scm_char_set_letter = define_charset ("char-set:letter", &cs_letter);
2088 scm_char_set_digit = define_charset ("char-set:digit", &cs_digit);
2089 scm_char_set_letter_and_digit =
2090 define_charset ("char-set:letter+digit", &cs_letter_plus_digit);
2091 scm_char_set_graphic = define_charset ("char-set:graphic", &cs_graphic);
2092 scm_char_set_printing = define_charset ("char-set:printing", &cs_printing);
2093 scm_char_set_whitespace =
2094 define_charset ("char-set:whitespace", &cs_whitespace);
2095 scm_char_set_iso_control =
2096 define_charset ("char-set:iso-control", &cs_iso_control);
2097 scm_char_set_punctuation =
2098 define_charset ("char-set:punctuation", &cs_punctuation);
2099 scm_char_set_symbol = define_charset ("char-set:symbol", &cs_symbol);
2100 scm_char_set_hex_digit =
2101 define_charset ("char-set:hex-digit", &cs_hex_digit);
2102 scm_char_set_blank = define_charset ("char-set:blank", &cs_blank);
2103 scm_char_set_ascii = define_charset ("char-set:ascii", &cs_ascii);
2104 scm_char_set_empty = define_charset ("char-set:empty", &cs_empty);
2105 scm_char_set_full = define_charset ("char-set:full", &cs_full);
2106
2107 #include "libguile/srfi-14.x"
2108 }
2109
2110 /* End of srfi-14.c. */