char-set-xor! should modify the input parameter
[bpt/guile.git] / libguile / srfi-14.c
CommitLineData
63181a97
MV
1/* srfi-14.c --- SRFI-14 procedures for Guile
2 *
6caac03c 3 * Copyright (C) 2001, 2004, 2006, 2007 Free Software Foundation, Inc.
63181a97
MV
4 *
5 * This library is free software; you can redistribute it and/or
53befeb7
NJ
6 * modify it under the terms of the GNU Lesser General Public License
7 * as published by the Free Software Foundation; either version 3 of
8 * the License, or (at your option) any later version.
63181a97 9 *
53befeb7
NJ
10 * This library is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
63181a97
MV
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
53befeb7
NJ
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301 USA
63181a97
MV
19 */
20
a17d2654
LC
21#ifdef HAVE_CONFIG_H
22# include <config.h>
23#endif
24
25
63181a97 26#include <string.h>
f49dbcad 27#include <unictype.h>
63181a97
MV
28
29#include "libguile.h"
30#include "libguile/srfi-14.h"
f49dbcad 31#include "libguile/strings.h"
24d23822 32#include "libguile/chars.h"
63181a97 33
f49dbcad
MG
34/* Include the pre-computed standard charset data. */
35#include "libguile/srfi-14.i.c"
63181a97 36
f49dbcad 37#define SCM_CHARSET_DATA(charset) ((scm_t_char_set *) SCM_SMOB_DATA (charset))
63181a97 38
f49dbcad
MG
39#define SCM_CHARSET_SET(cs, idx) \
40 scm_i_charset_set (SCM_CHARSET_DATA (cs), idx)
63181a97 41
f49dbcad
MG
42#define SCM_CHARSET_UNSET(cs, idx) \
43 scm_i_charset_unset (SCM_CHARSET_DATA (cs), idx)
63181a97
MV
44
45/* Smob type code for character sets. */
46int scm_tc16_charset = 0;
f49dbcad
MG
47int scm_tc16_charset_cursor = 0;
48
49/* True if N exists in charset CS. */
50int
51scm_i_charset_get (scm_t_char_set *cs, scm_t_wchar n)
52{
53 size_t i;
54
55 i = 0;
56 while (i < cs->len)
57 {
58 if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi)
59 return 1;
60 i++;
61 }
62
63 return 0;
64}
65
66/* Put N into charset CS. */
67void
68scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n)
69{
70 size_t i;
71 size_t len;
72
73 len = cs->len;
74
75 i = 0;
76 while (i < len)
77 {
78 /* Already in this range */
79 if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi)
80 {
81 return;
82 }
83
84 if (n == cs->ranges[i].lo - 1)
85 {
86 /* This char is one below the current range. */
87 if (i > 0 && cs->ranges[i - 1].hi + 1 == n)
88 {
89 /* It is also one above the previous range, so combine them. */
90 cs->ranges[i - 1].hi = cs->ranges[i].hi;
91 if (i < len - 1)
92 memmove (cs->ranges + i, cs->ranges + (i + 1),
93 sizeof (scm_t_char_range) * (len - i - 1));
94 cs->ranges = scm_gc_realloc (cs->ranges,
95 sizeof (scm_t_char_range) * len,
96 sizeof (scm_t_char_range) * (len -
97 1),
98 "character-set");
99 cs->len = len - 1;
100 return;
101 }
102 else
103 {
104 /* Expand the range down by one. */
105 cs->ranges[i].lo = n;
106 return;
107 }
108 }
109 else if (n == cs->ranges[i].hi + 1)
110 {
111 /* This char is one above the current range. */
112 if (i < len - 1 && cs->ranges[i + 1].lo - 1 == n)
113 {
114 /* It is also one below the next range, so combine them. */
115 cs->ranges[i].hi = cs->ranges[i + 1].hi;
116 if (i < len - 2)
117 memmove (cs->ranges + (i + 1), cs->ranges + (i + 2),
118 sizeof (scm_t_char_range) * (len - i - 2));
119 cs->ranges = scm_gc_realloc (cs->ranges,
120 sizeof (scm_t_char_range) * len,
121 sizeof (scm_t_char_range) * (len -
122 1),
123 "character-set");
124 cs->len = len - 1;
125 return;
126 }
127 else
128 {
129 /* Expand the range up by one. */
130 cs->ranges[i].hi = n;
131 return;
132 }
133 }
134 else if (n < cs->ranges[i].lo - 1)
135 {
136 /* This is a new range below the current one. */
137 cs->ranges = scm_gc_realloc (cs->ranges,
138 sizeof (scm_t_char_range) * len,
139 sizeof (scm_t_char_range) * (len + 1),
140 "character-set");
141 memmove (cs->ranges + (i + 1), cs->ranges + i,
142 sizeof (scm_t_char_range) * (len - i));
143 cs->ranges[i].lo = n;
144 cs->ranges[i].hi = n;
145 cs->len = len + 1;
146 return;
147 }
148
149 i++;
150 }
151
152 /* This is a new range above all previous ranges. */
153 if (len == 0)
154 {
155 cs->ranges = scm_gc_malloc (sizeof (scm_t_char_range), "character-set");
156 }
157 else
158 {
159 cs->ranges = scm_gc_realloc (cs->ranges,
160 sizeof (scm_t_char_range) * len,
161 sizeof (scm_t_char_range) * (len + 1),
162 "character-set");
163 }
164 cs->ranges[len].lo = n;
165 cs->ranges[len].hi = n;
166 cs->len = len + 1;
167
168 return;
169}
170
171/* If N is in charset CS, remove it. */
172void
173scm_i_charset_unset (scm_t_char_set *cs, scm_t_wchar n)
174{
175 size_t i;
176 size_t len;
177
178 len = cs->len;
179
180 i = 0;
181 while (i < len)
182 {
183 if (n < cs->ranges[i].lo)
184 /* Not in this set. */
185 return;
186
187 if (n == cs->ranges[i].lo && n == cs->ranges[i].hi)
188 {
189 /* Remove this one-character range. */
190 if (len == 1)
191 {
192 scm_gc_free (cs->ranges,
193 sizeof (scm_t_char_range) * cs->len,
194 "character-set");
195 cs->ranges = NULL;
196 cs->len = 0;
197 return;
198 }
199 else if (i < len - 1)
200 {
201 memmove (cs->ranges + i, cs->ranges + (i + 1),
202 sizeof (scm_t_char_range) * (len - i - 1));
203 cs->ranges = scm_gc_realloc (cs->ranges,
204 sizeof (scm_t_char_range) * len,
205 sizeof (scm_t_char_range) * (len -
206 1),
207 "character-set");
208 cs->len = len - 1;
209 return;
210 }
211 else if (i == len - 1)
212 {
213 cs->ranges = scm_gc_realloc (cs->ranges,
214 sizeof (scm_t_char_range) * len,
215 sizeof (scm_t_char_range) * (len -
216 1),
217 "character-set");
218 cs->len = len - 1;
219 return;
220 }
221 }
222 else if (n == cs->ranges[i].lo)
223 {
224 /* Shrink this range from the left. */
225 cs->ranges[i].lo = n + 1;
226 return;
227 }
228 else if (n == cs->ranges[i].hi)
229 {
230 /* Shrink this range from the right. */
231 cs->ranges[i].hi = n - 1;
232 return;
233 }
234 else if (n > cs->ranges[i].lo && n < cs->ranges[i].hi)
235 {
236 /* Split this range into two pieces. */
237 cs->ranges = scm_gc_realloc (cs->ranges,
238 sizeof (scm_t_char_range) * len,
239 sizeof (scm_t_char_range) * (len + 1),
240 "character-set");
241 if (i < len - 1)
242 memmove (cs->ranges + (i + 2), cs->ranges + (i + 1),
243 sizeof (scm_t_char_range) * (len - i - 1));
244 cs->ranges[i + 1].hi = cs->ranges[i].hi;
245 cs->ranges[i + 1].lo = n + 1;
246 cs->ranges[i].hi = n - 1;
247 cs->len = len + 1;
248 return;
249 }
250
251 i++;
252 }
253
254 /* This value is above all ranges, so do nothing here. */
255 return;
256}
257
258static int
259charsets_equal (scm_t_char_set *a, scm_t_char_set *b)
260{
261 if (a->len != b->len)
262 return 0;
263
264 if (memcmp (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len) != 0)
265 return 0;
266
267 return 1;
268}
269
270/* Return true if every character in A is also in B. */
271static int
272charsets_leq (scm_t_char_set *a, scm_t_char_set *b)
273{
274 size_t i = 0, j = 0;
275 scm_t_wchar alo, ahi;
276
277 if (a->len == 0)
278 return 1;
279 if (b->len == 0)
280 return 0;
281 while (i < a->len)
282 {
283 alo = a->ranges[i].lo;
284 ahi = a->ranges[i].hi;
285 while (b->ranges[j].hi < alo)
286 {
287 if (j < b->len - 1)
288 j++;
289 else
290 return 0;
291 }
292 if (alo < b->ranges[j].lo || ahi > b->ranges[j].hi)
293 return 0;
294 i++;
295 }
296
297 return 1;
298}
299
300/* Merge B into A. */
301static void
302charsets_union (scm_t_char_set *a, scm_t_char_set *b)
303{
304 size_t i = 0;
305 scm_t_wchar blo, bhi, n;
306
307 if (b->len == 0)
308 return;
309
310 if (a->len == 0)
311 {
312 a->len = b->len;
313 a->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * b->len,
314 "character-set");
315 memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * b->len);
316 return;
317 }
318
319 /* This needs optimization. */
320 while (i < b->len)
321 {
322 blo = b->ranges[i].lo;
323 bhi = b->ranges[i].hi;
324 for (n = blo; n <= bhi; n++)
325 scm_i_charset_set (a, n);
326
327 i++;
328 }
329
330 return;
331}
332
333/* Remove elements not both in A and B from A. */
334static void
335charsets_intersection (scm_t_char_set *a, scm_t_char_set *b)
336{
337 size_t i = 0;
338 scm_t_wchar blo, bhi, n;
339 scm_t_char_set *c;
340
341 if (a->len == 0)
342 return;
343
344 if (b->len == 0)
345 {
346 scm_gc_free (a->ranges, sizeof (scm_t_char_range) * a->len,
347 "character-set");
348 a->len = 0;
349 return;
350 }
351
352 c = (scm_t_char_set *) scm_malloc (sizeof (scm_t_char_set));
353 c->len = 0;
354 c->ranges = NULL;
355
356 while (i < b->len)
357 {
358 blo = b->ranges[i].lo;
359 bhi = b->ranges[i].hi;
360 for (n = blo; n <= bhi; n++)
361 if (scm_i_charset_get (a, n))
362 scm_i_charset_set (c, n);
363 i++;
364 }
365 scm_gc_free (a->ranges, sizeof (scm_t_char_range) * a->len,
366 "character-set");
367
368 a->len = c->len;
369 if (c->len != 0)
370 a->ranges = c->ranges;
371 else
372 a->ranges = NULL;
373 free (c);
374 return;
375}
376
377/* Make P the compelement of Q. */
378static void
379charsets_complement (scm_t_char_set *p, scm_t_char_set *q)
380{
381 int k, idx;
382
383 if (q->len == 0)
384 {
385 /* Fill with all valid codepoints. */
386 p->len = 2;
387 p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2,
388 "character-set");
389 p->ranges[0].lo = 0;
24d23822
MG
390 p->ranges[0].hi = SCM_CODEPOINT_SURROGATE_START - 1;
391 p->ranges[1].lo = SCM_CODEPOINT_SURROGATE_END + 1;
f49dbcad
MG
392 p->ranges[1].hi = SCM_CODEPOINT_MAX;
393 return;
394 }
395
396 if (p->len > 0)
397 scm_gc_free (p->ranges, sizeof (scm_t_char_set) * p->len,
398 "character-set");
399
400 p->len = 0;
401 if (q->ranges[0].lo > 0)
402 p->len++;
403 if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
404 p->len++;
405 p->len += q->len - 1;
406 p->ranges =
407 (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) * p->len,
408 "character-set");
409 idx = 0;
410 if (q->ranges[0].lo > 0)
411 {
412 p->ranges[idx].lo = 0;
413 p->ranges[idx++].hi = q->ranges[0].lo - 1;
414 }
415 for (k = 1; k < q->len; k++)
416 {
417 p->ranges[idx].lo = q->ranges[k - 1].hi + 1;
418 p->ranges[idx++].hi = q->ranges[k].lo - 1;
419 }
420 if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
421 {
422 p->ranges[idx].lo = q->ranges[q->len - 1].hi + 1;
423 p->ranges[idx].hi = SCM_CODEPOINT_MAX;
424 }
425 return;
426}
427
428/* Replace A with elements only found in one of A or B. */
429static void
430charsets_xor (scm_t_char_set *a, scm_t_char_set *b)
431{
432 size_t i = 0;
433 scm_t_wchar blo, bhi, n;
434
435 if (b->len == 0)
436 {
437 return;
438 }
63181a97 439
f49dbcad
MG
440 if (a->len == 0)
441 {
442 a->ranges =
443 (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) *
444 b->len, "character-set");
445 a->len = b->len;
446 memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len);
447 return;
448 }
449
450 while (i < b->len)
451 {
452 blo = b->ranges[i].lo;
453 bhi = b->ranges[i].hi;
454 for (n = blo; n <= bhi; n++)
455 {
456 if (scm_i_charset_get (a, n))
457 scm_i_charset_unset (a, n);
458 else
459 scm_i_charset_set (a, n);
460 }
461
462 i++;
463 }
464 return;
465}
63181a97
MV
466
467/* Smob print hook for character sets. */
468static int
469charset_print (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED)
470{
f49dbcad 471 size_t i;
63181a97 472 int first = 1;
f49dbcad
MG
473 scm_t_char_set *p;
474 const size_t max_ranges_to_print = 50;
475
476 p = SCM_CHARSET_DATA (charset);
63181a97
MV
477
478 scm_puts ("#<charset {", port);
f49dbcad
MG
479 for (i = 0; i < p->len; i++)
480 {
481 if (first)
482 first = 0;
483 else
484 scm_puts (" ", port);
485 scm_write (SCM_MAKE_CHAR (p->ranges[i].lo), port);
486 if (p->ranges[i].lo != p->ranges[i].hi)
487 {
488 scm_puts ("..", port);
489 scm_write (SCM_MAKE_CHAR (p->ranges[i].hi), port);
490 }
491 if (i >= max_ranges_to_print)
492 {
493 /* Too many to print here. Quit early. */
494 scm_puts (" ...", port);
495 break;
496 }
497 }
63181a97
MV
498 scm_puts ("}>", port);
499 return 1;
500}
501
502
503/* Smob free hook for character sets. */
504static size_t
505charset_free (SCM charset)
506{
f49dbcad
MG
507 scm_t_char_set *cs;
508 size_t len = 0;
509
510 cs = SCM_CHARSET_DATA (charset);
511 if (cs != NULL)
512 len = cs->len;
513 if (len > 0)
514 scm_gc_free (cs->ranges, sizeof (scm_t_char_range) * len,
515 "character-set");
516
517 cs->ranges = NULL;
518 cs->len = 0;
519
520 scm_gc_free (cs, sizeof (scm_t_char_set), "character-set");
521
522 scm_remember_upto_here_1 (charset);
523
524 return 0;
525}
526
527
528/* Smob print hook for character sets cursors. */
529static int
530charset_cursor_print (SCM cursor, SCM port,
531 scm_print_state *pstate SCM_UNUSED)
532{
533 scm_t_char_set_cursor *cur;
534
535 cur = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
536
537 scm_puts ("#<charset-cursor ", port);
538 if (cur->range == (size_t) (-1))
539 scm_puts ("(empty)", port);
540 else
541 {
542 scm_write (scm_from_size_t (cur->range), port);
543 scm_puts (":", port);
544 scm_write (scm_from_int32 (cur->n), port);
545 }
546 scm_puts (">", port);
547 return 1;
548}
549
550/* Smob free hook for character sets. */
551static size_t
552charset_cursor_free (SCM charset)
553{
554 scm_t_char_set_cursor *cur;
555
556 cur = (scm_t_char_set_cursor *) SCM_SMOB_DATA (charset);
557 scm_gc_free (cur, sizeof (scm_t_char_set_cursor), "charset-cursor");
558 scm_remember_upto_here_1 (charset);
559
560 return 0;
63181a97
MV
561}
562
563
564/* Create a new, empty character set. */
565static SCM
f49dbcad 566make_char_set (const char *func_name)
63181a97 567{
f49dbcad 568 scm_t_char_set *p;
63181a97 569
f49dbcad
MG
570 p = scm_gc_malloc (sizeof (scm_t_char_set), "character-set");
571 memset (p, 0, sizeof (scm_t_char_set));
63181a97
MV
572 SCM_RETURN_NEWSMOB (scm_tc16_charset, p);
573}
574
575
576SCM_DEFINE (scm_char_set_p, "char-set?", 1, 0, 0,
577 (SCM obj),
578 "Return @code{#t} if @var{obj} is a character set, @code{#f}\n"
579 "otherwise.")
580#define FUNC_NAME s_scm_char_set_p
581{
582 return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_charset, obj));
583}
584#undef FUNC_NAME
585
586
587SCM_DEFINE (scm_char_set_eq, "char-set=", 0, 0, 1,
588 (SCM char_sets),
589 "Return @code{#t} if all given character sets are equal.")
590#define FUNC_NAME s_scm_char_set_eq
591{
592 int argnum = 1;
f49dbcad 593 scm_t_char_set *cs1_data = NULL;
63181a97
MV
594
595 SCM_VALIDATE_REST_ARGUMENT (char_sets);
596
d2e53ed6 597 while (!scm_is_null (char_sets))
63181a97
MV
598 {
599 SCM csi = SCM_CAR (char_sets);
f49dbcad 600 scm_t_char_set *csi_data;
63181a97
MV
601
602 SCM_VALIDATE_SMOB (argnum, csi, charset);
603 argnum++;
f49dbcad 604 csi_data = SCM_CHARSET_DATA (csi);
63181a97 605 if (cs1_data == NULL)
f49dbcad
MG
606 cs1_data = csi_data;
607 else if (!charsets_equal (cs1_data, csi_data))
608 return SCM_BOOL_F;
63181a97
MV
609 char_sets = SCM_CDR (char_sets);
610 }
611 return SCM_BOOL_T;
612}
613#undef FUNC_NAME
614
615
616SCM_DEFINE (scm_char_set_leq, "char-set<=", 0, 0, 1,
617 (SCM char_sets),
618 "Return @code{#t} if every character set @var{cs}i is a subset\n"
619 "of character set @var{cs}i+1.")
620#define FUNC_NAME s_scm_char_set_leq
621{
622 int argnum = 1;
f49dbcad 623 scm_t_char_set *prev_data = NULL;
63181a97
MV
624
625 SCM_VALIDATE_REST_ARGUMENT (char_sets);
626
d2e53ed6 627 while (!scm_is_null (char_sets))
63181a97
MV
628 {
629 SCM csi = SCM_CAR (char_sets);
f49dbcad 630 scm_t_char_set *csi_data;
63181a97
MV
631
632 SCM_VALIDATE_SMOB (argnum, csi, charset);
633 argnum++;
f49dbcad 634 csi_data = SCM_CHARSET_DATA (csi);
63181a97 635 if (prev_data)
f49dbcad
MG
636 {
637 if (!charsets_leq (prev_data, csi_data))
638 return SCM_BOOL_F;
639 }
63181a97
MV
640 prev_data = csi_data;
641 char_sets = SCM_CDR (char_sets);
642 }
643 return SCM_BOOL_T;
644}
645#undef FUNC_NAME
646
647
648SCM_DEFINE (scm_char_set_hash, "char-set-hash", 1, 1, 0,
649 (SCM cs, SCM bound),
650 "Compute a hash value for the character set @var{cs}. If\n"
651 "@var{bound} is given and non-zero, it restricts the\n"
652 "returned value to the range 0 @dots{} @var{bound - 1}.")
653#define FUNC_NAME s_scm_char_set_hash
654{
655 const unsigned long default_bnd = 871;
656 unsigned long bnd;
f49dbcad 657 scm_t_char_set *p;
63181a97
MV
658 unsigned long val = 0;
659 int k;
f49dbcad 660 scm_t_wchar c;
63181a97
MV
661
662 SCM_VALIDATE_SMOB (1, cs, charset);
663
664 if (SCM_UNBNDP (bound))
665 bnd = default_bnd;
666 else
667 {
668 bnd = scm_to_ulong (bound);
669 if (bnd == 0)
f49dbcad 670 bnd = default_bnd;
63181a97
MV
671 }
672
f49dbcad
MG
673 p = SCM_CHARSET_DATA (cs);
674 for (k = 0; k < p->len; k++)
63181a97 675 {
f49dbcad
MG
676 for (c = p->ranges[k].lo; c <= p->ranges[k].hi; c++)
677 val = c + (val << 1);
63181a97
MV
678 }
679 return scm_from_ulong (val % bnd);
680}
681#undef FUNC_NAME
682
683
684SCM_DEFINE (scm_char_set_cursor, "char-set-cursor", 1, 0, 0,
f49dbcad 685 (SCM cs), "Return a cursor into the character set @var{cs}.")
63181a97
MV
686#define FUNC_NAME s_scm_char_set_cursor
687{
f49dbcad
MG
688 scm_t_char_set *cs_data;
689 scm_t_char_set_cursor *cur_data;
63181a97
MV
690
691 SCM_VALIDATE_SMOB (1, cs, charset);
f49dbcad
MG
692 cs_data = SCM_CHARSET_DATA (cs);
693 cur_data =
694 (scm_t_char_set_cursor *) scm_gc_malloc (sizeof (scm_t_char_set_cursor),
695 "charset-cursor");
696 if (cs_data->len == 0)
63181a97 697 {
f49dbcad
MG
698 cur_data->range = (size_t) (-1);
699 cur_data->n = 0;
63181a97 700 }
f49dbcad
MG
701 else
702 {
703 cur_data->range = 0;
704 cur_data->n = cs_data->ranges[0].lo;
705 }
706 SCM_RETURN_NEWSMOB (scm_tc16_charset_cursor, cur_data);
63181a97
MV
707}
708#undef FUNC_NAME
709
710
711SCM_DEFINE (scm_char_set_ref, "char-set-ref", 2, 0, 0,
f49dbcad
MG
712 (SCM cs, SCM cursor),
713 "Return the character at the current cursor position\n"
714 "@var{cursor} in the character set @var{cs}. It is an error to\n"
715 "pass a cursor for which @code{end-of-char-set?} returns true.")
63181a97
MV
716#define FUNC_NAME s_scm_char_set_ref
717{
f49dbcad
MG
718 scm_t_char_set *cs_data;
719 scm_t_char_set_cursor *cur_data;
720 size_t i;
721
63181a97 722 SCM_VALIDATE_SMOB (1, cs, charset);
f49dbcad
MG
723 SCM_VALIDATE_SMOB (2, cursor, charset_cursor);
724
725 cs_data = SCM_CHARSET_DATA (cs);
726 cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
63181a97 727
f49dbcad
MG
728 /* Validate that this cursor is still true. */
729 i = cur_data->range;
730 if (i == (size_t) (-1)
731 || i >= cs_data->len
732 || cur_data->n < cs_data->ranges[i].lo
733 || cur_data->n > cs_data->ranges[i].hi)
63181a97 734 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
f49dbcad 735 return SCM_MAKE_CHAR (cur_data->n);
63181a97
MV
736}
737#undef FUNC_NAME
738
739
740SCM_DEFINE (scm_char_set_cursor_next, "char-set-cursor-next", 2, 0, 0,
f49dbcad
MG
741 (SCM cs, SCM cursor),
742 "Advance the character set cursor @var{cursor} to the next\n"
743 "character in the character set @var{cs}. It is an error if the\n"
744 "cursor given satisfies @code{end-of-char-set?}.")
63181a97
MV
745#define FUNC_NAME s_scm_char_set_cursor_next
746{
f49dbcad
MG
747 scm_t_char_set *cs_data;
748 scm_t_char_set_cursor *cur_data;
749 size_t i;
750
63181a97 751 SCM_VALIDATE_SMOB (1, cs, charset);
f49dbcad
MG
752 SCM_VALIDATE_SMOB (2, cursor, charset_cursor);
753
754 cs_data = SCM_CHARSET_DATA (cs);
755 cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
63181a97 756
f49dbcad
MG
757 /* Validate that this cursor is still true. */
758 i = cur_data->range;
759 if (i == (size_t) (-1)
760 || i >= cs_data->len
761 || cur_data->n < cs_data->ranges[i].lo
762 || cur_data->n > cs_data->ranges[i].hi)
63181a97 763 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
f49dbcad
MG
764 /* Increment the cursor. */
765 if (cur_data->n == cs_data->ranges[i].hi)
63181a97 766 {
f49dbcad
MG
767 if (i + 1 < cs_data->len)
768 {
769 cur_data->range = i + 1;
770 cur_data->n = cs_data->ranges[i + 1].lo;
771 }
772 else
773 {
774 /* This is the end of the road. */
775 cur_data->range = (size_t) (-1);
776 cur_data->n = 0;
777 }
63181a97 778 }
f49dbcad
MG
779 else
780 {
781 cur_data->n = cur_data->n + 1;
782 }
783
784 return cursor;
63181a97
MV
785}
786#undef FUNC_NAME
787
788
789SCM_DEFINE (scm_end_of_char_set_p, "end-of-char-set?", 1, 0, 0,
f49dbcad
MG
790 (SCM cursor),
791 "Return @code{#t} if @var{cursor} has reached the end of a\n"
792 "character set, @code{#f} otherwise.")
63181a97
MV
793#define FUNC_NAME s_scm_end_of_char_set_p
794{
f49dbcad
MG
795 scm_t_char_set_cursor *cur_data;
796 SCM_VALIDATE_SMOB (1, cursor, charset_cursor);
797
798 cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
799 if (cur_data->range == (size_t) (-1))
800 return SCM_BOOL_T;
801
802 return SCM_BOOL_F;
63181a97
MV
803}
804#undef FUNC_NAME
805
806
807SCM_DEFINE (scm_char_set_fold, "char-set-fold", 3, 0, 0,
f49dbcad
MG
808 (SCM kons, SCM knil, SCM cs),
809 "Fold the procedure @var{kons} over the character set @var{cs},\n"
810 "initializing it with @var{knil}.")
63181a97
MV
811#define FUNC_NAME s_scm_char_set_fold
812{
f49dbcad 813 scm_t_char_set *cs_data;
63181a97 814 int k;
f49dbcad 815 scm_t_wchar n;
63181a97
MV
816
817 SCM_VALIDATE_PROC (1, kons);
818 SCM_VALIDATE_SMOB (3, cs, charset);
819
f49dbcad
MG
820 cs_data = SCM_CHARSET_DATA (cs);
821
822 if (cs_data->len == 0)
823 return knil;
824
825 for (k = 0; k < cs_data->len; k++)
826 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
63181a97 827 {
f49dbcad 828 knil = scm_call_2 (kons, SCM_MAKE_CHAR (n), knil);
63181a97
MV
829 }
830 return knil;
831}
832#undef FUNC_NAME
833
834
835SCM_DEFINE (scm_char_set_unfold, "char-set-unfold", 4, 1, 0,
836 (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
837 "This is a fundamental constructor for character sets.\n"
838 "@itemize @bullet\n"
839 "@item @var{g} is used to generate a series of ``seed'' values\n"
840 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
841 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
842 "@item @var{p} tells us when to stop -- when it returns true\n"
843 "when applied to one of the seed values.\n"
844 "@item @var{f} maps each seed value to a character. These\n"
845 "characters are added to the base character set @var{base_cs} to\n"
846 "form the result; @var{base_cs} defaults to the empty set.\n"
847 "@end itemize")
848#define FUNC_NAME s_scm_char_set_unfold
849{
850 SCM result, tmp;
851
852 SCM_VALIDATE_PROC (1, p);
853 SCM_VALIDATE_PROC (2, f);
854 SCM_VALIDATE_PROC (3, g);
855 if (!SCM_UNBNDP (base_cs))
856 {
857 SCM_VALIDATE_SMOB (5, base_cs, charset);
858 result = scm_char_set_copy (base_cs);
859 }
860 else
861 result = make_char_set (FUNC_NAME);
862
863 tmp = scm_call_1 (p, seed);
864 while (scm_is_false (tmp))
865 {
866 SCM ch = scm_call_1 (f, seed);
867 if (!SCM_CHARP (ch))
868 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
869 SCM_CHARSET_SET (result, SCM_CHAR (ch));
870
871 seed = scm_call_1 (g, seed);
872 tmp = scm_call_1 (p, seed);
873 }
874 return result;
875}
876#undef FUNC_NAME
877
878
879SCM_DEFINE (scm_char_set_unfold_x, "char-set-unfold!", 5, 0, 0,
880 (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
881 "This is a fundamental constructor for character sets.\n"
882 "@itemize @bullet\n"
883 "@item @var{g} is used to generate a series of ``seed'' values\n"
884 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
885 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
886 "@item @var{p} tells us when to stop -- when it returns true\n"
887 "when applied to one of the seed values.\n"
888 "@item @var{f} maps each seed value to a character. These\n"
889 "characters are added to the base character set @var{base_cs} to\n"
890 "form the result; @var{base_cs} defaults to the empty set.\n"
891 "@end itemize")
892#define FUNC_NAME s_scm_char_set_unfold_x
893{
894 SCM tmp;
895
896 SCM_VALIDATE_PROC (1, p);
897 SCM_VALIDATE_PROC (2, f);
898 SCM_VALIDATE_PROC (3, g);
899 SCM_VALIDATE_SMOB (5, base_cs, charset);
900
901 tmp = scm_call_1 (p, seed);
902 while (scm_is_false (tmp))
903 {
904 SCM ch = scm_call_1 (f, seed);
905 if (!SCM_CHARP (ch))
906 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
907 SCM_CHARSET_SET (base_cs, SCM_CHAR (ch));
908
909 seed = scm_call_1 (g, seed);
910 tmp = scm_call_1 (p, seed);
911 }
912 return base_cs;
913}
914#undef FUNC_NAME
915
916
917SCM_DEFINE (scm_char_set_for_each, "char-set-for-each", 2, 0, 0,
f49dbcad
MG
918 (SCM proc, SCM cs),
919 "Apply @var{proc} to every character in the character set\n"
920 "@var{cs}. The return value is not specified.")
63181a97
MV
921#define FUNC_NAME s_scm_char_set_for_each
922{
f49dbcad 923 scm_t_char_set *cs_data;
63181a97 924 int k;
f49dbcad 925 scm_t_wchar n;
63181a97
MV
926
927 SCM_VALIDATE_PROC (1, proc);
928 SCM_VALIDATE_SMOB (2, cs, charset);
929
f49dbcad
MG
930 cs_data = SCM_CHARSET_DATA (cs);
931
932 if (cs_data->len == 0)
933 return SCM_UNSPECIFIED;
934
935 for (k = 0; k < cs_data->len; k++)
936 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
937 {
938 scm_call_1 (proc, SCM_MAKE_CHAR (n));
939 }
940
63181a97
MV
941 return SCM_UNSPECIFIED;
942}
943#undef FUNC_NAME
944
945
946SCM_DEFINE (scm_char_set_map, "char-set-map", 2, 0, 0,
947 (SCM proc, SCM cs),
948 "Map the procedure @var{proc} over every character in @var{cs}.\n"
949 "@var{proc} must be a character -> character procedure.")
950#define FUNC_NAME s_scm_char_set_map
951{
952 SCM result;
953 int k;
f49dbcad
MG
954 scm_t_char_set *cs_data;
955 scm_t_wchar n;
63181a97
MV
956
957 SCM_VALIDATE_PROC (1, proc);
958 SCM_VALIDATE_SMOB (2, cs, charset);
959
960 result = make_char_set (FUNC_NAME);
f49dbcad
MG
961 cs_data = SCM_CHARSET_DATA (cs);
962
963 if (cs_data->len == 0)
964 return result;
965
966 for (k = 0; k < cs_data->len; k++)
967 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
63181a97 968 {
f49dbcad
MG
969 SCM ch = scm_call_1 (proc, SCM_MAKE_CHAR (n));
970 if (!SCM_CHARP (ch))
971 SCM_MISC_ERROR ("procedure ~S returned non-char",
972 scm_list_1 (proc));
973 SCM_CHARSET_SET (result, SCM_CHAR (ch));
63181a97
MV
974 }
975 return result;
976}
977#undef FUNC_NAME
978
979
980SCM_DEFINE (scm_char_set_copy, "char-set-copy", 1, 0, 0,
981 (SCM cs),
982 "Return a newly allocated character set containing all\n"
983 "characters in @var{cs}.")
984#define FUNC_NAME s_scm_char_set_copy
985{
986 SCM ret;
f49dbcad 987 scm_t_char_set *p1, *p2;
63181a97
MV
988
989 SCM_VALIDATE_SMOB (1, cs, charset);
990 ret = make_char_set (FUNC_NAME);
f49dbcad
MG
991 p1 = SCM_CHARSET_DATA (cs);
992 p2 = SCM_CHARSET_DATA (ret);
993 p2->len = p1->len;
994
995 if (p1->len == 0)
996 p2->ranges = NULL;
997 else
998 {
999 p2->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * p1->len,
1000 "character-set");
1001 memcpy (p2->ranges, p1->ranges, sizeof (scm_t_char_range) * p1->len);
1002 }
1003
63181a97
MV
1004 return ret;
1005}
1006#undef FUNC_NAME
1007
1008
1009SCM_DEFINE (scm_char_set, "char-set", 0, 0, 1,
1010 (SCM rest),
1011 "Return a character set containing all given characters.")
1012#define FUNC_NAME s_scm_char_set
1013{
1014 SCM cs;
63181a97
MV
1015 int argnum = 1;
1016
1017 SCM_VALIDATE_REST_ARGUMENT (rest);
1018 cs = make_char_set (FUNC_NAME);
d2e53ed6 1019 while (!scm_is_null (rest))
63181a97 1020 {
f49dbcad 1021 scm_t_wchar c;
63181a97
MV
1022
1023 SCM_VALIDATE_CHAR_COPY (argnum, SCM_CAR (rest), c);
1024 argnum++;
1025 rest = SCM_CDR (rest);
f49dbcad 1026 SCM_CHARSET_SET (cs, c);
63181a97
MV
1027 }
1028 return cs;
1029}
1030#undef FUNC_NAME
1031
1032
1033SCM_DEFINE (scm_list_to_char_set, "list->char-set", 1, 1, 0,
1034 (SCM list, SCM base_cs),
1035 "Convert the character list @var{list} to a character set. If\n"
1036 "the character set @var{base_cs} is given, the character in this\n"
1037 "set are also included in the result.")
1038#define FUNC_NAME s_scm_list_to_char_set
1039{
1040 SCM cs;
63181a97
MV
1041
1042 SCM_VALIDATE_LIST (1, list);
1043 if (SCM_UNBNDP (base_cs))
1044 cs = make_char_set (FUNC_NAME);
1045 else
1046 {
1047 SCM_VALIDATE_SMOB (2, base_cs, charset);
1048 cs = scm_char_set_copy (base_cs);
1049 }
d2e53ed6 1050 while (!scm_is_null (list))
63181a97
MV
1051 {
1052 SCM chr = SCM_CAR (list);
f49dbcad 1053 scm_t_wchar c;
63181a97
MV
1054
1055 SCM_VALIDATE_CHAR_COPY (0, chr, c);
1056 list = SCM_CDR (list);
1057
f49dbcad
MG
1058
1059 SCM_CHARSET_SET (cs, c);
63181a97
MV
1060 }
1061 return cs;
1062}
1063#undef FUNC_NAME
1064
1065
1066SCM_DEFINE (scm_list_to_char_set_x, "list->char-set!", 2, 0, 0,
f49dbcad
MG
1067 (SCM list, SCM base_cs),
1068 "Convert the character list @var{list} to a character set. The\n"
1069 "characters are added to @var{base_cs} and @var{base_cs} is\n"
1070 "returned.")
63181a97
MV
1071#define FUNC_NAME s_scm_list_to_char_set_x
1072{
63181a97
MV
1073 SCM_VALIDATE_LIST (1, list);
1074 SCM_VALIDATE_SMOB (2, base_cs, charset);
d2e53ed6 1075 while (!scm_is_null (list))
63181a97
MV
1076 {
1077 SCM chr = SCM_CAR (list);
f49dbcad 1078 scm_t_wchar c;
63181a97
MV
1079
1080 SCM_VALIDATE_CHAR_COPY (0, chr, c);
1081 list = SCM_CDR (list);
1082
f49dbcad 1083 SCM_CHARSET_SET (base_cs, c);
63181a97
MV
1084 }
1085 return base_cs;
1086}
1087#undef FUNC_NAME
1088
1089
1090SCM_DEFINE (scm_string_to_char_set, "string->char-set", 1, 1, 0,
1091 (SCM str, SCM base_cs),
1092 "Convert the string @var{str} to a character set. If the\n"
1093 "character set @var{base_cs} is given, the characters in this\n"
1094 "set are also included in the result.")
1095#define FUNC_NAME s_scm_string_to_char_set
1096{
1097 SCM cs;
63181a97
MV
1098 size_t k = 0, len;
1099
1100 SCM_VALIDATE_STRING (1, str);
1101 if (SCM_UNBNDP (base_cs))
1102 cs = make_char_set (FUNC_NAME);
1103 else
1104 {
1105 SCM_VALIDATE_SMOB (2, base_cs, charset);
1106 cs = scm_char_set_copy (base_cs);
1107 }
63181a97
MV
1108 len = scm_i_string_length (str);
1109 while (k < len)
1110 {
f49dbcad
MG
1111 scm_t_wchar c = scm_i_string_ref (str, k++);
1112 SCM_CHARSET_SET (cs, c);
63181a97
MV
1113 }
1114 scm_remember_upto_here_1 (str);
1115 return cs;
1116}
1117#undef FUNC_NAME
1118
1119
1120SCM_DEFINE (scm_string_to_char_set_x, "string->char-set!", 2, 0, 0,
f49dbcad
MG
1121 (SCM str, SCM base_cs),
1122 "Convert the string @var{str} to a character set. The\n"
1123 "characters from the string are added to @var{base_cs}, and\n"
1124 "@var{base_cs} is returned.")
63181a97
MV
1125#define FUNC_NAME s_scm_string_to_char_set_x
1126{
63181a97
MV
1127 size_t k = 0, len;
1128
1129 SCM_VALIDATE_STRING (1, str);
1130 SCM_VALIDATE_SMOB (2, base_cs, charset);
63181a97
MV
1131 len = scm_i_string_length (str);
1132 while (k < len)
1133 {
f49dbcad
MG
1134 scm_t_wchar c = scm_i_string_ref (str, k++);
1135 SCM_CHARSET_SET (base_cs, c);
63181a97
MV
1136 }
1137 scm_remember_upto_here_1 (str);
1138 return base_cs;
1139}
1140#undef FUNC_NAME
1141
1142
1143SCM_DEFINE (scm_char_set_filter, "char-set-filter", 2, 1, 0,
1144 (SCM pred, SCM cs, SCM base_cs),
1145 "Return a character set containing every character from @var{cs}\n"
1146 "so that it satisfies @var{pred}. If provided, the characters\n"
1147 "from @var{base_cs} are added to the result.")
1148#define FUNC_NAME s_scm_char_set_filter
1149{
1150 SCM ret;
1151 int k;
f49dbcad
MG
1152 scm_t_wchar n;
1153 scm_t_char_set *p;
63181a97
MV
1154
1155 SCM_VALIDATE_PROC (1, pred);
1156 SCM_VALIDATE_SMOB (2, cs, charset);
1157 if (!SCM_UNBNDP (base_cs))
1158 {
1159 SCM_VALIDATE_SMOB (3, base_cs, charset);
1160 ret = scm_char_set_copy (base_cs);
1161 }
1162 else
1163 ret = make_char_set (FUNC_NAME);
63181a97 1164
f49dbcad
MG
1165 p = SCM_CHARSET_DATA (cs);
1166
1167 if (p->len == 0)
1168 return ret;
1169
1170 for (k = 0; k < p->len; k++)
1171 for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++)
1172 {
1173 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1174
1175 if (scm_is_true (res))
1176 SCM_CHARSET_SET (ret, n);
1177 }
63181a97
MV
1178 return ret;
1179}
1180#undef FUNC_NAME
1181
1182
1183SCM_DEFINE (scm_char_set_filter_x, "char-set-filter!", 3, 0, 0,
1184 (SCM pred, SCM cs, SCM base_cs),
1185 "Return a character set containing every character from @var{cs}\n"
1186 "so that it satisfies @var{pred}. The characters are added to\n"
1187 "@var{base_cs} and @var{base_cs} is returned.")
1188#define FUNC_NAME s_scm_char_set_filter_x
1189{
1190 int k;
f49dbcad
MG
1191 scm_t_wchar n;
1192 scm_t_char_set *p;
63181a97
MV
1193
1194 SCM_VALIDATE_PROC (1, pred);
1195 SCM_VALIDATE_SMOB (2, cs, charset);
1196 SCM_VALIDATE_SMOB (3, base_cs, charset);
f49dbcad
MG
1197 p = SCM_CHARSET_DATA (cs);
1198 if (p->len == 0)
1199 return base_cs;
63181a97 1200
f49dbcad
MG
1201 for (k = 0; k < p->len; k++)
1202 for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++)
1203 {
1204 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
1205
1206 if (scm_is_true (res))
1207 SCM_CHARSET_SET (base_cs, n);
1208 }
63181a97
MV
1209 return base_cs;
1210}
1211#undef FUNC_NAME
1212
1213
1214SCM_DEFINE (scm_ucs_range_to_char_set, "ucs-range->char-set", 2, 2, 0,
1215 (SCM lower, SCM upper, SCM error, SCM base_cs),
1216 "Return a character set containing all characters whose\n"
1217 "character codes lie in the half-open range\n"
1218 "[@var{lower},@var{upper}).\n"
1219 "\n"
1220 "If @var{error} is a true value, an error is signalled if the\n"
1221 "specified range contains characters which are not contained in\n"
1222 "the implemented character range. If @var{error} is @code{#f},\n"
1223 "these characters are silently left out of the resultung\n"
1224 "character set.\n"
1225 "\n"
1226 "The characters in @var{base_cs} are added to the result, if\n"
1227 "given.")
1228#define FUNC_NAME s_scm_ucs_range_to_char_set
1229{
1230 SCM cs;
1231 size_t clower, cupper;
63181a97
MV
1232
1233 clower = scm_to_size_t (lower);
1234 cupper = scm_to_size_t (upper);
1235 SCM_ASSERT_RANGE (2, upper, cupper >= clower);
1236 if (!SCM_UNBNDP (error))
1237 {
1238 if (scm_is_true (error))
f49dbcad
MG
1239 {
1240 SCM_ASSERT_RANGE (1, lower, SCM_IS_UNICODE_CHAR (clower));
1241 SCM_ASSERT_RANGE (2, upper, SCM_IS_UNICODE_CHAR (cupper));
1242 }
63181a97 1243 }
f49dbcad
MG
1244 if (clower > 0x10FFFF)
1245 clower = 0x10FFFF;
1246 if (cupper > 0x10FFFF)
1247 cupper = 0x10FFFF;
63181a97
MV
1248 if (SCM_UNBNDP (base_cs))
1249 cs = make_char_set (FUNC_NAME);
1250 else
1251 {
1252 SCM_VALIDATE_SMOB (4, base_cs, charset);
1253 cs = scm_char_set_copy (base_cs);
1254 }
f49dbcad
MG
1255 /* It not be difficult to write a more optimized version of the
1256 following. */
63181a97
MV
1257 while (clower < cupper)
1258 {
f49dbcad 1259 SCM_CHARSET_SET (cs, clower);
63181a97
MV
1260 clower++;
1261 }
1262 return cs;
1263}
1264#undef FUNC_NAME
1265
1266
1267SCM_DEFINE (scm_ucs_range_to_char_set_x, "ucs-range->char-set!", 4, 0, 0,
1268 (SCM lower, SCM upper, SCM error, SCM base_cs),
1269 "Return a character set containing all characters whose\n"
1270 "character codes lie in the half-open range\n"
1271 "[@var{lower},@var{upper}).\n"
1272 "\n"
1273 "If @var{error} is a true value, an error is signalled if the\n"
1274 "specified range contains characters which are not contained in\n"
1275 "the implemented character range. If @var{error} is @code{#f},\n"
1276 "these characters are silently left out of the resultung\n"
1277 "character set.\n"
1278 "\n"
1279 "The characters are added to @var{base_cs} and @var{base_cs} is\n"
1280 "returned.")
1281#define FUNC_NAME s_scm_ucs_range_to_char_set_x
1282{
1283 size_t clower, cupper;
63181a97
MV
1284
1285 clower = scm_to_size_t (lower);
1286 cupper = scm_to_size_t (upper);
1287 SCM_ASSERT_RANGE (2, upper, cupper >= clower);
1288 if (scm_is_true (error))
1289 {
f49dbcad
MG
1290 SCM_ASSERT_RANGE (1, lower, SCM_IS_UNICODE_CHAR (clower));
1291 SCM_ASSERT_RANGE (2, upper, SCM_IS_UNICODE_CHAR (cupper));
63181a97 1292 }
f49dbcad
MG
1293 if (clower > SCM_CODEPOINT_MAX)
1294 clower = SCM_CODEPOINT_MAX;
1295 if (cupper > SCM_CODEPOINT_MAX)
1296 cupper = SCM_CODEPOINT_MAX;
1297
63181a97
MV
1298 while (clower < cupper)
1299 {
f49dbcad
MG
1300 if (SCM_IS_UNICODE_CHAR (clower))
1301 SCM_CHARSET_SET (base_cs, clower);
63181a97
MV
1302 clower++;
1303 }
1304 return base_cs;
1305}
1306#undef FUNC_NAME
1307
1308SCM_DEFINE (scm_to_char_set, "->char-set", 1, 0, 0,
1309 (SCM x),
1310 "Coerces x into a char-set. @var{x} may be a string, character or char-set. A string is converted to the set of its constituent characters; a character is converted to a singleton set; a char-set is returned as-is.")
1311#define FUNC_NAME s_scm_to_char_set
1312{
1313 if (scm_is_string (x))
1314 return scm_string_to_char_set (x, SCM_UNDEFINED);
1315 else if (SCM_CHARP (x))
1316 return scm_char_set (scm_list_1 (x));
1317 else if (SCM_SMOB_PREDICATE (scm_tc16_charset, x))
1318 return x;
1319 else
1320 scm_wrong_type_arg (NULL, 0, x);
1321}
1322#undef FUNC_NAME
1323
1324SCM_DEFINE (scm_char_set_size, "char-set-size", 1, 0, 0,
1325 (SCM cs),
1326 "Return the number of elements in character set @var{cs}.")
1327#define FUNC_NAME s_scm_char_set_size
1328{
1329 int k, count = 0;
f49dbcad 1330 scm_t_char_set *cs_data;
63181a97
MV
1331
1332 SCM_VALIDATE_SMOB (1, cs, charset);
f49dbcad
MG
1333 cs_data = SCM_CHARSET_DATA (cs);
1334
1335 if (cs_data->len == 0)
1336 return scm_from_int (0);
1337
1338 for (k = 0; k < cs_data->len; k++)
1339 count += cs_data->ranges[k].hi - cs_data->ranges[k].lo + 1;
1340
1341 return scm_from_int (count);
63181a97
MV
1342}
1343#undef FUNC_NAME
1344
1345
1346SCM_DEFINE (scm_char_set_count, "char-set-count", 2, 0, 0,
1347 (SCM pred, SCM cs),
1348 "Return the number of the elements int the character set\n"
1349 "@var{cs} which satisfy the predicate @var{pred}.")
1350#define FUNC_NAME s_scm_char_set_count
1351{
1352 int k, count = 0;
f49dbcad
MG
1353 scm_t_wchar n;
1354 scm_t_char_set *cs_data;
63181a97
MV
1355
1356 SCM_VALIDATE_PROC (1, pred);
1357 SCM_VALIDATE_SMOB (2, cs, charset);
f49dbcad
MG
1358 cs_data = SCM_CHARSET_DATA (cs);
1359 if (cs_data->len == 0)
1360 return scm_from_int (0);
63181a97 1361
f49dbcad
MG
1362 for (k = 0; k < cs_data->len; k++)
1363 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
63181a97 1364 {
f49dbcad
MG
1365 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1366 if (scm_is_true (res))
1367 count++;
63181a97
MV
1368 }
1369 return SCM_I_MAKINUM (count);
1370}
1371#undef FUNC_NAME
1372
1373
1374SCM_DEFINE (scm_char_set_to_list, "char-set->list", 1, 0, 0,
1375 (SCM cs),
1376 "Return a list containing the elements of the character set\n"
1377 "@var{cs}.")
1378#define FUNC_NAME s_scm_char_set_to_list
1379{
1380 int k;
f49dbcad 1381 scm_t_wchar n;
63181a97 1382 SCM result = SCM_EOL;
f49dbcad 1383 scm_t_char_set *p;
63181a97
MV
1384
1385 SCM_VALIDATE_SMOB (1, cs, charset);
f49dbcad
MG
1386 p = SCM_CHARSET_DATA (cs);
1387 if (p->len == 0)
1388 return SCM_EOL;
1389
1390 for (k = p->len - 1; k >= 0; k--)
1391 for (n = p->ranges[k].hi; n >= p->ranges[k].lo; n--)
1392 result = scm_cons (SCM_MAKE_CHAR (n), result);
63181a97
MV
1393 return result;
1394}
1395#undef FUNC_NAME
1396
1397
1398SCM_DEFINE (scm_char_set_to_string, "char-set->string", 1, 0, 0,
1399 (SCM cs),
1400 "Return a string containing the elements of the character set\n"
1401 "@var{cs}. The order in which the characters are placed in the\n"
1402 "string is not defined.")
1403#define FUNC_NAME s_scm_char_set_to_string
1404{
1405 int k;
1406 int count = 0;
1407 int idx = 0;
f49dbcad 1408 int wide = 0;
63181a97 1409 SCM result;
f49dbcad
MG
1410 scm_t_wchar n;
1411 scm_t_char_set *cs_data;
1412 char *buf;
1413 scm_t_wchar *wbuf;
63181a97
MV
1414
1415 SCM_VALIDATE_SMOB (1, cs, charset);
f49dbcad
MG
1416 cs_data = SCM_CHARSET_DATA (cs);
1417 if (cs_data->len == 0)
1418 return scm_nullstr;
1419
1420 if (cs_data->ranges[cs_data->len - 1].hi > 255)
1421 wide = 1;
1422
1423 count = scm_to_int (scm_char_set_size (cs));
1424 if (wide)
1425 result = scm_i_make_wide_string (count, &wbuf);
1426 else
1427 result = scm_i_make_string (count, &buf);
1428
1429 for (k = 0; k < cs_data->len; k++)
1430 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
1431 {
1432 if (wide)
1433 wbuf[idx++] = n;
1434 else
1435 buf[idx++] = n;
1436 }
63181a97
MV
1437 return result;
1438}
1439#undef FUNC_NAME
1440
1441
1442SCM_DEFINE (scm_char_set_contains_p, "char-set-contains?", 2, 0, 0,
1443 (SCM cs, SCM ch),
1444 "Return @code{#t} iff the character @var{ch} is contained in the\n"
1445 "character set @var{cs}.")
1446#define FUNC_NAME s_scm_char_set_contains_p
1447{
1448 SCM_VALIDATE_SMOB (1, cs, charset);
1449 SCM_VALIDATE_CHAR (2, ch);
1450 return scm_from_bool (SCM_CHARSET_GET (cs, SCM_CHAR (ch)));
1451}
1452#undef FUNC_NAME
1453
1454
1455SCM_DEFINE (scm_char_set_every, "char-set-every", 2, 0, 0,
1456 (SCM pred, SCM cs),
1457 "Return a true value if every character in the character set\n"
1458 "@var{cs} satisfies the predicate @var{pred}.")
1459#define FUNC_NAME s_scm_char_set_every
1460{
1461 int k;
f49dbcad 1462 scm_t_wchar n;
63181a97 1463 SCM res = SCM_BOOL_T;
f49dbcad 1464 scm_t_char_set *cs_data;
63181a97
MV
1465
1466 SCM_VALIDATE_PROC (1, pred);
1467 SCM_VALIDATE_SMOB (2, cs, charset);
1468
f49dbcad
MG
1469 cs_data = SCM_CHARSET_DATA (cs);
1470 if (cs_data->len == 0)
1471 return SCM_BOOL_T;
1472
1473 for (k = 0; k < cs_data->len; k++)
1474 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
63181a97 1475 {
f49dbcad
MG
1476 res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1477 if (scm_is_false (res))
1478 return res;
63181a97 1479 }
f49dbcad 1480 return SCM_BOOL_T;
63181a97
MV
1481}
1482#undef FUNC_NAME
1483
1484
1485SCM_DEFINE (scm_char_set_any, "char-set-any", 2, 0, 0,
1486 (SCM pred, SCM cs),
1487 "Return a true value if any character in the character set\n"
1488 "@var{cs} satisfies the predicate @var{pred}.")
1489#define FUNC_NAME s_scm_char_set_any
1490{
1491 int k;
f49dbcad
MG
1492 scm_t_wchar n;
1493 scm_t_char_set *cs_data;
63181a97
MV
1494
1495 SCM_VALIDATE_PROC (1, pred);
1496 SCM_VALIDATE_SMOB (2, cs, charset);
1497
f49dbcad
MG
1498 cs_data = (scm_t_char_set *) cs;
1499
1500 for (k = 0; k < cs_data->len; k++)
1501 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
63181a97 1502 {
f49dbcad
MG
1503 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1504 if (scm_is_true (res))
1505 return res;
63181a97
MV
1506 }
1507 return SCM_BOOL_F;
1508}
1509#undef FUNC_NAME
1510
1511
1512SCM_DEFINE (scm_char_set_adjoin, "char-set-adjoin", 1, 0, 1,
f49dbcad
MG
1513 (SCM cs, SCM rest),
1514 "Add all character arguments to the first argument, which must\n"
1515 "be a character set.")
63181a97
MV
1516#define FUNC_NAME s_scm_char_set_adjoin
1517{
63181a97
MV
1518 SCM_VALIDATE_SMOB (1, cs, charset);
1519 SCM_VALIDATE_REST_ARGUMENT (rest);
1520 cs = scm_char_set_copy (cs);
1521
d2e53ed6 1522 while (!scm_is_null (rest))
63181a97
MV
1523 {
1524 SCM chr = SCM_CAR (rest);
f49dbcad 1525 scm_t_wchar c;
63181a97
MV
1526
1527 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1528 rest = SCM_CDR (rest);
1529
f49dbcad 1530 SCM_CHARSET_SET (cs, c);
63181a97
MV
1531 }
1532 return cs;
1533}
1534#undef FUNC_NAME
1535
1536
1537SCM_DEFINE (scm_char_set_delete, "char-set-delete", 1, 0, 1,
f49dbcad
MG
1538 (SCM cs, SCM rest),
1539 "Delete all character arguments from the first argument, which\n"
1540 "must be a character set.")
63181a97
MV
1541#define FUNC_NAME s_scm_char_set_delete
1542{
63181a97
MV
1543 SCM_VALIDATE_SMOB (1, cs, charset);
1544 SCM_VALIDATE_REST_ARGUMENT (rest);
1545 cs = scm_char_set_copy (cs);
1546
d2e53ed6 1547 while (!scm_is_null (rest))
63181a97
MV
1548 {
1549 SCM chr = SCM_CAR (rest);
f49dbcad 1550 scm_t_wchar c;
63181a97
MV
1551
1552 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1553 rest = SCM_CDR (rest);
1554
f49dbcad 1555 SCM_CHARSET_UNSET (cs, c);
63181a97
MV
1556 }
1557 return cs;
1558}
1559#undef FUNC_NAME
1560
1561
1562SCM_DEFINE (scm_char_set_adjoin_x, "char-set-adjoin!", 1, 0, 1,
f49dbcad
MG
1563 (SCM cs, SCM rest),
1564 "Add all character arguments to the first argument, which must\n"
1565 "be a character set.")
63181a97
MV
1566#define FUNC_NAME s_scm_char_set_adjoin_x
1567{
63181a97
MV
1568 SCM_VALIDATE_SMOB (1, cs, charset);
1569 SCM_VALIDATE_REST_ARGUMENT (rest);
1570
d2e53ed6 1571 while (!scm_is_null (rest))
63181a97
MV
1572 {
1573 SCM chr = SCM_CAR (rest);
f49dbcad 1574 scm_t_wchar c;
63181a97
MV
1575
1576 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1577 rest = SCM_CDR (rest);
1578
f49dbcad 1579 SCM_CHARSET_SET (cs, c);
63181a97
MV
1580 }
1581 return cs;
1582}
1583#undef FUNC_NAME
1584
1585
1586SCM_DEFINE (scm_char_set_delete_x, "char-set-delete!", 1, 0, 1,
f49dbcad
MG
1587 (SCM cs, SCM rest),
1588 "Delete all character arguments from the first argument, which\n"
1589 "must be a character set.")
63181a97
MV
1590#define FUNC_NAME s_scm_char_set_delete_x
1591{
63181a97
MV
1592 SCM_VALIDATE_SMOB (1, cs, charset);
1593 SCM_VALIDATE_REST_ARGUMENT (rest);
1594
d2e53ed6 1595 while (!scm_is_null (rest))
63181a97
MV
1596 {
1597 SCM chr = SCM_CAR (rest);
f49dbcad 1598 scm_t_wchar c;
63181a97
MV
1599
1600 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1601 rest = SCM_CDR (rest);
1602
f49dbcad 1603 SCM_CHARSET_UNSET (cs, c);
63181a97
MV
1604 }
1605 return cs;
1606}
1607#undef FUNC_NAME
1608
1609
1610SCM_DEFINE (scm_char_set_complement, "char-set-complement", 1, 0, 0,
f49dbcad 1611 (SCM cs), "Return the complement of the character set @var{cs}.")
63181a97
MV
1612#define FUNC_NAME s_scm_char_set_complement
1613{
63181a97 1614 SCM res;
f49dbcad 1615 scm_t_char_set *p, *q;
63181a97
MV
1616
1617 SCM_VALIDATE_SMOB (1, cs, charset);
1618
1619 res = make_char_set (FUNC_NAME);
f49dbcad
MG
1620 p = SCM_CHARSET_DATA (res);
1621 q = SCM_CHARSET_DATA (cs);
1622
1623 charsets_complement (p, q);
63181a97
MV
1624 return res;
1625}
1626#undef FUNC_NAME
1627
1628
1629SCM_DEFINE (scm_char_set_union, "char-set-union", 0, 0, 1,
1630 (SCM rest),
1631 "Return the union of all argument character sets.")
1632#define FUNC_NAME s_scm_char_set_union
1633{
1634 int c = 1;
1635 SCM res;
f49dbcad 1636 scm_t_char_set *p;
63181a97
MV
1637
1638 SCM_VALIDATE_REST_ARGUMENT (rest);
1639
1640 res = make_char_set (FUNC_NAME);
f49dbcad 1641 p = SCM_CHARSET_DATA (res);
d2e53ed6 1642 while (!scm_is_null (rest))
63181a97 1643 {
63181a97
MV
1644 SCM cs = SCM_CAR (rest);
1645 SCM_VALIDATE_SMOB (c, cs, charset);
1646 c++;
1647 rest = SCM_CDR (rest);
1648
f49dbcad
MG
1649
1650 charsets_union (p, (scm_t_char_set *) SCM_SMOB_DATA (cs));
63181a97
MV
1651 }
1652 return res;
1653}
1654#undef FUNC_NAME
1655
1656
1657SCM_DEFINE (scm_char_set_intersection, "char-set-intersection", 0, 0, 1,
1658 (SCM rest),
1659 "Return the intersection of all argument character sets.")
1660#define FUNC_NAME s_scm_char_set_intersection
1661{
1662 SCM res;
1663
1664 SCM_VALIDATE_REST_ARGUMENT (rest);
1665
d2e53ed6 1666 if (scm_is_null (rest))
63181a97
MV
1667 res = make_char_set (FUNC_NAME);
1668 else
1669 {
f49dbcad 1670 scm_t_char_set *p;
63181a97
MV
1671 int argnum = 2;
1672
1673 res = scm_char_set_copy (SCM_CAR (rest));
f49dbcad 1674 p = SCM_CHARSET_DATA (res);
63181a97
MV
1675 rest = SCM_CDR (rest);
1676
d2e53ed6 1677 while (scm_is_pair (rest))
f49dbcad
MG
1678 {
1679 SCM cs = SCM_CAR (rest);
1680 scm_t_char_set *cs_data;
1681
1682 SCM_VALIDATE_SMOB (argnum, cs, charset);
1683 argnum++;
1684 cs_data = SCM_CHARSET_DATA (cs);
1685 rest = SCM_CDR (rest);
1686 charsets_intersection (p, cs_data);
1687 }
63181a97
MV
1688 }
1689
1690 return res;
1691}
1692#undef FUNC_NAME
1693
1694
1695SCM_DEFINE (scm_char_set_difference, "char-set-difference", 1, 0, 1,
1696 (SCM cs1, SCM rest),
1697 "Return the difference of all argument character sets.")
1698#define FUNC_NAME s_scm_char_set_difference
1699{
1700 int c = 2;
f49dbcad
MG
1701 SCM res, compl;
1702 scm_t_char_set *p, *q;
63181a97
MV
1703
1704 SCM_VALIDATE_SMOB (1, cs1, charset);
1705 SCM_VALIDATE_REST_ARGUMENT (rest);
1706
1707 res = scm_char_set_copy (cs1);
f49dbcad
MG
1708 p = SCM_CHARSET_DATA (res);
1709 compl = make_char_set (FUNC_NAME);
1710 q = SCM_CHARSET_DATA (compl);
d2e53ed6 1711 while (!scm_is_null (rest))
63181a97 1712 {
63181a97
MV
1713 SCM cs = SCM_CAR (rest);
1714 SCM_VALIDATE_SMOB (c, cs, charset);
1715 c++;
1716 rest = SCM_CDR (rest);
1717
f49dbcad
MG
1718 charsets_complement (q, SCM_CHARSET_DATA (cs));
1719 charsets_intersection (p, q);
63181a97
MV
1720 }
1721 return res;
1722}
1723#undef FUNC_NAME
1724
1725
1726SCM_DEFINE (scm_char_set_xor, "char-set-xor", 0, 0, 1,
1727 (SCM rest),
1728 "Return the exclusive-or of all argument character sets.")
1729#define FUNC_NAME s_scm_char_set_xor
1730{
1731 SCM res;
1732
1733 SCM_VALIDATE_REST_ARGUMENT (rest);
1734
d2e53ed6 1735 if (scm_is_null (rest))
63181a97
MV
1736 res = make_char_set (FUNC_NAME);
1737 else
1738 {
1739 int argnum = 2;
f49dbcad 1740 scm_t_char_set *p;
63181a97
MV
1741
1742 res = scm_char_set_copy (SCM_CAR (rest));
f49dbcad 1743 p = SCM_CHARSET_DATA (res);
63181a97
MV
1744 rest = SCM_CDR (rest);
1745
d2e53ed6 1746 while (scm_is_pair (rest))
f49dbcad
MG
1747 {
1748 SCM cs = SCM_CAR (rest);
1749 scm_t_char_set *cs_data;
1750
1751 SCM_VALIDATE_SMOB (argnum, cs, charset);
1752 argnum++;
1753 cs_data = SCM_CHARSET_DATA (cs);
1754 rest = SCM_CDR (rest);
1755
1756 charsets_xor (p, cs_data);
1757 }
63181a97
MV
1758 }
1759 return res;
1760}
1761#undef FUNC_NAME
1762
1763
1764SCM_DEFINE (scm_char_set_diff_plus_intersection, "char-set-diff+intersection", 1, 0, 1,
1765 (SCM cs1, SCM rest),
1766 "Return the difference and the intersection of all argument\n"
1767 "character sets.")
1768#define FUNC_NAME s_scm_char_set_diff_plus_intersection
1769{
1770 int c = 2;
1771 SCM res1, res2;
f49dbcad 1772 scm_t_char_set *p, *q;
63181a97
MV
1773
1774 SCM_VALIDATE_SMOB (1, cs1, charset);
1775 SCM_VALIDATE_REST_ARGUMENT (rest);
1776
1777 res1 = scm_char_set_copy (cs1);
1778 res2 = make_char_set (FUNC_NAME);
f49dbcad
MG
1779 p = SCM_CHARSET_DATA (res1);
1780 q = SCM_CHARSET_DATA (res2);
d2e53ed6 1781 while (!scm_is_null (rest))
63181a97 1782 {
63181a97 1783 SCM cs = SCM_CAR (rest);
f49dbcad 1784 scm_t_char_set *r;
63181a97
MV
1785
1786 SCM_VALIDATE_SMOB (c, cs, charset);
1787 c++;
f49dbcad 1788 r = SCM_CHARSET_DATA (cs);
63181a97 1789
f49dbcad
MG
1790 charsets_union (q, r);
1791 charsets_intersection (p, r);
63181a97
MV
1792 rest = SCM_CDR (rest);
1793 }
1794 return scm_values (scm_list_2 (res1, res2));
1795}
1796#undef FUNC_NAME
1797
1798
1799SCM_DEFINE (scm_char_set_complement_x, "char-set-complement!", 1, 0, 0,
f49dbcad 1800 (SCM cs), "Return the complement of the character set @var{cs}.")
63181a97
MV
1801#define FUNC_NAME s_scm_char_set_complement_x
1802{
63181a97 1803 SCM_VALIDATE_SMOB (1, cs, charset);
f49dbcad 1804 cs = scm_char_set_complement (cs);
63181a97
MV
1805 return cs;
1806}
1807#undef FUNC_NAME
1808
1809
1810SCM_DEFINE (scm_char_set_union_x, "char-set-union!", 1, 0, 1,
f49dbcad
MG
1811 (SCM cs1, SCM rest),
1812 "Return the union of all argument character sets.")
63181a97
MV
1813#define FUNC_NAME s_scm_char_set_union_x
1814{
63181a97
MV
1815 SCM_VALIDATE_SMOB (1, cs1, charset);
1816 SCM_VALIDATE_REST_ARGUMENT (rest);
1817
f49dbcad 1818 cs1 = scm_char_set_union (scm_cons (cs1, rest));
63181a97
MV
1819 return cs1;
1820}
1821#undef FUNC_NAME
1822
1823
1824SCM_DEFINE (scm_char_set_intersection_x, "char-set-intersection!", 1, 0, 1,
f49dbcad
MG
1825 (SCM cs1, SCM rest),
1826 "Return the intersection of all argument character sets.")
63181a97
MV
1827#define FUNC_NAME s_scm_char_set_intersection_x
1828{
63181a97
MV
1829 SCM_VALIDATE_SMOB (1, cs1, charset);
1830 SCM_VALIDATE_REST_ARGUMENT (rest);
1831
f49dbcad 1832 cs1 = scm_char_set_intersection (scm_cons (cs1, rest));
63181a97
MV
1833 return cs1;
1834}
1835#undef FUNC_NAME
1836
1837
1838SCM_DEFINE (scm_char_set_difference_x, "char-set-difference!", 1, 0, 1,
f49dbcad
MG
1839 (SCM cs1, SCM rest),
1840 "Return the difference of all argument character sets.")
63181a97
MV
1841#define FUNC_NAME s_scm_char_set_difference_x
1842{
63181a97
MV
1843 SCM_VALIDATE_SMOB (1, cs1, charset);
1844 SCM_VALIDATE_REST_ARGUMENT (rest);
1845
f49dbcad 1846 cs1 = scm_char_set_difference (cs1, rest);
63181a97
MV
1847 return cs1;
1848}
1849#undef FUNC_NAME
1850
1851
1852SCM_DEFINE (scm_char_set_xor_x, "char-set-xor!", 1, 0, 1,
1853 (SCM cs1, SCM rest),
1854 "Return the exclusive-or of all argument character sets.")
1855#define FUNC_NAME s_scm_char_set_xor_x
1856{
1857 /* a side-effecting variant should presumably give consistent results:
1858 (define a (char-set #\a))
1859 (char-set-xor a a a) -> char set #\a
1860 (char-set-xor! a a a) -> char set #\a
f49dbcad 1861 */
7165abeb
MG
1862 cs1 = scm_char_set_xor (scm_cons (cs1, rest));
1863 return cs1;
63181a97
MV
1864}
1865#undef FUNC_NAME
1866
1867
f49dbcad
MG
1868SCM_DEFINE (scm_char_set_diff_plus_intersection_x,
1869 "char-set-diff+intersection!", 2, 0, 1, (SCM cs1, SCM cs2,
1870 SCM rest),
1871 "Return the difference and the intersection of all argument\n"
1872 "character sets.")
63181a97
MV
1873#define FUNC_NAME s_scm_char_set_diff_plus_intersection_x
1874{
f49dbcad 1875 SCM diff, intersect;
63181a97 1876
f49dbcad
MG
1877 diff = scm_char_set_difference (cs1, scm_cons (cs2, rest));
1878 intersect =
1879 scm_char_set_intersection (scm_cons (cs1, scm_cons (cs2, rest)));
1880 cs1 = diff;
1881 cs2 = intersect;
63181a97
MV
1882 return scm_values (scm_list_2 (cs1, cs2));
1883}
f49dbcad 1884#undef FUNC_NAME
d5ecf579 1885
a17d2654 1886\f
f49dbcad 1887
a17d2654
LC
1888/* Standard character sets. */
1889
63181a97
MV
1890SCM scm_char_set_lower_case;
1891SCM scm_char_set_upper_case;
1892SCM scm_char_set_title_case;
1893SCM scm_char_set_letter;
1894SCM scm_char_set_digit;
1895SCM scm_char_set_letter_and_digit;
1896SCM scm_char_set_graphic;
1897SCM scm_char_set_printing;
1898SCM scm_char_set_whitespace;
1899SCM scm_char_set_iso_control;
1900SCM scm_char_set_punctuation;
1901SCM scm_char_set_symbol;
1902SCM scm_char_set_hex_digit;
1903SCM scm_char_set_blank;
1904SCM scm_char_set_ascii;
1905SCM scm_char_set_empty;
1906SCM scm_char_set_full;
1907
63181a97 1908
a17d2654
LC
1909/* Create an empty character set and return it after binding it to NAME. */
1910static inline SCM
f49dbcad 1911define_charset (const char *name, const scm_t_char_set *p)
63181a97 1912{
f49dbcad
MG
1913 SCM cs;
1914
1915 SCM_NEWSMOB (cs, scm_tc16_charset, p);
63181a97
MV
1916 scm_c_define (name, cs);
1917 return scm_permanent_object (cs);
1918}
1919
f49dbcad
MG
1920#ifdef SCM_CHARSET_DEBUG
1921SCM_DEFINE (scm_debug_char_set, "debug-char-set", 1, 0, 0,
1922 (SCM charset),
1923 "Print out the internal C structure of @var{charset}.\n")
d5ecf579 1924#define FUNC_NAME s_scm_debug_char_set
63181a97 1925{
f49dbcad
MG
1926 int i;
1927 scm_t_char_set *cs = SCM_CHARSET_DATA (charset);
1928 fprintf (stderr, "cs %p\n", cs);
1929 fprintf (stderr, "len %d\n", cs->len);
1930 fprintf (stderr, "arr %p\n", cs->ranges);
1931 for (i = 0; i < cs->len; i++)
63181a97 1932 {
f49dbcad
MG
1933 if (cs->ranges[i].lo == cs->ranges[i].hi)
1934 fprintf (stderr, "%04x\n", cs->ranges[i].lo);
1935 else
1936 fprintf (stderr, "%04x..%04x\t[%d]\n",
1937 cs->ranges[i].lo,
1938 cs->ranges[i].hi, cs->ranges[i].hi - cs->ranges[i].lo + 1);
63181a97 1939 }
f49dbcad
MG
1940 printf ("\n");
1941 return SCM_UNSPECIFIED;
63181a97 1942}
f49dbcad 1943#undef FUNC_NAME
d5ecf579 1944#endif /* SCM_CHARSET_DEBUG */
a17d2654 1945\f
f49dbcad
MG
1946
1947
63181a97
MV
1948void
1949scm_init_srfi_14 (void)
1950{
f49dbcad 1951 scm_tc16_charset = scm_make_smob_type ("character-set", 0);
63181a97
MV
1952 scm_set_smob_free (scm_tc16_charset, charset_free);
1953 scm_set_smob_print (scm_tc16_charset, charset_print);
1954
f49dbcad
MG
1955 scm_tc16_charset_cursor = scm_make_smob_type ("char-set-cursor", 0);
1956 scm_set_smob_free (scm_tc16_charset_cursor, charset_cursor_free);
1957 scm_set_smob_print (scm_tc16_charset_cursor, charset_cursor_print);
1958
1959 scm_char_set_upper_case =
1960 define_charset ("char-set:upper-case", &cs_upper_case);
1961 scm_char_set_lower_case =
1962 define_charset ("char-set:lower-case", &cs_lower_case);
1963 scm_char_set_title_case =
1964 define_charset ("char-set:title-case", &cs_title_case);
1965 scm_char_set_letter = define_charset ("char-set:letter", &cs_letter);
1966 scm_char_set_digit = define_charset ("char-set:digit", &cs_digit);
1967 scm_char_set_letter_and_digit =
1968 define_charset ("char-set:letter+digit", &cs_letter_plus_digit);
1969 scm_char_set_graphic = define_charset ("char-set:graphic", &cs_graphic);
1970 scm_char_set_printing = define_charset ("char-set:printing", &cs_printing);
1971 scm_char_set_whitespace =
1972 define_charset ("char-set:whitespace", &cs_whitespace);
1973 scm_char_set_iso_control =
1974 define_charset ("char-set:iso-control", &cs_iso_control);
1975 scm_char_set_punctuation =
1976 define_charset ("char-set:punctuation", &cs_punctuation);
1977 scm_char_set_symbol = define_charset ("char-set:symbol", &cs_symbol);
1978 scm_char_set_hex_digit =
1979 define_charset ("char-set:hex-digit", &cs_hex_digit);
1980 scm_char_set_blank = define_charset ("char-set:blank", &cs_blank);
1981 scm_char_set_ascii = define_charset ("char-set:ascii", &cs_ascii);
1982 scm_char_set_empty = define_charset ("char-set:empty", &cs_empty);
1983 scm_char_set_full = define_charset ("char-set:full", &cs_full);
63181a97
MV
1984
1985#include "libguile/srfi-14.x"
1986}
1987
1988/* End of srfi-14.c. */