The charset complement operator should not include surrogates
[bpt/guile.git] / libguile / srfi-14.c
CommitLineData
63181a97
MV
1/* srfi-14.c --- SRFI-14 procedures for Guile
2 *
6caac03c 3 * Copyright (C) 2001, 2004, 2006, 2007 Free Software Foundation, Inc.
63181a97
MV
4 *
5 * This library is free software; you can redistribute it and/or
53befeb7
NJ
6 * modify it under the terms of the GNU Lesser General Public License
7 * as published by the Free Software Foundation; either version 3 of
8 * the License, or (at your option) any later version.
63181a97 9 *
53befeb7
NJ
10 * This library is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
63181a97
MV
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
53befeb7
NJ
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301 USA
63181a97
MV
19 */
20
a17d2654
LC
21#ifdef HAVE_CONFIG_H
22# include <config.h>
23#endif
24
25
63181a97 26#include <string.h>
f49dbcad 27#include <unictype.h>
63181a97
MV
28
29#include "libguile.h"
30#include "libguile/srfi-14.h"
f49dbcad 31#include "libguile/strings.h"
24d23822 32#include "libguile/chars.h"
63181a97 33
f49dbcad
MG
34/* Include the pre-computed standard charset data. */
35#include "libguile/srfi-14.i.c"
63181a97 36
f49dbcad 37#define SCM_CHARSET_DATA(charset) ((scm_t_char_set *) SCM_SMOB_DATA (charset))
63181a97 38
f49dbcad
MG
39#define SCM_CHARSET_SET(cs, idx) \
40 scm_i_charset_set (SCM_CHARSET_DATA (cs), idx)
63181a97 41
f49dbcad
MG
42#define SCM_CHARSET_UNSET(cs, idx) \
43 scm_i_charset_unset (SCM_CHARSET_DATA (cs), idx)
63181a97
MV
44
45/* Smob type code for character sets. */
46int scm_tc16_charset = 0;
f49dbcad
MG
47int scm_tc16_charset_cursor = 0;
48
49/* True if N exists in charset CS. */
50int
51scm_i_charset_get (scm_t_char_set *cs, scm_t_wchar n)
52{
53 size_t i;
54
55 i = 0;
56 while (i < cs->len)
57 {
58 if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi)
59 return 1;
60 i++;
61 }
62
63 return 0;
64}
65
66/* Put N into charset CS. */
67void
68scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n)
69{
70 size_t i;
71 size_t len;
72
73 len = cs->len;
74
75 i = 0;
76 while (i < len)
77 {
78 /* Already in this range */
79 if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi)
80 {
81 return;
82 }
83
84 if (n == cs->ranges[i].lo - 1)
85 {
86 /* This char is one below the current range. */
87 if (i > 0 && cs->ranges[i - 1].hi + 1 == n)
88 {
89 /* It is also one above the previous range, so combine them. */
90 cs->ranges[i - 1].hi = cs->ranges[i].hi;
91 if (i < len - 1)
92 memmove (cs->ranges + i, cs->ranges + (i + 1),
93 sizeof (scm_t_char_range) * (len - i - 1));
94 cs->ranges = scm_gc_realloc (cs->ranges,
95 sizeof (scm_t_char_range) * len,
96 sizeof (scm_t_char_range) * (len -
97 1),
98 "character-set");
99 cs->len = len - 1;
100 return;
101 }
102 else
103 {
104 /* Expand the range down by one. */
105 cs->ranges[i].lo = n;
106 return;
107 }
108 }
109 else if (n == cs->ranges[i].hi + 1)
110 {
111 /* This char is one above the current range. */
112 if (i < len - 1 && cs->ranges[i + 1].lo - 1 == n)
113 {
114 /* It is also one below the next range, so combine them. */
115 cs->ranges[i].hi = cs->ranges[i + 1].hi;
116 if (i < len - 2)
117 memmove (cs->ranges + (i + 1), cs->ranges + (i + 2),
118 sizeof (scm_t_char_range) * (len - i - 2));
119 cs->ranges = scm_gc_realloc (cs->ranges,
120 sizeof (scm_t_char_range) * len,
121 sizeof (scm_t_char_range) * (len -
122 1),
123 "character-set");
124 cs->len = len - 1;
125 return;
126 }
127 else
128 {
129 /* Expand the range up by one. */
130 cs->ranges[i].hi = n;
131 return;
132 }
133 }
134 else if (n < cs->ranges[i].lo - 1)
135 {
136 /* This is a new range below the current one. */
137 cs->ranges = scm_gc_realloc (cs->ranges,
138 sizeof (scm_t_char_range) * len,
139 sizeof (scm_t_char_range) * (len + 1),
140 "character-set");
141 memmove (cs->ranges + (i + 1), cs->ranges + i,
142 sizeof (scm_t_char_range) * (len - i));
143 cs->ranges[i].lo = n;
144 cs->ranges[i].hi = n;
145 cs->len = len + 1;
146 return;
147 }
148
149 i++;
150 }
151
152 /* This is a new range above all previous ranges. */
153 if (len == 0)
154 {
155 cs->ranges = scm_gc_malloc (sizeof (scm_t_char_range), "character-set");
156 }
157 else
158 {
159 cs->ranges = scm_gc_realloc (cs->ranges,
160 sizeof (scm_t_char_range) * len,
161 sizeof (scm_t_char_range) * (len + 1),
162 "character-set");
163 }
164 cs->ranges[len].lo = n;
165 cs->ranges[len].hi = n;
166 cs->len = len + 1;
167
168 return;
169}
170
91772d8f
MG
171/* Put LO to HI inclusive into charset CS. */
172static void
173scm_i_charset_set_range (scm_t_char_set *cs, scm_t_wchar lo, scm_t_wchar hi)
174{
175 size_t i;
176
177 i = 0;
178 while (i < cs->len)
179 {
180 /* Already in this range */
181 if (cs->ranges[i].lo <= lo && cs->ranges[i].hi >= hi)
182 return;
183
184 /* cur: +---+
185 new: +---+
186 */
187 if (cs->ranges[i].lo - 1 > hi)
188 {
189 /* Add a new range below the current one. */
190 cs->ranges = scm_gc_realloc (cs->ranges,
191 sizeof (scm_t_char_range) * cs->len,
192 sizeof (scm_t_char_range) * (cs->len + 1),
193 "character-set");
194 memmove (cs->ranges + (i + 1), cs->ranges + i,
195 sizeof (scm_t_char_range) * (cs->len - i));
196 cs->ranges[i].lo = lo;
197 cs->ranges[i].hi = hi;
198 cs->len += 1;
199 return;
200 }
201
202 /* cur: +---+ or +---+ or +---+
203 new: +---+ +---+ +---+
204 */
205 if (cs->ranges[i].lo > lo
206 && (cs->ranges[i].lo - 1 <= hi && cs->ranges[i].hi >= hi))
207 {
208 cs->ranges[i].lo = lo;
209 return;
210 }
211
212 /* cur: +---+ or +---+ or +---+
213 new: +---+ +---+ +---+
214 */
215 else if (cs->ranges[i].hi + 1 >= lo && cs->ranges[i].hi < hi)
216 {
217 if (cs->ranges[i].lo > lo)
218 cs->ranges[i].lo = lo;
219 if (cs->ranges[i].hi < hi)
220 cs->ranges[i].hi = hi;
221 while (i < cs->len - 1)
222 {
223 /* cur: --+ +---+
224 new: -----+
225 */
226 if (cs->ranges[i + 1].lo - 1 > hi)
227 break;
228
229 /* cur: --+ +---+ or --+ +---+ or --+ +--+
230 new: -----+ ------+ ---------+
231 */
232 /* Combine this range with the previous one. */
233 if (cs->ranges[i + 1].hi > hi)
234 cs->ranges[i].hi = cs->ranges[i + 1].hi;
235 if (i + 1 < cs->len)
236 memmove (cs->ranges + i + 1, cs->ranges + i + 2,
237 sizeof (scm_t_char_range) * (cs->len - i - 2));
238 cs->ranges = scm_gc_realloc (cs->ranges,
239 sizeof (scm_t_char_range) * cs->len,
240 sizeof (scm_t_char_range) * (cs->len - 1),
241 "character-set");
242 cs->len -= 1;
243 }
244 return;
245 }
246 i ++;
247 }
248
249 /* This is a new range above all previous ranges. */
250 if (cs->len == 0)
251 {
252 cs->ranges = scm_gc_malloc (sizeof (scm_t_char_range), "character-set");
253 }
254 else
255 {
256 cs->ranges = scm_gc_realloc (cs->ranges,
257 sizeof (scm_t_char_range) * cs->len,
258 sizeof (scm_t_char_range) * (cs->len + 1),
259 "character-set");
260 }
261 cs->len += 1;
262 cs->ranges[cs->len - 1].lo = lo;
263 cs->ranges[cs->len - 1].hi = hi;
264
265 return;
266}
267
f49dbcad
MG
268/* If N is in charset CS, remove it. */
269void
270scm_i_charset_unset (scm_t_char_set *cs, scm_t_wchar n)
271{
272 size_t i;
273 size_t len;
274
275 len = cs->len;
276
277 i = 0;
278 while (i < len)
279 {
280 if (n < cs->ranges[i].lo)
281 /* Not in this set. */
282 return;
283
284 if (n == cs->ranges[i].lo && n == cs->ranges[i].hi)
285 {
286 /* Remove this one-character range. */
287 if (len == 1)
288 {
289 scm_gc_free (cs->ranges,
290 sizeof (scm_t_char_range) * cs->len,
291 "character-set");
292 cs->ranges = NULL;
293 cs->len = 0;
294 return;
295 }
296 else if (i < len - 1)
297 {
298 memmove (cs->ranges + i, cs->ranges + (i + 1),
299 sizeof (scm_t_char_range) * (len - i - 1));
300 cs->ranges = scm_gc_realloc (cs->ranges,
301 sizeof (scm_t_char_range) * len,
302 sizeof (scm_t_char_range) * (len -
303 1),
304 "character-set");
305 cs->len = len - 1;
306 return;
307 }
308 else if (i == len - 1)
309 {
310 cs->ranges = scm_gc_realloc (cs->ranges,
311 sizeof (scm_t_char_range) * len,
312 sizeof (scm_t_char_range) * (len -
313 1),
314 "character-set");
315 cs->len = len - 1;
316 return;
317 }
318 }
319 else if (n == cs->ranges[i].lo)
320 {
321 /* Shrink this range from the left. */
322 cs->ranges[i].lo = n + 1;
323 return;
324 }
325 else if (n == cs->ranges[i].hi)
326 {
327 /* Shrink this range from the right. */
328 cs->ranges[i].hi = n - 1;
329 return;
330 }
331 else if (n > cs->ranges[i].lo && n < cs->ranges[i].hi)
332 {
333 /* Split this range into two pieces. */
334 cs->ranges = scm_gc_realloc (cs->ranges,
335 sizeof (scm_t_char_range) * len,
336 sizeof (scm_t_char_range) * (len + 1),
337 "character-set");
338 if (i < len - 1)
339 memmove (cs->ranges + (i + 2), cs->ranges + (i + 1),
340 sizeof (scm_t_char_range) * (len - i - 1));
341 cs->ranges[i + 1].hi = cs->ranges[i].hi;
342 cs->ranges[i + 1].lo = n + 1;
343 cs->ranges[i].hi = n - 1;
344 cs->len = len + 1;
345 return;
346 }
347
348 i++;
349 }
350
351 /* This value is above all ranges, so do nothing here. */
352 return;
353}
354
355static int
356charsets_equal (scm_t_char_set *a, scm_t_char_set *b)
357{
358 if (a->len != b->len)
359 return 0;
360
361 if (memcmp (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len) != 0)
362 return 0;
363
364 return 1;
365}
366
367/* Return true if every character in A is also in B. */
368static int
369charsets_leq (scm_t_char_set *a, scm_t_char_set *b)
370{
371 size_t i = 0, j = 0;
372 scm_t_wchar alo, ahi;
373
374 if (a->len == 0)
375 return 1;
376 if (b->len == 0)
377 return 0;
378 while (i < a->len)
379 {
380 alo = a->ranges[i].lo;
381 ahi = a->ranges[i].hi;
382 while (b->ranges[j].hi < alo)
383 {
384 if (j < b->len - 1)
385 j++;
386 else
387 return 0;
388 }
389 if (alo < b->ranges[j].lo || ahi > b->ranges[j].hi)
390 return 0;
391 i++;
392 }
393
394 return 1;
395}
396
397/* Merge B into A. */
398static void
399charsets_union (scm_t_char_set *a, scm_t_char_set *b)
400{
401 size_t i = 0;
402 scm_t_wchar blo, bhi, n;
403
404 if (b->len == 0)
405 return;
406
407 if (a->len == 0)
408 {
409 a->len = b->len;
410 a->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * b->len,
411 "character-set");
412 memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * b->len);
413 return;
414 }
415
416 /* This needs optimization. */
417 while (i < b->len)
418 {
419 blo = b->ranges[i].lo;
420 bhi = b->ranges[i].hi;
421 for (n = blo; n <= bhi; n++)
422 scm_i_charset_set (a, n);
423
424 i++;
425 }
426
427 return;
428}
429
430/* Remove elements not both in A and B from A. */
431static void
432charsets_intersection (scm_t_char_set *a, scm_t_char_set *b)
433{
434 size_t i = 0;
435 scm_t_wchar blo, bhi, n;
436 scm_t_char_set *c;
437
438 if (a->len == 0)
439 return;
440
441 if (b->len == 0)
442 {
443 scm_gc_free (a->ranges, sizeof (scm_t_char_range) * a->len,
444 "character-set");
445 a->len = 0;
446 return;
447 }
448
449 c = (scm_t_char_set *) scm_malloc (sizeof (scm_t_char_set));
450 c->len = 0;
451 c->ranges = NULL;
452
453 while (i < b->len)
454 {
455 blo = b->ranges[i].lo;
456 bhi = b->ranges[i].hi;
457 for (n = blo; n <= bhi; n++)
458 if (scm_i_charset_get (a, n))
459 scm_i_charset_set (c, n);
460 i++;
461 }
462 scm_gc_free (a->ranges, sizeof (scm_t_char_range) * a->len,
463 "character-set");
464
465 a->len = c->len;
466 if (c->len != 0)
467 a->ranges = c->ranges;
468 else
469 a->ranges = NULL;
470 free (c);
471 return;
472}
473
f4cdfe61
MG
474#define SCM_ADD_RANGE(low, high) \
475 do { \
476 p->ranges[idx].lo = (low); \
477 p->ranges[idx++].hi = (high); \
478 } while (0)
479#define SCM_ADD_RANGE_SKIP_SURROGATES(low, high) \
480 do { \
481 p->ranges[idx].lo = (low); \
482 p->ranges[idx++].hi = SCM_CODEPOINT_SURROGATE_START - 1; \
483 p->ranges[idx].lo = SCM_CODEPOINT_SURROGATE_END + 1; \
484 p->ranges[idx++].hi = (high); \
485 } while (0)
486
487
488
f49dbcad
MG
489/* Make P the compelement of Q. */
490static void
491charsets_complement (scm_t_char_set *p, scm_t_char_set *q)
492{
493 int k, idx;
494
f4cdfe61 495 idx = 0;
f49dbcad
MG
496 if (q->len == 0)
497 {
498 /* Fill with all valid codepoints. */
499 p->len = 2;
500 p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2,
501 "character-set");
f4cdfe61 502 SCM_ADD_RANGE_SKIP_SURROGATES (0, SCM_CODEPOINT_MAX);
f49dbcad
MG
503 return;
504 }
505
506 if (p->len > 0)
507 scm_gc_free (p->ranges, sizeof (scm_t_char_set) * p->len,
508 "character-set");
509
f4cdfe61 510 /* Count the number of ranges needed for the output. */
f49dbcad
MG
511 p->len = 0;
512 if (q->ranges[0].lo > 0)
513 p->len++;
514 if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
515 p->len++;
f4cdfe61 516 p->len += q->len;
f49dbcad
MG
517 p->ranges =
518 (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) * p->len,
519 "character-set");
f49dbcad
MG
520 if (q->ranges[0].lo > 0)
521 {
f4cdfe61
MG
522 if (q->ranges[0].lo > SCM_CODEPOINT_SURROGATE_END)
523 SCM_ADD_RANGE_SKIP_SURROGATES (0, q->ranges[0].lo - 1);
524 else
525 SCM_ADD_RANGE (0, q->ranges[0].lo - 1);
f49dbcad
MG
526 }
527 for (k = 1; k < q->len; k++)
528 {
f4cdfe61
MG
529 if (q->ranges[k - 1].hi < SCM_CODEPOINT_SURROGATE_START
530 && q->ranges[k].lo - 1 > SCM_CODEPOINT_SURROGATE_END)
531 SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1);
532 else
533 SCM_ADD_RANGE (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1);
f49dbcad
MG
534 }
535 if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
536 {
f4cdfe61
MG
537 if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_SURROGATE_START)
538 SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX);
539 else
540 SCM_ADD_RANGE (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX);
f49dbcad
MG
541 }
542 return;
543}
f4cdfe61
MG
544#undef SCM_ADD_RANGE
545#undef SCM_ADD_RANGE_SKIP_SURROGATES
f49dbcad
MG
546
547/* Replace A with elements only found in one of A or B. */
548static void
549charsets_xor (scm_t_char_set *a, scm_t_char_set *b)
550{
551 size_t i = 0;
552 scm_t_wchar blo, bhi, n;
553
554 if (b->len == 0)
555 {
556 return;
557 }
63181a97 558
f49dbcad
MG
559 if (a->len == 0)
560 {
561 a->ranges =
562 (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) *
563 b->len, "character-set");
564 a->len = b->len;
565 memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len);
566 return;
567 }
568
569 while (i < b->len)
570 {
571 blo = b->ranges[i].lo;
572 bhi = b->ranges[i].hi;
573 for (n = blo; n <= bhi; n++)
574 {
575 if (scm_i_charset_get (a, n))
576 scm_i_charset_unset (a, n);
577 else
578 scm_i_charset_set (a, n);
579 }
580
581 i++;
582 }
583 return;
584}
63181a97
MV
585
586/* Smob print hook for character sets. */
587static int
588charset_print (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED)
589{
f49dbcad 590 size_t i;
63181a97 591 int first = 1;
f49dbcad
MG
592 scm_t_char_set *p;
593 const size_t max_ranges_to_print = 50;
594
595 p = SCM_CHARSET_DATA (charset);
63181a97
MV
596
597 scm_puts ("#<charset {", port);
f49dbcad
MG
598 for (i = 0; i < p->len; i++)
599 {
600 if (first)
601 first = 0;
602 else
603 scm_puts (" ", port);
604 scm_write (SCM_MAKE_CHAR (p->ranges[i].lo), port);
605 if (p->ranges[i].lo != p->ranges[i].hi)
606 {
607 scm_puts ("..", port);
608 scm_write (SCM_MAKE_CHAR (p->ranges[i].hi), port);
609 }
610 if (i >= max_ranges_to_print)
611 {
612 /* Too many to print here. Quit early. */
613 scm_puts (" ...", port);
614 break;
615 }
616 }
63181a97
MV
617 scm_puts ("}>", port);
618 return 1;
619}
620
621
622/* Smob free hook for character sets. */
623static size_t
624charset_free (SCM charset)
625{
f49dbcad
MG
626 scm_t_char_set *cs;
627 size_t len = 0;
628
629 cs = SCM_CHARSET_DATA (charset);
630 if (cs != NULL)
631 len = cs->len;
632 if (len > 0)
633 scm_gc_free (cs->ranges, sizeof (scm_t_char_range) * len,
634 "character-set");
635
636 cs->ranges = NULL;
637 cs->len = 0;
638
639 scm_gc_free (cs, sizeof (scm_t_char_set), "character-set");
640
641 scm_remember_upto_here_1 (charset);
642
643 return 0;
644}
645
646
647/* Smob print hook for character sets cursors. */
648static int
649charset_cursor_print (SCM cursor, SCM port,
650 scm_print_state *pstate SCM_UNUSED)
651{
652 scm_t_char_set_cursor *cur;
653
654 cur = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
655
656 scm_puts ("#<charset-cursor ", port);
657 if (cur->range == (size_t) (-1))
658 scm_puts ("(empty)", port);
659 else
660 {
661 scm_write (scm_from_size_t (cur->range), port);
662 scm_puts (":", port);
663 scm_write (scm_from_int32 (cur->n), port);
664 }
665 scm_puts (">", port);
666 return 1;
667}
668
669/* Smob free hook for character sets. */
670static size_t
671charset_cursor_free (SCM charset)
672{
673 scm_t_char_set_cursor *cur;
674
675 cur = (scm_t_char_set_cursor *) SCM_SMOB_DATA (charset);
676 scm_gc_free (cur, sizeof (scm_t_char_set_cursor), "charset-cursor");
677 scm_remember_upto_here_1 (charset);
678
679 return 0;
63181a97
MV
680}
681
682
683/* Create a new, empty character set. */
684static SCM
f49dbcad 685make_char_set (const char *func_name)
63181a97 686{
f49dbcad 687 scm_t_char_set *p;
63181a97 688
f49dbcad
MG
689 p = scm_gc_malloc (sizeof (scm_t_char_set), "character-set");
690 memset (p, 0, sizeof (scm_t_char_set));
63181a97
MV
691 SCM_RETURN_NEWSMOB (scm_tc16_charset, p);
692}
693
694
695SCM_DEFINE (scm_char_set_p, "char-set?", 1, 0, 0,
696 (SCM obj),
697 "Return @code{#t} if @var{obj} is a character set, @code{#f}\n"
698 "otherwise.")
699#define FUNC_NAME s_scm_char_set_p
700{
701 return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_charset, obj));
702}
703#undef FUNC_NAME
704
705
706SCM_DEFINE (scm_char_set_eq, "char-set=", 0, 0, 1,
707 (SCM char_sets),
708 "Return @code{#t} if all given character sets are equal.")
709#define FUNC_NAME s_scm_char_set_eq
710{
711 int argnum = 1;
f49dbcad 712 scm_t_char_set *cs1_data = NULL;
63181a97
MV
713
714 SCM_VALIDATE_REST_ARGUMENT (char_sets);
715
d2e53ed6 716 while (!scm_is_null (char_sets))
63181a97
MV
717 {
718 SCM csi = SCM_CAR (char_sets);
f49dbcad 719 scm_t_char_set *csi_data;
63181a97
MV
720
721 SCM_VALIDATE_SMOB (argnum, csi, charset);
722 argnum++;
f49dbcad 723 csi_data = SCM_CHARSET_DATA (csi);
63181a97 724 if (cs1_data == NULL)
f49dbcad
MG
725 cs1_data = csi_data;
726 else if (!charsets_equal (cs1_data, csi_data))
727 return SCM_BOOL_F;
63181a97
MV
728 char_sets = SCM_CDR (char_sets);
729 }
730 return SCM_BOOL_T;
731}
732#undef FUNC_NAME
733
734
735SCM_DEFINE (scm_char_set_leq, "char-set<=", 0, 0, 1,
736 (SCM char_sets),
737 "Return @code{#t} if every character set @var{cs}i is a subset\n"
738 "of character set @var{cs}i+1.")
739#define FUNC_NAME s_scm_char_set_leq
740{
741 int argnum = 1;
f49dbcad 742 scm_t_char_set *prev_data = NULL;
63181a97
MV
743
744 SCM_VALIDATE_REST_ARGUMENT (char_sets);
745
d2e53ed6 746 while (!scm_is_null (char_sets))
63181a97
MV
747 {
748 SCM csi = SCM_CAR (char_sets);
f49dbcad 749 scm_t_char_set *csi_data;
63181a97
MV
750
751 SCM_VALIDATE_SMOB (argnum, csi, charset);
752 argnum++;
f49dbcad 753 csi_data = SCM_CHARSET_DATA (csi);
63181a97 754 if (prev_data)
f49dbcad
MG
755 {
756 if (!charsets_leq (prev_data, csi_data))
757 return SCM_BOOL_F;
758 }
63181a97
MV
759 prev_data = csi_data;
760 char_sets = SCM_CDR (char_sets);
761 }
762 return SCM_BOOL_T;
763}
764#undef FUNC_NAME
765
766
767SCM_DEFINE (scm_char_set_hash, "char-set-hash", 1, 1, 0,
768 (SCM cs, SCM bound),
769 "Compute a hash value for the character set @var{cs}. If\n"
770 "@var{bound} is given and non-zero, it restricts the\n"
771 "returned value to the range 0 @dots{} @var{bound - 1}.")
772#define FUNC_NAME s_scm_char_set_hash
773{
774 const unsigned long default_bnd = 871;
775 unsigned long bnd;
f49dbcad 776 scm_t_char_set *p;
63181a97
MV
777 unsigned long val = 0;
778 int k;
f49dbcad 779 scm_t_wchar c;
63181a97
MV
780
781 SCM_VALIDATE_SMOB (1, cs, charset);
782
783 if (SCM_UNBNDP (bound))
784 bnd = default_bnd;
785 else
786 {
787 bnd = scm_to_ulong (bound);
788 if (bnd == 0)
f49dbcad 789 bnd = default_bnd;
63181a97
MV
790 }
791
f49dbcad
MG
792 p = SCM_CHARSET_DATA (cs);
793 for (k = 0; k < p->len; k++)
63181a97 794 {
f49dbcad
MG
795 for (c = p->ranges[k].lo; c <= p->ranges[k].hi; c++)
796 val = c + (val << 1);
63181a97
MV
797 }
798 return scm_from_ulong (val % bnd);
799}
800#undef FUNC_NAME
801
802
803SCM_DEFINE (scm_char_set_cursor, "char-set-cursor", 1, 0, 0,
f49dbcad 804 (SCM cs), "Return a cursor into the character set @var{cs}.")
63181a97
MV
805#define FUNC_NAME s_scm_char_set_cursor
806{
f49dbcad
MG
807 scm_t_char_set *cs_data;
808 scm_t_char_set_cursor *cur_data;
63181a97
MV
809
810 SCM_VALIDATE_SMOB (1, cs, charset);
f49dbcad
MG
811 cs_data = SCM_CHARSET_DATA (cs);
812 cur_data =
813 (scm_t_char_set_cursor *) scm_gc_malloc (sizeof (scm_t_char_set_cursor),
814 "charset-cursor");
815 if (cs_data->len == 0)
63181a97 816 {
f49dbcad
MG
817 cur_data->range = (size_t) (-1);
818 cur_data->n = 0;
63181a97 819 }
f49dbcad
MG
820 else
821 {
822 cur_data->range = 0;
823 cur_data->n = cs_data->ranges[0].lo;
824 }
825 SCM_RETURN_NEWSMOB (scm_tc16_charset_cursor, cur_data);
63181a97
MV
826}
827#undef FUNC_NAME
828
829
830SCM_DEFINE (scm_char_set_ref, "char-set-ref", 2, 0, 0,
f49dbcad
MG
831 (SCM cs, SCM cursor),
832 "Return the character at the current cursor position\n"
833 "@var{cursor} in the character set @var{cs}. It is an error to\n"
834 "pass a cursor for which @code{end-of-char-set?} returns true.")
63181a97
MV
835#define FUNC_NAME s_scm_char_set_ref
836{
f49dbcad
MG
837 scm_t_char_set *cs_data;
838 scm_t_char_set_cursor *cur_data;
839 size_t i;
840
63181a97 841 SCM_VALIDATE_SMOB (1, cs, charset);
f49dbcad
MG
842 SCM_VALIDATE_SMOB (2, cursor, charset_cursor);
843
844 cs_data = SCM_CHARSET_DATA (cs);
845 cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
63181a97 846
f49dbcad
MG
847 /* Validate that this cursor is still true. */
848 i = cur_data->range;
849 if (i == (size_t) (-1)
850 || i >= cs_data->len
851 || cur_data->n < cs_data->ranges[i].lo
852 || cur_data->n > cs_data->ranges[i].hi)
63181a97 853 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
f49dbcad 854 return SCM_MAKE_CHAR (cur_data->n);
63181a97
MV
855}
856#undef FUNC_NAME
857
858
859SCM_DEFINE (scm_char_set_cursor_next, "char-set-cursor-next", 2, 0, 0,
f49dbcad
MG
860 (SCM cs, SCM cursor),
861 "Advance the character set cursor @var{cursor} to the next\n"
862 "character in the character set @var{cs}. It is an error if the\n"
863 "cursor given satisfies @code{end-of-char-set?}.")
63181a97
MV
864#define FUNC_NAME s_scm_char_set_cursor_next
865{
f49dbcad
MG
866 scm_t_char_set *cs_data;
867 scm_t_char_set_cursor *cur_data;
868 size_t i;
869
63181a97 870 SCM_VALIDATE_SMOB (1, cs, charset);
f49dbcad
MG
871 SCM_VALIDATE_SMOB (2, cursor, charset_cursor);
872
873 cs_data = SCM_CHARSET_DATA (cs);
874 cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
63181a97 875
f49dbcad
MG
876 /* Validate that this cursor is still true. */
877 i = cur_data->range;
878 if (i == (size_t) (-1)
879 || i >= cs_data->len
880 || cur_data->n < cs_data->ranges[i].lo
881 || cur_data->n > cs_data->ranges[i].hi)
63181a97 882 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
f49dbcad
MG
883 /* Increment the cursor. */
884 if (cur_data->n == cs_data->ranges[i].hi)
63181a97 885 {
f49dbcad
MG
886 if (i + 1 < cs_data->len)
887 {
888 cur_data->range = i + 1;
889 cur_data->n = cs_data->ranges[i + 1].lo;
890 }
891 else
892 {
893 /* This is the end of the road. */
894 cur_data->range = (size_t) (-1);
895 cur_data->n = 0;
896 }
63181a97 897 }
f49dbcad
MG
898 else
899 {
900 cur_data->n = cur_data->n + 1;
901 }
902
903 return cursor;
63181a97
MV
904}
905#undef FUNC_NAME
906
907
908SCM_DEFINE (scm_end_of_char_set_p, "end-of-char-set?", 1, 0, 0,
f49dbcad
MG
909 (SCM cursor),
910 "Return @code{#t} if @var{cursor} has reached the end of a\n"
911 "character set, @code{#f} otherwise.")
63181a97
MV
912#define FUNC_NAME s_scm_end_of_char_set_p
913{
f49dbcad
MG
914 scm_t_char_set_cursor *cur_data;
915 SCM_VALIDATE_SMOB (1, cursor, charset_cursor);
916
917 cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
918 if (cur_data->range == (size_t) (-1))
919 return SCM_BOOL_T;
920
921 return SCM_BOOL_F;
63181a97
MV
922}
923#undef FUNC_NAME
924
925
926SCM_DEFINE (scm_char_set_fold, "char-set-fold", 3, 0, 0,
f49dbcad
MG
927 (SCM kons, SCM knil, SCM cs),
928 "Fold the procedure @var{kons} over the character set @var{cs},\n"
929 "initializing it with @var{knil}.")
63181a97
MV
930#define FUNC_NAME s_scm_char_set_fold
931{
f49dbcad 932 scm_t_char_set *cs_data;
63181a97 933 int k;
f49dbcad 934 scm_t_wchar n;
63181a97
MV
935
936 SCM_VALIDATE_PROC (1, kons);
937 SCM_VALIDATE_SMOB (3, cs, charset);
938
f49dbcad
MG
939 cs_data = SCM_CHARSET_DATA (cs);
940
941 if (cs_data->len == 0)
942 return knil;
943
944 for (k = 0; k < cs_data->len; k++)
945 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
63181a97 946 {
f49dbcad 947 knil = scm_call_2 (kons, SCM_MAKE_CHAR (n), knil);
63181a97
MV
948 }
949 return knil;
950}
951#undef FUNC_NAME
952
953
954SCM_DEFINE (scm_char_set_unfold, "char-set-unfold", 4, 1, 0,
955 (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
956 "This is a fundamental constructor for character sets.\n"
957 "@itemize @bullet\n"
958 "@item @var{g} is used to generate a series of ``seed'' values\n"
959 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
960 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
961 "@item @var{p} tells us when to stop -- when it returns true\n"
962 "when applied to one of the seed values.\n"
963 "@item @var{f} maps each seed value to a character. These\n"
964 "characters are added to the base character set @var{base_cs} to\n"
965 "form the result; @var{base_cs} defaults to the empty set.\n"
966 "@end itemize")
967#define FUNC_NAME s_scm_char_set_unfold
968{
969 SCM result, tmp;
970
971 SCM_VALIDATE_PROC (1, p);
972 SCM_VALIDATE_PROC (2, f);
973 SCM_VALIDATE_PROC (3, g);
974 if (!SCM_UNBNDP (base_cs))
975 {
976 SCM_VALIDATE_SMOB (5, base_cs, charset);
977 result = scm_char_set_copy (base_cs);
978 }
979 else
980 result = make_char_set (FUNC_NAME);
981
982 tmp = scm_call_1 (p, seed);
983 while (scm_is_false (tmp))
984 {
985 SCM ch = scm_call_1 (f, seed);
986 if (!SCM_CHARP (ch))
987 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
988 SCM_CHARSET_SET (result, SCM_CHAR (ch));
989
990 seed = scm_call_1 (g, seed);
991 tmp = scm_call_1 (p, seed);
992 }
993 return result;
994}
995#undef FUNC_NAME
996
997
998SCM_DEFINE (scm_char_set_unfold_x, "char-set-unfold!", 5, 0, 0,
999 (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
1000 "This is a fundamental constructor for character sets.\n"
1001 "@itemize @bullet\n"
1002 "@item @var{g} is used to generate a series of ``seed'' values\n"
1003 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
1004 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
1005 "@item @var{p} tells us when to stop -- when it returns true\n"
1006 "when applied to one of the seed values.\n"
1007 "@item @var{f} maps each seed value to a character. These\n"
1008 "characters are added to the base character set @var{base_cs} to\n"
1009 "form the result; @var{base_cs} defaults to the empty set.\n"
1010 "@end itemize")
1011#define FUNC_NAME s_scm_char_set_unfold_x
1012{
1013 SCM tmp;
1014
1015 SCM_VALIDATE_PROC (1, p);
1016 SCM_VALIDATE_PROC (2, f);
1017 SCM_VALIDATE_PROC (3, g);
1018 SCM_VALIDATE_SMOB (5, base_cs, charset);
1019
1020 tmp = scm_call_1 (p, seed);
1021 while (scm_is_false (tmp))
1022 {
1023 SCM ch = scm_call_1 (f, seed);
1024 if (!SCM_CHARP (ch))
1025 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
1026 SCM_CHARSET_SET (base_cs, SCM_CHAR (ch));
1027
1028 seed = scm_call_1 (g, seed);
1029 tmp = scm_call_1 (p, seed);
1030 }
1031 return base_cs;
1032}
1033#undef FUNC_NAME
1034
1035
1036SCM_DEFINE (scm_char_set_for_each, "char-set-for-each", 2, 0, 0,
f49dbcad
MG
1037 (SCM proc, SCM cs),
1038 "Apply @var{proc} to every character in the character set\n"
1039 "@var{cs}. The return value is not specified.")
63181a97
MV
1040#define FUNC_NAME s_scm_char_set_for_each
1041{
f49dbcad 1042 scm_t_char_set *cs_data;
63181a97 1043 int k;
f49dbcad 1044 scm_t_wchar n;
63181a97
MV
1045
1046 SCM_VALIDATE_PROC (1, proc);
1047 SCM_VALIDATE_SMOB (2, cs, charset);
1048
f49dbcad
MG
1049 cs_data = SCM_CHARSET_DATA (cs);
1050
1051 if (cs_data->len == 0)
1052 return SCM_UNSPECIFIED;
1053
1054 for (k = 0; k < cs_data->len; k++)
1055 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
1056 {
1057 scm_call_1 (proc, SCM_MAKE_CHAR (n));
1058 }
1059
63181a97
MV
1060 return SCM_UNSPECIFIED;
1061}
1062#undef FUNC_NAME
1063
1064
1065SCM_DEFINE (scm_char_set_map, "char-set-map", 2, 0, 0,
1066 (SCM proc, SCM cs),
1067 "Map the procedure @var{proc} over every character in @var{cs}.\n"
1068 "@var{proc} must be a character -> character procedure.")
1069#define FUNC_NAME s_scm_char_set_map
1070{
1071 SCM result;
1072 int k;
f49dbcad
MG
1073 scm_t_char_set *cs_data;
1074 scm_t_wchar n;
63181a97
MV
1075
1076 SCM_VALIDATE_PROC (1, proc);
1077 SCM_VALIDATE_SMOB (2, cs, charset);
1078
1079 result = make_char_set (FUNC_NAME);
f49dbcad
MG
1080 cs_data = SCM_CHARSET_DATA (cs);
1081
1082 if (cs_data->len == 0)
1083 return result;
1084
1085 for (k = 0; k < cs_data->len; k++)
1086 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
63181a97 1087 {
f49dbcad
MG
1088 SCM ch = scm_call_1 (proc, SCM_MAKE_CHAR (n));
1089 if (!SCM_CHARP (ch))
1090 SCM_MISC_ERROR ("procedure ~S returned non-char",
1091 scm_list_1 (proc));
1092 SCM_CHARSET_SET (result, SCM_CHAR (ch));
63181a97
MV
1093 }
1094 return result;
1095}
1096#undef FUNC_NAME
1097
1098
1099SCM_DEFINE (scm_char_set_copy, "char-set-copy", 1, 0, 0,
1100 (SCM cs),
1101 "Return a newly allocated character set containing all\n"
1102 "characters in @var{cs}.")
1103#define FUNC_NAME s_scm_char_set_copy
1104{
1105 SCM ret;
f49dbcad 1106 scm_t_char_set *p1, *p2;
63181a97
MV
1107
1108 SCM_VALIDATE_SMOB (1, cs, charset);
1109 ret = make_char_set (FUNC_NAME);
f49dbcad
MG
1110 p1 = SCM_CHARSET_DATA (cs);
1111 p2 = SCM_CHARSET_DATA (ret);
1112 p2->len = p1->len;
1113
1114 if (p1->len == 0)
1115 p2->ranges = NULL;
1116 else
1117 {
1118 p2->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * p1->len,
1119 "character-set");
1120 memcpy (p2->ranges, p1->ranges, sizeof (scm_t_char_range) * p1->len);
1121 }
1122
63181a97
MV
1123 return ret;
1124}
1125#undef FUNC_NAME
1126
1127
1128SCM_DEFINE (scm_char_set, "char-set", 0, 0, 1,
1129 (SCM rest),
1130 "Return a character set containing all given characters.")
1131#define FUNC_NAME s_scm_char_set
1132{
1133 SCM cs;
63181a97
MV
1134 int argnum = 1;
1135
1136 SCM_VALIDATE_REST_ARGUMENT (rest);
1137 cs = make_char_set (FUNC_NAME);
d2e53ed6 1138 while (!scm_is_null (rest))
63181a97 1139 {
f49dbcad 1140 scm_t_wchar c;
63181a97
MV
1141
1142 SCM_VALIDATE_CHAR_COPY (argnum, SCM_CAR (rest), c);
1143 argnum++;
1144 rest = SCM_CDR (rest);
f49dbcad 1145 SCM_CHARSET_SET (cs, c);
63181a97
MV
1146 }
1147 return cs;
1148}
1149#undef FUNC_NAME
1150
1151
1152SCM_DEFINE (scm_list_to_char_set, "list->char-set", 1, 1, 0,
1153 (SCM list, SCM base_cs),
1154 "Convert the character list @var{list} to a character set. If\n"
1155 "the character set @var{base_cs} is given, the character in this\n"
1156 "set are also included in the result.")
1157#define FUNC_NAME s_scm_list_to_char_set
1158{
1159 SCM cs;
63181a97
MV
1160
1161 SCM_VALIDATE_LIST (1, list);
1162 if (SCM_UNBNDP (base_cs))
1163 cs = make_char_set (FUNC_NAME);
1164 else
1165 {
1166 SCM_VALIDATE_SMOB (2, base_cs, charset);
1167 cs = scm_char_set_copy (base_cs);
1168 }
d2e53ed6 1169 while (!scm_is_null (list))
63181a97
MV
1170 {
1171 SCM chr = SCM_CAR (list);
f49dbcad 1172 scm_t_wchar c;
63181a97
MV
1173
1174 SCM_VALIDATE_CHAR_COPY (0, chr, c);
1175 list = SCM_CDR (list);
1176
f49dbcad
MG
1177
1178 SCM_CHARSET_SET (cs, c);
63181a97
MV
1179 }
1180 return cs;
1181}
1182#undef FUNC_NAME
1183
1184
1185SCM_DEFINE (scm_list_to_char_set_x, "list->char-set!", 2, 0, 0,
f49dbcad
MG
1186 (SCM list, SCM base_cs),
1187 "Convert the character list @var{list} to a character set. The\n"
1188 "characters are added to @var{base_cs} and @var{base_cs} is\n"
1189 "returned.")
63181a97
MV
1190#define FUNC_NAME s_scm_list_to_char_set_x
1191{
63181a97
MV
1192 SCM_VALIDATE_LIST (1, list);
1193 SCM_VALIDATE_SMOB (2, base_cs, charset);
d2e53ed6 1194 while (!scm_is_null (list))
63181a97
MV
1195 {
1196 SCM chr = SCM_CAR (list);
f49dbcad 1197 scm_t_wchar c;
63181a97
MV
1198
1199 SCM_VALIDATE_CHAR_COPY (0, chr, c);
1200 list = SCM_CDR (list);
1201
f49dbcad 1202 SCM_CHARSET_SET (base_cs, c);
63181a97
MV
1203 }
1204 return base_cs;
1205}
1206#undef FUNC_NAME
1207
1208
1209SCM_DEFINE (scm_string_to_char_set, "string->char-set", 1, 1, 0,
1210 (SCM str, SCM base_cs),
1211 "Convert the string @var{str} to a character set. If the\n"
1212 "character set @var{base_cs} is given, the characters in this\n"
1213 "set are also included in the result.")
1214#define FUNC_NAME s_scm_string_to_char_set
1215{
1216 SCM cs;
63181a97
MV
1217 size_t k = 0, len;
1218
1219 SCM_VALIDATE_STRING (1, str);
1220 if (SCM_UNBNDP (base_cs))
1221 cs = make_char_set (FUNC_NAME);
1222 else
1223 {
1224 SCM_VALIDATE_SMOB (2, base_cs, charset);
1225 cs = scm_char_set_copy (base_cs);
1226 }
63181a97
MV
1227 len = scm_i_string_length (str);
1228 while (k < len)
1229 {
f49dbcad
MG
1230 scm_t_wchar c = scm_i_string_ref (str, k++);
1231 SCM_CHARSET_SET (cs, c);
63181a97
MV
1232 }
1233 scm_remember_upto_here_1 (str);
1234 return cs;
1235}
1236#undef FUNC_NAME
1237
1238
1239SCM_DEFINE (scm_string_to_char_set_x, "string->char-set!", 2, 0, 0,
f49dbcad
MG
1240 (SCM str, SCM base_cs),
1241 "Convert the string @var{str} to a character set. The\n"
1242 "characters from the string are added to @var{base_cs}, and\n"
1243 "@var{base_cs} is returned.")
63181a97
MV
1244#define FUNC_NAME s_scm_string_to_char_set_x
1245{
63181a97
MV
1246 size_t k = 0, len;
1247
1248 SCM_VALIDATE_STRING (1, str);
1249 SCM_VALIDATE_SMOB (2, base_cs, charset);
63181a97
MV
1250 len = scm_i_string_length (str);
1251 while (k < len)
1252 {
f49dbcad
MG
1253 scm_t_wchar c = scm_i_string_ref (str, k++);
1254 SCM_CHARSET_SET (base_cs, c);
63181a97
MV
1255 }
1256 scm_remember_upto_here_1 (str);
1257 return base_cs;
1258}
1259#undef FUNC_NAME
1260
1261
1262SCM_DEFINE (scm_char_set_filter, "char-set-filter", 2, 1, 0,
1263 (SCM pred, SCM cs, SCM base_cs),
1264 "Return a character set containing every character from @var{cs}\n"
1265 "so that it satisfies @var{pred}. If provided, the characters\n"
1266 "from @var{base_cs} are added to the result.")
1267#define FUNC_NAME s_scm_char_set_filter
1268{
1269 SCM ret;
1270 int k;
f49dbcad
MG
1271 scm_t_wchar n;
1272 scm_t_char_set *p;
63181a97
MV
1273
1274 SCM_VALIDATE_PROC (1, pred);
1275 SCM_VALIDATE_SMOB (2, cs, charset);
1276 if (!SCM_UNBNDP (base_cs))
1277 {
1278 SCM_VALIDATE_SMOB (3, base_cs, charset);
1279 ret = scm_char_set_copy (base_cs);
1280 }
1281 else
1282 ret = make_char_set (FUNC_NAME);
63181a97 1283
f49dbcad
MG
1284 p = SCM_CHARSET_DATA (cs);
1285
1286 if (p->len == 0)
1287 return ret;
1288
1289 for (k = 0; k < p->len; k++)
1290 for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++)
1291 {
1292 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1293
1294 if (scm_is_true (res))
1295 SCM_CHARSET_SET (ret, n);
1296 }
63181a97
MV
1297 return ret;
1298}
1299#undef FUNC_NAME
1300
1301
1302SCM_DEFINE (scm_char_set_filter_x, "char-set-filter!", 3, 0, 0,
1303 (SCM pred, SCM cs, SCM base_cs),
1304 "Return a character set containing every character from @var{cs}\n"
1305 "so that it satisfies @var{pred}. The characters are added to\n"
1306 "@var{base_cs} and @var{base_cs} is returned.")
1307#define FUNC_NAME s_scm_char_set_filter_x
1308{
1309 int k;
f49dbcad
MG
1310 scm_t_wchar n;
1311 scm_t_char_set *p;
63181a97
MV
1312
1313 SCM_VALIDATE_PROC (1, pred);
1314 SCM_VALIDATE_SMOB (2, cs, charset);
1315 SCM_VALIDATE_SMOB (3, base_cs, charset);
f49dbcad
MG
1316 p = SCM_CHARSET_DATA (cs);
1317 if (p->len == 0)
1318 return base_cs;
63181a97 1319
f49dbcad
MG
1320 for (k = 0; k < p->len; k++)
1321 for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++)
1322 {
bde543e8 1323 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
f49dbcad
MG
1324
1325 if (scm_is_true (res))
1326 SCM_CHARSET_SET (base_cs, n);
1327 }
63181a97
MV
1328 return base_cs;
1329}
1330#undef FUNC_NAME
1331
1332
91772d8f
MG
1333/* Return a character set containing all the characters from [LOWER,UPPER),
1334 giving range errors if ERROR, adding chars from BASE_CS, and recycling
1335 BASE_CS if REUSE is true. */
1336static SCM
1337scm_i_ucs_range_to_char_set (const char *FUNC_NAME, SCM lower, SCM upper,
1338 SCM error, SCM base_cs, int reuse)
63181a97
MV
1339{
1340 SCM cs;
1341 size_t clower, cupper;
63181a97
MV
1342
1343 clower = scm_to_size_t (lower);
91772d8f
MG
1344 cupper = scm_to_size_t (upper) - 1;
1345 SCM_ASSERT_RANGE (1, lower, clower >= 0);
1346 SCM_ASSERT_RANGE (2, upper, cupper >= 0);
63181a97
MV
1347 SCM_ASSERT_RANGE (2, upper, cupper >= clower);
1348 if (!SCM_UNBNDP (error))
1349 {
1350 if (scm_is_true (error))
f49dbcad
MG
1351 {
1352 SCM_ASSERT_RANGE (1, lower, SCM_IS_UNICODE_CHAR (clower));
1353 SCM_ASSERT_RANGE (2, upper, SCM_IS_UNICODE_CHAR (cupper));
91772d8f
MG
1354 if (clower < SCM_CODEPOINT_SURROGATE_START
1355 && cupper > SCM_CODEPOINT_SURROGATE_END)
1356 scm_error(scm_out_of_range_key,
1357 FUNC_NAME, "invalid range - contains surrogate characters: ~S to ~S",
1358 scm_list_2 (lower, upper), scm_list_1 (upper));
f49dbcad 1359 }
63181a97 1360 }
91772d8f 1361
63181a97
MV
1362 if (SCM_UNBNDP (base_cs))
1363 cs = make_char_set (FUNC_NAME);
1364 else
1365 {
1366 SCM_VALIDATE_SMOB (4, base_cs, charset);
91772d8f
MG
1367 if (reuse)
1368 cs = base_cs;
1369 else
1370 cs = scm_char_set_copy (base_cs);
63181a97 1371 }
91772d8f
MG
1372
1373 if ((clower >= SCM_CODEPOINT_SURROGATE_START && clower <= SCM_CODEPOINT_SURROGATE_END)
1374 && (cupper >= SCM_CODEPOINT_SURROGATE_START && cupper <= SCM_CODEPOINT_SURROGATE_END))
1375 return cs;
1376
1377 if (clower > SCM_CODEPOINT_MAX)
1378 clower = SCM_CODEPOINT_MAX;
1379 if (clower >= SCM_CODEPOINT_SURROGATE_START && clower <= SCM_CODEPOINT_SURROGATE_END)
1380 clower = SCM_CODEPOINT_SURROGATE_END + 1;
1381 if (cupper > SCM_CODEPOINT_MAX)
1382 cupper = SCM_CODEPOINT_MAX;
1383 if (cupper >= SCM_CODEPOINT_SURROGATE_START && cupper <= SCM_CODEPOINT_SURROGATE_END)
1384 cupper = SCM_CODEPOINT_SURROGATE_START - 1;
1385 if (clower < SCM_CODEPOINT_SURROGATE_START && cupper > SCM_CODEPOINT_SURROGATE_END)
63181a97 1386 {
91772d8f
MG
1387 scm_i_charset_set_range (SCM_CHARSET_DATA (cs), clower, SCM_CODEPOINT_SURROGATE_START - 1);
1388 scm_i_charset_set_range (SCM_CHARSET_DATA (cs), SCM_CODEPOINT_SURROGATE_END + 1, cupper);
63181a97 1389 }
91772d8f
MG
1390 else
1391 scm_i_charset_set_range (SCM_CHARSET_DATA (cs), clower, cupper);
63181a97
MV
1392 return cs;
1393}
91772d8f
MG
1394
1395SCM_DEFINE (scm_ucs_range_to_char_set, "ucs-range->char-set", 2, 2, 0,
1396 (SCM lower, SCM upper, SCM error, SCM base_cs),
1397 "Return a character set containing all characters whose\n"
1398 "character codes lie in the half-open range\n"
1399 "[@var{lower},@var{upper}).\n"
1400 "\n"
1401 "If @var{error} is a true value, an error is signalled if the\n"
1402 "specified range contains characters which are not valid\n"
1403 "Unicode code points. If @var{error} is @code{#f},\n"
1404 "these characters are silently left out of the resultung\n"
1405 "character set.\n"
1406 "\n"
1407 "The characters in @var{base_cs} are added to the result, if\n"
1408 "given.")
1409#define FUNC_NAME s_scm_ucs_range_to_char_set
1410{
1411 return scm_i_ucs_range_to_char_set (FUNC_NAME, lower, upper,
1412 error, base_cs, 0);
1413}
63181a97
MV
1414#undef FUNC_NAME
1415
1416
1417SCM_DEFINE (scm_ucs_range_to_char_set_x, "ucs-range->char-set!", 4, 0, 0,
1418 (SCM lower, SCM upper, SCM error, SCM base_cs),
1419 "Return a character set containing all characters whose\n"
1420 "character codes lie in the half-open range\n"
1421 "[@var{lower},@var{upper}).\n"
1422 "\n"
1423 "If @var{error} is a true value, an error is signalled if the\n"
1424 "specified range contains characters which are not contained in\n"
1425 "the implemented character range. If @var{error} is @code{#f},\n"
1426 "these characters are silently left out of the resultung\n"
1427 "character set.\n"
1428 "\n"
1429 "The characters are added to @var{base_cs} and @var{base_cs} is\n"
1430 "returned.")
1431#define FUNC_NAME s_scm_ucs_range_to_char_set_x
1432{
91772d8f
MG
1433 SCM_VALIDATE_SMOB (4, base_cs, charset);
1434 return scm_i_ucs_range_to_char_set (FUNC_NAME, lower, upper,
1435 error, base_cs, 1);
63181a97
MV
1436}
1437#undef FUNC_NAME
1438
1439SCM_DEFINE (scm_to_char_set, "->char-set", 1, 0, 0,
1440 (SCM x),
1441 "Coerces x into a char-set. @var{x} may be a string, character or char-set. A string is converted to the set of its constituent characters; a character is converted to a singleton set; a char-set is returned as-is.")
1442#define FUNC_NAME s_scm_to_char_set
1443{
1444 if (scm_is_string (x))
1445 return scm_string_to_char_set (x, SCM_UNDEFINED);
1446 else if (SCM_CHARP (x))
1447 return scm_char_set (scm_list_1 (x));
1448 else if (SCM_SMOB_PREDICATE (scm_tc16_charset, x))
1449 return x;
1450 else
1451 scm_wrong_type_arg (NULL, 0, x);
1452}
1453#undef FUNC_NAME
1454
1455SCM_DEFINE (scm_char_set_size, "char-set-size", 1, 0, 0,
1456 (SCM cs),
1457 "Return the number of elements in character set @var{cs}.")
1458#define FUNC_NAME s_scm_char_set_size
1459{
1460 int k, count = 0;
f49dbcad 1461 scm_t_char_set *cs_data;
63181a97
MV
1462
1463 SCM_VALIDATE_SMOB (1, cs, charset);
f49dbcad
MG
1464 cs_data = SCM_CHARSET_DATA (cs);
1465
1466 if (cs_data->len == 0)
1467 return scm_from_int (0);
1468
1469 for (k = 0; k < cs_data->len; k++)
1470 count += cs_data->ranges[k].hi - cs_data->ranges[k].lo + 1;
1471
1472 return scm_from_int (count);
63181a97
MV
1473}
1474#undef FUNC_NAME
1475
1476
1477SCM_DEFINE (scm_char_set_count, "char-set-count", 2, 0, 0,
1478 (SCM pred, SCM cs),
1479 "Return the number of the elements int the character set\n"
1480 "@var{cs} which satisfy the predicate @var{pred}.")
1481#define FUNC_NAME s_scm_char_set_count
1482{
1483 int k, count = 0;
f49dbcad
MG
1484 scm_t_wchar n;
1485 scm_t_char_set *cs_data;
63181a97
MV
1486
1487 SCM_VALIDATE_PROC (1, pred);
1488 SCM_VALIDATE_SMOB (2, cs, charset);
f49dbcad
MG
1489 cs_data = SCM_CHARSET_DATA (cs);
1490 if (cs_data->len == 0)
1491 return scm_from_int (0);
63181a97 1492
f49dbcad
MG
1493 for (k = 0; k < cs_data->len; k++)
1494 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
63181a97 1495 {
f49dbcad
MG
1496 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1497 if (scm_is_true (res))
1498 count++;
63181a97
MV
1499 }
1500 return SCM_I_MAKINUM (count);
1501}
1502#undef FUNC_NAME
1503
1504
1505SCM_DEFINE (scm_char_set_to_list, "char-set->list", 1, 0, 0,
1506 (SCM cs),
1507 "Return a list containing the elements of the character set\n"
1508 "@var{cs}.")
1509#define FUNC_NAME s_scm_char_set_to_list
1510{
1511 int k;
f49dbcad 1512 scm_t_wchar n;
63181a97 1513 SCM result = SCM_EOL;
f49dbcad 1514 scm_t_char_set *p;
63181a97
MV
1515
1516 SCM_VALIDATE_SMOB (1, cs, charset);
f49dbcad
MG
1517 p = SCM_CHARSET_DATA (cs);
1518 if (p->len == 0)
1519 return SCM_EOL;
1520
1521 for (k = p->len - 1; k >= 0; k--)
1522 for (n = p->ranges[k].hi; n >= p->ranges[k].lo; n--)
1523 result = scm_cons (SCM_MAKE_CHAR (n), result);
63181a97
MV
1524 return result;
1525}
1526#undef FUNC_NAME
1527
1528
1529SCM_DEFINE (scm_char_set_to_string, "char-set->string", 1, 0, 0,
1530 (SCM cs),
1531 "Return a string containing the elements of the character set\n"
1532 "@var{cs}. The order in which the characters are placed in the\n"
1533 "string is not defined.")
1534#define FUNC_NAME s_scm_char_set_to_string
1535{
1536 int k;
1537 int count = 0;
1538 int idx = 0;
f49dbcad 1539 int wide = 0;
63181a97 1540 SCM result;
f49dbcad
MG
1541 scm_t_wchar n;
1542 scm_t_char_set *cs_data;
1543 char *buf;
1544 scm_t_wchar *wbuf;
63181a97
MV
1545
1546 SCM_VALIDATE_SMOB (1, cs, charset);
f49dbcad
MG
1547 cs_data = SCM_CHARSET_DATA (cs);
1548 if (cs_data->len == 0)
1549 return scm_nullstr;
1550
1551 if (cs_data->ranges[cs_data->len - 1].hi > 255)
1552 wide = 1;
1553
1554 count = scm_to_int (scm_char_set_size (cs));
1555 if (wide)
1556 result = scm_i_make_wide_string (count, &wbuf);
1557 else
1558 result = scm_i_make_string (count, &buf);
1559
1560 for (k = 0; k < cs_data->len; k++)
1561 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
1562 {
1563 if (wide)
1564 wbuf[idx++] = n;
1565 else
1566 buf[idx++] = n;
1567 }
63181a97
MV
1568 return result;
1569}
1570#undef FUNC_NAME
1571
1572
1573SCM_DEFINE (scm_char_set_contains_p, "char-set-contains?", 2, 0, 0,
1574 (SCM cs, SCM ch),
1575 "Return @code{#t} iff the character @var{ch} is contained in the\n"
1576 "character set @var{cs}.")
1577#define FUNC_NAME s_scm_char_set_contains_p
1578{
1579 SCM_VALIDATE_SMOB (1, cs, charset);
1580 SCM_VALIDATE_CHAR (2, ch);
1581 return scm_from_bool (SCM_CHARSET_GET (cs, SCM_CHAR (ch)));
1582}
1583#undef FUNC_NAME
1584
1585
1586SCM_DEFINE (scm_char_set_every, "char-set-every", 2, 0, 0,
1587 (SCM pred, SCM cs),
1588 "Return a true value if every character in the character set\n"
1589 "@var{cs} satisfies the predicate @var{pred}.")
1590#define FUNC_NAME s_scm_char_set_every
1591{
1592 int k;
f49dbcad 1593 scm_t_wchar n;
63181a97 1594 SCM res = SCM_BOOL_T;
f49dbcad 1595 scm_t_char_set *cs_data;
63181a97
MV
1596
1597 SCM_VALIDATE_PROC (1, pred);
1598 SCM_VALIDATE_SMOB (2, cs, charset);
1599
f49dbcad
MG
1600 cs_data = SCM_CHARSET_DATA (cs);
1601 if (cs_data->len == 0)
1602 return SCM_BOOL_T;
1603
1604 for (k = 0; k < cs_data->len; k++)
1605 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
63181a97 1606 {
f49dbcad
MG
1607 res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1608 if (scm_is_false (res))
1609 return res;
63181a97 1610 }
f49dbcad 1611 return SCM_BOOL_T;
63181a97
MV
1612}
1613#undef FUNC_NAME
1614
1615
1616SCM_DEFINE (scm_char_set_any, "char-set-any", 2, 0, 0,
1617 (SCM pred, SCM cs),
1618 "Return a true value if any character in the character set\n"
1619 "@var{cs} satisfies the predicate @var{pred}.")
1620#define FUNC_NAME s_scm_char_set_any
1621{
1622 int k;
f49dbcad
MG
1623 scm_t_wchar n;
1624 scm_t_char_set *cs_data;
63181a97
MV
1625
1626 SCM_VALIDATE_PROC (1, pred);
1627 SCM_VALIDATE_SMOB (2, cs, charset);
1628
693e7289
MG
1629 cs_data = SCM_CHARSET_DATA (cs);
1630 if (cs_data->len == 0)
1631 return SCM_BOOL_T;
f49dbcad
MG
1632
1633 for (k = 0; k < cs_data->len; k++)
1634 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
63181a97 1635 {
f49dbcad
MG
1636 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1637 if (scm_is_true (res))
1638 return res;
63181a97
MV
1639 }
1640 return SCM_BOOL_F;
1641}
1642#undef FUNC_NAME
1643
1644
1645SCM_DEFINE (scm_char_set_adjoin, "char-set-adjoin", 1, 0, 1,
f49dbcad
MG
1646 (SCM cs, SCM rest),
1647 "Add all character arguments to the first argument, which must\n"
1648 "be a character set.")
63181a97
MV
1649#define FUNC_NAME s_scm_char_set_adjoin
1650{
63181a97
MV
1651 SCM_VALIDATE_SMOB (1, cs, charset);
1652 SCM_VALIDATE_REST_ARGUMENT (rest);
1653 cs = scm_char_set_copy (cs);
1654
d2e53ed6 1655 while (!scm_is_null (rest))
63181a97
MV
1656 {
1657 SCM chr = SCM_CAR (rest);
f49dbcad 1658 scm_t_wchar c;
63181a97
MV
1659
1660 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1661 rest = SCM_CDR (rest);
1662
f49dbcad 1663 SCM_CHARSET_SET (cs, c);
63181a97
MV
1664 }
1665 return cs;
1666}
1667#undef FUNC_NAME
1668
1669
1670SCM_DEFINE (scm_char_set_delete, "char-set-delete", 1, 0, 1,
f49dbcad
MG
1671 (SCM cs, SCM rest),
1672 "Delete all character arguments from the first argument, which\n"
1673 "must be a character set.")
63181a97
MV
1674#define FUNC_NAME s_scm_char_set_delete
1675{
63181a97
MV
1676 SCM_VALIDATE_SMOB (1, cs, charset);
1677 SCM_VALIDATE_REST_ARGUMENT (rest);
1678 cs = scm_char_set_copy (cs);
1679
d2e53ed6 1680 while (!scm_is_null (rest))
63181a97
MV
1681 {
1682 SCM chr = SCM_CAR (rest);
f49dbcad 1683 scm_t_wchar c;
63181a97
MV
1684
1685 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1686 rest = SCM_CDR (rest);
1687
f49dbcad 1688 SCM_CHARSET_UNSET (cs, c);
63181a97
MV
1689 }
1690 return cs;
1691}
1692#undef FUNC_NAME
1693
1694
1695SCM_DEFINE (scm_char_set_adjoin_x, "char-set-adjoin!", 1, 0, 1,
f49dbcad
MG
1696 (SCM cs, SCM rest),
1697 "Add all character arguments to the first argument, which must\n"
1698 "be a character set.")
63181a97
MV
1699#define FUNC_NAME s_scm_char_set_adjoin_x
1700{
63181a97
MV
1701 SCM_VALIDATE_SMOB (1, cs, charset);
1702 SCM_VALIDATE_REST_ARGUMENT (rest);
1703
d2e53ed6 1704 while (!scm_is_null (rest))
63181a97
MV
1705 {
1706 SCM chr = SCM_CAR (rest);
f49dbcad 1707 scm_t_wchar c;
63181a97
MV
1708
1709 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1710 rest = SCM_CDR (rest);
1711
f49dbcad 1712 SCM_CHARSET_SET (cs, c);
63181a97
MV
1713 }
1714 return cs;
1715}
1716#undef FUNC_NAME
1717
1718
1719SCM_DEFINE (scm_char_set_delete_x, "char-set-delete!", 1, 0, 1,
f49dbcad
MG
1720 (SCM cs, SCM rest),
1721 "Delete all character arguments from the first argument, which\n"
1722 "must be a character set.")
63181a97
MV
1723#define FUNC_NAME s_scm_char_set_delete_x
1724{
63181a97
MV
1725 SCM_VALIDATE_SMOB (1, cs, charset);
1726 SCM_VALIDATE_REST_ARGUMENT (rest);
1727
d2e53ed6 1728 while (!scm_is_null (rest))
63181a97
MV
1729 {
1730 SCM chr = SCM_CAR (rest);
f49dbcad 1731 scm_t_wchar c;
63181a97
MV
1732
1733 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1734 rest = SCM_CDR (rest);
1735
f49dbcad 1736 SCM_CHARSET_UNSET (cs, c);
63181a97
MV
1737 }
1738 return cs;
1739}
1740#undef FUNC_NAME
1741
1742
1743SCM_DEFINE (scm_char_set_complement, "char-set-complement", 1, 0, 0,
f49dbcad 1744 (SCM cs), "Return the complement of the character set @var{cs}.")
63181a97
MV
1745#define FUNC_NAME s_scm_char_set_complement
1746{
63181a97 1747 SCM res;
f49dbcad 1748 scm_t_char_set *p, *q;
63181a97
MV
1749
1750 SCM_VALIDATE_SMOB (1, cs, charset);
1751
1752 res = make_char_set (FUNC_NAME);
f49dbcad
MG
1753 p = SCM_CHARSET_DATA (res);
1754 q = SCM_CHARSET_DATA (cs);
1755
1756 charsets_complement (p, q);
63181a97
MV
1757 return res;
1758}
1759#undef FUNC_NAME
1760
1761
1762SCM_DEFINE (scm_char_set_union, "char-set-union", 0, 0, 1,
1763 (SCM rest),
1764 "Return the union of all argument character sets.")
1765#define FUNC_NAME s_scm_char_set_union
1766{
1767 int c = 1;
1768 SCM res;
f49dbcad 1769 scm_t_char_set *p;
63181a97
MV
1770
1771 SCM_VALIDATE_REST_ARGUMENT (rest);
1772
1773 res = make_char_set (FUNC_NAME);
f49dbcad 1774 p = SCM_CHARSET_DATA (res);
d2e53ed6 1775 while (!scm_is_null (rest))
63181a97 1776 {
63181a97
MV
1777 SCM cs = SCM_CAR (rest);
1778 SCM_VALIDATE_SMOB (c, cs, charset);
1779 c++;
1780 rest = SCM_CDR (rest);
1781
f49dbcad
MG
1782
1783 charsets_union (p, (scm_t_char_set *) SCM_SMOB_DATA (cs));
63181a97
MV
1784 }
1785 return res;
1786}
1787#undef FUNC_NAME
1788
1789
1790SCM_DEFINE (scm_char_set_intersection, "char-set-intersection", 0, 0, 1,
1791 (SCM rest),
1792 "Return the intersection of all argument character sets.")
1793#define FUNC_NAME s_scm_char_set_intersection
1794{
1795 SCM res;
1796
1797 SCM_VALIDATE_REST_ARGUMENT (rest);
1798
d2e53ed6 1799 if (scm_is_null (rest))
63181a97
MV
1800 res = make_char_set (FUNC_NAME);
1801 else
1802 {
f49dbcad 1803 scm_t_char_set *p;
63181a97
MV
1804 int argnum = 2;
1805
1806 res = scm_char_set_copy (SCM_CAR (rest));
f49dbcad 1807 p = SCM_CHARSET_DATA (res);
63181a97
MV
1808 rest = SCM_CDR (rest);
1809
d2e53ed6 1810 while (scm_is_pair (rest))
f49dbcad
MG
1811 {
1812 SCM cs = SCM_CAR (rest);
1813 scm_t_char_set *cs_data;
1814
1815 SCM_VALIDATE_SMOB (argnum, cs, charset);
1816 argnum++;
1817 cs_data = SCM_CHARSET_DATA (cs);
1818 rest = SCM_CDR (rest);
1819 charsets_intersection (p, cs_data);
1820 }
63181a97
MV
1821 }
1822
1823 return res;
1824}
1825#undef FUNC_NAME
1826
1827
1828SCM_DEFINE (scm_char_set_difference, "char-set-difference", 1, 0, 1,
1829 (SCM cs1, SCM rest),
1830 "Return the difference of all argument character sets.")
1831#define FUNC_NAME s_scm_char_set_difference
1832{
1833 int c = 2;
f49dbcad
MG
1834 SCM res, compl;
1835 scm_t_char_set *p, *q;
63181a97
MV
1836
1837 SCM_VALIDATE_SMOB (1, cs1, charset);
1838 SCM_VALIDATE_REST_ARGUMENT (rest);
1839
1840 res = scm_char_set_copy (cs1);
f49dbcad
MG
1841 p = SCM_CHARSET_DATA (res);
1842 compl = make_char_set (FUNC_NAME);
1843 q = SCM_CHARSET_DATA (compl);
d2e53ed6 1844 while (!scm_is_null (rest))
63181a97 1845 {
63181a97
MV
1846 SCM cs = SCM_CAR (rest);
1847 SCM_VALIDATE_SMOB (c, cs, charset);
1848 c++;
1849 rest = SCM_CDR (rest);
1850
f49dbcad
MG
1851 charsets_complement (q, SCM_CHARSET_DATA (cs));
1852 charsets_intersection (p, q);
63181a97
MV
1853 }
1854 return res;
1855}
1856#undef FUNC_NAME
1857
1858
1859SCM_DEFINE (scm_char_set_xor, "char-set-xor", 0, 0, 1,
1860 (SCM rest),
1861 "Return the exclusive-or of all argument character sets.")
1862#define FUNC_NAME s_scm_char_set_xor
1863{
1864 SCM res;
1865
1866 SCM_VALIDATE_REST_ARGUMENT (rest);
1867
d2e53ed6 1868 if (scm_is_null (rest))
63181a97
MV
1869 res = make_char_set (FUNC_NAME);
1870 else
1871 {
1872 int argnum = 2;
f49dbcad 1873 scm_t_char_set *p;
63181a97
MV
1874
1875 res = scm_char_set_copy (SCM_CAR (rest));
f49dbcad 1876 p = SCM_CHARSET_DATA (res);
63181a97
MV
1877 rest = SCM_CDR (rest);
1878
d2e53ed6 1879 while (scm_is_pair (rest))
f49dbcad
MG
1880 {
1881 SCM cs = SCM_CAR (rest);
1882 scm_t_char_set *cs_data;
1883
1884 SCM_VALIDATE_SMOB (argnum, cs, charset);
1885 argnum++;
1886 cs_data = SCM_CHARSET_DATA (cs);
1887 rest = SCM_CDR (rest);
1888
1889 charsets_xor (p, cs_data);
1890 }
63181a97
MV
1891 }
1892 return res;
1893}
1894#undef FUNC_NAME
1895
1896
1897SCM_DEFINE (scm_char_set_diff_plus_intersection, "char-set-diff+intersection", 1, 0, 1,
1898 (SCM cs1, SCM rest),
1899 "Return the difference and the intersection of all argument\n"
1900 "character sets.")
1901#define FUNC_NAME s_scm_char_set_diff_plus_intersection
1902{
1903 int c = 2;
1904 SCM res1, res2;
f49dbcad 1905 scm_t_char_set *p, *q;
63181a97
MV
1906
1907 SCM_VALIDATE_SMOB (1, cs1, charset);
1908 SCM_VALIDATE_REST_ARGUMENT (rest);
1909
1910 res1 = scm_char_set_copy (cs1);
1911 res2 = make_char_set (FUNC_NAME);
f49dbcad
MG
1912 p = SCM_CHARSET_DATA (res1);
1913 q = SCM_CHARSET_DATA (res2);
d2e53ed6 1914 while (!scm_is_null (rest))
63181a97 1915 {
63181a97 1916 SCM cs = SCM_CAR (rest);
f49dbcad 1917 scm_t_char_set *r;
63181a97
MV
1918
1919 SCM_VALIDATE_SMOB (c, cs, charset);
1920 c++;
f49dbcad 1921 r = SCM_CHARSET_DATA (cs);
63181a97 1922
f49dbcad
MG
1923 charsets_union (q, r);
1924 charsets_intersection (p, r);
63181a97
MV
1925 rest = SCM_CDR (rest);
1926 }
1927 return scm_values (scm_list_2 (res1, res2));
1928}
1929#undef FUNC_NAME
1930
1931
1932SCM_DEFINE (scm_char_set_complement_x, "char-set-complement!", 1, 0, 0,
f49dbcad 1933 (SCM cs), "Return the complement of the character set @var{cs}.")
63181a97
MV
1934#define FUNC_NAME s_scm_char_set_complement_x
1935{
63181a97 1936 SCM_VALIDATE_SMOB (1, cs, charset);
f49dbcad 1937 cs = scm_char_set_complement (cs);
63181a97
MV
1938 return cs;
1939}
1940#undef FUNC_NAME
1941
1942
1943SCM_DEFINE (scm_char_set_union_x, "char-set-union!", 1, 0, 1,
f49dbcad
MG
1944 (SCM cs1, SCM rest),
1945 "Return the union of all argument character sets.")
63181a97
MV
1946#define FUNC_NAME s_scm_char_set_union_x
1947{
63181a97
MV
1948 SCM_VALIDATE_SMOB (1, cs1, charset);
1949 SCM_VALIDATE_REST_ARGUMENT (rest);
1950
f49dbcad 1951 cs1 = scm_char_set_union (scm_cons (cs1, rest));
63181a97
MV
1952 return cs1;
1953}
1954#undef FUNC_NAME
1955
1956
1957SCM_DEFINE (scm_char_set_intersection_x, "char-set-intersection!", 1, 0, 1,
f49dbcad
MG
1958 (SCM cs1, SCM rest),
1959 "Return the intersection of all argument character sets.")
63181a97
MV
1960#define FUNC_NAME s_scm_char_set_intersection_x
1961{
63181a97
MV
1962 SCM_VALIDATE_SMOB (1, cs1, charset);
1963 SCM_VALIDATE_REST_ARGUMENT (rest);
1964
f49dbcad 1965 cs1 = scm_char_set_intersection (scm_cons (cs1, rest));
63181a97
MV
1966 return cs1;
1967}
1968#undef FUNC_NAME
1969
1970
1971SCM_DEFINE (scm_char_set_difference_x, "char-set-difference!", 1, 0, 1,
f49dbcad
MG
1972 (SCM cs1, SCM rest),
1973 "Return the difference of all argument character sets.")
63181a97
MV
1974#define FUNC_NAME s_scm_char_set_difference_x
1975{
63181a97
MV
1976 SCM_VALIDATE_SMOB (1, cs1, charset);
1977 SCM_VALIDATE_REST_ARGUMENT (rest);
1978
f49dbcad 1979 cs1 = scm_char_set_difference (cs1, rest);
63181a97
MV
1980 return cs1;
1981}
1982#undef FUNC_NAME
1983
1984
1985SCM_DEFINE (scm_char_set_xor_x, "char-set-xor!", 1, 0, 1,
1986 (SCM cs1, SCM rest),
1987 "Return the exclusive-or of all argument character sets.")
1988#define FUNC_NAME s_scm_char_set_xor_x
1989{
1990 /* a side-effecting variant should presumably give consistent results:
1991 (define a (char-set #\a))
1992 (char-set-xor a a a) -> char set #\a
1993 (char-set-xor! a a a) -> char set #\a
f49dbcad 1994 */
7165abeb
MG
1995 cs1 = scm_char_set_xor (scm_cons (cs1, rest));
1996 return cs1;
63181a97
MV
1997}
1998#undef FUNC_NAME
1999
2000
f49dbcad
MG
2001SCM_DEFINE (scm_char_set_diff_plus_intersection_x,
2002 "char-set-diff+intersection!", 2, 0, 1, (SCM cs1, SCM cs2,
2003 SCM rest),
2004 "Return the difference and the intersection of all argument\n"
2005 "character sets.")
63181a97
MV
2006#define FUNC_NAME s_scm_char_set_diff_plus_intersection_x
2007{
f49dbcad 2008 SCM diff, intersect;
63181a97 2009
f49dbcad
MG
2010 diff = scm_char_set_difference (cs1, scm_cons (cs2, rest));
2011 intersect =
2012 scm_char_set_intersection (scm_cons (cs1, scm_cons (cs2, rest)));
2013 cs1 = diff;
2014 cs2 = intersect;
63181a97
MV
2015 return scm_values (scm_list_2 (cs1, cs2));
2016}
f49dbcad 2017#undef FUNC_NAME
d5ecf579 2018
a17d2654 2019\f
f49dbcad 2020
a17d2654
LC
2021/* Standard character sets. */
2022
63181a97
MV
2023SCM scm_char_set_lower_case;
2024SCM scm_char_set_upper_case;
2025SCM scm_char_set_title_case;
2026SCM scm_char_set_letter;
2027SCM scm_char_set_digit;
2028SCM scm_char_set_letter_and_digit;
2029SCM scm_char_set_graphic;
2030SCM scm_char_set_printing;
2031SCM scm_char_set_whitespace;
2032SCM scm_char_set_iso_control;
2033SCM scm_char_set_punctuation;
2034SCM scm_char_set_symbol;
2035SCM scm_char_set_hex_digit;
2036SCM scm_char_set_blank;
2037SCM scm_char_set_ascii;
2038SCM scm_char_set_empty;
2039SCM scm_char_set_full;
2040
63181a97 2041
a17d2654
LC
2042/* Create an empty character set and return it after binding it to NAME. */
2043static inline SCM
f49dbcad 2044define_charset (const char *name, const scm_t_char_set *p)
63181a97 2045{
f49dbcad
MG
2046 SCM cs;
2047
2048 SCM_NEWSMOB (cs, scm_tc16_charset, p);
63181a97
MV
2049 scm_c_define (name, cs);
2050 return scm_permanent_object (cs);
2051}
2052
f49dbcad
MG
2053#ifdef SCM_CHARSET_DEBUG
2054SCM_DEFINE (scm_debug_char_set, "debug-char-set", 1, 0, 0,
2055 (SCM charset),
2056 "Print out the internal C structure of @var{charset}.\n")
d5ecf579 2057#define FUNC_NAME s_scm_debug_char_set
63181a97 2058{
f49dbcad
MG
2059 int i;
2060 scm_t_char_set *cs = SCM_CHARSET_DATA (charset);
2061 fprintf (stderr, "cs %p\n", cs);
2062 fprintf (stderr, "len %d\n", cs->len);
2063 fprintf (stderr, "arr %p\n", cs->ranges);
2064 for (i = 0; i < cs->len; i++)
63181a97 2065 {
f49dbcad
MG
2066 if (cs->ranges[i].lo == cs->ranges[i].hi)
2067 fprintf (stderr, "%04x\n", cs->ranges[i].lo);
2068 else
2069 fprintf (stderr, "%04x..%04x\t[%d]\n",
2070 cs->ranges[i].lo,
2071 cs->ranges[i].hi, cs->ranges[i].hi - cs->ranges[i].lo + 1);
63181a97 2072 }
f49dbcad
MG
2073 printf ("\n");
2074 return SCM_UNSPECIFIED;
63181a97 2075}
f49dbcad 2076#undef FUNC_NAME
d5ecf579 2077#endif /* SCM_CHARSET_DEBUG */
a17d2654 2078\f
f49dbcad
MG
2079
2080
63181a97
MV
2081void
2082scm_init_srfi_14 (void)
2083{
f49dbcad 2084 scm_tc16_charset = scm_make_smob_type ("character-set", 0);
63181a97
MV
2085 scm_set_smob_free (scm_tc16_charset, charset_free);
2086 scm_set_smob_print (scm_tc16_charset, charset_print);
2087
f49dbcad
MG
2088 scm_tc16_charset_cursor = scm_make_smob_type ("char-set-cursor", 0);
2089 scm_set_smob_free (scm_tc16_charset_cursor, charset_cursor_free);
2090 scm_set_smob_print (scm_tc16_charset_cursor, charset_cursor_print);
2091
2092 scm_char_set_upper_case =
2093 define_charset ("char-set:upper-case", &cs_upper_case);
2094 scm_char_set_lower_case =
2095 define_charset ("char-set:lower-case", &cs_lower_case);
2096 scm_char_set_title_case =
2097 define_charset ("char-set:title-case", &cs_title_case);
2098 scm_char_set_letter = define_charset ("char-set:letter", &cs_letter);
2099 scm_char_set_digit = define_charset ("char-set:digit", &cs_digit);
2100 scm_char_set_letter_and_digit =
2101 define_charset ("char-set:letter+digit", &cs_letter_plus_digit);
2102 scm_char_set_graphic = define_charset ("char-set:graphic", &cs_graphic);
2103 scm_char_set_printing = define_charset ("char-set:printing", &cs_printing);
2104 scm_char_set_whitespace =
2105 define_charset ("char-set:whitespace", &cs_whitespace);
2106 scm_char_set_iso_control =
2107 define_charset ("char-set:iso-control", &cs_iso_control);
2108 scm_char_set_punctuation =
2109 define_charset ("char-set:punctuation", &cs_punctuation);
2110 scm_char_set_symbol = define_charset ("char-set:symbol", &cs_symbol);
2111 scm_char_set_hex_digit =
2112 define_charset ("char-set:hex-digit", &cs_hex_digit);
2113 scm_char_set_blank = define_charset ("char-set:blank", &cs_blank);
2114 scm_char_set_ascii = define_charset ("char-set:ascii", &cs_ascii);
2115 scm_char_set_empty = define_charset ("char-set:empty", &cs_empty);
2116 scm_char_set_full = define_charset ("char-set:full", &cs_full);
63181a97
MV
2117
2118#include "libguile/srfi-14.x"
2119}
2120
2121/* End of srfi-14.c. */