defconst, defvar: proclaim special at compile-time
[bpt/guile.git] / libguile / srfi-14.c
CommitLineData
63181a97
MV
1/* srfi-14.c --- SRFI-14 procedures for Guile
2 *
4a655e50 3 * Copyright (C) 2001, 2004, 2006, 2007, 2009, 2011 Free Software Foundation, Inc.
63181a97
MV
4 *
5 * This library is free software; you can redistribute it and/or
53befeb7
NJ
6 * modify it under the terms of the GNU Lesser General Public License
7 * as published by the Free Software Foundation; either version 3 of
8 * the License, or (at your option) any later version.
63181a97 9 *
53befeb7
NJ
10 * This library is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
63181a97
MV
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
53befeb7
NJ
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301 USA
63181a97
MV
19 */
20
a17d2654
LC
21#ifdef HAVE_CONFIG_H
22# include <config.h>
23#endif
24
25
63181a97 26#include <string.h>
f49dbcad 27#include <unictype.h>
63181a97
MV
28
29#include "libguile.h"
30#include "libguile/srfi-14.h"
f49dbcad 31#include "libguile/strings.h"
24d23822 32#include "libguile/chars.h"
63181a97 33
f49dbcad
MG
34/* Include the pre-computed standard charset data. */
35#include "libguile/srfi-14.i.c"
63181a97 36
719bb8cd
MG
37scm_t_char_range cs_full_ranges[] = {
38 {0x0000, SCM_CODEPOINT_SURROGATE_START - 1}
39 ,
40 {SCM_CODEPOINT_SURROGATE_END + 1, SCM_CODEPOINT_MAX}
41};
42
43scm_t_char_set cs_full = {
44 2,
45 cs_full_ranges
46};
47
48
f49dbcad 49#define SCM_CHARSET_DATA(charset) ((scm_t_char_set *) SCM_SMOB_DATA (charset))
63181a97 50
f49dbcad
MG
51#define SCM_CHARSET_SET(cs, idx) \
52 scm_i_charset_set (SCM_CHARSET_DATA (cs), idx)
63181a97 53
f49dbcad
MG
54#define SCM_CHARSET_UNSET(cs, idx) \
55 scm_i_charset_unset (SCM_CHARSET_DATA (cs), idx)
63181a97
MV
56
57/* Smob type code for character sets. */
58int scm_tc16_charset = 0;
f49dbcad
MG
59int scm_tc16_charset_cursor = 0;
60
61/* True if N exists in charset CS. */
62int
63scm_i_charset_get (scm_t_char_set *cs, scm_t_wchar n)
64{
65 size_t i;
66
67 i = 0;
68 while (i < cs->len)
69 {
70 if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi)
71 return 1;
72 i++;
73 }
74
75 return 0;
76}
77
78/* Put N into charset CS. */
79void
80scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n)
81{
82 size_t i;
83 size_t len;
84
85 len = cs->len;
86
87 i = 0;
88 while (i < len)
89 {
90 /* Already in this range */
91 if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi)
92 {
93 return;
94 }
95
96 if (n == cs->ranges[i].lo - 1)
97 {
98 /* This char is one below the current range. */
99 if (i > 0 && cs->ranges[i - 1].hi + 1 == n)
100 {
08ed8058
MG
101 /* It is also one above the previous range. */
102 /* This is an impossible condition: in the previous
103 iteration, the test for 'one above the current range'
104 should already have inserted the character here. */
105 abort ();
f49dbcad
MG
106 }
107 else
108 {
109 /* Expand the range down by one. */
110 cs->ranges[i].lo = n;
111 return;
112 }
113 }
114 else if (n == cs->ranges[i].hi + 1)
115 {
116 /* This char is one above the current range. */
117 if (i < len - 1 && cs->ranges[i + 1].lo - 1 == n)
118 {
119 /* It is also one below the next range, so combine them. */
120 cs->ranges[i].hi = cs->ranges[i + 1].hi;
121 if (i < len - 2)
122 memmove (cs->ranges + (i + 1), cs->ranges + (i + 2),
123 sizeof (scm_t_char_range) * (len - i - 2));
124 cs->ranges = scm_gc_realloc (cs->ranges,
125 sizeof (scm_t_char_range) * len,
126 sizeof (scm_t_char_range) * (len -
127 1),
128 "character-set");
129 cs->len = len - 1;
130 return;
131 }
132 else
133 {
134 /* Expand the range up by one. */
135 cs->ranges[i].hi = n;
136 return;
137 }
138 }
139 else if (n < cs->ranges[i].lo - 1)
140 {
141 /* This is a new range below the current one. */
142 cs->ranges = scm_gc_realloc (cs->ranges,
143 sizeof (scm_t_char_range) * len,
144 sizeof (scm_t_char_range) * (len + 1),
145 "character-set");
146 memmove (cs->ranges + (i + 1), cs->ranges + i,
147 sizeof (scm_t_char_range) * (len - i));
148 cs->ranges[i].lo = n;
149 cs->ranges[i].hi = n;
150 cs->len = len + 1;
151 return;
152 }
153
154 i++;
155 }
156
157 /* This is a new range above all previous ranges. */
158 if (len == 0)
159 {
160 cs->ranges = scm_gc_malloc (sizeof (scm_t_char_range), "character-set");
161 }
162 else
163 {
164 cs->ranges = scm_gc_realloc (cs->ranges,
165 sizeof (scm_t_char_range) * len,
166 sizeof (scm_t_char_range) * (len + 1),
167 "character-set");
168 }
169 cs->ranges[len].lo = n;
170 cs->ranges[len].hi = n;
171 cs->len = len + 1;
172
173 return;
174}
175
91772d8f
MG
176/* Put LO to HI inclusive into charset CS. */
177static void
178scm_i_charset_set_range (scm_t_char_set *cs, scm_t_wchar lo, scm_t_wchar hi)
179{
180 size_t i;
181
182 i = 0;
183 while (i < cs->len)
184 {
185 /* Already in this range */
186 if (cs->ranges[i].lo <= lo && cs->ranges[i].hi >= hi)
187 return;
188
189 /* cur: +---+
190 new: +---+
191 */
192 if (cs->ranges[i].lo - 1 > hi)
193 {
194 /* Add a new range below the current one. */
195 cs->ranges = scm_gc_realloc (cs->ranges,
196 sizeof (scm_t_char_range) * cs->len,
197 sizeof (scm_t_char_range) * (cs->len + 1),
198 "character-set");
199 memmove (cs->ranges + (i + 1), cs->ranges + i,
200 sizeof (scm_t_char_range) * (cs->len - i));
201 cs->ranges[i].lo = lo;
202 cs->ranges[i].hi = hi;
203 cs->len += 1;
204 return;
205 }
206
207 /* cur: +---+ or +---+ or +---+
208 new: +---+ +---+ +---+
209 */
210 if (cs->ranges[i].lo > lo
211 && (cs->ranges[i].lo - 1 <= hi && cs->ranges[i].hi >= hi))
212 {
213 cs->ranges[i].lo = lo;
214 return;
215 }
216
217 /* cur: +---+ or +---+ or +---+
218 new: +---+ +---+ +---+
219 */
220 else if (cs->ranges[i].hi + 1 >= lo && cs->ranges[i].hi < hi)
221 {
222 if (cs->ranges[i].lo > lo)
223 cs->ranges[i].lo = lo;
224 if (cs->ranges[i].hi < hi)
225 cs->ranges[i].hi = hi;
226 while (i < cs->len - 1)
227 {
228 /* cur: --+ +---+
229 new: -----+
230 */
231 if (cs->ranges[i + 1].lo - 1 > hi)
232 break;
233
234 /* cur: --+ +---+ or --+ +---+ or --+ +--+
235 new: -----+ ------+ ---------+
236 */
237 /* Combine this range with the previous one. */
238 if (cs->ranges[i + 1].hi > hi)
239 cs->ranges[i].hi = cs->ranges[i + 1].hi;
240 if (i + 1 < cs->len)
241 memmove (cs->ranges + i + 1, cs->ranges + i + 2,
242 sizeof (scm_t_char_range) * (cs->len - i - 2));
243 cs->ranges = scm_gc_realloc (cs->ranges,
244 sizeof (scm_t_char_range) * cs->len,
245 sizeof (scm_t_char_range) * (cs->len - 1),
246 "character-set");
247 cs->len -= 1;
248 }
249 return;
250 }
251 i ++;
252 }
253
254 /* This is a new range above all previous ranges. */
255 if (cs->len == 0)
256 {
257 cs->ranges = scm_gc_malloc (sizeof (scm_t_char_range), "character-set");
258 }
259 else
260 {
261 cs->ranges = scm_gc_realloc (cs->ranges,
262 sizeof (scm_t_char_range) * cs->len,
263 sizeof (scm_t_char_range) * (cs->len + 1),
264 "character-set");
265 }
266 cs->len += 1;
267 cs->ranges[cs->len - 1].lo = lo;
268 cs->ranges[cs->len - 1].hi = hi;
269
270 return;
271}
272
f49dbcad
MG
273/* If N is in charset CS, remove it. */
274void
275scm_i_charset_unset (scm_t_char_set *cs, scm_t_wchar n)
276{
277 size_t i;
278 size_t len;
279
280 len = cs->len;
281
282 i = 0;
283 while (i < len)
284 {
285 if (n < cs->ranges[i].lo)
286 /* Not in this set. */
287 return;
288
289 if (n == cs->ranges[i].lo && n == cs->ranges[i].hi)
290 {
291 /* Remove this one-character range. */
292 if (len == 1)
293 {
294 scm_gc_free (cs->ranges,
295 sizeof (scm_t_char_range) * cs->len,
296 "character-set");
297 cs->ranges = NULL;
298 cs->len = 0;
299 return;
300 }
301 else if (i < len - 1)
302 {
303 memmove (cs->ranges + i, cs->ranges + (i + 1),
304 sizeof (scm_t_char_range) * (len - i - 1));
305 cs->ranges = scm_gc_realloc (cs->ranges,
306 sizeof (scm_t_char_range) * len,
307 sizeof (scm_t_char_range) * (len -
308 1),
309 "character-set");
310 cs->len = len - 1;
311 return;
312 }
313 else if (i == len - 1)
314 {
315 cs->ranges = scm_gc_realloc (cs->ranges,
316 sizeof (scm_t_char_range) * len,
317 sizeof (scm_t_char_range) * (len -
318 1),
319 "character-set");
320 cs->len = len - 1;
321 return;
322 }
323 }
324 else if (n == cs->ranges[i].lo)
325 {
326 /* Shrink this range from the left. */
327 cs->ranges[i].lo = n + 1;
328 return;
329 }
330 else if (n == cs->ranges[i].hi)
331 {
332 /* Shrink this range from the right. */
333 cs->ranges[i].hi = n - 1;
334 return;
335 }
336 else if (n > cs->ranges[i].lo && n < cs->ranges[i].hi)
337 {
338 /* Split this range into two pieces. */
339 cs->ranges = scm_gc_realloc (cs->ranges,
340 sizeof (scm_t_char_range) * len,
341 sizeof (scm_t_char_range) * (len + 1),
342 "character-set");
343 if (i < len - 1)
344 memmove (cs->ranges + (i + 2), cs->ranges + (i + 1),
345 sizeof (scm_t_char_range) * (len - i - 1));
346 cs->ranges[i + 1].hi = cs->ranges[i].hi;
347 cs->ranges[i + 1].lo = n + 1;
348 cs->ranges[i].hi = n - 1;
349 cs->len = len + 1;
350 return;
351 }
352
353 i++;
354 }
355
356 /* This value is above all ranges, so do nothing here. */
357 return;
358}
359
360static int
361charsets_equal (scm_t_char_set *a, scm_t_char_set *b)
362{
363 if (a->len != b->len)
364 return 0;
365
366 if (memcmp (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len) != 0)
367 return 0;
368
369 return 1;
370}
371
372/* Return true if every character in A is also in B. */
373static int
374charsets_leq (scm_t_char_set *a, scm_t_char_set *b)
375{
376 size_t i = 0, j = 0;
377 scm_t_wchar alo, ahi;
378
379 if (a->len == 0)
380 return 1;
381 if (b->len == 0)
382 return 0;
383 while (i < a->len)
384 {
385 alo = a->ranges[i].lo;
386 ahi = a->ranges[i].hi;
387 while (b->ranges[j].hi < alo)
388 {
389 if (j < b->len - 1)
390 j++;
391 else
392 return 0;
393 }
394 if (alo < b->ranges[j].lo || ahi > b->ranges[j].hi)
395 return 0;
396 i++;
397 }
398
399 return 1;
400}
401
402/* Merge B into A. */
403static void
404charsets_union (scm_t_char_set *a, scm_t_char_set *b)
405{
406 size_t i = 0;
aff31b0f 407 scm_t_wchar blo, bhi;
f49dbcad
MG
408
409 if (b->len == 0)
410 return;
411
412 if (a->len == 0)
413 {
414 a->len = b->len;
415 a->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * b->len,
416 "character-set");
417 memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * b->len);
418 return;
419 }
420
f49dbcad
MG
421 while (i < b->len)
422 {
423 blo = b->ranges[i].lo;
424 bhi = b->ranges[i].hi;
aff31b0f 425 scm_i_charset_set_range (a, blo, bhi);
f49dbcad
MG
426
427 i++;
428 }
429
430 return;
431}
432
433/* Remove elements not both in A and B from A. */
434static void
435charsets_intersection (scm_t_char_set *a, scm_t_char_set *b)
436{
437 size_t i = 0;
438 scm_t_wchar blo, bhi, n;
439 scm_t_char_set *c;
440
441 if (a->len == 0)
442 return;
443
444 if (b->len == 0)
445 {
446 scm_gc_free (a->ranges, sizeof (scm_t_char_range) * a->len,
447 "character-set");
448 a->len = 0;
449 return;
450 }
451
452 c = (scm_t_char_set *) scm_malloc (sizeof (scm_t_char_set));
453 c->len = 0;
454 c->ranges = NULL;
455
456 while (i < b->len)
457 {
458 blo = b->ranges[i].lo;
459 bhi = b->ranges[i].hi;
460 for (n = blo; n <= bhi; n++)
461 if (scm_i_charset_get (a, n))
462 scm_i_charset_set (c, n);
463 i++;
464 }
465 scm_gc_free (a->ranges, sizeof (scm_t_char_range) * a->len,
466 "character-set");
467
468 a->len = c->len;
469 if (c->len != 0)
470 a->ranges = c->ranges;
471 else
472 a->ranges = NULL;
473 free (c);
474 return;
475}
476
f4cdfe61
MG
477#define SCM_ADD_RANGE(low, high) \
478 do { \
479 p->ranges[idx].lo = (low); \
480 p->ranges[idx++].hi = (high); \
481 } while (0)
482#define SCM_ADD_RANGE_SKIP_SURROGATES(low, high) \
483 do { \
484 p->ranges[idx].lo = (low); \
485 p->ranges[idx++].hi = SCM_CODEPOINT_SURROGATE_START - 1; \
486 p->ranges[idx].lo = SCM_CODEPOINT_SURROGATE_END + 1; \
487 p->ranges[idx++].hi = (high); \
488 } while (0)
489
490
491
f49dbcad
MG
492/* Make P the compelement of Q. */
493static void
494charsets_complement (scm_t_char_set *p, scm_t_char_set *q)
495{
496 int k, idx;
497
f4cdfe61 498 idx = 0;
f49dbcad
MG
499 if (q->len == 0)
500 {
501 /* Fill with all valid codepoints. */
502 p->len = 2;
503 p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2,
504 "character-set");
f4cdfe61 505 SCM_ADD_RANGE_SKIP_SURROGATES (0, SCM_CODEPOINT_MAX);
f49dbcad
MG
506 return;
507 }
508
509 if (p->len > 0)
510 scm_gc_free (p->ranges, sizeof (scm_t_char_set) * p->len,
511 "character-set");
512
f4cdfe61 513 /* Count the number of ranges needed for the output. */
f49dbcad
MG
514 p->len = 0;
515 if (q->ranges[0].lo > 0)
516 p->len++;
517 if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
518 p->len++;
f4cdfe61 519 p->len += q->len;
f49dbcad
MG
520 p->ranges =
521 (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) * p->len,
522 "character-set");
f49dbcad
MG
523 if (q->ranges[0].lo > 0)
524 {
f4cdfe61
MG
525 if (q->ranges[0].lo > SCM_CODEPOINT_SURROGATE_END)
526 SCM_ADD_RANGE_SKIP_SURROGATES (0, q->ranges[0].lo - 1);
527 else
528 SCM_ADD_RANGE (0, q->ranges[0].lo - 1);
f49dbcad
MG
529 }
530 for (k = 1; k < q->len; k++)
531 {
f4cdfe61
MG
532 if (q->ranges[k - 1].hi < SCM_CODEPOINT_SURROGATE_START
533 && q->ranges[k].lo - 1 > SCM_CODEPOINT_SURROGATE_END)
534 SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1);
535 else
536 SCM_ADD_RANGE (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1);
f49dbcad
MG
537 }
538 if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
539 {
f4cdfe61
MG
540 if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_SURROGATE_START)
541 SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX);
542 else
543 SCM_ADD_RANGE (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX);
f49dbcad
MG
544 }
545 return;
546}
f4cdfe61
MG
547#undef SCM_ADD_RANGE
548#undef SCM_ADD_RANGE_SKIP_SURROGATES
f49dbcad
MG
549
550/* Replace A with elements only found in one of A or B. */
551static void
552charsets_xor (scm_t_char_set *a, scm_t_char_set *b)
553{
554 size_t i = 0;
555 scm_t_wchar blo, bhi, n;
556
557 if (b->len == 0)
558 {
559 return;
560 }
63181a97 561
f49dbcad
MG
562 if (a->len == 0)
563 {
564 a->ranges =
565 (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) *
566 b->len, "character-set");
567 a->len = b->len;
568 memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len);
569 return;
570 }
571
572 while (i < b->len)
573 {
574 blo = b->ranges[i].lo;
575 bhi = b->ranges[i].hi;
576 for (n = blo; n <= bhi; n++)
577 {
578 if (scm_i_charset_get (a, n))
579 scm_i_charset_unset (a, n);
580 else
581 scm_i_charset_set (a, n);
582 }
583
584 i++;
585 }
586 return;
587}
63181a97
MV
588
589/* Smob print hook for character sets. */
590static int
591charset_print (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED)
592{
f49dbcad 593 size_t i;
63181a97 594 int first = 1;
f49dbcad
MG
595 scm_t_char_set *p;
596 const size_t max_ranges_to_print = 50;
597
598 p = SCM_CHARSET_DATA (charset);
63181a97 599
0607ebbf 600 scm_puts_unlocked ("#<charset {", port);
f49dbcad
MG
601 for (i = 0; i < p->len; i++)
602 {
603 if (first)
604 first = 0;
605 else
0607ebbf 606 scm_puts_unlocked (" ", port);
f49dbcad
MG
607 scm_write (SCM_MAKE_CHAR (p->ranges[i].lo), port);
608 if (p->ranges[i].lo != p->ranges[i].hi)
609 {
0607ebbf 610 scm_puts_unlocked ("..", port);
f49dbcad
MG
611 scm_write (SCM_MAKE_CHAR (p->ranges[i].hi), port);
612 }
613 if (i >= max_ranges_to_print)
614 {
615 /* Too many to print here. Quit early. */
0607ebbf 616 scm_puts_unlocked (" ...", port);
f49dbcad
MG
617 break;
618 }
619 }
0607ebbf 620 scm_puts_unlocked ("}>", port);
63181a97
MV
621 return 1;
622}
623
f49dbcad
MG
624/* Smob print hook for character sets cursors. */
625static int
626charset_cursor_print (SCM cursor, SCM port,
627 scm_print_state *pstate SCM_UNUSED)
628{
629 scm_t_char_set_cursor *cur;
630
631 cur = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
632
0607ebbf 633 scm_puts_unlocked ("#<charset-cursor ", port);
f49dbcad 634 if (cur->range == (size_t) (-1))
0607ebbf 635 scm_puts_unlocked ("(empty)", port);
f49dbcad
MG
636 else
637 {
638 scm_write (scm_from_size_t (cur->range), port);
0607ebbf 639 scm_puts_unlocked (":", port);
f49dbcad
MG
640 scm_write (scm_from_int32 (cur->n), port);
641 }
0607ebbf 642 scm_puts_unlocked (">", port);
f49dbcad
MG
643 return 1;
644}
63181a97 645
63181a97
MV
646
647/* Create a new, empty character set. */
648static SCM
f49dbcad 649make_char_set (const char *func_name)
63181a97 650{
f49dbcad 651 scm_t_char_set *p;
63181a97 652
f49dbcad
MG
653 p = scm_gc_malloc (sizeof (scm_t_char_set), "character-set");
654 memset (p, 0, sizeof (scm_t_char_set));
63181a97
MV
655 SCM_RETURN_NEWSMOB (scm_tc16_charset, p);
656}
657
658
659SCM_DEFINE (scm_char_set_p, "char-set?", 1, 0, 0,
660 (SCM obj),
661 "Return @code{#t} if @var{obj} is a character set, @code{#f}\n"
662 "otherwise.")
663#define FUNC_NAME s_scm_char_set_p
664{
665 return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_charset, obj));
666}
667#undef FUNC_NAME
668
669
670SCM_DEFINE (scm_char_set_eq, "char-set=", 0, 0, 1,
671 (SCM char_sets),
672 "Return @code{#t} if all given character sets are equal.")
673#define FUNC_NAME s_scm_char_set_eq
674{
675 int argnum = 1;
f49dbcad 676 scm_t_char_set *cs1_data = NULL;
63181a97
MV
677
678 SCM_VALIDATE_REST_ARGUMENT (char_sets);
679
d2e53ed6 680 while (!scm_is_null (char_sets))
63181a97
MV
681 {
682 SCM csi = SCM_CAR (char_sets);
f49dbcad 683 scm_t_char_set *csi_data;
63181a97
MV
684
685 SCM_VALIDATE_SMOB (argnum, csi, charset);
686 argnum++;
f49dbcad 687 csi_data = SCM_CHARSET_DATA (csi);
63181a97 688 if (cs1_data == NULL)
f49dbcad
MG
689 cs1_data = csi_data;
690 else if (!charsets_equal (cs1_data, csi_data))
691 return SCM_BOOL_F;
63181a97
MV
692 char_sets = SCM_CDR (char_sets);
693 }
694 return SCM_BOOL_T;
695}
696#undef FUNC_NAME
697
698
699SCM_DEFINE (scm_char_set_leq, "char-set<=", 0, 0, 1,
700 (SCM char_sets),
b7e64f8b
BT
701 "Return @code{#t} if every character set @var{char_set}i is a subset\n"
702 "of character set @var{char_set}i+1.")
63181a97
MV
703#define FUNC_NAME s_scm_char_set_leq
704{
705 int argnum = 1;
f49dbcad 706 scm_t_char_set *prev_data = NULL;
63181a97
MV
707
708 SCM_VALIDATE_REST_ARGUMENT (char_sets);
709
d2e53ed6 710 while (!scm_is_null (char_sets))
63181a97
MV
711 {
712 SCM csi = SCM_CAR (char_sets);
f49dbcad 713 scm_t_char_set *csi_data;
63181a97
MV
714
715 SCM_VALIDATE_SMOB (argnum, csi, charset);
716 argnum++;
f49dbcad 717 csi_data = SCM_CHARSET_DATA (csi);
63181a97 718 if (prev_data)
f49dbcad
MG
719 {
720 if (!charsets_leq (prev_data, csi_data))
721 return SCM_BOOL_F;
722 }
63181a97
MV
723 prev_data = csi_data;
724 char_sets = SCM_CDR (char_sets);
725 }
726 return SCM_BOOL_T;
727}
728#undef FUNC_NAME
729
730
731SCM_DEFINE (scm_char_set_hash, "char-set-hash", 1, 1, 0,
732 (SCM cs, SCM bound),
733 "Compute a hash value for the character set @var{cs}. If\n"
734 "@var{bound} is given and non-zero, it restricts the\n"
b7e64f8b 735 "returned value to the range 0 @dots{} @var{bound} - 1.")
63181a97
MV
736#define FUNC_NAME s_scm_char_set_hash
737{
738 const unsigned long default_bnd = 871;
739 unsigned long bnd;
f49dbcad 740 scm_t_char_set *p;
63181a97
MV
741 unsigned long val = 0;
742 int k;
f49dbcad 743 scm_t_wchar c;
63181a97
MV
744
745 SCM_VALIDATE_SMOB (1, cs, charset);
746
747 if (SCM_UNBNDP (bound))
748 bnd = default_bnd;
749 else
750 {
751 bnd = scm_to_ulong (bound);
752 if (bnd == 0)
f49dbcad 753 bnd = default_bnd;
63181a97
MV
754 }
755
f49dbcad
MG
756 p = SCM_CHARSET_DATA (cs);
757 for (k = 0; k < p->len; k++)
63181a97 758 {
f49dbcad
MG
759 for (c = p->ranges[k].lo; c <= p->ranges[k].hi; c++)
760 val = c + (val << 1);
63181a97
MV
761 }
762 return scm_from_ulong (val % bnd);
763}
764#undef FUNC_NAME
765
766
767SCM_DEFINE (scm_char_set_cursor, "char-set-cursor", 1, 0, 0,
f49dbcad 768 (SCM cs), "Return a cursor into the character set @var{cs}.")
63181a97
MV
769#define FUNC_NAME s_scm_char_set_cursor
770{
f49dbcad
MG
771 scm_t_char_set *cs_data;
772 scm_t_char_set_cursor *cur_data;
63181a97
MV
773
774 SCM_VALIDATE_SMOB (1, cs, charset);
f49dbcad
MG
775 cs_data = SCM_CHARSET_DATA (cs);
776 cur_data =
777 (scm_t_char_set_cursor *) scm_gc_malloc (sizeof (scm_t_char_set_cursor),
778 "charset-cursor");
779 if (cs_data->len == 0)
63181a97 780 {
f49dbcad
MG
781 cur_data->range = (size_t) (-1);
782 cur_data->n = 0;
63181a97 783 }
f49dbcad
MG
784 else
785 {
786 cur_data->range = 0;
787 cur_data->n = cs_data->ranges[0].lo;
788 }
789 SCM_RETURN_NEWSMOB (scm_tc16_charset_cursor, cur_data);
63181a97
MV
790}
791#undef FUNC_NAME
792
793
794SCM_DEFINE (scm_char_set_ref, "char-set-ref", 2, 0, 0,
f49dbcad
MG
795 (SCM cs, SCM cursor),
796 "Return the character at the current cursor position\n"
797 "@var{cursor} in the character set @var{cs}. It is an error to\n"
798 "pass a cursor for which @code{end-of-char-set?} returns true.")
63181a97
MV
799#define FUNC_NAME s_scm_char_set_ref
800{
f49dbcad
MG
801 scm_t_char_set *cs_data;
802 scm_t_char_set_cursor *cur_data;
803 size_t i;
804
63181a97 805 SCM_VALIDATE_SMOB (1, cs, charset);
f49dbcad 806 SCM_VALIDATE_SMOB (2, cursor, charset_cursor);
63181a97 807
f49dbcad
MG
808 cs_data = SCM_CHARSET_DATA (cs);
809 cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
63181a97 810
f49dbcad
MG
811 /* Validate that this cursor is still true. */
812 i = cur_data->range;
813 if (i == (size_t) (-1)
814 || i >= cs_data->len
815 || cur_data->n < cs_data->ranges[i].lo
816 || cur_data->n > cs_data->ranges[i].hi)
63181a97 817 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
f49dbcad 818 return SCM_MAKE_CHAR (cur_data->n);
63181a97
MV
819}
820#undef FUNC_NAME
821
822
823SCM_DEFINE (scm_char_set_cursor_next, "char-set-cursor-next", 2, 0, 0,
f49dbcad
MG
824 (SCM cs, SCM cursor),
825 "Advance the character set cursor @var{cursor} to the next\n"
826 "character in the character set @var{cs}. It is an error if the\n"
827 "cursor given satisfies @code{end-of-char-set?}.")
63181a97
MV
828#define FUNC_NAME s_scm_char_set_cursor_next
829{
f49dbcad
MG
830 scm_t_char_set *cs_data;
831 scm_t_char_set_cursor *cur_data;
832 size_t i;
833
63181a97 834 SCM_VALIDATE_SMOB (1, cs, charset);
f49dbcad
MG
835 SCM_VALIDATE_SMOB (2, cursor, charset_cursor);
836
837 cs_data = SCM_CHARSET_DATA (cs);
838 cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
63181a97 839
f49dbcad
MG
840 /* Validate that this cursor is still true. */
841 i = cur_data->range;
842 if (i == (size_t) (-1)
843 || i >= cs_data->len
844 || cur_data->n < cs_data->ranges[i].lo
845 || cur_data->n > cs_data->ranges[i].hi)
63181a97 846 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
f49dbcad
MG
847 /* Increment the cursor. */
848 if (cur_data->n == cs_data->ranges[i].hi)
63181a97 849 {
f49dbcad
MG
850 if (i + 1 < cs_data->len)
851 {
852 cur_data->range = i + 1;
853 cur_data->n = cs_data->ranges[i + 1].lo;
854 }
855 else
856 {
857 /* This is the end of the road. */
858 cur_data->range = (size_t) (-1);
859 cur_data->n = 0;
860 }
63181a97 861 }
f49dbcad
MG
862 else
863 {
864 cur_data->n = cur_data->n + 1;
865 }
866
867 return cursor;
63181a97
MV
868}
869#undef FUNC_NAME
870
871
872SCM_DEFINE (scm_end_of_char_set_p, "end-of-char-set?", 1, 0, 0,
f49dbcad
MG
873 (SCM cursor),
874 "Return @code{#t} if @var{cursor} has reached the end of a\n"
875 "character set, @code{#f} otherwise.")
63181a97
MV
876#define FUNC_NAME s_scm_end_of_char_set_p
877{
f49dbcad
MG
878 scm_t_char_set_cursor *cur_data;
879 SCM_VALIDATE_SMOB (1, cursor, charset_cursor);
880
881 cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
882 if (cur_data->range == (size_t) (-1))
883 return SCM_BOOL_T;
884
885 return SCM_BOOL_F;
63181a97
MV
886}
887#undef FUNC_NAME
888
889
890SCM_DEFINE (scm_char_set_fold, "char-set-fold", 3, 0, 0,
f49dbcad
MG
891 (SCM kons, SCM knil, SCM cs),
892 "Fold the procedure @var{kons} over the character set @var{cs},\n"
893 "initializing it with @var{knil}.")
63181a97
MV
894#define FUNC_NAME s_scm_char_set_fold
895{
f49dbcad 896 scm_t_char_set *cs_data;
63181a97 897 int k;
f49dbcad 898 scm_t_wchar n;
63181a97
MV
899
900 SCM_VALIDATE_PROC (1, kons);
901 SCM_VALIDATE_SMOB (3, cs, charset);
902
f49dbcad
MG
903 cs_data = SCM_CHARSET_DATA (cs);
904
905 if (cs_data->len == 0)
906 return knil;
907
908 for (k = 0; k < cs_data->len; k++)
909 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
63181a97 910 {
f49dbcad 911 knil = scm_call_2 (kons, SCM_MAKE_CHAR (n), knil);
63181a97
MV
912 }
913 return knil;
914}
915#undef FUNC_NAME
916
917
918SCM_DEFINE (scm_char_set_unfold, "char-set-unfold", 4, 1, 0,
919 (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
920 "This is a fundamental constructor for character sets.\n"
921 "@itemize @bullet\n"
922 "@item @var{g} is used to generate a series of ``seed'' values\n"
923 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
924 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
925 "@item @var{p} tells us when to stop -- when it returns true\n"
926 "when applied to one of the seed values.\n"
927 "@item @var{f} maps each seed value to a character. These\n"
928 "characters are added to the base character set @var{base_cs} to\n"
929 "form the result; @var{base_cs} defaults to the empty set.\n"
930 "@end itemize")
931#define FUNC_NAME s_scm_char_set_unfold
932{
933 SCM result, tmp;
934
935 SCM_VALIDATE_PROC (1, p);
936 SCM_VALIDATE_PROC (2, f);
937 SCM_VALIDATE_PROC (3, g);
938 if (!SCM_UNBNDP (base_cs))
939 {
940 SCM_VALIDATE_SMOB (5, base_cs, charset);
941 result = scm_char_set_copy (base_cs);
942 }
943 else
944 result = make_char_set (FUNC_NAME);
945
946 tmp = scm_call_1 (p, seed);
947 while (scm_is_false (tmp))
948 {
949 SCM ch = scm_call_1 (f, seed);
950 if (!SCM_CHARP (ch))
951 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
952 SCM_CHARSET_SET (result, SCM_CHAR (ch));
953
954 seed = scm_call_1 (g, seed);
955 tmp = scm_call_1 (p, seed);
956 }
957 return result;
958}
959#undef FUNC_NAME
960
961
962SCM_DEFINE (scm_char_set_unfold_x, "char-set-unfold!", 5, 0, 0,
963 (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
964 "This is a fundamental constructor for character sets.\n"
965 "@itemize @bullet\n"
966 "@item @var{g} is used to generate a series of ``seed'' values\n"
967 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
968 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
969 "@item @var{p} tells us when to stop -- when it returns true\n"
970 "when applied to one of the seed values.\n"
971 "@item @var{f} maps each seed value to a character. These\n"
972 "characters are added to the base character set @var{base_cs} to\n"
973 "form the result; @var{base_cs} defaults to the empty set.\n"
974 "@end itemize")
975#define FUNC_NAME s_scm_char_set_unfold_x
976{
977 SCM tmp;
978
979 SCM_VALIDATE_PROC (1, p);
980 SCM_VALIDATE_PROC (2, f);
981 SCM_VALIDATE_PROC (3, g);
982 SCM_VALIDATE_SMOB (5, base_cs, charset);
983
984 tmp = scm_call_1 (p, seed);
985 while (scm_is_false (tmp))
986 {
987 SCM ch = scm_call_1 (f, seed);
988 if (!SCM_CHARP (ch))
989 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
990 SCM_CHARSET_SET (base_cs, SCM_CHAR (ch));
991
992 seed = scm_call_1 (g, seed);
993 tmp = scm_call_1 (p, seed);
994 }
995 return base_cs;
996}
997#undef FUNC_NAME
998
999
1000SCM_DEFINE (scm_char_set_for_each, "char-set-for-each", 2, 0, 0,
f49dbcad
MG
1001 (SCM proc, SCM cs),
1002 "Apply @var{proc} to every character in the character set\n"
1003 "@var{cs}. The return value is not specified.")
63181a97
MV
1004#define FUNC_NAME s_scm_char_set_for_each
1005{
f49dbcad 1006 scm_t_char_set *cs_data;
63181a97 1007 int k;
f49dbcad 1008 scm_t_wchar n;
63181a97
MV
1009
1010 SCM_VALIDATE_PROC (1, proc);
1011 SCM_VALIDATE_SMOB (2, cs, charset);
1012
f49dbcad
MG
1013 cs_data = SCM_CHARSET_DATA (cs);
1014
1015 if (cs_data->len == 0)
1016 return SCM_UNSPECIFIED;
1017
1018 for (k = 0; k < cs_data->len; k++)
1019 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
1020 {
1021 scm_call_1 (proc, SCM_MAKE_CHAR (n));
1022 }
1023
63181a97
MV
1024 return SCM_UNSPECIFIED;
1025}
1026#undef FUNC_NAME
1027
1028
1029SCM_DEFINE (scm_char_set_map, "char-set-map", 2, 0, 0,
1030 (SCM proc, SCM cs),
1031 "Map the procedure @var{proc} over every character in @var{cs}.\n"
1032 "@var{proc} must be a character -> character procedure.")
1033#define FUNC_NAME s_scm_char_set_map
1034{
1035 SCM result;
1036 int k;
f49dbcad
MG
1037 scm_t_char_set *cs_data;
1038 scm_t_wchar n;
63181a97
MV
1039
1040 SCM_VALIDATE_PROC (1, proc);
1041 SCM_VALIDATE_SMOB (2, cs, charset);
1042
1043 result = make_char_set (FUNC_NAME);
f49dbcad
MG
1044 cs_data = SCM_CHARSET_DATA (cs);
1045
1046 if (cs_data->len == 0)
1047 return result;
1048
1049 for (k = 0; k < cs_data->len; k++)
1050 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
63181a97 1051 {
f49dbcad
MG
1052 SCM ch = scm_call_1 (proc, SCM_MAKE_CHAR (n));
1053 if (!SCM_CHARP (ch))
1054 SCM_MISC_ERROR ("procedure ~S returned non-char",
1055 scm_list_1 (proc));
1056 SCM_CHARSET_SET (result, SCM_CHAR (ch));
63181a97
MV
1057 }
1058 return result;
1059}
1060#undef FUNC_NAME
1061
1062
1063SCM_DEFINE (scm_char_set_copy, "char-set-copy", 1, 0, 0,
1064 (SCM cs),
1065 "Return a newly allocated character set containing all\n"
1066 "characters in @var{cs}.")
1067#define FUNC_NAME s_scm_char_set_copy
1068{
1069 SCM ret;
f49dbcad 1070 scm_t_char_set *p1, *p2;
63181a97
MV
1071
1072 SCM_VALIDATE_SMOB (1, cs, charset);
1073 ret = make_char_set (FUNC_NAME);
f49dbcad
MG
1074 p1 = SCM_CHARSET_DATA (cs);
1075 p2 = SCM_CHARSET_DATA (ret);
1076 p2->len = p1->len;
1077
1078 if (p1->len == 0)
1079 p2->ranges = NULL;
1080 else
1081 {
1082 p2->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * p1->len,
1083 "character-set");
1084 memcpy (p2->ranges, p1->ranges, sizeof (scm_t_char_range) * p1->len);
1085 }
1086
63181a97
MV
1087 return ret;
1088}
1089#undef FUNC_NAME
1090
1091
1092SCM_DEFINE (scm_char_set, "char-set", 0, 0, 1,
1093 (SCM rest),
1094 "Return a character set containing all given characters.")
1095#define FUNC_NAME s_scm_char_set
1096{
1097 SCM cs;
63181a97
MV
1098 int argnum = 1;
1099
1100 SCM_VALIDATE_REST_ARGUMENT (rest);
1101 cs = make_char_set (FUNC_NAME);
d2e53ed6 1102 while (!scm_is_null (rest))
63181a97 1103 {
f49dbcad 1104 scm_t_wchar c;
63181a97
MV
1105
1106 SCM_VALIDATE_CHAR_COPY (argnum, SCM_CAR (rest), c);
1107 argnum++;
1108 rest = SCM_CDR (rest);
f49dbcad 1109 SCM_CHARSET_SET (cs, c);
63181a97
MV
1110 }
1111 return cs;
1112}
1113#undef FUNC_NAME
1114
1115
1116SCM_DEFINE (scm_list_to_char_set, "list->char-set", 1, 1, 0,
1117 (SCM list, SCM base_cs),
1118 "Convert the character list @var{list} to a character set. If\n"
1119 "the character set @var{base_cs} is given, the character in this\n"
1120 "set are also included in the result.")
1121#define FUNC_NAME s_scm_list_to_char_set
1122{
1123 SCM cs;
63181a97
MV
1124
1125 SCM_VALIDATE_LIST (1, list);
1126 if (SCM_UNBNDP (base_cs))
1127 cs = make_char_set (FUNC_NAME);
1128 else
1129 {
1130 SCM_VALIDATE_SMOB (2, base_cs, charset);
1131 cs = scm_char_set_copy (base_cs);
1132 }
d2e53ed6 1133 while (!scm_is_null (list))
63181a97
MV
1134 {
1135 SCM chr = SCM_CAR (list);
f49dbcad 1136 scm_t_wchar c;
63181a97
MV
1137
1138 SCM_VALIDATE_CHAR_COPY (0, chr, c);
1139 list = SCM_CDR (list);
1140
f49dbcad
MG
1141
1142 SCM_CHARSET_SET (cs, c);
63181a97
MV
1143 }
1144 return cs;
1145}
1146#undef FUNC_NAME
1147
1148
1149SCM_DEFINE (scm_list_to_char_set_x, "list->char-set!", 2, 0, 0,
f49dbcad
MG
1150 (SCM list, SCM base_cs),
1151 "Convert the character list @var{list} to a character set. The\n"
1152 "characters are added to @var{base_cs} and @var{base_cs} is\n"
1153 "returned.")
63181a97
MV
1154#define FUNC_NAME s_scm_list_to_char_set_x
1155{
63181a97
MV
1156 SCM_VALIDATE_LIST (1, list);
1157 SCM_VALIDATE_SMOB (2, base_cs, charset);
d2e53ed6 1158 while (!scm_is_null (list))
63181a97
MV
1159 {
1160 SCM chr = SCM_CAR (list);
f49dbcad 1161 scm_t_wchar c;
63181a97
MV
1162
1163 SCM_VALIDATE_CHAR_COPY (0, chr, c);
1164 list = SCM_CDR (list);
1165
f49dbcad 1166 SCM_CHARSET_SET (base_cs, c);
63181a97
MV
1167 }
1168 return base_cs;
1169}
1170#undef FUNC_NAME
1171
1172
1173SCM_DEFINE (scm_string_to_char_set, "string->char-set", 1, 1, 0,
1174 (SCM str, SCM base_cs),
1175 "Convert the string @var{str} to a character set. If the\n"
1176 "character set @var{base_cs} is given, the characters in this\n"
1177 "set are also included in the result.")
1178#define FUNC_NAME s_scm_string_to_char_set
1179{
1180 SCM cs;
63181a97
MV
1181 size_t k = 0, len;
1182
1183 SCM_VALIDATE_STRING (1, str);
1184 if (SCM_UNBNDP (base_cs))
1185 cs = make_char_set (FUNC_NAME);
1186 else
1187 {
1188 SCM_VALIDATE_SMOB (2, base_cs, charset);
1189 cs = scm_char_set_copy (base_cs);
1190 }
63181a97
MV
1191 len = scm_i_string_length (str);
1192 while (k < len)
1193 {
f49dbcad
MG
1194 scm_t_wchar c = scm_i_string_ref (str, k++);
1195 SCM_CHARSET_SET (cs, c);
63181a97
MV
1196 }
1197 scm_remember_upto_here_1 (str);
1198 return cs;
1199}
1200#undef FUNC_NAME
1201
1202
1203SCM_DEFINE (scm_string_to_char_set_x, "string->char-set!", 2, 0, 0,
f49dbcad
MG
1204 (SCM str, SCM base_cs),
1205 "Convert the string @var{str} to a character set. The\n"
1206 "characters from the string are added to @var{base_cs}, and\n"
1207 "@var{base_cs} is returned.")
63181a97
MV
1208#define FUNC_NAME s_scm_string_to_char_set_x
1209{
63181a97
MV
1210 size_t k = 0, len;
1211
1212 SCM_VALIDATE_STRING (1, str);
1213 SCM_VALIDATE_SMOB (2, base_cs, charset);
63181a97
MV
1214 len = scm_i_string_length (str);
1215 while (k < len)
1216 {
f49dbcad
MG
1217 scm_t_wchar c = scm_i_string_ref (str, k++);
1218 SCM_CHARSET_SET (base_cs, c);
63181a97
MV
1219 }
1220 scm_remember_upto_here_1 (str);
1221 return base_cs;
1222}
1223#undef FUNC_NAME
1224
1225
1226SCM_DEFINE (scm_char_set_filter, "char-set-filter", 2, 1, 0,
1227 (SCM pred, SCM cs, SCM base_cs),
1228 "Return a character set containing every character from @var{cs}\n"
1229 "so that it satisfies @var{pred}. If provided, the characters\n"
1230 "from @var{base_cs} are added to the result.")
1231#define FUNC_NAME s_scm_char_set_filter
1232{
1233 SCM ret;
1234 int k;
f49dbcad
MG
1235 scm_t_wchar n;
1236 scm_t_char_set *p;
63181a97
MV
1237
1238 SCM_VALIDATE_PROC (1, pred);
1239 SCM_VALIDATE_SMOB (2, cs, charset);
1240 if (!SCM_UNBNDP (base_cs))
1241 {
1242 SCM_VALIDATE_SMOB (3, base_cs, charset);
1243 ret = scm_char_set_copy (base_cs);
1244 }
1245 else
1246 ret = make_char_set (FUNC_NAME);
63181a97 1247
f49dbcad
MG
1248 p = SCM_CHARSET_DATA (cs);
1249
1250 if (p->len == 0)
1251 return ret;
1252
1253 for (k = 0; k < p->len; k++)
1254 for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++)
1255 {
1256 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1257
1258 if (scm_is_true (res))
1259 SCM_CHARSET_SET (ret, n);
1260 }
63181a97
MV
1261 return ret;
1262}
1263#undef FUNC_NAME
1264
1265
1266SCM_DEFINE (scm_char_set_filter_x, "char-set-filter!", 3, 0, 0,
1267 (SCM pred, SCM cs, SCM base_cs),
1268 "Return a character set containing every character from @var{cs}\n"
1269 "so that it satisfies @var{pred}. The characters are added to\n"
1270 "@var{base_cs} and @var{base_cs} is returned.")
1271#define FUNC_NAME s_scm_char_set_filter_x
1272{
1273 int k;
f49dbcad
MG
1274 scm_t_wchar n;
1275 scm_t_char_set *p;
63181a97
MV
1276
1277 SCM_VALIDATE_PROC (1, pred);
1278 SCM_VALIDATE_SMOB (2, cs, charset);
1279 SCM_VALIDATE_SMOB (3, base_cs, charset);
f49dbcad
MG
1280 p = SCM_CHARSET_DATA (cs);
1281 if (p->len == 0)
1282 return base_cs;
63181a97 1283
f49dbcad
MG
1284 for (k = 0; k < p->len; k++)
1285 for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++)
1286 {
bde543e8 1287 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
f49dbcad
MG
1288
1289 if (scm_is_true (res))
1290 SCM_CHARSET_SET (base_cs, n);
1291 }
63181a97
MV
1292 return base_cs;
1293}
1294#undef FUNC_NAME
1295
1296
91772d8f
MG
1297/* Return a character set containing all the characters from [LOWER,UPPER),
1298 giving range errors if ERROR, adding chars from BASE_CS, and recycling
1299 BASE_CS if REUSE is true. */
1300static SCM
1301scm_i_ucs_range_to_char_set (const char *FUNC_NAME, SCM lower, SCM upper,
1302 SCM error, SCM base_cs, int reuse)
63181a97
MV
1303{
1304 SCM cs;
1305 size_t clower, cupper;
63181a97
MV
1306
1307 clower = scm_to_size_t (lower);
91772d8f 1308 cupper = scm_to_size_t (upper) - 1;
63181a97
MV
1309 SCM_ASSERT_RANGE (2, upper, cupper >= clower);
1310 if (!SCM_UNBNDP (error))
1311 {
1312 if (scm_is_true (error))
f49dbcad
MG
1313 {
1314 SCM_ASSERT_RANGE (1, lower, SCM_IS_UNICODE_CHAR (clower));
1315 SCM_ASSERT_RANGE (2, upper, SCM_IS_UNICODE_CHAR (cupper));
91772d8f
MG
1316 if (clower < SCM_CODEPOINT_SURROGATE_START
1317 && cupper > SCM_CODEPOINT_SURROGATE_END)
1318 scm_error(scm_out_of_range_key,
1319 FUNC_NAME, "invalid range - contains surrogate characters: ~S to ~S",
1320 scm_list_2 (lower, upper), scm_list_1 (upper));
f49dbcad 1321 }
63181a97 1322 }
91772d8f 1323
63181a97
MV
1324 if (SCM_UNBNDP (base_cs))
1325 cs = make_char_set (FUNC_NAME);
1326 else
1327 {
4c0fc007 1328 SCM_VALIDATE_SMOB (3, base_cs, charset);
91772d8f
MG
1329 if (reuse)
1330 cs = base_cs;
1331 else
1332 cs = scm_char_set_copy (base_cs);
63181a97 1333 }
91772d8f
MG
1334
1335 if ((clower >= SCM_CODEPOINT_SURROGATE_START && clower <= SCM_CODEPOINT_SURROGATE_END)
1336 && (cupper >= SCM_CODEPOINT_SURROGATE_START && cupper <= SCM_CODEPOINT_SURROGATE_END))
1337 return cs;
1338
1339 if (clower > SCM_CODEPOINT_MAX)
1340 clower = SCM_CODEPOINT_MAX;
1341 if (clower >= SCM_CODEPOINT_SURROGATE_START && clower <= SCM_CODEPOINT_SURROGATE_END)
1342 clower = SCM_CODEPOINT_SURROGATE_END + 1;
1343 if (cupper > SCM_CODEPOINT_MAX)
1344 cupper = SCM_CODEPOINT_MAX;
1345 if (cupper >= SCM_CODEPOINT_SURROGATE_START && cupper <= SCM_CODEPOINT_SURROGATE_END)
1346 cupper = SCM_CODEPOINT_SURROGATE_START - 1;
1347 if (clower < SCM_CODEPOINT_SURROGATE_START && cupper > SCM_CODEPOINT_SURROGATE_END)
63181a97 1348 {
91772d8f
MG
1349 scm_i_charset_set_range (SCM_CHARSET_DATA (cs), clower, SCM_CODEPOINT_SURROGATE_START - 1);
1350 scm_i_charset_set_range (SCM_CHARSET_DATA (cs), SCM_CODEPOINT_SURROGATE_END + 1, cupper);
63181a97 1351 }
91772d8f
MG
1352 else
1353 scm_i_charset_set_range (SCM_CHARSET_DATA (cs), clower, cupper);
63181a97
MV
1354 return cs;
1355}
91772d8f
MG
1356
1357SCM_DEFINE (scm_ucs_range_to_char_set, "ucs-range->char-set", 2, 2, 0,
1358 (SCM lower, SCM upper, SCM error, SCM base_cs),
1359 "Return a character set containing all characters whose\n"
1360 "character codes lie in the half-open range\n"
1361 "[@var{lower},@var{upper}).\n"
1362 "\n"
1363 "If @var{error} is a true value, an error is signalled if the\n"
1364 "specified range contains characters which are not valid\n"
1365 "Unicode code points. If @var{error} is @code{#f},\n"
ffb62a43 1366 "these characters are silently left out of the resulting\n"
91772d8f
MG
1367 "character set.\n"
1368 "\n"
1369 "The characters in @var{base_cs} are added to the result, if\n"
1370 "given.")
1371#define FUNC_NAME s_scm_ucs_range_to_char_set
1372{
1373 return scm_i_ucs_range_to_char_set (FUNC_NAME, lower, upper,
1374 error, base_cs, 0);
1375}
63181a97
MV
1376#undef FUNC_NAME
1377
1378
1379SCM_DEFINE (scm_ucs_range_to_char_set_x, "ucs-range->char-set!", 4, 0, 0,
1380 (SCM lower, SCM upper, SCM error, SCM base_cs),
1381 "Return a character set containing all characters whose\n"
1382 "character codes lie in the half-open range\n"
1383 "[@var{lower},@var{upper}).\n"
1384 "\n"
1385 "If @var{error} is a true value, an error is signalled if the\n"
1386 "specified range contains characters which are not contained in\n"
1387 "the implemented character range. If @var{error} is @code{#f},\n"
ffb62a43 1388 "these characters are silently left out of the resulting\n"
63181a97
MV
1389 "character set.\n"
1390 "\n"
1391 "The characters are added to @var{base_cs} and @var{base_cs} is\n"
1392 "returned.")
1393#define FUNC_NAME s_scm_ucs_range_to_char_set_x
1394{
91772d8f
MG
1395 SCM_VALIDATE_SMOB (4, base_cs, charset);
1396 return scm_i_ucs_range_to_char_set (FUNC_NAME, lower, upper,
1397 error, base_cs, 1);
63181a97
MV
1398}
1399#undef FUNC_NAME
1400
1401SCM_DEFINE (scm_to_char_set, "->char-set", 1, 0, 0,
1402 (SCM x),
1403 "Coerces x into a char-set. @var{x} may be a string, character or char-set. A string is converted to the set of its constituent characters; a character is converted to a singleton set; a char-set is returned as-is.")
1404#define FUNC_NAME s_scm_to_char_set
1405{
1406 if (scm_is_string (x))
1407 return scm_string_to_char_set (x, SCM_UNDEFINED);
1408 else if (SCM_CHARP (x))
1409 return scm_char_set (scm_list_1 (x));
1410 else if (SCM_SMOB_PREDICATE (scm_tc16_charset, x))
1411 return x;
1412 else
1413 scm_wrong_type_arg (NULL, 0, x);
1414}
1415#undef FUNC_NAME
1416
1417SCM_DEFINE (scm_char_set_size, "char-set-size", 1, 0, 0,
1418 (SCM cs),
1419 "Return the number of elements in character set @var{cs}.")
1420#define FUNC_NAME s_scm_char_set_size
1421{
1422 int k, count = 0;
f49dbcad 1423 scm_t_char_set *cs_data;
63181a97
MV
1424
1425 SCM_VALIDATE_SMOB (1, cs, charset);
f49dbcad
MG
1426 cs_data = SCM_CHARSET_DATA (cs);
1427
1428 if (cs_data->len == 0)
1429 return scm_from_int (0);
1430
1431 for (k = 0; k < cs_data->len; k++)
1432 count += cs_data->ranges[k].hi - cs_data->ranges[k].lo + 1;
1433
1434 return scm_from_int (count);
63181a97
MV
1435}
1436#undef FUNC_NAME
1437
1438
1439SCM_DEFINE (scm_char_set_count, "char-set-count", 2, 0, 0,
1440 (SCM pred, SCM cs),
1441 "Return the number of the elements int the character set\n"
1442 "@var{cs} which satisfy the predicate @var{pred}.")
1443#define FUNC_NAME s_scm_char_set_count
1444{
1445 int k, count = 0;
f49dbcad
MG
1446 scm_t_wchar n;
1447 scm_t_char_set *cs_data;
63181a97
MV
1448
1449 SCM_VALIDATE_PROC (1, pred);
1450 SCM_VALIDATE_SMOB (2, cs, charset);
f49dbcad
MG
1451 cs_data = SCM_CHARSET_DATA (cs);
1452 if (cs_data->len == 0)
1453 return scm_from_int (0);
63181a97 1454
f49dbcad
MG
1455 for (k = 0; k < cs_data->len; k++)
1456 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
63181a97 1457 {
f49dbcad
MG
1458 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1459 if (scm_is_true (res))
1460 count++;
63181a97
MV
1461 }
1462 return SCM_I_MAKINUM (count);
1463}
1464#undef FUNC_NAME
1465
1466
1467SCM_DEFINE (scm_char_set_to_list, "char-set->list", 1, 0, 0,
1468 (SCM cs),
1469 "Return a list containing the elements of the character set\n"
1470 "@var{cs}.")
1471#define FUNC_NAME s_scm_char_set_to_list
1472{
1473 int k;
f49dbcad 1474 scm_t_wchar n;
63181a97 1475 SCM result = SCM_EOL;
f49dbcad 1476 scm_t_char_set *p;
63181a97
MV
1477
1478 SCM_VALIDATE_SMOB (1, cs, charset);
f49dbcad
MG
1479 p = SCM_CHARSET_DATA (cs);
1480 if (p->len == 0)
1481 return SCM_EOL;
1482
1483 for (k = p->len - 1; k >= 0; k--)
1484 for (n = p->ranges[k].hi; n >= p->ranges[k].lo; n--)
1485 result = scm_cons (SCM_MAKE_CHAR (n), result);
63181a97
MV
1486 return result;
1487}
1488#undef FUNC_NAME
1489
1490
1491SCM_DEFINE (scm_char_set_to_string, "char-set->string", 1, 0, 0,
1492 (SCM cs),
1493 "Return a string containing the elements of the character set\n"
1494 "@var{cs}. The order in which the characters are placed in the\n"
1495 "string is not defined.")
1496#define FUNC_NAME s_scm_char_set_to_string
1497{
1498 int k;
1499 int count = 0;
1500 int idx = 0;
f49dbcad 1501 int wide = 0;
63181a97 1502 SCM result;
f49dbcad
MG
1503 scm_t_wchar n;
1504 scm_t_char_set *cs_data;
1505 char *buf;
1506 scm_t_wchar *wbuf;
63181a97
MV
1507
1508 SCM_VALIDATE_SMOB (1, cs, charset);
f49dbcad
MG
1509 cs_data = SCM_CHARSET_DATA (cs);
1510 if (cs_data->len == 0)
1511 return scm_nullstr;
1512
1513 if (cs_data->ranges[cs_data->len - 1].hi > 255)
1514 wide = 1;
1515
1516 count = scm_to_int (scm_char_set_size (cs));
1517 if (wide)
190d4b0d 1518 result = scm_i_make_wide_string (count, &wbuf, 0);
f49dbcad 1519 else
190d4b0d 1520 result = scm_i_make_string (count, &buf, 0);
f49dbcad
MG
1521
1522 for (k = 0; k < cs_data->len; k++)
1523 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
1524 {
1525 if (wide)
1526 wbuf[idx++] = n;
1527 else
1528 buf[idx++] = n;
1529 }
63181a97
MV
1530 return result;
1531}
1532#undef FUNC_NAME
1533
1534
1535SCM_DEFINE (scm_char_set_contains_p, "char-set-contains?", 2, 0, 0,
1536 (SCM cs, SCM ch),
1537 "Return @code{#t} iff the character @var{ch} is contained in the\n"
1538 "character set @var{cs}.")
1539#define FUNC_NAME s_scm_char_set_contains_p
1540{
1541 SCM_VALIDATE_SMOB (1, cs, charset);
1542 SCM_VALIDATE_CHAR (2, ch);
1543 return scm_from_bool (SCM_CHARSET_GET (cs, SCM_CHAR (ch)));
1544}
1545#undef FUNC_NAME
1546
1547
1548SCM_DEFINE (scm_char_set_every, "char-set-every", 2, 0, 0,
1549 (SCM pred, SCM cs),
1550 "Return a true value if every character in the character set\n"
1551 "@var{cs} satisfies the predicate @var{pred}.")
1552#define FUNC_NAME s_scm_char_set_every
1553{
1554 int k;
f49dbcad 1555 scm_t_wchar n;
63181a97 1556 SCM res = SCM_BOOL_T;
f49dbcad 1557 scm_t_char_set *cs_data;
63181a97
MV
1558
1559 SCM_VALIDATE_PROC (1, pred);
1560 SCM_VALIDATE_SMOB (2, cs, charset);
1561
f49dbcad
MG
1562 cs_data = SCM_CHARSET_DATA (cs);
1563 if (cs_data->len == 0)
1564 return SCM_BOOL_T;
1565
1566 for (k = 0; k < cs_data->len; k++)
1567 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
63181a97 1568 {
f49dbcad
MG
1569 res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1570 if (scm_is_false (res))
1571 return res;
63181a97 1572 }
f49dbcad 1573 return SCM_BOOL_T;
63181a97
MV
1574}
1575#undef FUNC_NAME
1576
1577
1578SCM_DEFINE (scm_char_set_any, "char-set-any", 2, 0, 0,
1579 (SCM pred, SCM cs),
1580 "Return a true value if any character in the character set\n"
1581 "@var{cs} satisfies the predicate @var{pred}.")
1582#define FUNC_NAME s_scm_char_set_any
1583{
1584 int k;
f49dbcad
MG
1585 scm_t_wchar n;
1586 scm_t_char_set *cs_data;
63181a97
MV
1587
1588 SCM_VALIDATE_PROC (1, pred);
1589 SCM_VALIDATE_SMOB (2, cs, charset);
1590
693e7289
MG
1591 cs_data = SCM_CHARSET_DATA (cs);
1592 if (cs_data->len == 0)
1593 return SCM_BOOL_T;
f49dbcad
MG
1594
1595 for (k = 0; k < cs_data->len; k++)
1596 for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
63181a97 1597 {
f49dbcad
MG
1598 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
1599 if (scm_is_true (res))
1600 return res;
63181a97
MV
1601 }
1602 return SCM_BOOL_F;
1603}
1604#undef FUNC_NAME
1605
1606
1607SCM_DEFINE (scm_char_set_adjoin, "char-set-adjoin", 1, 0, 1,
f49dbcad
MG
1608 (SCM cs, SCM rest),
1609 "Add all character arguments to the first argument, which must\n"
1610 "be a character set.")
63181a97
MV
1611#define FUNC_NAME s_scm_char_set_adjoin
1612{
63181a97
MV
1613 SCM_VALIDATE_SMOB (1, cs, charset);
1614 SCM_VALIDATE_REST_ARGUMENT (rest);
1615 cs = scm_char_set_copy (cs);
1616
d2e53ed6 1617 while (!scm_is_null (rest))
63181a97
MV
1618 {
1619 SCM chr = SCM_CAR (rest);
f49dbcad 1620 scm_t_wchar c;
63181a97
MV
1621
1622 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1623 rest = SCM_CDR (rest);
1624
f49dbcad 1625 SCM_CHARSET_SET (cs, c);
63181a97
MV
1626 }
1627 return cs;
1628}
1629#undef FUNC_NAME
1630
1631
1632SCM_DEFINE (scm_char_set_delete, "char-set-delete", 1, 0, 1,
f49dbcad
MG
1633 (SCM cs, SCM rest),
1634 "Delete all character arguments from the first argument, which\n"
1635 "must be a character set.")
63181a97
MV
1636#define FUNC_NAME s_scm_char_set_delete
1637{
63181a97
MV
1638 SCM_VALIDATE_SMOB (1, cs, charset);
1639 SCM_VALIDATE_REST_ARGUMENT (rest);
1640 cs = scm_char_set_copy (cs);
1641
d2e53ed6 1642 while (!scm_is_null (rest))
63181a97
MV
1643 {
1644 SCM chr = SCM_CAR (rest);
f49dbcad 1645 scm_t_wchar c;
63181a97
MV
1646
1647 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1648 rest = SCM_CDR (rest);
1649
f49dbcad 1650 SCM_CHARSET_UNSET (cs, c);
63181a97
MV
1651 }
1652 return cs;
1653}
1654#undef FUNC_NAME
1655
1656
1657SCM_DEFINE (scm_char_set_adjoin_x, "char-set-adjoin!", 1, 0, 1,
f49dbcad
MG
1658 (SCM cs, SCM rest),
1659 "Add all character arguments to the first argument, which must\n"
1660 "be a character set.")
63181a97
MV
1661#define FUNC_NAME s_scm_char_set_adjoin_x
1662{
63181a97
MV
1663 SCM_VALIDATE_SMOB (1, cs, charset);
1664 SCM_VALIDATE_REST_ARGUMENT (rest);
1665
d2e53ed6 1666 while (!scm_is_null (rest))
63181a97
MV
1667 {
1668 SCM chr = SCM_CAR (rest);
f49dbcad 1669 scm_t_wchar c;
63181a97
MV
1670
1671 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1672 rest = SCM_CDR (rest);
1673
f49dbcad 1674 SCM_CHARSET_SET (cs, c);
63181a97
MV
1675 }
1676 return cs;
1677}
1678#undef FUNC_NAME
1679
1680
1681SCM_DEFINE (scm_char_set_delete_x, "char-set-delete!", 1, 0, 1,
f49dbcad
MG
1682 (SCM cs, SCM rest),
1683 "Delete all character arguments from the first argument, which\n"
1684 "must be a character set.")
63181a97
MV
1685#define FUNC_NAME s_scm_char_set_delete_x
1686{
63181a97
MV
1687 SCM_VALIDATE_SMOB (1, cs, charset);
1688 SCM_VALIDATE_REST_ARGUMENT (rest);
1689
d2e53ed6 1690 while (!scm_is_null (rest))
63181a97
MV
1691 {
1692 SCM chr = SCM_CAR (rest);
f49dbcad 1693 scm_t_wchar c;
63181a97
MV
1694
1695 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1696 rest = SCM_CDR (rest);
1697
f49dbcad 1698 SCM_CHARSET_UNSET (cs, c);
63181a97
MV
1699 }
1700 return cs;
1701}
1702#undef FUNC_NAME
1703
1704
1705SCM_DEFINE (scm_char_set_complement, "char-set-complement", 1, 0, 0,
f49dbcad 1706 (SCM cs), "Return the complement of the character set @var{cs}.")
63181a97
MV
1707#define FUNC_NAME s_scm_char_set_complement
1708{
63181a97 1709 SCM res;
f49dbcad 1710 scm_t_char_set *p, *q;
63181a97
MV
1711
1712 SCM_VALIDATE_SMOB (1, cs, charset);
1713
1714 res = make_char_set (FUNC_NAME);
f49dbcad
MG
1715 p = SCM_CHARSET_DATA (res);
1716 q = SCM_CHARSET_DATA (cs);
1717
1718 charsets_complement (p, q);
63181a97
MV
1719 return res;
1720}
1721#undef FUNC_NAME
1722
1723
1724SCM_DEFINE (scm_char_set_union, "char-set-union", 0, 0, 1,
1725 (SCM rest),
1726 "Return the union of all argument character sets.")
1727#define FUNC_NAME s_scm_char_set_union
1728{
1729 int c = 1;
1730 SCM res;
f49dbcad 1731 scm_t_char_set *p;
63181a97
MV
1732
1733 SCM_VALIDATE_REST_ARGUMENT (rest);
1734
1735 res = make_char_set (FUNC_NAME);
f49dbcad 1736 p = SCM_CHARSET_DATA (res);
d2e53ed6 1737 while (!scm_is_null (rest))
63181a97 1738 {
63181a97
MV
1739 SCM cs = SCM_CAR (rest);
1740 SCM_VALIDATE_SMOB (c, cs, charset);
1741 c++;
1742 rest = SCM_CDR (rest);
1743
f49dbcad
MG
1744
1745 charsets_union (p, (scm_t_char_set *) SCM_SMOB_DATA (cs));
63181a97
MV
1746 }
1747 return res;
1748}
1749#undef FUNC_NAME
1750
1751
1752SCM_DEFINE (scm_char_set_intersection, "char-set-intersection", 0, 0, 1,
1753 (SCM rest),
1754 "Return the intersection of all argument character sets.")
1755#define FUNC_NAME s_scm_char_set_intersection
1756{
1757 SCM res;
1758
1759 SCM_VALIDATE_REST_ARGUMENT (rest);
1760
d2e53ed6 1761 if (scm_is_null (rest))
63181a97
MV
1762 res = make_char_set (FUNC_NAME);
1763 else
1764 {
f49dbcad 1765 scm_t_char_set *p;
63181a97
MV
1766 int argnum = 2;
1767
1768 res = scm_char_set_copy (SCM_CAR (rest));
f49dbcad 1769 p = SCM_CHARSET_DATA (res);
63181a97
MV
1770 rest = SCM_CDR (rest);
1771
d2e53ed6 1772 while (scm_is_pair (rest))
f49dbcad
MG
1773 {
1774 SCM cs = SCM_CAR (rest);
1775 scm_t_char_set *cs_data;
1776
1777 SCM_VALIDATE_SMOB (argnum, cs, charset);
1778 argnum++;
1779 cs_data = SCM_CHARSET_DATA (cs);
1780 rest = SCM_CDR (rest);
1781 charsets_intersection (p, cs_data);
1782 }
63181a97
MV
1783 }
1784
1785 return res;
1786}
1787#undef FUNC_NAME
1788
1789
1790SCM_DEFINE (scm_char_set_difference, "char-set-difference", 1, 0, 1,
1791 (SCM cs1, SCM rest),
1792 "Return the difference of all argument character sets.")
1793#define FUNC_NAME s_scm_char_set_difference
1794{
1795 int c = 2;
f49dbcad
MG
1796 SCM res, compl;
1797 scm_t_char_set *p, *q;
63181a97
MV
1798
1799 SCM_VALIDATE_SMOB (1, cs1, charset);
1800 SCM_VALIDATE_REST_ARGUMENT (rest);
1801
1802 res = scm_char_set_copy (cs1);
f49dbcad
MG
1803 p = SCM_CHARSET_DATA (res);
1804 compl = make_char_set (FUNC_NAME);
1805 q = SCM_CHARSET_DATA (compl);
d2e53ed6 1806 while (!scm_is_null (rest))
63181a97 1807 {
63181a97
MV
1808 SCM cs = SCM_CAR (rest);
1809 SCM_VALIDATE_SMOB (c, cs, charset);
1810 c++;
1811 rest = SCM_CDR (rest);
1812
f49dbcad
MG
1813 charsets_complement (q, SCM_CHARSET_DATA (cs));
1814 charsets_intersection (p, q);
63181a97
MV
1815 }
1816 return res;
1817}
1818#undef FUNC_NAME
1819
1820
1821SCM_DEFINE (scm_char_set_xor, "char-set-xor", 0, 0, 1,
1822 (SCM rest),
1823 "Return the exclusive-or of all argument character sets.")
1824#define FUNC_NAME s_scm_char_set_xor
1825{
1826 SCM res;
1827
1828 SCM_VALIDATE_REST_ARGUMENT (rest);
1829
d2e53ed6 1830 if (scm_is_null (rest))
63181a97
MV
1831 res = make_char_set (FUNC_NAME);
1832 else
1833 {
1834 int argnum = 2;
f49dbcad 1835 scm_t_char_set *p;
63181a97
MV
1836
1837 res = scm_char_set_copy (SCM_CAR (rest));
f49dbcad 1838 p = SCM_CHARSET_DATA (res);
63181a97
MV
1839 rest = SCM_CDR (rest);
1840
d2e53ed6 1841 while (scm_is_pair (rest))
f49dbcad
MG
1842 {
1843 SCM cs = SCM_CAR (rest);
1844 scm_t_char_set *cs_data;
1845
1846 SCM_VALIDATE_SMOB (argnum, cs, charset);
1847 argnum++;
1848 cs_data = SCM_CHARSET_DATA (cs);
1849 rest = SCM_CDR (rest);
1850
1851 charsets_xor (p, cs_data);
1852 }
63181a97
MV
1853 }
1854 return res;
1855}
1856#undef FUNC_NAME
1857
1858
1859SCM_DEFINE (scm_char_set_diff_plus_intersection, "char-set-diff+intersection", 1, 0, 1,
1860 (SCM cs1, SCM rest),
1861 "Return the difference and the intersection of all argument\n"
1862 "character sets.")
1863#define FUNC_NAME s_scm_char_set_diff_plus_intersection
1864{
1865 int c = 2;
1866 SCM res1, res2;
f49dbcad 1867 scm_t_char_set *p, *q;
63181a97
MV
1868
1869 SCM_VALIDATE_SMOB (1, cs1, charset);
1870 SCM_VALIDATE_REST_ARGUMENT (rest);
1871
1872 res1 = scm_char_set_copy (cs1);
1873 res2 = make_char_set (FUNC_NAME);
f49dbcad
MG
1874 p = SCM_CHARSET_DATA (res1);
1875 q = SCM_CHARSET_DATA (res2);
d2e53ed6 1876 while (!scm_is_null (rest))
63181a97 1877 {
63181a97 1878 SCM cs = SCM_CAR (rest);
f49dbcad 1879 scm_t_char_set *r;
63181a97
MV
1880
1881 SCM_VALIDATE_SMOB (c, cs, charset);
1882 c++;
f49dbcad 1883 r = SCM_CHARSET_DATA (cs);
63181a97 1884
f49dbcad
MG
1885 charsets_union (q, r);
1886 charsets_intersection (p, r);
63181a97
MV
1887 rest = SCM_CDR (rest);
1888 }
1889 return scm_values (scm_list_2 (res1, res2));
1890}
1891#undef FUNC_NAME
1892
1893
1894SCM_DEFINE (scm_char_set_complement_x, "char-set-complement!", 1, 0, 0,
f49dbcad 1895 (SCM cs), "Return the complement of the character set @var{cs}.")
63181a97
MV
1896#define FUNC_NAME s_scm_char_set_complement_x
1897{
63181a97 1898 SCM_VALIDATE_SMOB (1, cs, charset);
f49dbcad 1899 cs = scm_char_set_complement (cs);
63181a97
MV
1900 return cs;
1901}
1902#undef FUNC_NAME
1903
1904
1905SCM_DEFINE (scm_char_set_union_x, "char-set-union!", 1, 0, 1,
f49dbcad
MG
1906 (SCM cs1, SCM rest),
1907 "Return the union of all argument character sets.")
63181a97
MV
1908#define FUNC_NAME s_scm_char_set_union_x
1909{
63181a97
MV
1910 SCM_VALIDATE_SMOB (1, cs1, charset);
1911 SCM_VALIDATE_REST_ARGUMENT (rest);
1912
f49dbcad 1913 cs1 = scm_char_set_union (scm_cons (cs1, rest));
63181a97
MV
1914 return cs1;
1915}
1916#undef FUNC_NAME
1917
1918
1919SCM_DEFINE (scm_char_set_intersection_x, "char-set-intersection!", 1, 0, 1,
f49dbcad
MG
1920 (SCM cs1, SCM rest),
1921 "Return the intersection of all argument character sets.")
63181a97
MV
1922#define FUNC_NAME s_scm_char_set_intersection_x
1923{
63181a97
MV
1924 SCM_VALIDATE_SMOB (1, cs1, charset);
1925 SCM_VALIDATE_REST_ARGUMENT (rest);
1926
f49dbcad 1927 cs1 = scm_char_set_intersection (scm_cons (cs1, rest));
63181a97
MV
1928 return cs1;
1929}
1930#undef FUNC_NAME
1931
1932
1933SCM_DEFINE (scm_char_set_difference_x, "char-set-difference!", 1, 0, 1,
f49dbcad
MG
1934 (SCM cs1, SCM rest),
1935 "Return the difference of all argument character sets.")
63181a97
MV
1936#define FUNC_NAME s_scm_char_set_difference_x
1937{
63181a97
MV
1938 SCM_VALIDATE_SMOB (1, cs1, charset);
1939 SCM_VALIDATE_REST_ARGUMENT (rest);
1940
f49dbcad 1941 cs1 = scm_char_set_difference (cs1, rest);
63181a97
MV
1942 return cs1;
1943}
1944#undef FUNC_NAME
1945
1946
1947SCM_DEFINE (scm_char_set_xor_x, "char-set-xor!", 1, 0, 1,
1948 (SCM cs1, SCM rest),
1949 "Return the exclusive-or of all argument character sets.")
1950#define FUNC_NAME s_scm_char_set_xor_x
1951{
1952 /* a side-effecting variant should presumably give consistent results:
1953 (define a (char-set #\a))
1954 (char-set-xor a a a) -> char set #\a
1955 (char-set-xor! a a a) -> char set #\a
f49dbcad 1956 */
7165abeb
MG
1957 cs1 = scm_char_set_xor (scm_cons (cs1, rest));
1958 return cs1;
63181a97
MV
1959}
1960#undef FUNC_NAME
1961
1962
f49dbcad
MG
1963SCM_DEFINE (scm_char_set_diff_plus_intersection_x,
1964 "char-set-diff+intersection!", 2, 0, 1, (SCM cs1, SCM cs2,
1965 SCM rest),
1966 "Return the difference and the intersection of all argument\n"
1967 "character sets.")
63181a97
MV
1968#define FUNC_NAME s_scm_char_set_diff_plus_intersection_x
1969{
f49dbcad 1970 SCM diff, intersect;
63181a97 1971
f49dbcad
MG
1972 diff = scm_char_set_difference (cs1, scm_cons (cs2, rest));
1973 intersect =
1974 scm_char_set_intersection (scm_cons (cs1, scm_cons (cs2, rest)));
1975 cs1 = diff;
1976 cs2 = intersect;
63181a97
MV
1977 return scm_values (scm_list_2 (cs1, cs2));
1978}
1979#undef FUNC_NAME
1980
a17d2654 1981\f
f49dbcad 1982
a17d2654
LC
1983/* Standard character sets. */
1984
63181a97
MV
1985SCM scm_char_set_lower_case;
1986SCM scm_char_set_upper_case;
1987SCM scm_char_set_title_case;
1988SCM scm_char_set_letter;
1989SCM scm_char_set_digit;
1990SCM scm_char_set_letter_and_digit;
1991SCM scm_char_set_graphic;
1992SCM scm_char_set_printing;
1993SCM scm_char_set_whitespace;
1994SCM scm_char_set_iso_control;
1995SCM scm_char_set_punctuation;
1996SCM scm_char_set_symbol;
1997SCM scm_char_set_hex_digit;
1998SCM scm_char_set_blank;
1999SCM scm_char_set_ascii;
2000SCM scm_char_set_empty;
719bb8cd 2001SCM scm_char_set_designated;
63181a97
MV
2002SCM scm_char_set_full;
2003
63181a97 2004
a17d2654
LC
2005/* Create an empty character set and return it after binding it to NAME. */
2006static inline SCM
f49dbcad 2007define_charset (const char *name, const scm_t_char_set *p)
63181a97 2008{
f49dbcad
MG
2009 SCM cs;
2010
2011 SCM_NEWSMOB (cs, scm_tc16_charset, p);
63181a97 2012 scm_c_define (name, cs);
838aa000 2013 return cs;
63181a97
MV
2014}
2015
ba8477ec
MG
2016SCM_DEFINE (scm_sys_char_set_dump, "%char-set-dump", 1, 0, 0, (SCM charset),
2017 "Returns an association list containing debugging information\n"
2018 "for @var{charset}. The association list has the following entries."
2019 "@table @code\n"
2020 "@item char-set\n"
2021 "The char-set itself.\n"
2022 "@item len\n"
2023 "The number of character ranges the char-set contains\n"
2024 "@item ranges\n"
2025 "A list of lists where each sublist a range of code points\n"
2026 "and their associated characters"
2027 "@end table")
2028#define FUNC_NAME s_scm_sys_char_set_dump
63181a97 2029{
ba8477ec
MG
2030 SCM e1, e2, e3;
2031 SCM ranges = SCM_EOL, elt;
2032 size_t i;
2033 scm_t_char_set *cs;
2034 char codepoint_string_lo[9], codepoint_string_hi[9];
2035
2036 SCM_VALIDATE_SMOB (1, charset, charset);
2037 cs = SCM_CHARSET_DATA (charset);
2038
4a655e50 2039 e1 = scm_cons (scm_from_latin1_symbol ("char-set"),
ba8477ec 2040 charset);
4a655e50 2041 e2 = scm_cons (scm_from_latin1_symbol ("n"),
ba8477ec
MG
2042 scm_from_size_t (cs->len));
2043
f49dbcad 2044 for (i = 0; i < cs->len; i++)
63181a97 2045 {
ba8477ec
MG
2046 if (cs->ranges[i].lo > 0xFFFF)
2047 sprintf (codepoint_string_lo, "U+%06x", cs->ranges[i].lo);
f49dbcad 2048 else
ba8477ec
MG
2049 sprintf (codepoint_string_lo, "U+%04x", cs->ranges[i].lo);
2050 if (cs->ranges[i].hi > 0xFFFF)
2051 sprintf (codepoint_string_hi, "U+%06x", cs->ranges[i].hi);
f49dbcad 2052 else
ba8477ec
MG
2053 sprintf (codepoint_string_hi, "U+%04x", cs->ranges[i].hi);
2054
2055 elt = scm_list_4 (SCM_MAKE_CHAR (cs->ranges[i].lo),
2056 SCM_MAKE_CHAR (cs->ranges[i].hi),
2057 scm_from_locale_string (codepoint_string_lo),
2058 scm_from_locale_string (codepoint_string_hi));
2059 ranges = scm_append (scm_list_2 (ranges,
2060 scm_list_1 (elt)));
63181a97 2061 }
4a655e50 2062 e3 = scm_cons (scm_from_latin1_symbol ("ranges"),
ba8477ec
MG
2063 ranges);
2064
2065 return scm_list_3 (e1, e2, e3);
63181a97 2066}
f49dbcad 2067#undef FUNC_NAME
ba8477ec 2068
a17d2654 2069\f
f49dbcad
MG
2070
2071
63181a97
MV
2072void
2073scm_init_srfi_14 (void)
2074{
f49dbcad 2075 scm_tc16_charset = scm_make_smob_type ("character-set", 0);
63181a97
MV
2076 scm_set_smob_print (scm_tc16_charset, charset_print);
2077
f49dbcad 2078 scm_tc16_charset_cursor = scm_make_smob_type ("char-set-cursor", 0);
f49dbcad
MG
2079 scm_set_smob_print (scm_tc16_charset_cursor, charset_cursor_print);
2080
2081 scm_char_set_upper_case =
2082 define_charset ("char-set:upper-case", &cs_upper_case);
2083 scm_char_set_lower_case =
2084 define_charset ("char-set:lower-case", &cs_lower_case);
2085 scm_char_set_title_case =
2086 define_charset ("char-set:title-case", &cs_title_case);
2087 scm_char_set_letter = define_charset ("char-set:letter", &cs_letter);
2088 scm_char_set_digit = define_charset ("char-set:digit", &cs_digit);
2089 scm_char_set_letter_and_digit =
2090 define_charset ("char-set:letter+digit", &cs_letter_plus_digit);
2091 scm_char_set_graphic = define_charset ("char-set:graphic", &cs_graphic);
2092 scm_char_set_printing = define_charset ("char-set:printing", &cs_printing);
2093 scm_char_set_whitespace =
2094 define_charset ("char-set:whitespace", &cs_whitespace);
2095 scm_char_set_iso_control =
2096 define_charset ("char-set:iso-control", &cs_iso_control);
2097 scm_char_set_punctuation =
2098 define_charset ("char-set:punctuation", &cs_punctuation);
2099 scm_char_set_symbol = define_charset ("char-set:symbol", &cs_symbol);
2100 scm_char_set_hex_digit =
2101 define_charset ("char-set:hex-digit", &cs_hex_digit);
2102 scm_char_set_blank = define_charset ("char-set:blank", &cs_blank);
2103 scm_char_set_ascii = define_charset ("char-set:ascii", &cs_ascii);
2104 scm_char_set_empty = define_charset ("char-set:empty", &cs_empty);
719bb8cd 2105 scm_char_set_designated = define_charset ("char-set:designated", &cs_designated);
f49dbcad 2106 scm_char_set_full = define_charset ("char-set:full", &cs_full);
63181a97
MV
2107
2108#include "libguile/srfi-14.x"
2109}
2110
2111/* End of srfi-14.c. */