(exception:string-contains-nul): New exception pattern.
[bpt/guile.git] / libguile / srfi-14.c
CommitLineData
63181a97
MV
1/* srfi-14.c --- SRFI-14 procedures for Guile
2 *
2b829bbb 3 * Copyright (C) 2001, 2004, 2006 Free Software Foundation, Inc.
63181a97
MV
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
92205699 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
63181a97
MV
18 */
19
a17d2654
LC
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24
25#define _GNU_SOURCE /* Ask for `isblank ()'. */
63181a97
MV
26
27#include <string.h>
28#include <ctype.h>
29
30#include "libguile.h"
31#include "libguile/srfi-14.h"
32
33
a17d2654
LC
34#define SCM_CHARSET_SET(cs, idx) \
35 (((long *) SCM_SMOB_DATA (cs))[(idx) / SCM_BITS_PER_LONG] |= \
63181a97
MV
36 (1L << ((idx) % SCM_BITS_PER_LONG)))
37
a17d2654
LC
38#define SCM_CHARSET_UNSET(cs, idx) \
39 (((long *) SCM_SMOB_DATA (cs))[(idx) / SCM_BITS_PER_LONG] &= \
40 (~(1L << ((idx) % SCM_BITS_PER_LONG))))
41
63181a97
MV
42#define BYTES_PER_CHARSET (SCM_CHARSET_SIZE / 8)
43#define LONGS_PER_CHARSET (SCM_CHARSET_SIZE / SCM_BITS_PER_LONG)
44
45
46/* Smob type code for character sets. */
47int scm_tc16_charset = 0;
48
49
50/* Smob print hook for character sets. */
51static int
52charset_print (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED)
53{
54 int i;
55 int first = 1;
56
57 scm_puts ("#<charset {", port);
58 for (i = 0; i < SCM_CHARSET_SIZE; i++)
59 if (SCM_CHARSET_GET (charset, i))
60 {
61 if (first)
62 first = 0;
63 else
64 scm_puts (" ", port);
65 scm_write (SCM_MAKE_CHAR (i), port);
66 }
67 scm_puts ("}>", port);
68 return 1;
69}
70
71
72/* Smob free hook for character sets. */
73static size_t
74charset_free (SCM charset)
75{
76 return scm_smob_free (charset);
77}
78
79
80/* Create a new, empty character set. */
81static SCM
82make_char_set (const char * func_name)
83{
84 long * p;
85
86 p = scm_gc_malloc (BYTES_PER_CHARSET, "character-set");
87 memset (p, 0, BYTES_PER_CHARSET);
88 SCM_RETURN_NEWSMOB (scm_tc16_charset, p);
89}
90
91
92SCM_DEFINE (scm_char_set_p, "char-set?", 1, 0, 0,
93 (SCM obj),
94 "Return @code{#t} if @var{obj} is a character set, @code{#f}\n"
95 "otherwise.")
96#define FUNC_NAME s_scm_char_set_p
97{
98 return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_charset, obj));
99}
100#undef FUNC_NAME
101
102
103SCM_DEFINE (scm_char_set_eq, "char-set=", 0, 0, 1,
104 (SCM char_sets),
105 "Return @code{#t} if all given character sets are equal.")
106#define FUNC_NAME s_scm_char_set_eq
107{
108 int argnum = 1;
109 long *cs1_data = NULL;
110
111 SCM_VALIDATE_REST_ARGUMENT (char_sets);
112
d2e53ed6 113 while (!scm_is_null (char_sets))
63181a97
MV
114 {
115 SCM csi = SCM_CAR (char_sets);
116 long *csi_data;
117
118 SCM_VALIDATE_SMOB (argnum, csi, charset);
119 argnum++;
120 csi_data = (long *) SCM_SMOB_DATA (csi);
121 if (cs1_data == NULL)
122 cs1_data = csi_data;
123 else if (memcmp (cs1_data, csi_data, BYTES_PER_CHARSET) != 0)
124 return SCM_BOOL_F;
125 char_sets = SCM_CDR (char_sets);
126 }
127 return SCM_BOOL_T;
128}
129#undef FUNC_NAME
130
131
132SCM_DEFINE (scm_char_set_leq, "char-set<=", 0, 0, 1,
133 (SCM char_sets),
134 "Return @code{#t} if every character set @var{cs}i is a subset\n"
135 "of character set @var{cs}i+1.")
136#define FUNC_NAME s_scm_char_set_leq
137{
138 int argnum = 1;
139 long *prev_data = NULL;
140
141 SCM_VALIDATE_REST_ARGUMENT (char_sets);
142
d2e53ed6 143 while (!scm_is_null (char_sets))
63181a97
MV
144 {
145 SCM csi = SCM_CAR (char_sets);
146 long *csi_data;
147
148 SCM_VALIDATE_SMOB (argnum, csi, charset);
149 argnum++;
150 csi_data = (long *) SCM_SMOB_DATA (csi);
151 if (prev_data)
152 {
153 int k;
154
155 for (k = 0; k < LONGS_PER_CHARSET; k++)
156 {
157 if ((prev_data[k] & csi_data[k]) != prev_data[k])
158 return SCM_BOOL_F;
159 }
160 }
161 prev_data = csi_data;
162 char_sets = SCM_CDR (char_sets);
163 }
164 return SCM_BOOL_T;
165}
166#undef FUNC_NAME
167
168
169SCM_DEFINE (scm_char_set_hash, "char-set-hash", 1, 1, 0,
170 (SCM cs, SCM bound),
171 "Compute a hash value for the character set @var{cs}. If\n"
172 "@var{bound} is given and non-zero, it restricts the\n"
173 "returned value to the range 0 @dots{} @var{bound - 1}.")
174#define FUNC_NAME s_scm_char_set_hash
175{
176 const unsigned long default_bnd = 871;
177 unsigned long bnd;
178 long * p;
179 unsigned long val = 0;
180 int k;
181
182 SCM_VALIDATE_SMOB (1, cs, charset);
183
184 if (SCM_UNBNDP (bound))
185 bnd = default_bnd;
186 else
187 {
188 bnd = scm_to_ulong (bound);
189 if (bnd == 0)
190 bnd = default_bnd;
191 }
192
193 p = (long *) SCM_SMOB_DATA (cs);
194 for (k = 0; k < LONGS_PER_CHARSET; k++)
195 {
196 if (p[k] != 0)
197 val = p[k] + (val << 1);
198 }
199 return scm_from_ulong (val % bnd);
200}
201#undef FUNC_NAME
202
203
204SCM_DEFINE (scm_char_set_cursor, "char-set-cursor", 1, 0, 0,
205 (SCM cs),
206 "Return a cursor into the character set @var{cs}.")
207#define FUNC_NAME s_scm_char_set_cursor
208{
209 int idx;
210
211 SCM_VALIDATE_SMOB (1, cs, charset);
212 for (idx = 0; idx < SCM_CHARSET_SIZE; idx++)
213 {
214 if (SCM_CHARSET_GET (cs, idx))
215 break;
216 }
217 return SCM_I_MAKINUM (idx);
218}
219#undef FUNC_NAME
220
221
222SCM_DEFINE (scm_char_set_ref, "char-set-ref", 2, 0, 0,
223 (SCM cs, SCM cursor),
224 "Return the character at the current cursor position\n"
225 "@var{cursor} in the character set @var{cs}. It is an error to\n"
226 "pass a cursor for which @code{end-of-char-set?} returns true.")
227#define FUNC_NAME s_scm_char_set_ref
228{
229 size_t ccursor = scm_to_size_t (cursor);
230 SCM_VALIDATE_SMOB (1, cs, charset);
231
232 if (ccursor >= SCM_CHARSET_SIZE || !SCM_CHARSET_GET (cs, ccursor))
233 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
234 return SCM_MAKE_CHAR (ccursor);
235}
236#undef FUNC_NAME
237
238
239SCM_DEFINE (scm_char_set_cursor_next, "char-set-cursor-next", 2, 0, 0,
240 (SCM cs, SCM cursor),
241 "Advance the character set cursor @var{cursor} to the next\n"
242 "character in the character set @var{cs}. It is an error if the\n"
243 "cursor given satisfies @code{end-of-char-set?}.")
244#define FUNC_NAME s_scm_char_set_cursor_next
245{
246 size_t ccursor = scm_to_size_t (cursor);
247 SCM_VALIDATE_SMOB (1, cs, charset);
248
249 if (ccursor >= SCM_CHARSET_SIZE || !SCM_CHARSET_GET (cs, ccursor))
250 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
251 for (ccursor++; ccursor < SCM_CHARSET_SIZE; ccursor++)
252 {
253 if (SCM_CHARSET_GET (cs, ccursor))
254 break;
255 }
256 return SCM_I_MAKINUM (ccursor);
257}
258#undef FUNC_NAME
259
260
261SCM_DEFINE (scm_end_of_char_set_p, "end-of-char-set?", 1, 0, 0,
262 (SCM cursor),
263 "Return @code{#t} if @var{cursor} has reached the end of a\n"
264 "character set, @code{#f} otherwise.")
265#define FUNC_NAME s_scm_end_of_char_set_p
266{
267 size_t ccursor = scm_to_size_t (cursor);
268 return scm_from_bool (ccursor >= SCM_CHARSET_SIZE);
269}
270#undef FUNC_NAME
271
272
273SCM_DEFINE (scm_char_set_fold, "char-set-fold", 3, 0, 0,
274 (SCM kons, SCM knil, SCM cs),
275 "Fold the procedure @var{kons} over the character set @var{cs},\n"
276 "initializing it with @var{knil}.")
277#define FUNC_NAME s_scm_char_set_fold
278{
279 int k;
280
281 SCM_VALIDATE_PROC (1, kons);
282 SCM_VALIDATE_SMOB (3, cs, charset);
283
284 for (k = 0; k < SCM_CHARSET_SIZE; k++)
285 if (SCM_CHARSET_GET (cs, k))
286 {
287 knil = scm_call_2 (kons, SCM_MAKE_CHAR (k), knil);
288 }
289 return knil;
290}
291#undef FUNC_NAME
292
293
294SCM_DEFINE (scm_char_set_unfold, "char-set-unfold", 4, 1, 0,
295 (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
296 "This is a fundamental constructor for character sets.\n"
297 "@itemize @bullet\n"
298 "@item @var{g} is used to generate a series of ``seed'' values\n"
299 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
300 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
301 "@item @var{p} tells us when to stop -- when it returns true\n"
302 "when applied to one of the seed values.\n"
303 "@item @var{f} maps each seed value to a character. These\n"
304 "characters are added to the base character set @var{base_cs} to\n"
305 "form the result; @var{base_cs} defaults to the empty set.\n"
306 "@end itemize")
307#define FUNC_NAME s_scm_char_set_unfold
308{
309 SCM result, tmp;
310
311 SCM_VALIDATE_PROC (1, p);
312 SCM_VALIDATE_PROC (2, f);
313 SCM_VALIDATE_PROC (3, g);
314 if (!SCM_UNBNDP (base_cs))
315 {
316 SCM_VALIDATE_SMOB (5, base_cs, charset);
317 result = scm_char_set_copy (base_cs);
318 }
319 else
320 result = make_char_set (FUNC_NAME);
321
322 tmp = scm_call_1 (p, seed);
323 while (scm_is_false (tmp))
324 {
325 SCM ch = scm_call_1 (f, seed);
326 if (!SCM_CHARP (ch))
327 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
328 SCM_CHARSET_SET (result, SCM_CHAR (ch));
329
330 seed = scm_call_1 (g, seed);
331 tmp = scm_call_1 (p, seed);
332 }
333 return result;
334}
335#undef FUNC_NAME
336
337
338SCM_DEFINE (scm_char_set_unfold_x, "char-set-unfold!", 5, 0, 0,
339 (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
340 "This is a fundamental constructor for character sets.\n"
341 "@itemize @bullet\n"
342 "@item @var{g} is used to generate a series of ``seed'' values\n"
343 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
344 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
345 "@item @var{p} tells us when to stop -- when it returns true\n"
346 "when applied to one of the seed values.\n"
347 "@item @var{f} maps each seed value to a character. These\n"
348 "characters are added to the base character set @var{base_cs} to\n"
349 "form the result; @var{base_cs} defaults to the empty set.\n"
350 "@end itemize")
351#define FUNC_NAME s_scm_char_set_unfold_x
352{
353 SCM tmp;
354
355 SCM_VALIDATE_PROC (1, p);
356 SCM_VALIDATE_PROC (2, f);
357 SCM_VALIDATE_PROC (3, g);
358 SCM_VALIDATE_SMOB (5, base_cs, charset);
359
360 tmp = scm_call_1 (p, seed);
361 while (scm_is_false (tmp))
362 {
363 SCM ch = scm_call_1 (f, seed);
364 if (!SCM_CHARP (ch))
365 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
366 SCM_CHARSET_SET (base_cs, SCM_CHAR (ch));
367
368 seed = scm_call_1 (g, seed);
369 tmp = scm_call_1 (p, seed);
370 }
371 return base_cs;
372}
373#undef FUNC_NAME
374
375
376SCM_DEFINE (scm_char_set_for_each, "char-set-for-each", 2, 0, 0,
377 (SCM proc, SCM cs),
378 "Apply @var{proc} to every character in the character set\n"
379 "@var{cs}. The return value is not specified.")
380#define FUNC_NAME s_scm_char_set_for_each
381{
382 int k;
383
384 SCM_VALIDATE_PROC (1, proc);
385 SCM_VALIDATE_SMOB (2, cs, charset);
386
387 for (k = 0; k < SCM_CHARSET_SIZE; k++)
388 if (SCM_CHARSET_GET (cs, k))
389 scm_call_1 (proc, SCM_MAKE_CHAR (k));
390 return SCM_UNSPECIFIED;
391}
392#undef FUNC_NAME
393
394
395SCM_DEFINE (scm_char_set_map, "char-set-map", 2, 0, 0,
396 (SCM proc, SCM cs),
397 "Map the procedure @var{proc} over every character in @var{cs}.\n"
398 "@var{proc} must be a character -> character procedure.")
399#define FUNC_NAME s_scm_char_set_map
400{
401 SCM result;
402 int k;
403
404 SCM_VALIDATE_PROC (1, proc);
405 SCM_VALIDATE_SMOB (2, cs, charset);
406
407 result = make_char_set (FUNC_NAME);
408 for (k = 0; k < SCM_CHARSET_SIZE; k++)
409 if (SCM_CHARSET_GET (cs, k))
410 {
411 SCM ch = scm_call_1 (proc, SCM_MAKE_CHAR (k));
412 if (!SCM_CHARP (ch))
413 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (proc));
414 SCM_CHARSET_SET (result, SCM_CHAR (ch));
415 }
416 return result;
417}
418#undef FUNC_NAME
419
420
421SCM_DEFINE (scm_char_set_copy, "char-set-copy", 1, 0, 0,
422 (SCM cs),
423 "Return a newly allocated character set containing all\n"
424 "characters in @var{cs}.")
425#define FUNC_NAME s_scm_char_set_copy
426{
427 SCM ret;
428 long * p1, * p2;
429 int k;
430
431 SCM_VALIDATE_SMOB (1, cs, charset);
432 ret = make_char_set (FUNC_NAME);
433 p1 = (long *) SCM_SMOB_DATA (cs);
434 p2 = (long *) SCM_SMOB_DATA (ret);
435 for (k = 0; k < LONGS_PER_CHARSET; k++)
436 p2[k] = p1[k];
437 return ret;
438}
439#undef FUNC_NAME
440
441
442SCM_DEFINE (scm_char_set, "char-set", 0, 0, 1,
443 (SCM rest),
444 "Return a character set containing all given characters.")
445#define FUNC_NAME s_scm_char_set
446{
447 SCM cs;
448 long * p;
449 int argnum = 1;
450
451 SCM_VALIDATE_REST_ARGUMENT (rest);
452 cs = make_char_set (FUNC_NAME);
453 p = (long *) SCM_SMOB_DATA (cs);
d2e53ed6 454 while (!scm_is_null (rest))
63181a97
MV
455 {
456 int c;
457
458 SCM_VALIDATE_CHAR_COPY (argnum, SCM_CAR (rest), c);
459 argnum++;
460 rest = SCM_CDR (rest);
461 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
462 }
463 return cs;
464}
465#undef FUNC_NAME
466
467
468SCM_DEFINE (scm_list_to_char_set, "list->char-set", 1, 1, 0,
469 (SCM list, SCM base_cs),
470 "Convert the character list @var{list} to a character set. If\n"
471 "the character set @var{base_cs} is given, the character in this\n"
472 "set are also included in the result.")
473#define FUNC_NAME s_scm_list_to_char_set
474{
475 SCM cs;
476 long * p;
477
478 SCM_VALIDATE_LIST (1, list);
479 if (SCM_UNBNDP (base_cs))
480 cs = make_char_set (FUNC_NAME);
481 else
482 {
483 SCM_VALIDATE_SMOB (2, base_cs, charset);
484 cs = scm_char_set_copy (base_cs);
485 }
486 p = (long *) SCM_SMOB_DATA (cs);
d2e53ed6 487 while (!scm_is_null (list))
63181a97
MV
488 {
489 SCM chr = SCM_CAR (list);
490 int c;
491
492 SCM_VALIDATE_CHAR_COPY (0, chr, c);
493 list = SCM_CDR (list);
494
495 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
496 }
497 return cs;
498}
499#undef FUNC_NAME
500
501
502SCM_DEFINE (scm_list_to_char_set_x, "list->char-set!", 2, 0, 0,
503 (SCM list, SCM base_cs),
504 "Convert the character list @var{list} to a character set. The\n"
505 "characters are added to @var{base_cs} and @var{base_cs} is\n"
506 "returned.")
507#define FUNC_NAME s_scm_list_to_char_set_x
508{
509 long * p;
510
511 SCM_VALIDATE_LIST (1, list);
512 SCM_VALIDATE_SMOB (2, base_cs, charset);
513 p = (long *) SCM_SMOB_DATA (base_cs);
d2e53ed6 514 while (!scm_is_null (list))
63181a97
MV
515 {
516 SCM chr = SCM_CAR (list);
517 int c;
518
519 SCM_VALIDATE_CHAR_COPY (0, chr, c);
520 list = SCM_CDR (list);
521
522 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
523 }
524 return base_cs;
525}
526#undef FUNC_NAME
527
528
529SCM_DEFINE (scm_string_to_char_set, "string->char-set", 1, 1, 0,
530 (SCM str, SCM base_cs),
531 "Convert the string @var{str} to a character set. If the\n"
532 "character set @var{base_cs} is given, the characters in this\n"
533 "set are also included in the result.")
534#define FUNC_NAME s_scm_string_to_char_set
535{
536 SCM cs;
537 long * p;
538 const char * s;
539 size_t k = 0, len;
540
541 SCM_VALIDATE_STRING (1, str);
542 if (SCM_UNBNDP (base_cs))
543 cs = make_char_set (FUNC_NAME);
544 else
545 {
546 SCM_VALIDATE_SMOB (2, base_cs, charset);
547 cs = scm_char_set_copy (base_cs);
548 }
549 p = (long *) SCM_SMOB_DATA (cs);
550 s = scm_i_string_chars (str);
551 len = scm_i_string_length (str);
552 while (k < len)
553 {
554 int c = s[k++];
555 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
556 }
557 scm_remember_upto_here_1 (str);
558 return cs;
559}
560#undef FUNC_NAME
561
562
563SCM_DEFINE (scm_string_to_char_set_x, "string->char-set!", 2, 0, 0,
564 (SCM str, SCM base_cs),
565 "Convert the string @var{str} to a character set. The\n"
566 "characters from the string are added to @var{base_cs}, and\n"
567 "@var{base_cs} is returned.")
568#define FUNC_NAME s_scm_string_to_char_set_x
569{
570 long * p;
571 const char * s;
572 size_t k = 0, len;
573
574 SCM_VALIDATE_STRING (1, str);
575 SCM_VALIDATE_SMOB (2, base_cs, charset);
576 p = (long *) SCM_SMOB_DATA (base_cs);
577 s = scm_i_string_chars (str);
578 len = scm_i_string_length (str);
579 while (k < len)
580 {
581 int c = s[k++];
582 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
583 }
584 scm_remember_upto_here_1 (str);
585 return base_cs;
586}
587#undef FUNC_NAME
588
589
590SCM_DEFINE (scm_char_set_filter, "char-set-filter", 2, 1, 0,
591 (SCM pred, SCM cs, SCM base_cs),
592 "Return a character set containing every character from @var{cs}\n"
593 "so that it satisfies @var{pred}. If provided, the characters\n"
594 "from @var{base_cs} are added to the result.")
595#define FUNC_NAME s_scm_char_set_filter
596{
597 SCM ret;
598 int k;
599 long * p;
600
601 SCM_VALIDATE_PROC (1, pred);
602 SCM_VALIDATE_SMOB (2, cs, charset);
603 if (!SCM_UNBNDP (base_cs))
604 {
605 SCM_VALIDATE_SMOB (3, base_cs, charset);
606 ret = scm_char_set_copy (base_cs);
607 }
608 else
609 ret = make_char_set (FUNC_NAME);
610 p = (long *) SCM_SMOB_DATA (ret);
611 for (k = 0; k < SCM_CHARSET_SIZE; k++)
612 {
613 if (SCM_CHARSET_GET (cs, k))
614 {
615 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
616
617 if (scm_is_true (res))
618 p[k / SCM_BITS_PER_LONG] |= 1L << (k % SCM_BITS_PER_LONG);
619 }
620 }
621 return ret;
622}
623#undef FUNC_NAME
624
625
626SCM_DEFINE (scm_char_set_filter_x, "char-set-filter!", 3, 0, 0,
627 (SCM pred, SCM cs, SCM base_cs),
628 "Return a character set containing every character from @var{cs}\n"
629 "so that it satisfies @var{pred}. The characters are added to\n"
630 "@var{base_cs} and @var{base_cs} is returned.")
631#define FUNC_NAME s_scm_char_set_filter_x
632{
633 int k;
634 long * p;
635
636 SCM_VALIDATE_PROC (1, pred);
637 SCM_VALIDATE_SMOB (2, cs, charset);
638 SCM_VALIDATE_SMOB (3, base_cs, charset);
639 p = (long *) SCM_SMOB_DATA (base_cs);
640 for (k = 0; k < SCM_CHARSET_SIZE; k++)
641 {
642 if (SCM_CHARSET_GET (cs, k))
643 {
644 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
645
646 if (scm_is_true (res))
647 p[k / SCM_BITS_PER_LONG] |= 1L << (k % SCM_BITS_PER_LONG);
648 }
649 }
650 return base_cs;
651}
652#undef FUNC_NAME
653
654
655SCM_DEFINE (scm_ucs_range_to_char_set, "ucs-range->char-set", 2, 2, 0,
656 (SCM lower, SCM upper, SCM error, SCM base_cs),
657 "Return a character set containing all characters whose\n"
658 "character codes lie in the half-open range\n"
659 "[@var{lower},@var{upper}).\n"
660 "\n"
661 "If @var{error} is a true value, an error is signalled if the\n"
662 "specified range contains characters which are not contained in\n"
663 "the implemented character range. If @var{error} is @code{#f},\n"
664 "these characters are silently left out of the resultung\n"
665 "character set.\n"
666 "\n"
667 "The characters in @var{base_cs} are added to the result, if\n"
668 "given.")
669#define FUNC_NAME s_scm_ucs_range_to_char_set
670{
671 SCM cs;
672 size_t clower, cupper;
673 long * p;
674
675 clower = scm_to_size_t (lower);
676 cupper = scm_to_size_t (upper);
677 SCM_ASSERT_RANGE (2, upper, cupper >= clower);
678 if (!SCM_UNBNDP (error))
679 {
680 if (scm_is_true (error))
681 {
682 SCM_ASSERT_RANGE (1, lower, clower <= SCM_CHARSET_SIZE);
683 SCM_ASSERT_RANGE (2, upper, cupper <= SCM_CHARSET_SIZE);
684 }
685 }
686 if (clower > SCM_CHARSET_SIZE)
687 clower = SCM_CHARSET_SIZE;
688 if (cupper > SCM_CHARSET_SIZE)
689 cupper = SCM_CHARSET_SIZE;
690 if (SCM_UNBNDP (base_cs))
691 cs = make_char_set (FUNC_NAME);
692 else
693 {
694 SCM_VALIDATE_SMOB (4, base_cs, charset);
695 cs = scm_char_set_copy (base_cs);
696 }
697 p = (long *) SCM_SMOB_DATA (cs);
698 while (clower < cupper)
699 {
700 p[clower / SCM_BITS_PER_LONG] |= 1L << (clower % SCM_BITS_PER_LONG);
701 clower++;
702 }
703 return cs;
704}
705#undef FUNC_NAME
706
707
708SCM_DEFINE (scm_ucs_range_to_char_set_x, "ucs-range->char-set!", 4, 0, 0,
709 (SCM lower, SCM upper, SCM error, SCM base_cs),
710 "Return a character set containing all characters whose\n"
711 "character codes lie in the half-open range\n"
712 "[@var{lower},@var{upper}).\n"
713 "\n"
714 "If @var{error} is a true value, an error is signalled if the\n"
715 "specified range contains characters which are not contained in\n"
716 "the implemented character range. If @var{error} is @code{#f},\n"
717 "these characters are silently left out of the resultung\n"
718 "character set.\n"
719 "\n"
720 "The characters are added to @var{base_cs} and @var{base_cs} is\n"
721 "returned.")
722#define FUNC_NAME s_scm_ucs_range_to_char_set_x
723{
724 size_t clower, cupper;
725 long * p;
726
727 clower = scm_to_size_t (lower);
728 cupper = scm_to_size_t (upper);
729 SCM_ASSERT_RANGE (2, upper, cupper >= clower);
730 if (scm_is_true (error))
731 {
732 SCM_ASSERT_RANGE (1, lower, clower <= SCM_CHARSET_SIZE);
733 SCM_ASSERT_RANGE (2, upper, cupper <= SCM_CHARSET_SIZE);
734 }
735 if (clower > SCM_CHARSET_SIZE)
736 clower = SCM_CHARSET_SIZE;
737 if (cupper > SCM_CHARSET_SIZE)
738 cupper = SCM_CHARSET_SIZE;
739 p = (long *) SCM_SMOB_DATA (base_cs);
740 while (clower < cupper)
741 {
742 p[clower / SCM_BITS_PER_LONG] |= 1L << (clower % SCM_BITS_PER_LONG);
743 clower++;
744 }
745 return base_cs;
746}
747#undef FUNC_NAME
748
749SCM_DEFINE (scm_to_char_set, "->char-set", 1, 0, 0,
750 (SCM x),
751 "Coerces x into a char-set. @var{x} may be a string, character or char-set. A string is converted to the set of its constituent characters; a character is converted to a singleton set; a char-set is returned as-is.")
752#define FUNC_NAME s_scm_to_char_set
753{
754 if (scm_is_string (x))
755 return scm_string_to_char_set (x, SCM_UNDEFINED);
756 else if (SCM_CHARP (x))
757 return scm_char_set (scm_list_1 (x));
758 else if (SCM_SMOB_PREDICATE (scm_tc16_charset, x))
759 return x;
760 else
761 scm_wrong_type_arg (NULL, 0, x);
762}
763#undef FUNC_NAME
764
765SCM_DEFINE (scm_char_set_size, "char-set-size", 1, 0, 0,
766 (SCM cs),
767 "Return the number of elements in character set @var{cs}.")
768#define FUNC_NAME s_scm_char_set_size
769{
770 int k, count = 0;
771
772 SCM_VALIDATE_SMOB (1, cs, charset);
773 for (k = 0; k < SCM_CHARSET_SIZE; k++)
774 if (SCM_CHARSET_GET (cs, k))
775 count++;
776 return SCM_I_MAKINUM (count);
777}
778#undef FUNC_NAME
779
780
781SCM_DEFINE (scm_char_set_count, "char-set-count", 2, 0, 0,
782 (SCM pred, SCM cs),
783 "Return the number of the elements int the character set\n"
784 "@var{cs} which satisfy the predicate @var{pred}.")
785#define FUNC_NAME s_scm_char_set_count
786{
787 int k, count = 0;
788
789 SCM_VALIDATE_PROC (1, pred);
790 SCM_VALIDATE_SMOB (2, cs, charset);
791
792 for (k = 0; k < SCM_CHARSET_SIZE; k++)
793 if (SCM_CHARSET_GET (cs, k))
794 {
795 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
796 if (scm_is_true (res))
797 count++;
798 }
799 return SCM_I_MAKINUM (count);
800}
801#undef FUNC_NAME
802
803
804SCM_DEFINE (scm_char_set_to_list, "char-set->list", 1, 0, 0,
805 (SCM cs),
806 "Return a list containing the elements of the character set\n"
807 "@var{cs}.")
808#define FUNC_NAME s_scm_char_set_to_list
809{
810 int k;
811 SCM result = SCM_EOL;
812
813 SCM_VALIDATE_SMOB (1, cs, charset);
814 for (k = SCM_CHARSET_SIZE; k > 0; k--)
815 if (SCM_CHARSET_GET (cs, k - 1))
816 result = scm_cons (SCM_MAKE_CHAR (k - 1), result);
817 return result;
818}
819#undef FUNC_NAME
820
821
822SCM_DEFINE (scm_char_set_to_string, "char-set->string", 1, 0, 0,
823 (SCM cs),
824 "Return a string containing the elements of the character set\n"
825 "@var{cs}. The order in which the characters are placed in the\n"
826 "string is not defined.")
827#define FUNC_NAME s_scm_char_set_to_string
828{
829 int k;
830 int count = 0;
831 int idx = 0;
832 SCM result;
833 char * p;
834
835 SCM_VALIDATE_SMOB (1, cs, charset);
836 for (k = 0; k < SCM_CHARSET_SIZE; k++)
837 if (SCM_CHARSET_GET (cs, k))
838 count++;
839 result = scm_i_make_string (count, &p);
840 for (k = 0; k < SCM_CHARSET_SIZE; k++)
841 if (SCM_CHARSET_GET (cs, k))
842 p[idx++] = k;
843 return result;
844}
845#undef FUNC_NAME
846
847
848SCM_DEFINE (scm_char_set_contains_p, "char-set-contains?", 2, 0, 0,
849 (SCM cs, SCM ch),
850 "Return @code{#t} iff the character @var{ch} is contained in the\n"
851 "character set @var{cs}.")
852#define FUNC_NAME s_scm_char_set_contains_p
853{
854 SCM_VALIDATE_SMOB (1, cs, charset);
855 SCM_VALIDATE_CHAR (2, ch);
856 return scm_from_bool (SCM_CHARSET_GET (cs, SCM_CHAR (ch)));
857}
858#undef FUNC_NAME
859
860
861SCM_DEFINE (scm_char_set_every, "char-set-every", 2, 0, 0,
862 (SCM pred, SCM cs),
863 "Return a true value if every character in the character set\n"
864 "@var{cs} satisfies the predicate @var{pred}.")
865#define FUNC_NAME s_scm_char_set_every
866{
867 int k;
868 SCM res = SCM_BOOL_T;
869
870 SCM_VALIDATE_PROC (1, pred);
871 SCM_VALIDATE_SMOB (2, cs, charset);
872
873 for (k = 0; k < SCM_CHARSET_SIZE; k++)
874 if (SCM_CHARSET_GET (cs, k))
875 {
876 res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
877 if (scm_is_false (res))
878 return res;
879 }
880 return res;
881}
882#undef FUNC_NAME
883
884
885SCM_DEFINE (scm_char_set_any, "char-set-any", 2, 0, 0,
886 (SCM pred, SCM cs),
887 "Return a true value if any character in the character set\n"
888 "@var{cs} satisfies the predicate @var{pred}.")
889#define FUNC_NAME s_scm_char_set_any
890{
891 int k;
892
893 SCM_VALIDATE_PROC (1, pred);
894 SCM_VALIDATE_SMOB (2, cs, charset);
895
896 for (k = 0; k < SCM_CHARSET_SIZE; k++)
897 if (SCM_CHARSET_GET (cs, k))
898 {
899 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
900 if (scm_is_true (res))
901 return res;
902 }
903 return SCM_BOOL_F;
904}
905#undef FUNC_NAME
906
907
908SCM_DEFINE (scm_char_set_adjoin, "char-set-adjoin", 1, 0, 1,
909 (SCM cs, SCM rest),
910 "Add all character arguments to the first argument, which must\n"
911 "be a character set.")
912#define FUNC_NAME s_scm_char_set_adjoin
913{
914 long * p;
915
916 SCM_VALIDATE_SMOB (1, cs, charset);
917 SCM_VALIDATE_REST_ARGUMENT (rest);
918 cs = scm_char_set_copy (cs);
919
920 p = (long *) SCM_SMOB_DATA (cs);
d2e53ed6 921 while (!scm_is_null (rest))
63181a97
MV
922 {
923 SCM chr = SCM_CAR (rest);
924 int c;
925
926 SCM_VALIDATE_CHAR_COPY (1, chr, c);
927 rest = SCM_CDR (rest);
928
929 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
930 }
931 return cs;
932}
933#undef FUNC_NAME
934
935
936SCM_DEFINE (scm_char_set_delete, "char-set-delete", 1, 0, 1,
937 (SCM cs, SCM rest),
938 "Delete all character arguments from the first argument, which\n"
939 "must be a character set.")
940#define FUNC_NAME s_scm_char_set_delete
941{
942 long * p;
943
944 SCM_VALIDATE_SMOB (1, cs, charset);
945 SCM_VALIDATE_REST_ARGUMENT (rest);
946 cs = scm_char_set_copy (cs);
947
948 p = (long *) SCM_SMOB_DATA (cs);
d2e53ed6 949 while (!scm_is_null (rest))
63181a97
MV
950 {
951 SCM chr = SCM_CAR (rest);
952 int c;
953
954 SCM_VALIDATE_CHAR_COPY (1, chr, c);
955 rest = SCM_CDR (rest);
956
957 p[c / SCM_BITS_PER_LONG] &= ~(1L << (c % SCM_BITS_PER_LONG));
958 }
959 return cs;
960}
961#undef FUNC_NAME
962
963
964SCM_DEFINE (scm_char_set_adjoin_x, "char-set-adjoin!", 1, 0, 1,
965 (SCM cs, SCM rest),
966 "Add all character arguments to the first argument, which must\n"
967 "be a character set.")
968#define FUNC_NAME s_scm_char_set_adjoin_x
969{
970 long * p;
971
972 SCM_VALIDATE_SMOB (1, cs, charset);
973 SCM_VALIDATE_REST_ARGUMENT (rest);
974
975 p = (long *) SCM_SMOB_DATA (cs);
d2e53ed6 976 while (!scm_is_null (rest))
63181a97
MV
977 {
978 SCM chr = SCM_CAR (rest);
979 int c;
980
981 SCM_VALIDATE_CHAR_COPY (1, chr, c);
982 rest = SCM_CDR (rest);
983
984 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
985 }
986 return cs;
987}
988#undef FUNC_NAME
989
990
991SCM_DEFINE (scm_char_set_delete_x, "char-set-delete!", 1, 0, 1,
992 (SCM cs, SCM rest),
993 "Delete all character arguments from the first argument, which\n"
994 "must be a character set.")
995#define FUNC_NAME s_scm_char_set_delete_x
996{
997 long * p;
998
999 SCM_VALIDATE_SMOB (1, cs, charset);
1000 SCM_VALIDATE_REST_ARGUMENT (rest);
1001
1002 p = (long *) SCM_SMOB_DATA (cs);
d2e53ed6 1003 while (!scm_is_null (rest))
63181a97
MV
1004 {
1005 SCM chr = SCM_CAR (rest);
1006 int c;
1007
1008 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1009 rest = SCM_CDR (rest);
1010
1011 p[c / SCM_BITS_PER_LONG] &= ~(1L << (c % SCM_BITS_PER_LONG));
1012 }
1013 return cs;
1014}
1015#undef FUNC_NAME
1016
1017
1018SCM_DEFINE (scm_char_set_complement, "char-set-complement", 1, 0, 0,
1019 (SCM cs),
1020 "Return the complement of the character set @var{cs}.")
1021#define FUNC_NAME s_scm_char_set_complement
1022{
1023 int k;
1024 SCM res;
1025 long * p, * q;
1026
1027 SCM_VALIDATE_SMOB (1, cs, charset);
1028
1029 res = make_char_set (FUNC_NAME);
1030 p = (long *) SCM_SMOB_DATA (res);
1031 q = (long *) SCM_SMOB_DATA (cs);
1032 for (k = 0; k < LONGS_PER_CHARSET; k++)
1033 p[k] = ~q[k];
1034 return res;
1035}
1036#undef FUNC_NAME
1037
1038
1039SCM_DEFINE (scm_char_set_union, "char-set-union", 0, 0, 1,
1040 (SCM rest),
1041 "Return the union of all argument character sets.")
1042#define FUNC_NAME s_scm_char_set_union
1043{
1044 int c = 1;
1045 SCM res;
1046 long * p;
1047
1048 SCM_VALIDATE_REST_ARGUMENT (rest);
1049
1050 res = make_char_set (FUNC_NAME);
1051 p = (long *) SCM_SMOB_DATA (res);
d2e53ed6 1052 while (!scm_is_null (rest))
63181a97
MV
1053 {
1054 int k;
1055 SCM cs = SCM_CAR (rest);
1056 SCM_VALIDATE_SMOB (c, cs, charset);
1057 c++;
1058 rest = SCM_CDR (rest);
1059
1060 for (k = 0; k < LONGS_PER_CHARSET; k++)
1061 p[k] |= ((long *) SCM_SMOB_DATA (cs))[k];
1062 }
1063 return res;
1064}
1065#undef FUNC_NAME
1066
1067
1068SCM_DEFINE (scm_char_set_intersection, "char-set-intersection", 0, 0, 1,
1069 (SCM rest),
1070 "Return the intersection of all argument character sets.")
1071#define FUNC_NAME s_scm_char_set_intersection
1072{
1073 SCM res;
1074
1075 SCM_VALIDATE_REST_ARGUMENT (rest);
1076
d2e53ed6 1077 if (scm_is_null (rest))
63181a97
MV
1078 res = make_char_set (FUNC_NAME);
1079 else
1080 {
1081 long *p;
1082 int argnum = 2;
1083
1084 res = scm_char_set_copy (SCM_CAR (rest));
1085 p = (long *) SCM_SMOB_DATA (res);
1086 rest = SCM_CDR (rest);
1087
d2e53ed6 1088 while (scm_is_pair (rest))
63181a97
MV
1089 {
1090 int k;
1091 SCM cs = SCM_CAR (rest);
1092 long *cs_data;
1093
1094 SCM_VALIDATE_SMOB (argnum, cs, charset);
1095 argnum++;
1096 cs_data = (long *) SCM_SMOB_DATA (cs);
1097 rest = SCM_CDR (rest);
1098 for (k = 0; k < LONGS_PER_CHARSET; k++)
1099 p[k] &= cs_data[k];
1100 }
1101 }
1102
1103 return res;
1104}
1105#undef FUNC_NAME
1106
1107
1108SCM_DEFINE (scm_char_set_difference, "char-set-difference", 1, 0, 1,
1109 (SCM cs1, SCM rest),
1110 "Return the difference of all argument character sets.")
1111#define FUNC_NAME s_scm_char_set_difference
1112{
1113 int c = 2;
1114 SCM res;
1115 long * p;
1116
1117 SCM_VALIDATE_SMOB (1, cs1, charset);
1118 SCM_VALIDATE_REST_ARGUMENT (rest);
1119
1120 res = scm_char_set_copy (cs1);
1121 p = (long *) SCM_SMOB_DATA (res);
d2e53ed6 1122 while (!scm_is_null (rest))
63181a97
MV
1123 {
1124 int k;
1125 SCM cs = SCM_CAR (rest);
1126 SCM_VALIDATE_SMOB (c, cs, charset);
1127 c++;
1128 rest = SCM_CDR (rest);
1129
1130 for (k = 0; k < LONGS_PER_CHARSET; k++)
1131 p[k] &= ~((long *) SCM_SMOB_DATA (cs))[k];
1132 }
1133 return res;
1134}
1135#undef FUNC_NAME
1136
1137
1138SCM_DEFINE (scm_char_set_xor, "char-set-xor", 0, 0, 1,
1139 (SCM rest),
1140 "Return the exclusive-or of all argument character sets.")
1141#define FUNC_NAME s_scm_char_set_xor
1142{
1143 SCM res;
1144
1145 SCM_VALIDATE_REST_ARGUMENT (rest);
1146
d2e53ed6 1147 if (scm_is_null (rest))
63181a97
MV
1148 res = make_char_set (FUNC_NAME);
1149 else
1150 {
1151 int argnum = 2;
1152 long * p;
1153
1154 res = scm_char_set_copy (SCM_CAR (rest));
1155 p = (long *) SCM_SMOB_DATA (res);
1156 rest = SCM_CDR (rest);
1157
d2e53ed6 1158 while (scm_is_pair (rest))
63181a97
MV
1159 {
1160 SCM cs = SCM_CAR (rest);
1161 long *cs_data;
1162 int k;
1163
1164 SCM_VALIDATE_SMOB (argnum, cs, charset);
1165 argnum++;
1166 cs_data = (long *) SCM_SMOB_DATA (cs);
1167 rest = SCM_CDR (rest);
1168
1169 for (k = 0; k < LONGS_PER_CHARSET; k++)
1170 p[k] ^= cs_data[k];
1171 }
1172 }
1173 return res;
1174}
1175#undef FUNC_NAME
1176
1177
1178SCM_DEFINE (scm_char_set_diff_plus_intersection, "char-set-diff+intersection", 1, 0, 1,
1179 (SCM cs1, SCM rest),
1180 "Return the difference and the intersection of all argument\n"
1181 "character sets.")
1182#define FUNC_NAME s_scm_char_set_diff_plus_intersection
1183{
1184 int c = 2;
1185 SCM res1, res2;
1186 long * p, * q;
1187
1188 SCM_VALIDATE_SMOB (1, cs1, charset);
1189 SCM_VALIDATE_REST_ARGUMENT (rest);
1190
1191 res1 = scm_char_set_copy (cs1);
1192 res2 = make_char_set (FUNC_NAME);
1193 p = (long *) SCM_SMOB_DATA (res1);
1194 q = (long *) SCM_SMOB_DATA (res2);
d2e53ed6 1195 while (!scm_is_null (rest))
63181a97
MV
1196 {
1197 int k;
1198 SCM cs = SCM_CAR (rest);
1199 long *r;
1200
1201 SCM_VALIDATE_SMOB (c, cs, charset);
1202 c++;
1203 r = (long *) SCM_SMOB_DATA (cs);
1204
1205 for (k = 0; k < LONGS_PER_CHARSET; k++)
1206 {
1207 q[k] |= p[k] & r[k];
1208 p[k] &= ~r[k];
1209 }
1210 rest = SCM_CDR (rest);
1211 }
1212 return scm_values (scm_list_2 (res1, res2));
1213}
1214#undef FUNC_NAME
1215
1216
1217SCM_DEFINE (scm_char_set_complement_x, "char-set-complement!", 1, 0, 0,
1218 (SCM cs),
1219 "Return the complement of the character set @var{cs}.")
1220#define FUNC_NAME s_scm_char_set_complement_x
1221{
1222 int k;
1223 long * p;
1224
1225 SCM_VALIDATE_SMOB (1, cs, charset);
1226 p = (long *) SCM_SMOB_DATA (cs);
1227 for (k = 0; k < LONGS_PER_CHARSET; k++)
1228 p[k] = ~p[k];
1229 return cs;
1230}
1231#undef FUNC_NAME
1232
1233
1234SCM_DEFINE (scm_char_set_union_x, "char-set-union!", 1, 0, 1,
1235 (SCM cs1, SCM rest),
1236 "Return the union of all argument character sets.")
1237#define FUNC_NAME s_scm_char_set_union_x
1238{
1239 int c = 2;
1240 long * p;
1241
1242 SCM_VALIDATE_SMOB (1, cs1, charset);
1243 SCM_VALIDATE_REST_ARGUMENT (rest);
1244
1245 p = (long *) SCM_SMOB_DATA (cs1);
d2e53ed6 1246 while (!scm_is_null (rest))
63181a97
MV
1247 {
1248 int k;
1249 SCM cs = SCM_CAR (rest);
1250 SCM_VALIDATE_SMOB (c, cs, charset);
1251 c++;
1252 rest = SCM_CDR (rest);
1253
1254 for (k = 0; k < LONGS_PER_CHARSET; k++)
1255 p[k] |= ((long *) SCM_SMOB_DATA (cs))[k];
1256 }
1257 return cs1;
1258}
1259#undef FUNC_NAME
1260
1261
1262SCM_DEFINE (scm_char_set_intersection_x, "char-set-intersection!", 1, 0, 1,
1263 (SCM cs1, SCM rest),
1264 "Return the intersection of all argument character sets.")
1265#define FUNC_NAME s_scm_char_set_intersection_x
1266{
1267 int c = 2;
1268 long * p;
1269
1270 SCM_VALIDATE_SMOB (1, cs1, charset);
1271 SCM_VALIDATE_REST_ARGUMENT (rest);
1272
1273 p = (long *) SCM_SMOB_DATA (cs1);
d2e53ed6 1274 while (!scm_is_null (rest))
63181a97
MV
1275 {
1276 int k;
1277 SCM cs = SCM_CAR (rest);
1278 SCM_VALIDATE_SMOB (c, cs, charset);
1279 c++;
1280 rest = SCM_CDR (rest);
1281
1282 for (k = 0; k < LONGS_PER_CHARSET; k++)
1283 p[k] &= ((long *) SCM_SMOB_DATA (cs))[k];
1284 }
1285 return cs1;
1286}
1287#undef FUNC_NAME
1288
1289
1290SCM_DEFINE (scm_char_set_difference_x, "char-set-difference!", 1, 0, 1,
1291 (SCM cs1, SCM rest),
1292 "Return the difference of all argument character sets.")
1293#define FUNC_NAME s_scm_char_set_difference_x
1294{
1295 int c = 2;
1296 long * p;
1297
1298 SCM_VALIDATE_SMOB (1, cs1, charset);
1299 SCM_VALIDATE_REST_ARGUMENT (rest);
1300
1301 p = (long *) SCM_SMOB_DATA (cs1);
d2e53ed6 1302 while (!scm_is_null (rest))
63181a97
MV
1303 {
1304 int k;
1305 SCM cs = SCM_CAR (rest);
1306 SCM_VALIDATE_SMOB (c, cs, charset);
1307 c++;
1308 rest = SCM_CDR (rest);
1309
1310 for (k = 0; k < LONGS_PER_CHARSET; k++)
1311 p[k] &= ~((long *) SCM_SMOB_DATA (cs))[k];
1312 }
1313 return cs1;
1314}
1315#undef FUNC_NAME
1316
1317
1318SCM_DEFINE (scm_char_set_xor_x, "char-set-xor!", 1, 0, 1,
1319 (SCM cs1, SCM rest),
1320 "Return the exclusive-or of all argument character sets.")
1321#define FUNC_NAME s_scm_char_set_xor_x
1322{
1323 /* a side-effecting variant should presumably give consistent results:
1324 (define a (char-set #\a))
1325 (char-set-xor a a a) -> char set #\a
1326 (char-set-xor! a a a) -> char set #\a
1327 */
1328 return scm_char_set_xor (scm_cons (cs1, rest));
1329
1330#if 0
1331 /* this would give (char-set-xor! a a a) -> empty char set. */
1332 int c = 2;
1333 long * p;
1334
1335 SCM_VALIDATE_SMOB (1, cs1, charset);
1336 SCM_VALIDATE_REST_ARGUMENT (rest);
1337
1338 p = (long *) SCM_SMOB_DATA (cs1);
d2e53ed6 1339 while (!scm_is_null (rest))
63181a97
MV
1340 {
1341 int k;
1342 SCM cs = SCM_CAR (rest);
1343 SCM_VALIDATE_SMOB (c, cs, charset);
1344 c++;
1345 rest = SCM_CDR (rest);
1346
1347 for (k = 0; k < LONGS_PER_CHARSET; k++)
1348 p[k] ^= ((long *) SCM_SMOB_DATA (cs))[k];
1349 }
1350 return cs1;
1351#endif
1352}
1353#undef FUNC_NAME
1354
1355
1356SCM_DEFINE (scm_char_set_diff_plus_intersection_x, "char-set-diff+intersection!", 2, 0, 1,
1357 (SCM cs1, SCM cs2, SCM rest),
1358 "Return the difference and the intersection of all argument\n"
1359 "character sets.")
1360#define FUNC_NAME s_scm_char_set_diff_plus_intersection_x
1361{
1362 int c = 3;
1363 long * p, * q;
1364 int k;
1365
1366 SCM_VALIDATE_SMOB (1, cs1, charset);
1367 SCM_VALIDATE_SMOB (2, cs2, charset);
1368 SCM_VALIDATE_REST_ARGUMENT (rest);
1369
1370 p = (long *) SCM_SMOB_DATA (cs1);
1371 q = (long *) SCM_SMOB_DATA (cs2);
1372 if (p == q)
1373 {
1374 /* (char-set-diff+intersection! a a ...): can't share storage,
1375 but we know the answer without checking for further
1376 arguments. */
1377 return scm_values (scm_list_2 (make_char_set (FUNC_NAME), cs1));
1378 }
1379 for (k = 0; k < LONGS_PER_CHARSET; k++)
1380 {
1381 long t = p[k];
1382
1383 p[k] &= ~q[k];
1384 q[k] = t & q[k];
1385 }
d2e53ed6 1386 while (!scm_is_null (rest))
63181a97
MV
1387 {
1388 SCM cs = SCM_CAR (rest);
1389 long *r;
1390
1391 SCM_VALIDATE_SMOB (c, cs, charset);
1392 c++;
1393 r = (long *) SCM_SMOB_DATA (cs);
1394
1395 for (k = 0; k < LONGS_PER_CHARSET; k++)
1396 {
1397 q[k] |= p[k] & r[k];
1398 p[k] &= ~r[k];
1399 }
1400 rest = SCM_CDR (rest);
1401 }
1402 return scm_values (scm_list_2 (cs1, cs2));
1403}
1404#undef FUNC_NAME
1405
a17d2654
LC
1406\f
1407/* Standard character sets. */
1408
63181a97
MV
1409SCM scm_char_set_lower_case;
1410SCM scm_char_set_upper_case;
1411SCM scm_char_set_title_case;
1412SCM scm_char_set_letter;
1413SCM scm_char_set_digit;
1414SCM scm_char_set_letter_and_digit;
1415SCM scm_char_set_graphic;
1416SCM scm_char_set_printing;
1417SCM scm_char_set_whitespace;
1418SCM scm_char_set_iso_control;
1419SCM scm_char_set_punctuation;
1420SCM scm_char_set_symbol;
1421SCM scm_char_set_hex_digit;
1422SCM scm_char_set_blank;
1423SCM scm_char_set_ascii;
1424SCM scm_char_set_empty;
1425SCM scm_char_set_full;
1426
63181a97 1427
a17d2654
LC
1428/* Create an empty character set and return it after binding it to NAME. */
1429static inline SCM
1430define_charset (const char *name)
63181a97 1431{
a17d2654 1432 SCM cs = make_char_set (NULL);
63181a97
MV
1433 scm_c_define (name, cs);
1434 return scm_permanent_object (cs);
1435}
1436
a17d2654
LC
1437/* Membership predicates for the various char sets.
1438
1439 XXX: The `punctuation' and `symbol' char sets have no direct equivalent in
1440 <ctype.h>. Thus, the predicates below yield correct results for ASCII,
1441 but they do not provide the result described by the SRFI for Latin-1. The
1442 correct Latin-1 result could only be obtained by hard-coding the
1443 characters listed by the SRFI, but the problem would remain for other
1444 8-bit charsets.
1445
1446 Similarly, character 0xA0 in Latin-1 (unbreakable space, `#\0240') should
1447 be part of `char-set:blank'. However, glibc's current (2006/09) Latin-1
1448 locales (which use the ISO 14652 "i18n" FDCC-set) do not consider it
1449 `blank' so it ends up in `char-set:punctuation'. */
1450#ifdef HAVE_ISBLANK
1451# define CSET_BLANK_PRED(c) (isblank (c))
1452#else
1453# define CSET_BLANK_PRED(c) \
1454 (((c) == ' ') || ((c) == '\t'))
1455#endif
1456
1457#define CSET_SYMBOL_PRED(c) \
1458 (((c) != '\0') && (strchr ("$+<=>^`|~", (c)) != NULL))
1459#define CSET_PUNCT_PRED(c) \
1460 ((ispunct (c)) && (!CSET_SYMBOL_PRED (c)))
1461
1462#define CSET_LOWER_PRED(c) (islower (c))
1463#define CSET_UPPER_PRED(c) (isupper (c))
1464#define CSET_LETTER_PRED(c) (isalpha (c))
1465#define CSET_DIGIT_PRED(c) (isdigit (c))
1466#define CSET_WHITESPACE_PRED(c) (isspace (c))
1467#define CSET_CONTROL_PRED(c) (iscntrl (c))
1468#define CSET_HEX_DIGIT_PRED(c) (isxdigit (c))
1469#define CSET_ASCII_PRED(c) (isascii (c))
1470
1471/* Some char sets are explicitly defined by the SRFI as a union of other char
1472 sets so we try to follow this closely. */
1473
1474#define CSET_LETTER_AND_DIGIT_PRED(c) \
1475 (CSET_LETTER_PRED (c) || CSET_DIGIT_PRED (c))
1476
1477#define CSET_GRAPHIC_PRED(c) \
1478 (CSET_LETTER_PRED (c) || CSET_DIGIT_PRED (c) \
1479 || CSET_PUNCT_PRED (c) || CSET_SYMBOL_PRED (c))
1480
1481#define CSET_PRINTING_PRED(c) \
1482 (CSET_GRAPHIC_PRED (c) || CSET_WHITESPACE_PRED (c))
1483
1484/* False and true predicates. */
1485#define CSET_TRUE_PRED(c) (1)
1486#define CSET_FALSE_PRED(c) (0)
1487
1488
1489/* Compute the contents of all the standard character sets. Computation may
1490 need to be re-done at `setlocale'-time because some char sets (e.g.,
1491 `char-set:letter') need to reflect the character set supported by Guile.
1492
1493 For instance, at startup time, the "C" locale is used, thus Guile supports
1494 only ASCII; therefore, `char-set:letter' only contains English letters.
1495 The user can change this by invoking `setlocale' and specifying a locale
1496 with an 8-bit charset, thereby augmenting some of the SRFI-14 standard
1497 character sets.
1498
1499 This works because some of the predicates used below to construct
1500 character sets (e.g., `isalpha(3)') are locale-dependent (so
1501 charset-dependent, though generally not language-dependent). For details,
1502 please see the `guile-devel' mailing list archive of September 2006. */
1503void
1504scm_srfi_14_compute_char_sets (void)
63181a97 1505{
a17d2654
LC
1506#define UPDATE_CSET(c, cset, pred) \
1507 do \
1508 { \
1509 if (pred (c)) \
1510 SCM_CHARSET_SET ((cset), (c)); \
1511 else \
1512 SCM_CHARSET_UNSET ((cset), (c)); \
1513 } \
1514 while (0)
1515
1516 register int ch;
1517
1518 for (ch = 0; ch < 256; ch++)
63181a97 1519 {
a17d2654
LC
1520 UPDATE_CSET (ch, scm_char_set_upper_case, CSET_UPPER_PRED);
1521 UPDATE_CSET (ch, scm_char_set_lower_case, CSET_LOWER_PRED);
1522 UPDATE_CSET (ch, scm_char_set_title_case, CSET_FALSE_PRED);
1523 UPDATE_CSET (ch, scm_char_set_letter, CSET_LETTER_PRED);
1524 UPDATE_CSET (ch, scm_char_set_digit, CSET_DIGIT_PRED);
1525 UPDATE_CSET (ch, scm_char_set_letter_and_digit,
1526 CSET_LETTER_AND_DIGIT_PRED);
1527 UPDATE_CSET (ch, scm_char_set_graphic, CSET_GRAPHIC_PRED);
1528 UPDATE_CSET (ch, scm_char_set_printing, CSET_PRINTING_PRED);
1529 UPDATE_CSET (ch, scm_char_set_whitespace, CSET_WHITESPACE_PRED);
1530 UPDATE_CSET (ch, scm_char_set_iso_control, CSET_CONTROL_PRED);
1531 UPDATE_CSET (ch, scm_char_set_punctuation, CSET_PUNCT_PRED);
1532 UPDATE_CSET (ch, scm_char_set_symbol, CSET_SYMBOL_PRED);
1533 UPDATE_CSET (ch, scm_char_set_hex_digit, CSET_HEX_DIGIT_PRED);
1534 UPDATE_CSET (ch, scm_char_set_blank, CSET_BLANK_PRED);
1535 UPDATE_CSET (ch, scm_char_set_ascii, CSET_ASCII_PRED);
1536 UPDATE_CSET (ch, scm_char_set_empty, CSET_FALSE_PRED);
1537 UPDATE_CSET (ch, scm_char_set_full, CSET_TRUE_PRED);
63181a97 1538 }
63181a97 1539
a17d2654 1540#undef UPDATE_CSET
63181a97
MV
1541}
1542
a17d2654 1543\f
63181a97
MV
1544void
1545scm_init_srfi_14 (void)
1546{
1547 scm_tc16_charset = scm_make_smob_type ("character-set",
1548 BYTES_PER_CHARSET);
1549 scm_set_smob_free (scm_tc16_charset, charset_free);
1550 scm_set_smob_print (scm_tc16_charset, charset_print);
1551
a17d2654
LC
1552 scm_char_set_upper_case = define_charset ("char-set:upper-case");
1553 scm_char_set_lower_case = define_charset ("char-set:lower-case");
1554 scm_char_set_title_case = define_charset ("char-set:title-case");
1555 scm_char_set_letter = define_charset ("char-set:letter");
1556 scm_char_set_digit = define_charset ("char-set:digit");
1557 scm_char_set_letter_and_digit = define_charset ("char-set:letter+digit");
1558 scm_char_set_graphic = define_charset ("char-set:graphic");
1559 scm_char_set_printing = define_charset ("char-set:printing");
1560 scm_char_set_whitespace = define_charset ("char-set:whitespace");
1561 scm_char_set_iso_control = define_charset ("char-set:iso-control");
1562 scm_char_set_punctuation = define_charset ("char-set:punctuation");
1563 scm_char_set_symbol = define_charset ("char-set:symbol");
1564 scm_char_set_hex_digit = define_charset ("char-set:hex-digit");
1565 scm_char_set_blank = define_charset ("char-set:blank");
1566 scm_char_set_ascii = define_charset ("char-set:ascii");
1567 scm_char_set_empty = define_charset ("char-set:empty");
1568 scm_char_set_full = define_charset ("char-set:full");
1569
1570 scm_srfi_14_compute_char_sets ();
63181a97
MV
1571
1572#include "libguile/srfi-14.x"
1573}
1574
1575/* End of srfi-14.c. */