Merge commit '29776e85da637ec4d44b2b2822d6934a50c0084b' into boehm-demers-weiser-gc
[bpt/guile.git] / libguile / srfi-14.c
1 /* srfi-14.c --- SRFI-14 procedures for Guile
2 *
3 * Copyright (C) 2001, 2004, 2006, 2007 Free Software Foundation, Inc.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24
25 #include <string.h>
26 #include <ctype.h>
27
28 #include "libguile.h"
29 #include "libguile/srfi-14.h"
30
31
32 #define SCM_CHARSET_SET(cs, idx) \
33 (((long *) SCM_SMOB_DATA (cs))[(idx) / SCM_BITS_PER_LONG] |= \
34 (1L << ((idx) % SCM_BITS_PER_LONG)))
35
36 #define SCM_CHARSET_UNSET(cs, idx) \
37 (((long *) SCM_SMOB_DATA (cs))[(idx) / SCM_BITS_PER_LONG] &= \
38 (~(1L << ((idx) % SCM_BITS_PER_LONG))))
39
40 #define BYTES_PER_CHARSET (SCM_CHARSET_SIZE / 8)
41 #define LONGS_PER_CHARSET (SCM_CHARSET_SIZE / SCM_BITS_PER_LONG)
42
43
44 /* Smob type code for character sets. */
45 int scm_tc16_charset = 0;
46
47
48 /* Smob print hook for character sets. */
49 static int
50 charset_print (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED)
51 {
52 int i;
53 int first = 1;
54
55 scm_puts ("#<charset {", port);
56 for (i = 0; i < SCM_CHARSET_SIZE; i++)
57 if (SCM_CHARSET_GET (charset, i))
58 {
59 if (first)
60 first = 0;
61 else
62 scm_puts (" ", port);
63 scm_write (SCM_MAKE_CHAR (i), port);
64 }
65 scm_puts ("}>", port);
66 return 1;
67 }
68
69
70
71 /* Create a new, empty character set. */
72 static SCM
73 make_char_set (const char * func_name)
74 {
75 long * p;
76
77 p = scm_gc_malloc (BYTES_PER_CHARSET, "character-set");
78 memset (p, 0, BYTES_PER_CHARSET);
79 SCM_RETURN_NEWSMOB (scm_tc16_charset, p);
80 }
81
82
83 SCM_DEFINE (scm_char_set_p, "char-set?", 1, 0, 0,
84 (SCM obj),
85 "Return @code{#t} if @var{obj} is a character set, @code{#f}\n"
86 "otherwise.")
87 #define FUNC_NAME s_scm_char_set_p
88 {
89 return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_charset, obj));
90 }
91 #undef FUNC_NAME
92
93
94 SCM_DEFINE (scm_char_set_eq, "char-set=", 0, 0, 1,
95 (SCM char_sets),
96 "Return @code{#t} if all given character sets are equal.")
97 #define FUNC_NAME s_scm_char_set_eq
98 {
99 int argnum = 1;
100 long *cs1_data = NULL;
101
102 SCM_VALIDATE_REST_ARGUMENT (char_sets);
103
104 while (!scm_is_null (char_sets))
105 {
106 SCM csi = SCM_CAR (char_sets);
107 long *csi_data;
108
109 SCM_VALIDATE_SMOB (argnum, csi, charset);
110 argnum++;
111 csi_data = (long *) SCM_SMOB_DATA (csi);
112 if (cs1_data == NULL)
113 cs1_data = csi_data;
114 else if (memcmp (cs1_data, csi_data, BYTES_PER_CHARSET) != 0)
115 return SCM_BOOL_F;
116 char_sets = SCM_CDR (char_sets);
117 }
118 return SCM_BOOL_T;
119 }
120 #undef FUNC_NAME
121
122
123 SCM_DEFINE (scm_char_set_leq, "char-set<=", 0, 0, 1,
124 (SCM char_sets),
125 "Return @code{#t} if every character set @var{cs}i is a subset\n"
126 "of character set @var{cs}i+1.")
127 #define FUNC_NAME s_scm_char_set_leq
128 {
129 int argnum = 1;
130 long *prev_data = NULL;
131
132 SCM_VALIDATE_REST_ARGUMENT (char_sets);
133
134 while (!scm_is_null (char_sets))
135 {
136 SCM csi = SCM_CAR (char_sets);
137 long *csi_data;
138
139 SCM_VALIDATE_SMOB (argnum, csi, charset);
140 argnum++;
141 csi_data = (long *) SCM_SMOB_DATA (csi);
142 if (prev_data)
143 {
144 int k;
145
146 for (k = 0; k < LONGS_PER_CHARSET; k++)
147 {
148 if ((prev_data[k] & csi_data[k]) != prev_data[k])
149 return SCM_BOOL_F;
150 }
151 }
152 prev_data = csi_data;
153 char_sets = SCM_CDR (char_sets);
154 }
155 return SCM_BOOL_T;
156 }
157 #undef FUNC_NAME
158
159
160 SCM_DEFINE (scm_char_set_hash, "char-set-hash", 1, 1, 0,
161 (SCM cs, SCM bound),
162 "Compute a hash value for the character set @var{cs}. If\n"
163 "@var{bound} is given and non-zero, it restricts the\n"
164 "returned value to the range 0 @dots{} @var{bound - 1}.")
165 #define FUNC_NAME s_scm_char_set_hash
166 {
167 const unsigned long default_bnd = 871;
168 unsigned long bnd;
169 long * p;
170 unsigned long val = 0;
171 int k;
172
173 SCM_VALIDATE_SMOB (1, cs, charset);
174
175 if (SCM_UNBNDP (bound))
176 bnd = default_bnd;
177 else
178 {
179 bnd = scm_to_ulong (bound);
180 if (bnd == 0)
181 bnd = default_bnd;
182 }
183
184 p = (long *) SCM_SMOB_DATA (cs);
185 for (k = 0; k < LONGS_PER_CHARSET; k++)
186 {
187 if (p[k] != 0)
188 val = p[k] + (val << 1);
189 }
190 return scm_from_ulong (val % bnd);
191 }
192 #undef FUNC_NAME
193
194
195 SCM_DEFINE (scm_char_set_cursor, "char-set-cursor", 1, 0, 0,
196 (SCM cs),
197 "Return a cursor into the character set @var{cs}.")
198 #define FUNC_NAME s_scm_char_set_cursor
199 {
200 int idx;
201
202 SCM_VALIDATE_SMOB (1, cs, charset);
203 for (idx = 0; idx < SCM_CHARSET_SIZE; idx++)
204 {
205 if (SCM_CHARSET_GET (cs, idx))
206 break;
207 }
208 return SCM_I_MAKINUM (idx);
209 }
210 #undef FUNC_NAME
211
212
213 SCM_DEFINE (scm_char_set_ref, "char-set-ref", 2, 0, 0,
214 (SCM cs, SCM cursor),
215 "Return the character at the current cursor position\n"
216 "@var{cursor} in the character set @var{cs}. It is an error to\n"
217 "pass a cursor for which @code{end-of-char-set?} returns true.")
218 #define FUNC_NAME s_scm_char_set_ref
219 {
220 size_t ccursor = scm_to_size_t (cursor);
221 SCM_VALIDATE_SMOB (1, cs, charset);
222
223 if (ccursor >= SCM_CHARSET_SIZE || !SCM_CHARSET_GET (cs, ccursor))
224 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
225 return SCM_MAKE_CHAR (ccursor);
226 }
227 #undef FUNC_NAME
228
229
230 SCM_DEFINE (scm_char_set_cursor_next, "char-set-cursor-next", 2, 0, 0,
231 (SCM cs, SCM cursor),
232 "Advance the character set cursor @var{cursor} to the next\n"
233 "character in the character set @var{cs}. It is an error if the\n"
234 "cursor given satisfies @code{end-of-char-set?}.")
235 #define FUNC_NAME s_scm_char_set_cursor_next
236 {
237 size_t ccursor = scm_to_size_t (cursor);
238 SCM_VALIDATE_SMOB (1, cs, charset);
239
240 if (ccursor >= SCM_CHARSET_SIZE || !SCM_CHARSET_GET (cs, ccursor))
241 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
242 for (ccursor++; ccursor < SCM_CHARSET_SIZE; ccursor++)
243 {
244 if (SCM_CHARSET_GET (cs, ccursor))
245 break;
246 }
247 return SCM_I_MAKINUM (ccursor);
248 }
249 #undef FUNC_NAME
250
251
252 SCM_DEFINE (scm_end_of_char_set_p, "end-of-char-set?", 1, 0, 0,
253 (SCM cursor),
254 "Return @code{#t} if @var{cursor} has reached the end of a\n"
255 "character set, @code{#f} otherwise.")
256 #define FUNC_NAME s_scm_end_of_char_set_p
257 {
258 size_t ccursor = scm_to_size_t (cursor);
259 return scm_from_bool (ccursor >= SCM_CHARSET_SIZE);
260 }
261 #undef FUNC_NAME
262
263
264 SCM_DEFINE (scm_char_set_fold, "char-set-fold", 3, 0, 0,
265 (SCM kons, SCM knil, SCM cs),
266 "Fold the procedure @var{kons} over the character set @var{cs},\n"
267 "initializing it with @var{knil}.")
268 #define FUNC_NAME s_scm_char_set_fold
269 {
270 int k;
271
272 SCM_VALIDATE_PROC (1, kons);
273 SCM_VALIDATE_SMOB (3, cs, charset);
274
275 for (k = 0; k < SCM_CHARSET_SIZE; k++)
276 if (SCM_CHARSET_GET (cs, k))
277 {
278 knil = scm_call_2 (kons, SCM_MAKE_CHAR (k), knil);
279 }
280 return knil;
281 }
282 #undef FUNC_NAME
283
284
285 SCM_DEFINE (scm_char_set_unfold, "char-set-unfold", 4, 1, 0,
286 (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
287 "This is a fundamental constructor for character sets.\n"
288 "@itemize @bullet\n"
289 "@item @var{g} is used to generate a series of ``seed'' values\n"
290 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
291 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
292 "@item @var{p} tells us when to stop -- when it returns true\n"
293 "when applied to one of the seed values.\n"
294 "@item @var{f} maps each seed value to a character. These\n"
295 "characters are added to the base character set @var{base_cs} to\n"
296 "form the result; @var{base_cs} defaults to the empty set.\n"
297 "@end itemize")
298 #define FUNC_NAME s_scm_char_set_unfold
299 {
300 SCM result, tmp;
301
302 SCM_VALIDATE_PROC (1, p);
303 SCM_VALIDATE_PROC (2, f);
304 SCM_VALIDATE_PROC (3, g);
305 if (!SCM_UNBNDP (base_cs))
306 {
307 SCM_VALIDATE_SMOB (5, base_cs, charset);
308 result = scm_char_set_copy (base_cs);
309 }
310 else
311 result = make_char_set (FUNC_NAME);
312
313 tmp = scm_call_1 (p, seed);
314 while (scm_is_false (tmp))
315 {
316 SCM ch = scm_call_1 (f, seed);
317 if (!SCM_CHARP (ch))
318 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
319 SCM_CHARSET_SET (result, SCM_CHAR (ch));
320
321 seed = scm_call_1 (g, seed);
322 tmp = scm_call_1 (p, seed);
323 }
324 return result;
325 }
326 #undef FUNC_NAME
327
328
329 SCM_DEFINE (scm_char_set_unfold_x, "char-set-unfold!", 5, 0, 0,
330 (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
331 "This is a fundamental constructor for character sets.\n"
332 "@itemize @bullet\n"
333 "@item @var{g} is used to generate a series of ``seed'' values\n"
334 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
335 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
336 "@item @var{p} tells us when to stop -- when it returns true\n"
337 "when applied to one of the seed values.\n"
338 "@item @var{f} maps each seed value to a character. These\n"
339 "characters are added to the base character set @var{base_cs} to\n"
340 "form the result; @var{base_cs} defaults to the empty set.\n"
341 "@end itemize")
342 #define FUNC_NAME s_scm_char_set_unfold_x
343 {
344 SCM tmp;
345
346 SCM_VALIDATE_PROC (1, p);
347 SCM_VALIDATE_PROC (2, f);
348 SCM_VALIDATE_PROC (3, g);
349 SCM_VALIDATE_SMOB (5, base_cs, charset);
350
351 tmp = scm_call_1 (p, seed);
352 while (scm_is_false (tmp))
353 {
354 SCM ch = scm_call_1 (f, seed);
355 if (!SCM_CHARP (ch))
356 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
357 SCM_CHARSET_SET (base_cs, SCM_CHAR (ch));
358
359 seed = scm_call_1 (g, seed);
360 tmp = scm_call_1 (p, seed);
361 }
362 return base_cs;
363 }
364 #undef FUNC_NAME
365
366
367 SCM_DEFINE (scm_char_set_for_each, "char-set-for-each", 2, 0, 0,
368 (SCM proc, SCM cs),
369 "Apply @var{proc} to every character in the character set\n"
370 "@var{cs}. The return value is not specified.")
371 #define FUNC_NAME s_scm_char_set_for_each
372 {
373 int k;
374
375 SCM_VALIDATE_PROC (1, proc);
376 SCM_VALIDATE_SMOB (2, cs, charset);
377
378 for (k = 0; k < SCM_CHARSET_SIZE; k++)
379 if (SCM_CHARSET_GET (cs, k))
380 scm_call_1 (proc, SCM_MAKE_CHAR (k));
381 return SCM_UNSPECIFIED;
382 }
383 #undef FUNC_NAME
384
385
386 SCM_DEFINE (scm_char_set_map, "char-set-map", 2, 0, 0,
387 (SCM proc, SCM cs),
388 "Map the procedure @var{proc} over every character in @var{cs}.\n"
389 "@var{proc} must be a character -> character procedure.")
390 #define FUNC_NAME s_scm_char_set_map
391 {
392 SCM result;
393 int k;
394
395 SCM_VALIDATE_PROC (1, proc);
396 SCM_VALIDATE_SMOB (2, cs, charset);
397
398 result = make_char_set (FUNC_NAME);
399 for (k = 0; k < SCM_CHARSET_SIZE; k++)
400 if (SCM_CHARSET_GET (cs, k))
401 {
402 SCM ch = scm_call_1 (proc, SCM_MAKE_CHAR (k));
403 if (!SCM_CHARP (ch))
404 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (proc));
405 SCM_CHARSET_SET (result, SCM_CHAR (ch));
406 }
407 return result;
408 }
409 #undef FUNC_NAME
410
411
412 SCM_DEFINE (scm_char_set_copy, "char-set-copy", 1, 0, 0,
413 (SCM cs),
414 "Return a newly allocated character set containing all\n"
415 "characters in @var{cs}.")
416 #define FUNC_NAME s_scm_char_set_copy
417 {
418 SCM ret;
419 long * p1, * p2;
420 int k;
421
422 SCM_VALIDATE_SMOB (1, cs, charset);
423 ret = make_char_set (FUNC_NAME);
424 p1 = (long *) SCM_SMOB_DATA (cs);
425 p2 = (long *) SCM_SMOB_DATA (ret);
426 for (k = 0; k < LONGS_PER_CHARSET; k++)
427 p2[k] = p1[k];
428 return ret;
429 }
430 #undef FUNC_NAME
431
432
433 SCM_DEFINE (scm_char_set, "char-set", 0, 0, 1,
434 (SCM rest),
435 "Return a character set containing all given characters.")
436 #define FUNC_NAME s_scm_char_set
437 {
438 SCM cs;
439 long * p;
440 int argnum = 1;
441
442 SCM_VALIDATE_REST_ARGUMENT (rest);
443 cs = make_char_set (FUNC_NAME);
444 p = (long *) SCM_SMOB_DATA (cs);
445 while (!scm_is_null (rest))
446 {
447 int c;
448
449 SCM_VALIDATE_CHAR_COPY (argnum, SCM_CAR (rest), c);
450 argnum++;
451 rest = SCM_CDR (rest);
452 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
453 }
454 return cs;
455 }
456 #undef FUNC_NAME
457
458
459 SCM_DEFINE (scm_list_to_char_set, "list->char-set", 1, 1, 0,
460 (SCM list, SCM base_cs),
461 "Convert the character list @var{list} to a character set. If\n"
462 "the character set @var{base_cs} is given, the character in this\n"
463 "set are also included in the result.")
464 #define FUNC_NAME s_scm_list_to_char_set
465 {
466 SCM cs;
467 long * p;
468
469 SCM_VALIDATE_LIST (1, list);
470 if (SCM_UNBNDP (base_cs))
471 cs = make_char_set (FUNC_NAME);
472 else
473 {
474 SCM_VALIDATE_SMOB (2, base_cs, charset);
475 cs = scm_char_set_copy (base_cs);
476 }
477 p = (long *) SCM_SMOB_DATA (cs);
478 while (!scm_is_null (list))
479 {
480 SCM chr = SCM_CAR (list);
481 int c;
482
483 SCM_VALIDATE_CHAR_COPY (0, chr, c);
484 list = SCM_CDR (list);
485
486 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
487 }
488 return cs;
489 }
490 #undef FUNC_NAME
491
492
493 SCM_DEFINE (scm_list_to_char_set_x, "list->char-set!", 2, 0, 0,
494 (SCM list, SCM base_cs),
495 "Convert the character list @var{list} to a character set. The\n"
496 "characters are added to @var{base_cs} and @var{base_cs} is\n"
497 "returned.")
498 #define FUNC_NAME s_scm_list_to_char_set_x
499 {
500 long * p;
501
502 SCM_VALIDATE_LIST (1, list);
503 SCM_VALIDATE_SMOB (2, base_cs, charset);
504 p = (long *) SCM_SMOB_DATA (base_cs);
505 while (!scm_is_null (list))
506 {
507 SCM chr = SCM_CAR (list);
508 int c;
509
510 SCM_VALIDATE_CHAR_COPY (0, chr, c);
511 list = SCM_CDR (list);
512
513 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
514 }
515 return base_cs;
516 }
517 #undef FUNC_NAME
518
519
520 SCM_DEFINE (scm_string_to_char_set, "string->char-set", 1, 1, 0,
521 (SCM str, SCM base_cs),
522 "Convert the string @var{str} to a character set. If the\n"
523 "character set @var{base_cs} is given, the characters in this\n"
524 "set are also included in the result.")
525 #define FUNC_NAME s_scm_string_to_char_set
526 {
527 SCM cs;
528 long * p;
529 const char * s;
530 size_t k = 0, len;
531
532 SCM_VALIDATE_STRING (1, str);
533 if (SCM_UNBNDP (base_cs))
534 cs = make_char_set (FUNC_NAME);
535 else
536 {
537 SCM_VALIDATE_SMOB (2, base_cs, charset);
538 cs = scm_char_set_copy (base_cs);
539 }
540 p = (long *) SCM_SMOB_DATA (cs);
541 s = scm_i_string_chars (str);
542 len = scm_i_string_length (str);
543 while (k < len)
544 {
545 int c = s[k++];
546 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
547 }
548 scm_remember_upto_here_1 (str);
549 return cs;
550 }
551 #undef FUNC_NAME
552
553
554 SCM_DEFINE (scm_string_to_char_set_x, "string->char-set!", 2, 0, 0,
555 (SCM str, SCM base_cs),
556 "Convert the string @var{str} to a character set. The\n"
557 "characters from the string are added to @var{base_cs}, and\n"
558 "@var{base_cs} is returned.")
559 #define FUNC_NAME s_scm_string_to_char_set_x
560 {
561 long * p;
562 const char * s;
563 size_t k = 0, len;
564
565 SCM_VALIDATE_STRING (1, str);
566 SCM_VALIDATE_SMOB (2, base_cs, charset);
567 p = (long *) SCM_SMOB_DATA (base_cs);
568 s = scm_i_string_chars (str);
569 len = scm_i_string_length (str);
570 while (k < len)
571 {
572 int c = s[k++];
573 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
574 }
575 scm_remember_upto_here_1 (str);
576 return base_cs;
577 }
578 #undef FUNC_NAME
579
580
581 SCM_DEFINE (scm_char_set_filter, "char-set-filter", 2, 1, 0,
582 (SCM pred, SCM cs, SCM base_cs),
583 "Return a character set containing every character from @var{cs}\n"
584 "so that it satisfies @var{pred}. If provided, the characters\n"
585 "from @var{base_cs} are added to the result.")
586 #define FUNC_NAME s_scm_char_set_filter
587 {
588 SCM ret;
589 int k;
590 long * p;
591
592 SCM_VALIDATE_PROC (1, pred);
593 SCM_VALIDATE_SMOB (2, cs, charset);
594 if (!SCM_UNBNDP (base_cs))
595 {
596 SCM_VALIDATE_SMOB (3, base_cs, charset);
597 ret = scm_char_set_copy (base_cs);
598 }
599 else
600 ret = make_char_set (FUNC_NAME);
601 p = (long *) SCM_SMOB_DATA (ret);
602 for (k = 0; k < SCM_CHARSET_SIZE; k++)
603 {
604 if (SCM_CHARSET_GET (cs, k))
605 {
606 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
607
608 if (scm_is_true (res))
609 p[k / SCM_BITS_PER_LONG] |= 1L << (k % SCM_BITS_PER_LONG);
610 }
611 }
612 return ret;
613 }
614 #undef FUNC_NAME
615
616
617 SCM_DEFINE (scm_char_set_filter_x, "char-set-filter!", 3, 0, 0,
618 (SCM pred, SCM cs, SCM base_cs),
619 "Return a character set containing every character from @var{cs}\n"
620 "so that it satisfies @var{pred}. The characters are added to\n"
621 "@var{base_cs} and @var{base_cs} is returned.")
622 #define FUNC_NAME s_scm_char_set_filter_x
623 {
624 int k;
625 long * p;
626
627 SCM_VALIDATE_PROC (1, pred);
628 SCM_VALIDATE_SMOB (2, cs, charset);
629 SCM_VALIDATE_SMOB (3, base_cs, charset);
630 p = (long *) SCM_SMOB_DATA (base_cs);
631 for (k = 0; k < SCM_CHARSET_SIZE; k++)
632 {
633 if (SCM_CHARSET_GET (cs, k))
634 {
635 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
636
637 if (scm_is_true (res))
638 p[k / SCM_BITS_PER_LONG] |= 1L << (k % SCM_BITS_PER_LONG);
639 }
640 }
641 return base_cs;
642 }
643 #undef FUNC_NAME
644
645
646 SCM_DEFINE (scm_ucs_range_to_char_set, "ucs-range->char-set", 2, 2, 0,
647 (SCM lower, SCM upper, SCM error, SCM base_cs),
648 "Return a character set containing all characters whose\n"
649 "character codes lie in the half-open range\n"
650 "[@var{lower},@var{upper}).\n"
651 "\n"
652 "If @var{error} is a true value, an error is signalled if the\n"
653 "specified range contains characters which are not contained in\n"
654 "the implemented character range. If @var{error} is @code{#f},\n"
655 "these characters are silently left out of the resultung\n"
656 "character set.\n"
657 "\n"
658 "The characters in @var{base_cs} are added to the result, if\n"
659 "given.")
660 #define FUNC_NAME s_scm_ucs_range_to_char_set
661 {
662 SCM cs;
663 size_t clower, cupper;
664 long * p;
665
666 clower = scm_to_size_t (lower);
667 cupper = scm_to_size_t (upper);
668 SCM_ASSERT_RANGE (2, upper, cupper >= clower);
669 if (!SCM_UNBNDP (error))
670 {
671 if (scm_is_true (error))
672 {
673 SCM_ASSERT_RANGE (1, lower, clower <= SCM_CHARSET_SIZE);
674 SCM_ASSERT_RANGE (2, upper, cupper <= SCM_CHARSET_SIZE);
675 }
676 }
677 if (clower > SCM_CHARSET_SIZE)
678 clower = SCM_CHARSET_SIZE;
679 if (cupper > SCM_CHARSET_SIZE)
680 cupper = SCM_CHARSET_SIZE;
681 if (SCM_UNBNDP (base_cs))
682 cs = make_char_set (FUNC_NAME);
683 else
684 {
685 SCM_VALIDATE_SMOB (4, base_cs, charset);
686 cs = scm_char_set_copy (base_cs);
687 }
688 p = (long *) SCM_SMOB_DATA (cs);
689 while (clower < cupper)
690 {
691 p[clower / SCM_BITS_PER_LONG] |= 1L << (clower % SCM_BITS_PER_LONG);
692 clower++;
693 }
694 return cs;
695 }
696 #undef FUNC_NAME
697
698
699 SCM_DEFINE (scm_ucs_range_to_char_set_x, "ucs-range->char-set!", 4, 0, 0,
700 (SCM lower, SCM upper, SCM error, SCM base_cs),
701 "Return a character set containing all characters whose\n"
702 "character codes lie in the half-open range\n"
703 "[@var{lower},@var{upper}).\n"
704 "\n"
705 "If @var{error} is a true value, an error is signalled if the\n"
706 "specified range contains characters which are not contained in\n"
707 "the implemented character range. If @var{error} is @code{#f},\n"
708 "these characters are silently left out of the resultung\n"
709 "character set.\n"
710 "\n"
711 "The characters are added to @var{base_cs} and @var{base_cs} is\n"
712 "returned.")
713 #define FUNC_NAME s_scm_ucs_range_to_char_set_x
714 {
715 size_t clower, cupper;
716 long * p;
717
718 clower = scm_to_size_t (lower);
719 cupper = scm_to_size_t (upper);
720 SCM_ASSERT_RANGE (2, upper, cupper >= clower);
721 if (scm_is_true (error))
722 {
723 SCM_ASSERT_RANGE (1, lower, clower <= SCM_CHARSET_SIZE);
724 SCM_ASSERT_RANGE (2, upper, cupper <= SCM_CHARSET_SIZE);
725 }
726 if (clower > SCM_CHARSET_SIZE)
727 clower = SCM_CHARSET_SIZE;
728 if (cupper > SCM_CHARSET_SIZE)
729 cupper = SCM_CHARSET_SIZE;
730 p = (long *) SCM_SMOB_DATA (base_cs);
731 while (clower < cupper)
732 {
733 p[clower / SCM_BITS_PER_LONG] |= 1L << (clower % SCM_BITS_PER_LONG);
734 clower++;
735 }
736 return base_cs;
737 }
738 #undef FUNC_NAME
739
740 SCM_DEFINE (scm_to_char_set, "->char-set", 1, 0, 0,
741 (SCM x),
742 "Coerces x into a char-set. @var{x} may be a string, character or char-set. A string is converted to the set of its constituent characters; a character is converted to a singleton set; a char-set is returned as-is.")
743 #define FUNC_NAME s_scm_to_char_set
744 {
745 if (scm_is_string (x))
746 return scm_string_to_char_set (x, SCM_UNDEFINED);
747 else if (SCM_CHARP (x))
748 return scm_char_set (scm_list_1 (x));
749 else if (SCM_SMOB_PREDICATE (scm_tc16_charset, x))
750 return x;
751 else
752 scm_wrong_type_arg (NULL, 0, x);
753 }
754 #undef FUNC_NAME
755
756 SCM_DEFINE (scm_char_set_size, "char-set-size", 1, 0, 0,
757 (SCM cs),
758 "Return the number of elements in character set @var{cs}.")
759 #define FUNC_NAME s_scm_char_set_size
760 {
761 int k, count = 0;
762
763 SCM_VALIDATE_SMOB (1, cs, charset);
764 for (k = 0; k < SCM_CHARSET_SIZE; k++)
765 if (SCM_CHARSET_GET (cs, k))
766 count++;
767 return SCM_I_MAKINUM (count);
768 }
769 #undef FUNC_NAME
770
771
772 SCM_DEFINE (scm_char_set_count, "char-set-count", 2, 0, 0,
773 (SCM pred, SCM cs),
774 "Return the number of the elements int the character set\n"
775 "@var{cs} which satisfy the predicate @var{pred}.")
776 #define FUNC_NAME s_scm_char_set_count
777 {
778 int k, count = 0;
779
780 SCM_VALIDATE_PROC (1, pred);
781 SCM_VALIDATE_SMOB (2, cs, charset);
782
783 for (k = 0; k < SCM_CHARSET_SIZE; k++)
784 if (SCM_CHARSET_GET (cs, k))
785 {
786 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
787 if (scm_is_true (res))
788 count++;
789 }
790 return SCM_I_MAKINUM (count);
791 }
792 #undef FUNC_NAME
793
794
795 SCM_DEFINE (scm_char_set_to_list, "char-set->list", 1, 0, 0,
796 (SCM cs),
797 "Return a list containing the elements of the character set\n"
798 "@var{cs}.")
799 #define FUNC_NAME s_scm_char_set_to_list
800 {
801 int k;
802 SCM result = SCM_EOL;
803
804 SCM_VALIDATE_SMOB (1, cs, charset);
805 for (k = SCM_CHARSET_SIZE; k > 0; k--)
806 if (SCM_CHARSET_GET (cs, k - 1))
807 result = scm_cons (SCM_MAKE_CHAR (k - 1), result);
808 return result;
809 }
810 #undef FUNC_NAME
811
812
813 SCM_DEFINE (scm_char_set_to_string, "char-set->string", 1, 0, 0,
814 (SCM cs),
815 "Return a string containing the elements of the character set\n"
816 "@var{cs}. The order in which the characters are placed in the\n"
817 "string is not defined.")
818 #define FUNC_NAME s_scm_char_set_to_string
819 {
820 int k;
821 int count = 0;
822 int idx = 0;
823 SCM result;
824 char * p;
825
826 SCM_VALIDATE_SMOB (1, cs, charset);
827 for (k = 0; k < SCM_CHARSET_SIZE; k++)
828 if (SCM_CHARSET_GET (cs, k))
829 count++;
830 result = scm_i_make_string (count, &p);
831 for (k = 0; k < SCM_CHARSET_SIZE; k++)
832 if (SCM_CHARSET_GET (cs, k))
833 p[idx++] = k;
834 return result;
835 }
836 #undef FUNC_NAME
837
838
839 SCM_DEFINE (scm_char_set_contains_p, "char-set-contains?", 2, 0, 0,
840 (SCM cs, SCM ch),
841 "Return @code{#t} iff the character @var{ch} is contained in the\n"
842 "character set @var{cs}.")
843 #define FUNC_NAME s_scm_char_set_contains_p
844 {
845 SCM_VALIDATE_SMOB (1, cs, charset);
846 SCM_VALIDATE_CHAR (2, ch);
847 return scm_from_bool (SCM_CHARSET_GET (cs, SCM_CHAR (ch)));
848 }
849 #undef FUNC_NAME
850
851
852 SCM_DEFINE (scm_char_set_every, "char-set-every", 2, 0, 0,
853 (SCM pred, SCM cs),
854 "Return a true value if every character in the character set\n"
855 "@var{cs} satisfies the predicate @var{pred}.")
856 #define FUNC_NAME s_scm_char_set_every
857 {
858 int k;
859 SCM res = SCM_BOOL_T;
860
861 SCM_VALIDATE_PROC (1, pred);
862 SCM_VALIDATE_SMOB (2, cs, charset);
863
864 for (k = 0; k < SCM_CHARSET_SIZE; k++)
865 if (SCM_CHARSET_GET (cs, k))
866 {
867 res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
868 if (scm_is_false (res))
869 return res;
870 }
871 return res;
872 }
873 #undef FUNC_NAME
874
875
876 SCM_DEFINE (scm_char_set_any, "char-set-any", 2, 0, 0,
877 (SCM pred, SCM cs),
878 "Return a true value if any character in the character set\n"
879 "@var{cs} satisfies the predicate @var{pred}.")
880 #define FUNC_NAME s_scm_char_set_any
881 {
882 int k;
883
884 SCM_VALIDATE_PROC (1, pred);
885 SCM_VALIDATE_SMOB (2, cs, charset);
886
887 for (k = 0; k < SCM_CHARSET_SIZE; k++)
888 if (SCM_CHARSET_GET (cs, k))
889 {
890 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
891 if (scm_is_true (res))
892 return res;
893 }
894 return SCM_BOOL_F;
895 }
896 #undef FUNC_NAME
897
898
899 SCM_DEFINE (scm_char_set_adjoin, "char-set-adjoin", 1, 0, 1,
900 (SCM cs, SCM rest),
901 "Add all character arguments to the first argument, which must\n"
902 "be a character set.")
903 #define FUNC_NAME s_scm_char_set_adjoin
904 {
905 long * p;
906
907 SCM_VALIDATE_SMOB (1, cs, charset);
908 SCM_VALIDATE_REST_ARGUMENT (rest);
909 cs = scm_char_set_copy (cs);
910
911 p = (long *) SCM_SMOB_DATA (cs);
912 while (!scm_is_null (rest))
913 {
914 SCM chr = SCM_CAR (rest);
915 int c;
916
917 SCM_VALIDATE_CHAR_COPY (1, chr, c);
918 rest = SCM_CDR (rest);
919
920 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
921 }
922 return cs;
923 }
924 #undef FUNC_NAME
925
926
927 SCM_DEFINE (scm_char_set_delete, "char-set-delete", 1, 0, 1,
928 (SCM cs, SCM rest),
929 "Delete all character arguments from the first argument, which\n"
930 "must be a character set.")
931 #define FUNC_NAME s_scm_char_set_delete
932 {
933 long * p;
934
935 SCM_VALIDATE_SMOB (1, cs, charset);
936 SCM_VALIDATE_REST_ARGUMENT (rest);
937 cs = scm_char_set_copy (cs);
938
939 p = (long *) SCM_SMOB_DATA (cs);
940 while (!scm_is_null (rest))
941 {
942 SCM chr = SCM_CAR (rest);
943 int c;
944
945 SCM_VALIDATE_CHAR_COPY (1, chr, c);
946 rest = SCM_CDR (rest);
947
948 p[c / SCM_BITS_PER_LONG] &= ~(1L << (c % SCM_BITS_PER_LONG));
949 }
950 return cs;
951 }
952 #undef FUNC_NAME
953
954
955 SCM_DEFINE (scm_char_set_adjoin_x, "char-set-adjoin!", 1, 0, 1,
956 (SCM cs, SCM rest),
957 "Add all character arguments to the first argument, which must\n"
958 "be a character set.")
959 #define FUNC_NAME s_scm_char_set_adjoin_x
960 {
961 long * p;
962
963 SCM_VALIDATE_SMOB (1, cs, charset);
964 SCM_VALIDATE_REST_ARGUMENT (rest);
965
966 p = (long *) SCM_SMOB_DATA (cs);
967 while (!scm_is_null (rest))
968 {
969 SCM chr = SCM_CAR (rest);
970 int c;
971
972 SCM_VALIDATE_CHAR_COPY (1, chr, c);
973 rest = SCM_CDR (rest);
974
975 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
976 }
977 return cs;
978 }
979 #undef FUNC_NAME
980
981
982 SCM_DEFINE (scm_char_set_delete_x, "char-set-delete!", 1, 0, 1,
983 (SCM cs, SCM rest),
984 "Delete all character arguments from the first argument, which\n"
985 "must be a character set.")
986 #define FUNC_NAME s_scm_char_set_delete_x
987 {
988 long * p;
989
990 SCM_VALIDATE_SMOB (1, cs, charset);
991 SCM_VALIDATE_REST_ARGUMENT (rest);
992
993 p = (long *) SCM_SMOB_DATA (cs);
994 while (!scm_is_null (rest))
995 {
996 SCM chr = SCM_CAR (rest);
997 int c;
998
999 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1000 rest = SCM_CDR (rest);
1001
1002 p[c / SCM_BITS_PER_LONG] &= ~(1L << (c % SCM_BITS_PER_LONG));
1003 }
1004 return cs;
1005 }
1006 #undef FUNC_NAME
1007
1008
1009 SCM_DEFINE (scm_char_set_complement, "char-set-complement", 1, 0, 0,
1010 (SCM cs),
1011 "Return the complement of the character set @var{cs}.")
1012 #define FUNC_NAME s_scm_char_set_complement
1013 {
1014 int k;
1015 SCM res;
1016 long * p, * q;
1017
1018 SCM_VALIDATE_SMOB (1, cs, charset);
1019
1020 res = make_char_set (FUNC_NAME);
1021 p = (long *) SCM_SMOB_DATA (res);
1022 q = (long *) SCM_SMOB_DATA (cs);
1023 for (k = 0; k < LONGS_PER_CHARSET; k++)
1024 p[k] = ~q[k];
1025 return res;
1026 }
1027 #undef FUNC_NAME
1028
1029
1030 SCM_DEFINE (scm_char_set_union, "char-set-union", 0, 0, 1,
1031 (SCM rest),
1032 "Return the union of all argument character sets.")
1033 #define FUNC_NAME s_scm_char_set_union
1034 {
1035 int c = 1;
1036 SCM res;
1037 long * p;
1038
1039 SCM_VALIDATE_REST_ARGUMENT (rest);
1040
1041 res = make_char_set (FUNC_NAME);
1042 p = (long *) SCM_SMOB_DATA (res);
1043 while (!scm_is_null (rest))
1044 {
1045 int k;
1046 SCM cs = SCM_CAR (rest);
1047 SCM_VALIDATE_SMOB (c, cs, charset);
1048 c++;
1049 rest = SCM_CDR (rest);
1050
1051 for (k = 0; k < LONGS_PER_CHARSET; k++)
1052 p[k] |= ((long *) SCM_SMOB_DATA (cs))[k];
1053 }
1054 return res;
1055 }
1056 #undef FUNC_NAME
1057
1058
1059 SCM_DEFINE (scm_char_set_intersection, "char-set-intersection", 0, 0, 1,
1060 (SCM rest),
1061 "Return the intersection of all argument character sets.")
1062 #define FUNC_NAME s_scm_char_set_intersection
1063 {
1064 SCM res;
1065
1066 SCM_VALIDATE_REST_ARGUMENT (rest);
1067
1068 if (scm_is_null (rest))
1069 res = make_char_set (FUNC_NAME);
1070 else
1071 {
1072 long *p;
1073 int argnum = 2;
1074
1075 res = scm_char_set_copy (SCM_CAR (rest));
1076 p = (long *) SCM_SMOB_DATA (res);
1077 rest = SCM_CDR (rest);
1078
1079 while (scm_is_pair (rest))
1080 {
1081 int k;
1082 SCM cs = SCM_CAR (rest);
1083 long *cs_data;
1084
1085 SCM_VALIDATE_SMOB (argnum, cs, charset);
1086 argnum++;
1087 cs_data = (long *) SCM_SMOB_DATA (cs);
1088 rest = SCM_CDR (rest);
1089 for (k = 0; k < LONGS_PER_CHARSET; k++)
1090 p[k] &= cs_data[k];
1091 }
1092 }
1093
1094 return res;
1095 }
1096 #undef FUNC_NAME
1097
1098
1099 SCM_DEFINE (scm_char_set_difference, "char-set-difference", 1, 0, 1,
1100 (SCM cs1, SCM rest),
1101 "Return the difference of all argument character sets.")
1102 #define FUNC_NAME s_scm_char_set_difference
1103 {
1104 int c = 2;
1105 SCM res;
1106 long * p;
1107
1108 SCM_VALIDATE_SMOB (1, cs1, charset);
1109 SCM_VALIDATE_REST_ARGUMENT (rest);
1110
1111 res = scm_char_set_copy (cs1);
1112 p = (long *) SCM_SMOB_DATA (res);
1113 while (!scm_is_null (rest))
1114 {
1115 int k;
1116 SCM cs = SCM_CAR (rest);
1117 SCM_VALIDATE_SMOB (c, cs, charset);
1118 c++;
1119 rest = SCM_CDR (rest);
1120
1121 for (k = 0; k < LONGS_PER_CHARSET; k++)
1122 p[k] &= ~((long *) SCM_SMOB_DATA (cs))[k];
1123 }
1124 return res;
1125 }
1126 #undef FUNC_NAME
1127
1128
1129 SCM_DEFINE (scm_char_set_xor, "char-set-xor", 0, 0, 1,
1130 (SCM rest),
1131 "Return the exclusive-or of all argument character sets.")
1132 #define FUNC_NAME s_scm_char_set_xor
1133 {
1134 SCM res;
1135
1136 SCM_VALIDATE_REST_ARGUMENT (rest);
1137
1138 if (scm_is_null (rest))
1139 res = make_char_set (FUNC_NAME);
1140 else
1141 {
1142 int argnum = 2;
1143 long * p;
1144
1145 res = scm_char_set_copy (SCM_CAR (rest));
1146 p = (long *) SCM_SMOB_DATA (res);
1147 rest = SCM_CDR (rest);
1148
1149 while (scm_is_pair (rest))
1150 {
1151 SCM cs = SCM_CAR (rest);
1152 long *cs_data;
1153 int k;
1154
1155 SCM_VALIDATE_SMOB (argnum, cs, charset);
1156 argnum++;
1157 cs_data = (long *) SCM_SMOB_DATA (cs);
1158 rest = SCM_CDR (rest);
1159
1160 for (k = 0; k < LONGS_PER_CHARSET; k++)
1161 p[k] ^= cs_data[k];
1162 }
1163 }
1164 return res;
1165 }
1166 #undef FUNC_NAME
1167
1168
1169 SCM_DEFINE (scm_char_set_diff_plus_intersection, "char-set-diff+intersection", 1, 0, 1,
1170 (SCM cs1, SCM rest),
1171 "Return the difference and the intersection of all argument\n"
1172 "character sets.")
1173 #define FUNC_NAME s_scm_char_set_diff_plus_intersection
1174 {
1175 int c = 2;
1176 SCM res1, res2;
1177 long * p, * q;
1178
1179 SCM_VALIDATE_SMOB (1, cs1, charset);
1180 SCM_VALIDATE_REST_ARGUMENT (rest);
1181
1182 res1 = scm_char_set_copy (cs1);
1183 res2 = make_char_set (FUNC_NAME);
1184 p = (long *) SCM_SMOB_DATA (res1);
1185 q = (long *) SCM_SMOB_DATA (res2);
1186 while (!scm_is_null (rest))
1187 {
1188 int k;
1189 SCM cs = SCM_CAR (rest);
1190 long *r;
1191
1192 SCM_VALIDATE_SMOB (c, cs, charset);
1193 c++;
1194 r = (long *) SCM_SMOB_DATA (cs);
1195
1196 for (k = 0; k < LONGS_PER_CHARSET; k++)
1197 {
1198 q[k] |= p[k] & r[k];
1199 p[k] &= ~r[k];
1200 }
1201 rest = SCM_CDR (rest);
1202 }
1203 return scm_values (scm_list_2 (res1, res2));
1204 }
1205 #undef FUNC_NAME
1206
1207
1208 SCM_DEFINE (scm_char_set_complement_x, "char-set-complement!", 1, 0, 0,
1209 (SCM cs),
1210 "Return the complement of the character set @var{cs}.")
1211 #define FUNC_NAME s_scm_char_set_complement_x
1212 {
1213 int k;
1214 long * p;
1215
1216 SCM_VALIDATE_SMOB (1, cs, charset);
1217 p = (long *) SCM_SMOB_DATA (cs);
1218 for (k = 0; k < LONGS_PER_CHARSET; k++)
1219 p[k] = ~p[k];
1220 return cs;
1221 }
1222 #undef FUNC_NAME
1223
1224
1225 SCM_DEFINE (scm_char_set_union_x, "char-set-union!", 1, 0, 1,
1226 (SCM cs1, SCM rest),
1227 "Return the union of all argument character sets.")
1228 #define FUNC_NAME s_scm_char_set_union_x
1229 {
1230 int c = 2;
1231 long * p;
1232
1233 SCM_VALIDATE_SMOB (1, cs1, charset);
1234 SCM_VALIDATE_REST_ARGUMENT (rest);
1235
1236 p = (long *) SCM_SMOB_DATA (cs1);
1237 while (!scm_is_null (rest))
1238 {
1239 int k;
1240 SCM cs = SCM_CAR (rest);
1241 SCM_VALIDATE_SMOB (c, cs, charset);
1242 c++;
1243 rest = SCM_CDR (rest);
1244
1245 for (k = 0; k < LONGS_PER_CHARSET; k++)
1246 p[k] |= ((long *) SCM_SMOB_DATA (cs))[k];
1247 }
1248 return cs1;
1249 }
1250 #undef FUNC_NAME
1251
1252
1253 SCM_DEFINE (scm_char_set_intersection_x, "char-set-intersection!", 1, 0, 1,
1254 (SCM cs1, SCM rest),
1255 "Return the intersection of all argument character sets.")
1256 #define FUNC_NAME s_scm_char_set_intersection_x
1257 {
1258 int c = 2;
1259 long * p;
1260
1261 SCM_VALIDATE_SMOB (1, cs1, charset);
1262 SCM_VALIDATE_REST_ARGUMENT (rest);
1263
1264 p = (long *) SCM_SMOB_DATA (cs1);
1265 while (!scm_is_null (rest))
1266 {
1267 int k;
1268 SCM cs = SCM_CAR (rest);
1269 SCM_VALIDATE_SMOB (c, cs, charset);
1270 c++;
1271 rest = SCM_CDR (rest);
1272
1273 for (k = 0; k < LONGS_PER_CHARSET; k++)
1274 p[k] &= ((long *) SCM_SMOB_DATA (cs))[k];
1275 }
1276 return cs1;
1277 }
1278 #undef FUNC_NAME
1279
1280
1281 SCM_DEFINE (scm_char_set_difference_x, "char-set-difference!", 1, 0, 1,
1282 (SCM cs1, SCM rest),
1283 "Return the difference of all argument character sets.")
1284 #define FUNC_NAME s_scm_char_set_difference_x
1285 {
1286 int c = 2;
1287 long * p;
1288
1289 SCM_VALIDATE_SMOB (1, cs1, charset);
1290 SCM_VALIDATE_REST_ARGUMENT (rest);
1291
1292 p = (long *) SCM_SMOB_DATA (cs1);
1293 while (!scm_is_null (rest))
1294 {
1295 int k;
1296 SCM cs = SCM_CAR (rest);
1297 SCM_VALIDATE_SMOB (c, cs, charset);
1298 c++;
1299 rest = SCM_CDR (rest);
1300
1301 for (k = 0; k < LONGS_PER_CHARSET; k++)
1302 p[k] &= ~((long *) SCM_SMOB_DATA (cs))[k];
1303 }
1304 return cs1;
1305 }
1306 #undef FUNC_NAME
1307
1308
1309 SCM_DEFINE (scm_char_set_xor_x, "char-set-xor!", 1, 0, 1,
1310 (SCM cs1, SCM rest),
1311 "Return the exclusive-or of all argument character sets.")
1312 #define FUNC_NAME s_scm_char_set_xor_x
1313 {
1314 /* a side-effecting variant should presumably give consistent results:
1315 (define a (char-set #\a))
1316 (char-set-xor a a a) -> char set #\a
1317 (char-set-xor! a a a) -> char set #\a
1318 */
1319 return scm_char_set_xor (scm_cons (cs1, rest));
1320
1321 #if 0
1322 /* this would give (char-set-xor! a a a) -> empty char set. */
1323 int c = 2;
1324 long * p;
1325
1326 SCM_VALIDATE_SMOB (1, cs1, charset);
1327 SCM_VALIDATE_REST_ARGUMENT (rest);
1328
1329 p = (long *) SCM_SMOB_DATA (cs1);
1330 while (!scm_is_null (rest))
1331 {
1332 int k;
1333 SCM cs = SCM_CAR (rest);
1334 SCM_VALIDATE_SMOB (c, cs, charset);
1335 c++;
1336 rest = SCM_CDR (rest);
1337
1338 for (k = 0; k < LONGS_PER_CHARSET; k++)
1339 p[k] ^= ((long *) SCM_SMOB_DATA (cs))[k];
1340 }
1341 return cs1;
1342 #endif
1343 }
1344 #undef FUNC_NAME
1345
1346
1347 SCM_DEFINE (scm_char_set_diff_plus_intersection_x, "char-set-diff+intersection!", 2, 0, 1,
1348 (SCM cs1, SCM cs2, SCM rest),
1349 "Return the difference and the intersection of all argument\n"
1350 "character sets.")
1351 #define FUNC_NAME s_scm_char_set_diff_plus_intersection_x
1352 {
1353 int c = 3;
1354 long * p, * q;
1355 int k;
1356
1357 SCM_VALIDATE_SMOB (1, cs1, charset);
1358 SCM_VALIDATE_SMOB (2, cs2, charset);
1359 SCM_VALIDATE_REST_ARGUMENT (rest);
1360
1361 p = (long *) SCM_SMOB_DATA (cs1);
1362 q = (long *) SCM_SMOB_DATA (cs2);
1363 if (p == q)
1364 {
1365 /* (char-set-diff+intersection! a a ...): can't share storage,
1366 but we know the answer without checking for further
1367 arguments. */
1368 return scm_values (scm_list_2 (make_char_set (FUNC_NAME), cs1));
1369 }
1370 for (k = 0; k < LONGS_PER_CHARSET; k++)
1371 {
1372 long t = p[k];
1373
1374 p[k] &= ~q[k];
1375 q[k] = t & q[k];
1376 }
1377 while (!scm_is_null (rest))
1378 {
1379 SCM cs = SCM_CAR (rest);
1380 long *r;
1381
1382 SCM_VALIDATE_SMOB (c, cs, charset);
1383 c++;
1384 r = (long *) SCM_SMOB_DATA (cs);
1385
1386 for (k = 0; k < LONGS_PER_CHARSET; k++)
1387 {
1388 q[k] |= p[k] & r[k];
1389 p[k] &= ~r[k];
1390 }
1391 rest = SCM_CDR (rest);
1392 }
1393 return scm_values (scm_list_2 (cs1, cs2));
1394 }
1395 #undef FUNC_NAME
1396
1397 \f
1398 /* Standard character sets. */
1399
1400 SCM scm_char_set_lower_case;
1401 SCM scm_char_set_upper_case;
1402 SCM scm_char_set_title_case;
1403 SCM scm_char_set_letter;
1404 SCM scm_char_set_digit;
1405 SCM scm_char_set_letter_and_digit;
1406 SCM scm_char_set_graphic;
1407 SCM scm_char_set_printing;
1408 SCM scm_char_set_whitespace;
1409 SCM scm_char_set_iso_control;
1410 SCM scm_char_set_punctuation;
1411 SCM scm_char_set_symbol;
1412 SCM scm_char_set_hex_digit;
1413 SCM scm_char_set_blank;
1414 SCM scm_char_set_ascii;
1415 SCM scm_char_set_empty;
1416 SCM scm_char_set_full;
1417
1418
1419 /* Create an empty character set and return it after binding it to NAME. */
1420 static inline SCM
1421 define_charset (const char *name)
1422 {
1423 SCM cs = make_char_set (NULL);
1424 scm_c_define (name, cs);
1425 return scm_permanent_object (cs);
1426 }
1427
1428 /* Membership predicates for the various char sets.
1429
1430 XXX: The `punctuation' and `symbol' char sets have no direct equivalent in
1431 <ctype.h>. Thus, the predicates below yield correct results for ASCII,
1432 but they do not provide the result described by the SRFI for Latin-1. The
1433 correct Latin-1 result could only be obtained by hard-coding the
1434 characters listed by the SRFI, but the problem would remain for other
1435 8-bit charsets.
1436
1437 Similarly, character 0xA0 in Latin-1 (unbreakable space, `#\0240') should
1438 be part of `char-set:blank'. However, glibc's current (2006/09) Latin-1
1439 locales (which use the ISO 14652 "i18n" FDCC-set) do not consider it
1440 `blank' so it ends up in `char-set:punctuation'. */
1441 #ifdef HAVE_ISBLANK
1442 # define CSET_BLANK_PRED(c) (isblank (c))
1443 #else
1444 # define CSET_BLANK_PRED(c) \
1445 (((c) == ' ') || ((c) == '\t'))
1446 #endif
1447
1448 #define CSET_SYMBOL_PRED(c) \
1449 (((c) != '\0') && (strchr ("$+<=>^`|~", (c)) != NULL))
1450 #define CSET_PUNCT_PRED(c) \
1451 ((ispunct (c)) && (!CSET_SYMBOL_PRED (c)))
1452
1453 #define CSET_LOWER_PRED(c) (islower (c))
1454 #define CSET_UPPER_PRED(c) (isupper (c))
1455 #define CSET_LETTER_PRED(c) (isalpha (c))
1456 #define CSET_DIGIT_PRED(c) (isdigit (c))
1457 #define CSET_WHITESPACE_PRED(c) (isspace (c))
1458 #define CSET_CONTROL_PRED(c) (iscntrl (c))
1459 #define CSET_HEX_DIGIT_PRED(c) (isxdigit (c))
1460 #define CSET_ASCII_PRED(c) (isascii (c))
1461
1462 /* Some char sets are explicitly defined by the SRFI as a union of other char
1463 sets so we try to follow this closely. */
1464
1465 #define CSET_LETTER_AND_DIGIT_PRED(c) \
1466 (CSET_LETTER_PRED (c) || CSET_DIGIT_PRED (c))
1467
1468 #define CSET_GRAPHIC_PRED(c) \
1469 (CSET_LETTER_PRED (c) || CSET_DIGIT_PRED (c) \
1470 || CSET_PUNCT_PRED (c) || CSET_SYMBOL_PRED (c))
1471
1472 #define CSET_PRINTING_PRED(c) \
1473 (CSET_GRAPHIC_PRED (c) || CSET_WHITESPACE_PRED (c))
1474
1475 /* False and true predicates. */
1476 #define CSET_TRUE_PRED(c) (1)
1477 #define CSET_FALSE_PRED(c) (0)
1478
1479
1480 /* Compute the contents of all the standard character sets. Computation may
1481 need to be re-done at `setlocale'-time because some char sets (e.g.,
1482 `char-set:letter') need to reflect the character set supported by Guile.
1483
1484 For instance, at startup time, the "C" locale is used, thus Guile supports
1485 only ASCII; therefore, `char-set:letter' only contains English letters.
1486 The user can change this by invoking `setlocale' and specifying a locale
1487 with an 8-bit charset, thereby augmenting some of the SRFI-14 standard
1488 character sets.
1489
1490 This works because some of the predicates used below to construct
1491 character sets (e.g., `isalpha(3)') are locale-dependent (so
1492 charset-dependent, though generally not language-dependent). For details,
1493 please see the `guile-devel' mailing list archive of September 2006. */
1494 void
1495 scm_srfi_14_compute_char_sets (void)
1496 {
1497 #define UPDATE_CSET(c, cset, pred) \
1498 do \
1499 { \
1500 if (pred (c)) \
1501 SCM_CHARSET_SET ((cset), (c)); \
1502 else \
1503 SCM_CHARSET_UNSET ((cset), (c)); \
1504 } \
1505 while (0)
1506
1507 register int ch;
1508
1509 for (ch = 0; ch < 256; ch++)
1510 {
1511 UPDATE_CSET (ch, scm_char_set_upper_case, CSET_UPPER_PRED);
1512 UPDATE_CSET (ch, scm_char_set_lower_case, CSET_LOWER_PRED);
1513 UPDATE_CSET (ch, scm_char_set_title_case, CSET_FALSE_PRED);
1514 UPDATE_CSET (ch, scm_char_set_letter, CSET_LETTER_PRED);
1515 UPDATE_CSET (ch, scm_char_set_digit, CSET_DIGIT_PRED);
1516 UPDATE_CSET (ch, scm_char_set_letter_and_digit,
1517 CSET_LETTER_AND_DIGIT_PRED);
1518 UPDATE_CSET (ch, scm_char_set_graphic, CSET_GRAPHIC_PRED);
1519 UPDATE_CSET (ch, scm_char_set_printing, CSET_PRINTING_PRED);
1520 UPDATE_CSET (ch, scm_char_set_whitespace, CSET_WHITESPACE_PRED);
1521 UPDATE_CSET (ch, scm_char_set_iso_control, CSET_CONTROL_PRED);
1522 UPDATE_CSET (ch, scm_char_set_punctuation, CSET_PUNCT_PRED);
1523 UPDATE_CSET (ch, scm_char_set_symbol, CSET_SYMBOL_PRED);
1524 UPDATE_CSET (ch, scm_char_set_hex_digit, CSET_HEX_DIGIT_PRED);
1525 UPDATE_CSET (ch, scm_char_set_blank, CSET_BLANK_PRED);
1526 UPDATE_CSET (ch, scm_char_set_ascii, CSET_ASCII_PRED);
1527 UPDATE_CSET (ch, scm_char_set_empty, CSET_FALSE_PRED);
1528 UPDATE_CSET (ch, scm_char_set_full, CSET_TRUE_PRED);
1529 }
1530
1531 #undef UPDATE_CSET
1532 }
1533
1534 \f
1535 void
1536 scm_init_srfi_14 (void)
1537 {
1538 scm_tc16_charset = scm_make_smob_type ("character-set",
1539 BYTES_PER_CHARSET);
1540 scm_set_smob_print (scm_tc16_charset, charset_print);
1541
1542 scm_char_set_upper_case = define_charset ("char-set:upper-case");
1543 scm_char_set_lower_case = define_charset ("char-set:lower-case");
1544 scm_char_set_title_case = define_charset ("char-set:title-case");
1545 scm_char_set_letter = define_charset ("char-set:letter");
1546 scm_char_set_digit = define_charset ("char-set:digit");
1547 scm_char_set_letter_and_digit = define_charset ("char-set:letter+digit");
1548 scm_char_set_graphic = define_charset ("char-set:graphic");
1549 scm_char_set_printing = define_charset ("char-set:printing");
1550 scm_char_set_whitespace = define_charset ("char-set:whitespace");
1551 scm_char_set_iso_control = define_charset ("char-set:iso-control");
1552 scm_char_set_punctuation = define_charset ("char-set:punctuation");
1553 scm_char_set_symbol = define_charset ("char-set:symbol");
1554 scm_char_set_hex_digit = define_charset ("char-set:hex-digit");
1555 scm_char_set_blank = define_charset ("char-set:blank");
1556 scm_char_set_ascii = define_charset ("char-set:ascii");
1557 scm_char_set_empty = define_charset ("char-set:empty");
1558 scm_char_set_full = define_charset ("char-set:full");
1559
1560 scm_srfi_14_compute_char_sets ();
1561
1562 #include "libguile/srfi-14.x"
1563 }
1564
1565 /* End of srfi-14.c. */