Merge branch 'master' of git://git.savannah.gnu.org/guile into elisp
[bpt/guile.git] / libguile / srfi-14.c
1 /* srfi-14.c --- SRFI-14 procedures for Guile
2 *
3 * Copyright (C) 2001, 2004, 2006, 2007 Free Software Foundation, Inc.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public License
7 * as published by the Free Software Foundation; either version 3 of
8 * the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301 USA
19 */
20
21 #ifdef HAVE_CONFIG_H
22 # include <config.h>
23 #endif
24
25
26 #include <string.h>
27 #include <ctype.h>
28
29 #include "libguile.h"
30 #include "libguile/srfi-14.h"
31
32
33 #define SCM_CHARSET_SET(cs, idx) \
34 (((long *) SCM_SMOB_DATA (cs))[(idx) / SCM_BITS_PER_LONG] |= \
35 (1L << ((idx) % SCM_BITS_PER_LONG)))
36
37 #define SCM_CHARSET_UNSET(cs, idx) \
38 (((long *) SCM_SMOB_DATA (cs))[(idx) / SCM_BITS_PER_LONG] &= \
39 (~(1L << ((idx) % SCM_BITS_PER_LONG))))
40
41 #define BYTES_PER_CHARSET (SCM_CHARSET_SIZE / 8)
42 #define LONGS_PER_CHARSET (SCM_CHARSET_SIZE / SCM_BITS_PER_LONG)
43
44
45 /* Smob type code for character sets. */
46 int scm_tc16_charset = 0;
47
48
49 /* Smob print hook for character sets. */
50 static int
51 charset_print (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED)
52 {
53 int i;
54 int first = 1;
55
56 scm_puts ("#<charset {", port);
57 for (i = 0; i < SCM_CHARSET_SIZE; i++)
58 if (SCM_CHARSET_GET (charset, i))
59 {
60 if (first)
61 first = 0;
62 else
63 scm_puts (" ", port);
64 scm_write (SCM_MAKE_CHAR (i), port);
65 }
66 scm_puts ("}>", port);
67 return 1;
68 }
69
70
71 /* Smob free hook for character sets. */
72 static size_t
73 charset_free (SCM charset)
74 {
75 return scm_smob_free (charset);
76 }
77
78
79 /* Create a new, empty character set. */
80 static SCM
81 make_char_set (const char * func_name)
82 {
83 long * p;
84
85 p = scm_gc_malloc (BYTES_PER_CHARSET, "character-set");
86 memset (p, 0, BYTES_PER_CHARSET);
87 SCM_RETURN_NEWSMOB (scm_tc16_charset, p);
88 }
89
90
91 SCM_DEFINE (scm_char_set_p, "char-set?", 1, 0, 0,
92 (SCM obj),
93 "Return @code{#t} if @var{obj} is a character set, @code{#f}\n"
94 "otherwise.")
95 #define FUNC_NAME s_scm_char_set_p
96 {
97 return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_charset, obj));
98 }
99 #undef FUNC_NAME
100
101
102 SCM_DEFINE (scm_char_set_eq, "char-set=", 0, 0, 1,
103 (SCM char_sets),
104 "Return @code{#t} if all given character sets are equal.")
105 #define FUNC_NAME s_scm_char_set_eq
106 {
107 int argnum = 1;
108 long *cs1_data = NULL;
109
110 SCM_VALIDATE_REST_ARGUMENT (char_sets);
111
112 while (!scm_is_null (char_sets))
113 {
114 SCM csi = SCM_CAR (char_sets);
115 long *csi_data;
116
117 SCM_VALIDATE_SMOB (argnum, csi, charset);
118 argnum++;
119 csi_data = (long *) SCM_SMOB_DATA (csi);
120 if (cs1_data == NULL)
121 cs1_data = csi_data;
122 else if (memcmp (cs1_data, csi_data, BYTES_PER_CHARSET) != 0)
123 return SCM_BOOL_F;
124 char_sets = SCM_CDR (char_sets);
125 }
126 return SCM_BOOL_T;
127 }
128 #undef FUNC_NAME
129
130
131 SCM_DEFINE (scm_char_set_leq, "char-set<=", 0, 0, 1,
132 (SCM char_sets),
133 "Return @code{#t} if every character set @var{cs}i is a subset\n"
134 "of character set @var{cs}i+1.")
135 #define FUNC_NAME s_scm_char_set_leq
136 {
137 int argnum = 1;
138 long *prev_data = NULL;
139
140 SCM_VALIDATE_REST_ARGUMENT (char_sets);
141
142 while (!scm_is_null (char_sets))
143 {
144 SCM csi = SCM_CAR (char_sets);
145 long *csi_data;
146
147 SCM_VALIDATE_SMOB (argnum, csi, charset);
148 argnum++;
149 csi_data = (long *) SCM_SMOB_DATA (csi);
150 if (prev_data)
151 {
152 int k;
153
154 for (k = 0; k < LONGS_PER_CHARSET; k++)
155 {
156 if ((prev_data[k] & csi_data[k]) != prev_data[k])
157 return SCM_BOOL_F;
158 }
159 }
160 prev_data = csi_data;
161 char_sets = SCM_CDR (char_sets);
162 }
163 return SCM_BOOL_T;
164 }
165 #undef FUNC_NAME
166
167
168 SCM_DEFINE (scm_char_set_hash, "char-set-hash", 1, 1, 0,
169 (SCM cs, SCM bound),
170 "Compute a hash value for the character set @var{cs}. If\n"
171 "@var{bound} is given and non-zero, it restricts the\n"
172 "returned value to the range 0 @dots{} @var{bound - 1}.")
173 #define FUNC_NAME s_scm_char_set_hash
174 {
175 const unsigned long default_bnd = 871;
176 unsigned long bnd;
177 long * p;
178 unsigned long val = 0;
179 int k;
180
181 SCM_VALIDATE_SMOB (1, cs, charset);
182
183 if (SCM_UNBNDP (bound))
184 bnd = default_bnd;
185 else
186 {
187 bnd = scm_to_ulong (bound);
188 if (bnd == 0)
189 bnd = default_bnd;
190 }
191
192 p = (long *) SCM_SMOB_DATA (cs);
193 for (k = 0; k < LONGS_PER_CHARSET; k++)
194 {
195 if (p[k] != 0)
196 val = p[k] + (val << 1);
197 }
198 return scm_from_ulong (val % bnd);
199 }
200 #undef FUNC_NAME
201
202
203 SCM_DEFINE (scm_char_set_cursor, "char-set-cursor", 1, 0, 0,
204 (SCM cs),
205 "Return a cursor into the character set @var{cs}.")
206 #define FUNC_NAME s_scm_char_set_cursor
207 {
208 int idx;
209
210 SCM_VALIDATE_SMOB (1, cs, charset);
211 for (idx = 0; idx < SCM_CHARSET_SIZE; idx++)
212 {
213 if (SCM_CHARSET_GET (cs, idx))
214 break;
215 }
216 return SCM_I_MAKINUM (idx);
217 }
218 #undef FUNC_NAME
219
220
221 SCM_DEFINE (scm_char_set_ref, "char-set-ref", 2, 0, 0,
222 (SCM cs, SCM cursor),
223 "Return the character at the current cursor position\n"
224 "@var{cursor} in the character set @var{cs}. It is an error to\n"
225 "pass a cursor for which @code{end-of-char-set?} returns true.")
226 #define FUNC_NAME s_scm_char_set_ref
227 {
228 size_t ccursor = scm_to_size_t (cursor);
229 SCM_VALIDATE_SMOB (1, cs, charset);
230
231 if (ccursor >= SCM_CHARSET_SIZE || !SCM_CHARSET_GET (cs, ccursor))
232 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
233 return SCM_MAKE_CHAR (ccursor);
234 }
235 #undef FUNC_NAME
236
237
238 SCM_DEFINE (scm_char_set_cursor_next, "char-set-cursor-next", 2, 0, 0,
239 (SCM cs, SCM cursor),
240 "Advance the character set cursor @var{cursor} to the next\n"
241 "character in the character set @var{cs}. It is an error if the\n"
242 "cursor given satisfies @code{end-of-char-set?}.")
243 #define FUNC_NAME s_scm_char_set_cursor_next
244 {
245 size_t ccursor = scm_to_size_t (cursor);
246 SCM_VALIDATE_SMOB (1, cs, charset);
247
248 if (ccursor >= SCM_CHARSET_SIZE || !SCM_CHARSET_GET (cs, ccursor))
249 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
250 for (ccursor++; ccursor < SCM_CHARSET_SIZE; ccursor++)
251 {
252 if (SCM_CHARSET_GET (cs, ccursor))
253 break;
254 }
255 return SCM_I_MAKINUM (ccursor);
256 }
257 #undef FUNC_NAME
258
259
260 SCM_DEFINE (scm_end_of_char_set_p, "end-of-char-set?", 1, 0, 0,
261 (SCM cursor),
262 "Return @code{#t} if @var{cursor} has reached the end of a\n"
263 "character set, @code{#f} otherwise.")
264 #define FUNC_NAME s_scm_end_of_char_set_p
265 {
266 size_t ccursor = scm_to_size_t (cursor);
267 return scm_from_bool (ccursor >= SCM_CHARSET_SIZE);
268 }
269 #undef FUNC_NAME
270
271
272 SCM_DEFINE (scm_char_set_fold, "char-set-fold", 3, 0, 0,
273 (SCM kons, SCM knil, SCM cs),
274 "Fold the procedure @var{kons} over the character set @var{cs},\n"
275 "initializing it with @var{knil}.")
276 #define FUNC_NAME s_scm_char_set_fold
277 {
278 int k;
279
280 SCM_VALIDATE_PROC (1, kons);
281 SCM_VALIDATE_SMOB (3, cs, charset);
282
283 for (k = 0; k < SCM_CHARSET_SIZE; k++)
284 if (SCM_CHARSET_GET (cs, k))
285 {
286 knil = scm_call_2 (kons, SCM_MAKE_CHAR (k), knil);
287 }
288 return knil;
289 }
290 #undef FUNC_NAME
291
292
293 SCM_DEFINE (scm_char_set_unfold, "char-set-unfold", 4, 1, 0,
294 (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
295 "This is a fundamental constructor for character sets.\n"
296 "@itemize @bullet\n"
297 "@item @var{g} is used to generate a series of ``seed'' values\n"
298 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
299 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
300 "@item @var{p} tells us when to stop -- when it returns true\n"
301 "when applied to one of the seed values.\n"
302 "@item @var{f} maps each seed value to a character. These\n"
303 "characters are added to the base character set @var{base_cs} to\n"
304 "form the result; @var{base_cs} defaults to the empty set.\n"
305 "@end itemize")
306 #define FUNC_NAME s_scm_char_set_unfold
307 {
308 SCM result, tmp;
309
310 SCM_VALIDATE_PROC (1, p);
311 SCM_VALIDATE_PROC (2, f);
312 SCM_VALIDATE_PROC (3, g);
313 if (!SCM_UNBNDP (base_cs))
314 {
315 SCM_VALIDATE_SMOB (5, base_cs, charset);
316 result = scm_char_set_copy (base_cs);
317 }
318 else
319 result = make_char_set (FUNC_NAME);
320
321 tmp = scm_call_1 (p, seed);
322 while (scm_is_false (tmp))
323 {
324 SCM ch = scm_call_1 (f, seed);
325 if (!SCM_CHARP (ch))
326 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
327 SCM_CHARSET_SET (result, SCM_CHAR (ch));
328
329 seed = scm_call_1 (g, seed);
330 tmp = scm_call_1 (p, seed);
331 }
332 return result;
333 }
334 #undef FUNC_NAME
335
336
337 SCM_DEFINE (scm_char_set_unfold_x, "char-set-unfold!", 5, 0, 0,
338 (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
339 "This is a fundamental constructor for character sets.\n"
340 "@itemize @bullet\n"
341 "@item @var{g} is used to generate a series of ``seed'' values\n"
342 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
343 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
344 "@item @var{p} tells us when to stop -- when it returns true\n"
345 "when applied to one of the seed values.\n"
346 "@item @var{f} maps each seed value to a character. These\n"
347 "characters are added to the base character set @var{base_cs} to\n"
348 "form the result; @var{base_cs} defaults to the empty set.\n"
349 "@end itemize")
350 #define FUNC_NAME s_scm_char_set_unfold_x
351 {
352 SCM tmp;
353
354 SCM_VALIDATE_PROC (1, p);
355 SCM_VALIDATE_PROC (2, f);
356 SCM_VALIDATE_PROC (3, g);
357 SCM_VALIDATE_SMOB (5, base_cs, charset);
358
359 tmp = scm_call_1 (p, seed);
360 while (scm_is_false (tmp))
361 {
362 SCM ch = scm_call_1 (f, seed);
363 if (!SCM_CHARP (ch))
364 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
365 SCM_CHARSET_SET (base_cs, SCM_CHAR (ch));
366
367 seed = scm_call_1 (g, seed);
368 tmp = scm_call_1 (p, seed);
369 }
370 return base_cs;
371 }
372 #undef FUNC_NAME
373
374
375 SCM_DEFINE (scm_char_set_for_each, "char-set-for-each", 2, 0, 0,
376 (SCM proc, SCM cs),
377 "Apply @var{proc} to every character in the character set\n"
378 "@var{cs}. The return value is not specified.")
379 #define FUNC_NAME s_scm_char_set_for_each
380 {
381 int k;
382
383 SCM_VALIDATE_PROC (1, proc);
384 SCM_VALIDATE_SMOB (2, cs, charset);
385
386 for (k = 0; k < SCM_CHARSET_SIZE; k++)
387 if (SCM_CHARSET_GET (cs, k))
388 scm_call_1 (proc, SCM_MAKE_CHAR (k));
389 return SCM_UNSPECIFIED;
390 }
391 #undef FUNC_NAME
392
393
394 SCM_DEFINE (scm_char_set_map, "char-set-map", 2, 0, 0,
395 (SCM proc, SCM cs),
396 "Map the procedure @var{proc} over every character in @var{cs}.\n"
397 "@var{proc} must be a character -> character procedure.")
398 #define FUNC_NAME s_scm_char_set_map
399 {
400 SCM result;
401 int k;
402
403 SCM_VALIDATE_PROC (1, proc);
404 SCM_VALIDATE_SMOB (2, cs, charset);
405
406 result = make_char_set (FUNC_NAME);
407 for (k = 0; k < SCM_CHARSET_SIZE; k++)
408 if (SCM_CHARSET_GET (cs, k))
409 {
410 SCM ch = scm_call_1 (proc, SCM_MAKE_CHAR (k));
411 if (!SCM_CHARP (ch))
412 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (proc));
413 SCM_CHARSET_SET (result, SCM_CHAR (ch));
414 }
415 return result;
416 }
417 #undef FUNC_NAME
418
419
420 SCM_DEFINE (scm_char_set_copy, "char-set-copy", 1, 0, 0,
421 (SCM cs),
422 "Return a newly allocated character set containing all\n"
423 "characters in @var{cs}.")
424 #define FUNC_NAME s_scm_char_set_copy
425 {
426 SCM ret;
427 long * p1, * p2;
428 int k;
429
430 SCM_VALIDATE_SMOB (1, cs, charset);
431 ret = make_char_set (FUNC_NAME);
432 p1 = (long *) SCM_SMOB_DATA (cs);
433 p2 = (long *) SCM_SMOB_DATA (ret);
434 for (k = 0; k < LONGS_PER_CHARSET; k++)
435 p2[k] = p1[k];
436 return ret;
437 }
438 #undef FUNC_NAME
439
440
441 SCM_DEFINE (scm_char_set, "char-set", 0, 0, 1,
442 (SCM rest),
443 "Return a character set containing all given characters.")
444 #define FUNC_NAME s_scm_char_set
445 {
446 SCM cs;
447 long * p;
448 int argnum = 1;
449
450 SCM_VALIDATE_REST_ARGUMENT (rest);
451 cs = make_char_set (FUNC_NAME);
452 p = (long *) SCM_SMOB_DATA (cs);
453 while (!scm_is_null (rest))
454 {
455 int c;
456
457 SCM_VALIDATE_CHAR_COPY (argnum, SCM_CAR (rest), c);
458 argnum++;
459 rest = SCM_CDR (rest);
460 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
461 }
462 return cs;
463 }
464 #undef FUNC_NAME
465
466
467 SCM_DEFINE (scm_list_to_char_set, "list->char-set", 1, 1, 0,
468 (SCM list, SCM base_cs),
469 "Convert the character list @var{list} to a character set. If\n"
470 "the character set @var{base_cs} is given, the character in this\n"
471 "set are also included in the result.")
472 #define FUNC_NAME s_scm_list_to_char_set
473 {
474 SCM cs;
475 long * p;
476
477 SCM_VALIDATE_LIST (1, list);
478 if (SCM_UNBNDP (base_cs))
479 cs = make_char_set (FUNC_NAME);
480 else
481 {
482 SCM_VALIDATE_SMOB (2, base_cs, charset);
483 cs = scm_char_set_copy (base_cs);
484 }
485 p = (long *) SCM_SMOB_DATA (cs);
486 while (!scm_is_null (list))
487 {
488 SCM chr = SCM_CAR (list);
489 int c;
490
491 SCM_VALIDATE_CHAR_COPY (0, chr, c);
492 list = SCM_CDR (list);
493
494 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
495 }
496 return cs;
497 }
498 #undef FUNC_NAME
499
500
501 SCM_DEFINE (scm_list_to_char_set_x, "list->char-set!", 2, 0, 0,
502 (SCM list, SCM base_cs),
503 "Convert the character list @var{list} to a character set. The\n"
504 "characters are added to @var{base_cs} and @var{base_cs} is\n"
505 "returned.")
506 #define FUNC_NAME s_scm_list_to_char_set_x
507 {
508 long * p;
509
510 SCM_VALIDATE_LIST (1, list);
511 SCM_VALIDATE_SMOB (2, base_cs, charset);
512 p = (long *) SCM_SMOB_DATA (base_cs);
513 while (!scm_is_null (list))
514 {
515 SCM chr = SCM_CAR (list);
516 int c;
517
518 SCM_VALIDATE_CHAR_COPY (0, chr, c);
519 list = SCM_CDR (list);
520
521 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
522 }
523 return base_cs;
524 }
525 #undef FUNC_NAME
526
527
528 SCM_DEFINE (scm_string_to_char_set, "string->char-set", 1, 1, 0,
529 (SCM str, SCM base_cs),
530 "Convert the string @var{str} to a character set. If the\n"
531 "character set @var{base_cs} is given, the characters in this\n"
532 "set are also included in the result.")
533 #define FUNC_NAME s_scm_string_to_char_set
534 {
535 SCM cs;
536 long * p;
537 const char * s;
538 size_t k = 0, len;
539
540 SCM_VALIDATE_STRING (1, str);
541 if (SCM_UNBNDP (base_cs))
542 cs = make_char_set (FUNC_NAME);
543 else
544 {
545 SCM_VALIDATE_SMOB (2, base_cs, charset);
546 cs = scm_char_set_copy (base_cs);
547 }
548 p = (long *) SCM_SMOB_DATA (cs);
549 s = scm_i_string_chars (str);
550 len = scm_i_string_length (str);
551 while (k < len)
552 {
553 int c = s[k++];
554 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
555 }
556 scm_remember_upto_here_1 (str);
557 return cs;
558 }
559 #undef FUNC_NAME
560
561
562 SCM_DEFINE (scm_string_to_char_set_x, "string->char-set!", 2, 0, 0,
563 (SCM str, SCM base_cs),
564 "Convert the string @var{str} to a character set. The\n"
565 "characters from the string are added to @var{base_cs}, and\n"
566 "@var{base_cs} is returned.")
567 #define FUNC_NAME s_scm_string_to_char_set_x
568 {
569 long * p;
570 const char * s;
571 size_t k = 0, len;
572
573 SCM_VALIDATE_STRING (1, str);
574 SCM_VALIDATE_SMOB (2, base_cs, charset);
575 p = (long *) SCM_SMOB_DATA (base_cs);
576 s = scm_i_string_chars (str);
577 len = scm_i_string_length (str);
578 while (k < len)
579 {
580 int c = s[k++];
581 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
582 }
583 scm_remember_upto_here_1 (str);
584 return base_cs;
585 }
586 #undef FUNC_NAME
587
588
589 SCM_DEFINE (scm_char_set_filter, "char-set-filter", 2, 1, 0,
590 (SCM pred, SCM cs, SCM base_cs),
591 "Return a character set containing every character from @var{cs}\n"
592 "so that it satisfies @var{pred}. If provided, the characters\n"
593 "from @var{base_cs} are added to the result.")
594 #define FUNC_NAME s_scm_char_set_filter
595 {
596 SCM ret;
597 int k;
598 long * p;
599
600 SCM_VALIDATE_PROC (1, pred);
601 SCM_VALIDATE_SMOB (2, cs, charset);
602 if (!SCM_UNBNDP (base_cs))
603 {
604 SCM_VALIDATE_SMOB (3, base_cs, charset);
605 ret = scm_char_set_copy (base_cs);
606 }
607 else
608 ret = make_char_set (FUNC_NAME);
609 p = (long *) SCM_SMOB_DATA (ret);
610 for (k = 0; k < SCM_CHARSET_SIZE; k++)
611 {
612 if (SCM_CHARSET_GET (cs, k))
613 {
614 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
615
616 if (scm_is_true (res))
617 p[k / SCM_BITS_PER_LONG] |= 1L << (k % SCM_BITS_PER_LONG);
618 }
619 }
620 return ret;
621 }
622 #undef FUNC_NAME
623
624
625 SCM_DEFINE (scm_char_set_filter_x, "char-set-filter!", 3, 0, 0,
626 (SCM pred, SCM cs, SCM base_cs),
627 "Return a character set containing every character from @var{cs}\n"
628 "so that it satisfies @var{pred}. The characters are added to\n"
629 "@var{base_cs} and @var{base_cs} is returned.")
630 #define FUNC_NAME s_scm_char_set_filter_x
631 {
632 int k;
633 long * p;
634
635 SCM_VALIDATE_PROC (1, pred);
636 SCM_VALIDATE_SMOB (2, cs, charset);
637 SCM_VALIDATE_SMOB (3, base_cs, charset);
638 p = (long *) SCM_SMOB_DATA (base_cs);
639 for (k = 0; k < SCM_CHARSET_SIZE; k++)
640 {
641 if (SCM_CHARSET_GET (cs, k))
642 {
643 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
644
645 if (scm_is_true (res))
646 p[k / SCM_BITS_PER_LONG] |= 1L << (k % SCM_BITS_PER_LONG);
647 }
648 }
649 return base_cs;
650 }
651 #undef FUNC_NAME
652
653
654 SCM_DEFINE (scm_ucs_range_to_char_set, "ucs-range->char-set", 2, 2, 0,
655 (SCM lower, SCM upper, SCM error, SCM base_cs),
656 "Return a character set containing all characters whose\n"
657 "character codes lie in the half-open range\n"
658 "[@var{lower},@var{upper}).\n"
659 "\n"
660 "If @var{error} is a true value, an error is signalled if the\n"
661 "specified range contains characters which are not contained in\n"
662 "the implemented character range. If @var{error} is @code{#f},\n"
663 "these characters are silently left out of the resultung\n"
664 "character set.\n"
665 "\n"
666 "The characters in @var{base_cs} are added to the result, if\n"
667 "given.")
668 #define FUNC_NAME s_scm_ucs_range_to_char_set
669 {
670 SCM cs;
671 size_t clower, cupper;
672 long * p;
673
674 clower = scm_to_size_t (lower);
675 cupper = scm_to_size_t (upper);
676 SCM_ASSERT_RANGE (2, upper, cupper >= clower);
677 if (!SCM_UNBNDP (error))
678 {
679 if (scm_is_true (error))
680 {
681 SCM_ASSERT_RANGE (1, lower, clower <= SCM_CHARSET_SIZE);
682 SCM_ASSERT_RANGE (2, upper, cupper <= SCM_CHARSET_SIZE);
683 }
684 }
685 if (clower > SCM_CHARSET_SIZE)
686 clower = SCM_CHARSET_SIZE;
687 if (cupper > SCM_CHARSET_SIZE)
688 cupper = SCM_CHARSET_SIZE;
689 if (SCM_UNBNDP (base_cs))
690 cs = make_char_set (FUNC_NAME);
691 else
692 {
693 SCM_VALIDATE_SMOB (4, base_cs, charset);
694 cs = scm_char_set_copy (base_cs);
695 }
696 p = (long *) SCM_SMOB_DATA (cs);
697 while (clower < cupper)
698 {
699 p[clower / SCM_BITS_PER_LONG] |= 1L << (clower % SCM_BITS_PER_LONG);
700 clower++;
701 }
702 return cs;
703 }
704 #undef FUNC_NAME
705
706
707 SCM_DEFINE (scm_ucs_range_to_char_set_x, "ucs-range->char-set!", 4, 0, 0,
708 (SCM lower, SCM upper, SCM error, SCM base_cs),
709 "Return a character set containing all characters whose\n"
710 "character codes lie in the half-open range\n"
711 "[@var{lower},@var{upper}).\n"
712 "\n"
713 "If @var{error} is a true value, an error is signalled if the\n"
714 "specified range contains characters which are not contained in\n"
715 "the implemented character range. If @var{error} is @code{#f},\n"
716 "these characters are silently left out of the resultung\n"
717 "character set.\n"
718 "\n"
719 "The characters are added to @var{base_cs} and @var{base_cs} is\n"
720 "returned.")
721 #define FUNC_NAME s_scm_ucs_range_to_char_set_x
722 {
723 size_t clower, cupper;
724 long * p;
725
726 clower = scm_to_size_t (lower);
727 cupper = scm_to_size_t (upper);
728 SCM_ASSERT_RANGE (2, upper, cupper >= clower);
729 if (scm_is_true (error))
730 {
731 SCM_ASSERT_RANGE (1, lower, clower <= SCM_CHARSET_SIZE);
732 SCM_ASSERT_RANGE (2, upper, cupper <= SCM_CHARSET_SIZE);
733 }
734 if (clower > SCM_CHARSET_SIZE)
735 clower = SCM_CHARSET_SIZE;
736 if (cupper > SCM_CHARSET_SIZE)
737 cupper = SCM_CHARSET_SIZE;
738 p = (long *) SCM_SMOB_DATA (base_cs);
739 while (clower < cupper)
740 {
741 p[clower / SCM_BITS_PER_LONG] |= 1L << (clower % SCM_BITS_PER_LONG);
742 clower++;
743 }
744 return base_cs;
745 }
746 #undef FUNC_NAME
747
748 SCM_DEFINE (scm_to_char_set, "->char-set", 1, 0, 0,
749 (SCM x),
750 "Coerces x into a char-set. @var{x} may be a string, character or char-set. A string is converted to the set of its constituent characters; a character is converted to a singleton set; a char-set is returned as-is.")
751 #define FUNC_NAME s_scm_to_char_set
752 {
753 if (scm_is_string (x))
754 return scm_string_to_char_set (x, SCM_UNDEFINED);
755 else if (SCM_CHARP (x))
756 return scm_char_set (scm_list_1 (x));
757 else if (SCM_SMOB_PREDICATE (scm_tc16_charset, x))
758 return x;
759 else
760 scm_wrong_type_arg (NULL, 0, x);
761 }
762 #undef FUNC_NAME
763
764 SCM_DEFINE (scm_char_set_size, "char-set-size", 1, 0, 0,
765 (SCM cs),
766 "Return the number of elements in character set @var{cs}.")
767 #define FUNC_NAME s_scm_char_set_size
768 {
769 int k, count = 0;
770
771 SCM_VALIDATE_SMOB (1, cs, charset);
772 for (k = 0; k < SCM_CHARSET_SIZE; k++)
773 if (SCM_CHARSET_GET (cs, k))
774 count++;
775 return SCM_I_MAKINUM (count);
776 }
777 #undef FUNC_NAME
778
779
780 SCM_DEFINE (scm_char_set_count, "char-set-count", 2, 0, 0,
781 (SCM pred, SCM cs),
782 "Return the number of the elements int the character set\n"
783 "@var{cs} which satisfy the predicate @var{pred}.")
784 #define FUNC_NAME s_scm_char_set_count
785 {
786 int k, count = 0;
787
788 SCM_VALIDATE_PROC (1, pred);
789 SCM_VALIDATE_SMOB (2, cs, charset);
790
791 for (k = 0; k < SCM_CHARSET_SIZE; k++)
792 if (SCM_CHARSET_GET (cs, k))
793 {
794 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
795 if (scm_is_true (res))
796 count++;
797 }
798 return SCM_I_MAKINUM (count);
799 }
800 #undef FUNC_NAME
801
802
803 SCM_DEFINE (scm_char_set_to_list, "char-set->list", 1, 0, 0,
804 (SCM cs),
805 "Return a list containing the elements of the character set\n"
806 "@var{cs}.")
807 #define FUNC_NAME s_scm_char_set_to_list
808 {
809 int k;
810 SCM result = SCM_EOL;
811
812 SCM_VALIDATE_SMOB (1, cs, charset);
813 for (k = SCM_CHARSET_SIZE; k > 0; k--)
814 if (SCM_CHARSET_GET (cs, k - 1))
815 result = scm_cons (SCM_MAKE_CHAR (k - 1), result);
816 return result;
817 }
818 #undef FUNC_NAME
819
820
821 SCM_DEFINE (scm_char_set_to_string, "char-set->string", 1, 0, 0,
822 (SCM cs),
823 "Return a string containing the elements of the character set\n"
824 "@var{cs}. The order in which the characters are placed in the\n"
825 "string is not defined.")
826 #define FUNC_NAME s_scm_char_set_to_string
827 {
828 int k;
829 int count = 0;
830 int idx = 0;
831 SCM result;
832 char * p;
833
834 SCM_VALIDATE_SMOB (1, cs, charset);
835 for (k = 0; k < SCM_CHARSET_SIZE; k++)
836 if (SCM_CHARSET_GET (cs, k))
837 count++;
838 result = scm_i_make_string (count, &p);
839 for (k = 0; k < SCM_CHARSET_SIZE; k++)
840 if (SCM_CHARSET_GET (cs, k))
841 p[idx++] = k;
842 return result;
843 }
844 #undef FUNC_NAME
845
846
847 SCM_DEFINE (scm_char_set_contains_p, "char-set-contains?", 2, 0, 0,
848 (SCM cs, SCM ch),
849 "Return @code{#t} iff the character @var{ch} is contained in the\n"
850 "character set @var{cs}.")
851 #define FUNC_NAME s_scm_char_set_contains_p
852 {
853 SCM_VALIDATE_SMOB (1, cs, charset);
854 SCM_VALIDATE_CHAR (2, ch);
855 return scm_from_bool (SCM_CHARSET_GET (cs, SCM_CHAR (ch)));
856 }
857 #undef FUNC_NAME
858
859
860 SCM_DEFINE (scm_char_set_every, "char-set-every", 2, 0, 0,
861 (SCM pred, SCM cs),
862 "Return a true value if every character in the character set\n"
863 "@var{cs} satisfies the predicate @var{pred}.")
864 #define FUNC_NAME s_scm_char_set_every
865 {
866 int k;
867 SCM res = SCM_BOOL_T;
868
869 SCM_VALIDATE_PROC (1, pred);
870 SCM_VALIDATE_SMOB (2, cs, charset);
871
872 for (k = 0; k < SCM_CHARSET_SIZE; k++)
873 if (SCM_CHARSET_GET (cs, k))
874 {
875 res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
876 if (scm_is_false (res))
877 return res;
878 }
879 return res;
880 }
881 #undef FUNC_NAME
882
883
884 SCM_DEFINE (scm_char_set_any, "char-set-any", 2, 0, 0,
885 (SCM pred, SCM cs),
886 "Return a true value if any character in the character set\n"
887 "@var{cs} satisfies the predicate @var{pred}.")
888 #define FUNC_NAME s_scm_char_set_any
889 {
890 int k;
891
892 SCM_VALIDATE_PROC (1, pred);
893 SCM_VALIDATE_SMOB (2, cs, charset);
894
895 for (k = 0; k < SCM_CHARSET_SIZE; k++)
896 if (SCM_CHARSET_GET (cs, k))
897 {
898 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
899 if (scm_is_true (res))
900 return res;
901 }
902 return SCM_BOOL_F;
903 }
904 #undef FUNC_NAME
905
906
907 SCM_DEFINE (scm_char_set_adjoin, "char-set-adjoin", 1, 0, 1,
908 (SCM cs, SCM rest),
909 "Add all character arguments to the first argument, which must\n"
910 "be a character set.")
911 #define FUNC_NAME s_scm_char_set_adjoin
912 {
913 long * p;
914
915 SCM_VALIDATE_SMOB (1, cs, charset);
916 SCM_VALIDATE_REST_ARGUMENT (rest);
917 cs = scm_char_set_copy (cs);
918
919 p = (long *) SCM_SMOB_DATA (cs);
920 while (!scm_is_null (rest))
921 {
922 SCM chr = SCM_CAR (rest);
923 int c;
924
925 SCM_VALIDATE_CHAR_COPY (1, chr, c);
926 rest = SCM_CDR (rest);
927
928 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
929 }
930 return cs;
931 }
932 #undef FUNC_NAME
933
934
935 SCM_DEFINE (scm_char_set_delete, "char-set-delete", 1, 0, 1,
936 (SCM cs, SCM rest),
937 "Delete all character arguments from the first argument, which\n"
938 "must be a character set.")
939 #define FUNC_NAME s_scm_char_set_delete
940 {
941 long * p;
942
943 SCM_VALIDATE_SMOB (1, cs, charset);
944 SCM_VALIDATE_REST_ARGUMENT (rest);
945 cs = scm_char_set_copy (cs);
946
947 p = (long *) SCM_SMOB_DATA (cs);
948 while (!scm_is_null (rest))
949 {
950 SCM chr = SCM_CAR (rest);
951 int c;
952
953 SCM_VALIDATE_CHAR_COPY (1, chr, c);
954 rest = SCM_CDR (rest);
955
956 p[c / SCM_BITS_PER_LONG] &= ~(1L << (c % SCM_BITS_PER_LONG));
957 }
958 return cs;
959 }
960 #undef FUNC_NAME
961
962
963 SCM_DEFINE (scm_char_set_adjoin_x, "char-set-adjoin!", 1, 0, 1,
964 (SCM cs, SCM rest),
965 "Add all character arguments to the first argument, which must\n"
966 "be a character set.")
967 #define FUNC_NAME s_scm_char_set_adjoin_x
968 {
969 long * p;
970
971 SCM_VALIDATE_SMOB (1, cs, charset);
972 SCM_VALIDATE_REST_ARGUMENT (rest);
973
974 p = (long *) SCM_SMOB_DATA (cs);
975 while (!scm_is_null (rest))
976 {
977 SCM chr = SCM_CAR (rest);
978 int c;
979
980 SCM_VALIDATE_CHAR_COPY (1, chr, c);
981 rest = SCM_CDR (rest);
982
983 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
984 }
985 return cs;
986 }
987 #undef FUNC_NAME
988
989
990 SCM_DEFINE (scm_char_set_delete_x, "char-set-delete!", 1, 0, 1,
991 (SCM cs, SCM rest),
992 "Delete all character arguments from the first argument, which\n"
993 "must be a character set.")
994 #define FUNC_NAME s_scm_char_set_delete_x
995 {
996 long * p;
997
998 SCM_VALIDATE_SMOB (1, cs, charset);
999 SCM_VALIDATE_REST_ARGUMENT (rest);
1000
1001 p = (long *) SCM_SMOB_DATA (cs);
1002 while (!scm_is_null (rest))
1003 {
1004 SCM chr = SCM_CAR (rest);
1005 int c;
1006
1007 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1008 rest = SCM_CDR (rest);
1009
1010 p[c / SCM_BITS_PER_LONG] &= ~(1L << (c % SCM_BITS_PER_LONG));
1011 }
1012 return cs;
1013 }
1014 #undef FUNC_NAME
1015
1016
1017 SCM_DEFINE (scm_char_set_complement, "char-set-complement", 1, 0, 0,
1018 (SCM cs),
1019 "Return the complement of the character set @var{cs}.")
1020 #define FUNC_NAME s_scm_char_set_complement
1021 {
1022 int k;
1023 SCM res;
1024 long * p, * q;
1025
1026 SCM_VALIDATE_SMOB (1, cs, charset);
1027
1028 res = make_char_set (FUNC_NAME);
1029 p = (long *) SCM_SMOB_DATA (res);
1030 q = (long *) SCM_SMOB_DATA (cs);
1031 for (k = 0; k < LONGS_PER_CHARSET; k++)
1032 p[k] = ~q[k];
1033 return res;
1034 }
1035 #undef FUNC_NAME
1036
1037
1038 SCM_DEFINE (scm_char_set_union, "char-set-union", 0, 0, 1,
1039 (SCM rest),
1040 "Return the union of all argument character sets.")
1041 #define FUNC_NAME s_scm_char_set_union
1042 {
1043 int c = 1;
1044 SCM res;
1045 long * p;
1046
1047 SCM_VALIDATE_REST_ARGUMENT (rest);
1048
1049 res = make_char_set (FUNC_NAME);
1050 p = (long *) SCM_SMOB_DATA (res);
1051 while (!scm_is_null (rest))
1052 {
1053 int k;
1054 SCM cs = SCM_CAR (rest);
1055 SCM_VALIDATE_SMOB (c, cs, charset);
1056 c++;
1057 rest = SCM_CDR (rest);
1058
1059 for (k = 0; k < LONGS_PER_CHARSET; k++)
1060 p[k] |= ((long *) SCM_SMOB_DATA (cs))[k];
1061 }
1062 return res;
1063 }
1064 #undef FUNC_NAME
1065
1066
1067 SCM_DEFINE (scm_char_set_intersection, "char-set-intersection", 0, 0, 1,
1068 (SCM rest),
1069 "Return the intersection of all argument character sets.")
1070 #define FUNC_NAME s_scm_char_set_intersection
1071 {
1072 SCM res;
1073
1074 SCM_VALIDATE_REST_ARGUMENT (rest);
1075
1076 if (scm_is_null (rest))
1077 res = make_char_set (FUNC_NAME);
1078 else
1079 {
1080 long *p;
1081 int argnum = 2;
1082
1083 res = scm_char_set_copy (SCM_CAR (rest));
1084 p = (long *) SCM_SMOB_DATA (res);
1085 rest = SCM_CDR (rest);
1086
1087 while (scm_is_pair (rest))
1088 {
1089 int k;
1090 SCM cs = SCM_CAR (rest);
1091 long *cs_data;
1092
1093 SCM_VALIDATE_SMOB (argnum, cs, charset);
1094 argnum++;
1095 cs_data = (long *) SCM_SMOB_DATA (cs);
1096 rest = SCM_CDR (rest);
1097 for (k = 0; k < LONGS_PER_CHARSET; k++)
1098 p[k] &= cs_data[k];
1099 }
1100 }
1101
1102 return res;
1103 }
1104 #undef FUNC_NAME
1105
1106
1107 SCM_DEFINE (scm_char_set_difference, "char-set-difference", 1, 0, 1,
1108 (SCM cs1, SCM rest),
1109 "Return the difference of all argument character sets.")
1110 #define FUNC_NAME s_scm_char_set_difference
1111 {
1112 int c = 2;
1113 SCM res;
1114 long * p;
1115
1116 SCM_VALIDATE_SMOB (1, cs1, charset);
1117 SCM_VALIDATE_REST_ARGUMENT (rest);
1118
1119 res = scm_char_set_copy (cs1);
1120 p = (long *) SCM_SMOB_DATA (res);
1121 while (!scm_is_null (rest))
1122 {
1123 int k;
1124 SCM cs = SCM_CAR (rest);
1125 SCM_VALIDATE_SMOB (c, cs, charset);
1126 c++;
1127 rest = SCM_CDR (rest);
1128
1129 for (k = 0; k < LONGS_PER_CHARSET; k++)
1130 p[k] &= ~((long *) SCM_SMOB_DATA (cs))[k];
1131 }
1132 return res;
1133 }
1134 #undef FUNC_NAME
1135
1136
1137 SCM_DEFINE (scm_char_set_xor, "char-set-xor", 0, 0, 1,
1138 (SCM rest),
1139 "Return the exclusive-or of all argument character sets.")
1140 #define FUNC_NAME s_scm_char_set_xor
1141 {
1142 SCM res;
1143
1144 SCM_VALIDATE_REST_ARGUMENT (rest);
1145
1146 if (scm_is_null (rest))
1147 res = make_char_set (FUNC_NAME);
1148 else
1149 {
1150 int argnum = 2;
1151 long * p;
1152
1153 res = scm_char_set_copy (SCM_CAR (rest));
1154 p = (long *) SCM_SMOB_DATA (res);
1155 rest = SCM_CDR (rest);
1156
1157 while (scm_is_pair (rest))
1158 {
1159 SCM cs = SCM_CAR (rest);
1160 long *cs_data;
1161 int k;
1162
1163 SCM_VALIDATE_SMOB (argnum, cs, charset);
1164 argnum++;
1165 cs_data = (long *) SCM_SMOB_DATA (cs);
1166 rest = SCM_CDR (rest);
1167
1168 for (k = 0; k < LONGS_PER_CHARSET; k++)
1169 p[k] ^= cs_data[k];
1170 }
1171 }
1172 return res;
1173 }
1174 #undef FUNC_NAME
1175
1176
1177 SCM_DEFINE (scm_char_set_diff_plus_intersection, "char-set-diff+intersection", 1, 0, 1,
1178 (SCM cs1, SCM rest),
1179 "Return the difference and the intersection of all argument\n"
1180 "character sets.")
1181 #define FUNC_NAME s_scm_char_set_diff_plus_intersection
1182 {
1183 int c = 2;
1184 SCM res1, res2;
1185 long * p, * q;
1186
1187 SCM_VALIDATE_SMOB (1, cs1, charset);
1188 SCM_VALIDATE_REST_ARGUMENT (rest);
1189
1190 res1 = scm_char_set_copy (cs1);
1191 res2 = make_char_set (FUNC_NAME);
1192 p = (long *) SCM_SMOB_DATA (res1);
1193 q = (long *) SCM_SMOB_DATA (res2);
1194 while (!scm_is_null (rest))
1195 {
1196 int k;
1197 SCM cs = SCM_CAR (rest);
1198 long *r;
1199
1200 SCM_VALIDATE_SMOB (c, cs, charset);
1201 c++;
1202 r = (long *) SCM_SMOB_DATA (cs);
1203
1204 for (k = 0; k < LONGS_PER_CHARSET; k++)
1205 {
1206 q[k] |= p[k] & r[k];
1207 p[k] &= ~r[k];
1208 }
1209 rest = SCM_CDR (rest);
1210 }
1211 return scm_values (scm_list_2 (res1, res2));
1212 }
1213 #undef FUNC_NAME
1214
1215
1216 SCM_DEFINE (scm_char_set_complement_x, "char-set-complement!", 1, 0, 0,
1217 (SCM cs),
1218 "Return the complement of the character set @var{cs}.")
1219 #define FUNC_NAME s_scm_char_set_complement_x
1220 {
1221 int k;
1222 long * p;
1223
1224 SCM_VALIDATE_SMOB (1, cs, charset);
1225 p = (long *) SCM_SMOB_DATA (cs);
1226 for (k = 0; k < LONGS_PER_CHARSET; k++)
1227 p[k] = ~p[k];
1228 return cs;
1229 }
1230 #undef FUNC_NAME
1231
1232
1233 SCM_DEFINE (scm_char_set_union_x, "char-set-union!", 1, 0, 1,
1234 (SCM cs1, SCM rest),
1235 "Return the union of all argument character sets.")
1236 #define FUNC_NAME s_scm_char_set_union_x
1237 {
1238 int c = 2;
1239 long * p;
1240
1241 SCM_VALIDATE_SMOB (1, cs1, charset);
1242 SCM_VALIDATE_REST_ARGUMENT (rest);
1243
1244 p = (long *) SCM_SMOB_DATA (cs1);
1245 while (!scm_is_null (rest))
1246 {
1247 int k;
1248 SCM cs = SCM_CAR (rest);
1249 SCM_VALIDATE_SMOB (c, cs, charset);
1250 c++;
1251 rest = SCM_CDR (rest);
1252
1253 for (k = 0; k < LONGS_PER_CHARSET; k++)
1254 p[k] |= ((long *) SCM_SMOB_DATA (cs))[k];
1255 }
1256 return cs1;
1257 }
1258 #undef FUNC_NAME
1259
1260
1261 SCM_DEFINE (scm_char_set_intersection_x, "char-set-intersection!", 1, 0, 1,
1262 (SCM cs1, SCM rest),
1263 "Return the intersection of all argument character sets.")
1264 #define FUNC_NAME s_scm_char_set_intersection_x
1265 {
1266 int c = 2;
1267 long * p;
1268
1269 SCM_VALIDATE_SMOB (1, cs1, charset);
1270 SCM_VALIDATE_REST_ARGUMENT (rest);
1271
1272 p = (long *) SCM_SMOB_DATA (cs1);
1273 while (!scm_is_null (rest))
1274 {
1275 int k;
1276 SCM cs = SCM_CAR (rest);
1277 SCM_VALIDATE_SMOB (c, cs, charset);
1278 c++;
1279 rest = SCM_CDR (rest);
1280
1281 for (k = 0; k < LONGS_PER_CHARSET; k++)
1282 p[k] &= ((long *) SCM_SMOB_DATA (cs))[k];
1283 }
1284 return cs1;
1285 }
1286 #undef FUNC_NAME
1287
1288
1289 SCM_DEFINE (scm_char_set_difference_x, "char-set-difference!", 1, 0, 1,
1290 (SCM cs1, SCM rest),
1291 "Return the difference of all argument character sets.")
1292 #define FUNC_NAME s_scm_char_set_difference_x
1293 {
1294 int c = 2;
1295 long * p;
1296
1297 SCM_VALIDATE_SMOB (1, cs1, charset);
1298 SCM_VALIDATE_REST_ARGUMENT (rest);
1299
1300 p = (long *) SCM_SMOB_DATA (cs1);
1301 while (!scm_is_null (rest))
1302 {
1303 int k;
1304 SCM cs = SCM_CAR (rest);
1305 SCM_VALIDATE_SMOB (c, cs, charset);
1306 c++;
1307 rest = SCM_CDR (rest);
1308
1309 for (k = 0; k < LONGS_PER_CHARSET; k++)
1310 p[k] &= ~((long *) SCM_SMOB_DATA (cs))[k];
1311 }
1312 return cs1;
1313 }
1314 #undef FUNC_NAME
1315
1316
1317 SCM_DEFINE (scm_char_set_xor_x, "char-set-xor!", 1, 0, 1,
1318 (SCM cs1, SCM rest),
1319 "Return the exclusive-or of all argument character sets.")
1320 #define FUNC_NAME s_scm_char_set_xor_x
1321 {
1322 /* a side-effecting variant should presumably give consistent results:
1323 (define a (char-set #\a))
1324 (char-set-xor a a a) -> char set #\a
1325 (char-set-xor! a a a) -> char set #\a
1326 */
1327 return scm_char_set_xor (scm_cons (cs1, rest));
1328
1329 #if 0
1330 /* this would give (char-set-xor! a a a) -> empty char set. */
1331 int c = 2;
1332 long * p;
1333
1334 SCM_VALIDATE_SMOB (1, cs1, charset);
1335 SCM_VALIDATE_REST_ARGUMENT (rest);
1336
1337 p = (long *) SCM_SMOB_DATA (cs1);
1338 while (!scm_is_null (rest))
1339 {
1340 int k;
1341 SCM cs = SCM_CAR (rest);
1342 SCM_VALIDATE_SMOB (c, cs, charset);
1343 c++;
1344 rest = SCM_CDR (rest);
1345
1346 for (k = 0; k < LONGS_PER_CHARSET; k++)
1347 p[k] ^= ((long *) SCM_SMOB_DATA (cs))[k];
1348 }
1349 return cs1;
1350 #endif
1351 }
1352 #undef FUNC_NAME
1353
1354
1355 SCM_DEFINE (scm_char_set_diff_plus_intersection_x, "char-set-diff+intersection!", 2, 0, 1,
1356 (SCM cs1, SCM cs2, SCM rest),
1357 "Return the difference and the intersection of all argument\n"
1358 "character sets.")
1359 #define FUNC_NAME s_scm_char_set_diff_plus_intersection_x
1360 {
1361 int c = 3;
1362 long * p, * q;
1363 int k;
1364
1365 SCM_VALIDATE_SMOB (1, cs1, charset);
1366 SCM_VALIDATE_SMOB (2, cs2, charset);
1367 SCM_VALIDATE_REST_ARGUMENT (rest);
1368
1369 p = (long *) SCM_SMOB_DATA (cs1);
1370 q = (long *) SCM_SMOB_DATA (cs2);
1371 if (p == q)
1372 {
1373 /* (char-set-diff+intersection! a a ...): can't share storage,
1374 but we know the answer without checking for further
1375 arguments. */
1376 return scm_values (scm_list_2 (make_char_set (FUNC_NAME), cs1));
1377 }
1378 for (k = 0; k < LONGS_PER_CHARSET; k++)
1379 {
1380 long t = p[k];
1381
1382 p[k] &= ~q[k];
1383 q[k] = t & q[k];
1384 }
1385 while (!scm_is_null (rest))
1386 {
1387 SCM cs = SCM_CAR (rest);
1388 long *r;
1389
1390 SCM_VALIDATE_SMOB (c, cs, charset);
1391 c++;
1392 r = (long *) SCM_SMOB_DATA (cs);
1393
1394 for (k = 0; k < LONGS_PER_CHARSET; k++)
1395 {
1396 q[k] |= p[k] & r[k];
1397 p[k] &= ~r[k];
1398 }
1399 rest = SCM_CDR (rest);
1400 }
1401 return scm_values (scm_list_2 (cs1, cs2));
1402 }
1403 #undef FUNC_NAME
1404
1405 \f
1406 /* Standard character sets. */
1407
1408 SCM scm_char_set_lower_case;
1409 SCM scm_char_set_upper_case;
1410 SCM scm_char_set_title_case;
1411 SCM scm_char_set_letter;
1412 SCM scm_char_set_digit;
1413 SCM scm_char_set_letter_and_digit;
1414 SCM scm_char_set_graphic;
1415 SCM scm_char_set_printing;
1416 SCM scm_char_set_whitespace;
1417 SCM scm_char_set_iso_control;
1418 SCM scm_char_set_punctuation;
1419 SCM scm_char_set_symbol;
1420 SCM scm_char_set_hex_digit;
1421 SCM scm_char_set_blank;
1422 SCM scm_char_set_ascii;
1423 SCM scm_char_set_empty;
1424 SCM scm_char_set_full;
1425
1426
1427 /* Create an empty character set and return it after binding it to NAME. */
1428 static inline SCM
1429 define_charset (const char *name)
1430 {
1431 SCM cs = make_char_set (NULL);
1432 scm_c_define (name, cs);
1433 return scm_permanent_object (cs);
1434 }
1435
1436 /* Membership predicates for the various char sets.
1437
1438 XXX: The `punctuation' and `symbol' char sets have no direct equivalent in
1439 <ctype.h>. Thus, the predicates below yield correct results for ASCII,
1440 but they do not provide the result described by the SRFI for Latin-1. The
1441 correct Latin-1 result could only be obtained by hard-coding the
1442 characters listed by the SRFI, but the problem would remain for other
1443 8-bit charsets.
1444
1445 Similarly, character 0xA0 in Latin-1 (unbreakable space, `#\0240') should
1446 be part of `char-set:blank'. However, glibc's current (2006/09) Latin-1
1447 locales (which use the ISO 14652 "i18n" FDCC-set) do not consider it
1448 `blank' so it ends up in `char-set:punctuation'. */
1449 #ifdef HAVE_ISBLANK
1450 # define CSET_BLANK_PRED(c) (isblank (c))
1451 #else
1452 # define CSET_BLANK_PRED(c) \
1453 (((c) == ' ') || ((c) == '\t'))
1454 #endif
1455
1456 #define CSET_SYMBOL_PRED(c) \
1457 (((c) != '\0') && (strchr ("$+<=>^`|~", (c)) != NULL))
1458 #define CSET_PUNCT_PRED(c) \
1459 ((ispunct (c)) && (!CSET_SYMBOL_PRED (c)))
1460
1461 #define CSET_LOWER_PRED(c) (islower (c))
1462 #define CSET_UPPER_PRED(c) (isupper (c))
1463 #define CSET_LETTER_PRED(c) (isalpha (c))
1464 #define CSET_DIGIT_PRED(c) (isdigit (c))
1465 #define CSET_WHITESPACE_PRED(c) (isspace (c))
1466 #define CSET_CONTROL_PRED(c) (iscntrl (c))
1467 #define CSET_HEX_DIGIT_PRED(c) (isxdigit (c))
1468 #define CSET_ASCII_PRED(c) (isascii (c))
1469
1470 /* Some char sets are explicitly defined by the SRFI as a union of other char
1471 sets so we try to follow this closely. */
1472
1473 #define CSET_LETTER_AND_DIGIT_PRED(c) \
1474 (CSET_LETTER_PRED (c) || CSET_DIGIT_PRED (c))
1475
1476 #define CSET_GRAPHIC_PRED(c) \
1477 (CSET_LETTER_PRED (c) || CSET_DIGIT_PRED (c) \
1478 || CSET_PUNCT_PRED (c) || CSET_SYMBOL_PRED (c))
1479
1480 #define CSET_PRINTING_PRED(c) \
1481 (CSET_GRAPHIC_PRED (c) || CSET_WHITESPACE_PRED (c))
1482
1483 /* False and true predicates. */
1484 #define CSET_TRUE_PRED(c) (1)
1485 #define CSET_FALSE_PRED(c) (0)
1486
1487
1488 /* Compute the contents of all the standard character sets. Computation may
1489 need to be re-done at `setlocale'-time because some char sets (e.g.,
1490 `char-set:letter') need to reflect the character set supported by Guile.
1491
1492 For instance, at startup time, the "C" locale is used, thus Guile supports
1493 only ASCII; therefore, `char-set:letter' only contains English letters.
1494 The user can change this by invoking `setlocale' and specifying a locale
1495 with an 8-bit charset, thereby augmenting some of the SRFI-14 standard
1496 character sets.
1497
1498 This works because some of the predicates used below to construct
1499 character sets (e.g., `isalpha(3)') are locale-dependent (so
1500 charset-dependent, though generally not language-dependent). For details,
1501 please see the `guile-devel' mailing list archive of September 2006. */
1502 void
1503 scm_srfi_14_compute_char_sets (void)
1504 {
1505 #define UPDATE_CSET(c, cset, pred) \
1506 do \
1507 { \
1508 if (pred (c)) \
1509 SCM_CHARSET_SET ((cset), (c)); \
1510 else \
1511 SCM_CHARSET_UNSET ((cset), (c)); \
1512 } \
1513 while (0)
1514
1515 register int ch;
1516
1517 for (ch = 0; ch < 256; ch++)
1518 {
1519 UPDATE_CSET (ch, scm_char_set_upper_case, CSET_UPPER_PRED);
1520 UPDATE_CSET (ch, scm_char_set_lower_case, CSET_LOWER_PRED);
1521 UPDATE_CSET (ch, scm_char_set_title_case, CSET_FALSE_PRED);
1522 UPDATE_CSET (ch, scm_char_set_letter, CSET_LETTER_PRED);
1523 UPDATE_CSET (ch, scm_char_set_digit, CSET_DIGIT_PRED);
1524 UPDATE_CSET (ch, scm_char_set_letter_and_digit,
1525 CSET_LETTER_AND_DIGIT_PRED);
1526 UPDATE_CSET (ch, scm_char_set_graphic, CSET_GRAPHIC_PRED);
1527 UPDATE_CSET (ch, scm_char_set_printing, CSET_PRINTING_PRED);
1528 UPDATE_CSET (ch, scm_char_set_whitespace, CSET_WHITESPACE_PRED);
1529 UPDATE_CSET (ch, scm_char_set_iso_control, CSET_CONTROL_PRED);
1530 UPDATE_CSET (ch, scm_char_set_punctuation, CSET_PUNCT_PRED);
1531 UPDATE_CSET (ch, scm_char_set_symbol, CSET_SYMBOL_PRED);
1532 UPDATE_CSET (ch, scm_char_set_hex_digit, CSET_HEX_DIGIT_PRED);
1533 UPDATE_CSET (ch, scm_char_set_blank, CSET_BLANK_PRED);
1534 UPDATE_CSET (ch, scm_char_set_ascii, CSET_ASCII_PRED);
1535 UPDATE_CSET (ch, scm_char_set_empty, CSET_FALSE_PRED);
1536 UPDATE_CSET (ch, scm_char_set_full, CSET_TRUE_PRED);
1537 }
1538
1539 #undef UPDATE_CSET
1540 }
1541
1542 \f
1543 void
1544 scm_init_srfi_14 (void)
1545 {
1546 scm_tc16_charset = scm_make_smob_type ("character-set",
1547 BYTES_PER_CHARSET);
1548 scm_set_smob_free (scm_tc16_charset, charset_free);
1549 scm_set_smob_print (scm_tc16_charset, charset_print);
1550
1551 scm_char_set_upper_case = define_charset ("char-set:upper-case");
1552 scm_char_set_lower_case = define_charset ("char-set:lower-case");
1553 scm_char_set_title_case = define_charset ("char-set:title-case");
1554 scm_char_set_letter = define_charset ("char-set:letter");
1555 scm_char_set_digit = define_charset ("char-set:digit");
1556 scm_char_set_letter_and_digit = define_charset ("char-set:letter+digit");
1557 scm_char_set_graphic = define_charset ("char-set:graphic");
1558 scm_char_set_printing = define_charset ("char-set:printing");
1559 scm_char_set_whitespace = define_charset ("char-set:whitespace");
1560 scm_char_set_iso_control = define_charset ("char-set:iso-control");
1561 scm_char_set_punctuation = define_charset ("char-set:punctuation");
1562 scm_char_set_symbol = define_charset ("char-set:symbol");
1563 scm_char_set_hex_digit = define_charset ("char-set:hex-digit");
1564 scm_char_set_blank = define_charset ("char-set:blank");
1565 scm_char_set_ascii = define_charset ("char-set:ascii");
1566 scm_char_set_empty = define_charset ("char-set:empty");
1567 scm_char_set_full = define_charset ("char-set:full");
1568
1569 scm_srfi_14_compute_char_sets ();
1570
1571 #include "libguile/srfi-14.x"
1572 }
1573
1574 /* End of srfi-14.c. */