Revert "(scm_shell_usage): Note need for subscription to bug-guile@gnu.org."
[bpt/guile.git] / libguile / srfi-14.c
1 /* srfi-14.c --- SRFI-14 procedures for Guile
2 *
3 * Copyright (C) 2001, 2004, 2006, 2007 Free Software Foundation, Inc.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24
25 #include <string.h>
26 #include <ctype.h>
27
28 #include "libguile.h"
29 #include "libguile/srfi-14.h"
30
31
32 #define SCM_CHARSET_SET(cs, idx) \
33 (((long *) SCM_SMOB_DATA (cs))[(idx) / SCM_BITS_PER_LONG] |= \
34 (1L << ((idx) % SCM_BITS_PER_LONG)))
35
36 #define SCM_CHARSET_UNSET(cs, idx) \
37 (((long *) SCM_SMOB_DATA (cs))[(idx) / SCM_BITS_PER_LONG] &= \
38 (~(1L << ((idx) % SCM_BITS_PER_LONG))))
39
40 #define BYTES_PER_CHARSET (SCM_CHARSET_SIZE / 8)
41 #define LONGS_PER_CHARSET (SCM_CHARSET_SIZE / SCM_BITS_PER_LONG)
42
43
44 /* Smob type code for character sets. */
45 int scm_tc16_charset = 0;
46
47
48 /* Smob print hook for character sets. */
49 static int
50 charset_print (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED)
51 {
52 int i;
53 int first = 1;
54
55 scm_puts ("#<charset {", port);
56 for (i = 0; i < SCM_CHARSET_SIZE; i++)
57 if (SCM_CHARSET_GET (charset, i))
58 {
59 if (first)
60 first = 0;
61 else
62 scm_puts (" ", port);
63 scm_write (SCM_MAKE_CHAR (i), port);
64 }
65 scm_puts ("}>", port);
66 return 1;
67 }
68
69
70 /* Smob free hook for character sets. */
71 static size_t
72 charset_free (SCM charset)
73 {
74 return scm_smob_free (charset);
75 }
76
77
78 /* Create a new, empty character set. */
79 static SCM
80 make_char_set (const char * func_name)
81 {
82 long * p;
83
84 p = scm_gc_malloc (BYTES_PER_CHARSET, "character-set");
85 memset (p, 0, BYTES_PER_CHARSET);
86 SCM_RETURN_NEWSMOB (scm_tc16_charset, p);
87 }
88
89
90 SCM_DEFINE (scm_char_set_p, "char-set?", 1, 0, 0,
91 (SCM obj),
92 "Return @code{#t} if @var{obj} is a character set, @code{#f}\n"
93 "otherwise.")
94 #define FUNC_NAME s_scm_char_set_p
95 {
96 return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_charset, obj));
97 }
98 #undef FUNC_NAME
99
100
101 SCM_DEFINE (scm_char_set_eq, "char-set=", 0, 0, 1,
102 (SCM char_sets),
103 "Return @code{#t} if all given character sets are equal.")
104 #define FUNC_NAME s_scm_char_set_eq
105 {
106 int argnum = 1;
107 long *cs1_data = NULL;
108
109 SCM_VALIDATE_REST_ARGUMENT (char_sets);
110
111 while (!scm_is_null (char_sets))
112 {
113 SCM csi = SCM_CAR (char_sets);
114 long *csi_data;
115
116 SCM_VALIDATE_SMOB (argnum, csi, charset);
117 argnum++;
118 csi_data = (long *) SCM_SMOB_DATA (csi);
119 if (cs1_data == NULL)
120 cs1_data = csi_data;
121 else if (memcmp (cs1_data, csi_data, BYTES_PER_CHARSET) != 0)
122 return SCM_BOOL_F;
123 char_sets = SCM_CDR (char_sets);
124 }
125 return SCM_BOOL_T;
126 }
127 #undef FUNC_NAME
128
129
130 SCM_DEFINE (scm_char_set_leq, "char-set<=", 0, 0, 1,
131 (SCM char_sets),
132 "Return @code{#t} if every character set @var{cs}i is a subset\n"
133 "of character set @var{cs}i+1.")
134 #define FUNC_NAME s_scm_char_set_leq
135 {
136 int argnum = 1;
137 long *prev_data = NULL;
138
139 SCM_VALIDATE_REST_ARGUMENT (char_sets);
140
141 while (!scm_is_null (char_sets))
142 {
143 SCM csi = SCM_CAR (char_sets);
144 long *csi_data;
145
146 SCM_VALIDATE_SMOB (argnum, csi, charset);
147 argnum++;
148 csi_data = (long *) SCM_SMOB_DATA (csi);
149 if (prev_data)
150 {
151 int k;
152
153 for (k = 0; k < LONGS_PER_CHARSET; k++)
154 {
155 if ((prev_data[k] & csi_data[k]) != prev_data[k])
156 return SCM_BOOL_F;
157 }
158 }
159 prev_data = csi_data;
160 char_sets = SCM_CDR (char_sets);
161 }
162 return SCM_BOOL_T;
163 }
164 #undef FUNC_NAME
165
166
167 SCM_DEFINE (scm_char_set_hash, "char-set-hash", 1, 1, 0,
168 (SCM cs, SCM bound),
169 "Compute a hash value for the character set @var{cs}. If\n"
170 "@var{bound} is given and non-zero, it restricts the\n"
171 "returned value to the range 0 @dots{} @var{bound - 1}.")
172 #define FUNC_NAME s_scm_char_set_hash
173 {
174 const unsigned long default_bnd = 871;
175 unsigned long bnd;
176 long * p;
177 unsigned long val = 0;
178 int k;
179
180 SCM_VALIDATE_SMOB (1, cs, charset);
181
182 if (SCM_UNBNDP (bound))
183 bnd = default_bnd;
184 else
185 {
186 bnd = scm_to_ulong (bound);
187 if (bnd == 0)
188 bnd = default_bnd;
189 }
190
191 p = (long *) SCM_SMOB_DATA (cs);
192 for (k = 0; k < LONGS_PER_CHARSET; k++)
193 {
194 if (p[k] != 0)
195 val = p[k] + (val << 1);
196 }
197 return scm_from_ulong (val % bnd);
198 }
199 #undef FUNC_NAME
200
201
202 SCM_DEFINE (scm_char_set_cursor, "char-set-cursor", 1, 0, 0,
203 (SCM cs),
204 "Return a cursor into the character set @var{cs}.")
205 #define FUNC_NAME s_scm_char_set_cursor
206 {
207 int idx;
208
209 SCM_VALIDATE_SMOB (1, cs, charset);
210 for (idx = 0; idx < SCM_CHARSET_SIZE; idx++)
211 {
212 if (SCM_CHARSET_GET (cs, idx))
213 break;
214 }
215 return SCM_I_MAKINUM (idx);
216 }
217 #undef FUNC_NAME
218
219
220 SCM_DEFINE (scm_char_set_ref, "char-set-ref", 2, 0, 0,
221 (SCM cs, SCM cursor),
222 "Return the character at the current cursor position\n"
223 "@var{cursor} in the character set @var{cs}. It is an error to\n"
224 "pass a cursor for which @code{end-of-char-set?} returns true.")
225 #define FUNC_NAME s_scm_char_set_ref
226 {
227 size_t ccursor = scm_to_size_t (cursor);
228 SCM_VALIDATE_SMOB (1, cs, charset);
229
230 if (ccursor >= SCM_CHARSET_SIZE || !SCM_CHARSET_GET (cs, ccursor))
231 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
232 return SCM_MAKE_CHAR (ccursor);
233 }
234 #undef FUNC_NAME
235
236
237 SCM_DEFINE (scm_char_set_cursor_next, "char-set-cursor-next", 2, 0, 0,
238 (SCM cs, SCM cursor),
239 "Advance the character set cursor @var{cursor} to the next\n"
240 "character in the character set @var{cs}. It is an error if the\n"
241 "cursor given satisfies @code{end-of-char-set?}.")
242 #define FUNC_NAME s_scm_char_set_cursor_next
243 {
244 size_t ccursor = scm_to_size_t (cursor);
245 SCM_VALIDATE_SMOB (1, cs, charset);
246
247 if (ccursor >= SCM_CHARSET_SIZE || !SCM_CHARSET_GET (cs, ccursor))
248 SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
249 for (ccursor++; ccursor < SCM_CHARSET_SIZE; ccursor++)
250 {
251 if (SCM_CHARSET_GET (cs, ccursor))
252 break;
253 }
254 return SCM_I_MAKINUM (ccursor);
255 }
256 #undef FUNC_NAME
257
258
259 SCM_DEFINE (scm_end_of_char_set_p, "end-of-char-set?", 1, 0, 0,
260 (SCM cursor),
261 "Return @code{#t} if @var{cursor} has reached the end of a\n"
262 "character set, @code{#f} otherwise.")
263 #define FUNC_NAME s_scm_end_of_char_set_p
264 {
265 size_t ccursor = scm_to_size_t (cursor);
266 return scm_from_bool (ccursor >= SCM_CHARSET_SIZE);
267 }
268 #undef FUNC_NAME
269
270
271 SCM_DEFINE (scm_char_set_fold, "char-set-fold", 3, 0, 0,
272 (SCM kons, SCM knil, SCM cs),
273 "Fold the procedure @var{kons} over the character set @var{cs},\n"
274 "initializing it with @var{knil}.")
275 #define FUNC_NAME s_scm_char_set_fold
276 {
277 int k;
278
279 SCM_VALIDATE_PROC (1, kons);
280 SCM_VALIDATE_SMOB (3, cs, charset);
281
282 for (k = 0; k < SCM_CHARSET_SIZE; k++)
283 if (SCM_CHARSET_GET (cs, k))
284 {
285 knil = scm_call_2 (kons, SCM_MAKE_CHAR (k), knil);
286 }
287 return knil;
288 }
289 #undef FUNC_NAME
290
291
292 SCM_DEFINE (scm_char_set_unfold, "char-set-unfold", 4, 1, 0,
293 (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
294 "This is a fundamental constructor for character sets.\n"
295 "@itemize @bullet\n"
296 "@item @var{g} is used to generate a series of ``seed'' values\n"
297 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
298 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
299 "@item @var{p} tells us when to stop -- when it returns true\n"
300 "when applied to one of the seed values.\n"
301 "@item @var{f} maps each seed value to a character. These\n"
302 "characters are added to the base character set @var{base_cs} to\n"
303 "form the result; @var{base_cs} defaults to the empty set.\n"
304 "@end itemize")
305 #define FUNC_NAME s_scm_char_set_unfold
306 {
307 SCM result, tmp;
308
309 SCM_VALIDATE_PROC (1, p);
310 SCM_VALIDATE_PROC (2, f);
311 SCM_VALIDATE_PROC (3, g);
312 if (!SCM_UNBNDP (base_cs))
313 {
314 SCM_VALIDATE_SMOB (5, base_cs, charset);
315 result = scm_char_set_copy (base_cs);
316 }
317 else
318 result = make_char_set (FUNC_NAME);
319
320 tmp = scm_call_1 (p, seed);
321 while (scm_is_false (tmp))
322 {
323 SCM ch = scm_call_1 (f, seed);
324 if (!SCM_CHARP (ch))
325 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
326 SCM_CHARSET_SET (result, SCM_CHAR (ch));
327
328 seed = scm_call_1 (g, seed);
329 tmp = scm_call_1 (p, seed);
330 }
331 return result;
332 }
333 #undef FUNC_NAME
334
335
336 SCM_DEFINE (scm_char_set_unfold_x, "char-set-unfold!", 5, 0, 0,
337 (SCM p, SCM f, SCM g, SCM seed, SCM base_cs),
338 "This is a fundamental constructor for character sets.\n"
339 "@itemize @bullet\n"
340 "@item @var{g} is used to generate a series of ``seed'' values\n"
341 "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n"
342 "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n"
343 "@item @var{p} tells us when to stop -- when it returns true\n"
344 "when applied to one of the seed values.\n"
345 "@item @var{f} maps each seed value to a character. These\n"
346 "characters are added to the base character set @var{base_cs} to\n"
347 "form the result; @var{base_cs} defaults to the empty set.\n"
348 "@end itemize")
349 #define FUNC_NAME s_scm_char_set_unfold_x
350 {
351 SCM tmp;
352
353 SCM_VALIDATE_PROC (1, p);
354 SCM_VALIDATE_PROC (2, f);
355 SCM_VALIDATE_PROC (3, g);
356 SCM_VALIDATE_SMOB (5, base_cs, charset);
357
358 tmp = scm_call_1 (p, seed);
359 while (scm_is_false (tmp))
360 {
361 SCM ch = scm_call_1 (f, seed);
362 if (!SCM_CHARP (ch))
363 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
364 SCM_CHARSET_SET (base_cs, SCM_CHAR (ch));
365
366 seed = scm_call_1 (g, seed);
367 tmp = scm_call_1 (p, seed);
368 }
369 return base_cs;
370 }
371 #undef FUNC_NAME
372
373
374 SCM_DEFINE (scm_char_set_for_each, "char-set-for-each", 2, 0, 0,
375 (SCM proc, SCM cs),
376 "Apply @var{proc} to every character in the character set\n"
377 "@var{cs}. The return value is not specified.")
378 #define FUNC_NAME s_scm_char_set_for_each
379 {
380 int k;
381
382 SCM_VALIDATE_PROC (1, proc);
383 SCM_VALIDATE_SMOB (2, cs, charset);
384
385 for (k = 0; k < SCM_CHARSET_SIZE; k++)
386 if (SCM_CHARSET_GET (cs, k))
387 scm_call_1 (proc, SCM_MAKE_CHAR (k));
388 return SCM_UNSPECIFIED;
389 }
390 #undef FUNC_NAME
391
392
393 SCM_DEFINE (scm_char_set_map, "char-set-map", 2, 0, 0,
394 (SCM proc, SCM cs),
395 "Map the procedure @var{proc} over every character in @var{cs}.\n"
396 "@var{proc} must be a character -> character procedure.")
397 #define FUNC_NAME s_scm_char_set_map
398 {
399 SCM result;
400 int k;
401
402 SCM_VALIDATE_PROC (1, proc);
403 SCM_VALIDATE_SMOB (2, cs, charset);
404
405 result = make_char_set (FUNC_NAME);
406 for (k = 0; k < SCM_CHARSET_SIZE; k++)
407 if (SCM_CHARSET_GET (cs, k))
408 {
409 SCM ch = scm_call_1 (proc, SCM_MAKE_CHAR (k));
410 if (!SCM_CHARP (ch))
411 SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (proc));
412 SCM_CHARSET_SET (result, SCM_CHAR (ch));
413 }
414 return result;
415 }
416 #undef FUNC_NAME
417
418
419 SCM_DEFINE (scm_char_set_copy, "char-set-copy", 1, 0, 0,
420 (SCM cs),
421 "Return a newly allocated character set containing all\n"
422 "characters in @var{cs}.")
423 #define FUNC_NAME s_scm_char_set_copy
424 {
425 SCM ret;
426 long * p1, * p2;
427 int k;
428
429 SCM_VALIDATE_SMOB (1, cs, charset);
430 ret = make_char_set (FUNC_NAME);
431 p1 = (long *) SCM_SMOB_DATA (cs);
432 p2 = (long *) SCM_SMOB_DATA (ret);
433 for (k = 0; k < LONGS_PER_CHARSET; k++)
434 p2[k] = p1[k];
435 return ret;
436 }
437 #undef FUNC_NAME
438
439
440 SCM_DEFINE (scm_char_set, "char-set", 0, 0, 1,
441 (SCM rest),
442 "Return a character set containing all given characters.")
443 #define FUNC_NAME s_scm_char_set
444 {
445 SCM cs;
446 long * p;
447 int argnum = 1;
448
449 SCM_VALIDATE_REST_ARGUMENT (rest);
450 cs = make_char_set (FUNC_NAME);
451 p = (long *) SCM_SMOB_DATA (cs);
452 while (!scm_is_null (rest))
453 {
454 int c;
455
456 SCM_VALIDATE_CHAR_COPY (argnum, SCM_CAR (rest), c);
457 argnum++;
458 rest = SCM_CDR (rest);
459 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
460 }
461 return cs;
462 }
463 #undef FUNC_NAME
464
465
466 SCM_DEFINE (scm_list_to_char_set, "list->char-set", 1, 1, 0,
467 (SCM list, SCM base_cs),
468 "Convert the character list @var{list} to a character set. If\n"
469 "the character set @var{base_cs} is given, the character in this\n"
470 "set are also included in the result.")
471 #define FUNC_NAME s_scm_list_to_char_set
472 {
473 SCM cs;
474 long * p;
475
476 SCM_VALIDATE_LIST (1, list);
477 if (SCM_UNBNDP (base_cs))
478 cs = make_char_set (FUNC_NAME);
479 else
480 {
481 SCM_VALIDATE_SMOB (2, base_cs, charset);
482 cs = scm_char_set_copy (base_cs);
483 }
484 p = (long *) SCM_SMOB_DATA (cs);
485 while (!scm_is_null (list))
486 {
487 SCM chr = SCM_CAR (list);
488 int c;
489
490 SCM_VALIDATE_CHAR_COPY (0, chr, c);
491 list = SCM_CDR (list);
492
493 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
494 }
495 return cs;
496 }
497 #undef FUNC_NAME
498
499
500 SCM_DEFINE (scm_list_to_char_set_x, "list->char-set!", 2, 0, 0,
501 (SCM list, SCM base_cs),
502 "Convert the character list @var{list} to a character set. The\n"
503 "characters are added to @var{base_cs} and @var{base_cs} is\n"
504 "returned.")
505 #define FUNC_NAME s_scm_list_to_char_set_x
506 {
507 long * p;
508
509 SCM_VALIDATE_LIST (1, list);
510 SCM_VALIDATE_SMOB (2, base_cs, charset);
511 p = (long *) SCM_SMOB_DATA (base_cs);
512 while (!scm_is_null (list))
513 {
514 SCM chr = SCM_CAR (list);
515 int c;
516
517 SCM_VALIDATE_CHAR_COPY (0, chr, c);
518 list = SCM_CDR (list);
519
520 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
521 }
522 return base_cs;
523 }
524 #undef FUNC_NAME
525
526
527 SCM_DEFINE (scm_string_to_char_set, "string->char-set", 1, 1, 0,
528 (SCM str, SCM base_cs),
529 "Convert the string @var{str} to a character set. If the\n"
530 "character set @var{base_cs} is given, the characters in this\n"
531 "set are also included in the result.")
532 #define FUNC_NAME s_scm_string_to_char_set
533 {
534 SCM cs;
535 long * p;
536 const char * s;
537 size_t k = 0, len;
538
539 SCM_VALIDATE_STRING (1, str);
540 if (SCM_UNBNDP (base_cs))
541 cs = make_char_set (FUNC_NAME);
542 else
543 {
544 SCM_VALIDATE_SMOB (2, base_cs, charset);
545 cs = scm_char_set_copy (base_cs);
546 }
547 p = (long *) SCM_SMOB_DATA (cs);
548 s = scm_i_string_chars (str);
549 len = scm_i_string_length (str);
550 while (k < len)
551 {
552 int c = s[k++];
553 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
554 }
555 scm_remember_upto_here_1 (str);
556 return cs;
557 }
558 #undef FUNC_NAME
559
560
561 SCM_DEFINE (scm_string_to_char_set_x, "string->char-set!", 2, 0, 0,
562 (SCM str, SCM base_cs),
563 "Convert the string @var{str} to a character set. The\n"
564 "characters from the string are added to @var{base_cs}, and\n"
565 "@var{base_cs} is returned.")
566 #define FUNC_NAME s_scm_string_to_char_set_x
567 {
568 long * p;
569 const char * s;
570 size_t k = 0, len;
571
572 SCM_VALIDATE_STRING (1, str);
573 SCM_VALIDATE_SMOB (2, base_cs, charset);
574 p = (long *) SCM_SMOB_DATA (base_cs);
575 s = scm_i_string_chars (str);
576 len = scm_i_string_length (str);
577 while (k < len)
578 {
579 int c = s[k++];
580 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
581 }
582 scm_remember_upto_here_1 (str);
583 return base_cs;
584 }
585 #undef FUNC_NAME
586
587
588 SCM_DEFINE (scm_char_set_filter, "char-set-filter", 2, 1, 0,
589 (SCM pred, SCM cs, SCM base_cs),
590 "Return a character set containing every character from @var{cs}\n"
591 "so that it satisfies @var{pred}. If provided, the characters\n"
592 "from @var{base_cs} are added to the result.")
593 #define FUNC_NAME s_scm_char_set_filter
594 {
595 SCM ret;
596 int k;
597 long * p;
598
599 SCM_VALIDATE_PROC (1, pred);
600 SCM_VALIDATE_SMOB (2, cs, charset);
601 if (!SCM_UNBNDP (base_cs))
602 {
603 SCM_VALIDATE_SMOB (3, base_cs, charset);
604 ret = scm_char_set_copy (base_cs);
605 }
606 else
607 ret = make_char_set (FUNC_NAME);
608 p = (long *) SCM_SMOB_DATA (ret);
609 for (k = 0; k < SCM_CHARSET_SIZE; k++)
610 {
611 if (SCM_CHARSET_GET (cs, k))
612 {
613 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
614
615 if (scm_is_true (res))
616 p[k / SCM_BITS_PER_LONG] |= 1L << (k % SCM_BITS_PER_LONG);
617 }
618 }
619 return ret;
620 }
621 #undef FUNC_NAME
622
623
624 SCM_DEFINE (scm_char_set_filter_x, "char-set-filter!", 3, 0, 0,
625 (SCM pred, SCM cs, SCM base_cs),
626 "Return a character set containing every character from @var{cs}\n"
627 "so that it satisfies @var{pred}. The characters are added to\n"
628 "@var{base_cs} and @var{base_cs} is returned.")
629 #define FUNC_NAME s_scm_char_set_filter_x
630 {
631 int k;
632 long * p;
633
634 SCM_VALIDATE_PROC (1, pred);
635 SCM_VALIDATE_SMOB (2, cs, charset);
636 SCM_VALIDATE_SMOB (3, base_cs, charset);
637 p = (long *) SCM_SMOB_DATA (base_cs);
638 for (k = 0; k < SCM_CHARSET_SIZE; k++)
639 {
640 if (SCM_CHARSET_GET (cs, k))
641 {
642 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
643
644 if (scm_is_true (res))
645 p[k / SCM_BITS_PER_LONG] |= 1L << (k % SCM_BITS_PER_LONG);
646 }
647 }
648 return base_cs;
649 }
650 #undef FUNC_NAME
651
652
653 SCM_DEFINE (scm_ucs_range_to_char_set, "ucs-range->char-set", 2, 2, 0,
654 (SCM lower, SCM upper, SCM error, SCM base_cs),
655 "Return a character set containing all characters whose\n"
656 "character codes lie in the half-open range\n"
657 "[@var{lower},@var{upper}).\n"
658 "\n"
659 "If @var{error} is a true value, an error is signalled if the\n"
660 "specified range contains characters which are not contained in\n"
661 "the implemented character range. If @var{error} is @code{#f},\n"
662 "these characters are silently left out of the resultung\n"
663 "character set.\n"
664 "\n"
665 "The characters in @var{base_cs} are added to the result, if\n"
666 "given.")
667 #define FUNC_NAME s_scm_ucs_range_to_char_set
668 {
669 SCM cs;
670 size_t clower, cupper;
671 long * p;
672
673 clower = scm_to_size_t (lower);
674 cupper = scm_to_size_t (upper);
675 SCM_ASSERT_RANGE (2, upper, cupper >= clower);
676 if (!SCM_UNBNDP (error))
677 {
678 if (scm_is_true (error))
679 {
680 SCM_ASSERT_RANGE (1, lower, clower <= SCM_CHARSET_SIZE);
681 SCM_ASSERT_RANGE (2, upper, cupper <= SCM_CHARSET_SIZE);
682 }
683 }
684 if (clower > SCM_CHARSET_SIZE)
685 clower = SCM_CHARSET_SIZE;
686 if (cupper > SCM_CHARSET_SIZE)
687 cupper = SCM_CHARSET_SIZE;
688 if (SCM_UNBNDP (base_cs))
689 cs = make_char_set (FUNC_NAME);
690 else
691 {
692 SCM_VALIDATE_SMOB (4, base_cs, charset);
693 cs = scm_char_set_copy (base_cs);
694 }
695 p = (long *) SCM_SMOB_DATA (cs);
696 while (clower < cupper)
697 {
698 p[clower / SCM_BITS_PER_LONG] |= 1L << (clower % SCM_BITS_PER_LONG);
699 clower++;
700 }
701 return cs;
702 }
703 #undef FUNC_NAME
704
705
706 SCM_DEFINE (scm_ucs_range_to_char_set_x, "ucs-range->char-set!", 4, 0, 0,
707 (SCM lower, SCM upper, SCM error, SCM base_cs),
708 "Return a character set containing all characters whose\n"
709 "character codes lie in the half-open range\n"
710 "[@var{lower},@var{upper}).\n"
711 "\n"
712 "If @var{error} is a true value, an error is signalled if the\n"
713 "specified range contains characters which are not contained in\n"
714 "the implemented character range. If @var{error} is @code{#f},\n"
715 "these characters are silently left out of the resultung\n"
716 "character set.\n"
717 "\n"
718 "The characters are added to @var{base_cs} and @var{base_cs} is\n"
719 "returned.")
720 #define FUNC_NAME s_scm_ucs_range_to_char_set_x
721 {
722 size_t clower, cupper;
723 long * p;
724
725 clower = scm_to_size_t (lower);
726 cupper = scm_to_size_t (upper);
727 SCM_ASSERT_RANGE (2, upper, cupper >= clower);
728 if (scm_is_true (error))
729 {
730 SCM_ASSERT_RANGE (1, lower, clower <= SCM_CHARSET_SIZE);
731 SCM_ASSERT_RANGE (2, upper, cupper <= SCM_CHARSET_SIZE);
732 }
733 if (clower > SCM_CHARSET_SIZE)
734 clower = SCM_CHARSET_SIZE;
735 if (cupper > SCM_CHARSET_SIZE)
736 cupper = SCM_CHARSET_SIZE;
737 p = (long *) SCM_SMOB_DATA (base_cs);
738 while (clower < cupper)
739 {
740 p[clower / SCM_BITS_PER_LONG] |= 1L << (clower % SCM_BITS_PER_LONG);
741 clower++;
742 }
743 return base_cs;
744 }
745 #undef FUNC_NAME
746
747 SCM_DEFINE (scm_to_char_set, "->char-set", 1, 0, 0,
748 (SCM x),
749 "Coerces x into a char-set. @var{x} may be a string, character or char-set. A string is converted to the set of its constituent characters; a character is converted to a singleton set; a char-set is returned as-is.")
750 #define FUNC_NAME s_scm_to_char_set
751 {
752 if (scm_is_string (x))
753 return scm_string_to_char_set (x, SCM_UNDEFINED);
754 else if (SCM_CHARP (x))
755 return scm_char_set (scm_list_1 (x));
756 else if (SCM_SMOB_PREDICATE (scm_tc16_charset, x))
757 return x;
758 else
759 scm_wrong_type_arg (NULL, 0, x);
760 }
761 #undef FUNC_NAME
762
763 SCM_DEFINE (scm_char_set_size, "char-set-size", 1, 0, 0,
764 (SCM cs),
765 "Return the number of elements in character set @var{cs}.")
766 #define FUNC_NAME s_scm_char_set_size
767 {
768 int k, count = 0;
769
770 SCM_VALIDATE_SMOB (1, cs, charset);
771 for (k = 0; k < SCM_CHARSET_SIZE; k++)
772 if (SCM_CHARSET_GET (cs, k))
773 count++;
774 return SCM_I_MAKINUM (count);
775 }
776 #undef FUNC_NAME
777
778
779 SCM_DEFINE (scm_char_set_count, "char-set-count", 2, 0, 0,
780 (SCM pred, SCM cs),
781 "Return the number of the elements int the character set\n"
782 "@var{cs} which satisfy the predicate @var{pred}.")
783 #define FUNC_NAME s_scm_char_set_count
784 {
785 int k, count = 0;
786
787 SCM_VALIDATE_PROC (1, pred);
788 SCM_VALIDATE_SMOB (2, cs, charset);
789
790 for (k = 0; k < SCM_CHARSET_SIZE; k++)
791 if (SCM_CHARSET_GET (cs, k))
792 {
793 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
794 if (scm_is_true (res))
795 count++;
796 }
797 return SCM_I_MAKINUM (count);
798 }
799 #undef FUNC_NAME
800
801
802 SCM_DEFINE (scm_char_set_to_list, "char-set->list", 1, 0, 0,
803 (SCM cs),
804 "Return a list containing the elements of the character set\n"
805 "@var{cs}.")
806 #define FUNC_NAME s_scm_char_set_to_list
807 {
808 int k;
809 SCM result = SCM_EOL;
810
811 SCM_VALIDATE_SMOB (1, cs, charset);
812 for (k = SCM_CHARSET_SIZE; k > 0; k--)
813 if (SCM_CHARSET_GET (cs, k - 1))
814 result = scm_cons (SCM_MAKE_CHAR (k - 1), result);
815 return result;
816 }
817 #undef FUNC_NAME
818
819
820 SCM_DEFINE (scm_char_set_to_string, "char-set->string", 1, 0, 0,
821 (SCM cs),
822 "Return a string containing the elements of the character set\n"
823 "@var{cs}. The order in which the characters are placed in the\n"
824 "string is not defined.")
825 #define FUNC_NAME s_scm_char_set_to_string
826 {
827 int k;
828 int count = 0;
829 int idx = 0;
830 SCM result;
831 char * p;
832
833 SCM_VALIDATE_SMOB (1, cs, charset);
834 for (k = 0; k < SCM_CHARSET_SIZE; k++)
835 if (SCM_CHARSET_GET (cs, k))
836 count++;
837 result = scm_i_make_string (count, &p);
838 for (k = 0; k < SCM_CHARSET_SIZE; k++)
839 if (SCM_CHARSET_GET (cs, k))
840 p[idx++] = k;
841 return result;
842 }
843 #undef FUNC_NAME
844
845
846 SCM_DEFINE (scm_char_set_contains_p, "char-set-contains?", 2, 0, 0,
847 (SCM cs, SCM ch),
848 "Return @code{#t} iff the character @var{ch} is contained in the\n"
849 "character set @var{cs}.")
850 #define FUNC_NAME s_scm_char_set_contains_p
851 {
852 SCM_VALIDATE_SMOB (1, cs, charset);
853 SCM_VALIDATE_CHAR (2, ch);
854 return scm_from_bool (SCM_CHARSET_GET (cs, SCM_CHAR (ch)));
855 }
856 #undef FUNC_NAME
857
858
859 SCM_DEFINE (scm_char_set_every, "char-set-every", 2, 0, 0,
860 (SCM pred, SCM cs),
861 "Return a true value if every character in the character set\n"
862 "@var{cs} satisfies the predicate @var{pred}.")
863 #define FUNC_NAME s_scm_char_set_every
864 {
865 int k;
866 SCM res = SCM_BOOL_T;
867
868 SCM_VALIDATE_PROC (1, pred);
869 SCM_VALIDATE_SMOB (2, cs, charset);
870
871 for (k = 0; k < SCM_CHARSET_SIZE; k++)
872 if (SCM_CHARSET_GET (cs, k))
873 {
874 res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
875 if (scm_is_false (res))
876 return res;
877 }
878 return res;
879 }
880 #undef FUNC_NAME
881
882
883 SCM_DEFINE (scm_char_set_any, "char-set-any", 2, 0, 0,
884 (SCM pred, SCM cs),
885 "Return a true value if any character in the character set\n"
886 "@var{cs} satisfies the predicate @var{pred}.")
887 #define FUNC_NAME s_scm_char_set_any
888 {
889 int k;
890
891 SCM_VALIDATE_PROC (1, pred);
892 SCM_VALIDATE_SMOB (2, cs, charset);
893
894 for (k = 0; k < SCM_CHARSET_SIZE; k++)
895 if (SCM_CHARSET_GET (cs, k))
896 {
897 SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
898 if (scm_is_true (res))
899 return res;
900 }
901 return SCM_BOOL_F;
902 }
903 #undef FUNC_NAME
904
905
906 SCM_DEFINE (scm_char_set_adjoin, "char-set-adjoin", 1, 0, 1,
907 (SCM cs, SCM rest),
908 "Add all character arguments to the first argument, which must\n"
909 "be a character set.")
910 #define FUNC_NAME s_scm_char_set_adjoin
911 {
912 long * p;
913
914 SCM_VALIDATE_SMOB (1, cs, charset);
915 SCM_VALIDATE_REST_ARGUMENT (rest);
916 cs = scm_char_set_copy (cs);
917
918 p = (long *) SCM_SMOB_DATA (cs);
919 while (!scm_is_null (rest))
920 {
921 SCM chr = SCM_CAR (rest);
922 int c;
923
924 SCM_VALIDATE_CHAR_COPY (1, chr, c);
925 rest = SCM_CDR (rest);
926
927 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
928 }
929 return cs;
930 }
931 #undef FUNC_NAME
932
933
934 SCM_DEFINE (scm_char_set_delete, "char-set-delete", 1, 0, 1,
935 (SCM cs, SCM rest),
936 "Delete all character arguments from the first argument, which\n"
937 "must be a character set.")
938 #define FUNC_NAME s_scm_char_set_delete
939 {
940 long * p;
941
942 SCM_VALIDATE_SMOB (1, cs, charset);
943 SCM_VALIDATE_REST_ARGUMENT (rest);
944 cs = scm_char_set_copy (cs);
945
946 p = (long *) SCM_SMOB_DATA (cs);
947 while (!scm_is_null (rest))
948 {
949 SCM chr = SCM_CAR (rest);
950 int c;
951
952 SCM_VALIDATE_CHAR_COPY (1, chr, c);
953 rest = SCM_CDR (rest);
954
955 p[c / SCM_BITS_PER_LONG] &= ~(1L << (c % SCM_BITS_PER_LONG));
956 }
957 return cs;
958 }
959 #undef FUNC_NAME
960
961
962 SCM_DEFINE (scm_char_set_adjoin_x, "char-set-adjoin!", 1, 0, 1,
963 (SCM cs, SCM rest),
964 "Add all character arguments to the first argument, which must\n"
965 "be a character set.")
966 #define FUNC_NAME s_scm_char_set_adjoin_x
967 {
968 long * p;
969
970 SCM_VALIDATE_SMOB (1, cs, charset);
971 SCM_VALIDATE_REST_ARGUMENT (rest);
972
973 p = (long *) SCM_SMOB_DATA (cs);
974 while (!scm_is_null (rest))
975 {
976 SCM chr = SCM_CAR (rest);
977 int c;
978
979 SCM_VALIDATE_CHAR_COPY (1, chr, c);
980 rest = SCM_CDR (rest);
981
982 p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG);
983 }
984 return cs;
985 }
986 #undef FUNC_NAME
987
988
989 SCM_DEFINE (scm_char_set_delete_x, "char-set-delete!", 1, 0, 1,
990 (SCM cs, SCM rest),
991 "Delete all character arguments from the first argument, which\n"
992 "must be a character set.")
993 #define FUNC_NAME s_scm_char_set_delete_x
994 {
995 long * p;
996
997 SCM_VALIDATE_SMOB (1, cs, charset);
998 SCM_VALIDATE_REST_ARGUMENT (rest);
999
1000 p = (long *) SCM_SMOB_DATA (cs);
1001 while (!scm_is_null (rest))
1002 {
1003 SCM chr = SCM_CAR (rest);
1004 int c;
1005
1006 SCM_VALIDATE_CHAR_COPY (1, chr, c);
1007 rest = SCM_CDR (rest);
1008
1009 p[c / SCM_BITS_PER_LONG] &= ~(1L << (c % SCM_BITS_PER_LONG));
1010 }
1011 return cs;
1012 }
1013 #undef FUNC_NAME
1014
1015
1016 SCM_DEFINE (scm_char_set_complement, "char-set-complement", 1, 0, 0,
1017 (SCM cs),
1018 "Return the complement of the character set @var{cs}.")
1019 #define FUNC_NAME s_scm_char_set_complement
1020 {
1021 int k;
1022 SCM res;
1023 long * p, * q;
1024
1025 SCM_VALIDATE_SMOB (1, cs, charset);
1026
1027 res = make_char_set (FUNC_NAME);
1028 p = (long *) SCM_SMOB_DATA (res);
1029 q = (long *) SCM_SMOB_DATA (cs);
1030 for (k = 0; k < LONGS_PER_CHARSET; k++)
1031 p[k] = ~q[k];
1032 return res;
1033 }
1034 #undef FUNC_NAME
1035
1036
1037 SCM_DEFINE (scm_char_set_union, "char-set-union", 0, 0, 1,
1038 (SCM rest),
1039 "Return the union of all argument character sets.")
1040 #define FUNC_NAME s_scm_char_set_union
1041 {
1042 int c = 1;
1043 SCM res;
1044 long * p;
1045
1046 SCM_VALIDATE_REST_ARGUMENT (rest);
1047
1048 res = make_char_set (FUNC_NAME);
1049 p = (long *) SCM_SMOB_DATA (res);
1050 while (!scm_is_null (rest))
1051 {
1052 int k;
1053 SCM cs = SCM_CAR (rest);
1054 SCM_VALIDATE_SMOB (c, cs, charset);
1055 c++;
1056 rest = SCM_CDR (rest);
1057
1058 for (k = 0; k < LONGS_PER_CHARSET; k++)
1059 p[k] |= ((long *) SCM_SMOB_DATA (cs))[k];
1060 }
1061 return res;
1062 }
1063 #undef FUNC_NAME
1064
1065
1066 SCM_DEFINE (scm_char_set_intersection, "char-set-intersection", 0, 0, 1,
1067 (SCM rest),
1068 "Return the intersection of all argument character sets.")
1069 #define FUNC_NAME s_scm_char_set_intersection
1070 {
1071 SCM res;
1072
1073 SCM_VALIDATE_REST_ARGUMENT (rest);
1074
1075 if (scm_is_null (rest))
1076 res = make_char_set (FUNC_NAME);
1077 else
1078 {
1079 long *p;
1080 int argnum = 2;
1081
1082 res = scm_char_set_copy (SCM_CAR (rest));
1083 p = (long *) SCM_SMOB_DATA (res);
1084 rest = SCM_CDR (rest);
1085
1086 while (scm_is_pair (rest))
1087 {
1088 int k;
1089 SCM cs = SCM_CAR (rest);
1090 long *cs_data;
1091
1092 SCM_VALIDATE_SMOB (argnum, cs, charset);
1093 argnum++;
1094 cs_data = (long *) SCM_SMOB_DATA (cs);
1095 rest = SCM_CDR (rest);
1096 for (k = 0; k < LONGS_PER_CHARSET; k++)
1097 p[k] &= cs_data[k];
1098 }
1099 }
1100
1101 return res;
1102 }
1103 #undef FUNC_NAME
1104
1105
1106 SCM_DEFINE (scm_char_set_difference, "char-set-difference", 1, 0, 1,
1107 (SCM cs1, SCM rest),
1108 "Return the difference of all argument character sets.")
1109 #define FUNC_NAME s_scm_char_set_difference
1110 {
1111 int c = 2;
1112 SCM res;
1113 long * p;
1114
1115 SCM_VALIDATE_SMOB (1, cs1, charset);
1116 SCM_VALIDATE_REST_ARGUMENT (rest);
1117
1118 res = scm_char_set_copy (cs1);
1119 p = (long *) SCM_SMOB_DATA (res);
1120 while (!scm_is_null (rest))
1121 {
1122 int k;
1123 SCM cs = SCM_CAR (rest);
1124 SCM_VALIDATE_SMOB (c, cs, charset);
1125 c++;
1126 rest = SCM_CDR (rest);
1127
1128 for (k = 0; k < LONGS_PER_CHARSET; k++)
1129 p[k] &= ~((long *) SCM_SMOB_DATA (cs))[k];
1130 }
1131 return res;
1132 }
1133 #undef FUNC_NAME
1134
1135
1136 SCM_DEFINE (scm_char_set_xor, "char-set-xor", 0, 0, 1,
1137 (SCM rest),
1138 "Return the exclusive-or of all argument character sets.")
1139 #define FUNC_NAME s_scm_char_set_xor
1140 {
1141 SCM res;
1142
1143 SCM_VALIDATE_REST_ARGUMENT (rest);
1144
1145 if (scm_is_null (rest))
1146 res = make_char_set (FUNC_NAME);
1147 else
1148 {
1149 int argnum = 2;
1150 long * p;
1151
1152 res = scm_char_set_copy (SCM_CAR (rest));
1153 p = (long *) SCM_SMOB_DATA (res);
1154 rest = SCM_CDR (rest);
1155
1156 while (scm_is_pair (rest))
1157 {
1158 SCM cs = SCM_CAR (rest);
1159 long *cs_data;
1160 int k;
1161
1162 SCM_VALIDATE_SMOB (argnum, cs, charset);
1163 argnum++;
1164 cs_data = (long *) SCM_SMOB_DATA (cs);
1165 rest = SCM_CDR (rest);
1166
1167 for (k = 0; k < LONGS_PER_CHARSET; k++)
1168 p[k] ^= cs_data[k];
1169 }
1170 }
1171 return res;
1172 }
1173 #undef FUNC_NAME
1174
1175
1176 SCM_DEFINE (scm_char_set_diff_plus_intersection, "char-set-diff+intersection", 1, 0, 1,
1177 (SCM cs1, SCM rest),
1178 "Return the difference and the intersection of all argument\n"
1179 "character sets.")
1180 #define FUNC_NAME s_scm_char_set_diff_plus_intersection
1181 {
1182 int c = 2;
1183 SCM res1, res2;
1184 long * p, * q;
1185
1186 SCM_VALIDATE_SMOB (1, cs1, charset);
1187 SCM_VALIDATE_REST_ARGUMENT (rest);
1188
1189 res1 = scm_char_set_copy (cs1);
1190 res2 = make_char_set (FUNC_NAME);
1191 p = (long *) SCM_SMOB_DATA (res1);
1192 q = (long *) SCM_SMOB_DATA (res2);
1193 while (!scm_is_null (rest))
1194 {
1195 int k;
1196 SCM cs = SCM_CAR (rest);
1197 long *r;
1198
1199 SCM_VALIDATE_SMOB (c, cs, charset);
1200 c++;
1201 r = (long *) SCM_SMOB_DATA (cs);
1202
1203 for (k = 0; k < LONGS_PER_CHARSET; k++)
1204 {
1205 q[k] |= p[k] & r[k];
1206 p[k] &= ~r[k];
1207 }
1208 rest = SCM_CDR (rest);
1209 }
1210 return scm_values (scm_list_2 (res1, res2));
1211 }
1212 #undef FUNC_NAME
1213
1214
1215 SCM_DEFINE (scm_char_set_complement_x, "char-set-complement!", 1, 0, 0,
1216 (SCM cs),
1217 "Return the complement of the character set @var{cs}.")
1218 #define FUNC_NAME s_scm_char_set_complement_x
1219 {
1220 int k;
1221 long * p;
1222
1223 SCM_VALIDATE_SMOB (1, cs, charset);
1224 p = (long *) SCM_SMOB_DATA (cs);
1225 for (k = 0; k < LONGS_PER_CHARSET; k++)
1226 p[k] = ~p[k];
1227 return cs;
1228 }
1229 #undef FUNC_NAME
1230
1231
1232 SCM_DEFINE (scm_char_set_union_x, "char-set-union!", 1, 0, 1,
1233 (SCM cs1, SCM rest),
1234 "Return the union of all argument character sets.")
1235 #define FUNC_NAME s_scm_char_set_union_x
1236 {
1237 int c = 2;
1238 long * p;
1239
1240 SCM_VALIDATE_SMOB (1, cs1, charset);
1241 SCM_VALIDATE_REST_ARGUMENT (rest);
1242
1243 p = (long *) SCM_SMOB_DATA (cs1);
1244 while (!scm_is_null (rest))
1245 {
1246 int k;
1247 SCM cs = SCM_CAR (rest);
1248 SCM_VALIDATE_SMOB (c, cs, charset);
1249 c++;
1250 rest = SCM_CDR (rest);
1251
1252 for (k = 0; k < LONGS_PER_CHARSET; k++)
1253 p[k] |= ((long *) SCM_SMOB_DATA (cs))[k];
1254 }
1255 return cs1;
1256 }
1257 #undef FUNC_NAME
1258
1259
1260 SCM_DEFINE (scm_char_set_intersection_x, "char-set-intersection!", 1, 0, 1,
1261 (SCM cs1, SCM rest),
1262 "Return the intersection of all argument character sets.")
1263 #define FUNC_NAME s_scm_char_set_intersection_x
1264 {
1265 int c = 2;
1266 long * p;
1267
1268 SCM_VALIDATE_SMOB (1, cs1, charset);
1269 SCM_VALIDATE_REST_ARGUMENT (rest);
1270
1271 p = (long *) SCM_SMOB_DATA (cs1);
1272 while (!scm_is_null (rest))
1273 {
1274 int k;
1275 SCM cs = SCM_CAR (rest);
1276 SCM_VALIDATE_SMOB (c, cs, charset);
1277 c++;
1278 rest = SCM_CDR (rest);
1279
1280 for (k = 0; k < LONGS_PER_CHARSET; k++)
1281 p[k] &= ((long *) SCM_SMOB_DATA (cs))[k];
1282 }
1283 return cs1;
1284 }
1285 #undef FUNC_NAME
1286
1287
1288 SCM_DEFINE (scm_char_set_difference_x, "char-set-difference!", 1, 0, 1,
1289 (SCM cs1, SCM rest),
1290 "Return the difference of all argument character sets.")
1291 #define FUNC_NAME s_scm_char_set_difference_x
1292 {
1293 int c = 2;
1294 long * p;
1295
1296 SCM_VALIDATE_SMOB (1, cs1, charset);
1297 SCM_VALIDATE_REST_ARGUMENT (rest);
1298
1299 p = (long *) SCM_SMOB_DATA (cs1);
1300 while (!scm_is_null (rest))
1301 {
1302 int k;
1303 SCM cs = SCM_CAR (rest);
1304 SCM_VALIDATE_SMOB (c, cs, charset);
1305 c++;
1306 rest = SCM_CDR (rest);
1307
1308 for (k = 0; k < LONGS_PER_CHARSET; k++)
1309 p[k] &= ~((long *) SCM_SMOB_DATA (cs))[k];
1310 }
1311 return cs1;
1312 }
1313 #undef FUNC_NAME
1314
1315
1316 SCM_DEFINE (scm_char_set_xor_x, "char-set-xor!", 1, 0, 1,
1317 (SCM cs1, SCM rest),
1318 "Return the exclusive-or of all argument character sets.")
1319 #define FUNC_NAME s_scm_char_set_xor_x
1320 {
1321 /* a side-effecting variant should presumably give consistent results:
1322 (define a (char-set #\a))
1323 (char-set-xor a a a) -> char set #\a
1324 (char-set-xor! a a a) -> char set #\a
1325 */
1326 return scm_char_set_xor (scm_cons (cs1, rest));
1327
1328 #if 0
1329 /* this would give (char-set-xor! a a a) -> empty char set. */
1330 int c = 2;
1331 long * p;
1332
1333 SCM_VALIDATE_SMOB (1, cs1, charset);
1334 SCM_VALIDATE_REST_ARGUMENT (rest);
1335
1336 p = (long *) SCM_SMOB_DATA (cs1);
1337 while (!scm_is_null (rest))
1338 {
1339 int k;
1340 SCM cs = SCM_CAR (rest);
1341 SCM_VALIDATE_SMOB (c, cs, charset);
1342 c++;
1343 rest = SCM_CDR (rest);
1344
1345 for (k = 0; k < LONGS_PER_CHARSET; k++)
1346 p[k] ^= ((long *) SCM_SMOB_DATA (cs))[k];
1347 }
1348 return cs1;
1349 #endif
1350 }
1351 #undef FUNC_NAME
1352
1353
1354 SCM_DEFINE (scm_char_set_diff_plus_intersection_x, "char-set-diff+intersection!", 2, 0, 1,
1355 (SCM cs1, SCM cs2, SCM rest),
1356 "Return the difference and the intersection of all argument\n"
1357 "character sets.")
1358 #define FUNC_NAME s_scm_char_set_diff_plus_intersection_x
1359 {
1360 int c = 3;
1361 long * p, * q;
1362 int k;
1363
1364 SCM_VALIDATE_SMOB (1, cs1, charset);
1365 SCM_VALIDATE_SMOB (2, cs2, charset);
1366 SCM_VALIDATE_REST_ARGUMENT (rest);
1367
1368 p = (long *) SCM_SMOB_DATA (cs1);
1369 q = (long *) SCM_SMOB_DATA (cs2);
1370 if (p == q)
1371 {
1372 /* (char-set-diff+intersection! a a ...): can't share storage,
1373 but we know the answer without checking for further
1374 arguments. */
1375 return scm_values (scm_list_2 (make_char_set (FUNC_NAME), cs1));
1376 }
1377 for (k = 0; k < LONGS_PER_CHARSET; k++)
1378 {
1379 long t = p[k];
1380
1381 p[k] &= ~q[k];
1382 q[k] = t & q[k];
1383 }
1384 while (!scm_is_null (rest))
1385 {
1386 SCM cs = SCM_CAR (rest);
1387 long *r;
1388
1389 SCM_VALIDATE_SMOB (c, cs, charset);
1390 c++;
1391 r = (long *) SCM_SMOB_DATA (cs);
1392
1393 for (k = 0; k < LONGS_PER_CHARSET; k++)
1394 {
1395 q[k] |= p[k] & r[k];
1396 p[k] &= ~r[k];
1397 }
1398 rest = SCM_CDR (rest);
1399 }
1400 return scm_values (scm_list_2 (cs1, cs2));
1401 }
1402 #undef FUNC_NAME
1403
1404 \f
1405 /* Standard character sets. */
1406
1407 SCM scm_char_set_lower_case;
1408 SCM scm_char_set_upper_case;
1409 SCM scm_char_set_title_case;
1410 SCM scm_char_set_letter;
1411 SCM scm_char_set_digit;
1412 SCM scm_char_set_letter_and_digit;
1413 SCM scm_char_set_graphic;
1414 SCM scm_char_set_printing;
1415 SCM scm_char_set_whitespace;
1416 SCM scm_char_set_iso_control;
1417 SCM scm_char_set_punctuation;
1418 SCM scm_char_set_symbol;
1419 SCM scm_char_set_hex_digit;
1420 SCM scm_char_set_blank;
1421 SCM scm_char_set_ascii;
1422 SCM scm_char_set_empty;
1423 SCM scm_char_set_full;
1424
1425
1426 /* Create an empty character set and return it after binding it to NAME. */
1427 static inline SCM
1428 define_charset (const char *name)
1429 {
1430 SCM cs = make_char_set (NULL);
1431 scm_c_define (name, cs);
1432 return scm_permanent_object (cs);
1433 }
1434
1435 /* Membership predicates for the various char sets.
1436
1437 XXX: The `punctuation' and `symbol' char sets have no direct equivalent in
1438 <ctype.h>. Thus, the predicates below yield correct results for ASCII,
1439 but they do not provide the result described by the SRFI for Latin-1. The
1440 correct Latin-1 result could only be obtained by hard-coding the
1441 characters listed by the SRFI, but the problem would remain for other
1442 8-bit charsets.
1443
1444 Similarly, character 0xA0 in Latin-1 (unbreakable space, `#\0240') should
1445 be part of `char-set:blank'. However, glibc's current (2006/09) Latin-1
1446 locales (which use the ISO 14652 "i18n" FDCC-set) do not consider it
1447 `blank' so it ends up in `char-set:punctuation'. */
1448 #ifdef HAVE_ISBLANK
1449 # define CSET_BLANK_PRED(c) (isblank (c))
1450 #else
1451 # define CSET_BLANK_PRED(c) \
1452 (((c) == ' ') || ((c) == '\t'))
1453 #endif
1454
1455 #define CSET_SYMBOL_PRED(c) \
1456 (((c) != '\0') && (strchr ("$+<=>^`|~", (c)) != NULL))
1457 #define CSET_PUNCT_PRED(c) \
1458 ((ispunct (c)) && (!CSET_SYMBOL_PRED (c)))
1459
1460 #define CSET_LOWER_PRED(c) (islower (c))
1461 #define CSET_UPPER_PRED(c) (isupper (c))
1462 #define CSET_LETTER_PRED(c) (isalpha (c))
1463 #define CSET_DIGIT_PRED(c) (isdigit (c))
1464 #define CSET_WHITESPACE_PRED(c) (isspace (c))
1465 #define CSET_CONTROL_PRED(c) (iscntrl (c))
1466 #define CSET_HEX_DIGIT_PRED(c) (isxdigit (c))
1467 #define CSET_ASCII_PRED(c) (isascii (c))
1468
1469 /* Some char sets are explicitly defined by the SRFI as a union of other char
1470 sets so we try to follow this closely. */
1471
1472 #define CSET_LETTER_AND_DIGIT_PRED(c) \
1473 (CSET_LETTER_PRED (c) || CSET_DIGIT_PRED (c))
1474
1475 #define CSET_GRAPHIC_PRED(c) \
1476 (CSET_LETTER_PRED (c) || CSET_DIGIT_PRED (c) \
1477 || CSET_PUNCT_PRED (c) || CSET_SYMBOL_PRED (c))
1478
1479 #define CSET_PRINTING_PRED(c) \
1480 (CSET_GRAPHIC_PRED (c) || CSET_WHITESPACE_PRED (c))
1481
1482 /* False and true predicates. */
1483 #define CSET_TRUE_PRED(c) (1)
1484 #define CSET_FALSE_PRED(c) (0)
1485
1486
1487 /* Compute the contents of all the standard character sets. Computation may
1488 need to be re-done at `setlocale'-time because some char sets (e.g.,
1489 `char-set:letter') need to reflect the character set supported by Guile.
1490
1491 For instance, at startup time, the "C" locale is used, thus Guile supports
1492 only ASCII; therefore, `char-set:letter' only contains English letters.
1493 The user can change this by invoking `setlocale' and specifying a locale
1494 with an 8-bit charset, thereby augmenting some of the SRFI-14 standard
1495 character sets.
1496
1497 This works because some of the predicates used below to construct
1498 character sets (e.g., `isalpha(3)') are locale-dependent (so
1499 charset-dependent, though generally not language-dependent). For details,
1500 please see the `guile-devel' mailing list archive of September 2006. */
1501 void
1502 scm_srfi_14_compute_char_sets (void)
1503 {
1504 #define UPDATE_CSET(c, cset, pred) \
1505 do \
1506 { \
1507 if (pred (c)) \
1508 SCM_CHARSET_SET ((cset), (c)); \
1509 else \
1510 SCM_CHARSET_UNSET ((cset), (c)); \
1511 } \
1512 while (0)
1513
1514 register int ch;
1515
1516 for (ch = 0; ch < 256; ch++)
1517 {
1518 UPDATE_CSET (ch, scm_char_set_upper_case, CSET_UPPER_PRED);
1519 UPDATE_CSET (ch, scm_char_set_lower_case, CSET_LOWER_PRED);
1520 UPDATE_CSET (ch, scm_char_set_title_case, CSET_FALSE_PRED);
1521 UPDATE_CSET (ch, scm_char_set_letter, CSET_LETTER_PRED);
1522 UPDATE_CSET (ch, scm_char_set_digit, CSET_DIGIT_PRED);
1523 UPDATE_CSET (ch, scm_char_set_letter_and_digit,
1524 CSET_LETTER_AND_DIGIT_PRED);
1525 UPDATE_CSET (ch, scm_char_set_graphic, CSET_GRAPHIC_PRED);
1526 UPDATE_CSET (ch, scm_char_set_printing, CSET_PRINTING_PRED);
1527 UPDATE_CSET (ch, scm_char_set_whitespace, CSET_WHITESPACE_PRED);
1528 UPDATE_CSET (ch, scm_char_set_iso_control, CSET_CONTROL_PRED);
1529 UPDATE_CSET (ch, scm_char_set_punctuation, CSET_PUNCT_PRED);
1530 UPDATE_CSET (ch, scm_char_set_symbol, CSET_SYMBOL_PRED);
1531 UPDATE_CSET (ch, scm_char_set_hex_digit, CSET_HEX_DIGIT_PRED);
1532 UPDATE_CSET (ch, scm_char_set_blank, CSET_BLANK_PRED);
1533 UPDATE_CSET (ch, scm_char_set_ascii, CSET_ASCII_PRED);
1534 UPDATE_CSET (ch, scm_char_set_empty, CSET_FALSE_PRED);
1535 UPDATE_CSET (ch, scm_char_set_full, CSET_TRUE_PRED);
1536 }
1537
1538 #undef UPDATE_CSET
1539 }
1540
1541 \f
1542 void
1543 scm_init_srfi_14 (void)
1544 {
1545 scm_tc16_charset = scm_make_smob_type ("character-set",
1546 BYTES_PER_CHARSET);
1547 scm_set_smob_free (scm_tc16_charset, charset_free);
1548 scm_set_smob_print (scm_tc16_charset, charset_print);
1549
1550 scm_char_set_upper_case = define_charset ("char-set:upper-case");
1551 scm_char_set_lower_case = define_charset ("char-set:lower-case");
1552 scm_char_set_title_case = define_charset ("char-set:title-case");
1553 scm_char_set_letter = define_charset ("char-set:letter");
1554 scm_char_set_digit = define_charset ("char-set:digit");
1555 scm_char_set_letter_and_digit = define_charset ("char-set:letter+digit");
1556 scm_char_set_graphic = define_charset ("char-set:graphic");
1557 scm_char_set_printing = define_charset ("char-set:printing");
1558 scm_char_set_whitespace = define_charset ("char-set:whitespace");
1559 scm_char_set_iso_control = define_charset ("char-set:iso-control");
1560 scm_char_set_punctuation = define_charset ("char-set:punctuation");
1561 scm_char_set_symbol = define_charset ("char-set:symbol");
1562 scm_char_set_hex_digit = define_charset ("char-set:hex-digit");
1563 scm_char_set_blank = define_charset ("char-set:blank");
1564 scm_char_set_ascii = define_charset ("char-set:ascii");
1565 scm_char_set_empty = define_charset ("char-set:empty");
1566 scm_char_set_full = define_charset ("char-set:full");
1567
1568 scm_srfi_14_compute_char_sets ();
1569
1570 #include "libguile/srfi-14.x"
1571 }
1572
1573 /* End of srfi-14.c. */