Commit | Line | Data |
---|---|---|
63181a97 MV |
1 | /* srfi-14.c --- SRFI-14 procedures for Guile |
2 | * | |
2b829bbb | 3 | * Copyright (C) 2001, 2004, 2006 Free Software Foundation, Inc. |
63181a97 MV |
4 | * |
5 | * This library is free software; you can redistribute it and/or | |
6 | * modify it under the terms of the GNU Lesser General Public | |
7 | * License as published by the Free Software Foundation; either | |
8 | * version 2.1 of the License, or (at your option) any later version. | |
9 | * | |
10 | * This library is distributed in the hope that it will be useful, | |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * Lesser General Public License for more details. | |
14 | * | |
15 | * You should have received a copy of the GNU Lesser General Public | |
16 | * License along with this library; if not, write to the Free Software | |
92205699 | 17 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
63181a97 MV |
18 | */ |
19 | ||
20 | ||
21 | #include <string.h> | |
22 | #include <ctype.h> | |
23 | ||
24 | #include "libguile.h" | |
25 | #include "libguile/srfi-14.h" | |
26 | ||
27 | ||
28 | #define SCM_CHARSET_SET(cs, idx) \ | |
29 | (((long *) SCM_SMOB_DATA (cs))[(idx) / SCM_BITS_PER_LONG] |= \ | |
30 | (1L << ((idx) % SCM_BITS_PER_LONG))) | |
31 | ||
32 | #define BYTES_PER_CHARSET (SCM_CHARSET_SIZE / 8) | |
33 | #define LONGS_PER_CHARSET (SCM_CHARSET_SIZE / SCM_BITS_PER_LONG) | |
34 | ||
35 | ||
36 | /* Smob type code for character sets. */ | |
37 | int scm_tc16_charset = 0; | |
38 | ||
39 | ||
40 | /* Smob print hook for character sets. */ | |
41 | static int | |
42 | charset_print (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED) | |
43 | { | |
44 | int i; | |
45 | int first = 1; | |
46 | ||
47 | scm_puts ("#<charset {", port); | |
48 | for (i = 0; i < SCM_CHARSET_SIZE; i++) | |
49 | if (SCM_CHARSET_GET (charset, i)) | |
50 | { | |
51 | if (first) | |
52 | first = 0; | |
53 | else | |
54 | scm_puts (" ", port); | |
55 | scm_write (SCM_MAKE_CHAR (i), port); | |
56 | } | |
57 | scm_puts ("}>", port); | |
58 | return 1; | |
59 | } | |
60 | ||
61 | ||
62 | /* Smob free hook for character sets. */ | |
63 | static size_t | |
64 | charset_free (SCM charset) | |
65 | { | |
66 | return scm_smob_free (charset); | |
67 | } | |
68 | ||
69 | ||
70 | /* Create a new, empty character set. */ | |
71 | static SCM | |
72 | make_char_set (const char * func_name) | |
73 | { | |
74 | long * p; | |
75 | ||
76 | p = scm_gc_malloc (BYTES_PER_CHARSET, "character-set"); | |
77 | memset (p, 0, BYTES_PER_CHARSET); | |
78 | SCM_RETURN_NEWSMOB (scm_tc16_charset, p); | |
79 | } | |
80 | ||
81 | ||
82 | SCM_DEFINE (scm_char_set_p, "char-set?", 1, 0, 0, | |
83 | (SCM obj), | |
84 | "Return @code{#t} if @var{obj} is a character set, @code{#f}\n" | |
85 | "otherwise.") | |
86 | #define FUNC_NAME s_scm_char_set_p | |
87 | { | |
88 | return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_charset, obj)); | |
89 | } | |
90 | #undef FUNC_NAME | |
91 | ||
92 | ||
93 | SCM_DEFINE (scm_char_set_eq, "char-set=", 0, 0, 1, | |
94 | (SCM char_sets), | |
95 | "Return @code{#t} if all given character sets are equal.") | |
96 | #define FUNC_NAME s_scm_char_set_eq | |
97 | { | |
98 | int argnum = 1; | |
99 | long *cs1_data = NULL; | |
100 | ||
101 | SCM_VALIDATE_REST_ARGUMENT (char_sets); | |
102 | ||
d2e53ed6 | 103 | while (!scm_is_null (char_sets)) |
63181a97 MV |
104 | { |
105 | SCM csi = SCM_CAR (char_sets); | |
106 | long *csi_data; | |
107 | ||
108 | SCM_VALIDATE_SMOB (argnum, csi, charset); | |
109 | argnum++; | |
110 | csi_data = (long *) SCM_SMOB_DATA (csi); | |
111 | if (cs1_data == NULL) | |
112 | cs1_data = csi_data; | |
113 | else if (memcmp (cs1_data, csi_data, BYTES_PER_CHARSET) != 0) | |
114 | return SCM_BOOL_F; | |
115 | char_sets = SCM_CDR (char_sets); | |
116 | } | |
117 | return SCM_BOOL_T; | |
118 | } | |
119 | #undef FUNC_NAME | |
120 | ||
121 | ||
122 | SCM_DEFINE (scm_char_set_leq, "char-set<=", 0, 0, 1, | |
123 | (SCM char_sets), | |
124 | "Return @code{#t} if every character set @var{cs}i is a subset\n" | |
125 | "of character set @var{cs}i+1.") | |
126 | #define FUNC_NAME s_scm_char_set_leq | |
127 | { | |
128 | int argnum = 1; | |
129 | long *prev_data = NULL; | |
130 | ||
131 | SCM_VALIDATE_REST_ARGUMENT (char_sets); | |
132 | ||
d2e53ed6 | 133 | while (!scm_is_null (char_sets)) |
63181a97 MV |
134 | { |
135 | SCM csi = SCM_CAR (char_sets); | |
136 | long *csi_data; | |
137 | ||
138 | SCM_VALIDATE_SMOB (argnum, csi, charset); | |
139 | argnum++; | |
140 | csi_data = (long *) SCM_SMOB_DATA (csi); | |
141 | if (prev_data) | |
142 | { | |
143 | int k; | |
144 | ||
145 | for (k = 0; k < LONGS_PER_CHARSET; k++) | |
146 | { | |
147 | if ((prev_data[k] & csi_data[k]) != prev_data[k]) | |
148 | return SCM_BOOL_F; | |
149 | } | |
150 | } | |
151 | prev_data = csi_data; | |
152 | char_sets = SCM_CDR (char_sets); | |
153 | } | |
154 | return SCM_BOOL_T; | |
155 | } | |
156 | #undef FUNC_NAME | |
157 | ||
158 | ||
159 | SCM_DEFINE (scm_char_set_hash, "char-set-hash", 1, 1, 0, | |
160 | (SCM cs, SCM bound), | |
161 | "Compute a hash value for the character set @var{cs}. If\n" | |
162 | "@var{bound} is given and non-zero, it restricts the\n" | |
163 | "returned value to the range 0 @dots{} @var{bound - 1}.") | |
164 | #define FUNC_NAME s_scm_char_set_hash | |
165 | { | |
166 | const unsigned long default_bnd = 871; | |
167 | unsigned long bnd; | |
168 | long * p; | |
169 | unsigned long val = 0; | |
170 | int k; | |
171 | ||
172 | SCM_VALIDATE_SMOB (1, cs, charset); | |
173 | ||
174 | if (SCM_UNBNDP (bound)) | |
175 | bnd = default_bnd; | |
176 | else | |
177 | { | |
178 | bnd = scm_to_ulong (bound); | |
179 | if (bnd == 0) | |
180 | bnd = default_bnd; | |
181 | } | |
182 | ||
183 | p = (long *) SCM_SMOB_DATA (cs); | |
184 | for (k = 0; k < LONGS_PER_CHARSET; k++) | |
185 | { | |
186 | if (p[k] != 0) | |
187 | val = p[k] + (val << 1); | |
188 | } | |
189 | return scm_from_ulong (val % bnd); | |
190 | } | |
191 | #undef FUNC_NAME | |
192 | ||
193 | ||
194 | SCM_DEFINE (scm_char_set_cursor, "char-set-cursor", 1, 0, 0, | |
195 | (SCM cs), | |
196 | "Return a cursor into the character set @var{cs}.") | |
197 | #define FUNC_NAME s_scm_char_set_cursor | |
198 | { | |
199 | int idx; | |
200 | ||
201 | SCM_VALIDATE_SMOB (1, cs, charset); | |
202 | for (idx = 0; idx < SCM_CHARSET_SIZE; idx++) | |
203 | { | |
204 | if (SCM_CHARSET_GET (cs, idx)) | |
205 | break; | |
206 | } | |
207 | return SCM_I_MAKINUM (idx); | |
208 | } | |
209 | #undef FUNC_NAME | |
210 | ||
211 | ||
212 | SCM_DEFINE (scm_char_set_ref, "char-set-ref", 2, 0, 0, | |
213 | (SCM cs, SCM cursor), | |
214 | "Return the character at the current cursor position\n" | |
215 | "@var{cursor} in the character set @var{cs}. It is an error to\n" | |
216 | "pass a cursor for which @code{end-of-char-set?} returns true.") | |
217 | #define FUNC_NAME s_scm_char_set_ref | |
218 | { | |
219 | size_t ccursor = scm_to_size_t (cursor); | |
220 | SCM_VALIDATE_SMOB (1, cs, charset); | |
221 | ||
222 | if (ccursor >= SCM_CHARSET_SIZE || !SCM_CHARSET_GET (cs, ccursor)) | |
223 | SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor)); | |
224 | return SCM_MAKE_CHAR (ccursor); | |
225 | } | |
226 | #undef FUNC_NAME | |
227 | ||
228 | ||
229 | SCM_DEFINE (scm_char_set_cursor_next, "char-set-cursor-next", 2, 0, 0, | |
230 | (SCM cs, SCM cursor), | |
231 | "Advance the character set cursor @var{cursor} to the next\n" | |
232 | "character in the character set @var{cs}. It is an error if the\n" | |
233 | "cursor given satisfies @code{end-of-char-set?}.") | |
234 | #define FUNC_NAME s_scm_char_set_cursor_next | |
235 | { | |
236 | size_t ccursor = scm_to_size_t (cursor); | |
237 | SCM_VALIDATE_SMOB (1, cs, charset); | |
238 | ||
239 | if (ccursor >= SCM_CHARSET_SIZE || !SCM_CHARSET_GET (cs, ccursor)) | |
240 | SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor)); | |
241 | for (ccursor++; ccursor < SCM_CHARSET_SIZE; ccursor++) | |
242 | { | |
243 | if (SCM_CHARSET_GET (cs, ccursor)) | |
244 | break; | |
245 | } | |
246 | return SCM_I_MAKINUM (ccursor); | |
247 | } | |
248 | #undef FUNC_NAME | |
249 | ||
250 | ||
251 | SCM_DEFINE (scm_end_of_char_set_p, "end-of-char-set?", 1, 0, 0, | |
252 | (SCM cursor), | |
253 | "Return @code{#t} if @var{cursor} has reached the end of a\n" | |
254 | "character set, @code{#f} otherwise.") | |
255 | #define FUNC_NAME s_scm_end_of_char_set_p | |
256 | { | |
257 | size_t ccursor = scm_to_size_t (cursor); | |
258 | return scm_from_bool (ccursor >= SCM_CHARSET_SIZE); | |
259 | } | |
260 | #undef FUNC_NAME | |
261 | ||
262 | ||
263 | SCM_DEFINE (scm_char_set_fold, "char-set-fold", 3, 0, 0, | |
264 | (SCM kons, SCM knil, SCM cs), | |
265 | "Fold the procedure @var{kons} over the character set @var{cs},\n" | |
266 | "initializing it with @var{knil}.") | |
267 | #define FUNC_NAME s_scm_char_set_fold | |
268 | { | |
269 | int k; | |
270 | ||
271 | SCM_VALIDATE_PROC (1, kons); | |
272 | SCM_VALIDATE_SMOB (3, cs, charset); | |
273 | ||
274 | for (k = 0; k < SCM_CHARSET_SIZE; k++) | |
275 | if (SCM_CHARSET_GET (cs, k)) | |
276 | { | |
277 | knil = scm_call_2 (kons, SCM_MAKE_CHAR (k), knil); | |
278 | } | |
279 | return knil; | |
280 | } | |
281 | #undef FUNC_NAME | |
282 | ||
283 | ||
284 | SCM_DEFINE (scm_char_set_unfold, "char-set-unfold", 4, 1, 0, | |
285 | (SCM p, SCM f, SCM g, SCM seed, SCM base_cs), | |
286 | "This is a fundamental constructor for character sets.\n" | |
287 | "@itemize @bullet\n" | |
288 | "@item @var{g} is used to generate a series of ``seed'' values\n" | |
289 | "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n" | |
290 | "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n" | |
291 | "@item @var{p} tells us when to stop -- when it returns true\n" | |
292 | "when applied to one of the seed values.\n" | |
293 | "@item @var{f} maps each seed value to a character. These\n" | |
294 | "characters are added to the base character set @var{base_cs} to\n" | |
295 | "form the result; @var{base_cs} defaults to the empty set.\n" | |
296 | "@end itemize") | |
297 | #define FUNC_NAME s_scm_char_set_unfold | |
298 | { | |
299 | SCM result, tmp; | |
300 | ||
301 | SCM_VALIDATE_PROC (1, p); | |
302 | SCM_VALIDATE_PROC (2, f); | |
303 | SCM_VALIDATE_PROC (3, g); | |
304 | if (!SCM_UNBNDP (base_cs)) | |
305 | { | |
306 | SCM_VALIDATE_SMOB (5, base_cs, charset); | |
307 | result = scm_char_set_copy (base_cs); | |
308 | } | |
309 | else | |
310 | result = make_char_set (FUNC_NAME); | |
311 | ||
312 | tmp = scm_call_1 (p, seed); | |
313 | while (scm_is_false (tmp)) | |
314 | { | |
315 | SCM ch = scm_call_1 (f, seed); | |
316 | if (!SCM_CHARP (ch)) | |
317 | SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f)); | |
318 | SCM_CHARSET_SET (result, SCM_CHAR (ch)); | |
319 | ||
320 | seed = scm_call_1 (g, seed); | |
321 | tmp = scm_call_1 (p, seed); | |
322 | } | |
323 | return result; | |
324 | } | |
325 | #undef FUNC_NAME | |
326 | ||
327 | ||
328 | SCM_DEFINE (scm_char_set_unfold_x, "char-set-unfold!", 5, 0, 0, | |
329 | (SCM p, SCM f, SCM g, SCM seed, SCM base_cs), | |
330 | "This is a fundamental constructor for character sets.\n" | |
331 | "@itemize @bullet\n" | |
332 | "@item @var{g} is used to generate a series of ``seed'' values\n" | |
333 | "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n" | |
334 | "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n" | |
335 | "@item @var{p} tells us when to stop -- when it returns true\n" | |
336 | "when applied to one of the seed values.\n" | |
337 | "@item @var{f} maps each seed value to a character. These\n" | |
338 | "characters are added to the base character set @var{base_cs} to\n" | |
339 | "form the result; @var{base_cs} defaults to the empty set.\n" | |
340 | "@end itemize") | |
341 | #define FUNC_NAME s_scm_char_set_unfold_x | |
342 | { | |
343 | SCM tmp; | |
344 | ||
345 | SCM_VALIDATE_PROC (1, p); | |
346 | SCM_VALIDATE_PROC (2, f); | |
347 | SCM_VALIDATE_PROC (3, g); | |
348 | SCM_VALIDATE_SMOB (5, base_cs, charset); | |
349 | ||
350 | tmp = scm_call_1 (p, seed); | |
351 | while (scm_is_false (tmp)) | |
352 | { | |
353 | SCM ch = scm_call_1 (f, seed); | |
354 | if (!SCM_CHARP (ch)) | |
355 | SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f)); | |
356 | SCM_CHARSET_SET (base_cs, SCM_CHAR (ch)); | |
357 | ||
358 | seed = scm_call_1 (g, seed); | |
359 | tmp = scm_call_1 (p, seed); | |
360 | } | |
361 | return base_cs; | |
362 | } | |
363 | #undef FUNC_NAME | |
364 | ||
365 | ||
366 | SCM_DEFINE (scm_char_set_for_each, "char-set-for-each", 2, 0, 0, | |
367 | (SCM proc, SCM cs), | |
368 | "Apply @var{proc} to every character in the character set\n" | |
369 | "@var{cs}. The return value is not specified.") | |
370 | #define FUNC_NAME s_scm_char_set_for_each | |
371 | { | |
372 | int k; | |
373 | ||
374 | SCM_VALIDATE_PROC (1, proc); | |
375 | SCM_VALIDATE_SMOB (2, cs, charset); | |
376 | ||
377 | for (k = 0; k < SCM_CHARSET_SIZE; k++) | |
378 | if (SCM_CHARSET_GET (cs, k)) | |
379 | scm_call_1 (proc, SCM_MAKE_CHAR (k)); | |
380 | return SCM_UNSPECIFIED; | |
381 | } | |
382 | #undef FUNC_NAME | |
383 | ||
384 | ||
385 | SCM_DEFINE (scm_char_set_map, "char-set-map", 2, 0, 0, | |
386 | (SCM proc, SCM cs), | |
387 | "Map the procedure @var{proc} over every character in @var{cs}.\n" | |
388 | "@var{proc} must be a character -> character procedure.") | |
389 | #define FUNC_NAME s_scm_char_set_map | |
390 | { | |
391 | SCM result; | |
392 | int k; | |
393 | ||
394 | SCM_VALIDATE_PROC (1, proc); | |
395 | SCM_VALIDATE_SMOB (2, cs, charset); | |
396 | ||
397 | result = make_char_set (FUNC_NAME); | |
398 | for (k = 0; k < SCM_CHARSET_SIZE; k++) | |
399 | if (SCM_CHARSET_GET (cs, k)) | |
400 | { | |
401 | SCM ch = scm_call_1 (proc, SCM_MAKE_CHAR (k)); | |
402 | if (!SCM_CHARP (ch)) | |
403 | SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (proc)); | |
404 | SCM_CHARSET_SET (result, SCM_CHAR (ch)); | |
405 | } | |
406 | return result; | |
407 | } | |
408 | #undef FUNC_NAME | |
409 | ||
410 | ||
411 | SCM_DEFINE (scm_char_set_copy, "char-set-copy", 1, 0, 0, | |
412 | (SCM cs), | |
413 | "Return a newly allocated character set containing all\n" | |
414 | "characters in @var{cs}.") | |
415 | #define FUNC_NAME s_scm_char_set_copy | |
416 | { | |
417 | SCM ret; | |
418 | long * p1, * p2; | |
419 | int k; | |
420 | ||
421 | SCM_VALIDATE_SMOB (1, cs, charset); | |
422 | ret = make_char_set (FUNC_NAME); | |
423 | p1 = (long *) SCM_SMOB_DATA (cs); | |
424 | p2 = (long *) SCM_SMOB_DATA (ret); | |
425 | for (k = 0; k < LONGS_PER_CHARSET; k++) | |
426 | p2[k] = p1[k]; | |
427 | return ret; | |
428 | } | |
429 | #undef FUNC_NAME | |
430 | ||
431 | ||
432 | SCM_DEFINE (scm_char_set, "char-set", 0, 0, 1, | |
433 | (SCM rest), | |
434 | "Return a character set containing all given characters.") | |
435 | #define FUNC_NAME s_scm_char_set | |
436 | { | |
437 | SCM cs; | |
438 | long * p; | |
439 | int argnum = 1; | |
440 | ||
441 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
442 | cs = make_char_set (FUNC_NAME); | |
443 | p = (long *) SCM_SMOB_DATA (cs); | |
d2e53ed6 | 444 | while (!scm_is_null (rest)) |
63181a97 MV |
445 | { |
446 | int c; | |
447 | ||
448 | SCM_VALIDATE_CHAR_COPY (argnum, SCM_CAR (rest), c); | |
449 | argnum++; | |
450 | rest = SCM_CDR (rest); | |
451 | p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG); | |
452 | } | |
453 | return cs; | |
454 | } | |
455 | #undef FUNC_NAME | |
456 | ||
457 | ||
458 | SCM_DEFINE (scm_list_to_char_set, "list->char-set", 1, 1, 0, | |
459 | (SCM list, SCM base_cs), | |
460 | "Convert the character list @var{list} to a character set. If\n" | |
461 | "the character set @var{base_cs} is given, the character in this\n" | |
462 | "set are also included in the result.") | |
463 | #define FUNC_NAME s_scm_list_to_char_set | |
464 | { | |
465 | SCM cs; | |
466 | long * p; | |
467 | ||
468 | SCM_VALIDATE_LIST (1, list); | |
469 | if (SCM_UNBNDP (base_cs)) | |
470 | cs = make_char_set (FUNC_NAME); | |
471 | else | |
472 | { | |
473 | SCM_VALIDATE_SMOB (2, base_cs, charset); | |
474 | cs = scm_char_set_copy (base_cs); | |
475 | } | |
476 | p = (long *) SCM_SMOB_DATA (cs); | |
d2e53ed6 | 477 | while (!scm_is_null (list)) |
63181a97 MV |
478 | { |
479 | SCM chr = SCM_CAR (list); | |
480 | int c; | |
481 | ||
482 | SCM_VALIDATE_CHAR_COPY (0, chr, c); | |
483 | list = SCM_CDR (list); | |
484 | ||
485 | p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG); | |
486 | } | |
487 | return cs; | |
488 | } | |
489 | #undef FUNC_NAME | |
490 | ||
491 | ||
492 | SCM_DEFINE (scm_list_to_char_set_x, "list->char-set!", 2, 0, 0, | |
493 | (SCM list, SCM base_cs), | |
494 | "Convert the character list @var{list} to a character set. The\n" | |
495 | "characters are added to @var{base_cs} and @var{base_cs} is\n" | |
496 | "returned.") | |
497 | #define FUNC_NAME s_scm_list_to_char_set_x | |
498 | { | |
499 | long * p; | |
500 | ||
501 | SCM_VALIDATE_LIST (1, list); | |
502 | SCM_VALIDATE_SMOB (2, base_cs, charset); | |
503 | p = (long *) SCM_SMOB_DATA (base_cs); | |
d2e53ed6 | 504 | while (!scm_is_null (list)) |
63181a97 MV |
505 | { |
506 | SCM chr = SCM_CAR (list); | |
507 | int c; | |
508 | ||
509 | SCM_VALIDATE_CHAR_COPY (0, chr, c); | |
510 | list = SCM_CDR (list); | |
511 | ||
512 | p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG); | |
513 | } | |
514 | return base_cs; | |
515 | } | |
516 | #undef FUNC_NAME | |
517 | ||
518 | ||
519 | SCM_DEFINE (scm_string_to_char_set, "string->char-set", 1, 1, 0, | |
520 | (SCM str, SCM base_cs), | |
521 | "Convert the string @var{str} to a character set. If the\n" | |
522 | "character set @var{base_cs} is given, the characters in this\n" | |
523 | "set are also included in the result.") | |
524 | #define FUNC_NAME s_scm_string_to_char_set | |
525 | { | |
526 | SCM cs; | |
527 | long * p; | |
528 | const char * s; | |
529 | size_t k = 0, len; | |
530 | ||
531 | SCM_VALIDATE_STRING (1, str); | |
532 | if (SCM_UNBNDP (base_cs)) | |
533 | cs = make_char_set (FUNC_NAME); | |
534 | else | |
535 | { | |
536 | SCM_VALIDATE_SMOB (2, base_cs, charset); | |
537 | cs = scm_char_set_copy (base_cs); | |
538 | } | |
539 | p = (long *) SCM_SMOB_DATA (cs); | |
540 | s = scm_i_string_chars (str); | |
541 | len = scm_i_string_length (str); | |
542 | while (k < len) | |
543 | { | |
544 | int c = s[k++]; | |
545 | p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG); | |
546 | } | |
547 | scm_remember_upto_here_1 (str); | |
548 | return cs; | |
549 | } | |
550 | #undef FUNC_NAME | |
551 | ||
552 | ||
553 | SCM_DEFINE (scm_string_to_char_set_x, "string->char-set!", 2, 0, 0, | |
554 | (SCM str, SCM base_cs), | |
555 | "Convert the string @var{str} to a character set. The\n" | |
556 | "characters from the string are added to @var{base_cs}, and\n" | |
557 | "@var{base_cs} is returned.") | |
558 | #define FUNC_NAME s_scm_string_to_char_set_x | |
559 | { | |
560 | long * p; | |
561 | const char * s; | |
562 | size_t k = 0, len; | |
563 | ||
564 | SCM_VALIDATE_STRING (1, str); | |
565 | SCM_VALIDATE_SMOB (2, base_cs, charset); | |
566 | p = (long *) SCM_SMOB_DATA (base_cs); | |
567 | s = scm_i_string_chars (str); | |
568 | len = scm_i_string_length (str); | |
569 | while (k < len) | |
570 | { | |
571 | int c = s[k++]; | |
572 | p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG); | |
573 | } | |
574 | scm_remember_upto_here_1 (str); | |
575 | return base_cs; | |
576 | } | |
577 | #undef FUNC_NAME | |
578 | ||
579 | ||
580 | SCM_DEFINE (scm_char_set_filter, "char-set-filter", 2, 1, 0, | |
581 | (SCM pred, SCM cs, SCM base_cs), | |
582 | "Return a character set containing every character from @var{cs}\n" | |
583 | "so that it satisfies @var{pred}. If provided, the characters\n" | |
584 | "from @var{base_cs} are added to the result.") | |
585 | #define FUNC_NAME s_scm_char_set_filter | |
586 | { | |
587 | SCM ret; | |
588 | int k; | |
589 | long * p; | |
590 | ||
591 | SCM_VALIDATE_PROC (1, pred); | |
592 | SCM_VALIDATE_SMOB (2, cs, charset); | |
593 | if (!SCM_UNBNDP (base_cs)) | |
594 | { | |
595 | SCM_VALIDATE_SMOB (3, base_cs, charset); | |
596 | ret = scm_char_set_copy (base_cs); | |
597 | } | |
598 | else | |
599 | ret = make_char_set (FUNC_NAME); | |
600 | p = (long *) SCM_SMOB_DATA (ret); | |
601 | for (k = 0; k < SCM_CHARSET_SIZE; k++) | |
602 | { | |
603 | if (SCM_CHARSET_GET (cs, k)) | |
604 | { | |
605 | SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k)); | |
606 | ||
607 | if (scm_is_true (res)) | |
608 | p[k / SCM_BITS_PER_LONG] |= 1L << (k % SCM_BITS_PER_LONG); | |
609 | } | |
610 | } | |
611 | return ret; | |
612 | } | |
613 | #undef FUNC_NAME | |
614 | ||
615 | ||
616 | SCM_DEFINE (scm_char_set_filter_x, "char-set-filter!", 3, 0, 0, | |
617 | (SCM pred, SCM cs, SCM base_cs), | |
618 | "Return a character set containing every character from @var{cs}\n" | |
619 | "so that it satisfies @var{pred}. The characters are added to\n" | |
620 | "@var{base_cs} and @var{base_cs} is returned.") | |
621 | #define FUNC_NAME s_scm_char_set_filter_x | |
622 | { | |
623 | int k; | |
624 | long * p; | |
625 | ||
626 | SCM_VALIDATE_PROC (1, pred); | |
627 | SCM_VALIDATE_SMOB (2, cs, charset); | |
628 | SCM_VALIDATE_SMOB (3, base_cs, charset); | |
629 | p = (long *) SCM_SMOB_DATA (base_cs); | |
630 | for (k = 0; k < SCM_CHARSET_SIZE; k++) | |
631 | { | |
632 | if (SCM_CHARSET_GET (cs, k)) | |
633 | { | |
634 | SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k)); | |
635 | ||
636 | if (scm_is_true (res)) | |
637 | p[k / SCM_BITS_PER_LONG] |= 1L << (k % SCM_BITS_PER_LONG); | |
638 | } | |
639 | } | |
640 | return base_cs; | |
641 | } | |
642 | #undef FUNC_NAME | |
643 | ||
644 | ||
645 | SCM_DEFINE (scm_ucs_range_to_char_set, "ucs-range->char-set", 2, 2, 0, | |
646 | (SCM lower, SCM upper, SCM error, SCM base_cs), | |
647 | "Return a character set containing all characters whose\n" | |
648 | "character codes lie in the half-open range\n" | |
649 | "[@var{lower},@var{upper}).\n" | |
650 | "\n" | |
651 | "If @var{error} is a true value, an error is signalled if the\n" | |
652 | "specified range contains characters which are not contained in\n" | |
653 | "the implemented character range. If @var{error} is @code{#f},\n" | |
654 | "these characters are silently left out of the resultung\n" | |
655 | "character set.\n" | |
656 | "\n" | |
657 | "The characters in @var{base_cs} are added to the result, if\n" | |
658 | "given.") | |
659 | #define FUNC_NAME s_scm_ucs_range_to_char_set | |
660 | { | |
661 | SCM cs; | |
662 | size_t clower, cupper; | |
663 | long * p; | |
664 | ||
665 | clower = scm_to_size_t (lower); | |
666 | cupper = scm_to_size_t (upper); | |
667 | SCM_ASSERT_RANGE (2, upper, cupper >= clower); | |
668 | if (!SCM_UNBNDP (error)) | |
669 | { | |
670 | if (scm_is_true (error)) | |
671 | { | |
672 | SCM_ASSERT_RANGE (1, lower, clower <= SCM_CHARSET_SIZE); | |
673 | SCM_ASSERT_RANGE (2, upper, cupper <= SCM_CHARSET_SIZE); | |
674 | } | |
675 | } | |
676 | if (clower > SCM_CHARSET_SIZE) | |
677 | clower = SCM_CHARSET_SIZE; | |
678 | if (cupper > SCM_CHARSET_SIZE) | |
679 | cupper = SCM_CHARSET_SIZE; | |
680 | if (SCM_UNBNDP (base_cs)) | |
681 | cs = make_char_set (FUNC_NAME); | |
682 | else | |
683 | { | |
684 | SCM_VALIDATE_SMOB (4, base_cs, charset); | |
685 | cs = scm_char_set_copy (base_cs); | |
686 | } | |
687 | p = (long *) SCM_SMOB_DATA (cs); | |
688 | while (clower < cupper) | |
689 | { | |
690 | p[clower / SCM_BITS_PER_LONG] |= 1L << (clower % SCM_BITS_PER_LONG); | |
691 | clower++; | |
692 | } | |
693 | return cs; | |
694 | } | |
695 | #undef FUNC_NAME | |
696 | ||
697 | ||
698 | SCM_DEFINE (scm_ucs_range_to_char_set_x, "ucs-range->char-set!", 4, 0, 0, | |
699 | (SCM lower, SCM upper, SCM error, SCM base_cs), | |
700 | "Return a character set containing all characters whose\n" | |
701 | "character codes lie in the half-open range\n" | |
702 | "[@var{lower},@var{upper}).\n" | |
703 | "\n" | |
704 | "If @var{error} is a true value, an error is signalled if the\n" | |
705 | "specified range contains characters which are not contained in\n" | |
706 | "the implemented character range. If @var{error} is @code{#f},\n" | |
707 | "these characters are silently left out of the resultung\n" | |
708 | "character set.\n" | |
709 | "\n" | |
710 | "The characters are added to @var{base_cs} and @var{base_cs} is\n" | |
711 | "returned.") | |
712 | #define FUNC_NAME s_scm_ucs_range_to_char_set_x | |
713 | { | |
714 | size_t clower, cupper; | |
715 | long * p; | |
716 | ||
717 | clower = scm_to_size_t (lower); | |
718 | cupper = scm_to_size_t (upper); | |
719 | SCM_ASSERT_RANGE (2, upper, cupper >= clower); | |
720 | if (scm_is_true (error)) | |
721 | { | |
722 | SCM_ASSERT_RANGE (1, lower, clower <= SCM_CHARSET_SIZE); | |
723 | SCM_ASSERT_RANGE (2, upper, cupper <= SCM_CHARSET_SIZE); | |
724 | } | |
725 | if (clower > SCM_CHARSET_SIZE) | |
726 | clower = SCM_CHARSET_SIZE; | |
727 | if (cupper > SCM_CHARSET_SIZE) | |
728 | cupper = SCM_CHARSET_SIZE; | |
729 | p = (long *) SCM_SMOB_DATA (base_cs); | |
730 | while (clower < cupper) | |
731 | { | |
732 | p[clower / SCM_BITS_PER_LONG] |= 1L << (clower % SCM_BITS_PER_LONG); | |
733 | clower++; | |
734 | } | |
735 | return base_cs; | |
736 | } | |
737 | #undef FUNC_NAME | |
738 | ||
739 | SCM_DEFINE (scm_to_char_set, "->char-set", 1, 0, 0, | |
740 | (SCM x), | |
741 | "Coerces x into a char-set. @var{x} may be a string, character or char-set. A string is converted to the set of its constituent characters; a character is converted to a singleton set; a char-set is returned as-is.") | |
742 | #define FUNC_NAME s_scm_to_char_set | |
743 | { | |
744 | if (scm_is_string (x)) | |
745 | return scm_string_to_char_set (x, SCM_UNDEFINED); | |
746 | else if (SCM_CHARP (x)) | |
747 | return scm_char_set (scm_list_1 (x)); | |
748 | else if (SCM_SMOB_PREDICATE (scm_tc16_charset, x)) | |
749 | return x; | |
750 | else | |
751 | scm_wrong_type_arg (NULL, 0, x); | |
752 | } | |
753 | #undef FUNC_NAME | |
754 | ||
755 | SCM_DEFINE (scm_char_set_size, "char-set-size", 1, 0, 0, | |
756 | (SCM cs), | |
757 | "Return the number of elements in character set @var{cs}.") | |
758 | #define FUNC_NAME s_scm_char_set_size | |
759 | { | |
760 | int k, count = 0; | |
761 | ||
762 | SCM_VALIDATE_SMOB (1, cs, charset); | |
763 | for (k = 0; k < SCM_CHARSET_SIZE; k++) | |
764 | if (SCM_CHARSET_GET (cs, k)) | |
765 | count++; | |
766 | return SCM_I_MAKINUM (count); | |
767 | } | |
768 | #undef FUNC_NAME | |
769 | ||
770 | ||
771 | SCM_DEFINE (scm_char_set_count, "char-set-count", 2, 0, 0, | |
772 | (SCM pred, SCM cs), | |
773 | "Return the number of the elements int the character set\n" | |
774 | "@var{cs} which satisfy the predicate @var{pred}.") | |
775 | #define FUNC_NAME s_scm_char_set_count | |
776 | { | |
777 | int k, count = 0; | |
778 | ||
779 | SCM_VALIDATE_PROC (1, pred); | |
780 | SCM_VALIDATE_SMOB (2, cs, charset); | |
781 | ||
782 | for (k = 0; k < SCM_CHARSET_SIZE; k++) | |
783 | if (SCM_CHARSET_GET (cs, k)) | |
784 | { | |
785 | SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k)); | |
786 | if (scm_is_true (res)) | |
787 | count++; | |
788 | } | |
789 | return SCM_I_MAKINUM (count); | |
790 | } | |
791 | #undef FUNC_NAME | |
792 | ||
793 | ||
794 | SCM_DEFINE (scm_char_set_to_list, "char-set->list", 1, 0, 0, | |
795 | (SCM cs), | |
796 | "Return a list containing the elements of the character set\n" | |
797 | "@var{cs}.") | |
798 | #define FUNC_NAME s_scm_char_set_to_list | |
799 | { | |
800 | int k; | |
801 | SCM result = SCM_EOL; | |
802 | ||
803 | SCM_VALIDATE_SMOB (1, cs, charset); | |
804 | for (k = SCM_CHARSET_SIZE; k > 0; k--) | |
805 | if (SCM_CHARSET_GET (cs, k - 1)) | |
806 | result = scm_cons (SCM_MAKE_CHAR (k - 1), result); | |
807 | return result; | |
808 | } | |
809 | #undef FUNC_NAME | |
810 | ||
811 | ||
812 | SCM_DEFINE (scm_char_set_to_string, "char-set->string", 1, 0, 0, | |
813 | (SCM cs), | |
814 | "Return a string containing the elements of the character set\n" | |
815 | "@var{cs}. The order in which the characters are placed in the\n" | |
816 | "string is not defined.") | |
817 | #define FUNC_NAME s_scm_char_set_to_string | |
818 | { | |
819 | int k; | |
820 | int count = 0; | |
821 | int idx = 0; | |
822 | SCM result; | |
823 | char * p; | |
824 | ||
825 | SCM_VALIDATE_SMOB (1, cs, charset); | |
826 | for (k = 0; k < SCM_CHARSET_SIZE; k++) | |
827 | if (SCM_CHARSET_GET (cs, k)) | |
828 | count++; | |
829 | result = scm_i_make_string (count, &p); | |
830 | for (k = 0; k < SCM_CHARSET_SIZE; k++) | |
831 | if (SCM_CHARSET_GET (cs, k)) | |
832 | p[idx++] = k; | |
833 | return result; | |
834 | } | |
835 | #undef FUNC_NAME | |
836 | ||
837 | ||
838 | SCM_DEFINE (scm_char_set_contains_p, "char-set-contains?", 2, 0, 0, | |
839 | (SCM cs, SCM ch), | |
840 | "Return @code{#t} iff the character @var{ch} is contained in the\n" | |
841 | "character set @var{cs}.") | |
842 | #define FUNC_NAME s_scm_char_set_contains_p | |
843 | { | |
844 | SCM_VALIDATE_SMOB (1, cs, charset); | |
845 | SCM_VALIDATE_CHAR (2, ch); | |
846 | return scm_from_bool (SCM_CHARSET_GET (cs, SCM_CHAR (ch))); | |
847 | } | |
848 | #undef FUNC_NAME | |
849 | ||
850 | ||
851 | SCM_DEFINE (scm_char_set_every, "char-set-every", 2, 0, 0, | |
852 | (SCM pred, SCM cs), | |
853 | "Return a true value if every character in the character set\n" | |
854 | "@var{cs} satisfies the predicate @var{pred}.") | |
855 | #define FUNC_NAME s_scm_char_set_every | |
856 | { | |
857 | int k; | |
858 | SCM res = SCM_BOOL_T; | |
859 | ||
860 | SCM_VALIDATE_PROC (1, pred); | |
861 | SCM_VALIDATE_SMOB (2, cs, charset); | |
862 | ||
863 | for (k = 0; k < SCM_CHARSET_SIZE; k++) | |
864 | if (SCM_CHARSET_GET (cs, k)) | |
865 | { | |
866 | res = scm_call_1 (pred, SCM_MAKE_CHAR (k)); | |
867 | if (scm_is_false (res)) | |
868 | return res; | |
869 | } | |
870 | return res; | |
871 | } | |
872 | #undef FUNC_NAME | |
873 | ||
874 | ||
875 | SCM_DEFINE (scm_char_set_any, "char-set-any", 2, 0, 0, | |
876 | (SCM pred, SCM cs), | |
877 | "Return a true value if any character in the character set\n" | |
878 | "@var{cs} satisfies the predicate @var{pred}.") | |
879 | #define FUNC_NAME s_scm_char_set_any | |
880 | { | |
881 | int k; | |
882 | ||
883 | SCM_VALIDATE_PROC (1, pred); | |
884 | SCM_VALIDATE_SMOB (2, cs, charset); | |
885 | ||
886 | for (k = 0; k < SCM_CHARSET_SIZE; k++) | |
887 | if (SCM_CHARSET_GET (cs, k)) | |
888 | { | |
889 | SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k)); | |
890 | if (scm_is_true (res)) | |
891 | return res; | |
892 | } | |
893 | return SCM_BOOL_F; | |
894 | } | |
895 | #undef FUNC_NAME | |
896 | ||
897 | ||
898 | SCM_DEFINE (scm_char_set_adjoin, "char-set-adjoin", 1, 0, 1, | |
899 | (SCM cs, SCM rest), | |
900 | "Add all character arguments to the first argument, which must\n" | |
901 | "be a character set.") | |
902 | #define FUNC_NAME s_scm_char_set_adjoin | |
903 | { | |
904 | long * p; | |
905 | ||
906 | SCM_VALIDATE_SMOB (1, cs, charset); | |
907 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
908 | cs = scm_char_set_copy (cs); | |
909 | ||
910 | p = (long *) SCM_SMOB_DATA (cs); | |
d2e53ed6 | 911 | while (!scm_is_null (rest)) |
63181a97 MV |
912 | { |
913 | SCM chr = SCM_CAR (rest); | |
914 | int c; | |
915 | ||
916 | SCM_VALIDATE_CHAR_COPY (1, chr, c); | |
917 | rest = SCM_CDR (rest); | |
918 | ||
919 | p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG); | |
920 | } | |
921 | return cs; | |
922 | } | |
923 | #undef FUNC_NAME | |
924 | ||
925 | ||
926 | SCM_DEFINE (scm_char_set_delete, "char-set-delete", 1, 0, 1, | |
927 | (SCM cs, SCM rest), | |
928 | "Delete all character arguments from the first argument, which\n" | |
929 | "must be a character set.") | |
930 | #define FUNC_NAME s_scm_char_set_delete | |
931 | { | |
932 | long * p; | |
933 | ||
934 | SCM_VALIDATE_SMOB (1, cs, charset); | |
935 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
936 | cs = scm_char_set_copy (cs); | |
937 | ||
938 | p = (long *) SCM_SMOB_DATA (cs); | |
d2e53ed6 | 939 | while (!scm_is_null (rest)) |
63181a97 MV |
940 | { |
941 | SCM chr = SCM_CAR (rest); | |
942 | int c; | |
943 | ||
944 | SCM_VALIDATE_CHAR_COPY (1, chr, c); | |
945 | rest = SCM_CDR (rest); | |
946 | ||
947 | p[c / SCM_BITS_PER_LONG] &= ~(1L << (c % SCM_BITS_PER_LONG)); | |
948 | } | |
949 | return cs; | |
950 | } | |
951 | #undef FUNC_NAME | |
952 | ||
953 | ||
954 | SCM_DEFINE (scm_char_set_adjoin_x, "char-set-adjoin!", 1, 0, 1, | |
955 | (SCM cs, SCM rest), | |
956 | "Add all character arguments to the first argument, which must\n" | |
957 | "be a character set.") | |
958 | #define FUNC_NAME s_scm_char_set_adjoin_x | |
959 | { | |
960 | long * p; | |
961 | ||
962 | SCM_VALIDATE_SMOB (1, cs, charset); | |
963 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
964 | ||
965 | p = (long *) SCM_SMOB_DATA (cs); | |
d2e53ed6 | 966 | while (!scm_is_null (rest)) |
63181a97 MV |
967 | { |
968 | SCM chr = SCM_CAR (rest); | |
969 | int c; | |
970 | ||
971 | SCM_VALIDATE_CHAR_COPY (1, chr, c); | |
972 | rest = SCM_CDR (rest); | |
973 | ||
974 | p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG); | |
975 | } | |
976 | return cs; | |
977 | } | |
978 | #undef FUNC_NAME | |
979 | ||
980 | ||
981 | SCM_DEFINE (scm_char_set_delete_x, "char-set-delete!", 1, 0, 1, | |
982 | (SCM cs, SCM rest), | |
983 | "Delete all character arguments from the first argument, which\n" | |
984 | "must be a character set.") | |
985 | #define FUNC_NAME s_scm_char_set_delete_x | |
986 | { | |
987 | long * p; | |
988 | ||
989 | SCM_VALIDATE_SMOB (1, cs, charset); | |
990 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
991 | ||
992 | p = (long *) SCM_SMOB_DATA (cs); | |
d2e53ed6 | 993 | while (!scm_is_null (rest)) |
63181a97 MV |
994 | { |
995 | SCM chr = SCM_CAR (rest); | |
996 | int c; | |
997 | ||
998 | SCM_VALIDATE_CHAR_COPY (1, chr, c); | |
999 | rest = SCM_CDR (rest); | |
1000 | ||
1001 | p[c / SCM_BITS_PER_LONG] &= ~(1L << (c % SCM_BITS_PER_LONG)); | |
1002 | } | |
1003 | return cs; | |
1004 | } | |
1005 | #undef FUNC_NAME | |
1006 | ||
1007 | ||
1008 | SCM_DEFINE (scm_char_set_complement, "char-set-complement", 1, 0, 0, | |
1009 | (SCM cs), | |
1010 | "Return the complement of the character set @var{cs}.") | |
1011 | #define FUNC_NAME s_scm_char_set_complement | |
1012 | { | |
1013 | int k; | |
1014 | SCM res; | |
1015 | long * p, * q; | |
1016 | ||
1017 | SCM_VALIDATE_SMOB (1, cs, charset); | |
1018 | ||
1019 | res = make_char_set (FUNC_NAME); | |
1020 | p = (long *) SCM_SMOB_DATA (res); | |
1021 | q = (long *) SCM_SMOB_DATA (cs); | |
1022 | for (k = 0; k < LONGS_PER_CHARSET; k++) | |
1023 | p[k] = ~q[k]; | |
1024 | return res; | |
1025 | } | |
1026 | #undef FUNC_NAME | |
1027 | ||
1028 | ||
1029 | SCM_DEFINE (scm_char_set_union, "char-set-union", 0, 0, 1, | |
1030 | (SCM rest), | |
1031 | "Return the union of all argument character sets.") | |
1032 | #define FUNC_NAME s_scm_char_set_union | |
1033 | { | |
1034 | int c = 1; | |
1035 | SCM res; | |
1036 | long * p; | |
1037 | ||
1038 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1039 | ||
1040 | res = make_char_set (FUNC_NAME); | |
1041 | p = (long *) SCM_SMOB_DATA (res); | |
d2e53ed6 | 1042 | while (!scm_is_null (rest)) |
63181a97 MV |
1043 | { |
1044 | int k; | |
1045 | SCM cs = SCM_CAR (rest); | |
1046 | SCM_VALIDATE_SMOB (c, cs, charset); | |
1047 | c++; | |
1048 | rest = SCM_CDR (rest); | |
1049 | ||
1050 | for (k = 0; k < LONGS_PER_CHARSET; k++) | |
1051 | p[k] |= ((long *) SCM_SMOB_DATA (cs))[k]; | |
1052 | } | |
1053 | return res; | |
1054 | } | |
1055 | #undef FUNC_NAME | |
1056 | ||
1057 | ||
1058 | SCM_DEFINE (scm_char_set_intersection, "char-set-intersection", 0, 0, 1, | |
1059 | (SCM rest), | |
1060 | "Return the intersection of all argument character sets.") | |
1061 | #define FUNC_NAME s_scm_char_set_intersection | |
1062 | { | |
1063 | SCM res; | |
1064 | ||
1065 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1066 | ||
d2e53ed6 | 1067 | if (scm_is_null (rest)) |
63181a97 MV |
1068 | res = make_char_set (FUNC_NAME); |
1069 | else | |
1070 | { | |
1071 | long *p; | |
1072 | int argnum = 2; | |
1073 | ||
1074 | res = scm_char_set_copy (SCM_CAR (rest)); | |
1075 | p = (long *) SCM_SMOB_DATA (res); | |
1076 | rest = SCM_CDR (rest); | |
1077 | ||
d2e53ed6 | 1078 | while (scm_is_pair (rest)) |
63181a97 MV |
1079 | { |
1080 | int k; | |
1081 | SCM cs = SCM_CAR (rest); | |
1082 | long *cs_data; | |
1083 | ||
1084 | SCM_VALIDATE_SMOB (argnum, cs, charset); | |
1085 | argnum++; | |
1086 | cs_data = (long *) SCM_SMOB_DATA (cs); | |
1087 | rest = SCM_CDR (rest); | |
1088 | for (k = 0; k < LONGS_PER_CHARSET; k++) | |
1089 | p[k] &= cs_data[k]; | |
1090 | } | |
1091 | } | |
1092 | ||
1093 | return res; | |
1094 | } | |
1095 | #undef FUNC_NAME | |
1096 | ||
1097 | ||
1098 | SCM_DEFINE (scm_char_set_difference, "char-set-difference", 1, 0, 1, | |
1099 | (SCM cs1, SCM rest), | |
1100 | "Return the difference of all argument character sets.") | |
1101 | #define FUNC_NAME s_scm_char_set_difference | |
1102 | { | |
1103 | int c = 2; | |
1104 | SCM res; | |
1105 | long * p; | |
1106 | ||
1107 | SCM_VALIDATE_SMOB (1, cs1, charset); | |
1108 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1109 | ||
1110 | res = scm_char_set_copy (cs1); | |
1111 | p = (long *) SCM_SMOB_DATA (res); | |
d2e53ed6 | 1112 | while (!scm_is_null (rest)) |
63181a97 MV |
1113 | { |
1114 | int k; | |
1115 | SCM cs = SCM_CAR (rest); | |
1116 | SCM_VALIDATE_SMOB (c, cs, charset); | |
1117 | c++; | |
1118 | rest = SCM_CDR (rest); | |
1119 | ||
1120 | for (k = 0; k < LONGS_PER_CHARSET; k++) | |
1121 | p[k] &= ~((long *) SCM_SMOB_DATA (cs))[k]; | |
1122 | } | |
1123 | return res; | |
1124 | } | |
1125 | #undef FUNC_NAME | |
1126 | ||
1127 | ||
1128 | SCM_DEFINE (scm_char_set_xor, "char-set-xor", 0, 0, 1, | |
1129 | (SCM rest), | |
1130 | "Return the exclusive-or of all argument character sets.") | |
1131 | #define FUNC_NAME s_scm_char_set_xor | |
1132 | { | |
1133 | SCM res; | |
1134 | ||
1135 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1136 | ||
d2e53ed6 | 1137 | if (scm_is_null (rest)) |
63181a97 MV |
1138 | res = make_char_set (FUNC_NAME); |
1139 | else | |
1140 | { | |
1141 | int argnum = 2; | |
1142 | long * p; | |
1143 | ||
1144 | res = scm_char_set_copy (SCM_CAR (rest)); | |
1145 | p = (long *) SCM_SMOB_DATA (res); | |
1146 | rest = SCM_CDR (rest); | |
1147 | ||
d2e53ed6 | 1148 | while (scm_is_pair (rest)) |
63181a97 MV |
1149 | { |
1150 | SCM cs = SCM_CAR (rest); | |
1151 | long *cs_data; | |
1152 | int k; | |
1153 | ||
1154 | SCM_VALIDATE_SMOB (argnum, cs, charset); | |
1155 | argnum++; | |
1156 | cs_data = (long *) SCM_SMOB_DATA (cs); | |
1157 | rest = SCM_CDR (rest); | |
1158 | ||
1159 | for (k = 0; k < LONGS_PER_CHARSET; k++) | |
1160 | p[k] ^= cs_data[k]; | |
1161 | } | |
1162 | } | |
1163 | return res; | |
1164 | } | |
1165 | #undef FUNC_NAME | |
1166 | ||
1167 | ||
1168 | SCM_DEFINE (scm_char_set_diff_plus_intersection, "char-set-diff+intersection", 1, 0, 1, | |
1169 | (SCM cs1, SCM rest), | |
1170 | "Return the difference and the intersection of all argument\n" | |
1171 | "character sets.") | |
1172 | #define FUNC_NAME s_scm_char_set_diff_plus_intersection | |
1173 | { | |
1174 | int c = 2; | |
1175 | SCM res1, res2; | |
1176 | long * p, * q; | |
1177 | ||
1178 | SCM_VALIDATE_SMOB (1, cs1, charset); | |
1179 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1180 | ||
1181 | res1 = scm_char_set_copy (cs1); | |
1182 | res2 = make_char_set (FUNC_NAME); | |
1183 | p = (long *) SCM_SMOB_DATA (res1); | |
1184 | q = (long *) SCM_SMOB_DATA (res2); | |
d2e53ed6 | 1185 | while (!scm_is_null (rest)) |
63181a97 MV |
1186 | { |
1187 | int k; | |
1188 | SCM cs = SCM_CAR (rest); | |
1189 | long *r; | |
1190 | ||
1191 | SCM_VALIDATE_SMOB (c, cs, charset); | |
1192 | c++; | |
1193 | r = (long *) SCM_SMOB_DATA (cs); | |
1194 | ||
1195 | for (k = 0; k < LONGS_PER_CHARSET; k++) | |
1196 | { | |
1197 | q[k] |= p[k] & r[k]; | |
1198 | p[k] &= ~r[k]; | |
1199 | } | |
1200 | rest = SCM_CDR (rest); | |
1201 | } | |
1202 | return scm_values (scm_list_2 (res1, res2)); | |
1203 | } | |
1204 | #undef FUNC_NAME | |
1205 | ||
1206 | ||
1207 | SCM_DEFINE (scm_char_set_complement_x, "char-set-complement!", 1, 0, 0, | |
1208 | (SCM cs), | |
1209 | "Return the complement of the character set @var{cs}.") | |
1210 | #define FUNC_NAME s_scm_char_set_complement_x | |
1211 | { | |
1212 | int k; | |
1213 | long * p; | |
1214 | ||
1215 | SCM_VALIDATE_SMOB (1, cs, charset); | |
1216 | p = (long *) SCM_SMOB_DATA (cs); | |
1217 | for (k = 0; k < LONGS_PER_CHARSET; k++) | |
1218 | p[k] = ~p[k]; | |
1219 | return cs; | |
1220 | } | |
1221 | #undef FUNC_NAME | |
1222 | ||
1223 | ||
1224 | SCM_DEFINE (scm_char_set_union_x, "char-set-union!", 1, 0, 1, | |
1225 | (SCM cs1, SCM rest), | |
1226 | "Return the union of all argument character sets.") | |
1227 | #define FUNC_NAME s_scm_char_set_union_x | |
1228 | { | |
1229 | int c = 2; | |
1230 | long * p; | |
1231 | ||
1232 | SCM_VALIDATE_SMOB (1, cs1, charset); | |
1233 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1234 | ||
1235 | p = (long *) SCM_SMOB_DATA (cs1); | |
d2e53ed6 | 1236 | while (!scm_is_null (rest)) |
63181a97 MV |
1237 | { |
1238 | int k; | |
1239 | SCM cs = SCM_CAR (rest); | |
1240 | SCM_VALIDATE_SMOB (c, cs, charset); | |
1241 | c++; | |
1242 | rest = SCM_CDR (rest); | |
1243 | ||
1244 | for (k = 0; k < LONGS_PER_CHARSET; k++) | |
1245 | p[k] |= ((long *) SCM_SMOB_DATA (cs))[k]; | |
1246 | } | |
1247 | return cs1; | |
1248 | } | |
1249 | #undef FUNC_NAME | |
1250 | ||
1251 | ||
1252 | SCM_DEFINE (scm_char_set_intersection_x, "char-set-intersection!", 1, 0, 1, | |
1253 | (SCM cs1, SCM rest), | |
1254 | "Return the intersection of all argument character sets.") | |
1255 | #define FUNC_NAME s_scm_char_set_intersection_x | |
1256 | { | |
1257 | int c = 2; | |
1258 | long * p; | |
1259 | ||
1260 | SCM_VALIDATE_SMOB (1, cs1, charset); | |
1261 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1262 | ||
1263 | p = (long *) SCM_SMOB_DATA (cs1); | |
d2e53ed6 | 1264 | while (!scm_is_null (rest)) |
63181a97 MV |
1265 | { |
1266 | int k; | |
1267 | SCM cs = SCM_CAR (rest); | |
1268 | SCM_VALIDATE_SMOB (c, cs, charset); | |
1269 | c++; | |
1270 | rest = SCM_CDR (rest); | |
1271 | ||
1272 | for (k = 0; k < LONGS_PER_CHARSET; k++) | |
1273 | p[k] &= ((long *) SCM_SMOB_DATA (cs))[k]; | |
1274 | } | |
1275 | return cs1; | |
1276 | } | |
1277 | #undef FUNC_NAME | |
1278 | ||
1279 | ||
1280 | SCM_DEFINE (scm_char_set_difference_x, "char-set-difference!", 1, 0, 1, | |
1281 | (SCM cs1, SCM rest), | |
1282 | "Return the difference of all argument character sets.") | |
1283 | #define FUNC_NAME s_scm_char_set_difference_x | |
1284 | { | |
1285 | int c = 2; | |
1286 | long * p; | |
1287 | ||
1288 | SCM_VALIDATE_SMOB (1, cs1, charset); | |
1289 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1290 | ||
1291 | p = (long *) SCM_SMOB_DATA (cs1); | |
d2e53ed6 | 1292 | while (!scm_is_null (rest)) |
63181a97 MV |
1293 | { |
1294 | int k; | |
1295 | SCM cs = SCM_CAR (rest); | |
1296 | SCM_VALIDATE_SMOB (c, cs, charset); | |
1297 | c++; | |
1298 | rest = SCM_CDR (rest); | |
1299 | ||
1300 | for (k = 0; k < LONGS_PER_CHARSET; k++) | |
1301 | p[k] &= ~((long *) SCM_SMOB_DATA (cs))[k]; | |
1302 | } | |
1303 | return cs1; | |
1304 | } | |
1305 | #undef FUNC_NAME | |
1306 | ||
1307 | ||
1308 | SCM_DEFINE (scm_char_set_xor_x, "char-set-xor!", 1, 0, 1, | |
1309 | (SCM cs1, SCM rest), | |
1310 | "Return the exclusive-or of all argument character sets.") | |
1311 | #define FUNC_NAME s_scm_char_set_xor_x | |
1312 | { | |
1313 | /* a side-effecting variant should presumably give consistent results: | |
1314 | (define a (char-set #\a)) | |
1315 | (char-set-xor a a a) -> char set #\a | |
1316 | (char-set-xor! a a a) -> char set #\a | |
1317 | */ | |
1318 | return scm_char_set_xor (scm_cons (cs1, rest)); | |
1319 | ||
1320 | #if 0 | |
1321 | /* this would give (char-set-xor! a a a) -> empty char set. */ | |
1322 | int c = 2; | |
1323 | long * p; | |
1324 | ||
1325 | SCM_VALIDATE_SMOB (1, cs1, charset); | |
1326 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1327 | ||
1328 | p = (long *) SCM_SMOB_DATA (cs1); | |
d2e53ed6 | 1329 | while (!scm_is_null (rest)) |
63181a97 MV |
1330 | { |
1331 | int k; | |
1332 | SCM cs = SCM_CAR (rest); | |
1333 | SCM_VALIDATE_SMOB (c, cs, charset); | |
1334 | c++; | |
1335 | rest = SCM_CDR (rest); | |
1336 | ||
1337 | for (k = 0; k < LONGS_PER_CHARSET; k++) | |
1338 | p[k] ^= ((long *) SCM_SMOB_DATA (cs))[k]; | |
1339 | } | |
1340 | return cs1; | |
1341 | #endif | |
1342 | } | |
1343 | #undef FUNC_NAME | |
1344 | ||
1345 | ||
1346 | SCM_DEFINE (scm_char_set_diff_plus_intersection_x, "char-set-diff+intersection!", 2, 0, 1, | |
1347 | (SCM cs1, SCM cs2, SCM rest), | |
1348 | "Return the difference and the intersection of all argument\n" | |
1349 | "character sets.") | |
1350 | #define FUNC_NAME s_scm_char_set_diff_plus_intersection_x | |
1351 | { | |
1352 | int c = 3; | |
1353 | long * p, * q; | |
1354 | int k; | |
1355 | ||
1356 | SCM_VALIDATE_SMOB (1, cs1, charset); | |
1357 | SCM_VALIDATE_SMOB (2, cs2, charset); | |
1358 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1359 | ||
1360 | p = (long *) SCM_SMOB_DATA (cs1); | |
1361 | q = (long *) SCM_SMOB_DATA (cs2); | |
1362 | if (p == q) | |
1363 | { | |
1364 | /* (char-set-diff+intersection! a a ...): can't share storage, | |
1365 | but we know the answer without checking for further | |
1366 | arguments. */ | |
1367 | return scm_values (scm_list_2 (make_char_set (FUNC_NAME), cs1)); | |
1368 | } | |
1369 | for (k = 0; k < LONGS_PER_CHARSET; k++) | |
1370 | { | |
1371 | long t = p[k]; | |
1372 | ||
1373 | p[k] &= ~q[k]; | |
1374 | q[k] = t & q[k]; | |
1375 | } | |
d2e53ed6 | 1376 | while (!scm_is_null (rest)) |
63181a97 MV |
1377 | { |
1378 | SCM cs = SCM_CAR (rest); | |
1379 | long *r; | |
1380 | ||
1381 | SCM_VALIDATE_SMOB (c, cs, charset); | |
1382 | c++; | |
1383 | r = (long *) SCM_SMOB_DATA (cs); | |
1384 | ||
1385 | for (k = 0; k < LONGS_PER_CHARSET; k++) | |
1386 | { | |
1387 | q[k] |= p[k] & r[k]; | |
1388 | p[k] &= ~r[k]; | |
1389 | } | |
1390 | rest = SCM_CDR (rest); | |
1391 | } | |
1392 | return scm_values (scm_list_2 (cs1, cs2)); | |
1393 | } | |
1394 | #undef FUNC_NAME | |
1395 | ||
1396 | SCM scm_char_set_lower_case; | |
1397 | SCM scm_char_set_upper_case; | |
1398 | SCM scm_char_set_title_case; | |
1399 | SCM scm_char_set_letter; | |
1400 | SCM scm_char_set_digit; | |
1401 | SCM scm_char_set_letter_and_digit; | |
1402 | SCM scm_char_set_graphic; | |
1403 | SCM scm_char_set_printing; | |
1404 | SCM scm_char_set_whitespace; | |
1405 | SCM scm_char_set_iso_control; | |
1406 | SCM scm_char_set_punctuation; | |
1407 | SCM scm_char_set_symbol; | |
1408 | SCM scm_char_set_hex_digit; | |
1409 | SCM scm_char_set_blank; | |
1410 | SCM scm_char_set_ascii; | |
1411 | SCM scm_char_set_empty; | |
1412 | SCM scm_char_set_full; | |
1413 | ||
1414 | static SCM | |
1415 | make_predset (int (*pred) (int)) | |
1416 | { | |
1417 | int ch; | |
1418 | SCM cs = make_char_set (NULL); | |
1419 | for (ch = 0; ch < 256; ch++) | |
1420 | if (pred (ch)) | |
1421 | SCM_CHARSET_SET (cs, ch); | |
1422 | return cs; | |
1423 | } | |
1424 | ||
1425 | static SCM | |
1426 | define_predset (const char *name, int (*pred) (int)) | |
1427 | { | |
1428 | SCM cs = make_predset (pred); | |
1429 | scm_c_define (name, cs); | |
1430 | return scm_permanent_object (cs); | |
1431 | } | |
1432 | ||
1433 | static SCM | |
1434 | make_strset (const char *str) | |
1435 | { | |
1436 | SCM cs = make_char_set (NULL); | |
1437 | while (*str) | |
1438 | { | |
1439 | SCM_CHARSET_SET (cs, *str); | |
1440 | str++; | |
1441 | } | |
1442 | return cs; | |
1443 | } | |
1444 | ||
1445 | static SCM | |
1446 | define_strset (const char *name, const char *str) | |
1447 | { | |
1448 | SCM cs = make_strset (str); | |
1449 | scm_c_define (name, cs); | |
1450 | return scm_permanent_object (cs); | |
1451 | } | |
1452 | ||
1453 | static int false (int ch) { return 0; } | |
1454 | static int true (int ch) { return 1; } | |
1455 | ||
1456 | void | |
1457 | scm_init_srfi_14 (void) | |
1458 | { | |
1459 | scm_tc16_charset = scm_make_smob_type ("character-set", | |
1460 | BYTES_PER_CHARSET); | |
1461 | scm_set_smob_free (scm_tc16_charset, charset_free); | |
1462 | scm_set_smob_print (scm_tc16_charset, charset_print); | |
1463 | ||
1464 | scm_char_set_upper_case = define_predset ("char-set:upper-case", isupper); | |
1465 | scm_char_set_lower_case = define_predset ("char-set:lower-case", islower); | |
1466 | scm_char_set_title_case = define_predset ("char-set:title-case", false); | |
1467 | scm_char_set_letter = define_predset ("char-set:letter", isalpha); | |
1468 | scm_char_set_digit = define_predset ("char-set:digit", isdigit); | |
1469 | scm_char_set_letter_and_digit = define_predset ("char-set:letter+digit", | |
1470 | isalnum); | |
1471 | scm_char_set_graphic = define_predset ("char-set:graphic", isgraph); | |
1472 | scm_char_set_printing = define_predset ("char-set:printing", isprint); | |
1473 | scm_char_set_whitespace = define_predset ("char-set:whitespace", isspace); | |
1474 | scm_char_set_iso_control = define_predset ("char-set:iso-control", iscntrl); | |
1475 | scm_char_set_punctuation = define_predset ("char-set:punctuation", ispunct); | |
1476 | scm_char_set_symbol = define_strset ("char-set:symbol", "$+<=>^`|~"); | |
1477 | scm_char_set_hex_digit = define_predset ("char-set:hex-digit", isxdigit); | |
1478 | scm_char_set_blank = define_strset ("char-set:blank", " \t"); | |
1479 | scm_char_set_ascii = define_predset ("char-set:ascii", isascii); | |
1480 | scm_char_set_empty = define_predset ("char-set:empty", false); | |
1481 | scm_char_set_full = define_predset ("char-set:full", true); | |
1482 | ||
1483 | #include "libguile/srfi-14.x" | |
1484 | } | |
1485 | ||
1486 | /* End of srfi-14.c. */ |