Commit | Line | Data |
---|---|---|
63181a97 MV |
1 | /* srfi-14.c --- SRFI-14 procedures for Guile |
2 | * | |
6caac03c | 3 | * Copyright (C) 2001, 2004, 2006, 2007 Free Software Foundation, Inc. |
63181a97 MV |
4 | * |
5 | * This library is free software; you can redistribute it and/or | |
53befeb7 NJ |
6 | * modify it under the terms of the GNU Lesser General Public License |
7 | * as published by the Free Software Foundation; either version 3 of | |
8 | * the License, or (at your option) any later version. | |
63181a97 | 9 | * |
53befeb7 NJ |
10 | * This library is distributed in the hope that it will be useful, but |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
63181a97 MV |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | * Lesser General Public License for more details. | |
14 | * | |
15 | * You should have received a copy of the GNU Lesser General Public | |
16 | * License along with this library; if not, write to the Free Software | |
53befeb7 NJ |
17 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
18 | * 02110-1301 USA | |
63181a97 MV |
19 | */ |
20 | ||
a17d2654 LC |
21 | #ifdef HAVE_CONFIG_H |
22 | # include <config.h> | |
23 | #endif | |
24 | ||
25 | ||
63181a97 | 26 | #include <string.h> |
f49dbcad | 27 | #include <unictype.h> |
63181a97 MV |
28 | |
29 | #include "libguile.h" | |
30 | #include "libguile/srfi-14.h" | |
f49dbcad | 31 | #include "libguile/strings.h" |
24d23822 | 32 | #include "libguile/chars.h" |
63181a97 | 33 | |
f49dbcad MG |
34 | /* Include the pre-computed standard charset data. */ |
35 | #include "libguile/srfi-14.i.c" | |
63181a97 | 36 | |
f49dbcad | 37 | #define SCM_CHARSET_DATA(charset) ((scm_t_char_set *) SCM_SMOB_DATA (charset)) |
63181a97 | 38 | |
f49dbcad MG |
39 | #define SCM_CHARSET_SET(cs, idx) \ |
40 | scm_i_charset_set (SCM_CHARSET_DATA (cs), idx) | |
63181a97 | 41 | |
f49dbcad MG |
42 | #define SCM_CHARSET_UNSET(cs, idx) \ |
43 | scm_i_charset_unset (SCM_CHARSET_DATA (cs), idx) | |
63181a97 MV |
44 | |
45 | /* Smob type code for character sets. */ | |
46 | int scm_tc16_charset = 0; | |
f49dbcad MG |
47 | int scm_tc16_charset_cursor = 0; |
48 | ||
49 | /* True if N exists in charset CS. */ | |
50 | int | |
51 | scm_i_charset_get (scm_t_char_set *cs, scm_t_wchar n) | |
52 | { | |
53 | size_t i; | |
54 | ||
55 | i = 0; | |
56 | while (i < cs->len) | |
57 | { | |
58 | if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi) | |
59 | return 1; | |
60 | i++; | |
61 | } | |
62 | ||
63 | return 0; | |
64 | } | |
65 | ||
66 | /* Put N into charset CS. */ | |
67 | void | |
68 | scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n) | |
69 | { | |
70 | size_t i; | |
71 | size_t len; | |
72 | ||
73 | len = cs->len; | |
74 | ||
75 | i = 0; | |
76 | while (i < len) | |
77 | { | |
78 | /* Already in this range */ | |
79 | if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi) | |
80 | { | |
81 | return; | |
82 | } | |
83 | ||
84 | if (n == cs->ranges[i].lo - 1) | |
85 | { | |
86 | /* This char is one below the current range. */ | |
87 | if (i > 0 && cs->ranges[i - 1].hi + 1 == n) | |
88 | { | |
89 | /* It is also one above the previous range, so combine them. */ | |
90 | cs->ranges[i - 1].hi = cs->ranges[i].hi; | |
91 | if (i < len - 1) | |
92 | memmove (cs->ranges + i, cs->ranges + (i + 1), | |
93 | sizeof (scm_t_char_range) * (len - i - 1)); | |
94 | cs->ranges = scm_gc_realloc (cs->ranges, | |
95 | sizeof (scm_t_char_range) * len, | |
96 | sizeof (scm_t_char_range) * (len - | |
97 | 1), | |
98 | "character-set"); | |
99 | cs->len = len - 1; | |
100 | return; | |
101 | } | |
102 | else | |
103 | { | |
104 | /* Expand the range down by one. */ | |
105 | cs->ranges[i].lo = n; | |
106 | return; | |
107 | } | |
108 | } | |
109 | else if (n == cs->ranges[i].hi + 1) | |
110 | { | |
111 | /* This char is one above the current range. */ | |
112 | if (i < len - 1 && cs->ranges[i + 1].lo - 1 == n) | |
113 | { | |
114 | /* It is also one below the next range, so combine them. */ | |
115 | cs->ranges[i].hi = cs->ranges[i + 1].hi; | |
116 | if (i < len - 2) | |
117 | memmove (cs->ranges + (i + 1), cs->ranges + (i + 2), | |
118 | sizeof (scm_t_char_range) * (len - i - 2)); | |
119 | cs->ranges = scm_gc_realloc (cs->ranges, | |
120 | sizeof (scm_t_char_range) * len, | |
121 | sizeof (scm_t_char_range) * (len - | |
122 | 1), | |
123 | "character-set"); | |
124 | cs->len = len - 1; | |
125 | return; | |
126 | } | |
127 | else | |
128 | { | |
129 | /* Expand the range up by one. */ | |
130 | cs->ranges[i].hi = n; | |
131 | return; | |
132 | } | |
133 | } | |
134 | else if (n < cs->ranges[i].lo - 1) | |
135 | { | |
136 | /* This is a new range below the current one. */ | |
137 | cs->ranges = scm_gc_realloc (cs->ranges, | |
138 | sizeof (scm_t_char_range) * len, | |
139 | sizeof (scm_t_char_range) * (len + 1), | |
140 | "character-set"); | |
141 | memmove (cs->ranges + (i + 1), cs->ranges + i, | |
142 | sizeof (scm_t_char_range) * (len - i)); | |
143 | cs->ranges[i].lo = n; | |
144 | cs->ranges[i].hi = n; | |
145 | cs->len = len + 1; | |
146 | return; | |
147 | } | |
148 | ||
149 | i++; | |
150 | } | |
151 | ||
152 | /* This is a new range above all previous ranges. */ | |
153 | if (len == 0) | |
154 | { | |
155 | cs->ranges = scm_gc_malloc (sizeof (scm_t_char_range), "character-set"); | |
156 | } | |
157 | else | |
158 | { | |
159 | cs->ranges = scm_gc_realloc (cs->ranges, | |
160 | sizeof (scm_t_char_range) * len, | |
161 | sizeof (scm_t_char_range) * (len + 1), | |
162 | "character-set"); | |
163 | } | |
164 | cs->ranges[len].lo = n; | |
165 | cs->ranges[len].hi = n; | |
166 | cs->len = len + 1; | |
167 | ||
168 | return; | |
169 | } | |
170 | ||
171 | /* If N is in charset CS, remove it. */ | |
172 | void | |
173 | scm_i_charset_unset (scm_t_char_set *cs, scm_t_wchar n) | |
174 | { | |
175 | size_t i; | |
176 | size_t len; | |
177 | ||
178 | len = cs->len; | |
179 | ||
180 | i = 0; | |
181 | while (i < len) | |
182 | { | |
183 | if (n < cs->ranges[i].lo) | |
184 | /* Not in this set. */ | |
185 | return; | |
186 | ||
187 | if (n == cs->ranges[i].lo && n == cs->ranges[i].hi) | |
188 | { | |
189 | /* Remove this one-character range. */ | |
190 | if (len == 1) | |
191 | { | |
192 | scm_gc_free (cs->ranges, | |
193 | sizeof (scm_t_char_range) * cs->len, | |
194 | "character-set"); | |
195 | cs->ranges = NULL; | |
196 | cs->len = 0; | |
197 | return; | |
198 | } | |
199 | else if (i < len - 1) | |
200 | { | |
201 | memmove (cs->ranges + i, cs->ranges + (i + 1), | |
202 | sizeof (scm_t_char_range) * (len - i - 1)); | |
203 | cs->ranges = scm_gc_realloc (cs->ranges, | |
204 | sizeof (scm_t_char_range) * len, | |
205 | sizeof (scm_t_char_range) * (len - | |
206 | 1), | |
207 | "character-set"); | |
208 | cs->len = len - 1; | |
209 | return; | |
210 | } | |
211 | else if (i == len - 1) | |
212 | { | |
213 | cs->ranges = scm_gc_realloc (cs->ranges, | |
214 | sizeof (scm_t_char_range) * len, | |
215 | sizeof (scm_t_char_range) * (len - | |
216 | 1), | |
217 | "character-set"); | |
218 | cs->len = len - 1; | |
219 | return; | |
220 | } | |
221 | } | |
222 | else if (n == cs->ranges[i].lo) | |
223 | { | |
224 | /* Shrink this range from the left. */ | |
225 | cs->ranges[i].lo = n + 1; | |
226 | return; | |
227 | } | |
228 | else if (n == cs->ranges[i].hi) | |
229 | { | |
230 | /* Shrink this range from the right. */ | |
231 | cs->ranges[i].hi = n - 1; | |
232 | return; | |
233 | } | |
234 | else if (n > cs->ranges[i].lo && n < cs->ranges[i].hi) | |
235 | { | |
236 | /* Split this range into two pieces. */ | |
237 | cs->ranges = scm_gc_realloc (cs->ranges, | |
238 | sizeof (scm_t_char_range) * len, | |
239 | sizeof (scm_t_char_range) * (len + 1), | |
240 | "character-set"); | |
241 | if (i < len - 1) | |
242 | memmove (cs->ranges + (i + 2), cs->ranges + (i + 1), | |
243 | sizeof (scm_t_char_range) * (len - i - 1)); | |
244 | cs->ranges[i + 1].hi = cs->ranges[i].hi; | |
245 | cs->ranges[i + 1].lo = n + 1; | |
246 | cs->ranges[i].hi = n - 1; | |
247 | cs->len = len + 1; | |
248 | return; | |
249 | } | |
250 | ||
251 | i++; | |
252 | } | |
253 | ||
254 | /* This value is above all ranges, so do nothing here. */ | |
255 | return; | |
256 | } | |
257 | ||
258 | static int | |
259 | charsets_equal (scm_t_char_set *a, scm_t_char_set *b) | |
260 | { | |
261 | if (a->len != b->len) | |
262 | return 0; | |
263 | ||
264 | if (memcmp (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len) != 0) | |
265 | return 0; | |
266 | ||
267 | return 1; | |
268 | } | |
269 | ||
270 | /* Return true if every character in A is also in B. */ | |
271 | static int | |
272 | charsets_leq (scm_t_char_set *a, scm_t_char_set *b) | |
273 | { | |
274 | size_t i = 0, j = 0; | |
275 | scm_t_wchar alo, ahi; | |
276 | ||
277 | if (a->len == 0) | |
278 | return 1; | |
279 | if (b->len == 0) | |
280 | return 0; | |
281 | while (i < a->len) | |
282 | { | |
283 | alo = a->ranges[i].lo; | |
284 | ahi = a->ranges[i].hi; | |
285 | while (b->ranges[j].hi < alo) | |
286 | { | |
287 | if (j < b->len - 1) | |
288 | j++; | |
289 | else | |
290 | return 0; | |
291 | } | |
292 | if (alo < b->ranges[j].lo || ahi > b->ranges[j].hi) | |
293 | return 0; | |
294 | i++; | |
295 | } | |
296 | ||
297 | return 1; | |
298 | } | |
299 | ||
300 | /* Merge B into A. */ | |
301 | static void | |
302 | charsets_union (scm_t_char_set *a, scm_t_char_set *b) | |
303 | { | |
304 | size_t i = 0; | |
305 | scm_t_wchar blo, bhi, n; | |
306 | ||
307 | if (b->len == 0) | |
308 | return; | |
309 | ||
310 | if (a->len == 0) | |
311 | { | |
312 | a->len = b->len; | |
313 | a->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * b->len, | |
314 | "character-set"); | |
315 | memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * b->len); | |
316 | return; | |
317 | } | |
318 | ||
319 | /* This needs optimization. */ | |
320 | while (i < b->len) | |
321 | { | |
322 | blo = b->ranges[i].lo; | |
323 | bhi = b->ranges[i].hi; | |
324 | for (n = blo; n <= bhi; n++) | |
325 | scm_i_charset_set (a, n); | |
326 | ||
327 | i++; | |
328 | } | |
329 | ||
330 | return; | |
331 | } | |
332 | ||
333 | /* Remove elements not both in A and B from A. */ | |
334 | static void | |
335 | charsets_intersection (scm_t_char_set *a, scm_t_char_set *b) | |
336 | { | |
337 | size_t i = 0; | |
338 | scm_t_wchar blo, bhi, n; | |
339 | scm_t_char_set *c; | |
340 | ||
341 | if (a->len == 0) | |
342 | return; | |
343 | ||
344 | if (b->len == 0) | |
345 | { | |
346 | scm_gc_free (a->ranges, sizeof (scm_t_char_range) * a->len, | |
347 | "character-set"); | |
348 | a->len = 0; | |
349 | return; | |
350 | } | |
351 | ||
352 | c = (scm_t_char_set *) scm_malloc (sizeof (scm_t_char_set)); | |
353 | c->len = 0; | |
354 | c->ranges = NULL; | |
355 | ||
356 | while (i < b->len) | |
357 | { | |
358 | blo = b->ranges[i].lo; | |
359 | bhi = b->ranges[i].hi; | |
360 | for (n = blo; n <= bhi; n++) | |
361 | if (scm_i_charset_get (a, n)) | |
362 | scm_i_charset_set (c, n); | |
363 | i++; | |
364 | } | |
365 | scm_gc_free (a->ranges, sizeof (scm_t_char_range) * a->len, | |
366 | "character-set"); | |
367 | ||
368 | a->len = c->len; | |
369 | if (c->len != 0) | |
370 | a->ranges = c->ranges; | |
371 | else | |
372 | a->ranges = NULL; | |
373 | free (c); | |
374 | return; | |
375 | } | |
376 | ||
377 | /* Make P the compelement of Q. */ | |
378 | static void | |
379 | charsets_complement (scm_t_char_set *p, scm_t_char_set *q) | |
380 | { | |
381 | int k, idx; | |
382 | ||
383 | if (q->len == 0) | |
384 | { | |
385 | /* Fill with all valid codepoints. */ | |
386 | p->len = 2; | |
387 | p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2, | |
388 | "character-set"); | |
389 | p->ranges[0].lo = 0; | |
24d23822 MG |
390 | p->ranges[0].hi = SCM_CODEPOINT_SURROGATE_START - 1; |
391 | p->ranges[1].lo = SCM_CODEPOINT_SURROGATE_END + 1; | |
f49dbcad MG |
392 | p->ranges[1].hi = SCM_CODEPOINT_MAX; |
393 | return; | |
394 | } | |
395 | ||
396 | if (p->len > 0) | |
397 | scm_gc_free (p->ranges, sizeof (scm_t_char_set) * p->len, | |
398 | "character-set"); | |
399 | ||
400 | p->len = 0; | |
401 | if (q->ranges[0].lo > 0) | |
402 | p->len++; | |
403 | if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX) | |
404 | p->len++; | |
405 | p->len += q->len - 1; | |
406 | p->ranges = | |
407 | (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) * p->len, | |
408 | "character-set"); | |
409 | idx = 0; | |
410 | if (q->ranges[0].lo > 0) | |
411 | { | |
412 | p->ranges[idx].lo = 0; | |
413 | p->ranges[idx++].hi = q->ranges[0].lo - 1; | |
414 | } | |
415 | for (k = 1; k < q->len; k++) | |
416 | { | |
417 | p->ranges[idx].lo = q->ranges[k - 1].hi + 1; | |
418 | p->ranges[idx++].hi = q->ranges[k].lo - 1; | |
419 | } | |
420 | if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX) | |
421 | { | |
422 | p->ranges[idx].lo = q->ranges[q->len - 1].hi + 1; | |
423 | p->ranges[idx].hi = SCM_CODEPOINT_MAX; | |
424 | } | |
425 | return; | |
426 | } | |
427 | ||
428 | /* Replace A with elements only found in one of A or B. */ | |
429 | static void | |
430 | charsets_xor (scm_t_char_set *a, scm_t_char_set *b) | |
431 | { | |
432 | size_t i = 0; | |
433 | scm_t_wchar blo, bhi, n; | |
434 | ||
435 | if (b->len == 0) | |
436 | { | |
437 | return; | |
438 | } | |
63181a97 | 439 | |
f49dbcad MG |
440 | if (a->len == 0) |
441 | { | |
442 | a->ranges = | |
443 | (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) * | |
444 | b->len, "character-set"); | |
445 | a->len = b->len; | |
446 | memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len); | |
447 | return; | |
448 | } | |
449 | ||
450 | while (i < b->len) | |
451 | { | |
452 | blo = b->ranges[i].lo; | |
453 | bhi = b->ranges[i].hi; | |
454 | for (n = blo; n <= bhi; n++) | |
455 | { | |
456 | if (scm_i_charset_get (a, n)) | |
457 | scm_i_charset_unset (a, n); | |
458 | else | |
459 | scm_i_charset_set (a, n); | |
460 | } | |
461 | ||
462 | i++; | |
463 | } | |
464 | return; | |
465 | } | |
63181a97 MV |
466 | |
467 | /* Smob print hook for character sets. */ | |
468 | static int | |
469 | charset_print (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED) | |
470 | { | |
f49dbcad | 471 | size_t i; |
63181a97 | 472 | int first = 1; |
f49dbcad MG |
473 | scm_t_char_set *p; |
474 | const size_t max_ranges_to_print = 50; | |
475 | ||
476 | p = SCM_CHARSET_DATA (charset); | |
63181a97 MV |
477 | |
478 | scm_puts ("#<charset {", port); | |
f49dbcad MG |
479 | for (i = 0; i < p->len; i++) |
480 | { | |
481 | if (first) | |
482 | first = 0; | |
483 | else | |
484 | scm_puts (" ", port); | |
485 | scm_write (SCM_MAKE_CHAR (p->ranges[i].lo), port); | |
486 | if (p->ranges[i].lo != p->ranges[i].hi) | |
487 | { | |
488 | scm_puts ("..", port); | |
489 | scm_write (SCM_MAKE_CHAR (p->ranges[i].hi), port); | |
490 | } | |
491 | if (i >= max_ranges_to_print) | |
492 | { | |
493 | /* Too many to print here. Quit early. */ | |
494 | scm_puts (" ...", port); | |
495 | break; | |
496 | } | |
497 | } | |
63181a97 MV |
498 | scm_puts ("}>", port); |
499 | return 1; | |
500 | } | |
501 | ||
502 | ||
503 | /* Smob free hook for character sets. */ | |
504 | static size_t | |
505 | charset_free (SCM charset) | |
506 | { | |
f49dbcad MG |
507 | scm_t_char_set *cs; |
508 | size_t len = 0; | |
509 | ||
510 | cs = SCM_CHARSET_DATA (charset); | |
511 | if (cs != NULL) | |
512 | len = cs->len; | |
513 | if (len > 0) | |
514 | scm_gc_free (cs->ranges, sizeof (scm_t_char_range) * len, | |
515 | "character-set"); | |
516 | ||
517 | cs->ranges = NULL; | |
518 | cs->len = 0; | |
519 | ||
520 | scm_gc_free (cs, sizeof (scm_t_char_set), "character-set"); | |
521 | ||
522 | scm_remember_upto_here_1 (charset); | |
523 | ||
524 | return 0; | |
525 | } | |
526 | ||
527 | ||
528 | /* Smob print hook for character sets cursors. */ | |
529 | static int | |
530 | charset_cursor_print (SCM cursor, SCM port, | |
531 | scm_print_state *pstate SCM_UNUSED) | |
532 | { | |
533 | scm_t_char_set_cursor *cur; | |
534 | ||
535 | cur = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor); | |
536 | ||
537 | scm_puts ("#<charset-cursor ", port); | |
538 | if (cur->range == (size_t) (-1)) | |
539 | scm_puts ("(empty)", port); | |
540 | else | |
541 | { | |
542 | scm_write (scm_from_size_t (cur->range), port); | |
543 | scm_puts (":", port); | |
544 | scm_write (scm_from_int32 (cur->n), port); | |
545 | } | |
546 | scm_puts (">", port); | |
547 | return 1; | |
548 | } | |
549 | ||
550 | /* Smob free hook for character sets. */ | |
551 | static size_t | |
552 | charset_cursor_free (SCM charset) | |
553 | { | |
554 | scm_t_char_set_cursor *cur; | |
555 | ||
556 | cur = (scm_t_char_set_cursor *) SCM_SMOB_DATA (charset); | |
557 | scm_gc_free (cur, sizeof (scm_t_char_set_cursor), "charset-cursor"); | |
558 | scm_remember_upto_here_1 (charset); | |
559 | ||
560 | return 0; | |
63181a97 MV |
561 | } |
562 | ||
563 | ||
564 | /* Create a new, empty character set. */ | |
565 | static SCM | |
f49dbcad | 566 | make_char_set (const char *func_name) |
63181a97 | 567 | { |
f49dbcad | 568 | scm_t_char_set *p; |
63181a97 | 569 | |
f49dbcad MG |
570 | p = scm_gc_malloc (sizeof (scm_t_char_set), "character-set"); |
571 | memset (p, 0, sizeof (scm_t_char_set)); | |
63181a97 MV |
572 | SCM_RETURN_NEWSMOB (scm_tc16_charset, p); |
573 | } | |
574 | ||
575 | ||
576 | SCM_DEFINE (scm_char_set_p, "char-set?", 1, 0, 0, | |
577 | (SCM obj), | |
578 | "Return @code{#t} if @var{obj} is a character set, @code{#f}\n" | |
579 | "otherwise.") | |
580 | #define FUNC_NAME s_scm_char_set_p | |
581 | { | |
582 | return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_charset, obj)); | |
583 | } | |
584 | #undef FUNC_NAME | |
585 | ||
586 | ||
587 | SCM_DEFINE (scm_char_set_eq, "char-set=", 0, 0, 1, | |
588 | (SCM char_sets), | |
589 | "Return @code{#t} if all given character sets are equal.") | |
590 | #define FUNC_NAME s_scm_char_set_eq | |
591 | { | |
592 | int argnum = 1; | |
f49dbcad | 593 | scm_t_char_set *cs1_data = NULL; |
63181a97 MV |
594 | |
595 | SCM_VALIDATE_REST_ARGUMENT (char_sets); | |
596 | ||
d2e53ed6 | 597 | while (!scm_is_null (char_sets)) |
63181a97 MV |
598 | { |
599 | SCM csi = SCM_CAR (char_sets); | |
f49dbcad | 600 | scm_t_char_set *csi_data; |
63181a97 MV |
601 | |
602 | SCM_VALIDATE_SMOB (argnum, csi, charset); | |
603 | argnum++; | |
f49dbcad | 604 | csi_data = SCM_CHARSET_DATA (csi); |
63181a97 | 605 | if (cs1_data == NULL) |
f49dbcad MG |
606 | cs1_data = csi_data; |
607 | else if (!charsets_equal (cs1_data, csi_data)) | |
608 | return SCM_BOOL_F; | |
63181a97 MV |
609 | char_sets = SCM_CDR (char_sets); |
610 | } | |
611 | return SCM_BOOL_T; | |
612 | } | |
613 | #undef FUNC_NAME | |
614 | ||
615 | ||
616 | SCM_DEFINE (scm_char_set_leq, "char-set<=", 0, 0, 1, | |
617 | (SCM char_sets), | |
618 | "Return @code{#t} if every character set @var{cs}i is a subset\n" | |
619 | "of character set @var{cs}i+1.") | |
620 | #define FUNC_NAME s_scm_char_set_leq | |
621 | { | |
622 | int argnum = 1; | |
f49dbcad | 623 | scm_t_char_set *prev_data = NULL; |
63181a97 MV |
624 | |
625 | SCM_VALIDATE_REST_ARGUMENT (char_sets); | |
626 | ||
d2e53ed6 | 627 | while (!scm_is_null (char_sets)) |
63181a97 MV |
628 | { |
629 | SCM csi = SCM_CAR (char_sets); | |
f49dbcad | 630 | scm_t_char_set *csi_data; |
63181a97 MV |
631 | |
632 | SCM_VALIDATE_SMOB (argnum, csi, charset); | |
633 | argnum++; | |
f49dbcad | 634 | csi_data = SCM_CHARSET_DATA (csi); |
63181a97 | 635 | if (prev_data) |
f49dbcad MG |
636 | { |
637 | if (!charsets_leq (prev_data, csi_data)) | |
638 | return SCM_BOOL_F; | |
639 | } | |
63181a97 MV |
640 | prev_data = csi_data; |
641 | char_sets = SCM_CDR (char_sets); | |
642 | } | |
643 | return SCM_BOOL_T; | |
644 | } | |
645 | #undef FUNC_NAME | |
646 | ||
647 | ||
648 | SCM_DEFINE (scm_char_set_hash, "char-set-hash", 1, 1, 0, | |
649 | (SCM cs, SCM bound), | |
650 | "Compute a hash value for the character set @var{cs}. If\n" | |
651 | "@var{bound} is given and non-zero, it restricts the\n" | |
652 | "returned value to the range 0 @dots{} @var{bound - 1}.") | |
653 | #define FUNC_NAME s_scm_char_set_hash | |
654 | { | |
655 | const unsigned long default_bnd = 871; | |
656 | unsigned long bnd; | |
f49dbcad | 657 | scm_t_char_set *p; |
63181a97 MV |
658 | unsigned long val = 0; |
659 | int k; | |
f49dbcad | 660 | scm_t_wchar c; |
63181a97 MV |
661 | |
662 | SCM_VALIDATE_SMOB (1, cs, charset); | |
663 | ||
664 | if (SCM_UNBNDP (bound)) | |
665 | bnd = default_bnd; | |
666 | else | |
667 | { | |
668 | bnd = scm_to_ulong (bound); | |
669 | if (bnd == 0) | |
f49dbcad | 670 | bnd = default_bnd; |
63181a97 MV |
671 | } |
672 | ||
f49dbcad MG |
673 | p = SCM_CHARSET_DATA (cs); |
674 | for (k = 0; k < p->len; k++) | |
63181a97 | 675 | { |
f49dbcad MG |
676 | for (c = p->ranges[k].lo; c <= p->ranges[k].hi; c++) |
677 | val = c + (val << 1); | |
63181a97 MV |
678 | } |
679 | return scm_from_ulong (val % bnd); | |
680 | } | |
681 | #undef FUNC_NAME | |
682 | ||
683 | ||
684 | SCM_DEFINE (scm_char_set_cursor, "char-set-cursor", 1, 0, 0, | |
f49dbcad | 685 | (SCM cs), "Return a cursor into the character set @var{cs}.") |
63181a97 MV |
686 | #define FUNC_NAME s_scm_char_set_cursor |
687 | { | |
f49dbcad MG |
688 | scm_t_char_set *cs_data; |
689 | scm_t_char_set_cursor *cur_data; | |
63181a97 MV |
690 | |
691 | SCM_VALIDATE_SMOB (1, cs, charset); | |
f49dbcad MG |
692 | cs_data = SCM_CHARSET_DATA (cs); |
693 | cur_data = | |
694 | (scm_t_char_set_cursor *) scm_gc_malloc (sizeof (scm_t_char_set_cursor), | |
695 | "charset-cursor"); | |
696 | if (cs_data->len == 0) | |
63181a97 | 697 | { |
f49dbcad MG |
698 | cur_data->range = (size_t) (-1); |
699 | cur_data->n = 0; | |
63181a97 | 700 | } |
f49dbcad MG |
701 | else |
702 | { | |
703 | cur_data->range = 0; | |
704 | cur_data->n = cs_data->ranges[0].lo; | |
705 | } | |
706 | SCM_RETURN_NEWSMOB (scm_tc16_charset_cursor, cur_data); | |
63181a97 MV |
707 | } |
708 | #undef FUNC_NAME | |
709 | ||
710 | ||
711 | SCM_DEFINE (scm_char_set_ref, "char-set-ref", 2, 0, 0, | |
f49dbcad MG |
712 | (SCM cs, SCM cursor), |
713 | "Return the character at the current cursor position\n" | |
714 | "@var{cursor} in the character set @var{cs}. It is an error to\n" | |
715 | "pass a cursor for which @code{end-of-char-set?} returns true.") | |
63181a97 MV |
716 | #define FUNC_NAME s_scm_char_set_ref |
717 | { | |
f49dbcad MG |
718 | scm_t_char_set *cs_data; |
719 | scm_t_char_set_cursor *cur_data; | |
720 | size_t i; | |
721 | ||
63181a97 | 722 | SCM_VALIDATE_SMOB (1, cs, charset); |
f49dbcad MG |
723 | SCM_VALIDATE_SMOB (2, cursor, charset_cursor); |
724 | ||
725 | cs_data = SCM_CHARSET_DATA (cs); | |
726 | cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor); | |
63181a97 | 727 | |
f49dbcad MG |
728 | /* Validate that this cursor is still true. */ |
729 | i = cur_data->range; | |
730 | if (i == (size_t) (-1) | |
731 | || i >= cs_data->len | |
732 | || cur_data->n < cs_data->ranges[i].lo | |
733 | || cur_data->n > cs_data->ranges[i].hi) | |
63181a97 | 734 | SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor)); |
f49dbcad | 735 | return SCM_MAKE_CHAR (cur_data->n); |
63181a97 MV |
736 | } |
737 | #undef FUNC_NAME | |
738 | ||
739 | ||
740 | SCM_DEFINE (scm_char_set_cursor_next, "char-set-cursor-next", 2, 0, 0, | |
f49dbcad MG |
741 | (SCM cs, SCM cursor), |
742 | "Advance the character set cursor @var{cursor} to the next\n" | |
743 | "character in the character set @var{cs}. It is an error if the\n" | |
744 | "cursor given satisfies @code{end-of-char-set?}.") | |
63181a97 MV |
745 | #define FUNC_NAME s_scm_char_set_cursor_next |
746 | { | |
f49dbcad MG |
747 | scm_t_char_set *cs_data; |
748 | scm_t_char_set_cursor *cur_data; | |
749 | size_t i; | |
750 | ||
63181a97 | 751 | SCM_VALIDATE_SMOB (1, cs, charset); |
f49dbcad MG |
752 | SCM_VALIDATE_SMOB (2, cursor, charset_cursor); |
753 | ||
754 | cs_data = SCM_CHARSET_DATA (cs); | |
755 | cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor); | |
63181a97 | 756 | |
f49dbcad MG |
757 | /* Validate that this cursor is still true. */ |
758 | i = cur_data->range; | |
759 | if (i == (size_t) (-1) | |
760 | || i >= cs_data->len | |
761 | || cur_data->n < cs_data->ranges[i].lo | |
762 | || cur_data->n > cs_data->ranges[i].hi) | |
63181a97 | 763 | SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor)); |
f49dbcad MG |
764 | /* Increment the cursor. */ |
765 | if (cur_data->n == cs_data->ranges[i].hi) | |
63181a97 | 766 | { |
f49dbcad MG |
767 | if (i + 1 < cs_data->len) |
768 | { | |
769 | cur_data->range = i + 1; | |
770 | cur_data->n = cs_data->ranges[i + 1].lo; | |
771 | } | |
772 | else | |
773 | { | |
774 | /* This is the end of the road. */ | |
775 | cur_data->range = (size_t) (-1); | |
776 | cur_data->n = 0; | |
777 | } | |
63181a97 | 778 | } |
f49dbcad MG |
779 | else |
780 | { | |
781 | cur_data->n = cur_data->n + 1; | |
782 | } | |
783 | ||
784 | return cursor; | |
63181a97 MV |
785 | } |
786 | #undef FUNC_NAME | |
787 | ||
788 | ||
789 | SCM_DEFINE (scm_end_of_char_set_p, "end-of-char-set?", 1, 0, 0, | |
f49dbcad MG |
790 | (SCM cursor), |
791 | "Return @code{#t} if @var{cursor} has reached the end of a\n" | |
792 | "character set, @code{#f} otherwise.") | |
63181a97 MV |
793 | #define FUNC_NAME s_scm_end_of_char_set_p |
794 | { | |
f49dbcad MG |
795 | scm_t_char_set_cursor *cur_data; |
796 | SCM_VALIDATE_SMOB (1, cursor, charset_cursor); | |
797 | ||
798 | cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor); | |
799 | if (cur_data->range == (size_t) (-1)) | |
800 | return SCM_BOOL_T; | |
801 | ||
802 | return SCM_BOOL_F; | |
63181a97 MV |
803 | } |
804 | #undef FUNC_NAME | |
805 | ||
806 | ||
807 | SCM_DEFINE (scm_char_set_fold, "char-set-fold", 3, 0, 0, | |
f49dbcad MG |
808 | (SCM kons, SCM knil, SCM cs), |
809 | "Fold the procedure @var{kons} over the character set @var{cs},\n" | |
810 | "initializing it with @var{knil}.") | |
63181a97 MV |
811 | #define FUNC_NAME s_scm_char_set_fold |
812 | { | |
f49dbcad | 813 | scm_t_char_set *cs_data; |
63181a97 | 814 | int k; |
f49dbcad | 815 | scm_t_wchar n; |
63181a97 MV |
816 | |
817 | SCM_VALIDATE_PROC (1, kons); | |
818 | SCM_VALIDATE_SMOB (3, cs, charset); | |
819 | ||
f49dbcad MG |
820 | cs_data = SCM_CHARSET_DATA (cs); |
821 | ||
822 | if (cs_data->len == 0) | |
823 | return knil; | |
824 | ||
825 | for (k = 0; k < cs_data->len; k++) | |
826 | for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) | |
63181a97 | 827 | { |
f49dbcad | 828 | knil = scm_call_2 (kons, SCM_MAKE_CHAR (n), knil); |
63181a97 MV |
829 | } |
830 | return knil; | |
831 | } | |
832 | #undef FUNC_NAME | |
833 | ||
834 | ||
835 | SCM_DEFINE (scm_char_set_unfold, "char-set-unfold", 4, 1, 0, | |
836 | (SCM p, SCM f, SCM g, SCM seed, SCM base_cs), | |
837 | "This is a fundamental constructor for character sets.\n" | |
838 | "@itemize @bullet\n" | |
839 | "@item @var{g} is used to generate a series of ``seed'' values\n" | |
840 | "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n" | |
841 | "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n" | |
842 | "@item @var{p} tells us when to stop -- when it returns true\n" | |
843 | "when applied to one of the seed values.\n" | |
844 | "@item @var{f} maps each seed value to a character. These\n" | |
845 | "characters are added to the base character set @var{base_cs} to\n" | |
846 | "form the result; @var{base_cs} defaults to the empty set.\n" | |
847 | "@end itemize") | |
848 | #define FUNC_NAME s_scm_char_set_unfold | |
849 | { | |
850 | SCM result, tmp; | |
851 | ||
852 | SCM_VALIDATE_PROC (1, p); | |
853 | SCM_VALIDATE_PROC (2, f); | |
854 | SCM_VALIDATE_PROC (3, g); | |
855 | if (!SCM_UNBNDP (base_cs)) | |
856 | { | |
857 | SCM_VALIDATE_SMOB (5, base_cs, charset); | |
858 | result = scm_char_set_copy (base_cs); | |
859 | } | |
860 | else | |
861 | result = make_char_set (FUNC_NAME); | |
862 | ||
863 | tmp = scm_call_1 (p, seed); | |
864 | while (scm_is_false (tmp)) | |
865 | { | |
866 | SCM ch = scm_call_1 (f, seed); | |
867 | if (!SCM_CHARP (ch)) | |
868 | SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f)); | |
869 | SCM_CHARSET_SET (result, SCM_CHAR (ch)); | |
870 | ||
871 | seed = scm_call_1 (g, seed); | |
872 | tmp = scm_call_1 (p, seed); | |
873 | } | |
874 | return result; | |
875 | } | |
876 | #undef FUNC_NAME | |
877 | ||
878 | ||
879 | SCM_DEFINE (scm_char_set_unfold_x, "char-set-unfold!", 5, 0, 0, | |
880 | (SCM p, SCM f, SCM g, SCM seed, SCM base_cs), | |
881 | "This is a fundamental constructor for character sets.\n" | |
882 | "@itemize @bullet\n" | |
883 | "@item @var{g} is used to generate a series of ``seed'' values\n" | |
884 | "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n" | |
885 | "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n" | |
886 | "@item @var{p} tells us when to stop -- when it returns true\n" | |
887 | "when applied to one of the seed values.\n" | |
888 | "@item @var{f} maps each seed value to a character. These\n" | |
889 | "characters are added to the base character set @var{base_cs} to\n" | |
890 | "form the result; @var{base_cs} defaults to the empty set.\n" | |
891 | "@end itemize") | |
892 | #define FUNC_NAME s_scm_char_set_unfold_x | |
893 | { | |
894 | SCM tmp; | |
895 | ||
896 | SCM_VALIDATE_PROC (1, p); | |
897 | SCM_VALIDATE_PROC (2, f); | |
898 | SCM_VALIDATE_PROC (3, g); | |
899 | SCM_VALIDATE_SMOB (5, base_cs, charset); | |
900 | ||
901 | tmp = scm_call_1 (p, seed); | |
902 | while (scm_is_false (tmp)) | |
903 | { | |
904 | SCM ch = scm_call_1 (f, seed); | |
905 | if (!SCM_CHARP (ch)) | |
906 | SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f)); | |
907 | SCM_CHARSET_SET (base_cs, SCM_CHAR (ch)); | |
908 | ||
909 | seed = scm_call_1 (g, seed); | |
910 | tmp = scm_call_1 (p, seed); | |
911 | } | |
912 | return base_cs; | |
913 | } | |
914 | #undef FUNC_NAME | |
915 | ||
916 | ||
917 | SCM_DEFINE (scm_char_set_for_each, "char-set-for-each", 2, 0, 0, | |
f49dbcad MG |
918 | (SCM proc, SCM cs), |
919 | "Apply @var{proc} to every character in the character set\n" | |
920 | "@var{cs}. The return value is not specified.") | |
63181a97 MV |
921 | #define FUNC_NAME s_scm_char_set_for_each |
922 | { | |
f49dbcad | 923 | scm_t_char_set *cs_data; |
63181a97 | 924 | int k; |
f49dbcad | 925 | scm_t_wchar n; |
63181a97 MV |
926 | |
927 | SCM_VALIDATE_PROC (1, proc); | |
928 | SCM_VALIDATE_SMOB (2, cs, charset); | |
929 | ||
f49dbcad MG |
930 | cs_data = SCM_CHARSET_DATA (cs); |
931 | ||
932 | if (cs_data->len == 0) | |
933 | return SCM_UNSPECIFIED; | |
934 | ||
935 | for (k = 0; k < cs_data->len; k++) | |
936 | for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) | |
937 | { | |
938 | scm_call_1 (proc, SCM_MAKE_CHAR (n)); | |
939 | } | |
940 | ||
63181a97 MV |
941 | return SCM_UNSPECIFIED; |
942 | } | |
943 | #undef FUNC_NAME | |
944 | ||
945 | ||
946 | SCM_DEFINE (scm_char_set_map, "char-set-map", 2, 0, 0, | |
947 | (SCM proc, SCM cs), | |
948 | "Map the procedure @var{proc} over every character in @var{cs}.\n" | |
949 | "@var{proc} must be a character -> character procedure.") | |
950 | #define FUNC_NAME s_scm_char_set_map | |
951 | { | |
952 | SCM result; | |
953 | int k; | |
f49dbcad MG |
954 | scm_t_char_set *cs_data; |
955 | scm_t_wchar n; | |
63181a97 MV |
956 | |
957 | SCM_VALIDATE_PROC (1, proc); | |
958 | SCM_VALIDATE_SMOB (2, cs, charset); | |
959 | ||
960 | result = make_char_set (FUNC_NAME); | |
f49dbcad MG |
961 | cs_data = SCM_CHARSET_DATA (cs); |
962 | ||
963 | if (cs_data->len == 0) | |
964 | return result; | |
965 | ||
966 | for (k = 0; k < cs_data->len; k++) | |
967 | for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) | |
63181a97 | 968 | { |
f49dbcad MG |
969 | SCM ch = scm_call_1 (proc, SCM_MAKE_CHAR (n)); |
970 | if (!SCM_CHARP (ch)) | |
971 | SCM_MISC_ERROR ("procedure ~S returned non-char", | |
972 | scm_list_1 (proc)); | |
973 | SCM_CHARSET_SET (result, SCM_CHAR (ch)); | |
63181a97 MV |
974 | } |
975 | return result; | |
976 | } | |
977 | #undef FUNC_NAME | |
978 | ||
979 | ||
980 | SCM_DEFINE (scm_char_set_copy, "char-set-copy", 1, 0, 0, | |
981 | (SCM cs), | |
982 | "Return a newly allocated character set containing all\n" | |
983 | "characters in @var{cs}.") | |
984 | #define FUNC_NAME s_scm_char_set_copy | |
985 | { | |
986 | SCM ret; | |
f49dbcad | 987 | scm_t_char_set *p1, *p2; |
63181a97 MV |
988 | |
989 | SCM_VALIDATE_SMOB (1, cs, charset); | |
990 | ret = make_char_set (FUNC_NAME); | |
f49dbcad MG |
991 | p1 = SCM_CHARSET_DATA (cs); |
992 | p2 = SCM_CHARSET_DATA (ret); | |
993 | p2->len = p1->len; | |
994 | ||
995 | if (p1->len == 0) | |
996 | p2->ranges = NULL; | |
997 | else | |
998 | { | |
999 | p2->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * p1->len, | |
1000 | "character-set"); | |
1001 | memcpy (p2->ranges, p1->ranges, sizeof (scm_t_char_range) * p1->len); | |
1002 | } | |
1003 | ||
63181a97 MV |
1004 | return ret; |
1005 | } | |
1006 | #undef FUNC_NAME | |
1007 | ||
1008 | ||
1009 | SCM_DEFINE (scm_char_set, "char-set", 0, 0, 1, | |
1010 | (SCM rest), | |
1011 | "Return a character set containing all given characters.") | |
1012 | #define FUNC_NAME s_scm_char_set | |
1013 | { | |
1014 | SCM cs; | |
63181a97 MV |
1015 | int argnum = 1; |
1016 | ||
1017 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1018 | cs = make_char_set (FUNC_NAME); | |
d2e53ed6 | 1019 | while (!scm_is_null (rest)) |
63181a97 | 1020 | { |
f49dbcad | 1021 | scm_t_wchar c; |
63181a97 MV |
1022 | |
1023 | SCM_VALIDATE_CHAR_COPY (argnum, SCM_CAR (rest), c); | |
1024 | argnum++; | |
1025 | rest = SCM_CDR (rest); | |
f49dbcad | 1026 | SCM_CHARSET_SET (cs, c); |
63181a97 MV |
1027 | } |
1028 | return cs; | |
1029 | } | |
1030 | #undef FUNC_NAME | |
1031 | ||
1032 | ||
1033 | SCM_DEFINE (scm_list_to_char_set, "list->char-set", 1, 1, 0, | |
1034 | (SCM list, SCM base_cs), | |
1035 | "Convert the character list @var{list} to a character set. If\n" | |
1036 | "the character set @var{base_cs} is given, the character in this\n" | |
1037 | "set are also included in the result.") | |
1038 | #define FUNC_NAME s_scm_list_to_char_set | |
1039 | { | |
1040 | SCM cs; | |
63181a97 MV |
1041 | |
1042 | SCM_VALIDATE_LIST (1, list); | |
1043 | if (SCM_UNBNDP (base_cs)) | |
1044 | cs = make_char_set (FUNC_NAME); | |
1045 | else | |
1046 | { | |
1047 | SCM_VALIDATE_SMOB (2, base_cs, charset); | |
1048 | cs = scm_char_set_copy (base_cs); | |
1049 | } | |
d2e53ed6 | 1050 | while (!scm_is_null (list)) |
63181a97 MV |
1051 | { |
1052 | SCM chr = SCM_CAR (list); | |
f49dbcad | 1053 | scm_t_wchar c; |
63181a97 MV |
1054 | |
1055 | SCM_VALIDATE_CHAR_COPY (0, chr, c); | |
1056 | list = SCM_CDR (list); | |
1057 | ||
f49dbcad MG |
1058 | |
1059 | SCM_CHARSET_SET (cs, c); | |
63181a97 MV |
1060 | } |
1061 | return cs; | |
1062 | } | |
1063 | #undef FUNC_NAME | |
1064 | ||
1065 | ||
1066 | SCM_DEFINE (scm_list_to_char_set_x, "list->char-set!", 2, 0, 0, | |
f49dbcad MG |
1067 | (SCM list, SCM base_cs), |
1068 | "Convert the character list @var{list} to a character set. The\n" | |
1069 | "characters are added to @var{base_cs} and @var{base_cs} is\n" | |
1070 | "returned.") | |
63181a97 MV |
1071 | #define FUNC_NAME s_scm_list_to_char_set_x |
1072 | { | |
63181a97 MV |
1073 | SCM_VALIDATE_LIST (1, list); |
1074 | SCM_VALIDATE_SMOB (2, base_cs, charset); | |
d2e53ed6 | 1075 | while (!scm_is_null (list)) |
63181a97 MV |
1076 | { |
1077 | SCM chr = SCM_CAR (list); | |
f49dbcad | 1078 | scm_t_wchar c; |
63181a97 MV |
1079 | |
1080 | SCM_VALIDATE_CHAR_COPY (0, chr, c); | |
1081 | list = SCM_CDR (list); | |
1082 | ||
f49dbcad | 1083 | SCM_CHARSET_SET (base_cs, c); |
63181a97 MV |
1084 | } |
1085 | return base_cs; | |
1086 | } | |
1087 | #undef FUNC_NAME | |
1088 | ||
1089 | ||
1090 | SCM_DEFINE (scm_string_to_char_set, "string->char-set", 1, 1, 0, | |
1091 | (SCM str, SCM base_cs), | |
1092 | "Convert the string @var{str} to a character set. If the\n" | |
1093 | "character set @var{base_cs} is given, the characters in this\n" | |
1094 | "set are also included in the result.") | |
1095 | #define FUNC_NAME s_scm_string_to_char_set | |
1096 | { | |
1097 | SCM cs; | |
63181a97 MV |
1098 | size_t k = 0, len; |
1099 | ||
1100 | SCM_VALIDATE_STRING (1, str); | |
1101 | if (SCM_UNBNDP (base_cs)) | |
1102 | cs = make_char_set (FUNC_NAME); | |
1103 | else | |
1104 | { | |
1105 | SCM_VALIDATE_SMOB (2, base_cs, charset); | |
1106 | cs = scm_char_set_copy (base_cs); | |
1107 | } | |
63181a97 MV |
1108 | len = scm_i_string_length (str); |
1109 | while (k < len) | |
1110 | { | |
f49dbcad MG |
1111 | scm_t_wchar c = scm_i_string_ref (str, k++); |
1112 | SCM_CHARSET_SET (cs, c); | |
63181a97 MV |
1113 | } |
1114 | scm_remember_upto_here_1 (str); | |
1115 | return cs; | |
1116 | } | |
1117 | #undef FUNC_NAME | |
1118 | ||
1119 | ||
1120 | SCM_DEFINE (scm_string_to_char_set_x, "string->char-set!", 2, 0, 0, | |
f49dbcad MG |
1121 | (SCM str, SCM base_cs), |
1122 | "Convert the string @var{str} to a character set. The\n" | |
1123 | "characters from the string are added to @var{base_cs}, and\n" | |
1124 | "@var{base_cs} is returned.") | |
63181a97 MV |
1125 | #define FUNC_NAME s_scm_string_to_char_set_x |
1126 | { | |
63181a97 MV |
1127 | size_t k = 0, len; |
1128 | ||
1129 | SCM_VALIDATE_STRING (1, str); | |
1130 | SCM_VALIDATE_SMOB (2, base_cs, charset); | |
63181a97 MV |
1131 | len = scm_i_string_length (str); |
1132 | while (k < len) | |
1133 | { | |
f49dbcad MG |
1134 | scm_t_wchar c = scm_i_string_ref (str, k++); |
1135 | SCM_CHARSET_SET (base_cs, c); | |
63181a97 MV |
1136 | } |
1137 | scm_remember_upto_here_1 (str); | |
1138 | return base_cs; | |
1139 | } | |
1140 | #undef FUNC_NAME | |
1141 | ||
1142 | ||
1143 | SCM_DEFINE (scm_char_set_filter, "char-set-filter", 2, 1, 0, | |
1144 | (SCM pred, SCM cs, SCM base_cs), | |
1145 | "Return a character set containing every character from @var{cs}\n" | |
1146 | "so that it satisfies @var{pred}. If provided, the characters\n" | |
1147 | "from @var{base_cs} are added to the result.") | |
1148 | #define FUNC_NAME s_scm_char_set_filter | |
1149 | { | |
1150 | SCM ret; | |
1151 | int k; | |
f49dbcad MG |
1152 | scm_t_wchar n; |
1153 | scm_t_char_set *p; | |
63181a97 MV |
1154 | |
1155 | SCM_VALIDATE_PROC (1, pred); | |
1156 | SCM_VALIDATE_SMOB (2, cs, charset); | |
1157 | if (!SCM_UNBNDP (base_cs)) | |
1158 | { | |
1159 | SCM_VALIDATE_SMOB (3, base_cs, charset); | |
1160 | ret = scm_char_set_copy (base_cs); | |
1161 | } | |
1162 | else | |
1163 | ret = make_char_set (FUNC_NAME); | |
63181a97 | 1164 | |
f49dbcad MG |
1165 | p = SCM_CHARSET_DATA (cs); |
1166 | ||
1167 | if (p->len == 0) | |
1168 | return ret; | |
1169 | ||
1170 | for (k = 0; k < p->len; k++) | |
1171 | for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++) | |
1172 | { | |
1173 | SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n)); | |
1174 | ||
1175 | if (scm_is_true (res)) | |
1176 | SCM_CHARSET_SET (ret, n); | |
1177 | } | |
63181a97 MV |
1178 | return ret; |
1179 | } | |
1180 | #undef FUNC_NAME | |
1181 | ||
1182 | ||
1183 | SCM_DEFINE (scm_char_set_filter_x, "char-set-filter!", 3, 0, 0, | |
1184 | (SCM pred, SCM cs, SCM base_cs), | |
1185 | "Return a character set containing every character from @var{cs}\n" | |
1186 | "so that it satisfies @var{pred}. The characters are added to\n" | |
1187 | "@var{base_cs} and @var{base_cs} is returned.") | |
1188 | #define FUNC_NAME s_scm_char_set_filter_x | |
1189 | { | |
1190 | int k; | |
f49dbcad MG |
1191 | scm_t_wchar n; |
1192 | scm_t_char_set *p; | |
63181a97 MV |
1193 | |
1194 | SCM_VALIDATE_PROC (1, pred); | |
1195 | SCM_VALIDATE_SMOB (2, cs, charset); | |
1196 | SCM_VALIDATE_SMOB (3, base_cs, charset); | |
f49dbcad MG |
1197 | p = SCM_CHARSET_DATA (cs); |
1198 | if (p->len == 0) | |
1199 | return base_cs; | |
63181a97 | 1200 | |
f49dbcad MG |
1201 | for (k = 0; k < p->len; k++) |
1202 | for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++) | |
1203 | { | |
1204 | SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k)); | |
1205 | ||
1206 | if (scm_is_true (res)) | |
1207 | SCM_CHARSET_SET (base_cs, n); | |
1208 | } | |
63181a97 MV |
1209 | return base_cs; |
1210 | } | |
1211 | #undef FUNC_NAME | |
1212 | ||
1213 | ||
1214 | SCM_DEFINE (scm_ucs_range_to_char_set, "ucs-range->char-set", 2, 2, 0, | |
1215 | (SCM lower, SCM upper, SCM error, SCM base_cs), | |
1216 | "Return a character set containing all characters whose\n" | |
1217 | "character codes lie in the half-open range\n" | |
1218 | "[@var{lower},@var{upper}).\n" | |
1219 | "\n" | |
1220 | "If @var{error} is a true value, an error is signalled if the\n" | |
1221 | "specified range contains characters which are not contained in\n" | |
1222 | "the implemented character range. If @var{error} is @code{#f},\n" | |
1223 | "these characters are silently left out of the resultung\n" | |
1224 | "character set.\n" | |
1225 | "\n" | |
1226 | "The characters in @var{base_cs} are added to the result, if\n" | |
1227 | "given.") | |
1228 | #define FUNC_NAME s_scm_ucs_range_to_char_set | |
1229 | { | |
1230 | SCM cs; | |
1231 | size_t clower, cupper; | |
63181a97 MV |
1232 | |
1233 | clower = scm_to_size_t (lower); | |
1234 | cupper = scm_to_size_t (upper); | |
1235 | SCM_ASSERT_RANGE (2, upper, cupper >= clower); | |
1236 | if (!SCM_UNBNDP (error)) | |
1237 | { | |
1238 | if (scm_is_true (error)) | |
f49dbcad MG |
1239 | { |
1240 | SCM_ASSERT_RANGE (1, lower, SCM_IS_UNICODE_CHAR (clower)); | |
1241 | SCM_ASSERT_RANGE (2, upper, SCM_IS_UNICODE_CHAR (cupper)); | |
1242 | } | |
63181a97 | 1243 | } |
f49dbcad MG |
1244 | if (clower > 0x10FFFF) |
1245 | clower = 0x10FFFF; | |
1246 | if (cupper > 0x10FFFF) | |
1247 | cupper = 0x10FFFF; | |
63181a97 MV |
1248 | if (SCM_UNBNDP (base_cs)) |
1249 | cs = make_char_set (FUNC_NAME); | |
1250 | else | |
1251 | { | |
1252 | SCM_VALIDATE_SMOB (4, base_cs, charset); | |
1253 | cs = scm_char_set_copy (base_cs); | |
1254 | } | |
f49dbcad MG |
1255 | /* It not be difficult to write a more optimized version of the |
1256 | following. */ | |
63181a97 MV |
1257 | while (clower < cupper) |
1258 | { | |
f49dbcad | 1259 | SCM_CHARSET_SET (cs, clower); |
63181a97 MV |
1260 | clower++; |
1261 | } | |
1262 | return cs; | |
1263 | } | |
1264 | #undef FUNC_NAME | |
1265 | ||
1266 | ||
1267 | SCM_DEFINE (scm_ucs_range_to_char_set_x, "ucs-range->char-set!", 4, 0, 0, | |
1268 | (SCM lower, SCM upper, SCM error, SCM base_cs), | |
1269 | "Return a character set containing all characters whose\n" | |
1270 | "character codes lie in the half-open range\n" | |
1271 | "[@var{lower},@var{upper}).\n" | |
1272 | "\n" | |
1273 | "If @var{error} is a true value, an error is signalled if the\n" | |
1274 | "specified range contains characters which are not contained in\n" | |
1275 | "the implemented character range. If @var{error} is @code{#f},\n" | |
1276 | "these characters are silently left out of the resultung\n" | |
1277 | "character set.\n" | |
1278 | "\n" | |
1279 | "The characters are added to @var{base_cs} and @var{base_cs} is\n" | |
1280 | "returned.") | |
1281 | #define FUNC_NAME s_scm_ucs_range_to_char_set_x | |
1282 | { | |
1283 | size_t clower, cupper; | |
63181a97 MV |
1284 | |
1285 | clower = scm_to_size_t (lower); | |
1286 | cupper = scm_to_size_t (upper); | |
1287 | SCM_ASSERT_RANGE (2, upper, cupper >= clower); | |
1288 | if (scm_is_true (error)) | |
1289 | { | |
f49dbcad MG |
1290 | SCM_ASSERT_RANGE (1, lower, SCM_IS_UNICODE_CHAR (clower)); |
1291 | SCM_ASSERT_RANGE (2, upper, SCM_IS_UNICODE_CHAR (cupper)); | |
63181a97 | 1292 | } |
f49dbcad MG |
1293 | if (clower > SCM_CODEPOINT_MAX) |
1294 | clower = SCM_CODEPOINT_MAX; | |
1295 | if (cupper > SCM_CODEPOINT_MAX) | |
1296 | cupper = SCM_CODEPOINT_MAX; | |
1297 | ||
63181a97 MV |
1298 | while (clower < cupper) |
1299 | { | |
f49dbcad MG |
1300 | if (SCM_IS_UNICODE_CHAR (clower)) |
1301 | SCM_CHARSET_SET (base_cs, clower); | |
63181a97 MV |
1302 | clower++; |
1303 | } | |
1304 | return base_cs; | |
1305 | } | |
1306 | #undef FUNC_NAME | |
1307 | ||
1308 | SCM_DEFINE (scm_to_char_set, "->char-set", 1, 0, 0, | |
1309 | (SCM x), | |
1310 | "Coerces x into a char-set. @var{x} may be a string, character or char-set. A string is converted to the set of its constituent characters; a character is converted to a singleton set; a char-set is returned as-is.") | |
1311 | #define FUNC_NAME s_scm_to_char_set | |
1312 | { | |
1313 | if (scm_is_string (x)) | |
1314 | return scm_string_to_char_set (x, SCM_UNDEFINED); | |
1315 | else if (SCM_CHARP (x)) | |
1316 | return scm_char_set (scm_list_1 (x)); | |
1317 | else if (SCM_SMOB_PREDICATE (scm_tc16_charset, x)) | |
1318 | return x; | |
1319 | else | |
1320 | scm_wrong_type_arg (NULL, 0, x); | |
1321 | } | |
1322 | #undef FUNC_NAME | |
1323 | ||
1324 | SCM_DEFINE (scm_char_set_size, "char-set-size", 1, 0, 0, | |
1325 | (SCM cs), | |
1326 | "Return the number of elements in character set @var{cs}.") | |
1327 | #define FUNC_NAME s_scm_char_set_size | |
1328 | { | |
1329 | int k, count = 0; | |
f49dbcad | 1330 | scm_t_char_set *cs_data; |
63181a97 MV |
1331 | |
1332 | SCM_VALIDATE_SMOB (1, cs, charset); | |
f49dbcad MG |
1333 | cs_data = SCM_CHARSET_DATA (cs); |
1334 | ||
1335 | if (cs_data->len == 0) | |
1336 | return scm_from_int (0); | |
1337 | ||
1338 | for (k = 0; k < cs_data->len; k++) | |
1339 | count += cs_data->ranges[k].hi - cs_data->ranges[k].lo + 1; | |
1340 | ||
1341 | return scm_from_int (count); | |
63181a97 MV |
1342 | } |
1343 | #undef FUNC_NAME | |
1344 | ||
1345 | ||
1346 | SCM_DEFINE (scm_char_set_count, "char-set-count", 2, 0, 0, | |
1347 | (SCM pred, SCM cs), | |
1348 | "Return the number of the elements int the character set\n" | |
1349 | "@var{cs} which satisfy the predicate @var{pred}.") | |
1350 | #define FUNC_NAME s_scm_char_set_count | |
1351 | { | |
1352 | int k, count = 0; | |
f49dbcad MG |
1353 | scm_t_wchar n; |
1354 | scm_t_char_set *cs_data; | |
63181a97 MV |
1355 | |
1356 | SCM_VALIDATE_PROC (1, pred); | |
1357 | SCM_VALIDATE_SMOB (2, cs, charset); | |
f49dbcad MG |
1358 | cs_data = SCM_CHARSET_DATA (cs); |
1359 | if (cs_data->len == 0) | |
1360 | return scm_from_int (0); | |
63181a97 | 1361 | |
f49dbcad MG |
1362 | for (k = 0; k < cs_data->len; k++) |
1363 | for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) | |
63181a97 | 1364 | { |
f49dbcad MG |
1365 | SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n)); |
1366 | if (scm_is_true (res)) | |
1367 | count++; | |
63181a97 MV |
1368 | } |
1369 | return SCM_I_MAKINUM (count); | |
1370 | } | |
1371 | #undef FUNC_NAME | |
1372 | ||
1373 | ||
1374 | SCM_DEFINE (scm_char_set_to_list, "char-set->list", 1, 0, 0, | |
1375 | (SCM cs), | |
1376 | "Return a list containing the elements of the character set\n" | |
1377 | "@var{cs}.") | |
1378 | #define FUNC_NAME s_scm_char_set_to_list | |
1379 | { | |
1380 | int k; | |
f49dbcad | 1381 | scm_t_wchar n; |
63181a97 | 1382 | SCM result = SCM_EOL; |
f49dbcad | 1383 | scm_t_char_set *p; |
63181a97 MV |
1384 | |
1385 | SCM_VALIDATE_SMOB (1, cs, charset); | |
f49dbcad MG |
1386 | p = SCM_CHARSET_DATA (cs); |
1387 | if (p->len == 0) | |
1388 | return SCM_EOL; | |
1389 | ||
1390 | for (k = p->len - 1; k >= 0; k--) | |
1391 | for (n = p->ranges[k].hi; n >= p->ranges[k].lo; n--) | |
1392 | result = scm_cons (SCM_MAKE_CHAR (n), result); | |
63181a97 MV |
1393 | return result; |
1394 | } | |
1395 | #undef FUNC_NAME | |
1396 | ||
1397 | ||
1398 | SCM_DEFINE (scm_char_set_to_string, "char-set->string", 1, 0, 0, | |
1399 | (SCM cs), | |
1400 | "Return a string containing the elements of the character set\n" | |
1401 | "@var{cs}. The order in which the characters are placed in the\n" | |
1402 | "string is not defined.") | |
1403 | #define FUNC_NAME s_scm_char_set_to_string | |
1404 | { | |
1405 | int k; | |
1406 | int count = 0; | |
1407 | int idx = 0; | |
f49dbcad | 1408 | int wide = 0; |
63181a97 | 1409 | SCM result; |
f49dbcad MG |
1410 | scm_t_wchar n; |
1411 | scm_t_char_set *cs_data; | |
1412 | char *buf; | |
1413 | scm_t_wchar *wbuf; | |
63181a97 MV |
1414 | |
1415 | SCM_VALIDATE_SMOB (1, cs, charset); | |
f49dbcad MG |
1416 | cs_data = SCM_CHARSET_DATA (cs); |
1417 | if (cs_data->len == 0) | |
1418 | return scm_nullstr; | |
1419 | ||
1420 | if (cs_data->ranges[cs_data->len - 1].hi > 255) | |
1421 | wide = 1; | |
1422 | ||
1423 | count = scm_to_int (scm_char_set_size (cs)); | |
1424 | if (wide) | |
1425 | result = scm_i_make_wide_string (count, &wbuf); | |
1426 | else | |
1427 | result = scm_i_make_string (count, &buf); | |
1428 | ||
1429 | for (k = 0; k < cs_data->len; k++) | |
1430 | for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) | |
1431 | { | |
1432 | if (wide) | |
1433 | wbuf[idx++] = n; | |
1434 | else | |
1435 | buf[idx++] = n; | |
1436 | } | |
63181a97 MV |
1437 | return result; |
1438 | } | |
1439 | #undef FUNC_NAME | |
1440 | ||
1441 | ||
1442 | SCM_DEFINE (scm_char_set_contains_p, "char-set-contains?", 2, 0, 0, | |
1443 | (SCM cs, SCM ch), | |
1444 | "Return @code{#t} iff the character @var{ch} is contained in the\n" | |
1445 | "character set @var{cs}.") | |
1446 | #define FUNC_NAME s_scm_char_set_contains_p | |
1447 | { | |
1448 | SCM_VALIDATE_SMOB (1, cs, charset); | |
1449 | SCM_VALIDATE_CHAR (2, ch); | |
1450 | return scm_from_bool (SCM_CHARSET_GET (cs, SCM_CHAR (ch))); | |
1451 | } | |
1452 | #undef FUNC_NAME | |
1453 | ||
1454 | ||
1455 | SCM_DEFINE (scm_char_set_every, "char-set-every", 2, 0, 0, | |
1456 | (SCM pred, SCM cs), | |
1457 | "Return a true value if every character in the character set\n" | |
1458 | "@var{cs} satisfies the predicate @var{pred}.") | |
1459 | #define FUNC_NAME s_scm_char_set_every | |
1460 | { | |
1461 | int k; | |
f49dbcad | 1462 | scm_t_wchar n; |
63181a97 | 1463 | SCM res = SCM_BOOL_T; |
f49dbcad | 1464 | scm_t_char_set *cs_data; |
63181a97 MV |
1465 | |
1466 | SCM_VALIDATE_PROC (1, pred); | |
1467 | SCM_VALIDATE_SMOB (2, cs, charset); | |
1468 | ||
f49dbcad MG |
1469 | cs_data = SCM_CHARSET_DATA (cs); |
1470 | if (cs_data->len == 0) | |
1471 | return SCM_BOOL_T; | |
1472 | ||
1473 | for (k = 0; k < cs_data->len; k++) | |
1474 | for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) | |
63181a97 | 1475 | { |
f49dbcad MG |
1476 | res = scm_call_1 (pred, SCM_MAKE_CHAR (n)); |
1477 | if (scm_is_false (res)) | |
1478 | return res; | |
63181a97 | 1479 | } |
f49dbcad | 1480 | return SCM_BOOL_T; |
63181a97 MV |
1481 | } |
1482 | #undef FUNC_NAME | |
1483 | ||
1484 | ||
1485 | SCM_DEFINE (scm_char_set_any, "char-set-any", 2, 0, 0, | |
1486 | (SCM pred, SCM cs), | |
1487 | "Return a true value if any character in the character set\n" | |
1488 | "@var{cs} satisfies the predicate @var{pred}.") | |
1489 | #define FUNC_NAME s_scm_char_set_any | |
1490 | { | |
1491 | int k; | |
f49dbcad MG |
1492 | scm_t_wchar n; |
1493 | scm_t_char_set *cs_data; | |
63181a97 MV |
1494 | |
1495 | SCM_VALIDATE_PROC (1, pred); | |
1496 | SCM_VALIDATE_SMOB (2, cs, charset); | |
1497 | ||
f49dbcad MG |
1498 | cs_data = (scm_t_char_set *) cs; |
1499 | ||
1500 | for (k = 0; k < cs_data->len; k++) | |
1501 | for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) | |
63181a97 | 1502 | { |
f49dbcad MG |
1503 | SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n)); |
1504 | if (scm_is_true (res)) | |
1505 | return res; | |
63181a97 MV |
1506 | } |
1507 | return SCM_BOOL_F; | |
1508 | } | |
1509 | #undef FUNC_NAME | |
1510 | ||
1511 | ||
1512 | SCM_DEFINE (scm_char_set_adjoin, "char-set-adjoin", 1, 0, 1, | |
f49dbcad MG |
1513 | (SCM cs, SCM rest), |
1514 | "Add all character arguments to the first argument, which must\n" | |
1515 | "be a character set.") | |
63181a97 MV |
1516 | #define FUNC_NAME s_scm_char_set_adjoin |
1517 | { | |
63181a97 MV |
1518 | SCM_VALIDATE_SMOB (1, cs, charset); |
1519 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1520 | cs = scm_char_set_copy (cs); | |
1521 | ||
d2e53ed6 | 1522 | while (!scm_is_null (rest)) |
63181a97 MV |
1523 | { |
1524 | SCM chr = SCM_CAR (rest); | |
f49dbcad | 1525 | scm_t_wchar c; |
63181a97 MV |
1526 | |
1527 | SCM_VALIDATE_CHAR_COPY (1, chr, c); | |
1528 | rest = SCM_CDR (rest); | |
1529 | ||
f49dbcad | 1530 | SCM_CHARSET_SET (cs, c); |
63181a97 MV |
1531 | } |
1532 | return cs; | |
1533 | } | |
1534 | #undef FUNC_NAME | |
1535 | ||
1536 | ||
1537 | SCM_DEFINE (scm_char_set_delete, "char-set-delete", 1, 0, 1, | |
f49dbcad MG |
1538 | (SCM cs, SCM rest), |
1539 | "Delete all character arguments from the first argument, which\n" | |
1540 | "must be a character set.") | |
63181a97 MV |
1541 | #define FUNC_NAME s_scm_char_set_delete |
1542 | { | |
63181a97 MV |
1543 | SCM_VALIDATE_SMOB (1, cs, charset); |
1544 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1545 | cs = scm_char_set_copy (cs); | |
1546 | ||
d2e53ed6 | 1547 | while (!scm_is_null (rest)) |
63181a97 MV |
1548 | { |
1549 | SCM chr = SCM_CAR (rest); | |
f49dbcad | 1550 | scm_t_wchar c; |
63181a97 MV |
1551 | |
1552 | SCM_VALIDATE_CHAR_COPY (1, chr, c); | |
1553 | rest = SCM_CDR (rest); | |
1554 | ||
f49dbcad | 1555 | SCM_CHARSET_UNSET (cs, c); |
63181a97 MV |
1556 | } |
1557 | return cs; | |
1558 | } | |
1559 | #undef FUNC_NAME | |
1560 | ||
1561 | ||
1562 | SCM_DEFINE (scm_char_set_adjoin_x, "char-set-adjoin!", 1, 0, 1, | |
f49dbcad MG |
1563 | (SCM cs, SCM rest), |
1564 | "Add all character arguments to the first argument, which must\n" | |
1565 | "be a character set.") | |
63181a97 MV |
1566 | #define FUNC_NAME s_scm_char_set_adjoin_x |
1567 | { | |
63181a97 MV |
1568 | SCM_VALIDATE_SMOB (1, cs, charset); |
1569 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1570 | ||
d2e53ed6 | 1571 | while (!scm_is_null (rest)) |
63181a97 MV |
1572 | { |
1573 | SCM chr = SCM_CAR (rest); | |
f49dbcad | 1574 | scm_t_wchar c; |
63181a97 MV |
1575 | |
1576 | SCM_VALIDATE_CHAR_COPY (1, chr, c); | |
1577 | rest = SCM_CDR (rest); | |
1578 | ||
f49dbcad | 1579 | SCM_CHARSET_SET (cs, c); |
63181a97 MV |
1580 | } |
1581 | return cs; | |
1582 | } | |
1583 | #undef FUNC_NAME | |
1584 | ||
1585 | ||
1586 | SCM_DEFINE (scm_char_set_delete_x, "char-set-delete!", 1, 0, 1, | |
f49dbcad MG |
1587 | (SCM cs, SCM rest), |
1588 | "Delete all character arguments from the first argument, which\n" | |
1589 | "must be a character set.") | |
63181a97 MV |
1590 | #define FUNC_NAME s_scm_char_set_delete_x |
1591 | { | |
63181a97 MV |
1592 | SCM_VALIDATE_SMOB (1, cs, charset); |
1593 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1594 | ||
d2e53ed6 | 1595 | while (!scm_is_null (rest)) |
63181a97 MV |
1596 | { |
1597 | SCM chr = SCM_CAR (rest); | |
f49dbcad | 1598 | scm_t_wchar c; |
63181a97 MV |
1599 | |
1600 | SCM_VALIDATE_CHAR_COPY (1, chr, c); | |
1601 | rest = SCM_CDR (rest); | |
1602 | ||
f49dbcad | 1603 | SCM_CHARSET_UNSET (cs, c); |
63181a97 MV |
1604 | } |
1605 | return cs; | |
1606 | } | |
1607 | #undef FUNC_NAME | |
1608 | ||
1609 | ||
1610 | SCM_DEFINE (scm_char_set_complement, "char-set-complement", 1, 0, 0, | |
f49dbcad | 1611 | (SCM cs), "Return the complement of the character set @var{cs}.") |
63181a97 MV |
1612 | #define FUNC_NAME s_scm_char_set_complement |
1613 | { | |
63181a97 | 1614 | SCM res; |
f49dbcad | 1615 | scm_t_char_set *p, *q; |
63181a97 MV |
1616 | |
1617 | SCM_VALIDATE_SMOB (1, cs, charset); | |
1618 | ||
1619 | res = make_char_set (FUNC_NAME); | |
f49dbcad MG |
1620 | p = SCM_CHARSET_DATA (res); |
1621 | q = SCM_CHARSET_DATA (cs); | |
1622 | ||
1623 | charsets_complement (p, q); | |
63181a97 MV |
1624 | return res; |
1625 | } | |
1626 | #undef FUNC_NAME | |
1627 | ||
1628 | ||
1629 | SCM_DEFINE (scm_char_set_union, "char-set-union", 0, 0, 1, | |
1630 | (SCM rest), | |
1631 | "Return the union of all argument character sets.") | |
1632 | #define FUNC_NAME s_scm_char_set_union | |
1633 | { | |
1634 | int c = 1; | |
1635 | SCM res; | |
f49dbcad | 1636 | scm_t_char_set *p; |
63181a97 MV |
1637 | |
1638 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1639 | ||
1640 | res = make_char_set (FUNC_NAME); | |
f49dbcad | 1641 | p = SCM_CHARSET_DATA (res); |
d2e53ed6 | 1642 | while (!scm_is_null (rest)) |
63181a97 | 1643 | { |
63181a97 MV |
1644 | SCM cs = SCM_CAR (rest); |
1645 | SCM_VALIDATE_SMOB (c, cs, charset); | |
1646 | c++; | |
1647 | rest = SCM_CDR (rest); | |
1648 | ||
f49dbcad MG |
1649 | |
1650 | charsets_union (p, (scm_t_char_set *) SCM_SMOB_DATA (cs)); | |
63181a97 MV |
1651 | } |
1652 | return res; | |
1653 | } | |
1654 | #undef FUNC_NAME | |
1655 | ||
1656 | ||
1657 | SCM_DEFINE (scm_char_set_intersection, "char-set-intersection", 0, 0, 1, | |
1658 | (SCM rest), | |
1659 | "Return the intersection of all argument character sets.") | |
1660 | #define FUNC_NAME s_scm_char_set_intersection | |
1661 | { | |
1662 | SCM res; | |
1663 | ||
1664 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1665 | ||
d2e53ed6 | 1666 | if (scm_is_null (rest)) |
63181a97 MV |
1667 | res = make_char_set (FUNC_NAME); |
1668 | else | |
1669 | { | |
f49dbcad | 1670 | scm_t_char_set *p; |
63181a97 MV |
1671 | int argnum = 2; |
1672 | ||
1673 | res = scm_char_set_copy (SCM_CAR (rest)); | |
f49dbcad | 1674 | p = SCM_CHARSET_DATA (res); |
63181a97 MV |
1675 | rest = SCM_CDR (rest); |
1676 | ||
d2e53ed6 | 1677 | while (scm_is_pair (rest)) |
f49dbcad MG |
1678 | { |
1679 | SCM cs = SCM_CAR (rest); | |
1680 | scm_t_char_set *cs_data; | |
1681 | ||
1682 | SCM_VALIDATE_SMOB (argnum, cs, charset); | |
1683 | argnum++; | |
1684 | cs_data = SCM_CHARSET_DATA (cs); | |
1685 | rest = SCM_CDR (rest); | |
1686 | charsets_intersection (p, cs_data); | |
1687 | } | |
63181a97 MV |
1688 | } |
1689 | ||
1690 | return res; | |
1691 | } | |
1692 | #undef FUNC_NAME | |
1693 | ||
1694 | ||
1695 | SCM_DEFINE (scm_char_set_difference, "char-set-difference", 1, 0, 1, | |
1696 | (SCM cs1, SCM rest), | |
1697 | "Return the difference of all argument character sets.") | |
1698 | #define FUNC_NAME s_scm_char_set_difference | |
1699 | { | |
1700 | int c = 2; | |
f49dbcad MG |
1701 | SCM res, compl; |
1702 | scm_t_char_set *p, *q; | |
63181a97 MV |
1703 | |
1704 | SCM_VALIDATE_SMOB (1, cs1, charset); | |
1705 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1706 | ||
1707 | res = scm_char_set_copy (cs1); | |
f49dbcad MG |
1708 | p = SCM_CHARSET_DATA (res); |
1709 | compl = make_char_set (FUNC_NAME); | |
1710 | q = SCM_CHARSET_DATA (compl); | |
d2e53ed6 | 1711 | while (!scm_is_null (rest)) |
63181a97 | 1712 | { |
63181a97 MV |
1713 | SCM cs = SCM_CAR (rest); |
1714 | SCM_VALIDATE_SMOB (c, cs, charset); | |
1715 | c++; | |
1716 | rest = SCM_CDR (rest); | |
1717 | ||
f49dbcad MG |
1718 | charsets_complement (q, SCM_CHARSET_DATA (cs)); |
1719 | charsets_intersection (p, q); | |
63181a97 MV |
1720 | } |
1721 | return res; | |
1722 | } | |
1723 | #undef FUNC_NAME | |
1724 | ||
1725 | ||
1726 | SCM_DEFINE (scm_char_set_xor, "char-set-xor", 0, 0, 1, | |
1727 | (SCM rest), | |
1728 | "Return the exclusive-or of all argument character sets.") | |
1729 | #define FUNC_NAME s_scm_char_set_xor | |
1730 | { | |
1731 | SCM res; | |
1732 | ||
1733 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1734 | ||
d2e53ed6 | 1735 | if (scm_is_null (rest)) |
63181a97 MV |
1736 | res = make_char_set (FUNC_NAME); |
1737 | else | |
1738 | { | |
1739 | int argnum = 2; | |
f49dbcad | 1740 | scm_t_char_set *p; |
63181a97 MV |
1741 | |
1742 | res = scm_char_set_copy (SCM_CAR (rest)); | |
f49dbcad | 1743 | p = SCM_CHARSET_DATA (res); |
63181a97 MV |
1744 | rest = SCM_CDR (rest); |
1745 | ||
d2e53ed6 | 1746 | while (scm_is_pair (rest)) |
f49dbcad MG |
1747 | { |
1748 | SCM cs = SCM_CAR (rest); | |
1749 | scm_t_char_set *cs_data; | |
1750 | ||
1751 | SCM_VALIDATE_SMOB (argnum, cs, charset); | |
1752 | argnum++; | |
1753 | cs_data = SCM_CHARSET_DATA (cs); | |
1754 | rest = SCM_CDR (rest); | |
1755 | ||
1756 | charsets_xor (p, cs_data); | |
1757 | } | |
63181a97 MV |
1758 | } |
1759 | return res; | |
1760 | } | |
1761 | #undef FUNC_NAME | |
1762 | ||
1763 | ||
1764 | SCM_DEFINE (scm_char_set_diff_plus_intersection, "char-set-diff+intersection", 1, 0, 1, | |
1765 | (SCM cs1, SCM rest), | |
1766 | "Return the difference and the intersection of all argument\n" | |
1767 | "character sets.") | |
1768 | #define FUNC_NAME s_scm_char_set_diff_plus_intersection | |
1769 | { | |
1770 | int c = 2; | |
1771 | SCM res1, res2; | |
f49dbcad | 1772 | scm_t_char_set *p, *q; |
63181a97 MV |
1773 | |
1774 | SCM_VALIDATE_SMOB (1, cs1, charset); | |
1775 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1776 | ||
1777 | res1 = scm_char_set_copy (cs1); | |
1778 | res2 = make_char_set (FUNC_NAME); | |
f49dbcad MG |
1779 | p = SCM_CHARSET_DATA (res1); |
1780 | q = SCM_CHARSET_DATA (res2); | |
d2e53ed6 | 1781 | while (!scm_is_null (rest)) |
63181a97 | 1782 | { |
63181a97 | 1783 | SCM cs = SCM_CAR (rest); |
f49dbcad | 1784 | scm_t_char_set *r; |
63181a97 MV |
1785 | |
1786 | SCM_VALIDATE_SMOB (c, cs, charset); | |
1787 | c++; | |
f49dbcad | 1788 | r = SCM_CHARSET_DATA (cs); |
63181a97 | 1789 | |
f49dbcad MG |
1790 | charsets_union (q, r); |
1791 | charsets_intersection (p, r); | |
63181a97 MV |
1792 | rest = SCM_CDR (rest); |
1793 | } | |
1794 | return scm_values (scm_list_2 (res1, res2)); | |
1795 | } | |
1796 | #undef FUNC_NAME | |
1797 | ||
1798 | ||
1799 | SCM_DEFINE (scm_char_set_complement_x, "char-set-complement!", 1, 0, 0, | |
f49dbcad | 1800 | (SCM cs), "Return the complement of the character set @var{cs}.") |
63181a97 MV |
1801 | #define FUNC_NAME s_scm_char_set_complement_x |
1802 | { | |
63181a97 | 1803 | SCM_VALIDATE_SMOB (1, cs, charset); |
f49dbcad | 1804 | cs = scm_char_set_complement (cs); |
63181a97 MV |
1805 | return cs; |
1806 | } | |
1807 | #undef FUNC_NAME | |
1808 | ||
1809 | ||
1810 | SCM_DEFINE (scm_char_set_union_x, "char-set-union!", 1, 0, 1, | |
f49dbcad MG |
1811 | (SCM cs1, SCM rest), |
1812 | "Return the union of all argument character sets.") | |
63181a97 MV |
1813 | #define FUNC_NAME s_scm_char_set_union_x |
1814 | { | |
63181a97 MV |
1815 | SCM_VALIDATE_SMOB (1, cs1, charset); |
1816 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1817 | ||
f49dbcad | 1818 | cs1 = scm_char_set_union (scm_cons (cs1, rest)); |
63181a97 MV |
1819 | return cs1; |
1820 | } | |
1821 | #undef FUNC_NAME | |
1822 | ||
1823 | ||
1824 | SCM_DEFINE (scm_char_set_intersection_x, "char-set-intersection!", 1, 0, 1, | |
f49dbcad MG |
1825 | (SCM cs1, SCM rest), |
1826 | "Return the intersection of all argument character sets.") | |
63181a97 MV |
1827 | #define FUNC_NAME s_scm_char_set_intersection_x |
1828 | { | |
63181a97 MV |
1829 | SCM_VALIDATE_SMOB (1, cs1, charset); |
1830 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1831 | ||
f49dbcad | 1832 | cs1 = scm_char_set_intersection (scm_cons (cs1, rest)); |
63181a97 MV |
1833 | return cs1; |
1834 | } | |
1835 | #undef FUNC_NAME | |
1836 | ||
1837 | ||
1838 | SCM_DEFINE (scm_char_set_difference_x, "char-set-difference!", 1, 0, 1, | |
f49dbcad MG |
1839 | (SCM cs1, SCM rest), |
1840 | "Return the difference of all argument character sets.") | |
63181a97 MV |
1841 | #define FUNC_NAME s_scm_char_set_difference_x |
1842 | { | |
63181a97 MV |
1843 | SCM_VALIDATE_SMOB (1, cs1, charset); |
1844 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1845 | ||
f49dbcad | 1846 | cs1 = scm_char_set_difference (cs1, rest); |
63181a97 MV |
1847 | return cs1; |
1848 | } | |
1849 | #undef FUNC_NAME | |
1850 | ||
1851 | ||
1852 | SCM_DEFINE (scm_char_set_xor_x, "char-set-xor!", 1, 0, 1, | |
1853 | (SCM cs1, SCM rest), | |
1854 | "Return the exclusive-or of all argument character sets.") | |
1855 | #define FUNC_NAME s_scm_char_set_xor_x | |
1856 | { | |
1857 | /* a side-effecting variant should presumably give consistent results: | |
1858 | (define a (char-set #\a)) | |
1859 | (char-set-xor a a a) -> char set #\a | |
1860 | (char-set-xor! a a a) -> char set #\a | |
f49dbcad | 1861 | */ |
7165abeb MG |
1862 | cs1 = scm_char_set_xor (scm_cons (cs1, rest)); |
1863 | return cs1; | |
63181a97 MV |
1864 | } |
1865 | #undef FUNC_NAME | |
1866 | ||
1867 | ||
f49dbcad MG |
1868 | SCM_DEFINE (scm_char_set_diff_plus_intersection_x, |
1869 | "char-set-diff+intersection!", 2, 0, 1, (SCM cs1, SCM cs2, | |
1870 | SCM rest), | |
1871 | "Return the difference and the intersection of all argument\n" | |
1872 | "character sets.") | |
63181a97 MV |
1873 | #define FUNC_NAME s_scm_char_set_diff_plus_intersection_x |
1874 | { | |
f49dbcad | 1875 | SCM diff, intersect; |
63181a97 | 1876 | |
f49dbcad MG |
1877 | diff = scm_char_set_difference (cs1, scm_cons (cs2, rest)); |
1878 | intersect = | |
1879 | scm_char_set_intersection (scm_cons (cs1, scm_cons (cs2, rest))); | |
1880 | cs1 = diff; | |
1881 | cs2 = intersect; | |
63181a97 MV |
1882 | return scm_values (scm_list_2 (cs1, cs2)); |
1883 | } | |
f49dbcad | 1884 | #undef FUNC_NAME |
d5ecf579 | 1885 | |
a17d2654 | 1886 | \f |
f49dbcad | 1887 | |
a17d2654 LC |
1888 | /* Standard character sets. */ |
1889 | ||
63181a97 MV |
1890 | SCM scm_char_set_lower_case; |
1891 | SCM scm_char_set_upper_case; | |
1892 | SCM scm_char_set_title_case; | |
1893 | SCM scm_char_set_letter; | |
1894 | SCM scm_char_set_digit; | |
1895 | SCM scm_char_set_letter_and_digit; | |
1896 | SCM scm_char_set_graphic; | |
1897 | SCM scm_char_set_printing; | |
1898 | SCM scm_char_set_whitespace; | |
1899 | SCM scm_char_set_iso_control; | |
1900 | SCM scm_char_set_punctuation; | |
1901 | SCM scm_char_set_symbol; | |
1902 | SCM scm_char_set_hex_digit; | |
1903 | SCM scm_char_set_blank; | |
1904 | SCM scm_char_set_ascii; | |
1905 | SCM scm_char_set_empty; | |
1906 | SCM scm_char_set_full; | |
1907 | ||
63181a97 | 1908 | |
a17d2654 LC |
1909 | /* Create an empty character set and return it after binding it to NAME. */ |
1910 | static inline SCM | |
f49dbcad | 1911 | define_charset (const char *name, const scm_t_char_set *p) |
63181a97 | 1912 | { |
f49dbcad MG |
1913 | SCM cs; |
1914 | ||
1915 | SCM_NEWSMOB (cs, scm_tc16_charset, p); | |
63181a97 MV |
1916 | scm_c_define (name, cs); |
1917 | return scm_permanent_object (cs); | |
1918 | } | |
1919 | ||
f49dbcad MG |
1920 | #ifdef SCM_CHARSET_DEBUG |
1921 | SCM_DEFINE (scm_debug_char_set, "debug-char-set", 1, 0, 0, | |
1922 | (SCM charset), | |
1923 | "Print out the internal C structure of @var{charset}.\n") | |
d5ecf579 | 1924 | #define FUNC_NAME s_scm_debug_char_set |
63181a97 | 1925 | { |
f49dbcad MG |
1926 | int i; |
1927 | scm_t_char_set *cs = SCM_CHARSET_DATA (charset); | |
1928 | fprintf (stderr, "cs %p\n", cs); | |
1929 | fprintf (stderr, "len %d\n", cs->len); | |
1930 | fprintf (stderr, "arr %p\n", cs->ranges); | |
1931 | for (i = 0; i < cs->len; i++) | |
63181a97 | 1932 | { |
f49dbcad MG |
1933 | if (cs->ranges[i].lo == cs->ranges[i].hi) |
1934 | fprintf (stderr, "%04x\n", cs->ranges[i].lo); | |
1935 | else | |
1936 | fprintf (stderr, "%04x..%04x\t[%d]\n", | |
1937 | cs->ranges[i].lo, | |
1938 | cs->ranges[i].hi, cs->ranges[i].hi - cs->ranges[i].lo + 1); | |
63181a97 | 1939 | } |
f49dbcad MG |
1940 | printf ("\n"); |
1941 | return SCM_UNSPECIFIED; | |
63181a97 | 1942 | } |
f49dbcad | 1943 | #undef FUNC_NAME |
d5ecf579 | 1944 | #endif /* SCM_CHARSET_DEBUG */ |
a17d2654 | 1945 | \f |
f49dbcad MG |
1946 | |
1947 | ||
63181a97 MV |
1948 | void |
1949 | scm_init_srfi_14 (void) | |
1950 | { | |
f49dbcad | 1951 | scm_tc16_charset = scm_make_smob_type ("character-set", 0); |
63181a97 MV |
1952 | scm_set_smob_free (scm_tc16_charset, charset_free); |
1953 | scm_set_smob_print (scm_tc16_charset, charset_print); | |
1954 | ||
f49dbcad MG |
1955 | scm_tc16_charset_cursor = scm_make_smob_type ("char-set-cursor", 0); |
1956 | scm_set_smob_free (scm_tc16_charset_cursor, charset_cursor_free); | |
1957 | scm_set_smob_print (scm_tc16_charset_cursor, charset_cursor_print); | |
1958 | ||
1959 | scm_char_set_upper_case = | |
1960 | define_charset ("char-set:upper-case", &cs_upper_case); | |
1961 | scm_char_set_lower_case = | |
1962 | define_charset ("char-set:lower-case", &cs_lower_case); | |
1963 | scm_char_set_title_case = | |
1964 | define_charset ("char-set:title-case", &cs_title_case); | |
1965 | scm_char_set_letter = define_charset ("char-set:letter", &cs_letter); | |
1966 | scm_char_set_digit = define_charset ("char-set:digit", &cs_digit); | |
1967 | scm_char_set_letter_and_digit = | |
1968 | define_charset ("char-set:letter+digit", &cs_letter_plus_digit); | |
1969 | scm_char_set_graphic = define_charset ("char-set:graphic", &cs_graphic); | |
1970 | scm_char_set_printing = define_charset ("char-set:printing", &cs_printing); | |
1971 | scm_char_set_whitespace = | |
1972 | define_charset ("char-set:whitespace", &cs_whitespace); | |
1973 | scm_char_set_iso_control = | |
1974 | define_charset ("char-set:iso-control", &cs_iso_control); | |
1975 | scm_char_set_punctuation = | |
1976 | define_charset ("char-set:punctuation", &cs_punctuation); | |
1977 | scm_char_set_symbol = define_charset ("char-set:symbol", &cs_symbol); | |
1978 | scm_char_set_hex_digit = | |
1979 | define_charset ("char-set:hex-digit", &cs_hex_digit); | |
1980 | scm_char_set_blank = define_charset ("char-set:blank", &cs_blank); | |
1981 | scm_char_set_ascii = define_charset ("char-set:ascii", &cs_ascii); | |
1982 | scm_char_set_empty = define_charset ("char-set:empty", &cs_empty); | |
1983 | scm_char_set_full = define_charset ("char-set:full", &cs_full); | |
63181a97 MV |
1984 | |
1985 | #include "libguile/srfi-14.x" | |
1986 | } | |
1987 | ||
1988 | /* End of srfi-14.c. */ |