Commit | Line | Data |
---|---|---|
63181a97 MV |
1 | /* srfi-14.c --- SRFI-14 procedures for Guile |
2 | * | |
4a655e50 | 3 | * Copyright (C) 2001, 2004, 2006, 2007, 2009, 2011 Free Software Foundation, Inc. |
63181a97 MV |
4 | * |
5 | * This library is free software; you can redistribute it and/or | |
53befeb7 NJ |
6 | * modify it under the terms of the GNU Lesser General Public License |
7 | * as published by the Free Software Foundation; either version 3 of | |
8 | * the License, or (at your option) any later version. | |
63181a97 | 9 | * |
53befeb7 NJ |
10 | * This library is distributed in the hope that it will be useful, but |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
63181a97 MV |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | * Lesser General Public License for more details. | |
14 | * | |
15 | * You should have received a copy of the GNU Lesser General Public | |
16 | * License along with this library; if not, write to the Free Software | |
53befeb7 NJ |
17 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
18 | * 02110-1301 USA | |
63181a97 MV |
19 | */ |
20 | ||
a17d2654 LC |
21 | #ifdef HAVE_CONFIG_H |
22 | # include <config.h> | |
23 | #endif | |
24 | ||
25 | ||
63181a97 | 26 | #include <string.h> |
f49dbcad | 27 | #include <unictype.h> |
63181a97 MV |
28 | |
29 | #include "libguile.h" | |
30 | #include "libguile/srfi-14.h" | |
f49dbcad | 31 | #include "libguile/strings.h" |
24d23822 | 32 | #include "libguile/chars.h" |
63181a97 | 33 | |
f49dbcad MG |
34 | /* Include the pre-computed standard charset data. */ |
35 | #include "libguile/srfi-14.i.c" | |
63181a97 | 36 | |
719bb8cd MG |
37 | scm_t_char_range cs_full_ranges[] = { |
38 | {0x0000, SCM_CODEPOINT_SURROGATE_START - 1} | |
39 | , | |
40 | {SCM_CODEPOINT_SURROGATE_END + 1, SCM_CODEPOINT_MAX} | |
41 | }; | |
42 | ||
43 | scm_t_char_set cs_full = { | |
44 | 2, | |
45 | cs_full_ranges | |
46 | }; | |
47 | ||
48 | ||
f49dbcad | 49 | #define SCM_CHARSET_DATA(charset) ((scm_t_char_set *) SCM_SMOB_DATA (charset)) |
63181a97 | 50 | |
f49dbcad MG |
51 | #define SCM_CHARSET_SET(cs, idx) \ |
52 | scm_i_charset_set (SCM_CHARSET_DATA (cs), idx) | |
63181a97 | 53 | |
f49dbcad MG |
54 | #define SCM_CHARSET_UNSET(cs, idx) \ |
55 | scm_i_charset_unset (SCM_CHARSET_DATA (cs), idx) | |
63181a97 MV |
56 | |
57 | /* Smob type code for character sets. */ | |
58 | int scm_tc16_charset = 0; | |
f49dbcad MG |
59 | int scm_tc16_charset_cursor = 0; |
60 | ||
61 | /* True if N exists in charset CS. */ | |
62 | int | |
63 | scm_i_charset_get (scm_t_char_set *cs, scm_t_wchar n) | |
64 | { | |
65 | size_t i; | |
66 | ||
67 | i = 0; | |
68 | while (i < cs->len) | |
69 | { | |
70 | if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi) | |
71 | return 1; | |
72 | i++; | |
73 | } | |
74 | ||
75 | return 0; | |
76 | } | |
77 | ||
78 | /* Put N into charset CS. */ | |
79 | void | |
80 | scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n) | |
81 | { | |
82 | size_t i; | |
83 | size_t len; | |
84 | ||
85 | len = cs->len; | |
86 | ||
87 | i = 0; | |
88 | while (i < len) | |
89 | { | |
90 | /* Already in this range */ | |
91 | if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi) | |
92 | { | |
93 | return; | |
94 | } | |
95 | ||
96 | if (n == cs->ranges[i].lo - 1) | |
97 | { | |
98 | /* This char is one below the current range. */ | |
99 | if (i > 0 && cs->ranges[i - 1].hi + 1 == n) | |
100 | { | |
08ed8058 MG |
101 | /* It is also one above the previous range. */ |
102 | /* This is an impossible condition: in the previous | |
103 | iteration, the test for 'one above the current range' | |
104 | should already have inserted the character here. */ | |
105 | abort (); | |
f49dbcad MG |
106 | } |
107 | else | |
108 | { | |
109 | /* Expand the range down by one. */ | |
110 | cs->ranges[i].lo = n; | |
111 | return; | |
112 | } | |
113 | } | |
114 | else if (n == cs->ranges[i].hi + 1) | |
115 | { | |
116 | /* This char is one above the current range. */ | |
117 | if (i < len - 1 && cs->ranges[i + 1].lo - 1 == n) | |
118 | { | |
119 | /* It is also one below the next range, so combine them. */ | |
120 | cs->ranges[i].hi = cs->ranges[i + 1].hi; | |
121 | if (i < len - 2) | |
122 | memmove (cs->ranges + (i + 1), cs->ranges + (i + 2), | |
123 | sizeof (scm_t_char_range) * (len - i - 2)); | |
124 | cs->ranges = scm_gc_realloc (cs->ranges, | |
125 | sizeof (scm_t_char_range) * len, | |
126 | sizeof (scm_t_char_range) * (len - | |
127 | 1), | |
128 | "character-set"); | |
129 | cs->len = len - 1; | |
130 | return; | |
131 | } | |
132 | else | |
133 | { | |
134 | /* Expand the range up by one. */ | |
135 | cs->ranges[i].hi = n; | |
136 | return; | |
137 | } | |
138 | } | |
139 | else if (n < cs->ranges[i].lo - 1) | |
140 | { | |
141 | /* This is a new range below the current one. */ | |
142 | cs->ranges = scm_gc_realloc (cs->ranges, | |
143 | sizeof (scm_t_char_range) * len, | |
144 | sizeof (scm_t_char_range) * (len + 1), | |
145 | "character-set"); | |
146 | memmove (cs->ranges + (i + 1), cs->ranges + i, | |
147 | sizeof (scm_t_char_range) * (len - i)); | |
148 | cs->ranges[i].lo = n; | |
149 | cs->ranges[i].hi = n; | |
150 | cs->len = len + 1; | |
151 | return; | |
152 | } | |
153 | ||
154 | i++; | |
155 | } | |
156 | ||
157 | /* This is a new range above all previous ranges. */ | |
158 | if (len == 0) | |
159 | { | |
160 | cs->ranges = scm_gc_malloc (sizeof (scm_t_char_range), "character-set"); | |
161 | } | |
162 | else | |
163 | { | |
164 | cs->ranges = scm_gc_realloc (cs->ranges, | |
165 | sizeof (scm_t_char_range) * len, | |
166 | sizeof (scm_t_char_range) * (len + 1), | |
167 | "character-set"); | |
168 | } | |
169 | cs->ranges[len].lo = n; | |
170 | cs->ranges[len].hi = n; | |
171 | cs->len = len + 1; | |
172 | ||
173 | return; | |
174 | } | |
175 | ||
91772d8f MG |
176 | /* Put LO to HI inclusive into charset CS. */ |
177 | static void | |
178 | scm_i_charset_set_range (scm_t_char_set *cs, scm_t_wchar lo, scm_t_wchar hi) | |
179 | { | |
180 | size_t i; | |
181 | ||
182 | i = 0; | |
183 | while (i < cs->len) | |
184 | { | |
185 | /* Already in this range */ | |
186 | if (cs->ranges[i].lo <= lo && cs->ranges[i].hi >= hi) | |
187 | return; | |
188 | ||
189 | /* cur: +---+ | |
190 | new: +---+ | |
191 | */ | |
192 | if (cs->ranges[i].lo - 1 > hi) | |
193 | { | |
194 | /* Add a new range below the current one. */ | |
195 | cs->ranges = scm_gc_realloc (cs->ranges, | |
196 | sizeof (scm_t_char_range) * cs->len, | |
197 | sizeof (scm_t_char_range) * (cs->len + 1), | |
198 | "character-set"); | |
199 | memmove (cs->ranges + (i + 1), cs->ranges + i, | |
200 | sizeof (scm_t_char_range) * (cs->len - i)); | |
201 | cs->ranges[i].lo = lo; | |
202 | cs->ranges[i].hi = hi; | |
203 | cs->len += 1; | |
204 | return; | |
205 | } | |
206 | ||
207 | /* cur: +---+ or +---+ or +---+ | |
208 | new: +---+ +---+ +---+ | |
209 | */ | |
210 | if (cs->ranges[i].lo > lo | |
211 | && (cs->ranges[i].lo - 1 <= hi && cs->ranges[i].hi >= hi)) | |
212 | { | |
213 | cs->ranges[i].lo = lo; | |
214 | return; | |
215 | } | |
216 | ||
217 | /* cur: +---+ or +---+ or +---+ | |
218 | new: +---+ +---+ +---+ | |
219 | */ | |
220 | else if (cs->ranges[i].hi + 1 >= lo && cs->ranges[i].hi < hi) | |
221 | { | |
222 | if (cs->ranges[i].lo > lo) | |
223 | cs->ranges[i].lo = lo; | |
224 | if (cs->ranges[i].hi < hi) | |
225 | cs->ranges[i].hi = hi; | |
226 | while (i < cs->len - 1) | |
227 | { | |
228 | /* cur: --+ +---+ | |
229 | new: -----+ | |
230 | */ | |
231 | if (cs->ranges[i + 1].lo - 1 > hi) | |
232 | break; | |
233 | ||
234 | /* cur: --+ +---+ or --+ +---+ or --+ +--+ | |
235 | new: -----+ ------+ ---------+ | |
236 | */ | |
237 | /* Combine this range with the previous one. */ | |
238 | if (cs->ranges[i + 1].hi > hi) | |
239 | cs->ranges[i].hi = cs->ranges[i + 1].hi; | |
240 | if (i + 1 < cs->len) | |
241 | memmove (cs->ranges + i + 1, cs->ranges + i + 2, | |
242 | sizeof (scm_t_char_range) * (cs->len - i - 2)); | |
243 | cs->ranges = scm_gc_realloc (cs->ranges, | |
244 | sizeof (scm_t_char_range) * cs->len, | |
245 | sizeof (scm_t_char_range) * (cs->len - 1), | |
246 | "character-set"); | |
247 | cs->len -= 1; | |
248 | } | |
249 | return; | |
250 | } | |
251 | i ++; | |
252 | } | |
253 | ||
254 | /* This is a new range above all previous ranges. */ | |
255 | if (cs->len == 0) | |
256 | { | |
257 | cs->ranges = scm_gc_malloc (sizeof (scm_t_char_range), "character-set"); | |
258 | } | |
259 | else | |
260 | { | |
261 | cs->ranges = scm_gc_realloc (cs->ranges, | |
262 | sizeof (scm_t_char_range) * cs->len, | |
263 | sizeof (scm_t_char_range) * (cs->len + 1), | |
264 | "character-set"); | |
265 | } | |
266 | cs->len += 1; | |
267 | cs->ranges[cs->len - 1].lo = lo; | |
268 | cs->ranges[cs->len - 1].hi = hi; | |
269 | ||
270 | return; | |
271 | } | |
272 | ||
f49dbcad MG |
273 | /* If N is in charset CS, remove it. */ |
274 | void | |
275 | scm_i_charset_unset (scm_t_char_set *cs, scm_t_wchar n) | |
276 | { | |
277 | size_t i; | |
278 | size_t len; | |
279 | ||
280 | len = cs->len; | |
281 | ||
282 | i = 0; | |
283 | while (i < len) | |
284 | { | |
285 | if (n < cs->ranges[i].lo) | |
286 | /* Not in this set. */ | |
287 | return; | |
288 | ||
289 | if (n == cs->ranges[i].lo && n == cs->ranges[i].hi) | |
290 | { | |
291 | /* Remove this one-character range. */ | |
292 | if (len == 1) | |
293 | { | |
294 | scm_gc_free (cs->ranges, | |
295 | sizeof (scm_t_char_range) * cs->len, | |
296 | "character-set"); | |
297 | cs->ranges = NULL; | |
298 | cs->len = 0; | |
299 | return; | |
300 | } | |
301 | else if (i < len - 1) | |
302 | { | |
303 | memmove (cs->ranges + i, cs->ranges + (i + 1), | |
304 | sizeof (scm_t_char_range) * (len - i - 1)); | |
305 | cs->ranges = scm_gc_realloc (cs->ranges, | |
306 | sizeof (scm_t_char_range) * len, | |
307 | sizeof (scm_t_char_range) * (len - | |
308 | 1), | |
309 | "character-set"); | |
310 | cs->len = len - 1; | |
311 | return; | |
312 | } | |
313 | else if (i == len - 1) | |
314 | { | |
315 | cs->ranges = scm_gc_realloc (cs->ranges, | |
316 | sizeof (scm_t_char_range) * len, | |
317 | sizeof (scm_t_char_range) * (len - | |
318 | 1), | |
319 | "character-set"); | |
320 | cs->len = len - 1; | |
321 | return; | |
322 | } | |
323 | } | |
324 | else if (n == cs->ranges[i].lo) | |
325 | { | |
326 | /* Shrink this range from the left. */ | |
327 | cs->ranges[i].lo = n + 1; | |
328 | return; | |
329 | } | |
330 | else if (n == cs->ranges[i].hi) | |
331 | { | |
332 | /* Shrink this range from the right. */ | |
333 | cs->ranges[i].hi = n - 1; | |
334 | return; | |
335 | } | |
336 | else if (n > cs->ranges[i].lo && n < cs->ranges[i].hi) | |
337 | { | |
338 | /* Split this range into two pieces. */ | |
339 | cs->ranges = scm_gc_realloc (cs->ranges, | |
340 | sizeof (scm_t_char_range) * len, | |
341 | sizeof (scm_t_char_range) * (len + 1), | |
342 | "character-set"); | |
343 | if (i < len - 1) | |
344 | memmove (cs->ranges + (i + 2), cs->ranges + (i + 1), | |
345 | sizeof (scm_t_char_range) * (len - i - 1)); | |
346 | cs->ranges[i + 1].hi = cs->ranges[i].hi; | |
347 | cs->ranges[i + 1].lo = n + 1; | |
348 | cs->ranges[i].hi = n - 1; | |
349 | cs->len = len + 1; | |
350 | return; | |
351 | } | |
352 | ||
353 | i++; | |
354 | } | |
355 | ||
356 | /* This value is above all ranges, so do nothing here. */ | |
357 | return; | |
358 | } | |
359 | ||
360 | static int | |
361 | charsets_equal (scm_t_char_set *a, scm_t_char_set *b) | |
362 | { | |
363 | if (a->len != b->len) | |
364 | return 0; | |
365 | ||
366 | if (memcmp (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len) != 0) | |
367 | return 0; | |
368 | ||
369 | return 1; | |
370 | } | |
371 | ||
372 | /* Return true if every character in A is also in B. */ | |
373 | static int | |
374 | charsets_leq (scm_t_char_set *a, scm_t_char_set *b) | |
375 | { | |
376 | size_t i = 0, j = 0; | |
377 | scm_t_wchar alo, ahi; | |
378 | ||
379 | if (a->len == 0) | |
380 | return 1; | |
381 | if (b->len == 0) | |
382 | return 0; | |
383 | while (i < a->len) | |
384 | { | |
385 | alo = a->ranges[i].lo; | |
386 | ahi = a->ranges[i].hi; | |
387 | while (b->ranges[j].hi < alo) | |
388 | { | |
389 | if (j < b->len - 1) | |
390 | j++; | |
391 | else | |
392 | return 0; | |
393 | } | |
394 | if (alo < b->ranges[j].lo || ahi > b->ranges[j].hi) | |
395 | return 0; | |
396 | i++; | |
397 | } | |
398 | ||
399 | return 1; | |
400 | } | |
401 | ||
402 | /* Merge B into A. */ | |
403 | static void | |
404 | charsets_union (scm_t_char_set *a, scm_t_char_set *b) | |
405 | { | |
406 | size_t i = 0; | |
aff31b0f | 407 | scm_t_wchar blo, bhi; |
f49dbcad MG |
408 | |
409 | if (b->len == 0) | |
410 | return; | |
411 | ||
412 | if (a->len == 0) | |
413 | { | |
414 | a->len = b->len; | |
415 | a->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * b->len, | |
416 | "character-set"); | |
417 | memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * b->len); | |
418 | return; | |
419 | } | |
420 | ||
f49dbcad MG |
421 | while (i < b->len) |
422 | { | |
423 | blo = b->ranges[i].lo; | |
424 | bhi = b->ranges[i].hi; | |
aff31b0f | 425 | scm_i_charset_set_range (a, blo, bhi); |
f49dbcad MG |
426 | |
427 | i++; | |
428 | } | |
429 | ||
430 | return; | |
431 | } | |
432 | ||
433 | /* Remove elements not both in A and B from A. */ | |
434 | static void | |
435 | charsets_intersection (scm_t_char_set *a, scm_t_char_set *b) | |
436 | { | |
437 | size_t i = 0; | |
438 | scm_t_wchar blo, bhi, n; | |
439 | scm_t_char_set *c; | |
440 | ||
441 | if (a->len == 0) | |
442 | return; | |
443 | ||
444 | if (b->len == 0) | |
445 | { | |
446 | scm_gc_free (a->ranges, sizeof (scm_t_char_range) * a->len, | |
447 | "character-set"); | |
448 | a->len = 0; | |
449 | return; | |
450 | } | |
451 | ||
452 | c = (scm_t_char_set *) scm_malloc (sizeof (scm_t_char_set)); | |
453 | c->len = 0; | |
454 | c->ranges = NULL; | |
455 | ||
456 | while (i < b->len) | |
457 | { | |
458 | blo = b->ranges[i].lo; | |
459 | bhi = b->ranges[i].hi; | |
460 | for (n = blo; n <= bhi; n++) | |
461 | if (scm_i_charset_get (a, n)) | |
462 | scm_i_charset_set (c, n); | |
463 | i++; | |
464 | } | |
465 | scm_gc_free (a->ranges, sizeof (scm_t_char_range) * a->len, | |
466 | "character-set"); | |
467 | ||
468 | a->len = c->len; | |
469 | if (c->len != 0) | |
470 | a->ranges = c->ranges; | |
471 | else | |
472 | a->ranges = NULL; | |
473 | free (c); | |
474 | return; | |
475 | } | |
476 | ||
f4cdfe61 MG |
477 | #define SCM_ADD_RANGE(low, high) \ |
478 | do { \ | |
479 | p->ranges[idx].lo = (low); \ | |
480 | p->ranges[idx++].hi = (high); \ | |
481 | } while (0) | |
482 | #define SCM_ADD_RANGE_SKIP_SURROGATES(low, high) \ | |
483 | do { \ | |
484 | p->ranges[idx].lo = (low); \ | |
485 | p->ranges[idx++].hi = SCM_CODEPOINT_SURROGATE_START - 1; \ | |
486 | p->ranges[idx].lo = SCM_CODEPOINT_SURROGATE_END + 1; \ | |
487 | p->ranges[idx++].hi = (high); \ | |
488 | } while (0) | |
489 | ||
490 | ||
491 | ||
f49dbcad MG |
492 | /* Make P the compelement of Q. */ |
493 | static void | |
494 | charsets_complement (scm_t_char_set *p, scm_t_char_set *q) | |
495 | { | |
496 | int k, idx; | |
497 | ||
f4cdfe61 | 498 | idx = 0; |
f49dbcad MG |
499 | if (q->len == 0) |
500 | { | |
501 | /* Fill with all valid codepoints. */ | |
502 | p->len = 2; | |
503 | p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2, | |
504 | "character-set"); | |
f4cdfe61 | 505 | SCM_ADD_RANGE_SKIP_SURROGATES (0, SCM_CODEPOINT_MAX); |
f49dbcad MG |
506 | return; |
507 | } | |
508 | ||
509 | if (p->len > 0) | |
510 | scm_gc_free (p->ranges, sizeof (scm_t_char_set) * p->len, | |
511 | "character-set"); | |
512 | ||
f4cdfe61 | 513 | /* Count the number of ranges needed for the output. */ |
f49dbcad MG |
514 | p->len = 0; |
515 | if (q->ranges[0].lo > 0) | |
516 | p->len++; | |
517 | if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX) | |
518 | p->len++; | |
f4cdfe61 | 519 | p->len += q->len; |
f49dbcad MG |
520 | p->ranges = |
521 | (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) * p->len, | |
522 | "character-set"); | |
f49dbcad MG |
523 | if (q->ranges[0].lo > 0) |
524 | { | |
f4cdfe61 MG |
525 | if (q->ranges[0].lo > SCM_CODEPOINT_SURROGATE_END) |
526 | SCM_ADD_RANGE_SKIP_SURROGATES (0, q->ranges[0].lo - 1); | |
527 | else | |
528 | SCM_ADD_RANGE (0, q->ranges[0].lo - 1); | |
f49dbcad MG |
529 | } |
530 | for (k = 1; k < q->len; k++) | |
531 | { | |
f4cdfe61 MG |
532 | if (q->ranges[k - 1].hi < SCM_CODEPOINT_SURROGATE_START |
533 | && q->ranges[k].lo - 1 > SCM_CODEPOINT_SURROGATE_END) | |
534 | SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1); | |
535 | else | |
536 | SCM_ADD_RANGE (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1); | |
f49dbcad MG |
537 | } |
538 | if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX) | |
539 | { | |
f4cdfe61 MG |
540 | if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_SURROGATE_START) |
541 | SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX); | |
542 | else | |
543 | SCM_ADD_RANGE (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX); | |
f49dbcad MG |
544 | } |
545 | return; | |
546 | } | |
f4cdfe61 MG |
547 | #undef SCM_ADD_RANGE |
548 | #undef SCM_ADD_RANGE_SKIP_SURROGATES | |
f49dbcad MG |
549 | |
550 | /* Replace A with elements only found in one of A or B. */ | |
551 | static void | |
552 | charsets_xor (scm_t_char_set *a, scm_t_char_set *b) | |
553 | { | |
554 | size_t i = 0; | |
555 | scm_t_wchar blo, bhi, n; | |
556 | ||
557 | if (b->len == 0) | |
558 | { | |
559 | return; | |
560 | } | |
63181a97 | 561 | |
f49dbcad MG |
562 | if (a->len == 0) |
563 | { | |
564 | a->ranges = | |
565 | (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) * | |
566 | b->len, "character-set"); | |
567 | a->len = b->len; | |
568 | memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len); | |
569 | return; | |
570 | } | |
571 | ||
572 | while (i < b->len) | |
573 | { | |
574 | blo = b->ranges[i].lo; | |
575 | bhi = b->ranges[i].hi; | |
576 | for (n = blo; n <= bhi; n++) | |
577 | { | |
578 | if (scm_i_charset_get (a, n)) | |
579 | scm_i_charset_unset (a, n); | |
580 | else | |
581 | scm_i_charset_set (a, n); | |
582 | } | |
583 | ||
584 | i++; | |
585 | } | |
586 | return; | |
587 | } | |
63181a97 MV |
588 | |
589 | /* Smob print hook for character sets. */ | |
590 | static int | |
591 | charset_print (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED) | |
592 | { | |
f49dbcad | 593 | size_t i; |
63181a97 | 594 | int first = 1; |
f49dbcad MG |
595 | scm_t_char_set *p; |
596 | const size_t max_ranges_to_print = 50; | |
597 | ||
598 | p = SCM_CHARSET_DATA (charset); | |
63181a97 | 599 | |
0607ebbf | 600 | scm_puts_unlocked ("#<charset {", port); |
f49dbcad MG |
601 | for (i = 0; i < p->len; i++) |
602 | { | |
603 | if (first) | |
604 | first = 0; | |
605 | else | |
0607ebbf | 606 | scm_puts_unlocked (" ", port); |
f49dbcad MG |
607 | scm_write (SCM_MAKE_CHAR (p->ranges[i].lo), port); |
608 | if (p->ranges[i].lo != p->ranges[i].hi) | |
609 | { | |
0607ebbf | 610 | scm_puts_unlocked ("..", port); |
f49dbcad MG |
611 | scm_write (SCM_MAKE_CHAR (p->ranges[i].hi), port); |
612 | } | |
613 | if (i >= max_ranges_to_print) | |
614 | { | |
615 | /* Too many to print here. Quit early. */ | |
0607ebbf | 616 | scm_puts_unlocked (" ...", port); |
f49dbcad MG |
617 | break; |
618 | } | |
619 | } | |
0607ebbf | 620 | scm_puts_unlocked ("}>", port); |
63181a97 MV |
621 | return 1; |
622 | } | |
623 | ||
f49dbcad MG |
624 | /* Smob print hook for character sets cursors. */ |
625 | static int | |
626 | charset_cursor_print (SCM cursor, SCM port, | |
627 | scm_print_state *pstate SCM_UNUSED) | |
628 | { | |
629 | scm_t_char_set_cursor *cur; | |
630 | ||
631 | cur = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor); | |
632 | ||
0607ebbf | 633 | scm_puts_unlocked ("#<charset-cursor ", port); |
f49dbcad | 634 | if (cur->range == (size_t) (-1)) |
0607ebbf | 635 | scm_puts_unlocked ("(empty)", port); |
f49dbcad MG |
636 | else |
637 | { | |
638 | scm_write (scm_from_size_t (cur->range), port); | |
0607ebbf | 639 | scm_puts_unlocked (":", port); |
f49dbcad MG |
640 | scm_write (scm_from_int32 (cur->n), port); |
641 | } | |
0607ebbf | 642 | scm_puts_unlocked (">", port); |
f49dbcad MG |
643 | return 1; |
644 | } | |
63181a97 | 645 | |
63181a97 MV |
646 | |
647 | /* Create a new, empty character set. */ | |
648 | static SCM | |
f49dbcad | 649 | make_char_set (const char *func_name) |
63181a97 | 650 | { |
f49dbcad | 651 | scm_t_char_set *p; |
63181a97 | 652 | |
f49dbcad MG |
653 | p = scm_gc_malloc (sizeof (scm_t_char_set), "character-set"); |
654 | memset (p, 0, sizeof (scm_t_char_set)); | |
63181a97 MV |
655 | SCM_RETURN_NEWSMOB (scm_tc16_charset, p); |
656 | } | |
657 | ||
658 | ||
659 | SCM_DEFINE (scm_char_set_p, "char-set?", 1, 0, 0, | |
660 | (SCM obj), | |
661 | "Return @code{#t} if @var{obj} is a character set, @code{#f}\n" | |
662 | "otherwise.") | |
663 | #define FUNC_NAME s_scm_char_set_p | |
664 | { | |
665 | return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_charset, obj)); | |
666 | } | |
667 | #undef FUNC_NAME | |
668 | ||
669 | ||
670 | SCM_DEFINE (scm_char_set_eq, "char-set=", 0, 0, 1, | |
671 | (SCM char_sets), | |
672 | "Return @code{#t} if all given character sets are equal.") | |
673 | #define FUNC_NAME s_scm_char_set_eq | |
674 | { | |
675 | int argnum = 1; | |
f49dbcad | 676 | scm_t_char_set *cs1_data = NULL; |
63181a97 MV |
677 | |
678 | SCM_VALIDATE_REST_ARGUMENT (char_sets); | |
679 | ||
d2e53ed6 | 680 | while (!scm_is_null (char_sets)) |
63181a97 MV |
681 | { |
682 | SCM csi = SCM_CAR (char_sets); | |
f49dbcad | 683 | scm_t_char_set *csi_data; |
63181a97 MV |
684 | |
685 | SCM_VALIDATE_SMOB (argnum, csi, charset); | |
686 | argnum++; | |
f49dbcad | 687 | csi_data = SCM_CHARSET_DATA (csi); |
63181a97 | 688 | if (cs1_data == NULL) |
f49dbcad MG |
689 | cs1_data = csi_data; |
690 | else if (!charsets_equal (cs1_data, csi_data)) | |
691 | return SCM_BOOL_F; | |
63181a97 MV |
692 | char_sets = SCM_CDR (char_sets); |
693 | } | |
694 | return SCM_BOOL_T; | |
695 | } | |
696 | #undef FUNC_NAME | |
697 | ||
698 | ||
699 | SCM_DEFINE (scm_char_set_leq, "char-set<=", 0, 0, 1, | |
700 | (SCM char_sets), | |
701 | "Return @code{#t} if every character set @var{cs}i is a subset\n" | |
702 | "of character set @var{cs}i+1.") | |
703 | #define FUNC_NAME s_scm_char_set_leq | |
704 | { | |
705 | int argnum = 1; | |
f49dbcad | 706 | scm_t_char_set *prev_data = NULL; |
63181a97 MV |
707 | |
708 | SCM_VALIDATE_REST_ARGUMENT (char_sets); | |
709 | ||
d2e53ed6 | 710 | while (!scm_is_null (char_sets)) |
63181a97 MV |
711 | { |
712 | SCM csi = SCM_CAR (char_sets); | |
f49dbcad | 713 | scm_t_char_set *csi_data; |
63181a97 MV |
714 | |
715 | SCM_VALIDATE_SMOB (argnum, csi, charset); | |
716 | argnum++; | |
f49dbcad | 717 | csi_data = SCM_CHARSET_DATA (csi); |
63181a97 | 718 | if (prev_data) |
f49dbcad MG |
719 | { |
720 | if (!charsets_leq (prev_data, csi_data)) | |
721 | return SCM_BOOL_F; | |
722 | } | |
63181a97 MV |
723 | prev_data = csi_data; |
724 | char_sets = SCM_CDR (char_sets); | |
725 | } | |
726 | return SCM_BOOL_T; | |
727 | } | |
728 | #undef FUNC_NAME | |
729 | ||
730 | ||
731 | SCM_DEFINE (scm_char_set_hash, "char-set-hash", 1, 1, 0, | |
732 | (SCM cs, SCM bound), | |
733 | "Compute a hash value for the character set @var{cs}. If\n" | |
734 | "@var{bound} is given and non-zero, it restricts the\n" | |
735 | "returned value to the range 0 @dots{} @var{bound - 1}.") | |
736 | #define FUNC_NAME s_scm_char_set_hash | |
737 | { | |
738 | const unsigned long default_bnd = 871; | |
739 | unsigned long bnd; | |
f49dbcad | 740 | scm_t_char_set *p; |
63181a97 MV |
741 | unsigned long val = 0; |
742 | int k; | |
f49dbcad | 743 | scm_t_wchar c; |
63181a97 MV |
744 | |
745 | SCM_VALIDATE_SMOB (1, cs, charset); | |
746 | ||
747 | if (SCM_UNBNDP (bound)) | |
748 | bnd = default_bnd; | |
749 | else | |
750 | { | |
751 | bnd = scm_to_ulong (bound); | |
752 | if (bnd == 0) | |
f49dbcad | 753 | bnd = default_bnd; |
63181a97 MV |
754 | } |
755 | ||
f49dbcad MG |
756 | p = SCM_CHARSET_DATA (cs); |
757 | for (k = 0; k < p->len; k++) | |
63181a97 | 758 | { |
f49dbcad MG |
759 | for (c = p->ranges[k].lo; c <= p->ranges[k].hi; c++) |
760 | val = c + (val << 1); | |
63181a97 MV |
761 | } |
762 | return scm_from_ulong (val % bnd); | |
763 | } | |
764 | #undef FUNC_NAME | |
765 | ||
766 | ||
767 | SCM_DEFINE (scm_char_set_cursor, "char-set-cursor", 1, 0, 0, | |
f49dbcad | 768 | (SCM cs), "Return a cursor into the character set @var{cs}.") |
63181a97 MV |
769 | #define FUNC_NAME s_scm_char_set_cursor |
770 | { | |
f49dbcad MG |
771 | scm_t_char_set *cs_data; |
772 | scm_t_char_set_cursor *cur_data; | |
63181a97 MV |
773 | |
774 | SCM_VALIDATE_SMOB (1, cs, charset); | |
f49dbcad MG |
775 | cs_data = SCM_CHARSET_DATA (cs); |
776 | cur_data = | |
777 | (scm_t_char_set_cursor *) scm_gc_malloc (sizeof (scm_t_char_set_cursor), | |
778 | "charset-cursor"); | |
779 | if (cs_data->len == 0) | |
63181a97 | 780 | { |
f49dbcad MG |
781 | cur_data->range = (size_t) (-1); |
782 | cur_data->n = 0; | |
63181a97 | 783 | } |
f49dbcad MG |
784 | else |
785 | { | |
786 | cur_data->range = 0; | |
787 | cur_data->n = cs_data->ranges[0].lo; | |
788 | } | |
789 | SCM_RETURN_NEWSMOB (scm_tc16_charset_cursor, cur_data); | |
63181a97 MV |
790 | } |
791 | #undef FUNC_NAME | |
792 | ||
793 | ||
794 | SCM_DEFINE (scm_char_set_ref, "char-set-ref", 2, 0, 0, | |
f49dbcad MG |
795 | (SCM cs, SCM cursor), |
796 | "Return the character at the current cursor position\n" | |
797 | "@var{cursor} in the character set @var{cs}. It is an error to\n" | |
798 | "pass a cursor for which @code{end-of-char-set?} returns true.") | |
63181a97 MV |
799 | #define FUNC_NAME s_scm_char_set_ref |
800 | { | |
f49dbcad MG |
801 | scm_t_char_set *cs_data; |
802 | scm_t_char_set_cursor *cur_data; | |
803 | size_t i; | |
804 | ||
63181a97 | 805 | SCM_VALIDATE_SMOB (1, cs, charset); |
f49dbcad | 806 | SCM_VALIDATE_SMOB (2, cursor, charset_cursor); |
63181a97 | 807 | |
f49dbcad MG |
808 | cs_data = SCM_CHARSET_DATA (cs); |
809 | cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor); | |
63181a97 | 810 | |
f49dbcad MG |
811 | /* Validate that this cursor is still true. */ |
812 | i = cur_data->range; | |
813 | if (i == (size_t) (-1) | |
814 | || i >= cs_data->len | |
815 | || cur_data->n < cs_data->ranges[i].lo | |
816 | || cur_data->n > cs_data->ranges[i].hi) | |
63181a97 | 817 | SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor)); |
f49dbcad | 818 | return SCM_MAKE_CHAR (cur_data->n); |
63181a97 MV |
819 | } |
820 | #undef FUNC_NAME | |
821 | ||
822 | ||
823 | SCM_DEFINE (scm_char_set_cursor_next, "char-set-cursor-next", 2, 0, 0, | |
f49dbcad MG |
824 | (SCM cs, SCM cursor), |
825 | "Advance the character set cursor @var{cursor} to the next\n" | |
826 | "character in the character set @var{cs}. It is an error if the\n" | |
827 | "cursor given satisfies @code{end-of-char-set?}.") | |
63181a97 MV |
828 | #define FUNC_NAME s_scm_char_set_cursor_next |
829 | { | |
f49dbcad MG |
830 | scm_t_char_set *cs_data; |
831 | scm_t_char_set_cursor *cur_data; | |
832 | size_t i; | |
833 | ||
63181a97 | 834 | SCM_VALIDATE_SMOB (1, cs, charset); |
f49dbcad MG |
835 | SCM_VALIDATE_SMOB (2, cursor, charset_cursor); |
836 | ||
837 | cs_data = SCM_CHARSET_DATA (cs); | |
838 | cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor); | |
63181a97 | 839 | |
f49dbcad MG |
840 | /* Validate that this cursor is still true. */ |
841 | i = cur_data->range; | |
842 | if (i == (size_t) (-1) | |
843 | || i >= cs_data->len | |
844 | || cur_data->n < cs_data->ranges[i].lo | |
845 | || cur_data->n > cs_data->ranges[i].hi) | |
63181a97 | 846 | SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor)); |
f49dbcad MG |
847 | /* Increment the cursor. */ |
848 | if (cur_data->n == cs_data->ranges[i].hi) | |
63181a97 | 849 | { |
f49dbcad MG |
850 | if (i + 1 < cs_data->len) |
851 | { | |
852 | cur_data->range = i + 1; | |
853 | cur_data->n = cs_data->ranges[i + 1].lo; | |
854 | } | |
855 | else | |
856 | { | |
857 | /* This is the end of the road. */ | |
858 | cur_data->range = (size_t) (-1); | |
859 | cur_data->n = 0; | |
860 | } | |
63181a97 | 861 | } |
f49dbcad MG |
862 | else |
863 | { | |
864 | cur_data->n = cur_data->n + 1; | |
865 | } | |
866 | ||
867 | return cursor; | |
63181a97 MV |
868 | } |
869 | #undef FUNC_NAME | |
870 | ||
871 | ||
872 | SCM_DEFINE (scm_end_of_char_set_p, "end-of-char-set?", 1, 0, 0, | |
f49dbcad MG |
873 | (SCM cursor), |
874 | "Return @code{#t} if @var{cursor} has reached the end of a\n" | |
875 | "character set, @code{#f} otherwise.") | |
63181a97 MV |
876 | #define FUNC_NAME s_scm_end_of_char_set_p |
877 | { | |
f49dbcad MG |
878 | scm_t_char_set_cursor *cur_data; |
879 | SCM_VALIDATE_SMOB (1, cursor, charset_cursor); | |
880 | ||
881 | cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor); | |
882 | if (cur_data->range == (size_t) (-1)) | |
883 | return SCM_BOOL_T; | |
884 | ||
885 | return SCM_BOOL_F; | |
63181a97 MV |
886 | } |
887 | #undef FUNC_NAME | |
888 | ||
889 | ||
890 | SCM_DEFINE (scm_char_set_fold, "char-set-fold", 3, 0, 0, | |
f49dbcad MG |
891 | (SCM kons, SCM knil, SCM cs), |
892 | "Fold the procedure @var{kons} over the character set @var{cs},\n" | |
893 | "initializing it with @var{knil}.") | |
63181a97 MV |
894 | #define FUNC_NAME s_scm_char_set_fold |
895 | { | |
f49dbcad | 896 | scm_t_char_set *cs_data; |
63181a97 | 897 | int k; |
f49dbcad | 898 | scm_t_wchar n; |
63181a97 MV |
899 | |
900 | SCM_VALIDATE_PROC (1, kons); | |
901 | SCM_VALIDATE_SMOB (3, cs, charset); | |
902 | ||
f49dbcad MG |
903 | cs_data = SCM_CHARSET_DATA (cs); |
904 | ||
905 | if (cs_data->len == 0) | |
906 | return knil; | |
907 | ||
908 | for (k = 0; k < cs_data->len; k++) | |
909 | for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) | |
63181a97 | 910 | { |
f49dbcad | 911 | knil = scm_call_2 (kons, SCM_MAKE_CHAR (n), knil); |
63181a97 MV |
912 | } |
913 | return knil; | |
914 | } | |
915 | #undef FUNC_NAME | |
916 | ||
917 | ||
918 | SCM_DEFINE (scm_char_set_unfold, "char-set-unfold", 4, 1, 0, | |
919 | (SCM p, SCM f, SCM g, SCM seed, SCM base_cs), | |
920 | "This is a fundamental constructor for character sets.\n" | |
921 | "@itemize @bullet\n" | |
922 | "@item @var{g} is used to generate a series of ``seed'' values\n" | |
923 | "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n" | |
924 | "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n" | |
925 | "@item @var{p} tells us when to stop -- when it returns true\n" | |
926 | "when applied to one of the seed values.\n" | |
927 | "@item @var{f} maps each seed value to a character. These\n" | |
928 | "characters are added to the base character set @var{base_cs} to\n" | |
929 | "form the result; @var{base_cs} defaults to the empty set.\n" | |
930 | "@end itemize") | |
931 | #define FUNC_NAME s_scm_char_set_unfold | |
932 | { | |
933 | SCM result, tmp; | |
934 | ||
935 | SCM_VALIDATE_PROC (1, p); | |
936 | SCM_VALIDATE_PROC (2, f); | |
937 | SCM_VALIDATE_PROC (3, g); | |
938 | if (!SCM_UNBNDP (base_cs)) | |
939 | { | |
940 | SCM_VALIDATE_SMOB (5, base_cs, charset); | |
941 | result = scm_char_set_copy (base_cs); | |
942 | } | |
943 | else | |
944 | result = make_char_set (FUNC_NAME); | |
945 | ||
946 | tmp = scm_call_1 (p, seed); | |
947 | while (scm_is_false (tmp)) | |
948 | { | |
949 | SCM ch = scm_call_1 (f, seed); | |
950 | if (!SCM_CHARP (ch)) | |
951 | SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f)); | |
952 | SCM_CHARSET_SET (result, SCM_CHAR (ch)); | |
953 | ||
954 | seed = scm_call_1 (g, seed); | |
955 | tmp = scm_call_1 (p, seed); | |
956 | } | |
957 | return result; | |
958 | } | |
959 | #undef FUNC_NAME | |
960 | ||
961 | ||
962 | SCM_DEFINE (scm_char_set_unfold_x, "char-set-unfold!", 5, 0, 0, | |
963 | (SCM p, SCM f, SCM g, SCM seed, SCM base_cs), | |
964 | "This is a fundamental constructor for character sets.\n" | |
965 | "@itemize @bullet\n" | |
966 | "@item @var{g} is used to generate a series of ``seed'' values\n" | |
967 | "from the initial seed: @var{seed}, (@var{g} @var{seed}),\n" | |
968 | "(@var{g}^2 @var{seed}), (@var{g}^3 @var{seed}), @dots{}\n" | |
969 | "@item @var{p} tells us when to stop -- when it returns true\n" | |
970 | "when applied to one of the seed values.\n" | |
971 | "@item @var{f} maps each seed value to a character. These\n" | |
972 | "characters are added to the base character set @var{base_cs} to\n" | |
973 | "form the result; @var{base_cs} defaults to the empty set.\n" | |
974 | "@end itemize") | |
975 | #define FUNC_NAME s_scm_char_set_unfold_x | |
976 | { | |
977 | SCM tmp; | |
978 | ||
979 | SCM_VALIDATE_PROC (1, p); | |
980 | SCM_VALIDATE_PROC (2, f); | |
981 | SCM_VALIDATE_PROC (3, g); | |
982 | SCM_VALIDATE_SMOB (5, base_cs, charset); | |
983 | ||
984 | tmp = scm_call_1 (p, seed); | |
985 | while (scm_is_false (tmp)) | |
986 | { | |
987 | SCM ch = scm_call_1 (f, seed); | |
988 | if (!SCM_CHARP (ch)) | |
989 | SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f)); | |
990 | SCM_CHARSET_SET (base_cs, SCM_CHAR (ch)); | |
991 | ||
992 | seed = scm_call_1 (g, seed); | |
993 | tmp = scm_call_1 (p, seed); | |
994 | } | |
995 | return base_cs; | |
996 | } | |
997 | #undef FUNC_NAME | |
998 | ||
999 | ||
1000 | SCM_DEFINE (scm_char_set_for_each, "char-set-for-each", 2, 0, 0, | |
f49dbcad MG |
1001 | (SCM proc, SCM cs), |
1002 | "Apply @var{proc} to every character in the character set\n" | |
1003 | "@var{cs}. The return value is not specified.") | |
63181a97 MV |
1004 | #define FUNC_NAME s_scm_char_set_for_each |
1005 | { | |
f49dbcad | 1006 | scm_t_char_set *cs_data; |
63181a97 | 1007 | int k; |
f49dbcad | 1008 | scm_t_wchar n; |
63181a97 MV |
1009 | |
1010 | SCM_VALIDATE_PROC (1, proc); | |
1011 | SCM_VALIDATE_SMOB (2, cs, charset); | |
1012 | ||
f49dbcad MG |
1013 | cs_data = SCM_CHARSET_DATA (cs); |
1014 | ||
1015 | if (cs_data->len == 0) | |
1016 | return SCM_UNSPECIFIED; | |
1017 | ||
1018 | for (k = 0; k < cs_data->len; k++) | |
1019 | for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) | |
1020 | { | |
1021 | scm_call_1 (proc, SCM_MAKE_CHAR (n)); | |
1022 | } | |
1023 | ||
63181a97 MV |
1024 | return SCM_UNSPECIFIED; |
1025 | } | |
1026 | #undef FUNC_NAME | |
1027 | ||
1028 | ||
1029 | SCM_DEFINE (scm_char_set_map, "char-set-map", 2, 0, 0, | |
1030 | (SCM proc, SCM cs), | |
1031 | "Map the procedure @var{proc} over every character in @var{cs}.\n" | |
1032 | "@var{proc} must be a character -> character procedure.") | |
1033 | #define FUNC_NAME s_scm_char_set_map | |
1034 | { | |
1035 | SCM result; | |
1036 | int k; | |
f49dbcad MG |
1037 | scm_t_char_set *cs_data; |
1038 | scm_t_wchar n; | |
63181a97 MV |
1039 | |
1040 | SCM_VALIDATE_PROC (1, proc); | |
1041 | SCM_VALIDATE_SMOB (2, cs, charset); | |
1042 | ||
1043 | result = make_char_set (FUNC_NAME); | |
f49dbcad MG |
1044 | cs_data = SCM_CHARSET_DATA (cs); |
1045 | ||
1046 | if (cs_data->len == 0) | |
1047 | return result; | |
1048 | ||
1049 | for (k = 0; k < cs_data->len; k++) | |
1050 | for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) | |
63181a97 | 1051 | { |
f49dbcad MG |
1052 | SCM ch = scm_call_1 (proc, SCM_MAKE_CHAR (n)); |
1053 | if (!SCM_CHARP (ch)) | |
1054 | SCM_MISC_ERROR ("procedure ~S returned non-char", | |
1055 | scm_list_1 (proc)); | |
1056 | SCM_CHARSET_SET (result, SCM_CHAR (ch)); | |
63181a97 MV |
1057 | } |
1058 | return result; | |
1059 | } | |
1060 | #undef FUNC_NAME | |
1061 | ||
1062 | ||
1063 | SCM_DEFINE (scm_char_set_copy, "char-set-copy", 1, 0, 0, | |
1064 | (SCM cs), | |
1065 | "Return a newly allocated character set containing all\n" | |
1066 | "characters in @var{cs}.") | |
1067 | #define FUNC_NAME s_scm_char_set_copy | |
1068 | { | |
1069 | SCM ret; | |
f49dbcad | 1070 | scm_t_char_set *p1, *p2; |
63181a97 MV |
1071 | |
1072 | SCM_VALIDATE_SMOB (1, cs, charset); | |
1073 | ret = make_char_set (FUNC_NAME); | |
f49dbcad MG |
1074 | p1 = SCM_CHARSET_DATA (cs); |
1075 | p2 = SCM_CHARSET_DATA (ret); | |
1076 | p2->len = p1->len; | |
1077 | ||
1078 | if (p1->len == 0) | |
1079 | p2->ranges = NULL; | |
1080 | else | |
1081 | { | |
1082 | p2->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * p1->len, | |
1083 | "character-set"); | |
1084 | memcpy (p2->ranges, p1->ranges, sizeof (scm_t_char_range) * p1->len); | |
1085 | } | |
1086 | ||
63181a97 MV |
1087 | return ret; |
1088 | } | |
1089 | #undef FUNC_NAME | |
1090 | ||
1091 | ||
1092 | SCM_DEFINE (scm_char_set, "char-set", 0, 0, 1, | |
1093 | (SCM rest), | |
1094 | "Return a character set containing all given characters.") | |
1095 | #define FUNC_NAME s_scm_char_set | |
1096 | { | |
1097 | SCM cs; | |
63181a97 MV |
1098 | int argnum = 1; |
1099 | ||
1100 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1101 | cs = make_char_set (FUNC_NAME); | |
d2e53ed6 | 1102 | while (!scm_is_null (rest)) |
63181a97 | 1103 | { |
f49dbcad | 1104 | scm_t_wchar c; |
63181a97 MV |
1105 | |
1106 | SCM_VALIDATE_CHAR_COPY (argnum, SCM_CAR (rest), c); | |
1107 | argnum++; | |
1108 | rest = SCM_CDR (rest); | |
f49dbcad | 1109 | SCM_CHARSET_SET (cs, c); |
63181a97 MV |
1110 | } |
1111 | return cs; | |
1112 | } | |
1113 | #undef FUNC_NAME | |
1114 | ||
1115 | ||
1116 | SCM_DEFINE (scm_list_to_char_set, "list->char-set", 1, 1, 0, | |
1117 | (SCM list, SCM base_cs), | |
1118 | "Convert the character list @var{list} to a character set. If\n" | |
1119 | "the character set @var{base_cs} is given, the character in this\n" | |
1120 | "set are also included in the result.") | |
1121 | #define FUNC_NAME s_scm_list_to_char_set | |
1122 | { | |
1123 | SCM cs; | |
63181a97 MV |
1124 | |
1125 | SCM_VALIDATE_LIST (1, list); | |
1126 | if (SCM_UNBNDP (base_cs)) | |
1127 | cs = make_char_set (FUNC_NAME); | |
1128 | else | |
1129 | { | |
1130 | SCM_VALIDATE_SMOB (2, base_cs, charset); | |
1131 | cs = scm_char_set_copy (base_cs); | |
1132 | } | |
d2e53ed6 | 1133 | while (!scm_is_null (list)) |
63181a97 MV |
1134 | { |
1135 | SCM chr = SCM_CAR (list); | |
f49dbcad | 1136 | scm_t_wchar c; |
63181a97 MV |
1137 | |
1138 | SCM_VALIDATE_CHAR_COPY (0, chr, c); | |
1139 | list = SCM_CDR (list); | |
1140 | ||
f49dbcad MG |
1141 | |
1142 | SCM_CHARSET_SET (cs, c); | |
63181a97 MV |
1143 | } |
1144 | return cs; | |
1145 | } | |
1146 | #undef FUNC_NAME | |
1147 | ||
1148 | ||
1149 | SCM_DEFINE (scm_list_to_char_set_x, "list->char-set!", 2, 0, 0, | |
f49dbcad MG |
1150 | (SCM list, SCM base_cs), |
1151 | "Convert the character list @var{list} to a character set. The\n" | |
1152 | "characters are added to @var{base_cs} and @var{base_cs} is\n" | |
1153 | "returned.") | |
63181a97 MV |
1154 | #define FUNC_NAME s_scm_list_to_char_set_x |
1155 | { | |
63181a97 MV |
1156 | SCM_VALIDATE_LIST (1, list); |
1157 | SCM_VALIDATE_SMOB (2, base_cs, charset); | |
d2e53ed6 | 1158 | while (!scm_is_null (list)) |
63181a97 MV |
1159 | { |
1160 | SCM chr = SCM_CAR (list); | |
f49dbcad | 1161 | scm_t_wchar c; |
63181a97 MV |
1162 | |
1163 | SCM_VALIDATE_CHAR_COPY (0, chr, c); | |
1164 | list = SCM_CDR (list); | |
1165 | ||
f49dbcad | 1166 | SCM_CHARSET_SET (base_cs, c); |
63181a97 MV |
1167 | } |
1168 | return base_cs; | |
1169 | } | |
1170 | #undef FUNC_NAME | |
1171 | ||
1172 | ||
1173 | SCM_DEFINE (scm_string_to_char_set, "string->char-set", 1, 1, 0, | |
1174 | (SCM str, SCM base_cs), | |
1175 | "Convert the string @var{str} to a character set. If the\n" | |
1176 | "character set @var{base_cs} is given, the characters in this\n" | |
1177 | "set are also included in the result.") | |
1178 | #define FUNC_NAME s_scm_string_to_char_set | |
1179 | { | |
1180 | SCM cs; | |
63181a97 MV |
1181 | size_t k = 0, len; |
1182 | ||
1183 | SCM_VALIDATE_STRING (1, str); | |
1184 | if (SCM_UNBNDP (base_cs)) | |
1185 | cs = make_char_set (FUNC_NAME); | |
1186 | else | |
1187 | { | |
1188 | SCM_VALIDATE_SMOB (2, base_cs, charset); | |
1189 | cs = scm_char_set_copy (base_cs); | |
1190 | } | |
63181a97 MV |
1191 | len = scm_i_string_length (str); |
1192 | while (k < len) | |
1193 | { | |
f49dbcad MG |
1194 | scm_t_wchar c = scm_i_string_ref (str, k++); |
1195 | SCM_CHARSET_SET (cs, c); | |
63181a97 MV |
1196 | } |
1197 | scm_remember_upto_here_1 (str); | |
1198 | return cs; | |
1199 | } | |
1200 | #undef FUNC_NAME | |
1201 | ||
1202 | ||
1203 | SCM_DEFINE (scm_string_to_char_set_x, "string->char-set!", 2, 0, 0, | |
f49dbcad MG |
1204 | (SCM str, SCM base_cs), |
1205 | "Convert the string @var{str} to a character set. The\n" | |
1206 | "characters from the string are added to @var{base_cs}, and\n" | |
1207 | "@var{base_cs} is returned.") | |
63181a97 MV |
1208 | #define FUNC_NAME s_scm_string_to_char_set_x |
1209 | { | |
63181a97 MV |
1210 | size_t k = 0, len; |
1211 | ||
1212 | SCM_VALIDATE_STRING (1, str); | |
1213 | SCM_VALIDATE_SMOB (2, base_cs, charset); | |
63181a97 MV |
1214 | len = scm_i_string_length (str); |
1215 | while (k < len) | |
1216 | { | |
f49dbcad MG |
1217 | scm_t_wchar c = scm_i_string_ref (str, k++); |
1218 | SCM_CHARSET_SET (base_cs, c); | |
63181a97 MV |
1219 | } |
1220 | scm_remember_upto_here_1 (str); | |
1221 | return base_cs; | |
1222 | } | |
1223 | #undef FUNC_NAME | |
1224 | ||
1225 | ||
1226 | SCM_DEFINE (scm_char_set_filter, "char-set-filter", 2, 1, 0, | |
1227 | (SCM pred, SCM cs, SCM base_cs), | |
1228 | "Return a character set containing every character from @var{cs}\n" | |
1229 | "so that it satisfies @var{pred}. If provided, the characters\n" | |
1230 | "from @var{base_cs} are added to the result.") | |
1231 | #define FUNC_NAME s_scm_char_set_filter | |
1232 | { | |
1233 | SCM ret; | |
1234 | int k; | |
f49dbcad MG |
1235 | scm_t_wchar n; |
1236 | scm_t_char_set *p; | |
63181a97 MV |
1237 | |
1238 | SCM_VALIDATE_PROC (1, pred); | |
1239 | SCM_VALIDATE_SMOB (2, cs, charset); | |
1240 | if (!SCM_UNBNDP (base_cs)) | |
1241 | { | |
1242 | SCM_VALIDATE_SMOB (3, base_cs, charset); | |
1243 | ret = scm_char_set_copy (base_cs); | |
1244 | } | |
1245 | else | |
1246 | ret = make_char_set (FUNC_NAME); | |
63181a97 | 1247 | |
f49dbcad MG |
1248 | p = SCM_CHARSET_DATA (cs); |
1249 | ||
1250 | if (p->len == 0) | |
1251 | return ret; | |
1252 | ||
1253 | for (k = 0; k < p->len; k++) | |
1254 | for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++) | |
1255 | { | |
1256 | SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n)); | |
1257 | ||
1258 | if (scm_is_true (res)) | |
1259 | SCM_CHARSET_SET (ret, n); | |
1260 | } | |
63181a97 MV |
1261 | return ret; |
1262 | } | |
1263 | #undef FUNC_NAME | |
1264 | ||
1265 | ||
1266 | SCM_DEFINE (scm_char_set_filter_x, "char-set-filter!", 3, 0, 0, | |
1267 | (SCM pred, SCM cs, SCM base_cs), | |
1268 | "Return a character set containing every character from @var{cs}\n" | |
1269 | "so that it satisfies @var{pred}. The characters are added to\n" | |
1270 | "@var{base_cs} and @var{base_cs} is returned.") | |
1271 | #define FUNC_NAME s_scm_char_set_filter_x | |
1272 | { | |
1273 | int k; | |
f49dbcad MG |
1274 | scm_t_wchar n; |
1275 | scm_t_char_set *p; | |
63181a97 MV |
1276 | |
1277 | SCM_VALIDATE_PROC (1, pred); | |
1278 | SCM_VALIDATE_SMOB (2, cs, charset); | |
1279 | SCM_VALIDATE_SMOB (3, base_cs, charset); | |
f49dbcad MG |
1280 | p = SCM_CHARSET_DATA (cs); |
1281 | if (p->len == 0) | |
1282 | return base_cs; | |
63181a97 | 1283 | |
f49dbcad MG |
1284 | for (k = 0; k < p->len; k++) |
1285 | for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++) | |
1286 | { | |
bde543e8 | 1287 | SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n)); |
f49dbcad MG |
1288 | |
1289 | if (scm_is_true (res)) | |
1290 | SCM_CHARSET_SET (base_cs, n); | |
1291 | } | |
63181a97 MV |
1292 | return base_cs; |
1293 | } | |
1294 | #undef FUNC_NAME | |
1295 | ||
1296 | ||
91772d8f MG |
1297 | /* Return a character set containing all the characters from [LOWER,UPPER), |
1298 | giving range errors if ERROR, adding chars from BASE_CS, and recycling | |
1299 | BASE_CS if REUSE is true. */ | |
1300 | static SCM | |
1301 | scm_i_ucs_range_to_char_set (const char *FUNC_NAME, SCM lower, SCM upper, | |
1302 | SCM error, SCM base_cs, int reuse) | |
63181a97 MV |
1303 | { |
1304 | SCM cs; | |
1305 | size_t clower, cupper; | |
63181a97 MV |
1306 | |
1307 | clower = scm_to_size_t (lower); | |
91772d8f | 1308 | cupper = scm_to_size_t (upper) - 1; |
63181a97 MV |
1309 | SCM_ASSERT_RANGE (2, upper, cupper >= clower); |
1310 | if (!SCM_UNBNDP (error)) | |
1311 | { | |
1312 | if (scm_is_true (error)) | |
f49dbcad MG |
1313 | { |
1314 | SCM_ASSERT_RANGE (1, lower, SCM_IS_UNICODE_CHAR (clower)); | |
1315 | SCM_ASSERT_RANGE (2, upper, SCM_IS_UNICODE_CHAR (cupper)); | |
91772d8f MG |
1316 | if (clower < SCM_CODEPOINT_SURROGATE_START |
1317 | && cupper > SCM_CODEPOINT_SURROGATE_END) | |
1318 | scm_error(scm_out_of_range_key, | |
1319 | FUNC_NAME, "invalid range - contains surrogate characters: ~S to ~S", | |
1320 | scm_list_2 (lower, upper), scm_list_1 (upper)); | |
f49dbcad | 1321 | } |
63181a97 | 1322 | } |
91772d8f | 1323 | |
63181a97 MV |
1324 | if (SCM_UNBNDP (base_cs)) |
1325 | cs = make_char_set (FUNC_NAME); | |
1326 | else | |
1327 | { | |
4c0fc007 | 1328 | SCM_VALIDATE_SMOB (3, base_cs, charset); |
91772d8f MG |
1329 | if (reuse) |
1330 | cs = base_cs; | |
1331 | else | |
1332 | cs = scm_char_set_copy (base_cs); | |
63181a97 | 1333 | } |
91772d8f MG |
1334 | |
1335 | if ((clower >= SCM_CODEPOINT_SURROGATE_START && clower <= SCM_CODEPOINT_SURROGATE_END) | |
1336 | && (cupper >= SCM_CODEPOINT_SURROGATE_START && cupper <= SCM_CODEPOINT_SURROGATE_END)) | |
1337 | return cs; | |
1338 | ||
1339 | if (clower > SCM_CODEPOINT_MAX) | |
1340 | clower = SCM_CODEPOINT_MAX; | |
1341 | if (clower >= SCM_CODEPOINT_SURROGATE_START && clower <= SCM_CODEPOINT_SURROGATE_END) | |
1342 | clower = SCM_CODEPOINT_SURROGATE_END + 1; | |
1343 | if (cupper > SCM_CODEPOINT_MAX) | |
1344 | cupper = SCM_CODEPOINT_MAX; | |
1345 | if (cupper >= SCM_CODEPOINT_SURROGATE_START && cupper <= SCM_CODEPOINT_SURROGATE_END) | |
1346 | cupper = SCM_CODEPOINT_SURROGATE_START - 1; | |
1347 | if (clower < SCM_CODEPOINT_SURROGATE_START && cupper > SCM_CODEPOINT_SURROGATE_END) | |
63181a97 | 1348 | { |
91772d8f MG |
1349 | scm_i_charset_set_range (SCM_CHARSET_DATA (cs), clower, SCM_CODEPOINT_SURROGATE_START - 1); |
1350 | scm_i_charset_set_range (SCM_CHARSET_DATA (cs), SCM_CODEPOINT_SURROGATE_END + 1, cupper); | |
63181a97 | 1351 | } |
91772d8f MG |
1352 | else |
1353 | scm_i_charset_set_range (SCM_CHARSET_DATA (cs), clower, cupper); | |
63181a97 MV |
1354 | return cs; |
1355 | } | |
91772d8f MG |
1356 | |
1357 | SCM_DEFINE (scm_ucs_range_to_char_set, "ucs-range->char-set", 2, 2, 0, | |
1358 | (SCM lower, SCM upper, SCM error, SCM base_cs), | |
1359 | "Return a character set containing all characters whose\n" | |
1360 | "character codes lie in the half-open range\n" | |
1361 | "[@var{lower},@var{upper}).\n" | |
1362 | "\n" | |
1363 | "If @var{error} is a true value, an error is signalled if the\n" | |
1364 | "specified range contains characters which are not valid\n" | |
1365 | "Unicode code points. If @var{error} is @code{#f},\n" | |
ffb62a43 | 1366 | "these characters are silently left out of the resulting\n" |
91772d8f MG |
1367 | "character set.\n" |
1368 | "\n" | |
1369 | "The characters in @var{base_cs} are added to the result, if\n" | |
1370 | "given.") | |
1371 | #define FUNC_NAME s_scm_ucs_range_to_char_set | |
1372 | { | |
1373 | return scm_i_ucs_range_to_char_set (FUNC_NAME, lower, upper, | |
1374 | error, base_cs, 0); | |
1375 | } | |
63181a97 MV |
1376 | #undef FUNC_NAME |
1377 | ||
1378 | ||
1379 | SCM_DEFINE (scm_ucs_range_to_char_set_x, "ucs-range->char-set!", 4, 0, 0, | |
1380 | (SCM lower, SCM upper, SCM error, SCM base_cs), | |
1381 | "Return a character set containing all characters whose\n" | |
1382 | "character codes lie in the half-open range\n" | |
1383 | "[@var{lower},@var{upper}).\n" | |
1384 | "\n" | |
1385 | "If @var{error} is a true value, an error is signalled if the\n" | |
1386 | "specified range contains characters which are not contained in\n" | |
1387 | "the implemented character range. If @var{error} is @code{#f},\n" | |
ffb62a43 | 1388 | "these characters are silently left out of the resulting\n" |
63181a97 MV |
1389 | "character set.\n" |
1390 | "\n" | |
1391 | "The characters are added to @var{base_cs} and @var{base_cs} is\n" | |
1392 | "returned.") | |
1393 | #define FUNC_NAME s_scm_ucs_range_to_char_set_x | |
1394 | { | |
91772d8f MG |
1395 | SCM_VALIDATE_SMOB (4, base_cs, charset); |
1396 | return scm_i_ucs_range_to_char_set (FUNC_NAME, lower, upper, | |
1397 | error, base_cs, 1); | |
63181a97 MV |
1398 | } |
1399 | #undef FUNC_NAME | |
1400 | ||
1401 | SCM_DEFINE (scm_to_char_set, "->char-set", 1, 0, 0, | |
1402 | (SCM x), | |
1403 | "Coerces x into a char-set. @var{x} may be a string, character or char-set. A string is converted to the set of its constituent characters; a character is converted to a singleton set; a char-set is returned as-is.") | |
1404 | #define FUNC_NAME s_scm_to_char_set | |
1405 | { | |
1406 | if (scm_is_string (x)) | |
1407 | return scm_string_to_char_set (x, SCM_UNDEFINED); | |
1408 | else if (SCM_CHARP (x)) | |
1409 | return scm_char_set (scm_list_1 (x)); | |
1410 | else if (SCM_SMOB_PREDICATE (scm_tc16_charset, x)) | |
1411 | return x; | |
1412 | else | |
1413 | scm_wrong_type_arg (NULL, 0, x); | |
1414 | } | |
1415 | #undef FUNC_NAME | |
1416 | ||
1417 | SCM_DEFINE (scm_char_set_size, "char-set-size", 1, 0, 0, | |
1418 | (SCM cs), | |
1419 | "Return the number of elements in character set @var{cs}.") | |
1420 | #define FUNC_NAME s_scm_char_set_size | |
1421 | { | |
1422 | int k, count = 0; | |
f49dbcad | 1423 | scm_t_char_set *cs_data; |
63181a97 MV |
1424 | |
1425 | SCM_VALIDATE_SMOB (1, cs, charset); | |
f49dbcad MG |
1426 | cs_data = SCM_CHARSET_DATA (cs); |
1427 | ||
1428 | if (cs_data->len == 0) | |
1429 | return scm_from_int (0); | |
1430 | ||
1431 | for (k = 0; k < cs_data->len; k++) | |
1432 | count += cs_data->ranges[k].hi - cs_data->ranges[k].lo + 1; | |
1433 | ||
1434 | return scm_from_int (count); | |
63181a97 MV |
1435 | } |
1436 | #undef FUNC_NAME | |
1437 | ||
1438 | ||
1439 | SCM_DEFINE (scm_char_set_count, "char-set-count", 2, 0, 0, | |
1440 | (SCM pred, SCM cs), | |
1441 | "Return the number of the elements int the character set\n" | |
1442 | "@var{cs} which satisfy the predicate @var{pred}.") | |
1443 | #define FUNC_NAME s_scm_char_set_count | |
1444 | { | |
1445 | int k, count = 0; | |
f49dbcad MG |
1446 | scm_t_wchar n; |
1447 | scm_t_char_set *cs_data; | |
63181a97 MV |
1448 | |
1449 | SCM_VALIDATE_PROC (1, pred); | |
1450 | SCM_VALIDATE_SMOB (2, cs, charset); | |
f49dbcad MG |
1451 | cs_data = SCM_CHARSET_DATA (cs); |
1452 | if (cs_data->len == 0) | |
1453 | return scm_from_int (0); | |
63181a97 | 1454 | |
f49dbcad MG |
1455 | for (k = 0; k < cs_data->len; k++) |
1456 | for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) | |
63181a97 | 1457 | { |
f49dbcad MG |
1458 | SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n)); |
1459 | if (scm_is_true (res)) | |
1460 | count++; | |
63181a97 MV |
1461 | } |
1462 | return SCM_I_MAKINUM (count); | |
1463 | } | |
1464 | #undef FUNC_NAME | |
1465 | ||
1466 | ||
1467 | SCM_DEFINE (scm_char_set_to_list, "char-set->list", 1, 0, 0, | |
1468 | (SCM cs), | |
1469 | "Return a list containing the elements of the character set\n" | |
1470 | "@var{cs}.") | |
1471 | #define FUNC_NAME s_scm_char_set_to_list | |
1472 | { | |
1473 | int k; | |
f49dbcad | 1474 | scm_t_wchar n; |
63181a97 | 1475 | SCM result = SCM_EOL; |
f49dbcad | 1476 | scm_t_char_set *p; |
63181a97 MV |
1477 | |
1478 | SCM_VALIDATE_SMOB (1, cs, charset); | |
f49dbcad MG |
1479 | p = SCM_CHARSET_DATA (cs); |
1480 | if (p->len == 0) | |
1481 | return SCM_EOL; | |
1482 | ||
1483 | for (k = p->len - 1; k >= 0; k--) | |
1484 | for (n = p->ranges[k].hi; n >= p->ranges[k].lo; n--) | |
1485 | result = scm_cons (SCM_MAKE_CHAR (n), result); | |
63181a97 MV |
1486 | return result; |
1487 | } | |
1488 | #undef FUNC_NAME | |
1489 | ||
1490 | ||
1491 | SCM_DEFINE (scm_char_set_to_string, "char-set->string", 1, 0, 0, | |
1492 | (SCM cs), | |
1493 | "Return a string containing the elements of the character set\n" | |
1494 | "@var{cs}. The order in which the characters are placed in the\n" | |
1495 | "string is not defined.") | |
1496 | #define FUNC_NAME s_scm_char_set_to_string | |
1497 | { | |
1498 | int k; | |
1499 | int count = 0; | |
1500 | int idx = 0; | |
f49dbcad | 1501 | int wide = 0; |
63181a97 | 1502 | SCM result; |
f49dbcad MG |
1503 | scm_t_wchar n; |
1504 | scm_t_char_set *cs_data; | |
1505 | char *buf; | |
1506 | scm_t_wchar *wbuf; | |
63181a97 MV |
1507 | |
1508 | SCM_VALIDATE_SMOB (1, cs, charset); | |
f49dbcad MG |
1509 | cs_data = SCM_CHARSET_DATA (cs); |
1510 | if (cs_data->len == 0) | |
1511 | return scm_nullstr; | |
1512 | ||
1513 | if (cs_data->ranges[cs_data->len - 1].hi > 255) | |
1514 | wide = 1; | |
1515 | ||
1516 | count = scm_to_int (scm_char_set_size (cs)); | |
1517 | if (wide) | |
190d4b0d | 1518 | result = scm_i_make_wide_string (count, &wbuf, 0); |
f49dbcad | 1519 | else |
190d4b0d | 1520 | result = scm_i_make_string (count, &buf, 0); |
f49dbcad MG |
1521 | |
1522 | for (k = 0; k < cs_data->len; k++) | |
1523 | for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) | |
1524 | { | |
1525 | if (wide) | |
1526 | wbuf[idx++] = n; | |
1527 | else | |
1528 | buf[idx++] = n; | |
1529 | } | |
63181a97 MV |
1530 | return result; |
1531 | } | |
1532 | #undef FUNC_NAME | |
1533 | ||
1534 | ||
1535 | SCM_DEFINE (scm_char_set_contains_p, "char-set-contains?", 2, 0, 0, | |
1536 | (SCM cs, SCM ch), | |
1537 | "Return @code{#t} iff the character @var{ch} is contained in the\n" | |
1538 | "character set @var{cs}.") | |
1539 | #define FUNC_NAME s_scm_char_set_contains_p | |
1540 | { | |
1541 | SCM_VALIDATE_SMOB (1, cs, charset); | |
1542 | SCM_VALIDATE_CHAR (2, ch); | |
1543 | return scm_from_bool (SCM_CHARSET_GET (cs, SCM_CHAR (ch))); | |
1544 | } | |
1545 | #undef FUNC_NAME | |
1546 | ||
1547 | ||
1548 | SCM_DEFINE (scm_char_set_every, "char-set-every", 2, 0, 0, | |
1549 | (SCM pred, SCM cs), | |
1550 | "Return a true value if every character in the character set\n" | |
1551 | "@var{cs} satisfies the predicate @var{pred}.") | |
1552 | #define FUNC_NAME s_scm_char_set_every | |
1553 | { | |
1554 | int k; | |
f49dbcad | 1555 | scm_t_wchar n; |
63181a97 | 1556 | SCM res = SCM_BOOL_T; |
f49dbcad | 1557 | scm_t_char_set *cs_data; |
63181a97 MV |
1558 | |
1559 | SCM_VALIDATE_PROC (1, pred); | |
1560 | SCM_VALIDATE_SMOB (2, cs, charset); | |
1561 | ||
f49dbcad MG |
1562 | cs_data = SCM_CHARSET_DATA (cs); |
1563 | if (cs_data->len == 0) | |
1564 | return SCM_BOOL_T; | |
1565 | ||
1566 | for (k = 0; k < cs_data->len; k++) | |
1567 | for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) | |
63181a97 | 1568 | { |
f49dbcad MG |
1569 | res = scm_call_1 (pred, SCM_MAKE_CHAR (n)); |
1570 | if (scm_is_false (res)) | |
1571 | return res; | |
63181a97 | 1572 | } |
f49dbcad | 1573 | return SCM_BOOL_T; |
63181a97 MV |
1574 | } |
1575 | #undef FUNC_NAME | |
1576 | ||
1577 | ||
1578 | SCM_DEFINE (scm_char_set_any, "char-set-any", 2, 0, 0, | |
1579 | (SCM pred, SCM cs), | |
1580 | "Return a true value if any character in the character set\n" | |
1581 | "@var{cs} satisfies the predicate @var{pred}.") | |
1582 | #define FUNC_NAME s_scm_char_set_any | |
1583 | { | |
1584 | int k; | |
f49dbcad MG |
1585 | scm_t_wchar n; |
1586 | scm_t_char_set *cs_data; | |
63181a97 MV |
1587 | |
1588 | SCM_VALIDATE_PROC (1, pred); | |
1589 | SCM_VALIDATE_SMOB (2, cs, charset); | |
1590 | ||
693e7289 MG |
1591 | cs_data = SCM_CHARSET_DATA (cs); |
1592 | if (cs_data->len == 0) | |
1593 | return SCM_BOOL_T; | |
f49dbcad MG |
1594 | |
1595 | for (k = 0; k < cs_data->len; k++) | |
1596 | for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) | |
63181a97 | 1597 | { |
f49dbcad MG |
1598 | SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n)); |
1599 | if (scm_is_true (res)) | |
1600 | return res; | |
63181a97 MV |
1601 | } |
1602 | return SCM_BOOL_F; | |
1603 | } | |
1604 | #undef FUNC_NAME | |
1605 | ||
1606 | ||
1607 | SCM_DEFINE (scm_char_set_adjoin, "char-set-adjoin", 1, 0, 1, | |
f49dbcad MG |
1608 | (SCM cs, SCM rest), |
1609 | "Add all character arguments to the first argument, which must\n" | |
1610 | "be a character set.") | |
63181a97 MV |
1611 | #define FUNC_NAME s_scm_char_set_adjoin |
1612 | { | |
63181a97 MV |
1613 | SCM_VALIDATE_SMOB (1, cs, charset); |
1614 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1615 | cs = scm_char_set_copy (cs); | |
1616 | ||
d2e53ed6 | 1617 | while (!scm_is_null (rest)) |
63181a97 MV |
1618 | { |
1619 | SCM chr = SCM_CAR (rest); | |
f49dbcad | 1620 | scm_t_wchar c; |
63181a97 MV |
1621 | |
1622 | SCM_VALIDATE_CHAR_COPY (1, chr, c); | |
1623 | rest = SCM_CDR (rest); | |
1624 | ||
f49dbcad | 1625 | SCM_CHARSET_SET (cs, c); |
63181a97 MV |
1626 | } |
1627 | return cs; | |
1628 | } | |
1629 | #undef FUNC_NAME | |
1630 | ||
1631 | ||
1632 | SCM_DEFINE (scm_char_set_delete, "char-set-delete", 1, 0, 1, | |
f49dbcad MG |
1633 | (SCM cs, SCM rest), |
1634 | "Delete all character arguments from the first argument, which\n" | |
1635 | "must be a character set.") | |
63181a97 MV |
1636 | #define FUNC_NAME s_scm_char_set_delete |
1637 | { | |
63181a97 MV |
1638 | SCM_VALIDATE_SMOB (1, cs, charset); |
1639 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1640 | cs = scm_char_set_copy (cs); | |
1641 | ||
d2e53ed6 | 1642 | while (!scm_is_null (rest)) |
63181a97 MV |
1643 | { |
1644 | SCM chr = SCM_CAR (rest); | |
f49dbcad | 1645 | scm_t_wchar c; |
63181a97 MV |
1646 | |
1647 | SCM_VALIDATE_CHAR_COPY (1, chr, c); | |
1648 | rest = SCM_CDR (rest); | |
1649 | ||
f49dbcad | 1650 | SCM_CHARSET_UNSET (cs, c); |
63181a97 MV |
1651 | } |
1652 | return cs; | |
1653 | } | |
1654 | #undef FUNC_NAME | |
1655 | ||
1656 | ||
1657 | SCM_DEFINE (scm_char_set_adjoin_x, "char-set-adjoin!", 1, 0, 1, | |
f49dbcad MG |
1658 | (SCM cs, SCM rest), |
1659 | "Add all character arguments to the first argument, which must\n" | |
1660 | "be a character set.") | |
63181a97 MV |
1661 | #define FUNC_NAME s_scm_char_set_adjoin_x |
1662 | { | |
63181a97 MV |
1663 | SCM_VALIDATE_SMOB (1, cs, charset); |
1664 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1665 | ||
d2e53ed6 | 1666 | while (!scm_is_null (rest)) |
63181a97 MV |
1667 | { |
1668 | SCM chr = SCM_CAR (rest); | |
f49dbcad | 1669 | scm_t_wchar c; |
63181a97 MV |
1670 | |
1671 | SCM_VALIDATE_CHAR_COPY (1, chr, c); | |
1672 | rest = SCM_CDR (rest); | |
1673 | ||
f49dbcad | 1674 | SCM_CHARSET_SET (cs, c); |
63181a97 MV |
1675 | } |
1676 | return cs; | |
1677 | } | |
1678 | #undef FUNC_NAME | |
1679 | ||
1680 | ||
1681 | SCM_DEFINE (scm_char_set_delete_x, "char-set-delete!", 1, 0, 1, | |
f49dbcad MG |
1682 | (SCM cs, SCM rest), |
1683 | "Delete all character arguments from the first argument, which\n" | |
1684 | "must be a character set.") | |
63181a97 MV |
1685 | #define FUNC_NAME s_scm_char_set_delete_x |
1686 | { | |
63181a97 MV |
1687 | SCM_VALIDATE_SMOB (1, cs, charset); |
1688 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1689 | ||
d2e53ed6 | 1690 | while (!scm_is_null (rest)) |
63181a97 MV |
1691 | { |
1692 | SCM chr = SCM_CAR (rest); | |
f49dbcad | 1693 | scm_t_wchar c; |
63181a97 MV |
1694 | |
1695 | SCM_VALIDATE_CHAR_COPY (1, chr, c); | |
1696 | rest = SCM_CDR (rest); | |
1697 | ||
f49dbcad | 1698 | SCM_CHARSET_UNSET (cs, c); |
63181a97 MV |
1699 | } |
1700 | return cs; | |
1701 | } | |
1702 | #undef FUNC_NAME | |
1703 | ||
1704 | ||
1705 | SCM_DEFINE (scm_char_set_complement, "char-set-complement", 1, 0, 0, | |
f49dbcad | 1706 | (SCM cs), "Return the complement of the character set @var{cs}.") |
63181a97 MV |
1707 | #define FUNC_NAME s_scm_char_set_complement |
1708 | { | |
63181a97 | 1709 | SCM res; |
f49dbcad | 1710 | scm_t_char_set *p, *q; |
63181a97 MV |
1711 | |
1712 | SCM_VALIDATE_SMOB (1, cs, charset); | |
1713 | ||
1714 | res = make_char_set (FUNC_NAME); | |
f49dbcad MG |
1715 | p = SCM_CHARSET_DATA (res); |
1716 | q = SCM_CHARSET_DATA (cs); | |
1717 | ||
1718 | charsets_complement (p, q); | |
63181a97 MV |
1719 | return res; |
1720 | } | |
1721 | #undef FUNC_NAME | |
1722 | ||
1723 | ||
1724 | SCM_DEFINE (scm_char_set_union, "char-set-union", 0, 0, 1, | |
1725 | (SCM rest), | |
1726 | "Return the union of all argument character sets.") | |
1727 | #define FUNC_NAME s_scm_char_set_union | |
1728 | { | |
1729 | int c = 1; | |
1730 | SCM res; | |
f49dbcad | 1731 | scm_t_char_set *p; |
63181a97 MV |
1732 | |
1733 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1734 | ||
1735 | res = make_char_set (FUNC_NAME); | |
f49dbcad | 1736 | p = SCM_CHARSET_DATA (res); |
d2e53ed6 | 1737 | while (!scm_is_null (rest)) |
63181a97 | 1738 | { |
63181a97 MV |
1739 | SCM cs = SCM_CAR (rest); |
1740 | SCM_VALIDATE_SMOB (c, cs, charset); | |
1741 | c++; | |
1742 | rest = SCM_CDR (rest); | |
1743 | ||
f49dbcad MG |
1744 | |
1745 | charsets_union (p, (scm_t_char_set *) SCM_SMOB_DATA (cs)); | |
63181a97 MV |
1746 | } |
1747 | return res; | |
1748 | } | |
1749 | #undef FUNC_NAME | |
1750 | ||
1751 | ||
1752 | SCM_DEFINE (scm_char_set_intersection, "char-set-intersection", 0, 0, 1, | |
1753 | (SCM rest), | |
1754 | "Return the intersection of all argument character sets.") | |
1755 | #define FUNC_NAME s_scm_char_set_intersection | |
1756 | { | |
1757 | SCM res; | |
1758 | ||
1759 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1760 | ||
d2e53ed6 | 1761 | if (scm_is_null (rest)) |
63181a97 MV |
1762 | res = make_char_set (FUNC_NAME); |
1763 | else | |
1764 | { | |
f49dbcad | 1765 | scm_t_char_set *p; |
63181a97 MV |
1766 | int argnum = 2; |
1767 | ||
1768 | res = scm_char_set_copy (SCM_CAR (rest)); | |
f49dbcad | 1769 | p = SCM_CHARSET_DATA (res); |
63181a97 MV |
1770 | rest = SCM_CDR (rest); |
1771 | ||
d2e53ed6 | 1772 | while (scm_is_pair (rest)) |
f49dbcad MG |
1773 | { |
1774 | SCM cs = SCM_CAR (rest); | |
1775 | scm_t_char_set *cs_data; | |
1776 | ||
1777 | SCM_VALIDATE_SMOB (argnum, cs, charset); | |
1778 | argnum++; | |
1779 | cs_data = SCM_CHARSET_DATA (cs); | |
1780 | rest = SCM_CDR (rest); | |
1781 | charsets_intersection (p, cs_data); | |
1782 | } | |
63181a97 MV |
1783 | } |
1784 | ||
1785 | return res; | |
1786 | } | |
1787 | #undef FUNC_NAME | |
1788 | ||
1789 | ||
1790 | SCM_DEFINE (scm_char_set_difference, "char-set-difference", 1, 0, 1, | |
1791 | (SCM cs1, SCM rest), | |
1792 | "Return the difference of all argument character sets.") | |
1793 | #define FUNC_NAME s_scm_char_set_difference | |
1794 | { | |
1795 | int c = 2; | |
f49dbcad MG |
1796 | SCM res, compl; |
1797 | scm_t_char_set *p, *q; | |
63181a97 MV |
1798 | |
1799 | SCM_VALIDATE_SMOB (1, cs1, charset); | |
1800 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1801 | ||
1802 | res = scm_char_set_copy (cs1); | |
f49dbcad MG |
1803 | p = SCM_CHARSET_DATA (res); |
1804 | compl = make_char_set (FUNC_NAME); | |
1805 | q = SCM_CHARSET_DATA (compl); | |
d2e53ed6 | 1806 | while (!scm_is_null (rest)) |
63181a97 | 1807 | { |
63181a97 MV |
1808 | SCM cs = SCM_CAR (rest); |
1809 | SCM_VALIDATE_SMOB (c, cs, charset); | |
1810 | c++; | |
1811 | rest = SCM_CDR (rest); | |
1812 | ||
f49dbcad MG |
1813 | charsets_complement (q, SCM_CHARSET_DATA (cs)); |
1814 | charsets_intersection (p, q); | |
63181a97 MV |
1815 | } |
1816 | return res; | |
1817 | } | |
1818 | #undef FUNC_NAME | |
1819 | ||
1820 | ||
1821 | SCM_DEFINE (scm_char_set_xor, "char-set-xor", 0, 0, 1, | |
1822 | (SCM rest), | |
1823 | "Return the exclusive-or of all argument character sets.") | |
1824 | #define FUNC_NAME s_scm_char_set_xor | |
1825 | { | |
1826 | SCM res; | |
1827 | ||
1828 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1829 | ||
d2e53ed6 | 1830 | if (scm_is_null (rest)) |
63181a97 MV |
1831 | res = make_char_set (FUNC_NAME); |
1832 | else | |
1833 | { | |
1834 | int argnum = 2; | |
f49dbcad | 1835 | scm_t_char_set *p; |
63181a97 MV |
1836 | |
1837 | res = scm_char_set_copy (SCM_CAR (rest)); | |
f49dbcad | 1838 | p = SCM_CHARSET_DATA (res); |
63181a97 MV |
1839 | rest = SCM_CDR (rest); |
1840 | ||
d2e53ed6 | 1841 | while (scm_is_pair (rest)) |
f49dbcad MG |
1842 | { |
1843 | SCM cs = SCM_CAR (rest); | |
1844 | scm_t_char_set *cs_data; | |
1845 | ||
1846 | SCM_VALIDATE_SMOB (argnum, cs, charset); | |
1847 | argnum++; | |
1848 | cs_data = SCM_CHARSET_DATA (cs); | |
1849 | rest = SCM_CDR (rest); | |
1850 | ||
1851 | charsets_xor (p, cs_data); | |
1852 | } | |
63181a97 MV |
1853 | } |
1854 | return res; | |
1855 | } | |
1856 | #undef FUNC_NAME | |
1857 | ||
1858 | ||
1859 | SCM_DEFINE (scm_char_set_diff_plus_intersection, "char-set-diff+intersection", 1, 0, 1, | |
1860 | (SCM cs1, SCM rest), | |
1861 | "Return the difference and the intersection of all argument\n" | |
1862 | "character sets.") | |
1863 | #define FUNC_NAME s_scm_char_set_diff_plus_intersection | |
1864 | { | |
1865 | int c = 2; | |
1866 | SCM res1, res2; | |
f49dbcad | 1867 | scm_t_char_set *p, *q; |
63181a97 MV |
1868 | |
1869 | SCM_VALIDATE_SMOB (1, cs1, charset); | |
1870 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1871 | ||
1872 | res1 = scm_char_set_copy (cs1); | |
1873 | res2 = make_char_set (FUNC_NAME); | |
f49dbcad MG |
1874 | p = SCM_CHARSET_DATA (res1); |
1875 | q = SCM_CHARSET_DATA (res2); | |
d2e53ed6 | 1876 | while (!scm_is_null (rest)) |
63181a97 | 1877 | { |
63181a97 | 1878 | SCM cs = SCM_CAR (rest); |
f49dbcad | 1879 | scm_t_char_set *r; |
63181a97 MV |
1880 | |
1881 | SCM_VALIDATE_SMOB (c, cs, charset); | |
1882 | c++; | |
f49dbcad | 1883 | r = SCM_CHARSET_DATA (cs); |
63181a97 | 1884 | |
f49dbcad MG |
1885 | charsets_union (q, r); |
1886 | charsets_intersection (p, r); | |
63181a97 MV |
1887 | rest = SCM_CDR (rest); |
1888 | } | |
1889 | return scm_values (scm_list_2 (res1, res2)); | |
1890 | } | |
1891 | #undef FUNC_NAME | |
1892 | ||
1893 | ||
1894 | SCM_DEFINE (scm_char_set_complement_x, "char-set-complement!", 1, 0, 0, | |
f49dbcad | 1895 | (SCM cs), "Return the complement of the character set @var{cs}.") |
63181a97 MV |
1896 | #define FUNC_NAME s_scm_char_set_complement_x |
1897 | { | |
63181a97 | 1898 | SCM_VALIDATE_SMOB (1, cs, charset); |
f49dbcad | 1899 | cs = scm_char_set_complement (cs); |
63181a97 MV |
1900 | return cs; |
1901 | } | |
1902 | #undef FUNC_NAME | |
1903 | ||
1904 | ||
1905 | SCM_DEFINE (scm_char_set_union_x, "char-set-union!", 1, 0, 1, | |
f49dbcad MG |
1906 | (SCM cs1, SCM rest), |
1907 | "Return the union of all argument character sets.") | |
63181a97 MV |
1908 | #define FUNC_NAME s_scm_char_set_union_x |
1909 | { | |
63181a97 MV |
1910 | SCM_VALIDATE_SMOB (1, cs1, charset); |
1911 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1912 | ||
f49dbcad | 1913 | cs1 = scm_char_set_union (scm_cons (cs1, rest)); |
63181a97 MV |
1914 | return cs1; |
1915 | } | |
1916 | #undef FUNC_NAME | |
1917 | ||
1918 | ||
1919 | SCM_DEFINE (scm_char_set_intersection_x, "char-set-intersection!", 1, 0, 1, | |
f49dbcad MG |
1920 | (SCM cs1, SCM rest), |
1921 | "Return the intersection of all argument character sets.") | |
63181a97 MV |
1922 | #define FUNC_NAME s_scm_char_set_intersection_x |
1923 | { | |
63181a97 MV |
1924 | SCM_VALIDATE_SMOB (1, cs1, charset); |
1925 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1926 | ||
f49dbcad | 1927 | cs1 = scm_char_set_intersection (scm_cons (cs1, rest)); |
63181a97 MV |
1928 | return cs1; |
1929 | } | |
1930 | #undef FUNC_NAME | |
1931 | ||
1932 | ||
1933 | SCM_DEFINE (scm_char_set_difference_x, "char-set-difference!", 1, 0, 1, | |
f49dbcad MG |
1934 | (SCM cs1, SCM rest), |
1935 | "Return the difference of all argument character sets.") | |
63181a97 MV |
1936 | #define FUNC_NAME s_scm_char_set_difference_x |
1937 | { | |
63181a97 MV |
1938 | SCM_VALIDATE_SMOB (1, cs1, charset); |
1939 | SCM_VALIDATE_REST_ARGUMENT (rest); | |
1940 | ||
f49dbcad | 1941 | cs1 = scm_char_set_difference (cs1, rest); |
63181a97 MV |
1942 | return cs1; |
1943 | } | |
1944 | #undef FUNC_NAME | |
1945 | ||
1946 | ||
1947 | SCM_DEFINE (scm_char_set_xor_x, "char-set-xor!", 1, 0, 1, | |
1948 | (SCM cs1, SCM rest), | |
1949 | "Return the exclusive-or of all argument character sets.") | |
1950 | #define FUNC_NAME s_scm_char_set_xor_x | |
1951 | { | |
1952 | /* a side-effecting variant should presumably give consistent results: | |
1953 | (define a (char-set #\a)) | |
1954 | (char-set-xor a a a) -> char set #\a | |
1955 | (char-set-xor! a a a) -> char set #\a | |
f49dbcad | 1956 | */ |
7165abeb MG |
1957 | cs1 = scm_char_set_xor (scm_cons (cs1, rest)); |
1958 | return cs1; | |
63181a97 MV |
1959 | } |
1960 | #undef FUNC_NAME | |
1961 | ||
1962 | ||
f49dbcad MG |
1963 | SCM_DEFINE (scm_char_set_diff_plus_intersection_x, |
1964 | "char-set-diff+intersection!", 2, 0, 1, (SCM cs1, SCM cs2, | |
1965 | SCM rest), | |
1966 | "Return the difference and the intersection of all argument\n" | |
1967 | "character sets.") | |
63181a97 MV |
1968 | #define FUNC_NAME s_scm_char_set_diff_plus_intersection_x |
1969 | { | |
f49dbcad | 1970 | SCM diff, intersect; |
63181a97 | 1971 | |
f49dbcad MG |
1972 | diff = scm_char_set_difference (cs1, scm_cons (cs2, rest)); |
1973 | intersect = | |
1974 | scm_char_set_intersection (scm_cons (cs1, scm_cons (cs2, rest))); | |
1975 | cs1 = diff; | |
1976 | cs2 = intersect; | |
63181a97 MV |
1977 | return scm_values (scm_list_2 (cs1, cs2)); |
1978 | } | |
1979 | #undef FUNC_NAME | |
1980 | ||
a17d2654 | 1981 | \f |
f49dbcad | 1982 | |
a17d2654 LC |
1983 | /* Standard character sets. */ |
1984 | ||
63181a97 MV |
1985 | SCM scm_char_set_lower_case; |
1986 | SCM scm_char_set_upper_case; | |
1987 | SCM scm_char_set_title_case; | |
1988 | SCM scm_char_set_letter; | |
1989 | SCM scm_char_set_digit; | |
1990 | SCM scm_char_set_letter_and_digit; | |
1991 | SCM scm_char_set_graphic; | |
1992 | SCM scm_char_set_printing; | |
1993 | SCM scm_char_set_whitespace; | |
1994 | SCM scm_char_set_iso_control; | |
1995 | SCM scm_char_set_punctuation; | |
1996 | SCM scm_char_set_symbol; | |
1997 | SCM scm_char_set_hex_digit; | |
1998 | SCM scm_char_set_blank; | |
1999 | SCM scm_char_set_ascii; | |
2000 | SCM scm_char_set_empty; | |
719bb8cd | 2001 | SCM scm_char_set_designated; |
63181a97 MV |
2002 | SCM scm_char_set_full; |
2003 | ||
63181a97 | 2004 | |
a17d2654 LC |
2005 | /* Create an empty character set and return it after binding it to NAME. */ |
2006 | static inline SCM | |
f49dbcad | 2007 | define_charset (const char *name, const scm_t_char_set *p) |
63181a97 | 2008 | { |
f49dbcad MG |
2009 | SCM cs; |
2010 | ||
2011 | SCM_NEWSMOB (cs, scm_tc16_charset, p); | |
63181a97 | 2012 | scm_c_define (name, cs); |
838aa000 | 2013 | return cs; |
63181a97 MV |
2014 | } |
2015 | ||
ba8477ec MG |
2016 | SCM_DEFINE (scm_sys_char_set_dump, "%char-set-dump", 1, 0, 0, (SCM charset), |
2017 | "Returns an association list containing debugging information\n" | |
2018 | "for @var{charset}. The association list has the following entries." | |
2019 | "@table @code\n" | |
2020 | "@item char-set\n" | |
2021 | "The char-set itself.\n" | |
2022 | "@item len\n" | |
2023 | "The number of character ranges the char-set contains\n" | |
2024 | "@item ranges\n" | |
2025 | "A list of lists where each sublist a range of code points\n" | |
2026 | "and their associated characters" | |
2027 | "@end table") | |
2028 | #define FUNC_NAME s_scm_sys_char_set_dump | |
63181a97 | 2029 | { |
ba8477ec MG |
2030 | SCM e1, e2, e3; |
2031 | SCM ranges = SCM_EOL, elt; | |
2032 | size_t i; | |
2033 | scm_t_char_set *cs; | |
2034 | char codepoint_string_lo[9], codepoint_string_hi[9]; | |
2035 | ||
2036 | SCM_VALIDATE_SMOB (1, charset, charset); | |
2037 | cs = SCM_CHARSET_DATA (charset); | |
2038 | ||
4a655e50 | 2039 | e1 = scm_cons (scm_from_latin1_symbol ("char-set"), |
ba8477ec | 2040 | charset); |
4a655e50 | 2041 | e2 = scm_cons (scm_from_latin1_symbol ("n"), |
ba8477ec MG |
2042 | scm_from_size_t (cs->len)); |
2043 | ||
f49dbcad | 2044 | for (i = 0; i < cs->len; i++) |
63181a97 | 2045 | { |
ba8477ec MG |
2046 | if (cs->ranges[i].lo > 0xFFFF) |
2047 | sprintf (codepoint_string_lo, "U+%06x", cs->ranges[i].lo); | |
f49dbcad | 2048 | else |
ba8477ec MG |
2049 | sprintf (codepoint_string_lo, "U+%04x", cs->ranges[i].lo); |
2050 | if (cs->ranges[i].hi > 0xFFFF) | |
2051 | sprintf (codepoint_string_hi, "U+%06x", cs->ranges[i].hi); | |
f49dbcad | 2052 | else |
ba8477ec MG |
2053 | sprintf (codepoint_string_hi, "U+%04x", cs->ranges[i].hi); |
2054 | ||
2055 | elt = scm_list_4 (SCM_MAKE_CHAR (cs->ranges[i].lo), | |
2056 | SCM_MAKE_CHAR (cs->ranges[i].hi), | |
2057 | scm_from_locale_string (codepoint_string_lo), | |
2058 | scm_from_locale_string (codepoint_string_hi)); | |
2059 | ranges = scm_append (scm_list_2 (ranges, | |
2060 | scm_list_1 (elt))); | |
63181a97 | 2061 | } |
4a655e50 | 2062 | e3 = scm_cons (scm_from_latin1_symbol ("ranges"), |
ba8477ec MG |
2063 | ranges); |
2064 | ||
2065 | return scm_list_3 (e1, e2, e3); | |
63181a97 | 2066 | } |
f49dbcad | 2067 | #undef FUNC_NAME |
ba8477ec | 2068 | |
a17d2654 | 2069 | \f |
f49dbcad MG |
2070 | |
2071 | ||
63181a97 MV |
2072 | void |
2073 | scm_init_srfi_14 (void) | |
2074 | { | |
f49dbcad | 2075 | scm_tc16_charset = scm_make_smob_type ("character-set", 0); |
63181a97 MV |
2076 | scm_set_smob_print (scm_tc16_charset, charset_print); |
2077 | ||
f49dbcad | 2078 | scm_tc16_charset_cursor = scm_make_smob_type ("char-set-cursor", 0); |
f49dbcad MG |
2079 | scm_set_smob_print (scm_tc16_charset_cursor, charset_cursor_print); |
2080 | ||
2081 | scm_char_set_upper_case = | |
2082 | define_charset ("char-set:upper-case", &cs_upper_case); | |
2083 | scm_char_set_lower_case = | |
2084 | define_charset ("char-set:lower-case", &cs_lower_case); | |
2085 | scm_char_set_title_case = | |
2086 | define_charset ("char-set:title-case", &cs_title_case); | |
2087 | scm_char_set_letter = define_charset ("char-set:letter", &cs_letter); | |
2088 | scm_char_set_digit = define_charset ("char-set:digit", &cs_digit); | |
2089 | scm_char_set_letter_and_digit = | |
2090 | define_charset ("char-set:letter+digit", &cs_letter_plus_digit); | |
2091 | scm_char_set_graphic = define_charset ("char-set:graphic", &cs_graphic); | |
2092 | scm_char_set_printing = define_charset ("char-set:printing", &cs_printing); | |
2093 | scm_char_set_whitespace = | |
2094 | define_charset ("char-set:whitespace", &cs_whitespace); | |
2095 | scm_char_set_iso_control = | |
2096 | define_charset ("char-set:iso-control", &cs_iso_control); | |
2097 | scm_char_set_punctuation = | |
2098 | define_charset ("char-set:punctuation", &cs_punctuation); | |
2099 | scm_char_set_symbol = define_charset ("char-set:symbol", &cs_symbol); | |
2100 | scm_char_set_hex_digit = | |
2101 | define_charset ("char-set:hex-digit", &cs_hex_digit); | |
2102 | scm_char_set_blank = define_charset ("char-set:blank", &cs_blank); | |
2103 | scm_char_set_ascii = define_charset ("char-set:ascii", &cs_ascii); | |
2104 | scm_char_set_empty = define_charset ("char-set:empty", &cs_empty); | |
719bb8cd | 2105 | scm_char_set_designated = define_charset ("char-set:designated", &cs_designated); |
f49dbcad | 2106 | scm_char_set_full = define_charset ("char-set:full", &cs_full); |
63181a97 MV |
2107 | |
2108 | #include "libguile/srfi-14.x" | |
2109 | } | |
2110 | ||
2111 | /* End of srfi-14.c. */ |