Commit | Line | Data |
---|---|---|
394449d5 | 1 | /* Copyright (C) 1995,1996,1998, 2000, 2001, 2004, 2006, 2008, 2009, |
cb8aaef4 MW |
2 | * 2010, 2011, 2014 Free Software Foundation, Inc. |
3 | * | |
73be1d9e | 4 | * This library is free software; you can redistribute it and/or |
53befeb7 NJ |
5 | * modify it under the terms of the GNU Lesser General Public License |
6 | * as published by the Free Software Foundation; either version 3 of | |
7 | * the License, or (at your option) any later version. | |
0f2d19dd | 8 | * |
53befeb7 NJ |
9 | * This library is distributed in the hope that it will be useful, but |
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
73be1d9e MV |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | * Lesser General Public License for more details. | |
0f2d19dd | 13 | * |
73be1d9e MV |
14 | * You should have received a copy of the GNU Lesser General Public |
15 | * License along with this library; if not, write to the Free Software | |
53befeb7 NJ |
16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
17 | * 02110-1301 USA | |
73be1d9e | 18 | */ |
1bbd0b84 | 19 | |
1bbd0b84 | 20 | |
0f2d19dd | 21 | \f |
dbb605f5 LC |
22 | #ifdef HAVE_CONFIG_H |
23 | # include <config.h> | |
24 | #endif | |
0f2d19dd | 25 | |
0f2d19dd | 26 | #include <ctype.h> |
465380de | 27 | #include <limits.h> |
904a78f1 | 28 | #include <unicase.h> |
0ca3a342 | 29 | #include <unictype.h> |
904a78f1 | 30 | |
a0599745 MD |
31 | #include "libguile/_scm.h" |
32 | #include "libguile/validate.h" | |
0f2d19dd | 33 | |
a0599745 | 34 | #include "libguile/chars.h" |
1fdbbd4c MV |
35 | #include "libguile/srfi-14.h" |
36 | ||
0f2d19dd JB |
37 | \f |
38 | ||
a1ec6916 | 39 | SCM_DEFINE (scm_char_p, "char?", 1, 0, 0, |
1bbd0b84 | 40 | (SCM x), |
cdbc7418 | 41 | "Return @code{#t} iff @var{x} is a character, else @code{#f}.") |
1bbd0b84 | 42 | #define FUNC_NAME s_scm_char_p |
0f2d19dd | 43 | { |
7888309b | 44 | return scm_from_bool (SCM_CHARP(x)); |
0f2d19dd | 45 | } |
1bbd0b84 | 46 | #undef FUNC_NAME |
0f2d19dd | 47 | |
f1d19308 | 48 | static SCM scm_i_char_eq_p (SCM x, SCM y, SCM rest); |
8a1f4f98 AW |
49 | SCM_DEFINE (scm_i_char_eq_p, "char=?", 0, 2, 1, |
50 | (SCM x, SCM y, SCM rest), | |
51 | "Return @code{#t} if the Unicode code point of @var{x} is equal to the\n" | |
52 | "code point of @var{y}, else @code{#f}.\n") | |
53 | #define FUNC_NAME s_scm_i_char_eq_p | |
54 | { | |
55 | if (SCM_UNBNDP (x) || SCM_UNBNDP (y)) | |
56 | return SCM_BOOL_T; | |
57 | while (!scm_is_null (rest)) | |
58 | { | |
59 | if (scm_is_false (scm_char_eq_p (x, y))) | |
60 | return SCM_BOOL_F; | |
61 | x = y; | |
62 | y = scm_car (rest); | |
63 | rest = scm_cdr (rest); | |
64 | } | |
65 | return scm_char_eq_p (x, y); | |
66 | } | |
67 | #undef FUNC_NAME | |
68 | ||
69 | SCM scm_char_eq_p (SCM x, SCM y) | |
70 | #define FUNC_NAME s_scm_i_char_eq_p | |
0f2d19dd | 71 | { |
362306b9 DH |
72 | SCM_VALIDATE_CHAR (1, x); |
73 | SCM_VALIDATE_CHAR (2, y); | |
bc36d050 | 74 | return scm_from_bool (scm_is_eq (x, y)); |
0f2d19dd | 75 | } |
1bbd0b84 | 76 | #undef FUNC_NAME |
0f2d19dd JB |
77 | |
78 | ||
f1d19308 | 79 | static SCM scm_i_char_less_p (SCM x, SCM y, SCM rest); |
8a1f4f98 AW |
80 | SCM_DEFINE (scm_i_char_less_p, "char<?", 0, 2, 1, |
81 | (SCM x, SCM y, SCM rest), | |
82 | "Return @code{#t} iff the code point of @var{x} is less than the code\n" | |
83 | "point of @var{y}, else @code{#f}.") | |
84 | #define FUNC_NAME s_scm_i_char_less_p | |
85 | { | |
86 | if (SCM_UNBNDP (x) || SCM_UNBNDP (y)) | |
87 | return SCM_BOOL_T; | |
88 | while (!scm_is_null (rest)) | |
89 | { | |
90 | if (scm_is_false (scm_char_less_p (x, y))) | |
91 | return SCM_BOOL_F; | |
92 | x = y; | |
93 | y = scm_car (rest); | |
94 | rest = scm_cdr (rest); | |
95 | } | |
96 | return scm_char_less_p (x, y); | |
97 | } | |
98 | #undef FUNC_NAME | |
99 | ||
100 | SCM scm_char_less_p (SCM x, SCM y) | |
101 | #define FUNC_NAME s_scm_i_char_less_p | |
0f2d19dd | 102 | { |
34d19ef6 HWN |
103 | SCM_VALIDATE_CHAR (1, x); |
104 | SCM_VALIDATE_CHAR (2, y); | |
7888309b | 105 | return scm_from_bool (SCM_CHAR(x) < SCM_CHAR(y)); |
0f2d19dd | 106 | } |
1bbd0b84 | 107 | #undef FUNC_NAME |
0f2d19dd | 108 | |
f1d19308 | 109 | static SCM scm_i_char_leq_p (SCM x, SCM y, SCM rest); |
8a1f4f98 AW |
110 | SCM_DEFINE (scm_i_char_leq_p, "char<=?", 0, 2, 1, |
111 | (SCM x, SCM y, SCM rest), | |
112 | "Return @code{#t} if the Unicode code point of @var{x} is less than or\n" | |
113 | "equal to the code point of @var{y}, else @code{#f}.") | |
114 | #define FUNC_NAME s_scm_i_char_leq_p | |
115 | { | |
116 | if (SCM_UNBNDP (x) || SCM_UNBNDP (y)) | |
117 | return SCM_BOOL_T; | |
118 | while (!scm_is_null (rest)) | |
119 | { | |
120 | if (scm_is_false (scm_char_leq_p (x, y))) | |
121 | return SCM_BOOL_F; | |
122 | x = y; | |
123 | y = scm_car (rest); | |
124 | rest = scm_cdr (rest); | |
125 | } | |
126 | return scm_char_leq_p (x, y); | |
127 | } | |
128 | #undef FUNC_NAME | |
129 | ||
130 | SCM scm_char_leq_p (SCM x, SCM y) | |
131 | #define FUNC_NAME s_scm_i_char_leq_p | |
0f2d19dd | 132 | { |
34d19ef6 HWN |
133 | SCM_VALIDATE_CHAR (1, x); |
134 | SCM_VALIDATE_CHAR (2, y); | |
7888309b | 135 | return scm_from_bool (SCM_CHAR(x) <= SCM_CHAR(y)); |
0f2d19dd | 136 | } |
1bbd0b84 | 137 | #undef FUNC_NAME |
0f2d19dd | 138 | |
f1d19308 | 139 | static SCM scm_i_char_gr_p (SCM x, SCM y, SCM rest); |
8a1f4f98 AW |
140 | SCM_DEFINE (scm_i_char_gr_p, "char>?", 0, 2, 1, |
141 | (SCM x, SCM y, SCM rest), | |
142 | "Return @code{#t} if the Unicode code point of @var{x} is greater than\n" | |
143 | "the code point of @var{y}, else @code{#f}.") | |
144 | #define FUNC_NAME s_scm_i_char_gr_p | |
145 | { | |
146 | if (SCM_UNBNDP (x) || SCM_UNBNDP (y)) | |
147 | return SCM_BOOL_T; | |
148 | while (!scm_is_null (rest)) | |
149 | { | |
150 | if (scm_is_false (scm_char_gr_p (x, y))) | |
151 | return SCM_BOOL_F; | |
152 | x = y; | |
153 | y = scm_car (rest); | |
154 | rest = scm_cdr (rest); | |
155 | } | |
156 | return scm_char_gr_p (x, y); | |
157 | } | |
158 | #undef FUNC_NAME | |
159 | ||
160 | SCM scm_char_gr_p (SCM x, SCM y) | |
161 | #define FUNC_NAME s_scm_i_char_gr_p | |
0f2d19dd | 162 | { |
34d19ef6 HWN |
163 | SCM_VALIDATE_CHAR (1, x); |
164 | SCM_VALIDATE_CHAR (2, y); | |
7888309b | 165 | return scm_from_bool (SCM_CHAR(x) > SCM_CHAR(y)); |
0f2d19dd | 166 | } |
1bbd0b84 | 167 | #undef FUNC_NAME |
0f2d19dd | 168 | |
f1d19308 | 169 | static SCM scm_i_char_geq_p (SCM x, SCM y, SCM rest); |
8a1f4f98 AW |
170 | SCM_DEFINE (scm_i_char_geq_p, "char>=?", 0, 2, 1, |
171 | (SCM x, SCM y, SCM rest), | |
172 | "Return @code{#t} if the Unicode code point of @var{x} is greater than\n" | |
173 | "or equal to the code point of @var{y}, else @code{#f}.") | |
174 | #define FUNC_NAME s_scm_i_char_geq_p | |
175 | { | |
176 | if (SCM_UNBNDP (x) || SCM_UNBNDP (y)) | |
177 | return SCM_BOOL_T; | |
178 | while (!scm_is_null (rest)) | |
179 | { | |
180 | if (scm_is_false (scm_char_geq_p (x, y))) | |
181 | return SCM_BOOL_F; | |
182 | x = y; | |
183 | y = scm_car (rest); | |
184 | rest = scm_cdr (rest); | |
185 | } | |
186 | return scm_char_geq_p (x, y); | |
187 | } | |
188 | #undef FUNC_NAME | |
189 | ||
190 | SCM scm_char_geq_p (SCM x, SCM y) | |
191 | #define FUNC_NAME s_scm_i_char_geq_p | |
0f2d19dd | 192 | { |
34d19ef6 HWN |
193 | SCM_VALIDATE_CHAR (1, x); |
194 | SCM_VALIDATE_CHAR (2, y); | |
7888309b | 195 | return scm_from_bool (SCM_CHAR(x) >= SCM_CHAR(y)); |
0f2d19dd | 196 | } |
1bbd0b84 | 197 | #undef FUNC_NAME |
0f2d19dd | 198 | |
3f12aedb MG |
199 | /* FIXME?: R6RS specifies that these comparisons are case-folded. |
200 | This is the same thing as comparing the uppercase characters in | |
201 | practice, but, not in theory. Unicode has table containing their | |
202 | definition of case-folded character mappings. A more correct | |
203 | implementation would be to use that table and make a char-foldcase | |
204 | function. */ | |
205 | ||
f1d19308 | 206 | static SCM scm_i_char_ci_eq_p (SCM x, SCM y, SCM rest); |
8a1f4f98 AW |
207 | SCM_DEFINE (scm_i_char_ci_eq_p, "char-ci=?", 0, 2, 1, |
208 | (SCM x, SCM y, SCM rest), | |
209 | "Return @code{#t} if the case-folded Unicode code point of @var{x} is\n" | |
210 | "the same as the case-folded code point of @var{y}, else @code{#f}.") | |
211 | #define FUNC_NAME s_scm_i_char_ci_eq_p | |
212 | { | |
213 | if (SCM_UNBNDP (x) || SCM_UNBNDP (y)) | |
214 | return SCM_BOOL_T; | |
215 | while (!scm_is_null (rest)) | |
216 | { | |
217 | if (scm_is_false (scm_char_ci_eq_p (x, y))) | |
218 | return SCM_BOOL_F; | |
219 | x = y; | |
220 | y = scm_car (rest); | |
221 | rest = scm_cdr (rest); | |
222 | } | |
223 | return scm_char_ci_eq_p (x, y); | |
224 | } | |
225 | #undef FUNC_NAME | |
226 | ||
227 | SCM scm_char_ci_eq_p (SCM x, SCM y) | |
228 | #define FUNC_NAME s_scm_i_char_ci_eq_p | |
0f2d19dd | 229 | { |
34d19ef6 HWN |
230 | SCM_VALIDATE_CHAR (1, x); |
231 | SCM_VALIDATE_CHAR (2, y); | |
7888309b | 232 | return scm_from_bool (scm_c_upcase(SCM_CHAR(x))==scm_c_upcase(SCM_CHAR(y))); |
0f2d19dd | 233 | } |
1bbd0b84 | 234 | #undef FUNC_NAME |
0f2d19dd | 235 | |
f1d19308 | 236 | static SCM scm_i_char_ci_less_p (SCM x, SCM y, SCM rest); |
8a1f4f98 AW |
237 | SCM_DEFINE (scm_i_char_ci_less_p, "char-ci<?", 0, 2, 1, |
238 | (SCM x, SCM y, SCM rest), | |
239 | "Return @code{#t} if the case-folded Unicode code point of @var{x} is\n" | |
240 | "less than the case-folded code point of @var{y}, else @code{#f}.") | |
241 | #define FUNC_NAME s_scm_i_char_ci_less_p | |
242 | { | |
243 | if (SCM_UNBNDP (x) || SCM_UNBNDP (y)) | |
244 | return SCM_BOOL_T; | |
245 | while (!scm_is_null (rest)) | |
246 | { | |
247 | if (scm_is_false (scm_char_ci_less_p (x, y))) | |
248 | return SCM_BOOL_F; | |
249 | x = y; | |
250 | y = scm_car (rest); | |
251 | rest = scm_cdr (rest); | |
252 | } | |
253 | return scm_char_ci_less_p (x, y); | |
254 | } | |
255 | #undef FUNC_NAME | |
256 | ||
257 | SCM scm_char_ci_less_p (SCM x, SCM y) | |
258 | #define FUNC_NAME s_scm_i_char_ci_less_p | |
0f2d19dd | 259 | { |
34d19ef6 HWN |
260 | SCM_VALIDATE_CHAR (1, x); |
261 | SCM_VALIDATE_CHAR (2, y); | |
7888309b | 262 | return scm_from_bool ((scm_c_upcase(SCM_CHAR(x))) < scm_c_upcase(SCM_CHAR(y))); |
0f2d19dd | 263 | } |
1bbd0b84 | 264 | #undef FUNC_NAME |
0f2d19dd | 265 | |
f1d19308 | 266 | static SCM scm_i_char_ci_leq_p (SCM x, SCM y, SCM rest); |
8a1f4f98 AW |
267 | SCM_DEFINE (scm_i_char_ci_leq_p, "char-ci<=?", 0, 2, 1, |
268 | (SCM x, SCM y, SCM rest), | |
ffb62a43 | 269 | "Return @code{#t} iff the case-folded Unicode code point of @var{x} is\n" |
8a1f4f98 AW |
270 | "less than or equal to the case-folded code point of @var{y}, else\n" |
271 | "@code{#f}") | |
272 | #define FUNC_NAME s_scm_i_char_ci_leq_p | |
273 | { | |
274 | if (SCM_UNBNDP (x) || SCM_UNBNDP (y)) | |
275 | return SCM_BOOL_T; | |
276 | while (!scm_is_null (rest)) | |
277 | { | |
278 | if (scm_is_false (scm_char_ci_leq_p (x, y))) | |
279 | return SCM_BOOL_F; | |
280 | x = y; | |
281 | y = scm_car (rest); | |
282 | rest = scm_cdr (rest); | |
283 | } | |
284 | return scm_char_ci_leq_p (x, y); | |
285 | } | |
286 | #undef FUNC_NAME | |
287 | ||
288 | SCM scm_char_ci_leq_p (SCM x, SCM y) | |
289 | #define FUNC_NAME s_scm_i_char_ci_leq_p | |
0f2d19dd | 290 | { |
34d19ef6 HWN |
291 | SCM_VALIDATE_CHAR (1, x); |
292 | SCM_VALIDATE_CHAR (2, y); | |
7888309b | 293 | return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) <= scm_c_upcase(SCM_CHAR(y))); |
0f2d19dd | 294 | } |
1bbd0b84 | 295 | #undef FUNC_NAME |
0f2d19dd | 296 | |
f1d19308 | 297 | static SCM scm_i_char_ci_gr_p (SCM x, SCM y, SCM rest); |
8a1f4f98 AW |
298 | SCM_DEFINE (scm_i_char_ci_gr_p, "char-ci>?", 0, 2, 1, |
299 | (SCM x, SCM y, SCM rest), | |
300 | "Return @code{#t} iff the case-folded code point of @var{x} is greater\n" | |
301 | "than the case-folded code point of @var{y}, else @code{#f}.") | |
302 | #define FUNC_NAME s_scm_i_char_ci_gr_p | |
303 | { | |
304 | if (SCM_UNBNDP (x) || SCM_UNBNDP (y)) | |
305 | return SCM_BOOL_T; | |
306 | while (!scm_is_null (rest)) | |
307 | { | |
308 | if (scm_is_false (scm_char_ci_gr_p (x, y))) | |
309 | return SCM_BOOL_F; | |
310 | x = y; | |
311 | y = scm_car (rest); | |
312 | rest = scm_cdr (rest); | |
313 | } | |
314 | return scm_char_ci_gr_p (x, y); | |
315 | } | |
316 | #undef FUNC_NAME | |
317 | ||
318 | SCM scm_char_ci_gr_p (SCM x, SCM y) | |
319 | #define FUNC_NAME s_scm_i_char_ci_gr_p | |
0f2d19dd | 320 | { |
34d19ef6 HWN |
321 | SCM_VALIDATE_CHAR (1, x); |
322 | SCM_VALIDATE_CHAR (2, y); | |
7888309b | 323 | return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) > scm_c_upcase(SCM_CHAR(y))); |
0f2d19dd | 324 | } |
1bbd0b84 | 325 | #undef FUNC_NAME |
0f2d19dd | 326 | |
f1d19308 | 327 | static SCM scm_i_char_ci_geq_p (SCM x, SCM y, SCM rest); |
8a1f4f98 AW |
328 | SCM_DEFINE (scm_i_char_ci_geq_p, "char-ci>=?", 0, 2, 1, |
329 | (SCM x, SCM y, SCM rest), | |
330 | "Return @code{#t} iff the case-folded Unicode code point of @var{x} is\n" | |
331 | "greater than or equal to the case-folded code point of @var{y}, else\n" | |
332 | "@code{#f}.") | |
333 | #define FUNC_NAME s_scm_i_char_ci_geq_p | |
334 | { | |
335 | if (SCM_UNBNDP (x) || SCM_UNBNDP (y)) | |
336 | return SCM_BOOL_T; | |
337 | while (!scm_is_null (rest)) | |
338 | { | |
339 | if (scm_is_false (scm_char_ci_geq_p (x, y))) | |
340 | return SCM_BOOL_F; | |
341 | x = y; | |
342 | y = scm_car (rest); | |
343 | rest = scm_cdr (rest); | |
344 | } | |
345 | return scm_char_ci_geq_p (x, y); | |
346 | } | |
347 | #undef FUNC_NAME | |
348 | ||
349 | SCM scm_char_ci_geq_p (SCM x, SCM y) | |
350 | #define FUNC_NAME s_scm_i_char_ci_geq_p | |
0f2d19dd | 351 | { |
34d19ef6 HWN |
352 | SCM_VALIDATE_CHAR (1, x); |
353 | SCM_VALIDATE_CHAR (2, y); | |
7888309b | 354 | return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) >= scm_c_upcase(SCM_CHAR(y))); |
0f2d19dd | 355 | } |
1bbd0b84 | 356 | #undef FUNC_NAME |
0f2d19dd JB |
357 | |
358 | ||
3b3b36dd | 359 | SCM_DEFINE (scm_char_alphabetic_p, "char-alphabetic?", 1, 0, 0, |
1bbd0b84 | 360 | (SCM chr), |
1fdbbd4c | 361 | "Return @code{#t} iff @var{chr} is alphabetic, else @code{#f}.\n") |
1bbd0b84 | 362 | #define FUNC_NAME s_scm_char_alphabetic_p |
0f2d19dd | 363 | { |
1fdbbd4c | 364 | return scm_char_set_contains_p (scm_char_set_letter, chr); |
0f2d19dd | 365 | } |
1bbd0b84 | 366 | #undef FUNC_NAME |
0f2d19dd | 367 | |
3b3b36dd | 368 | SCM_DEFINE (scm_char_numeric_p, "char-numeric?", 1, 0, 0, |
1bbd0b84 | 369 | (SCM chr), |
1fdbbd4c | 370 | "Return @code{#t} iff @var{chr} is numeric, else @code{#f}.\n") |
1bbd0b84 | 371 | #define FUNC_NAME s_scm_char_numeric_p |
0f2d19dd | 372 | { |
1fdbbd4c | 373 | return scm_char_set_contains_p (scm_char_set_digit, chr); |
0f2d19dd | 374 | } |
1bbd0b84 | 375 | #undef FUNC_NAME |
0f2d19dd | 376 | |
3b3b36dd | 377 | SCM_DEFINE (scm_char_whitespace_p, "char-whitespace?", 1, 0, 0, |
1bbd0b84 | 378 | (SCM chr), |
1fdbbd4c | 379 | "Return @code{#t} iff @var{chr} is whitespace, else @code{#f}.\n") |
1bbd0b84 | 380 | #define FUNC_NAME s_scm_char_whitespace_p |
0f2d19dd | 381 | { |
1fdbbd4c | 382 | return scm_char_set_contains_p (scm_char_set_whitespace, chr); |
0f2d19dd | 383 | } |
1bbd0b84 | 384 | #undef FUNC_NAME |
0f2d19dd JB |
385 | |
386 | ||
3b3b36dd | 387 | SCM_DEFINE (scm_char_upper_case_p, "char-upper-case?", 1, 0, 0, |
1bbd0b84 | 388 | (SCM chr), |
1fdbbd4c | 389 | "Return @code{#t} iff @var{chr} is uppercase, else @code{#f}.\n") |
1bbd0b84 | 390 | #define FUNC_NAME s_scm_char_upper_case_p |
0f2d19dd | 391 | { |
1fdbbd4c | 392 | return scm_char_set_contains_p (scm_char_set_upper_case, chr); |
0f2d19dd | 393 | } |
1bbd0b84 | 394 | #undef FUNC_NAME |
0f2d19dd JB |
395 | |
396 | ||
3b3b36dd | 397 | SCM_DEFINE (scm_char_lower_case_p, "char-lower-case?", 1, 0, 0, |
1bbd0b84 | 398 | (SCM chr), |
1fdbbd4c | 399 | "Return @code{#t} iff @var{chr} is lowercase, else @code{#f}.\n") |
1bbd0b84 | 400 | #define FUNC_NAME s_scm_char_lower_case_p |
0f2d19dd | 401 | { |
1fdbbd4c | 402 | return scm_char_set_contains_p (scm_char_set_lower_case, chr); |
0f2d19dd | 403 | } |
1bbd0b84 | 404 | #undef FUNC_NAME |
0f2d19dd | 405 | |
a1ec6916 | 406 | SCM_DEFINE (scm_char_is_both_p, "char-is-both?", 1, 0, 0, |
1bbd0b84 | 407 | (SCM chr), |
bb15a36c MG |
408 | "Return @code{#t} iff @var{chr} is either uppercase or lowercase, else\n" |
409 | "@code{#f}.\n") | |
1bbd0b84 | 410 | #define FUNC_NAME s_scm_char_is_both_p |
0f2d19dd | 411 | { |
1fdbbd4c MV |
412 | if (scm_is_true (scm_char_set_contains_p (scm_char_set_lower_case, chr))) |
413 | return SCM_BOOL_T; | |
414 | return scm_char_set_contains_p (scm_char_set_upper_case, chr); | |
0f2d19dd | 415 | } |
1bbd0b84 | 416 | #undef FUNC_NAME |
0f2d19dd JB |
417 | |
418 | ||
a1ec6916 | 419 | SCM_DEFINE (scm_char_to_integer, "char->integer", 1, 0, 0, |
1bbd0b84 | 420 | (SCM chr), |
bb15a36c | 421 | "Return the Unicode code point of @var{chr}.") |
1bbd0b84 | 422 | #define FUNC_NAME s_scm_char_to_integer |
0f2d19dd | 423 | { |
34d19ef6 | 424 | SCM_VALIDATE_CHAR (1, chr); |
904a78f1 | 425 | return scm_from_uint32 (SCM_CHAR(chr)); |
0f2d19dd | 426 | } |
1bbd0b84 | 427 | #undef FUNC_NAME |
0f2d19dd JB |
428 | |
429 | ||
3b3b36dd | 430 | SCM_DEFINE (scm_integer_to_char, "integer->char", 1, 0, 0, |
1bbd0b84 | 431 | (SCM n), |
bb15a36c MG |
432 | "Return the character that has Unicode code point @var{n}. The integer\n" |
433 | "@var{n} must be a valid code point. Valid code points are in the\n" | |
434 | "ranges 0 to @code{#xD7FF} inclusive or @code{#xE000} to\n" | |
435 | "@code{#x10FFFF} inclusive.") | |
1bbd0b84 | 436 | #define FUNC_NAME s_scm_integer_to_char |
0f2d19dd | 437 | { |
904a78f1 MG |
438 | scm_t_wchar cn; |
439 | ||
440 | cn = scm_to_wchar (n); | |
441 | ||
442 | /* Avoid the surrogates. */ | |
443 | if (!SCM_IS_UNICODE_CHAR (cn)) | |
444 | scm_out_of_range (FUNC_NAME, n); | |
445 | ||
446 | return SCM_MAKE_CHAR (cn); | |
0f2d19dd | 447 | } |
1bbd0b84 | 448 | #undef FUNC_NAME |
0f2d19dd JB |
449 | |
450 | ||
3b3b36dd | 451 | SCM_DEFINE (scm_char_upcase, "char-upcase", 1, 0, 0, |
1bbd0b84 | 452 | (SCM chr), |
cdbc7418 | 453 | "Return the uppercase character version of @var{chr}.") |
1bbd0b84 | 454 | #define FUNC_NAME s_scm_char_upcase |
0f2d19dd | 455 | { |
34d19ef6 | 456 | SCM_VALIDATE_CHAR (1, chr); |
904a78f1 | 457 | return SCM_MAKE_CHAR (scm_c_upcase (SCM_CHAR (chr))); |
0f2d19dd | 458 | } |
1bbd0b84 | 459 | #undef FUNC_NAME |
0f2d19dd JB |
460 | |
461 | ||
3b3b36dd | 462 | SCM_DEFINE (scm_char_downcase, "char-downcase", 1, 0, 0, |
1bbd0b84 | 463 | (SCM chr), |
cdbc7418 | 464 | "Return the lowercase character version of @var{chr}.") |
1bbd0b84 | 465 | #define FUNC_NAME s_scm_char_downcase |
0f2d19dd | 466 | { |
34d19ef6 | 467 | SCM_VALIDATE_CHAR (1, chr); |
904a78f1 | 468 | return SCM_MAKE_CHAR (scm_c_downcase (SCM_CHAR(chr))); |
0f2d19dd | 469 | } |
1bbd0b84 | 470 | #undef FUNC_NAME |
0f2d19dd | 471 | |
820f33aa JG |
472 | SCM_DEFINE (scm_char_titlecase, "char-titlecase", 1, 0, 0, |
473 | (SCM chr), | |
474 | "Return the titlecase character version of @var{chr}.") | |
475 | #define FUNC_NAME s_scm_char_titlecase | |
476 | { | |
477 | SCM_VALIDATE_CHAR (1, chr); | |
478 | return SCM_MAKE_CHAR (scm_c_titlecase (SCM_CHAR(chr))); | |
479 | } | |
480 | #undef FUNC_NAME | |
481 | ||
0ca3a342 JG |
482 | SCM_DEFINE (scm_char_general_category, "char-general-category", 1, 0, 0, |
483 | (SCM chr), | |
484 | "Return a symbol representing the Unicode general category of " | |
485 | "@var{chr} or @code{#f} if a named category cannot be found.") | |
486 | #define FUNC_NAME s_scm_char_general_category | |
487 | { | |
488 | const char *sym; | |
489 | uc_general_category_t cat; | |
490 | ||
491 | SCM_VALIDATE_CHAR (1, chr); | |
492 | cat = uc_general_category (SCM_CHAR (chr)); | |
493 | sym = uc_general_category_name (cat); | |
494 | ||
495 | if (sym != NULL) | |
25d50a05 | 496 | return scm_from_utf8_symbol (sym); |
0ca3a342 JG |
497 | return SCM_BOOL_F; |
498 | } | |
499 | #undef FUNC_NAME | |
500 | ||
0f2d19dd JB |
501 | \f |
502 | ||
503 | ||
504 | ||
84fad130 HWN |
505 | /* |
506 | TODO: change name to scm_i_.. ? --hwn | |
507 | */ | |
508 | ||
1cc91f1b | 509 | |
904a78f1 MG |
510 | scm_t_wchar |
511 | scm_c_upcase (scm_t_wchar c) | |
0f2d19dd | 512 | { |
f49dbcad | 513 | return uc_toupper ((int) c); |
0f2d19dd JB |
514 | } |
515 | ||
1cc91f1b | 516 | |
904a78f1 MG |
517 | scm_t_wchar |
518 | scm_c_downcase (scm_t_wchar c) | |
0f2d19dd | 519 | { |
f49dbcad | 520 | return uc_tolower ((int) c); |
0f2d19dd JB |
521 | } |
522 | ||
820f33aa JG |
523 | scm_t_wchar |
524 | scm_c_titlecase (scm_t_wchar c) | |
525 | { | |
526 | return uc_totitle ((int) c); | |
527 | } | |
528 | ||
77332b21 | 529 | \f |
0f2d19dd | 530 | |
77332b21 MG |
531 | /* There are a few sets of character names: R5RS, Guile |
532 | extensions for control characters, and leftover Guile extensions. | |
533 | They are listed in order of precedence. */ | |
534 | ||
64bad3f5 MG |
535 | static const char *const scm_r5rs_charnames[] = { |
536 | "space", "newline" | |
537 | }; | |
77332b21 | 538 | |
64bad3f5 | 539 | static const scm_t_uint32 const scm_r5rs_charnums[] = { |
15b6a6b2 | 540 | 0x20, 0x0a |
64bad3f5 | 541 | }; |
77332b21 | 542 | |
64bad3f5 | 543 | #define SCM_N_R5RS_CHARNAMES (sizeof (scm_r5rs_charnames) / sizeof (char *)) |
77332b21 | 544 | |
15b6a6b2 MG |
545 | static const char *const scm_r6rs_charnames[] = { |
546 | "nul", "alarm", "backspace", "tab", "linefeed", "vtab", "page", | |
547 | "return", "esc", "delete" | |
548 | /* 'space' and 'newline' are already included from the R5RS list. */ | |
549 | }; | |
550 | ||
551 | static const scm_t_uint32 const scm_r6rs_charnums[] = { | |
552 | 0x00, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, | |
553 | 0x0d, 0x1b, 0x7f | |
554 | }; | |
555 | ||
556 | #define SCM_N_R6RS_CHARNAMES (sizeof (scm_r6rs_charnames) / sizeof (char *)) | |
557 | ||
394449d5 MW |
558 | static const char *const scm_r7rs_charnames[] = { |
559 | "escape" | |
560 | }; | |
561 | ||
562 | static const scm_t_uint32 const scm_r7rs_charnums[] = { | |
563 | 0x1b | |
564 | }; | |
565 | ||
566 | #define SCM_N_R7RS_CHARNAMES (sizeof (scm_r7rs_charnames) / sizeof (char *)) | |
567 | ||
77332b21 | 568 | /* The abbreviated names for control characters. */ |
64bad3f5 MG |
569 | static const char *const scm_C0_control_charnames[] = { |
570 | /* C0 controls */ | |
571 | "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel", | |
572 | "bs", "ht", "lf", "vt", "ff", "cr", "so", "si", | |
573 | "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb", | |
574 | "can", "em", "sub", "esc", "fs", "gs", "rs", "us", | |
575 | "sp", "del" | |
576 | }; | |
577 | ||
578 | static const scm_t_uint32 const scm_C0_control_charnums[] = { | |
579 | 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | |
580 | 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, | |
581 | 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, | |
582 | 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, | |
583 | 0x20, 0x7f | |
584 | }; | |
585 | ||
586 | #define SCM_N_C0_CONTROL_CHARNAMES (sizeof (scm_C0_control_charnames) / sizeof (char *)) | |
587 | ||
588 | static const char *const scm_alt_charnames[] = { | |
15b6a6b2 | 589 | "null", "nl", "np" |
64bad3f5 MG |
590 | }; |
591 | ||
592 | static const scm_t_uint32 const scm_alt_charnums[] = { | |
15b6a6b2 | 593 | 0x00, 0x0a, 0x0c |
64bad3f5 MG |
594 | }; |
595 | ||
596 | #define SCM_N_ALT_CHARNAMES (sizeof (scm_alt_charnames) / sizeof (char *)) | |
77332b21 MG |
597 | |
598 | /* Returns the string charname for a character if it exists, or NULL | |
599 | otherwise. */ | |
600 | const char * | |
601 | scm_i_charname (SCM chr) | |
602 | { | |
744c8724 | 603 | size_t c; |
77332b21 | 604 | scm_t_uint32 i = SCM_CHAR (chr); |
0f2d19dd | 605 | |
64bad3f5 | 606 | for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++) |
77332b21 MG |
607 | if (scm_r5rs_charnums[c] == i) |
608 | return scm_r5rs_charnames[c]; | |
0f2d19dd | 609 | |
15b6a6b2 MG |
610 | for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++) |
611 | if (scm_r6rs_charnums[c] == i) | |
612 | return scm_r6rs_charnames[c]; | |
613 | ||
394449d5 MW |
614 | for (c = 0; c < SCM_N_R7RS_CHARNAMES; c++) |
615 | if (scm_r7rs_charnums[c] == i) | |
616 | return scm_r7rs_charnames[c]; | |
617 | ||
64bad3f5 | 618 | for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++) |
77332b21 MG |
619 | if (scm_C0_control_charnums[c] == i) |
620 | return scm_C0_control_charnames[c]; | |
621 | ||
b3498751 MG |
622 | /* Since the characters in scm_alt_charnums is a subset of |
623 | scm_C0_control_charnums, this code is never reached. */ | |
64bad3f5 | 624 | for (c = 0; c < SCM_N_ALT_CHARNAMES; c++) |
77332b21 | 625 | if (scm_alt_charnums[c] == i) |
b3498751 | 626 | return scm_alt_charnames[c]; |
0f2d19dd | 627 | |
77332b21 MG |
628 | return NULL; |
629 | } | |
630 | ||
631 | /* Return a character from a string charname. */ | |
632 | SCM | |
633 | scm_i_charname_to_char (const char *charname, size_t charname_len) | |
634 | { | |
744c8724 | 635 | size_t c; |
77332b21 | 636 | |
15b6a6b2 | 637 | /* The R5RS charnames. These are supposed to be case insensitive. */ |
64bad3f5 | 638 | for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++) |
77332b21 MG |
639 | if ((strlen (scm_r5rs_charnames[c]) == charname_len) |
640 | && (!strncasecmp (scm_r5rs_charnames[c], charname, charname_len))) | |
641 | return SCM_MAKE_CHAR (scm_r5rs_charnums[c]); | |
642 | ||
394449d5 MW |
643 | /* The R6RS charnames. R6RS says that these should be case-sensitive. |
644 | They are left as case-insensitive to avoid confusion. */ | |
15b6a6b2 MG |
645 | for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++) |
646 | if ((strlen (scm_r6rs_charnames[c]) == charname_len) | |
647 | && (!strncasecmp (scm_r6rs_charnames[c], charname, charname_len))) | |
648 | return SCM_MAKE_CHAR (scm_r6rs_charnums[c]); | |
649 | ||
394449d5 MW |
650 | /* The R7RS charnames. R7RS says that these should be case-sensitive. |
651 | They are left as case-insensitive to avoid confusion. */ | |
652 | for (c = 0; c < SCM_N_R7RS_CHARNAMES; c++) | |
653 | if ((strlen (scm_r7rs_charnames[c]) == charname_len) | |
654 | && (!strncasecmp (scm_r7rs_charnames[c], charname, charname_len))) | |
655 | return SCM_MAKE_CHAR (scm_r7rs_charnums[c]); | |
656 | ||
15b6a6b2 MG |
657 | /* Then come the controls. By Guile convention, these are not case |
658 | sensitive. */ | |
64bad3f5 | 659 | for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++) |
77332b21 MG |
660 | if ((strlen (scm_C0_control_charnames[c]) == charname_len) |
661 | && (!strncasecmp (scm_C0_control_charnames[c], charname, charname_len))) | |
662 | return SCM_MAKE_CHAR (scm_C0_control_charnums[c]); | |
663 | ||
664 | /* Lastly are some old names carried over for compatibility. */ | |
64bad3f5 | 665 | for (c = 0; c < SCM_N_ALT_CHARNAMES; c++) |
77332b21 MG |
666 | if ((strlen (scm_alt_charnames[c]) == charname_len) |
667 | && (!strncasecmp (scm_alt_charnames[c], charname, charname_len))) | |
668 | return SCM_MAKE_CHAR (scm_alt_charnums[c]); | |
64bad3f5 | 669 | |
77332b21 MG |
670 | return SCM_BOOL_F; |
671 | } | |
0f2d19dd JB |
672 | |
673 | \f | |
674 | ||
1cc91f1b | 675 | |
0f2d19dd JB |
676 | void |
677 | scm_init_chars () | |
0f2d19dd | 678 | { |
a0599745 | 679 | #include "libguile/chars.x" |
0f2d19dd JB |
680 | } |
681 | ||
89e00824 ML |
682 | |
683 | /* | |
684 | Local Variables: | |
685 | c-file-style: "gnu" | |
686 | End: | |
687 | */ |