1 /* Copyright (C) 1995,1996,1998, 2000, 2001, 2004, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
30 #include "libguile/_scm.h"
31 #include "libguile/validate.h"
33 #include "libguile/chars.h"
34 #include "libguile/srfi-14.h"
38 SCM_DEFINE (scm_char_p
, "char?", 1, 0, 0,
40 "Return @code{#t} iff @var{x} is a character, else @code{#f}.")
41 #define FUNC_NAME s_scm_char_p
43 return scm_from_bool (SCM_CHARP(x
));
47 static SCM
scm_i_char_eq_p (SCM x
, SCM y
, SCM rest
);
48 SCM_DEFINE (scm_i_char_eq_p
, "char=?", 0, 2, 1,
49 (SCM x
, SCM y
, SCM rest
),
50 "Return @code{#t} if the Unicode code point of @var{x} is equal to the\n"
51 "code point of @var{y}, else @code{#f}.\n")
52 #define FUNC_NAME s_scm_i_char_eq_p
54 if (SCM_UNBNDP (x
) || SCM_UNBNDP (y
))
56 while (!scm_is_null (rest
))
58 if (scm_is_false (scm_char_eq_p (x
, y
)))
62 rest
= scm_cdr (rest
);
64 return scm_char_eq_p (x
, y
);
68 SCM
scm_char_eq_p (SCM x
, SCM y
)
69 #define FUNC_NAME s_scm_i_char_eq_p
71 SCM_VALIDATE_CHAR (1, x
);
72 SCM_VALIDATE_CHAR (2, y
);
73 return scm_from_bool (scm_is_eq (x
, y
));
78 static SCM
scm_i_char_less_p (SCM x
, SCM y
, SCM rest
);
79 SCM_DEFINE (scm_i_char_less_p
, "char<?", 0, 2, 1,
80 (SCM x
, SCM y
, SCM rest
),
81 "Return @code{#t} iff the code point of @var{x} is less than the code\n"
82 "point of @var{y}, else @code{#f}.")
83 #define FUNC_NAME s_scm_i_char_less_p
85 if (SCM_UNBNDP (x
) || SCM_UNBNDP (y
))
87 while (!scm_is_null (rest
))
89 if (scm_is_false (scm_char_less_p (x
, y
)))
93 rest
= scm_cdr (rest
);
95 return scm_char_less_p (x
, y
);
99 SCM
scm_char_less_p (SCM x
, SCM y
)
100 #define FUNC_NAME s_scm_i_char_less_p
102 SCM_VALIDATE_CHAR (1, x
);
103 SCM_VALIDATE_CHAR (2, y
);
104 return scm_from_bool (SCM_CHAR(x
) < SCM_CHAR(y
));
108 static SCM
scm_i_char_leq_p (SCM x
, SCM y
, SCM rest
);
109 SCM_DEFINE (scm_i_char_leq_p
, "char<=?", 0, 2, 1,
110 (SCM x
, SCM y
, SCM rest
),
111 "Return @code{#t} if the Unicode code point of @var{x} is less than or\n"
112 "equal to the code point of @var{y}, else @code{#f}.")
113 #define FUNC_NAME s_scm_i_char_leq_p
115 if (SCM_UNBNDP (x
) || SCM_UNBNDP (y
))
117 while (!scm_is_null (rest
))
119 if (scm_is_false (scm_char_leq_p (x
, y
)))
123 rest
= scm_cdr (rest
);
125 return scm_char_leq_p (x
, y
);
129 SCM
scm_char_leq_p (SCM x
, SCM y
)
130 #define FUNC_NAME s_scm_i_char_leq_p
132 SCM_VALIDATE_CHAR (1, x
);
133 SCM_VALIDATE_CHAR (2, y
);
134 return scm_from_bool (SCM_CHAR(x
) <= SCM_CHAR(y
));
138 static SCM
scm_i_char_gr_p (SCM x
, SCM y
, SCM rest
);
139 SCM_DEFINE (scm_i_char_gr_p
, "char>?", 0, 2, 1,
140 (SCM x
, SCM y
, SCM rest
),
141 "Return @code{#t} if the Unicode code point of @var{x} is greater than\n"
142 "the code point of @var{y}, else @code{#f}.")
143 #define FUNC_NAME s_scm_i_char_gr_p
145 if (SCM_UNBNDP (x
) || SCM_UNBNDP (y
))
147 while (!scm_is_null (rest
))
149 if (scm_is_false (scm_char_gr_p (x
, y
)))
153 rest
= scm_cdr (rest
);
155 return scm_char_gr_p (x
, y
);
159 SCM
scm_char_gr_p (SCM x
, SCM y
)
160 #define FUNC_NAME s_scm_i_char_gr_p
162 SCM_VALIDATE_CHAR (1, x
);
163 SCM_VALIDATE_CHAR (2, y
);
164 return scm_from_bool (SCM_CHAR(x
) > SCM_CHAR(y
));
168 static SCM
scm_i_char_geq_p (SCM x
, SCM y
, SCM rest
);
169 SCM_DEFINE (scm_i_char_geq_p
, "char>=?", 0, 2, 1,
170 (SCM x
, SCM y
, SCM rest
),
171 "Return @code{#t} if the Unicode code point of @var{x} is greater than\n"
172 "or equal to the code point of @var{y}, else @code{#f}.")
173 #define FUNC_NAME s_scm_i_char_geq_p
175 if (SCM_UNBNDP (x
) || SCM_UNBNDP (y
))
177 while (!scm_is_null (rest
))
179 if (scm_is_false (scm_char_geq_p (x
, y
)))
183 rest
= scm_cdr (rest
);
185 return scm_char_geq_p (x
, y
);
189 SCM
scm_char_geq_p (SCM x
, SCM y
)
190 #define FUNC_NAME s_scm_i_char_geq_p
192 SCM_VALIDATE_CHAR (1, x
);
193 SCM_VALIDATE_CHAR (2, y
);
194 return scm_from_bool (SCM_CHAR(x
) >= SCM_CHAR(y
));
198 /* FIXME?: R6RS specifies that these comparisons are case-folded.
199 This is the same thing as comparing the uppercase characters in
200 practice, but, not in theory. Unicode has table containing their
201 definition of case-folded character mappings. A more correct
202 implementation would be to use that table and make a char-foldcase
205 static SCM
scm_i_char_ci_eq_p (SCM x
, SCM y
, SCM rest
);
206 SCM_DEFINE (scm_i_char_ci_eq_p
, "char-ci=?", 0, 2, 1,
207 (SCM x
, SCM y
, SCM rest
),
208 "Return @code{#t} if the case-folded Unicode code point of @var{x} is\n"
209 "the same as the case-folded code point of @var{y}, else @code{#f}.")
210 #define FUNC_NAME s_scm_i_char_ci_eq_p
212 if (SCM_UNBNDP (x
) || SCM_UNBNDP (y
))
214 while (!scm_is_null (rest
))
216 if (scm_is_false (scm_char_ci_eq_p (x
, y
)))
220 rest
= scm_cdr (rest
);
222 return scm_char_ci_eq_p (x
, y
);
226 SCM
scm_char_ci_eq_p (SCM x
, SCM y
)
227 #define FUNC_NAME s_scm_i_char_ci_eq_p
229 SCM_VALIDATE_CHAR (1, x
);
230 SCM_VALIDATE_CHAR (2, y
);
231 return scm_from_bool (scm_c_upcase(SCM_CHAR(x
))==scm_c_upcase(SCM_CHAR(y
)));
235 static SCM
scm_i_char_ci_less_p (SCM x
, SCM y
, SCM rest
);
236 SCM_DEFINE (scm_i_char_ci_less_p
, "char-ci<?", 0, 2, 1,
237 (SCM x
, SCM y
, SCM rest
),
238 "Return @code{#t} if the case-folded Unicode code point of @var{x} is\n"
239 "less than the case-folded code point of @var{y}, else @code{#f}.")
240 #define FUNC_NAME s_scm_i_char_ci_less_p
242 if (SCM_UNBNDP (x
) || SCM_UNBNDP (y
))
244 while (!scm_is_null (rest
))
246 if (scm_is_false (scm_char_ci_less_p (x
, y
)))
250 rest
= scm_cdr (rest
);
252 return scm_char_ci_less_p (x
, y
);
256 SCM
scm_char_ci_less_p (SCM x
, SCM y
)
257 #define FUNC_NAME s_scm_i_char_ci_less_p
259 SCM_VALIDATE_CHAR (1, x
);
260 SCM_VALIDATE_CHAR (2, y
);
261 return scm_from_bool ((scm_c_upcase(SCM_CHAR(x
))) < scm_c_upcase(SCM_CHAR(y
)));
265 static SCM
scm_i_char_ci_leq_p (SCM x
, SCM y
, SCM rest
);
266 SCM_DEFINE (scm_i_char_ci_leq_p
, "char-ci<=?", 0, 2, 1,
267 (SCM x
, SCM y
, SCM rest
),
268 "Return @code{#t} iff the case-folded Unicode code point of @var{x} is\n"
269 "less than or equal to the case-folded code point of @var{y}, else\n"
271 #define FUNC_NAME s_scm_i_char_ci_leq_p
273 if (SCM_UNBNDP (x
) || SCM_UNBNDP (y
))
275 while (!scm_is_null (rest
))
277 if (scm_is_false (scm_char_ci_leq_p (x
, y
)))
281 rest
= scm_cdr (rest
);
283 return scm_char_ci_leq_p (x
, y
);
287 SCM
scm_char_ci_leq_p (SCM x
, SCM y
)
288 #define FUNC_NAME s_scm_i_char_ci_leq_p
290 SCM_VALIDATE_CHAR (1, x
);
291 SCM_VALIDATE_CHAR (2, y
);
292 return scm_from_bool (scm_c_upcase(SCM_CHAR(x
)) <= scm_c_upcase(SCM_CHAR(y
)));
296 static SCM
scm_i_char_ci_gr_p (SCM x
, SCM y
, SCM rest
);
297 SCM_DEFINE (scm_i_char_ci_gr_p
, "char-ci>?", 0, 2, 1,
298 (SCM x
, SCM y
, SCM rest
),
299 "Return @code{#t} iff the case-folded code point of @var{x} is greater\n"
300 "than the case-folded code point of @var{y}, else @code{#f}.")
301 #define FUNC_NAME s_scm_i_char_ci_gr_p
303 if (SCM_UNBNDP (x
) || SCM_UNBNDP (y
))
305 while (!scm_is_null (rest
))
307 if (scm_is_false (scm_char_ci_gr_p (x
, y
)))
311 rest
= scm_cdr (rest
);
313 return scm_char_ci_gr_p (x
, y
);
317 SCM
scm_char_ci_gr_p (SCM x
, SCM y
)
318 #define FUNC_NAME s_scm_i_char_ci_gr_p
320 SCM_VALIDATE_CHAR (1, x
);
321 SCM_VALIDATE_CHAR (2, y
);
322 return scm_from_bool (scm_c_upcase(SCM_CHAR(x
)) > scm_c_upcase(SCM_CHAR(y
)));
326 static SCM
scm_i_char_ci_geq_p (SCM x
, SCM y
, SCM rest
);
327 SCM_DEFINE (scm_i_char_ci_geq_p
, "char-ci>=?", 0, 2, 1,
328 (SCM x
, SCM y
, SCM rest
),
329 "Return @code{#t} iff the case-folded Unicode code point of @var{x} is\n"
330 "greater than or equal to the case-folded code point of @var{y}, else\n"
332 #define FUNC_NAME s_scm_i_char_ci_geq_p
334 if (SCM_UNBNDP (x
) || SCM_UNBNDP (y
))
336 while (!scm_is_null (rest
))
338 if (scm_is_false (scm_char_ci_geq_p (x
, y
)))
342 rest
= scm_cdr (rest
);
344 return scm_char_ci_geq_p (x
, y
);
348 SCM
scm_char_ci_geq_p (SCM x
, SCM y
)
349 #define FUNC_NAME s_scm_i_char_ci_geq_p
351 SCM_VALIDATE_CHAR (1, x
);
352 SCM_VALIDATE_CHAR (2, y
);
353 return scm_from_bool (scm_c_upcase(SCM_CHAR(x
)) >= scm_c_upcase(SCM_CHAR(y
)));
358 SCM_DEFINE (scm_char_alphabetic_p
, "char-alphabetic?", 1, 0, 0,
360 "Return @code{#t} iff @var{chr} is alphabetic, else @code{#f}.\n")
361 #define FUNC_NAME s_scm_char_alphabetic_p
363 return scm_char_set_contains_p (scm_char_set_letter
, chr
);
367 SCM_DEFINE (scm_char_numeric_p
, "char-numeric?", 1, 0, 0,
369 "Return @code{#t} iff @var{chr} is numeric, else @code{#f}.\n")
370 #define FUNC_NAME s_scm_char_numeric_p
372 return scm_char_set_contains_p (scm_char_set_digit
, chr
);
376 SCM_DEFINE (scm_char_whitespace_p
, "char-whitespace?", 1, 0, 0,
378 "Return @code{#t} iff @var{chr} is whitespace, else @code{#f}.\n")
379 #define FUNC_NAME s_scm_char_whitespace_p
381 return scm_char_set_contains_p (scm_char_set_whitespace
, chr
);
386 SCM_DEFINE (scm_char_upper_case_p
, "char-upper-case?", 1, 0, 0,
388 "Return @code{#t} iff @var{chr} is uppercase, else @code{#f}.\n")
389 #define FUNC_NAME s_scm_char_upper_case_p
391 return scm_char_set_contains_p (scm_char_set_upper_case
, chr
);
396 SCM_DEFINE (scm_char_lower_case_p
, "char-lower-case?", 1, 0, 0,
398 "Return @code{#t} iff @var{chr} is lowercase, else @code{#f}.\n")
399 #define FUNC_NAME s_scm_char_lower_case_p
401 return scm_char_set_contains_p (scm_char_set_lower_case
, chr
);
405 SCM_DEFINE (scm_char_is_both_p
, "char-is-both?", 1, 0, 0,
407 "Return @code{#t} iff @var{chr} is either uppercase or lowercase, else\n"
409 #define FUNC_NAME s_scm_char_is_both_p
411 if (scm_is_true (scm_char_set_contains_p (scm_char_set_lower_case
, chr
)))
413 return scm_char_set_contains_p (scm_char_set_upper_case
, chr
);
418 SCM_DEFINE (scm_char_to_integer
, "char->integer", 1, 0, 0,
420 "Return the Unicode code point of @var{chr}.")
421 #define FUNC_NAME s_scm_char_to_integer
423 SCM_VALIDATE_CHAR (1, chr
);
424 return scm_from_uint32 (SCM_CHAR(chr
));
429 SCM_DEFINE (scm_integer_to_char
, "integer->char", 1, 0, 0,
431 "Return the character that has Unicode code point @var{n}. The integer\n"
432 "@var{n} must be a valid code point. Valid code points are in the\n"
433 "ranges 0 to @code{#xD7FF} inclusive or @code{#xE000} to\n"
434 "@code{#x10FFFF} inclusive.")
435 #define FUNC_NAME s_scm_integer_to_char
439 cn
= scm_to_wchar (n
);
441 /* Avoid the surrogates. */
442 if (!SCM_IS_UNICODE_CHAR (cn
))
443 scm_out_of_range (FUNC_NAME
, n
);
445 return SCM_MAKE_CHAR (cn
);
450 SCM_DEFINE (scm_char_upcase
, "char-upcase", 1, 0, 0,
452 "Return the uppercase character version of @var{chr}.")
453 #define FUNC_NAME s_scm_char_upcase
455 SCM_VALIDATE_CHAR (1, chr
);
456 return SCM_MAKE_CHAR (scm_c_upcase (SCM_CHAR (chr
)));
461 SCM_DEFINE (scm_char_downcase
, "char-downcase", 1, 0, 0,
463 "Return the lowercase character version of @var{chr}.")
464 #define FUNC_NAME s_scm_char_downcase
466 SCM_VALIDATE_CHAR (1, chr
);
467 return SCM_MAKE_CHAR (scm_c_downcase (SCM_CHAR(chr
)));
471 SCM_DEFINE (scm_char_titlecase
, "char-titlecase", 1, 0, 0,
473 "Return the titlecase character version of @var{chr}.")
474 #define FUNC_NAME s_scm_char_titlecase
476 SCM_VALIDATE_CHAR (1, chr
);
477 return SCM_MAKE_CHAR (scm_c_titlecase (SCM_CHAR(chr
)));
481 SCM_DEFINE (scm_char_general_category
, "char-general-category", 1, 0, 0,
483 "Return a symbol representing the Unicode general category of "
484 "@var{chr} or @code{#f} if a named category cannot be found.")
485 #define FUNC_NAME s_scm_char_general_category
488 uc_general_category_t cat
;
490 SCM_VALIDATE_CHAR (1, chr
);
491 cat
= uc_general_category (SCM_CHAR (chr
));
492 sym
= uc_general_category_name (cat
);
495 return scm_from_utf8_symbol (sym
);
505 TODO: change name to scm_i_.. ? --hwn
510 scm_c_upcase (scm_t_wchar c
)
512 return uc_toupper ((int) c
);
517 scm_c_downcase (scm_t_wchar c
)
519 return uc_tolower ((int) c
);
523 scm_c_titlecase (scm_t_wchar c
)
525 return uc_totitle ((int) c
);
530 /* There are a few sets of character names: R5RS, Guile
531 extensions for control characters, and leftover Guile extensions.
532 They are listed in order of precedence. */
534 static const char *const scm_r5rs_charnames
[] = {
538 static const scm_t_uint32
const scm_r5rs_charnums
[] = {
542 #define SCM_N_R5RS_CHARNAMES (sizeof (scm_r5rs_charnames) / sizeof (char *))
544 static const char *const scm_r6rs_charnames
[] = {
545 "nul", "alarm", "backspace", "tab", "linefeed", "vtab", "page",
546 "return", "esc", "delete"
547 /* 'space' and 'newline' are already included from the R5RS list. */
550 static const scm_t_uint32
const scm_r6rs_charnums
[] = {
551 0x00, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c,
555 #define SCM_N_R6RS_CHARNAMES (sizeof (scm_r6rs_charnames) / sizeof (char *))
557 /* The abbreviated names for control characters. */
558 static const char *const scm_C0_control_charnames
[] = {
560 "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
561 "bs", "ht", "lf", "vt", "ff", "cr", "so", "si",
562 "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
563 "can", "em", "sub", "esc", "fs", "gs", "rs", "us",
567 static const scm_t_uint32
const scm_C0_control_charnums
[] = {
568 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
569 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
570 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
571 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
575 #define SCM_N_C0_CONTROL_CHARNAMES (sizeof (scm_C0_control_charnames) / sizeof (char *))
577 static const char *const scm_alt_charnames
[] = {
581 static const scm_t_uint32
const scm_alt_charnums
[] = {
585 #define SCM_N_ALT_CHARNAMES (sizeof (scm_alt_charnames) / sizeof (char *))
587 /* Returns the string charname for a character if it exists, or NULL
590 scm_i_charname (SCM chr
)
593 scm_t_uint32 i
= SCM_CHAR (chr
);
595 for (c
= 0; c
< SCM_N_R5RS_CHARNAMES
; c
++)
596 if (scm_r5rs_charnums
[c
] == i
)
597 return scm_r5rs_charnames
[c
];
599 for (c
= 0; c
< SCM_N_R6RS_CHARNAMES
; c
++)
600 if (scm_r6rs_charnums
[c
] == i
)
601 return scm_r6rs_charnames
[c
];
603 for (c
= 0; c
< SCM_N_C0_CONTROL_CHARNAMES
; c
++)
604 if (scm_C0_control_charnums
[c
] == i
)
605 return scm_C0_control_charnames
[c
];
607 /* Since the characters in scm_alt_charnums is a subset of
608 scm_C0_control_charnums, this code is never reached. */
609 for (c
= 0; c
< SCM_N_ALT_CHARNAMES
; c
++)
610 if (scm_alt_charnums
[c
] == i
)
611 return scm_alt_charnames
[c
];
616 /* Return a character from a string charname. */
618 scm_i_charname_to_char (const char *charname
, size_t charname_len
)
622 /* The R5RS charnames. These are supposed to be case insensitive. */
623 for (c
= 0; c
< SCM_N_R5RS_CHARNAMES
; c
++)
624 if ((strlen (scm_r5rs_charnames
[c
]) == charname_len
)
625 && (!strncasecmp (scm_r5rs_charnames
[c
], charname
, charname_len
)))
626 return SCM_MAKE_CHAR (scm_r5rs_charnums
[c
]);
628 /* The R6RS charnames. R6RS says that these should be case-sensitive. They
629 are left as case-insensitive to avoid confusion. */
630 for (c
= 0; c
< SCM_N_R6RS_CHARNAMES
; c
++)
631 if ((strlen (scm_r6rs_charnames
[c
]) == charname_len
)
632 && (!strncasecmp (scm_r6rs_charnames
[c
], charname
, charname_len
)))
633 return SCM_MAKE_CHAR (scm_r6rs_charnums
[c
]);
635 /* Then come the controls. By Guile convention, these are not case
637 for (c
= 0; c
< SCM_N_C0_CONTROL_CHARNAMES
; c
++)
638 if ((strlen (scm_C0_control_charnames
[c
]) == charname_len
)
639 && (!strncasecmp (scm_C0_control_charnames
[c
], charname
, charname_len
)))
640 return SCM_MAKE_CHAR (scm_C0_control_charnums
[c
]);
642 /* Lastly are some old names carried over for compatibility. */
643 for (c
= 0; c
< SCM_N_ALT_CHARNAMES
; c
++)
644 if ((strlen (scm_alt_charnames
[c
]) == charname_len
)
645 && (!strncasecmp (scm_alt_charnames
[c
], charname
, charname_len
)))
646 return SCM_MAKE_CHAR (scm_alt_charnums
[c
]);
657 #include "libguile/chars.x"