1 /* Copyright (C) 1995,1996,1998, 2000, 2001, 2004, 2006, 2008, 2009,
2 * 2010, 2011, 2014 Free Software Foundation, Inc.
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31 #include "libguile/_scm.h"
32 #include "libguile/validate.h"
34 #include "libguile/chars.h"
35 #include "libguile/srfi-14.h"
39 SCM_DEFINE (scm_char_p
, "char?", 1, 0, 0,
41 "Return @code{#t} iff @var{x} is a character, else @code{#f}.")
42 #define FUNC_NAME s_scm_char_p
44 return scm_from_bool (SCM_CHARP(x
));
48 static SCM
scm_i_char_eq_p (SCM x
, SCM y
, SCM rest
);
49 SCM_DEFINE (scm_i_char_eq_p
, "char=?", 0, 2, 1,
50 (SCM x
, SCM y
, SCM rest
),
51 "Return @code{#t} if the Unicode code point of @var{x} is equal to the\n"
52 "code point of @var{y}, else @code{#f}.\n")
53 #define FUNC_NAME s_scm_i_char_eq_p
55 if (SCM_UNBNDP (x
) || SCM_UNBNDP (y
))
57 while (!scm_is_null (rest
))
59 if (scm_is_false (scm_char_eq_p (x
, y
)))
63 rest
= scm_cdr (rest
);
65 return scm_char_eq_p (x
, y
);
69 SCM
scm_char_eq_p (SCM x
, SCM y
)
70 #define FUNC_NAME s_scm_i_char_eq_p
72 SCM_VALIDATE_CHAR (1, x
);
73 SCM_VALIDATE_CHAR (2, y
);
74 return scm_from_bool (scm_is_eq (x
, y
));
79 static SCM
scm_i_char_less_p (SCM x
, SCM y
, SCM rest
);
80 SCM_DEFINE (scm_i_char_less_p
, "char<?", 0, 2, 1,
81 (SCM x
, SCM y
, SCM rest
),
82 "Return @code{#t} iff the code point of @var{x} is less than the code\n"
83 "point of @var{y}, else @code{#f}.")
84 #define FUNC_NAME s_scm_i_char_less_p
86 if (SCM_UNBNDP (x
) || SCM_UNBNDP (y
))
88 while (!scm_is_null (rest
))
90 if (scm_is_false (scm_char_less_p (x
, y
)))
94 rest
= scm_cdr (rest
);
96 return scm_char_less_p (x
, y
);
100 SCM
scm_char_less_p (SCM x
, SCM y
)
101 #define FUNC_NAME s_scm_i_char_less_p
103 SCM_VALIDATE_CHAR (1, x
);
104 SCM_VALIDATE_CHAR (2, y
);
105 return scm_from_bool (SCM_CHAR(x
) < SCM_CHAR(y
));
109 static SCM
scm_i_char_leq_p (SCM x
, SCM y
, SCM rest
);
110 SCM_DEFINE (scm_i_char_leq_p
, "char<=?", 0, 2, 1,
111 (SCM x
, SCM y
, SCM rest
),
112 "Return @code{#t} if the Unicode code point of @var{x} is less than or\n"
113 "equal to the code point of @var{y}, else @code{#f}.")
114 #define FUNC_NAME s_scm_i_char_leq_p
116 if (SCM_UNBNDP (x
) || SCM_UNBNDP (y
))
118 while (!scm_is_null (rest
))
120 if (scm_is_false (scm_char_leq_p (x
, y
)))
124 rest
= scm_cdr (rest
);
126 return scm_char_leq_p (x
, y
);
130 SCM
scm_char_leq_p (SCM x
, SCM y
)
131 #define FUNC_NAME s_scm_i_char_leq_p
133 SCM_VALIDATE_CHAR (1, x
);
134 SCM_VALIDATE_CHAR (2, y
);
135 return scm_from_bool (SCM_CHAR(x
) <= SCM_CHAR(y
));
139 static SCM
scm_i_char_gr_p (SCM x
, SCM y
, SCM rest
);
140 SCM_DEFINE (scm_i_char_gr_p
, "char>?", 0, 2, 1,
141 (SCM x
, SCM y
, SCM rest
),
142 "Return @code{#t} if the Unicode code point of @var{x} is greater than\n"
143 "the code point of @var{y}, else @code{#f}.")
144 #define FUNC_NAME s_scm_i_char_gr_p
146 if (SCM_UNBNDP (x
) || SCM_UNBNDP (y
))
148 while (!scm_is_null (rest
))
150 if (scm_is_false (scm_char_gr_p (x
, y
)))
154 rest
= scm_cdr (rest
);
156 return scm_char_gr_p (x
, y
);
160 SCM
scm_char_gr_p (SCM x
, SCM y
)
161 #define FUNC_NAME s_scm_i_char_gr_p
163 SCM_VALIDATE_CHAR (1, x
);
164 SCM_VALIDATE_CHAR (2, y
);
165 return scm_from_bool (SCM_CHAR(x
) > SCM_CHAR(y
));
169 static SCM
scm_i_char_geq_p (SCM x
, SCM y
, SCM rest
);
170 SCM_DEFINE (scm_i_char_geq_p
, "char>=?", 0, 2, 1,
171 (SCM x
, SCM y
, SCM rest
),
172 "Return @code{#t} if the Unicode code point of @var{x} is greater than\n"
173 "or equal to the code point of @var{y}, else @code{#f}.")
174 #define FUNC_NAME s_scm_i_char_geq_p
176 if (SCM_UNBNDP (x
) || SCM_UNBNDP (y
))
178 while (!scm_is_null (rest
))
180 if (scm_is_false (scm_char_geq_p (x
, y
)))
184 rest
= scm_cdr (rest
);
186 return scm_char_geq_p (x
, y
);
190 SCM
scm_char_geq_p (SCM x
, SCM y
)
191 #define FUNC_NAME s_scm_i_char_geq_p
193 SCM_VALIDATE_CHAR (1, x
);
194 SCM_VALIDATE_CHAR (2, y
);
195 return scm_from_bool (SCM_CHAR(x
) >= SCM_CHAR(y
));
199 /* FIXME?: R6RS specifies that these comparisons are case-folded.
200 This is the same thing as comparing the uppercase characters in
201 practice, but, not in theory. Unicode has table containing their
202 definition of case-folded character mappings. A more correct
203 implementation would be to use that table and make a char-foldcase
206 static SCM
scm_i_char_ci_eq_p (SCM x
, SCM y
, SCM rest
);
207 SCM_DEFINE (scm_i_char_ci_eq_p
, "char-ci=?", 0, 2, 1,
208 (SCM x
, SCM y
, SCM rest
),
209 "Return @code{#t} if the case-folded Unicode code point of @var{x} is\n"
210 "the same as the case-folded code point of @var{y}, else @code{#f}.")
211 #define FUNC_NAME s_scm_i_char_ci_eq_p
213 if (SCM_UNBNDP (x
) || SCM_UNBNDP (y
))
215 while (!scm_is_null (rest
))
217 if (scm_is_false (scm_char_ci_eq_p (x
, y
)))
221 rest
= scm_cdr (rest
);
223 return scm_char_ci_eq_p (x
, y
);
227 SCM
scm_char_ci_eq_p (SCM x
, SCM y
)
228 #define FUNC_NAME s_scm_i_char_ci_eq_p
230 SCM_VALIDATE_CHAR (1, x
);
231 SCM_VALIDATE_CHAR (2, y
);
232 return scm_from_bool (scm_c_upcase(SCM_CHAR(x
))==scm_c_upcase(SCM_CHAR(y
)));
236 static SCM
scm_i_char_ci_less_p (SCM x
, SCM y
, SCM rest
);
237 SCM_DEFINE (scm_i_char_ci_less_p
, "char-ci<?", 0, 2, 1,
238 (SCM x
, SCM y
, SCM rest
),
239 "Return @code{#t} if the case-folded Unicode code point of @var{x} is\n"
240 "less than the case-folded code point of @var{y}, else @code{#f}.")
241 #define FUNC_NAME s_scm_i_char_ci_less_p
243 if (SCM_UNBNDP (x
) || SCM_UNBNDP (y
))
245 while (!scm_is_null (rest
))
247 if (scm_is_false (scm_char_ci_less_p (x
, y
)))
251 rest
= scm_cdr (rest
);
253 return scm_char_ci_less_p (x
, y
);
257 SCM
scm_char_ci_less_p (SCM x
, SCM y
)
258 #define FUNC_NAME s_scm_i_char_ci_less_p
260 SCM_VALIDATE_CHAR (1, x
);
261 SCM_VALIDATE_CHAR (2, y
);
262 return scm_from_bool ((scm_c_upcase(SCM_CHAR(x
))) < scm_c_upcase(SCM_CHAR(y
)));
266 static SCM
scm_i_char_ci_leq_p (SCM x
, SCM y
, SCM rest
);
267 SCM_DEFINE (scm_i_char_ci_leq_p
, "char-ci<=?", 0, 2, 1,
268 (SCM x
, SCM y
, SCM rest
),
269 "Return @code{#t} iff the case-folded Unicode code point of @var{x} is\n"
270 "less than or equal to the case-folded code point of @var{y}, else\n"
272 #define FUNC_NAME s_scm_i_char_ci_leq_p
274 if (SCM_UNBNDP (x
) || SCM_UNBNDP (y
))
276 while (!scm_is_null (rest
))
278 if (scm_is_false (scm_char_ci_leq_p (x
, y
)))
282 rest
= scm_cdr (rest
);
284 return scm_char_ci_leq_p (x
, y
);
288 SCM
scm_char_ci_leq_p (SCM x
, SCM y
)
289 #define FUNC_NAME s_scm_i_char_ci_leq_p
291 SCM_VALIDATE_CHAR (1, x
);
292 SCM_VALIDATE_CHAR (2, y
);
293 return scm_from_bool (scm_c_upcase(SCM_CHAR(x
)) <= scm_c_upcase(SCM_CHAR(y
)));
297 static SCM
scm_i_char_ci_gr_p (SCM x
, SCM y
, SCM rest
);
298 SCM_DEFINE (scm_i_char_ci_gr_p
, "char-ci>?", 0, 2, 1,
299 (SCM x
, SCM y
, SCM rest
),
300 "Return @code{#t} iff the case-folded code point of @var{x} is greater\n"
301 "than the case-folded code point of @var{y}, else @code{#f}.")
302 #define FUNC_NAME s_scm_i_char_ci_gr_p
304 if (SCM_UNBNDP (x
) || SCM_UNBNDP (y
))
306 while (!scm_is_null (rest
))
308 if (scm_is_false (scm_char_ci_gr_p (x
, y
)))
312 rest
= scm_cdr (rest
);
314 return scm_char_ci_gr_p (x
, y
);
318 SCM
scm_char_ci_gr_p (SCM x
, SCM y
)
319 #define FUNC_NAME s_scm_i_char_ci_gr_p
321 SCM_VALIDATE_CHAR (1, x
);
322 SCM_VALIDATE_CHAR (2, y
);
323 return scm_from_bool (scm_c_upcase(SCM_CHAR(x
)) > scm_c_upcase(SCM_CHAR(y
)));
327 static SCM
scm_i_char_ci_geq_p (SCM x
, SCM y
, SCM rest
);
328 SCM_DEFINE (scm_i_char_ci_geq_p
, "char-ci>=?", 0, 2, 1,
329 (SCM x
, SCM y
, SCM rest
),
330 "Return @code{#t} iff the case-folded Unicode code point of @var{x} is\n"
331 "greater than or equal to the case-folded code point of @var{y}, else\n"
333 #define FUNC_NAME s_scm_i_char_ci_geq_p
335 if (SCM_UNBNDP (x
) || SCM_UNBNDP (y
))
337 while (!scm_is_null (rest
))
339 if (scm_is_false (scm_char_ci_geq_p (x
, y
)))
343 rest
= scm_cdr (rest
);
345 return scm_char_ci_geq_p (x
, y
);
349 SCM
scm_char_ci_geq_p (SCM x
, SCM y
)
350 #define FUNC_NAME s_scm_i_char_ci_geq_p
352 SCM_VALIDATE_CHAR (1, x
);
353 SCM_VALIDATE_CHAR (2, y
);
354 return scm_from_bool (scm_c_upcase(SCM_CHAR(x
)) >= scm_c_upcase(SCM_CHAR(y
)));
359 SCM_DEFINE (scm_char_alphabetic_p
, "char-alphabetic?", 1, 0, 0,
361 "Return @code{#t} iff @var{chr} is alphabetic, else @code{#f}.\n")
362 #define FUNC_NAME s_scm_char_alphabetic_p
364 return scm_char_set_contains_p (scm_char_set_letter
, chr
);
368 SCM_DEFINE (scm_char_numeric_p
, "char-numeric?", 1, 0, 0,
370 "Return @code{#t} iff @var{chr} is numeric, else @code{#f}.\n")
371 #define FUNC_NAME s_scm_char_numeric_p
373 return scm_char_set_contains_p (scm_char_set_digit
, chr
);
377 SCM_DEFINE (scm_char_whitespace_p
, "char-whitespace?", 1, 0, 0,
379 "Return @code{#t} iff @var{chr} is whitespace, else @code{#f}.\n")
380 #define FUNC_NAME s_scm_char_whitespace_p
382 return scm_char_set_contains_p (scm_char_set_whitespace
, chr
);
387 SCM_DEFINE (scm_char_upper_case_p
, "char-upper-case?", 1, 0, 0,
389 "Return @code{#t} iff @var{chr} is uppercase, else @code{#f}.\n")
390 #define FUNC_NAME s_scm_char_upper_case_p
392 return scm_char_set_contains_p (scm_char_set_upper_case
, chr
);
397 SCM_DEFINE (scm_char_lower_case_p
, "char-lower-case?", 1, 0, 0,
399 "Return @code{#t} iff @var{chr} is lowercase, else @code{#f}.\n")
400 #define FUNC_NAME s_scm_char_lower_case_p
402 return scm_char_set_contains_p (scm_char_set_lower_case
, chr
);
406 SCM_DEFINE (scm_char_is_both_p
, "char-is-both?", 1, 0, 0,
408 "Return @code{#t} iff @var{chr} is either uppercase or lowercase, else\n"
410 #define FUNC_NAME s_scm_char_is_both_p
412 if (scm_is_true (scm_char_set_contains_p (scm_char_set_lower_case
, chr
)))
414 return scm_char_set_contains_p (scm_char_set_upper_case
, chr
);
419 SCM_DEFINE (scm_char_to_integer
, "char->integer", 1, 0, 0,
421 "Return the Unicode code point of @var{chr}.")
422 #define FUNC_NAME s_scm_char_to_integer
424 SCM_VALIDATE_CHAR (1, chr
);
425 return scm_from_uint32 (SCM_CHAR(chr
));
430 SCM_DEFINE (scm_integer_to_char
, "integer->char", 1, 0, 0,
432 "Return the character that has Unicode code point @var{n}. The integer\n"
433 "@var{n} must be a valid code point. Valid code points are in the\n"
434 "ranges 0 to @code{#xD7FF} inclusive or @code{#xE000} to\n"
435 "@code{#x10FFFF} inclusive.")
436 #define FUNC_NAME s_scm_integer_to_char
440 cn
= scm_to_wchar (n
);
442 /* Avoid the surrogates. */
443 if (!SCM_IS_UNICODE_CHAR (cn
))
444 scm_out_of_range (FUNC_NAME
, n
);
446 return SCM_MAKE_CHAR (cn
);
451 SCM_DEFINE (scm_char_upcase
, "char-upcase", 1, 0, 0,
453 "Return the uppercase character version of @var{chr}.")
454 #define FUNC_NAME s_scm_char_upcase
456 SCM_VALIDATE_CHAR (1, chr
);
457 return SCM_MAKE_CHAR (scm_c_upcase (SCM_CHAR (chr
)));
462 SCM_DEFINE (scm_char_downcase
, "char-downcase", 1, 0, 0,
464 "Return the lowercase character version of @var{chr}.")
465 #define FUNC_NAME s_scm_char_downcase
467 SCM_VALIDATE_CHAR (1, chr
);
468 return SCM_MAKE_CHAR (scm_c_downcase (SCM_CHAR(chr
)));
472 SCM_DEFINE (scm_char_titlecase
, "char-titlecase", 1, 0, 0,
474 "Return the titlecase character version of @var{chr}.")
475 #define FUNC_NAME s_scm_char_titlecase
477 SCM_VALIDATE_CHAR (1, chr
);
478 return SCM_MAKE_CHAR (scm_c_titlecase (SCM_CHAR(chr
)));
482 SCM_DEFINE (scm_char_general_category
, "char-general-category", 1, 0, 0,
484 "Return a symbol representing the Unicode general category of "
485 "@var{chr} or @code{#f} if a named category cannot be found.")
486 #define FUNC_NAME s_scm_char_general_category
489 uc_general_category_t cat
;
491 SCM_VALIDATE_CHAR (1, chr
);
492 cat
= uc_general_category (SCM_CHAR (chr
));
493 sym
= uc_general_category_name (cat
);
496 return scm_from_utf8_symbol (sym
);
506 TODO: change name to scm_i_.. ? --hwn
511 scm_c_upcase (scm_t_wchar c
)
513 return uc_toupper ((int) c
);
518 scm_c_downcase (scm_t_wchar c
)
520 return uc_tolower ((int) c
);
524 scm_c_titlecase (scm_t_wchar c
)
526 return uc_totitle ((int) c
);
531 /* There are a few sets of character names: R5RS, Guile
532 extensions for control characters, and leftover Guile extensions.
533 They are listed in order of precedence. */
535 static const char *const scm_r5rs_charnames
[] = {
539 static const scm_t_uint32
const scm_r5rs_charnums
[] = {
543 #define SCM_N_R5RS_CHARNAMES (sizeof (scm_r5rs_charnames) / sizeof (char *))
545 static const char *const scm_r6rs_charnames
[] = {
546 "nul", "alarm", "backspace", "tab", "linefeed", "vtab", "page",
547 "return", "esc", "delete"
548 /* 'space' and 'newline' are already included from the R5RS list. */
551 static const scm_t_uint32
const scm_r6rs_charnums
[] = {
552 0x00, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c,
556 #define SCM_N_R6RS_CHARNAMES (sizeof (scm_r6rs_charnames) / sizeof (char *))
558 static const char *const scm_r7rs_charnames
[] = {
562 static const scm_t_uint32
const scm_r7rs_charnums
[] = {
566 #define SCM_N_R7RS_CHARNAMES (sizeof (scm_r7rs_charnames) / sizeof (char *))
568 /* The abbreviated names for control characters. */
569 static const char *const scm_C0_control_charnames
[] = {
571 "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
572 "bs", "ht", "lf", "vt", "ff", "cr", "so", "si",
573 "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
574 "can", "em", "sub", "esc", "fs", "gs", "rs", "us",
578 static const scm_t_uint32
const scm_C0_control_charnums
[] = {
579 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
580 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
581 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
582 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
586 #define SCM_N_C0_CONTROL_CHARNAMES (sizeof (scm_C0_control_charnames) / sizeof (char *))
588 static const char *const scm_alt_charnames
[] = {
592 static const scm_t_uint32
const scm_alt_charnums
[] = {
596 #define SCM_N_ALT_CHARNAMES (sizeof (scm_alt_charnames) / sizeof (char *))
598 /* Returns the string charname for a character if it exists, or NULL
601 scm_i_charname (SCM chr
)
604 scm_t_uint32 i
= SCM_CHAR (chr
);
606 for (c
= 0; c
< SCM_N_R5RS_CHARNAMES
; c
++)
607 if (scm_r5rs_charnums
[c
] == i
)
608 return scm_r5rs_charnames
[c
];
610 for (c
= 0; c
< SCM_N_R6RS_CHARNAMES
; c
++)
611 if (scm_r6rs_charnums
[c
] == i
)
612 return scm_r6rs_charnames
[c
];
614 for (c
= 0; c
< SCM_N_R7RS_CHARNAMES
; c
++)
615 if (scm_r7rs_charnums
[c
] == i
)
616 return scm_r7rs_charnames
[c
];
618 for (c
= 0; c
< SCM_N_C0_CONTROL_CHARNAMES
; c
++)
619 if (scm_C0_control_charnums
[c
] == i
)
620 return scm_C0_control_charnames
[c
];
622 /* Since the characters in scm_alt_charnums is a subset of
623 scm_C0_control_charnums, this code is never reached. */
624 for (c
= 0; c
< SCM_N_ALT_CHARNAMES
; c
++)
625 if (scm_alt_charnums
[c
] == i
)
626 return scm_alt_charnames
[c
];
631 /* Return a character from a string charname. */
633 scm_i_charname_to_char (const char *charname
, size_t charname_len
)
637 /* The R5RS charnames. These are supposed to be case insensitive. */
638 for (c
= 0; c
< SCM_N_R5RS_CHARNAMES
; c
++)
639 if ((strlen (scm_r5rs_charnames
[c
]) == charname_len
)
640 && (!strncasecmp (scm_r5rs_charnames
[c
], charname
, charname_len
)))
641 return SCM_MAKE_CHAR (scm_r5rs_charnums
[c
]);
643 /* The R6RS charnames. R6RS says that these should be case-sensitive.
644 They are left as case-insensitive to avoid confusion. */
645 for (c
= 0; c
< SCM_N_R6RS_CHARNAMES
; c
++)
646 if ((strlen (scm_r6rs_charnames
[c
]) == charname_len
)
647 && (!strncasecmp (scm_r6rs_charnames
[c
], charname
, charname_len
)))
648 return SCM_MAKE_CHAR (scm_r6rs_charnums
[c
]);
650 /* The R7RS charnames. R7RS says that these should be case-sensitive.
651 They are left as case-insensitive to avoid confusion. */
652 for (c
= 0; c
< SCM_N_R7RS_CHARNAMES
; c
++)
653 if ((strlen (scm_r7rs_charnames
[c
]) == charname_len
)
654 && (!strncasecmp (scm_r7rs_charnames
[c
], charname
, charname_len
)))
655 return SCM_MAKE_CHAR (scm_r7rs_charnums
[c
]);
657 /* Then come the controls. By Guile convention, these are not case
659 for (c
= 0; c
< SCM_N_C0_CONTROL_CHARNAMES
; c
++)
660 if ((strlen (scm_C0_control_charnames
[c
]) == charname_len
)
661 && (!strncasecmp (scm_C0_control_charnames
[c
], charname
, charname_len
)))
662 return SCM_MAKE_CHAR (scm_C0_control_charnums
[c
]);
664 /* Lastly are some old names carried over for compatibility. */
665 for (c
= 0; c
< SCM_N_ALT_CHARNAMES
; c
++)
666 if ((strlen (scm_alt_charnames
[c
]) == charname_len
)
667 && (!strncasecmp (scm_alt_charnames
[c
], charname
, charname_len
)))
668 return SCM_MAKE_CHAR (scm_alt_charnums
[c
]);
679 #include "libguile/chars.x"