Update docs and docstrings for Unicode characters
[bpt/guile.git] / libguile / chars.c
CommitLineData
904a78f1 1/* Copyright (C) 1995,1996,1998, 2000, 2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
0f2d19dd 2 *
73be1d9e 3 * This library is free software; you can redistribute it and/or
53befeb7
NJ
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
0f2d19dd 7 *
53befeb7
NJ
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
0f2d19dd 12 *
73be1d9e
MV
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
53befeb7
NJ
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
73be1d9e 17 */
1bbd0b84 18
1bbd0b84 19
0f2d19dd 20\f
dbb605f5
LC
21#ifdef HAVE_CONFIG_H
22# include <config.h>
23#endif
0f2d19dd 24
0f2d19dd 25#include <ctype.h>
465380de 26#include <limits.h>
904a78f1
MG
27#include <unicase.h>
28
a0599745
MD
29#include "libguile/_scm.h"
30#include "libguile/validate.h"
0f2d19dd 31
a0599745 32#include "libguile/chars.h"
1fdbbd4c
MV
33#include "libguile/srfi-14.h"
34
0f2d19dd
JB
35\f
36
a1ec6916 37SCM_DEFINE (scm_char_p, "char?", 1, 0, 0,
1bbd0b84 38 (SCM x),
cdbc7418 39 "Return @code{#t} iff @var{x} is a character, else @code{#f}.")
1bbd0b84 40#define FUNC_NAME s_scm_char_p
0f2d19dd 41{
7888309b 42 return scm_from_bool (SCM_CHARP(x));
0f2d19dd 43}
1bbd0b84 44#undef FUNC_NAME
0f2d19dd 45
c3ee7520 46SCM_DEFINE1 (scm_char_eq_p, "char=?", scm_tc7_rpsubr,
1bbd0b84 47 (SCM x, SCM y),
bb15a36c
MG
48 "Return @code{#t} if the Unicode code point of @var{x} is equal to the\n"
49 "code point of @var{y}, else @code{#f}.\n")
1bbd0b84 50#define FUNC_NAME s_scm_char_eq_p
0f2d19dd 51{
362306b9
DH
52 SCM_VALIDATE_CHAR (1, x);
53 SCM_VALIDATE_CHAR (2, y);
bc36d050 54 return scm_from_bool (scm_is_eq (x, y));
0f2d19dd 55}
1bbd0b84 56#undef FUNC_NAME
0f2d19dd
JB
57
58
c3ee7520 59SCM_DEFINE1 (scm_char_less_p, "char<?", scm_tc7_rpsubr,
1bbd0b84 60 (SCM x, SCM y),
3f12aedb
MG
61 "Return @code{#t} iff the code point of @var{x} is less than the code\n"
62 "point of @var{y}, else @code{#f}.")
1bbd0b84 63#define FUNC_NAME s_scm_char_less_p
0f2d19dd 64{
34d19ef6
HWN
65 SCM_VALIDATE_CHAR (1, x);
66 SCM_VALIDATE_CHAR (2, y);
7888309b 67 return scm_from_bool (SCM_CHAR(x) < SCM_CHAR(y));
0f2d19dd 68}
1bbd0b84 69#undef FUNC_NAME
0f2d19dd 70
c3ee7520 71SCM_DEFINE1 (scm_char_leq_p, "char<=?", scm_tc7_rpsubr,
1bbd0b84 72 (SCM x, SCM y),
bb15a36c
MG
73 "Return @code{#t} if the Unicode code point of @var{x} is less than or\n"
74 "equal to the code point of @var{y}, else @code{#f}.")
1bbd0b84 75#define FUNC_NAME s_scm_char_leq_p
0f2d19dd 76{
34d19ef6
HWN
77 SCM_VALIDATE_CHAR (1, x);
78 SCM_VALIDATE_CHAR (2, y);
7888309b 79 return scm_from_bool (SCM_CHAR(x) <= SCM_CHAR(y));
0f2d19dd 80}
1bbd0b84 81#undef FUNC_NAME
0f2d19dd 82
c3ee7520 83SCM_DEFINE1 (scm_char_gr_p, "char>?", scm_tc7_rpsubr,
1bbd0b84 84 (SCM x, SCM y),
bb15a36c
MG
85 "Return @code{#t} if the Unicode code point of @var{x} is greater than\n"
86 "the code point of @var{y}, else @code{#f}.")
1bbd0b84 87#define FUNC_NAME s_scm_char_gr_p
0f2d19dd 88{
34d19ef6
HWN
89 SCM_VALIDATE_CHAR (1, x);
90 SCM_VALIDATE_CHAR (2, y);
7888309b 91 return scm_from_bool (SCM_CHAR(x) > SCM_CHAR(y));
0f2d19dd 92}
1bbd0b84 93#undef FUNC_NAME
0f2d19dd 94
c3ee7520 95SCM_DEFINE1 (scm_char_geq_p, "char>=?", scm_tc7_rpsubr,
1bbd0b84 96 (SCM x, SCM y),
bb15a36c
MG
97 "Return @code{#t} if the Unicode code point of @var{x} is greater than\n"
98 "or equal to the code point of @var{y}, else @code{#f}.")
1bbd0b84 99#define FUNC_NAME s_scm_char_geq_p
0f2d19dd 100{
34d19ef6
HWN
101 SCM_VALIDATE_CHAR (1, x);
102 SCM_VALIDATE_CHAR (2, y);
7888309b 103 return scm_from_bool (SCM_CHAR(x) >= SCM_CHAR(y));
0f2d19dd 104}
1bbd0b84 105#undef FUNC_NAME
0f2d19dd 106
3f12aedb
MG
107/* FIXME?: R6RS specifies that these comparisons are case-folded.
108 This is the same thing as comparing the uppercase characters in
109 practice, but, not in theory. Unicode has table containing their
110 definition of case-folded character mappings. A more correct
111 implementation would be to use that table and make a char-foldcase
112 function. */
113
c3ee7520 114SCM_DEFINE1 (scm_char_ci_eq_p, "char-ci=?", scm_tc7_rpsubr,
1bbd0b84 115 (SCM x, SCM y),
bb15a36c
MG
116 "Return @code{#t} if the case-folded Unicode code point of @var{x} is\n"
117 "the same as the case-folded code point of @var{y}, else @code{#f}.")
1bbd0b84 118#define FUNC_NAME s_scm_char_ci_eq_p
0f2d19dd 119{
34d19ef6
HWN
120 SCM_VALIDATE_CHAR (1, x);
121 SCM_VALIDATE_CHAR (2, y);
7888309b 122 return scm_from_bool (scm_c_upcase(SCM_CHAR(x))==scm_c_upcase(SCM_CHAR(y)));
0f2d19dd 123}
1bbd0b84 124#undef FUNC_NAME
0f2d19dd 125
c3ee7520 126SCM_DEFINE1 (scm_char_ci_less_p, "char-ci<?", scm_tc7_rpsubr,
1bbd0b84 127 (SCM x, SCM y),
bb15a36c
MG
128 "Return @code{#t} if the case-folded Unicode code point of @var{x} is\n"
129 "less than the case-folded code point of @var{y}, else @code{#f}.")
1bbd0b84 130#define FUNC_NAME s_scm_char_ci_less_p
0f2d19dd 131{
34d19ef6
HWN
132 SCM_VALIDATE_CHAR (1, x);
133 SCM_VALIDATE_CHAR (2, y);
7888309b 134 return scm_from_bool ((scm_c_upcase(SCM_CHAR(x))) < scm_c_upcase(SCM_CHAR(y)));
0f2d19dd 135}
1bbd0b84 136#undef FUNC_NAME
0f2d19dd 137
c3ee7520 138SCM_DEFINE1 (scm_char_ci_leq_p, "char-ci<=?", scm_tc7_rpsubr,
1bbd0b84 139 (SCM x, SCM y),
bb15a36c
MG
140 "Return @code{#t} iff the case-folded Unicodd code point of @var{x} is\n"
141 "less than or equal to the case-folded code point of @var{y}, else\n"
3f12aedb 142 "@code{#f}")
1bbd0b84 143#define FUNC_NAME s_scm_char_ci_leq_p
0f2d19dd 144{
34d19ef6
HWN
145 SCM_VALIDATE_CHAR (1, x);
146 SCM_VALIDATE_CHAR (2, y);
7888309b 147 return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) <= scm_c_upcase(SCM_CHAR(y)));
0f2d19dd 148}
1bbd0b84 149#undef FUNC_NAME
0f2d19dd 150
c3ee7520 151SCM_DEFINE1 (scm_char_ci_gr_p, "char-ci>?", scm_tc7_rpsubr,
1bbd0b84 152 (SCM x, SCM y),
3f12aedb
MG
153 "Return @code{#t} iff the case-folded code point of @var{x} is greater\n"
154 "than the case-folded code point of @var{y}, else @code{#f}.")
1bbd0b84 155#define FUNC_NAME s_scm_char_ci_gr_p
0f2d19dd 156{
34d19ef6
HWN
157 SCM_VALIDATE_CHAR (1, x);
158 SCM_VALIDATE_CHAR (2, y);
7888309b 159 return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) > scm_c_upcase(SCM_CHAR(y)));
0f2d19dd 160}
1bbd0b84 161#undef FUNC_NAME
0f2d19dd 162
c3ee7520 163SCM_DEFINE1 (scm_char_ci_geq_p, "char-ci>=?", scm_tc7_rpsubr,
1bbd0b84 164 (SCM x, SCM y),
bb15a36c
MG
165 "Return @code{#t} iff the case-folded Unicode code point of @var{x} is\n"
166 "greater than or equal to the case-folded code point of @var{y}, else\n"
3f12aedb 167 "@code{#f}.")
1bbd0b84 168#define FUNC_NAME s_scm_char_ci_geq_p
0f2d19dd 169{
34d19ef6
HWN
170 SCM_VALIDATE_CHAR (1, x);
171 SCM_VALIDATE_CHAR (2, y);
7888309b 172 return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) >= scm_c_upcase(SCM_CHAR(y)));
0f2d19dd 173}
1bbd0b84 174#undef FUNC_NAME
0f2d19dd
JB
175
176
3b3b36dd 177SCM_DEFINE (scm_char_alphabetic_p, "char-alphabetic?", 1, 0, 0,
1bbd0b84 178 (SCM chr),
1fdbbd4c 179 "Return @code{#t} iff @var{chr} is alphabetic, else @code{#f}.\n")
1bbd0b84 180#define FUNC_NAME s_scm_char_alphabetic_p
0f2d19dd 181{
1fdbbd4c 182 return scm_char_set_contains_p (scm_char_set_letter, chr);
0f2d19dd 183}
1bbd0b84 184#undef FUNC_NAME
0f2d19dd 185
3b3b36dd 186SCM_DEFINE (scm_char_numeric_p, "char-numeric?", 1, 0, 0,
1bbd0b84 187 (SCM chr),
1fdbbd4c 188 "Return @code{#t} iff @var{chr} is numeric, else @code{#f}.\n")
1bbd0b84 189#define FUNC_NAME s_scm_char_numeric_p
0f2d19dd 190{
1fdbbd4c 191 return scm_char_set_contains_p (scm_char_set_digit, chr);
0f2d19dd 192}
1bbd0b84 193#undef FUNC_NAME
0f2d19dd 194
3b3b36dd 195SCM_DEFINE (scm_char_whitespace_p, "char-whitespace?", 1, 0, 0,
1bbd0b84 196 (SCM chr),
1fdbbd4c 197 "Return @code{#t} iff @var{chr} is whitespace, else @code{#f}.\n")
1bbd0b84 198#define FUNC_NAME s_scm_char_whitespace_p
0f2d19dd 199{
1fdbbd4c 200 return scm_char_set_contains_p (scm_char_set_whitespace, chr);
0f2d19dd 201}
1bbd0b84 202#undef FUNC_NAME
0f2d19dd
JB
203
204
3b3b36dd 205SCM_DEFINE (scm_char_upper_case_p, "char-upper-case?", 1, 0, 0,
1bbd0b84 206 (SCM chr),
1fdbbd4c 207 "Return @code{#t} iff @var{chr} is uppercase, else @code{#f}.\n")
1bbd0b84 208#define FUNC_NAME s_scm_char_upper_case_p
0f2d19dd 209{
1fdbbd4c 210 return scm_char_set_contains_p (scm_char_set_upper_case, chr);
0f2d19dd 211}
1bbd0b84 212#undef FUNC_NAME
0f2d19dd
JB
213
214
3b3b36dd 215SCM_DEFINE (scm_char_lower_case_p, "char-lower-case?", 1, 0, 0,
1bbd0b84 216 (SCM chr),
1fdbbd4c 217 "Return @code{#t} iff @var{chr} is lowercase, else @code{#f}.\n")
1bbd0b84 218#define FUNC_NAME s_scm_char_lower_case_p
0f2d19dd 219{
1fdbbd4c 220 return scm_char_set_contains_p (scm_char_set_lower_case, chr);
0f2d19dd 221}
1bbd0b84 222#undef FUNC_NAME
0f2d19dd
JB
223
224
a1ec6916 225SCM_DEFINE (scm_char_is_both_p, "char-is-both?", 1, 0, 0,
1bbd0b84 226 (SCM chr),
bb15a36c
MG
227 "Return @code{#t} iff @var{chr} is either uppercase or lowercase, else\n"
228 "@code{#f}.\n")
1bbd0b84 229#define FUNC_NAME s_scm_char_is_both_p
0f2d19dd 230{
1fdbbd4c
MV
231 if (scm_is_true (scm_char_set_contains_p (scm_char_set_lower_case, chr)))
232 return SCM_BOOL_T;
233 return scm_char_set_contains_p (scm_char_set_upper_case, chr);
0f2d19dd 234}
1bbd0b84 235#undef FUNC_NAME
0f2d19dd
JB
236
237
a1ec6916 238SCM_DEFINE (scm_char_to_integer, "char->integer", 1, 0, 0,
1bbd0b84 239 (SCM chr),
bb15a36c 240 "Return the Unicode code point of @var{chr}.")
1bbd0b84 241#define FUNC_NAME s_scm_char_to_integer
0f2d19dd 242{
34d19ef6 243 SCM_VALIDATE_CHAR (1, chr);
904a78f1 244 return scm_from_uint32 (SCM_CHAR(chr));
0f2d19dd 245}
1bbd0b84 246#undef FUNC_NAME
0f2d19dd
JB
247
248
3b3b36dd 249SCM_DEFINE (scm_integer_to_char, "integer->char", 1, 0, 0,
1bbd0b84 250 (SCM n),
bb15a36c
MG
251 "Return the character that has Unicode code point @var{n}. The integer\n"
252 "@var{n} must be a valid code point. Valid code points are in the\n"
253 "ranges 0 to @code{#xD7FF} inclusive or @code{#xE000} to\n"
254 "@code{#x10FFFF} inclusive.")
1bbd0b84 255#define FUNC_NAME s_scm_integer_to_char
0f2d19dd 256{
904a78f1
MG
257 scm_t_wchar cn;
258
259 cn = scm_to_wchar (n);
260
261 /* Avoid the surrogates. */
262 if (!SCM_IS_UNICODE_CHAR (cn))
263 scm_out_of_range (FUNC_NAME, n);
264
265 return SCM_MAKE_CHAR (cn);
0f2d19dd 266}
1bbd0b84 267#undef FUNC_NAME
0f2d19dd
JB
268
269
3b3b36dd 270SCM_DEFINE (scm_char_upcase, "char-upcase", 1, 0, 0,
1bbd0b84 271 (SCM chr),
cdbc7418 272 "Return the uppercase character version of @var{chr}.")
1bbd0b84 273#define FUNC_NAME s_scm_char_upcase
0f2d19dd 274{
34d19ef6 275 SCM_VALIDATE_CHAR (1, chr);
904a78f1 276 return SCM_MAKE_CHAR (scm_c_upcase (SCM_CHAR (chr)));
0f2d19dd 277}
1bbd0b84 278#undef FUNC_NAME
0f2d19dd
JB
279
280
3b3b36dd 281SCM_DEFINE (scm_char_downcase, "char-downcase", 1, 0, 0,
1bbd0b84 282 (SCM chr),
cdbc7418 283 "Return the lowercase character version of @var{chr}.")
1bbd0b84 284#define FUNC_NAME s_scm_char_downcase
0f2d19dd 285{
34d19ef6 286 SCM_VALIDATE_CHAR (1, chr);
904a78f1 287 return SCM_MAKE_CHAR (scm_c_downcase (SCM_CHAR(chr)));
0f2d19dd 288}
1bbd0b84 289#undef FUNC_NAME
0f2d19dd
JB
290
291\f
292
293
294
84fad130
HWN
295/*
296TODO: change name to scm_i_.. ? --hwn
297*/
298
1cc91f1b 299
904a78f1
MG
300scm_t_wchar
301scm_c_upcase (scm_t_wchar c)
0f2d19dd 302{
f49dbcad 303 return uc_toupper ((int) c);
0f2d19dd
JB
304}
305
1cc91f1b 306
904a78f1
MG
307scm_t_wchar
308scm_c_downcase (scm_t_wchar c)
0f2d19dd 309{
f49dbcad 310 return uc_tolower ((int) c);
0f2d19dd
JB
311}
312
77332b21 313\f
0f2d19dd 314
77332b21
MG
315/* There are a few sets of character names: R5RS, Guile
316 extensions for control characters, and leftover Guile extensions.
317 They are listed in order of precedence. */
318
64bad3f5
MG
319static const char *const scm_r5rs_charnames[] = {
320 "space", "newline"
321};
77332b21 322
64bad3f5
MG
323static const scm_t_uint32 const scm_r5rs_charnums[] = {
324 0x20, 0x0A
325};
77332b21 326
64bad3f5 327#define SCM_N_R5RS_CHARNAMES (sizeof (scm_r5rs_charnames) / sizeof (char *))
77332b21
MG
328
329/* The abbreviated names for control characters. */
64bad3f5
MG
330static const char *const scm_C0_control_charnames[] = {
331 /* C0 controls */
332 "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
333 "bs", "ht", "lf", "vt", "ff", "cr", "so", "si",
334 "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
335 "can", "em", "sub", "esc", "fs", "gs", "rs", "us",
336 "sp", "del"
337};
338
339static const scm_t_uint32 const scm_C0_control_charnums[] = {
340 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
341 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
342 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
343 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
344 0x20, 0x7f
345};
346
347#define SCM_N_C0_CONTROL_CHARNAMES (sizeof (scm_C0_control_charnames) / sizeof (char *))
348
349static const char *const scm_alt_charnames[] = {
350 "null", "backspace", "tab", "nl", "newline", "np", "page", "return",
351};
352
353static const scm_t_uint32 const scm_alt_charnums[] = {
354 0x00, 0x08, 0x09, 0x0a, 0x0a, 0x0c, 0x0c, 0x0d
355};
356
357#define SCM_N_ALT_CHARNAMES (sizeof (scm_alt_charnames) / sizeof (char *))
77332b21
MG
358
359/* Returns the string charname for a character if it exists, or NULL
360 otherwise. */
361const char *
362scm_i_charname (SCM chr)
363{
744c8724 364 size_t c;
77332b21 365 scm_t_uint32 i = SCM_CHAR (chr);
0f2d19dd 366
64bad3f5 367 for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
77332b21
MG
368 if (scm_r5rs_charnums[c] == i)
369 return scm_r5rs_charnames[c];
0f2d19dd 370
64bad3f5 371 for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
77332b21
MG
372 if (scm_C0_control_charnums[c] == i)
373 return scm_C0_control_charnames[c];
374
64bad3f5 375 for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
77332b21
MG
376 if (scm_alt_charnums[c] == i)
377 return scm_alt_charnames[i];
0f2d19dd 378
77332b21
MG
379 return NULL;
380}
381
382/* Return a character from a string charname. */
383SCM
384scm_i_charname_to_char (const char *charname, size_t charname_len)
385{
744c8724 386 size_t c;
77332b21
MG
387
388 /* The R5RS charnames. These are supposed to be case
389 insensitive. */
64bad3f5 390 for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
77332b21
MG
391 if ((strlen (scm_r5rs_charnames[c]) == charname_len)
392 && (!strncasecmp (scm_r5rs_charnames[c], charname, charname_len)))
393 return SCM_MAKE_CHAR (scm_r5rs_charnums[c]);
394
395 /* Then come the controls. These are not case sensitive. */
64bad3f5 396 for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
77332b21
MG
397 if ((strlen (scm_C0_control_charnames[c]) == charname_len)
398 && (!strncasecmp (scm_C0_control_charnames[c], charname, charname_len)))
399 return SCM_MAKE_CHAR (scm_C0_control_charnums[c]);
400
401 /* Lastly are some old names carried over for compatibility. */
64bad3f5 402 for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
77332b21
MG
403 if ((strlen (scm_alt_charnames[c]) == charname_len)
404 && (!strncasecmp (scm_alt_charnames[c], charname, charname_len)))
405 return SCM_MAKE_CHAR (scm_alt_charnums[c]);
64bad3f5 406
77332b21
MG
407 return SCM_BOOL_F;
408}
0f2d19dd
JB
409
410\f
411
1cc91f1b 412
0f2d19dd
JB
413void
414scm_init_chars ()
0f2d19dd 415{
a0599745 416#include "libguile/chars.x"
0f2d19dd
JB
417}
418
89e00824
ML
419
420/*
421 Local Variables:
422 c-file-style: "gnu"
423 End:
424*/