Update docs for Unicode characters
[bpt/guile.git] / libguile / chars.c
CommitLineData
904a78f1 1/* Copyright (C) 1995,1996,1998, 2000, 2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
0f2d19dd 2 *
73be1d9e 3 * This library is free software; you can redistribute it and/or
53befeb7
NJ
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
0f2d19dd 7 *
53befeb7
NJ
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
0f2d19dd 12 *
73be1d9e
MV
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
53befeb7
NJ
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
73be1d9e 17 */
1bbd0b84 18
1bbd0b84 19
0f2d19dd 20\f
dbb605f5
LC
21#ifdef HAVE_CONFIG_H
22# include <config.h>
23#endif
0f2d19dd 24
0f2d19dd 25#include <ctype.h>
465380de 26#include <limits.h>
904a78f1
MG
27#include <unicase.h>
28
a0599745
MD
29#include "libguile/_scm.h"
30#include "libguile/validate.h"
0f2d19dd 31
a0599745 32#include "libguile/chars.h"
1fdbbd4c
MV
33#include "libguile/srfi-14.h"
34
0f2d19dd
JB
35\f
36
a1ec6916 37SCM_DEFINE (scm_char_p, "char?", 1, 0, 0,
1bbd0b84 38 (SCM x),
cdbc7418 39 "Return @code{#t} iff @var{x} is a character, else @code{#f}.")
1bbd0b84 40#define FUNC_NAME s_scm_char_p
0f2d19dd 41{
7888309b 42 return scm_from_bool (SCM_CHARP(x));
0f2d19dd 43}
1bbd0b84 44#undef FUNC_NAME
0f2d19dd 45
c3ee7520 46SCM_DEFINE1 (scm_char_eq_p, "char=?", scm_tc7_rpsubr,
1bbd0b84 47 (SCM x, SCM y),
3f12aedb
MG
48 "Return @code{#t} iff code point of @var{x} is equal to the code point\n"
49 "of @var{y}, else @code{#f}.\n")
1bbd0b84 50#define FUNC_NAME s_scm_char_eq_p
0f2d19dd 51{
362306b9
DH
52 SCM_VALIDATE_CHAR (1, x);
53 SCM_VALIDATE_CHAR (2, y);
bc36d050 54 return scm_from_bool (scm_is_eq (x, y));
0f2d19dd 55}
1bbd0b84 56#undef FUNC_NAME
0f2d19dd
JB
57
58
c3ee7520 59SCM_DEFINE1 (scm_char_less_p, "char<?", scm_tc7_rpsubr,
1bbd0b84 60 (SCM x, SCM y),
3f12aedb
MG
61 "Return @code{#t} iff the code point of @var{x} is less than the code\n"
62 "point of @var{y}, else @code{#f}.")
1bbd0b84 63#define FUNC_NAME s_scm_char_less_p
0f2d19dd 64{
34d19ef6
HWN
65 SCM_VALIDATE_CHAR (1, x);
66 SCM_VALIDATE_CHAR (2, y);
7888309b 67 return scm_from_bool (SCM_CHAR(x) < SCM_CHAR(y));
0f2d19dd 68}
1bbd0b84 69#undef FUNC_NAME
0f2d19dd 70
c3ee7520 71SCM_DEFINE1 (scm_char_leq_p, "char<=?", scm_tc7_rpsubr,
1bbd0b84 72 (SCM x, SCM y),
3f12aedb
MG
73 "Return @code{#t} iff the code point of @var{x} is less than or equal\n"
74 "to the code point of @var{y}, else @code{#f}.")
1bbd0b84 75#define FUNC_NAME s_scm_char_leq_p
0f2d19dd 76{
34d19ef6
HWN
77 SCM_VALIDATE_CHAR (1, x);
78 SCM_VALIDATE_CHAR (2, y);
7888309b 79 return scm_from_bool (SCM_CHAR(x) <= SCM_CHAR(y));
0f2d19dd 80}
1bbd0b84 81#undef FUNC_NAME
0f2d19dd 82
c3ee7520 83SCM_DEFINE1 (scm_char_gr_p, "char>?", scm_tc7_rpsubr,
1bbd0b84 84 (SCM x, SCM y),
3f12aedb
MG
85 "Return @code{#t} iff the code point of @var{x} is greater than the\n"
86 "code point of @var{y}, else @code{#f}.")
1bbd0b84 87#define FUNC_NAME s_scm_char_gr_p
0f2d19dd 88{
34d19ef6
HWN
89 SCM_VALIDATE_CHAR (1, x);
90 SCM_VALIDATE_CHAR (2, y);
7888309b 91 return scm_from_bool (SCM_CHAR(x) > SCM_CHAR(y));
0f2d19dd 92}
1bbd0b84 93#undef FUNC_NAME
0f2d19dd 94
c3ee7520 95SCM_DEFINE1 (scm_char_geq_p, "char>=?", scm_tc7_rpsubr,
1bbd0b84 96 (SCM x, SCM y),
3f12aedb
MG
97 "Return @code{#t} iff the code point of @var{x} is greater than or\n"
98 "equal to the code point of @var{y}, else @code{#f}.")
1bbd0b84 99#define FUNC_NAME s_scm_char_geq_p
0f2d19dd 100{
34d19ef6
HWN
101 SCM_VALIDATE_CHAR (1, x);
102 SCM_VALIDATE_CHAR (2, y);
7888309b 103 return scm_from_bool (SCM_CHAR(x) >= SCM_CHAR(y));
0f2d19dd 104}
1bbd0b84 105#undef FUNC_NAME
0f2d19dd 106
3f12aedb
MG
107/* FIXME?: R6RS specifies that these comparisons are case-folded.
108 This is the same thing as comparing the uppercase characters in
109 practice, but, not in theory. Unicode has table containing their
110 definition of case-folded character mappings. A more correct
111 implementation would be to use that table and make a char-foldcase
112 function. */
113
c3ee7520 114SCM_DEFINE1 (scm_char_ci_eq_p, "char-ci=?", scm_tc7_rpsubr,
1bbd0b84 115 (SCM x, SCM y),
3f12aedb
MG
116 "Return @code{#t} iff the case-folded code point of @var{x} is the same\n"
117 "as the case-folded code point of @var{y}, else @code{#f}.")
1bbd0b84 118#define FUNC_NAME s_scm_char_ci_eq_p
0f2d19dd 119{
34d19ef6
HWN
120 SCM_VALIDATE_CHAR (1, x);
121 SCM_VALIDATE_CHAR (2, y);
7888309b 122 return scm_from_bool (scm_c_upcase(SCM_CHAR(x))==scm_c_upcase(SCM_CHAR(y)));
0f2d19dd 123}
1bbd0b84 124#undef FUNC_NAME
0f2d19dd 125
c3ee7520 126SCM_DEFINE1 (scm_char_ci_less_p, "char-ci<?", scm_tc7_rpsubr,
1bbd0b84 127 (SCM x, SCM y),
3f12aedb
MG
128 "Return @code{#t} iff the case-folded code point of @var{x} is less\n"
129 "than the case-folded code point of @var{y}, else @code{#f}.")
1bbd0b84 130#define FUNC_NAME s_scm_char_ci_less_p
0f2d19dd 131{
34d19ef6
HWN
132 SCM_VALIDATE_CHAR (1, x);
133 SCM_VALIDATE_CHAR (2, y);
7888309b 134 return scm_from_bool ((scm_c_upcase(SCM_CHAR(x))) < scm_c_upcase(SCM_CHAR(y)));
0f2d19dd 135}
1bbd0b84 136#undef FUNC_NAME
0f2d19dd 137
c3ee7520 138SCM_DEFINE1 (scm_char_ci_leq_p, "char-ci<=?", scm_tc7_rpsubr,
1bbd0b84 139 (SCM x, SCM y),
3f12aedb
MG
140 "Return @code{#t} iff the case-folded code point of @var{x} is less\n"
141 "than or equal to the case-folded code point of @var{y}, else\n"
142 "@code{#f}")
1bbd0b84 143#define FUNC_NAME s_scm_char_ci_leq_p
0f2d19dd 144{
34d19ef6
HWN
145 SCM_VALIDATE_CHAR (1, x);
146 SCM_VALIDATE_CHAR (2, y);
7888309b 147 return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) <= scm_c_upcase(SCM_CHAR(y)));
0f2d19dd 148}
1bbd0b84 149#undef FUNC_NAME
0f2d19dd 150
c3ee7520 151SCM_DEFINE1 (scm_char_ci_gr_p, "char-ci>?", scm_tc7_rpsubr,
1bbd0b84 152 (SCM x, SCM y),
3f12aedb
MG
153 "Return @code{#t} iff the case-folded code point of @var{x} is greater\n"
154 "than the case-folded code point of @var{y}, else @code{#f}.")
1bbd0b84 155#define FUNC_NAME s_scm_char_ci_gr_p
0f2d19dd 156{
34d19ef6
HWN
157 SCM_VALIDATE_CHAR (1, x);
158 SCM_VALIDATE_CHAR (2, y);
7888309b 159 return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) > scm_c_upcase(SCM_CHAR(y)));
0f2d19dd 160}
1bbd0b84 161#undef FUNC_NAME
0f2d19dd 162
c3ee7520 163SCM_DEFINE1 (scm_char_ci_geq_p, "char-ci>=?", scm_tc7_rpsubr,
1bbd0b84 164 (SCM x, SCM y),
3f12aedb
MG
165 "Return @code{#t} iff the case-folded code point of @var{x} is greater\n"
166 "than or equal to the case-folded code point of @var{y}, else\n"
167 "@code{#f}.")
1bbd0b84 168#define FUNC_NAME s_scm_char_ci_geq_p
0f2d19dd 169{
34d19ef6
HWN
170 SCM_VALIDATE_CHAR (1, x);
171 SCM_VALIDATE_CHAR (2, y);
7888309b 172 return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) >= scm_c_upcase(SCM_CHAR(y)));
0f2d19dd 173}
1bbd0b84 174#undef FUNC_NAME
0f2d19dd
JB
175
176
3b3b36dd 177SCM_DEFINE (scm_char_alphabetic_p, "char-alphabetic?", 1, 0, 0,
1bbd0b84 178 (SCM chr),
1fdbbd4c 179 "Return @code{#t} iff @var{chr} is alphabetic, else @code{#f}.\n")
1bbd0b84 180#define FUNC_NAME s_scm_char_alphabetic_p
0f2d19dd 181{
1fdbbd4c 182 return scm_char_set_contains_p (scm_char_set_letter, chr);
0f2d19dd 183}
1bbd0b84 184#undef FUNC_NAME
0f2d19dd 185
3b3b36dd 186SCM_DEFINE (scm_char_numeric_p, "char-numeric?", 1, 0, 0,
1bbd0b84 187 (SCM chr),
1fdbbd4c 188 "Return @code{#t} iff @var{chr} is numeric, else @code{#f}.\n")
1bbd0b84 189#define FUNC_NAME s_scm_char_numeric_p
0f2d19dd 190{
1fdbbd4c 191 return scm_char_set_contains_p (scm_char_set_digit, chr);
0f2d19dd 192}
1bbd0b84 193#undef FUNC_NAME
0f2d19dd 194
3b3b36dd 195SCM_DEFINE (scm_char_whitespace_p, "char-whitespace?", 1, 0, 0,
1bbd0b84 196 (SCM chr),
1fdbbd4c 197 "Return @code{#t} iff @var{chr} is whitespace, else @code{#f}.\n")
1bbd0b84 198#define FUNC_NAME s_scm_char_whitespace_p
0f2d19dd 199{
1fdbbd4c 200 return scm_char_set_contains_p (scm_char_set_whitespace, chr);
0f2d19dd 201}
1bbd0b84 202#undef FUNC_NAME
0f2d19dd
JB
203
204
3b3b36dd 205SCM_DEFINE (scm_char_upper_case_p, "char-upper-case?", 1, 0, 0,
1bbd0b84 206 (SCM chr),
1fdbbd4c 207 "Return @code{#t} iff @var{chr} is uppercase, else @code{#f}.\n")
1bbd0b84 208#define FUNC_NAME s_scm_char_upper_case_p
0f2d19dd 209{
1fdbbd4c 210 return scm_char_set_contains_p (scm_char_set_upper_case, chr);
0f2d19dd 211}
1bbd0b84 212#undef FUNC_NAME
0f2d19dd
JB
213
214
3b3b36dd 215SCM_DEFINE (scm_char_lower_case_p, "char-lower-case?", 1, 0, 0,
1bbd0b84 216 (SCM chr),
1fdbbd4c 217 "Return @code{#t} iff @var{chr} is lowercase, else @code{#f}.\n")
1bbd0b84 218#define FUNC_NAME s_scm_char_lower_case_p
0f2d19dd 219{
1fdbbd4c 220 return scm_char_set_contains_p (scm_char_set_lower_case, chr);
0f2d19dd 221}
1bbd0b84 222#undef FUNC_NAME
0f2d19dd
JB
223
224
a1ec6916 225SCM_DEFINE (scm_char_is_both_p, "char-is-both?", 1, 0, 0,
1bbd0b84 226 (SCM chr),
1fdbbd4c 227 "Return @code{#t} iff @var{chr} is either uppercase or lowercase, else @code{#f}.\n")
1bbd0b84 228#define FUNC_NAME s_scm_char_is_both_p
0f2d19dd 229{
1fdbbd4c
MV
230 if (scm_is_true (scm_char_set_contains_p (scm_char_set_lower_case, chr)))
231 return SCM_BOOL_T;
232 return scm_char_set_contains_p (scm_char_set_upper_case, chr);
0f2d19dd 233}
1bbd0b84 234#undef FUNC_NAME
0f2d19dd
JB
235
236
a1ec6916 237SCM_DEFINE (scm_char_to_integer, "char->integer", 1, 0, 0,
1bbd0b84 238 (SCM chr),
3f12aedb 239 "Return the code point of @var{chr}.")
1bbd0b84 240#define FUNC_NAME s_scm_char_to_integer
0f2d19dd 241{
34d19ef6 242 SCM_VALIDATE_CHAR (1, chr);
904a78f1 243 return scm_from_uint32 (SCM_CHAR(chr));
0f2d19dd 244}
1bbd0b84 245#undef FUNC_NAME
0f2d19dd
JB
246
247
3b3b36dd 248SCM_DEFINE (scm_integer_to_char, "integer->char", 1, 0, 0,
1bbd0b84 249 (SCM n),
3f12aedb
MG
250 "Return the character that has code point @var{n}. The integer @var{n}\n"
251 "must be a valid code point. Valid code points are in the ranges 0 to\n"
252 "@code{#xD7FF} inclusive or @code{#xE000} to @code{#x10FFFF} inclusive.")
1bbd0b84 253#define FUNC_NAME s_scm_integer_to_char
0f2d19dd 254{
904a78f1
MG
255 scm_t_wchar cn;
256
257 cn = scm_to_wchar (n);
258
259 /* Avoid the surrogates. */
260 if (!SCM_IS_UNICODE_CHAR (cn))
261 scm_out_of_range (FUNC_NAME, n);
262
263 return SCM_MAKE_CHAR (cn);
0f2d19dd 264}
1bbd0b84 265#undef FUNC_NAME
0f2d19dd
JB
266
267
3b3b36dd 268SCM_DEFINE (scm_char_upcase, "char-upcase", 1, 0, 0,
1bbd0b84 269 (SCM chr),
cdbc7418 270 "Return the uppercase character version of @var{chr}.")
1bbd0b84 271#define FUNC_NAME s_scm_char_upcase
0f2d19dd 272{
34d19ef6 273 SCM_VALIDATE_CHAR (1, chr);
904a78f1 274 return SCM_MAKE_CHAR (scm_c_upcase (SCM_CHAR (chr)));
0f2d19dd 275}
1bbd0b84 276#undef FUNC_NAME
0f2d19dd
JB
277
278
3b3b36dd 279SCM_DEFINE (scm_char_downcase, "char-downcase", 1, 0, 0,
1bbd0b84 280 (SCM chr),
cdbc7418 281 "Return the lowercase character version of @var{chr}.")
1bbd0b84 282#define FUNC_NAME s_scm_char_downcase
0f2d19dd 283{
34d19ef6 284 SCM_VALIDATE_CHAR (1, chr);
904a78f1 285 return SCM_MAKE_CHAR (scm_c_downcase (SCM_CHAR(chr)));
0f2d19dd 286}
1bbd0b84 287#undef FUNC_NAME
0f2d19dd
JB
288
289\f
290
291
292
84fad130
HWN
293/*
294TODO: change name to scm_i_.. ? --hwn
295*/
296
1cc91f1b 297
904a78f1
MG
298scm_t_wchar
299scm_c_upcase (scm_t_wchar c)
0f2d19dd 300{
f49dbcad 301 return uc_toupper ((int) c);
0f2d19dd
JB
302}
303
1cc91f1b 304
904a78f1
MG
305scm_t_wchar
306scm_c_downcase (scm_t_wchar c)
0f2d19dd 307{
f49dbcad 308 return uc_tolower ((int) c);
0f2d19dd
JB
309}
310
77332b21 311\f
0f2d19dd 312
77332b21
MG
313/* There are a few sets of character names: R5RS, Guile
314 extensions for control characters, and leftover Guile extensions.
315 They are listed in order of precedence. */
316
64bad3f5
MG
317static const char *const scm_r5rs_charnames[] = {
318 "space", "newline"
319};
77332b21 320
64bad3f5
MG
321static const scm_t_uint32 const scm_r5rs_charnums[] = {
322 0x20, 0x0A
323};
77332b21 324
64bad3f5 325#define SCM_N_R5RS_CHARNAMES (sizeof (scm_r5rs_charnames) / sizeof (char *))
77332b21
MG
326
327/* The abbreviated names for control characters. */
64bad3f5
MG
328static const char *const scm_C0_control_charnames[] = {
329 /* C0 controls */
330 "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
331 "bs", "ht", "lf", "vt", "ff", "cr", "so", "si",
332 "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
333 "can", "em", "sub", "esc", "fs", "gs", "rs", "us",
334 "sp", "del"
335};
336
337static const scm_t_uint32 const scm_C0_control_charnums[] = {
338 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
339 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
340 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
341 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
342 0x20, 0x7f
343};
344
345#define SCM_N_C0_CONTROL_CHARNAMES (sizeof (scm_C0_control_charnames) / sizeof (char *))
346
347static const char *const scm_alt_charnames[] = {
348 "null", "backspace", "tab", "nl", "newline", "np", "page", "return",
349};
350
351static const scm_t_uint32 const scm_alt_charnums[] = {
352 0x00, 0x08, 0x09, 0x0a, 0x0a, 0x0c, 0x0c, 0x0d
353};
354
355#define SCM_N_ALT_CHARNAMES (sizeof (scm_alt_charnames) / sizeof (char *))
77332b21
MG
356
357/* Returns the string charname for a character if it exists, or NULL
358 otherwise. */
359const char *
360scm_i_charname (SCM chr)
361{
744c8724 362 size_t c;
77332b21 363 scm_t_uint32 i = SCM_CHAR (chr);
0f2d19dd 364
64bad3f5 365 for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
77332b21
MG
366 if (scm_r5rs_charnums[c] == i)
367 return scm_r5rs_charnames[c];
0f2d19dd 368
64bad3f5 369 for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
77332b21
MG
370 if (scm_C0_control_charnums[c] == i)
371 return scm_C0_control_charnames[c];
372
64bad3f5 373 for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
77332b21
MG
374 if (scm_alt_charnums[c] == i)
375 return scm_alt_charnames[i];
0f2d19dd 376
77332b21
MG
377 return NULL;
378}
379
380/* Return a character from a string charname. */
381SCM
382scm_i_charname_to_char (const char *charname, size_t charname_len)
383{
744c8724 384 size_t c;
77332b21
MG
385
386 /* The R5RS charnames. These are supposed to be case
387 insensitive. */
64bad3f5 388 for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
77332b21
MG
389 if ((strlen (scm_r5rs_charnames[c]) == charname_len)
390 && (!strncasecmp (scm_r5rs_charnames[c], charname, charname_len)))
391 return SCM_MAKE_CHAR (scm_r5rs_charnums[c]);
392
393 /* Then come the controls. These are not case sensitive. */
64bad3f5 394 for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
77332b21
MG
395 if ((strlen (scm_C0_control_charnames[c]) == charname_len)
396 && (!strncasecmp (scm_C0_control_charnames[c], charname, charname_len)))
397 return SCM_MAKE_CHAR (scm_C0_control_charnums[c]);
398
399 /* Lastly are some old names carried over for compatibility. */
64bad3f5 400 for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
77332b21
MG
401 if ((strlen (scm_alt_charnames[c]) == charname_len)
402 && (!strncasecmp (scm_alt_charnames[c], charname, charname_len)))
403 return SCM_MAKE_CHAR (scm_alt_charnums[c]);
64bad3f5 404
77332b21
MG
405 return SCM_BOOL_F;
406}
0f2d19dd
JB
407
408\f
409
1cc91f1b 410
0f2d19dd
JB
411void
412scm_init_chars ()
0f2d19dd 413{
a0599745 414#include "libguile/chars.x"
0f2d19dd
JB
415}
416
89e00824
ML
417
418/*
419 Local Variables:
420 c-file-style: "gnu"
421 End:
422*/