Tests for display and writing of characters
[bpt/guile.git] / libguile / chars.c
CommitLineData
904a78f1 1/* Copyright (C) 1995,1996,1998, 2000, 2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
0f2d19dd 2 *
73be1d9e 3 * This library is free software; you can redistribute it and/or
53befeb7
NJ
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
0f2d19dd 7 *
53befeb7
NJ
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
0f2d19dd 12 *
73be1d9e
MV
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
53befeb7
NJ
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
73be1d9e 17 */
1bbd0b84 18
1bbd0b84 19
0f2d19dd 20\f
dbb605f5
LC
21#ifdef HAVE_CONFIG_H
22# include <config.h>
23#endif
0f2d19dd 24
0f2d19dd 25#include <ctype.h>
465380de 26#include <limits.h>
904a78f1
MG
27#include <unicase.h>
28
a0599745
MD
29#include "libguile/_scm.h"
30#include "libguile/validate.h"
0f2d19dd 31
a0599745 32#include "libguile/chars.h"
1fdbbd4c
MV
33#include "libguile/srfi-14.h"
34
0f2d19dd
JB
35\f
36
a1ec6916 37SCM_DEFINE (scm_char_p, "char?", 1, 0, 0,
1bbd0b84 38 (SCM x),
cdbc7418 39 "Return @code{#t} iff @var{x} is a character, else @code{#f}.")
1bbd0b84 40#define FUNC_NAME s_scm_char_p
0f2d19dd 41{
7888309b 42 return scm_from_bool (SCM_CHARP(x));
0f2d19dd 43}
1bbd0b84 44#undef FUNC_NAME
0f2d19dd 45
c3ee7520 46SCM_DEFINE1 (scm_char_eq_p, "char=?", scm_tc7_rpsubr,
1bbd0b84 47 (SCM x, SCM y),
cdbc7418 48 "Return @code{#t} iff @var{x} is the same character as @var{y}, else @code{#f}.")
1bbd0b84 49#define FUNC_NAME s_scm_char_eq_p
0f2d19dd 50{
362306b9
DH
51 SCM_VALIDATE_CHAR (1, x);
52 SCM_VALIDATE_CHAR (2, y);
bc36d050 53 return scm_from_bool (scm_is_eq (x, y));
0f2d19dd 54}
1bbd0b84 55#undef FUNC_NAME
0f2d19dd
JB
56
57
c3ee7520 58SCM_DEFINE1 (scm_char_less_p, "char<?", scm_tc7_rpsubr,
1bbd0b84 59 (SCM x, SCM y),
904a78f1 60 "Return @code{#t} iff @var{x} is less than @var{y} in the Unicode sequence,\n"
cdbc7418 61 "else @code{#f}.")
1bbd0b84 62#define FUNC_NAME s_scm_char_less_p
0f2d19dd 63{
34d19ef6
HWN
64 SCM_VALIDATE_CHAR (1, x);
65 SCM_VALIDATE_CHAR (2, y);
7888309b 66 return scm_from_bool (SCM_CHAR(x) < SCM_CHAR(y));
0f2d19dd 67}
1bbd0b84 68#undef FUNC_NAME
0f2d19dd 69
c3ee7520 70SCM_DEFINE1 (scm_char_leq_p, "char<=?", scm_tc7_rpsubr,
1bbd0b84 71 (SCM x, SCM y),
cdbc7418 72 "Return @code{#t} iff @var{x} is less than or equal to @var{y} in the\n"
904a78f1 73 "Unicode sequence, else @code{#f}.")
1bbd0b84 74#define FUNC_NAME s_scm_char_leq_p
0f2d19dd 75{
34d19ef6
HWN
76 SCM_VALIDATE_CHAR (1, x);
77 SCM_VALIDATE_CHAR (2, y);
7888309b 78 return scm_from_bool (SCM_CHAR(x) <= SCM_CHAR(y));
0f2d19dd 79}
1bbd0b84 80#undef FUNC_NAME
0f2d19dd 81
c3ee7520 82SCM_DEFINE1 (scm_char_gr_p, "char>?", scm_tc7_rpsubr,
1bbd0b84 83 (SCM x, SCM y),
904a78f1 84 "Return @code{#t} iff @var{x} is greater than @var{y} in the Unicode\n"
cdbc7418 85 "sequence, else @code{#f}.")
1bbd0b84 86#define FUNC_NAME s_scm_char_gr_p
0f2d19dd 87{
34d19ef6
HWN
88 SCM_VALIDATE_CHAR (1, x);
89 SCM_VALIDATE_CHAR (2, y);
7888309b 90 return scm_from_bool (SCM_CHAR(x) > SCM_CHAR(y));
0f2d19dd 91}
1bbd0b84 92#undef FUNC_NAME
0f2d19dd 93
c3ee7520 94SCM_DEFINE1 (scm_char_geq_p, "char>=?", scm_tc7_rpsubr,
1bbd0b84 95 (SCM x, SCM y),
cdbc7418 96 "Return @code{#t} iff @var{x} is greater than or equal to @var{y} in the\n"
904a78f1 97 "Unicode sequence, else @code{#f}.")
1bbd0b84 98#define FUNC_NAME s_scm_char_geq_p
0f2d19dd 99{
34d19ef6
HWN
100 SCM_VALIDATE_CHAR (1, x);
101 SCM_VALIDATE_CHAR (2, y);
7888309b 102 return scm_from_bool (SCM_CHAR(x) >= SCM_CHAR(y));
0f2d19dd 103}
1bbd0b84 104#undef FUNC_NAME
0f2d19dd 105
c3ee7520 106SCM_DEFINE1 (scm_char_ci_eq_p, "char-ci=?", scm_tc7_rpsubr,
1bbd0b84 107 (SCM x, SCM y),
cdbc7418 108 "Return @code{#t} iff @var{x} is the same character as @var{y} ignoring\n"
904a78f1 109 "case, else @code{#f}. Case is locale free and not context sensitive.")
1bbd0b84 110#define FUNC_NAME s_scm_char_ci_eq_p
0f2d19dd 111{
34d19ef6
HWN
112 SCM_VALIDATE_CHAR (1, x);
113 SCM_VALIDATE_CHAR (2, y);
7888309b 114 return scm_from_bool (scm_c_upcase(SCM_CHAR(x))==scm_c_upcase(SCM_CHAR(y)));
0f2d19dd 115}
1bbd0b84 116#undef FUNC_NAME
0f2d19dd 117
c3ee7520 118SCM_DEFINE1 (scm_char_ci_less_p, "char-ci<?", scm_tc7_rpsubr,
1bbd0b84 119 (SCM x, SCM y),
904a78f1
MG
120 "Return @code{#t} iff the Unicode uppercase form of @var{x} is less\n"
121 "than the Unicode uppercase form @var{y} in the Unicode sequence,\n"
122 "else @code{#f}.")
1bbd0b84 123#define FUNC_NAME s_scm_char_ci_less_p
0f2d19dd 124{
34d19ef6
HWN
125 SCM_VALIDATE_CHAR (1, x);
126 SCM_VALIDATE_CHAR (2, y);
7888309b 127 return scm_from_bool ((scm_c_upcase(SCM_CHAR(x))) < scm_c_upcase(SCM_CHAR(y)));
0f2d19dd 128}
1bbd0b84 129#undef FUNC_NAME
0f2d19dd 130
c3ee7520 131SCM_DEFINE1 (scm_char_ci_leq_p, "char-ci<=?", scm_tc7_rpsubr,
1bbd0b84 132 (SCM x, SCM y),
904a78f1
MG
133 "Return @code{#t} iff the Unicode uppercase form of @var{x} is less\n"
134 "than or equal to the Unicode uppercase form of @var{y} in the\n"
135 "Unicode sequence, else @code{#f}.")
1bbd0b84 136#define FUNC_NAME s_scm_char_ci_leq_p
0f2d19dd 137{
34d19ef6
HWN
138 SCM_VALIDATE_CHAR (1, x);
139 SCM_VALIDATE_CHAR (2, y);
7888309b 140 return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) <= scm_c_upcase(SCM_CHAR(y)));
0f2d19dd 141}
1bbd0b84 142#undef FUNC_NAME
0f2d19dd 143
c3ee7520 144SCM_DEFINE1 (scm_char_ci_gr_p, "char-ci>?", scm_tc7_rpsubr,
1bbd0b84 145 (SCM x, SCM y),
904a78f1
MG
146 "Return @code{#t} iff the Unicode uppercase form of @var{x} is greater\n"
147 "than the Unicode uppercase form of @var{y} in the Unicode\n"
148 "sequence, else @code{#f}.")
1bbd0b84 149#define FUNC_NAME s_scm_char_ci_gr_p
0f2d19dd 150{
34d19ef6
HWN
151 SCM_VALIDATE_CHAR (1, x);
152 SCM_VALIDATE_CHAR (2, y);
7888309b 153 return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) > scm_c_upcase(SCM_CHAR(y)));
0f2d19dd 154}
1bbd0b84 155#undef FUNC_NAME
0f2d19dd 156
c3ee7520 157SCM_DEFINE1 (scm_char_ci_geq_p, "char-ci>=?", scm_tc7_rpsubr,
1bbd0b84 158 (SCM x, SCM y),
904a78f1
MG
159 "Return @code{#t} iff the Unicode uppercase form of @var{x} is greater\n"
160 "than or equal to the Unicode uppercase form of @var{y} in the\n"
161 "Unicode sequence, else @code{#f}.")
1bbd0b84 162#define FUNC_NAME s_scm_char_ci_geq_p
0f2d19dd 163{
34d19ef6
HWN
164 SCM_VALIDATE_CHAR (1, x);
165 SCM_VALIDATE_CHAR (2, y);
7888309b 166 return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) >= scm_c_upcase(SCM_CHAR(y)));
0f2d19dd 167}
1bbd0b84 168#undef FUNC_NAME
0f2d19dd
JB
169
170
3b3b36dd 171SCM_DEFINE (scm_char_alphabetic_p, "char-alphabetic?", 1, 0, 0,
1bbd0b84 172 (SCM chr),
1fdbbd4c 173 "Return @code{#t} iff @var{chr} is alphabetic, else @code{#f}.\n")
1bbd0b84 174#define FUNC_NAME s_scm_char_alphabetic_p
0f2d19dd 175{
1fdbbd4c 176 return scm_char_set_contains_p (scm_char_set_letter, chr);
0f2d19dd 177}
1bbd0b84 178#undef FUNC_NAME
0f2d19dd 179
3b3b36dd 180SCM_DEFINE (scm_char_numeric_p, "char-numeric?", 1, 0, 0,
1bbd0b84 181 (SCM chr),
1fdbbd4c 182 "Return @code{#t} iff @var{chr} is numeric, else @code{#f}.\n")
1bbd0b84 183#define FUNC_NAME s_scm_char_numeric_p
0f2d19dd 184{
1fdbbd4c 185 return scm_char_set_contains_p (scm_char_set_digit, chr);
0f2d19dd 186}
1bbd0b84 187#undef FUNC_NAME
0f2d19dd 188
3b3b36dd 189SCM_DEFINE (scm_char_whitespace_p, "char-whitespace?", 1, 0, 0,
1bbd0b84 190 (SCM chr),
1fdbbd4c 191 "Return @code{#t} iff @var{chr} is whitespace, else @code{#f}.\n")
1bbd0b84 192#define FUNC_NAME s_scm_char_whitespace_p
0f2d19dd 193{
1fdbbd4c 194 return scm_char_set_contains_p (scm_char_set_whitespace, chr);
0f2d19dd 195}
1bbd0b84 196#undef FUNC_NAME
0f2d19dd
JB
197
198
199
3b3b36dd 200SCM_DEFINE (scm_char_upper_case_p, "char-upper-case?", 1, 0, 0,
1bbd0b84 201 (SCM chr),
1fdbbd4c 202 "Return @code{#t} iff @var{chr} is uppercase, else @code{#f}.\n")
1bbd0b84 203#define FUNC_NAME s_scm_char_upper_case_p
0f2d19dd 204{
1fdbbd4c 205 return scm_char_set_contains_p (scm_char_set_upper_case, chr);
0f2d19dd 206}
1bbd0b84 207#undef FUNC_NAME
0f2d19dd
JB
208
209
3b3b36dd 210SCM_DEFINE (scm_char_lower_case_p, "char-lower-case?", 1, 0, 0,
1bbd0b84 211 (SCM chr),
1fdbbd4c 212 "Return @code{#t} iff @var{chr} is lowercase, else @code{#f}.\n")
1bbd0b84 213#define FUNC_NAME s_scm_char_lower_case_p
0f2d19dd 214{
1fdbbd4c 215 return scm_char_set_contains_p (scm_char_set_lower_case, chr);
0f2d19dd 216}
1bbd0b84 217#undef FUNC_NAME
0f2d19dd
JB
218
219
220
a1ec6916 221SCM_DEFINE (scm_char_is_both_p, "char-is-both?", 1, 0, 0,
1bbd0b84 222 (SCM chr),
1fdbbd4c 223 "Return @code{#t} iff @var{chr} is either uppercase or lowercase, else @code{#f}.\n")
1bbd0b84 224#define FUNC_NAME s_scm_char_is_both_p
0f2d19dd 225{
1fdbbd4c
MV
226 if (scm_is_true (scm_char_set_contains_p (scm_char_set_lower_case, chr)))
227 return SCM_BOOL_T;
228 return scm_char_set_contains_p (scm_char_set_upper_case, chr);
0f2d19dd 229}
1bbd0b84 230#undef FUNC_NAME
0f2d19dd
JB
231
232
233
234
a1ec6916 235SCM_DEFINE (scm_char_to_integer, "char->integer", 1, 0, 0,
1bbd0b84 236 (SCM chr),
cdbc7418
NJ
237 "Return the number corresponding to ordinal position of @var{chr} in the\n"
238 "ASCII sequence.")
1bbd0b84 239#define FUNC_NAME s_scm_char_to_integer
0f2d19dd 240{
34d19ef6 241 SCM_VALIDATE_CHAR (1, chr);
904a78f1 242 return scm_from_uint32 (SCM_CHAR(chr));
0f2d19dd 243}
1bbd0b84 244#undef FUNC_NAME
0f2d19dd
JB
245
246
247
3b3b36dd 248SCM_DEFINE (scm_integer_to_char, "integer->char", 1, 0, 0,
1bbd0b84 249 (SCM n),
cdbc7418 250 "Return the character at position @var{n} in the ASCII sequence.")
1bbd0b84 251#define FUNC_NAME s_scm_integer_to_char
0f2d19dd 252{
904a78f1
MG
253 scm_t_wchar cn;
254
255 cn = scm_to_wchar (n);
256
257 /* Avoid the surrogates. */
258 if (!SCM_IS_UNICODE_CHAR (cn))
259 scm_out_of_range (FUNC_NAME, n);
260
261 return SCM_MAKE_CHAR (cn);
0f2d19dd 262}
1bbd0b84 263#undef FUNC_NAME
0f2d19dd
JB
264
265
3b3b36dd 266SCM_DEFINE (scm_char_upcase, "char-upcase", 1, 0, 0,
1bbd0b84 267 (SCM chr),
cdbc7418 268 "Return the uppercase character version of @var{chr}.")
1bbd0b84 269#define FUNC_NAME s_scm_char_upcase
0f2d19dd 270{
34d19ef6 271 SCM_VALIDATE_CHAR (1, chr);
904a78f1 272 return SCM_MAKE_CHAR (scm_c_upcase (SCM_CHAR (chr)));
0f2d19dd 273}
1bbd0b84 274#undef FUNC_NAME
0f2d19dd
JB
275
276
3b3b36dd 277SCM_DEFINE (scm_char_downcase, "char-downcase", 1, 0, 0,
1bbd0b84 278 (SCM chr),
cdbc7418 279 "Return the lowercase character version of @var{chr}.")
1bbd0b84 280#define FUNC_NAME s_scm_char_downcase
0f2d19dd 281{
34d19ef6 282 SCM_VALIDATE_CHAR (1, chr);
904a78f1 283 return SCM_MAKE_CHAR (scm_c_downcase (SCM_CHAR(chr)));
0f2d19dd 284}
1bbd0b84 285#undef FUNC_NAME
0f2d19dd
JB
286
287\f
288
289
290
84fad130
HWN
291/*
292TODO: change name to scm_i_.. ? --hwn
293*/
294
1cc91f1b 295
904a78f1
MG
296scm_t_wchar
297scm_c_upcase (scm_t_wchar c)
0f2d19dd 298{
f49dbcad 299 return uc_toupper ((int) c);
0f2d19dd
JB
300}
301
1cc91f1b 302
904a78f1
MG
303scm_t_wchar
304scm_c_downcase (scm_t_wchar c)
0f2d19dd 305{
f49dbcad 306 return uc_tolower ((int) c);
0f2d19dd
JB
307}
308
77332b21 309\f
0f2d19dd 310
77332b21
MG
311/* There are a few sets of character names: R5RS, Guile
312 extensions for control characters, and leftover Guile extensions.
313 They are listed in order of precedence. */
314
64bad3f5
MG
315static const char *const scm_r5rs_charnames[] = {
316 "space", "newline"
317};
77332b21 318
64bad3f5
MG
319static const scm_t_uint32 const scm_r5rs_charnums[] = {
320 0x20, 0x0A
321};
77332b21 322
64bad3f5 323#define SCM_N_R5RS_CHARNAMES (sizeof (scm_r5rs_charnames) / sizeof (char *))
77332b21
MG
324
325/* The abbreviated names for control characters. */
64bad3f5
MG
326static const char *const scm_C0_control_charnames[] = {
327 /* C0 controls */
328 "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
329 "bs", "ht", "lf", "vt", "ff", "cr", "so", "si",
330 "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
331 "can", "em", "sub", "esc", "fs", "gs", "rs", "us",
332 "sp", "del"
333};
334
335static const scm_t_uint32 const scm_C0_control_charnums[] = {
336 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
337 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
338 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
339 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
340 0x20, 0x7f
341};
342
343#define SCM_N_C0_CONTROL_CHARNAMES (sizeof (scm_C0_control_charnames) / sizeof (char *))
344
345static const char *const scm_alt_charnames[] = {
346 "null", "backspace", "tab", "nl", "newline", "np", "page", "return",
347};
348
349static const scm_t_uint32 const scm_alt_charnums[] = {
350 0x00, 0x08, 0x09, 0x0a, 0x0a, 0x0c, 0x0c, 0x0d
351};
352
353#define SCM_N_ALT_CHARNAMES (sizeof (scm_alt_charnames) / sizeof (char *))
77332b21
MG
354
355/* Returns the string charname for a character if it exists, or NULL
356 otherwise. */
357const char *
358scm_i_charname (SCM chr)
359{
744c8724 360 size_t c;
77332b21 361 scm_t_uint32 i = SCM_CHAR (chr);
0f2d19dd 362
64bad3f5 363 for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
77332b21
MG
364 if (scm_r5rs_charnums[c] == i)
365 return scm_r5rs_charnames[c];
0f2d19dd 366
64bad3f5 367 for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
77332b21
MG
368 if (scm_C0_control_charnums[c] == i)
369 return scm_C0_control_charnames[c];
370
64bad3f5 371 for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
77332b21
MG
372 if (scm_alt_charnums[c] == i)
373 return scm_alt_charnames[i];
0f2d19dd 374
77332b21
MG
375 return NULL;
376}
377
378/* Return a character from a string charname. */
379SCM
380scm_i_charname_to_char (const char *charname, size_t charname_len)
381{
744c8724 382 size_t c;
77332b21
MG
383
384 /* The R5RS charnames. These are supposed to be case
385 insensitive. */
64bad3f5 386 for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
77332b21
MG
387 if ((strlen (scm_r5rs_charnames[c]) == charname_len)
388 && (!strncasecmp (scm_r5rs_charnames[c], charname, charname_len)))
389 return SCM_MAKE_CHAR (scm_r5rs_charnums[c]);
390
391 /* Then come the controls. These are not case sensitive. */
64bad3f5 392 for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
77332b21
MG
393 if ((strlen (scm_C0_control_charnames[c]) == charname_len)
394 && (!strncasecmp (scm_C0_control_charnames[c], charname, charname_len)))
395 return SCM_MAKE_CHAR (scm_C0_control_charnums[c]);
396
397 /* Lastly are some old names carried over for compatibility. */
64bad3f5 398 for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
77332b21
MG
399 if ((strlen (scm_alt_charnames[c]) == charname_len)
400 && (!strncasecmp (scm_alt_charnames[c], charname, charname_len)))
401 return SCM_MAKE_CHAR (scm_alt_charnums[c]);
64bad3f5 402
77332b21
MG
403 return SCM_BOOL_F;
404}
0f2d19dd
JB
405
406\f
407
1cc91f1b 408
0f2d19dd
JB
409void
410scm_init_chars ()
0f2d19dd 411{
a0599745 412#include "libguile/chars.x"
0f2d19dd
JB
413}
414
89e00824
ML
415
416/*
417 Local Variables:
418 c-file-style: "gnu"
419 End:
420*/