Avoid unitialized and unused warnings in scm_string_append
[bpt/guile.git] / libguile / chars.c
CommitLineData
904a78f1 1/* Copyright (C) 1995,1996,1998, 2000, 2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
0f2d19dd 2 *
73be1d9e 3 * This library is free software; you can redistribute it and/or
53befeb7
NJ
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
0f2d19dd 7 *
53befeb7
NJ
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
0f2d19dd 12 *
73be1d9e
MV
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
53befeb7
NJ
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
73be1d9e 17 */
1bbd0b84 18
1bbd0b84 19
0f2d19dd 20\f
dbb605f5
LC
21#ifdef HAVE_CONFIG_H
22# include <config.h>
23#endif
0f2d19dd 24
0f2d19dd 25#include <ctype.h>
465380de 26#include <limits.h>
904a78f1
MG
27#include <unicase.h>
28
a0599745
MD
29#include "libguile/_scm.h"
30#include "libguile/validate.h"
0f2d19dd 31
a0599745 32#include "libguile/chars.h"
1fdbbd4c
MV
33#include "libguile/srfi-14.h"
34
0f2d19dd
JB
35\f
36
a1ec6916 37SCM_DEFINE (scm_char_p, "char?", 1, 0, 0,
1bbd0b84 38 (SCM x),
cdbc7418 39 "Return @code{#t} iff @var{x} is a character, else @code{#f}.")
1bbd0b84 40#define FUNC_NAME s_scm_char_p
0f2d19dd 41{
7888309b 42 return scm_from_bool (SCM_CHARP(x));
0f2d19dd 43}
1bbd0b84 44#undef FUNC_NAME
0f2d19dd 45
c3ee7520 46SCM_DEFINE1 (scm_char_eq_p, "char=?", scm_tc7_rpsubr,
1bbd0b84 47 (SCM x, SCM y),
cdbc7418 48 "Return @code{#t} iff @var{x} is the same character as @var{y}, else @code{#f}.")
1bbd0b84 49#define FUNC_NAME s_scm_char_eq_p
0f2d19dd 50{
362306b9
DH
51 SCM_VALIDATE_CHAR (1, x);
52 SCM_VALIDATE_CHAR (2, y);
bc36d050 53 return scm_from_bool (scm_is_eq (x, y));
0f2d19dd 54}
1bbd0b84 55#undef FUNC_NAME
0f2d19dd
JB
56
57
c3ee7520 58SCM_DEFINE1 (scm_char_less_p, "char<?", scm_tc7_rpsubr,
1bbd0b84 59 (SCM x, SCM y),
904a78f1 60 "Return @code{#t} iff @var{x} is less than @var{y} in the Unicode sequence,\n"
cdbc7418 61 "else @code{#f}.")
1bbd0b84 62#define FUNC_NAME s_scm_char_less_p
0f2d19dd 63{
34d19ef6
HWN
64 SCM_VALIDATE_CHAR (1, x);
65 SCM_VALIDATE_CHAR (2, y);
7888309b 66 return scm_from_bool (SCM_CHAR(x) < SCM_CHAR(y));
0f2d19dd 67}
1bbd0b84 68#undef FUNC_NAME
0f2d19dd 69
c3ee7520 70SCM_DEFINE1 (scm_char_leq_p, "char<=?", scm_tc7_rpsubr,
1bbd0b84 71 (SCM x, SCM y),
cdbc7418 72 "Return @code{#t} iff @var{x} is less than or equal to @var{y} in the\n"
904a78f1 73 "Unicode sequence, else @code{#f}.")
1bbd0b84 74#define FUNC_NAME s_scm_char_leq_p
0f2d19dd 75{
34d19ef6
HWN
76 SCM_VALIDATE_CHAR (1, x);
77 SCM_VALIDATE_CHAR (2, y);
7888309b 78 return scm_from_bool (SCM_CHAR(x) <= SCM_CHAR(y));
0f2d19dd 79}
1bbd0b84 80#undef FUNC_NAME
0f2d19dd 81
c3ee7520 82SCM_DEFINE1 (scm_char_gr_p, "char>?", scm_tc7_rpsubr,
1bbd0b84 83 (SCM x, SCM y),
904a78f1 84 "Return @code{#t} iff @var{x} is greater than @var{y} in the Unicode\n"
cdbc7418 85 "sequence, else @code{#f}.")
1bbd0b84 86#define FUNC_NAME s_scm_char_gr_p
0f2d19dd 87{
34d19ef6
HWN
88 SCM_VALIDATE_CHAR (1, x);
89 SCM_VALIDATE_CHAR (2, y);
7888309b 90 return scm_from_bool (SCM_CHAR(x) > SCM_CHAR(y));
0f2d19dd 91}
1bbd0b84 92#undef FUNC_NAME
0f2d19dd 93
c3ee7520 94SCM_DEFINE1 (scm_char_geq_p, "char>=?", scm_tc7_rpsubr,
1bbd0b84 95 (SCM x, SCM y),
cdbc7418 96 "Return @code{#t} iff @var{x} is greater than or equal to @var{y} in the\n"
904a78f1 97 "Unicode sequence, else @code{#f}.")
1bbd0b84 98#define FUNC_NAME s_scm_char_geq_p
0f2d19dd 99{
34d19ef6
HWN
100 SCM_VALIDATE_CHAR (1, x);
101 SCM_VALIDATE_CHAR (2, y);
7888309b 102 return scm_from_bool (SCM_CHAR(x) >= SCM_CHAR(y));
0f2d19dd 103}
1bbd0b84 104#undef FUNC_NAME
0f2d19dd 105
c3ee7520 106SCM_DEFINE1 (scm_char_ci_eq_p, "char-ci=?", scm_tc7_rpsubr,
1bbd0b84 107 (SCM x, SCM y),
cdbc7418 108 "Return @code{#t} iff @var{x} is the same character as @var{y} ignoring\n"
904a78f1 109 "case, else @code{#f}. Case is locale free and not context sensitive.")
1bbd0b84 110#define FUNC_NAME s_scm_char_ci_eq_p
0f2d19dd 111{
34d19ef6
HWN
112 SCM_VALIDATE_CHAR (1, x);
113 SCM_VALIDATE_CHAR (2, y);
7888309b 114 return scm_from_bool (scm_c_upcase(SCM_CHAR(x))==scm_c_upcase(SCM_CHAR(y)));
0f2d19dd 115}
1bbd0b84 116#undef FUNC_NAME
0f2d19dd 117
c3ee7520 118SCM_DEFINE1 (scm_char_ci_less_p, "char-ci<?", scm_tc7_rpsubr,
1bbd0b84 119 (SCM x, SCM y),
904a78f1
MG
120 "Return @code{#t} iff the Unicode uppercase form of @var{x} is less\n"
121 "than the Unicode uppercase form @var{y} in the Unicode sequence,\n"
122 "else @code{#f}.")
1bbd0b84 123#define FUNC_NAME s_scm_char_ci_less_p
0f2d19dd 124{
34d19ef6
HWN
125 SCM_VALIDATE_CHAR (1, x);
126 SCM_VALIDATE_CHAR (2, y);
7888309b 127 return scm_from_bool ((scm_c_upcase(SCM_CHAR(x))) < scm_c_upcase(SCM_CHAR(y)));
0f2d19dd 128}
1bbd0b84 129#undef FUNC_NAME
0f2d19dd 130
c3ee7520 131SCM_DEFINE1 (scm_char_ci_leq_p, "char-ci<=?", scm_tc7_rpsubr,
1bbd0b84 132 (SCM x, SCM y),
904a78f1
MG
133 "Return @code{#t} iff the Unicode uppercase form of @var{x} is less\n"
134 "than or equal to the Unicode uppercase form of @var{y} in the\n"
135 "Unicode sequence, else @code{#f}.")
1bbd0b84 136#define FUNC_NAME s_scm_char_ci_leq_p
0f2d19dd 137{
34d19ef6
HWN
138 SCM_VALIDATE_CHAR (1, x);
139 SCM_VALIDATE_CHAR (2, y);
7888309b 140 return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) <= scm_c_upcase(SCM_CHAR(y)));
0f2d19dd 141}
1bbd0b84 142#undef FUNC_NAME
0f2d19dd 143
c3ee7520 144SCM_DEFINE1 (scm_char_ci_gr_p, "char-ci>?", scm_tc7_rpsubr,
1bbd0b84 145 (SCM x, SCM y),
904a78f1
MG
146 "Return @code{#t} iff the Unicode uppercase form of @var{x} is greater\n"
147 "than the Unicode uppercase form of @var{y} in the Unicode\n"
148 "sequence, else @code{#f}.")
1bbd0b84 149#define FUNC_NAME s_scm_char_ci_gr_p
0f2d19dd 150{
34d19ef6
HWN
151 SCM_VALIDATE_CHAR (1, x);
152 SCM_VALIDATE_CHAR (2, y);
7888309b 153 return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) > scm_c_upcase(SCM_CHAR(y)));
0f2d19dd 154}
1bbd0b84 155#undef FUNC_NAME
0f2d19dd 156
c3ee7520 157SCM_DEFINE1 (scm_char_ci_geq_p, "char-ci>=?", scm_tc7_rpsubr,
1bbd0b84 158 (SCM x, SCM y),
904a78f1
MG
159 "Return @code{#t} iff the Unicode uppercase form of @var{x} is greater\n"
160 "than or equal to the Unicode uppercase form of @var{y} in the\n"
161 "Unicode sequence, else @code{#f}.")
1bbd0b84 162#define FUNC_NAME s_scm_char_ci_geq_p
0f2d19dd 163{
34d19ef6
HWN
164 SCM_VALIDATE_CHAR (1, x);
165 SCM_VALIDATE_CHAR (2, y);
7888309b 166 return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) >= scm_c_upcase(SCM_CHAR(y)));
0f2d19dd 167}
1bbd0b84 168#undef FUNC_NAME
0f2d19dd
JB
169
170
3b3b36dd 171SCM_DEFINE (scm_char_alphabetic_p, "char-alphabetic?", 1, 0, 0,
1bbd0b84 172 (SCM chr),
1fdbbd4c 173 "Return @code{#t} iff @var{chr} is alphabetic, else @code{#f}.\n")
1bbd0b84 174#define FUNC_NAME s_scm_char_alphabetic_p
0f2d19dd 175{
1fdbbd4c 176 return scm_char_set_contains_p (scm_char_set_letter, chr);
0f2d19dd 177}
1bbd0b84 178#undef FUNC_NAME
0f2d19dd 179
3b3b36dd 180SCM_DEFINE (scm_char_numeric_p, "char-numeric?", 1, 0, 0,
1bbd0b84 181 (SCM chr),
1fdbbd4c 182 "Return @code{#t} iff @var{chr} is numeric, else @code{#f}.\n")
1bbd0b84 183#define FUNC_NAME s_scm_char_numeric_p
0f2d19dd 184{
1fdbbd4c 185 return scm_char_set_contains_p (scm_char_set_digit, chr);
0f2d19dd 186}
1bbd0b84 187#undef FUNC_NAME
0f2d19dd 188
3b3b36dd 189SCM_DEFINE (scm_char_whitespace_p, "char-whitespace?", 1, 0, 0,
1bbd0b84 190 (SCM chr),
1fdbbd4c 191 "Return @code{#t} iff @var{chr} is whitespace, else @code{#f}.\n")
1bbd0b84 192#define FUNC_NAME s_scm_char_whitespace_p
0f2d19dd 193{
1fdbbd4c 194 return scm_char_set_contains_p (scm_char_set_whitespace, chr);
0f2d19dd 195}
1bbd0b84 196#undef FUNC_NAME
0f2d19dd
JB
197
198
199
3b3b36dd 200SCM_DEFINE (scm_char_upper_case_p, "char-upper-case?", 1, 0, 0,
1bbd0b84 201 (SCM chr),
1fdbbd4c 202 "Return @code{#t} iff @var{chr} is uppercase, else @code{#f}.\n")
1bbd0b84 203#define FUNC_NAME s_scm_char_upper_case_p
0f2d19dd 204{
1fdbbd4c 205 return scm_char_set_contains_p (scm_char_set_upper_case, chr);
0f2d19dd 206}
1bbd0b84 207#undef FUNC_NAME
0f2d19dd
JB
208
209
3b3b36dd 210SCM_DEFINE (scm_char_lower_case_p, "char-lower-case?", 1, 0, 0,
1bbd0b84 211 (SCM chr),
1fdbbd4c 212 "Return @code{#t} iff @var{chr} is lowercase, else @code{#f}.\n")
1bbd0b84 213#define FUNC_NAME s_scm_char_lower_case_p
0f2d19dd 214{
1fdbbd4c 215 return scm_char_set_contains_p (scm_char_set_lower_case, chr);
0f2d19dd 216}
1bbd0b84 217#undef FUNC_NAME
0f2d19dd
JB
218
219
220
a1ec6916 221SCM_DEFINE (scm_char_is_both_p, "char-is-both?", 1, 0, 0,
1bbd0b84 222 (SCM chr),
1fdbbd4c 223 "Return @code{#t} iff @var{chr} is either uppercase or lowercase, else @code{#f}.\n")
1bbd0b84 224#define FUNC_NAME s_scm_char_is_both_p
0f2d19dd 225{
1fdbbd4c
MV
226 if (scm_is_true (scm_char_set_contains_p (scm_char_set_lower_case, chr)))
227 return SCM_BOOL_T;
228 return scm_char_set_contains_p (scm_char_set_upper_case, chr);
0f2d19dd 229}
1bbd0b84 230#undef FUNC_NAME
0f2d19dd
JB
231
232
233
234
a1ec6916 235SCM_DEFINE (scm_char_to_integer, "char->integer", 1, 0, 0,
1bbd0b84 236 (SCM chr),
cdbc7418
NJ
237 "Return the number corresponding to ordinal position of @var{chr} in the\n"
238 "ASCII sequence.")
1bbd0b84 239#define FUNC_NAME s_scm_char_to_integer
0f2d19dd 240{
34d19ef6 241 SCM_VALIDATE_CHAR (1, chr);
904a78f1 242 return scm_from_uint32 (SCM_CHAR(chr));
0f2d19dd 243}
1bbd0b84 244#undef FUNC_NAME
0f2d19dd
JB
245
246
247
3b3b36dd 248SCM_DEFINE (scm_integer_to_char, "integer->char", 1, 0, 0,
1bbd0b84 249 (SCM n),
cdbc7418 250 "Return the character at position @var{n} in the ASCII sequence.")
1bbd0b84 251#define FUNC_NAME s_scm_integer_to_char
0f2d19dd 252{
904a78f1
MG
253 scm_t_wchar cn;
254
255 cn = scm_to_wchar (n);
256
257 /* Avoid the surrogates. */
258 if (!SCM_IS_UNICODE_CHAR (cn))
259 scm_out_of_range (FUNC_NAME, n);
260
261 return SCM_MAKE_CHAR (cn);
0f2d19dd 262}
1bbd0b84 263#undef FUNC_NAME
0f2d19dd
JB
264
265
3b3b36dd 266SCM_DEFINE (scm_char_upcase, "char-upcase", 1, 0, 0,
1bbd0b84 267 (SCM chr),
cdbc7418 268 "Return the uppercase character version of @var{chr}.")
1bbd0b84 269#define FUNC_NAME s_scm_char_upcase
0f2d19dd 270{
34d19ef6 271 SCM_VALIDATE_CHAR (1, chr);
904a78f1 272 return SCM_MAKE_CHAR (scm_c_upcase (SCM_CHAR (chr)));
0f2d19dd 273}
1bbd0b84 274#undef FUNC_NAME
0f2d19dd
JB
275
276
3b3b36dd 277SCM_DEFINE (scm_char_downcase, "char-downcase", 1, 0, 0,
1bbd0b84 278 (SCM chr),
cdbc7418 279 "Return the lowercase character version of @var{chr}.")
1bbd0b84 280#define FUNC_NAME s_scm_char_downcase
0f2d19dd 281{
34d19ef6 282 SCM_VALIDATE_CHAR (1, chr);
904a78f1 283 return SCM_MAKE_CHAR (scm_c_downcase (SCM_CHAR(chr)));
0f2d19dd 284}
1bbd0b84 285#undef FUNC_NAME
0f2d19dd
JB
286
287\f
288
289
290
84fad130
HWN
291/*
292TODO: change name to scm_i_.. ? --hwn
293*/
294
1cc91f1b 295
904a78f1
MG
296scm_t_wchar
297scm_c_upcase (scm_t_wchar c)
0f2d19dd 298{
4cd00cc6
MG
299 if (c > 255)
300 return c;
301
302 return toupper ((int) c);
0f2d19dd
JB
303}
304
1cc91f1b 305
904a78f1
MG
306scm_t_wchar
307scm_c_downcase (scm_t_wchar c)
0f2d19dd 308{
4cd00cc6
MG
309 if (c > 255)
310 return c;
311
312 return tolower ((int) c);
0f2d19dd
JB
313}
314
77332b21 315\f
0f2d19dd 316
77332b21
MG
317/* There are a few sets of character names: R5RS, Guile
318 extensions for control characters, and leftover Guile extensions.
319 They are listed in order of precedence. */
320
64bad3f5
MG
321static const char *const scm_r5rs_charnames[] = {
322 "space", "newline"
323};
77332b21 324
64bad3f5
MG
325static const scm_t_uint32 const scm_r5rs_charnums[] = {
326 0x20, 0x0A
327};
77332b21 328
64bad3f5 329#define SCM_N_R5RS_CHARNAMES (sizeof (scm_r5rs_charnames) / sizeof (char *))
77332b21
MG
330
331/* The abbreviated names for control characters. */
64bad3f5
MG
332static const char *const scm_C0_control_charnames[] = {
333 /* C0 controls */
334 "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
335 "bs", "ht", "lf", "vt", "ff", "cr", "so", "si",
336 "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
337 "can", "em", "sub", "esc", "fs", "gs", "rs", "us",
338 "sp", "del"
339};
340
341static const scm_t_uint32 const scm_C0_control_charnums[] = {
342 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
343 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
344 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
345 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
346 0x20, 0x7f
347};
348
349#define SCM_N_C0_CONTROL_CHARNAMES (sizeof (scm_C0_control_charnames) / sizeof (char *))
350
351static const char *const scm_alt_charnames[] = {
352 "null", "backspace", "tab", "nl", "newline", "np", "page", "return",
353};
354
355static const scm_t_uint32 const scm_alt_charnums[] = {
356 0x00, 0x08, 0x09, 0x0a, 0x0a, 0x0c, 0x0c, 0x0d
357};
358
359#define SCM_N_ALT_CHARNAMES (sizeof (scm_alt_charnames) / sizeof (char *))
77332b21
MG
360
361/* Returns the string charname for a character if it exists, or NULL
362 otherwise. */
363const char *
364scm_i_charname (SCM chr)
365{
366 int c;
367 scm_t_uint32 i = SCM_CHAR (chr);
0f2d19dd 368
64bad3f5 369 for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
77332b21
MG
370 if (scm_r5rs_charnums[c] == i)
371 return scm_r5rs_charnames[c];
0f2d19dd 372
64bad3f5 373 for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
77332b21
MG
374 if (scm_C0_control_charnums[c] == i)
375 return scm_C0_control_charnames[c];
376
64bad3f5 377 for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
77332b21
MG
378 if (scm_alt_charnums[c] == i)
379 return scm_alt_charnames[i];
0f2d19dd 380
77332b21
MG
381 return NULL;
382}
383
384/* Return a character from a string charname. */
385SCM
386scm_i_charname_to_char (const char *charname, size_t charname_len)
387{
388 int c;
389
390 /* The R5RS charnames. These are supposed to be case
391 insensitive. */
64bad3f5 392 for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
77332b21
MG
393 if ((strlen (scm_r5rs_charnames[c]) == charname_len)
394 && (!strncasecmp (scm_r5rs_charnames[c], charname, charname_len)))
395 return SCM_MAKE_CHAR (scm_r5rs_charnums[c]);
396
397 /* Then come the controls. These are not case sensitive. */
64bad3f5 398 for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
77332b21
MG
399 if ((strlen (scm_C0_control_charnames[c]) == charname_len)
400 && (!strncasecmp (scm_C0_control_charnames[c], charname, charname_len)))
401 return SCM_MAKE_CHAR (scm_C0_control_charnums[c]);
402
403 /* Lastly are some old names carried over for compatibility. */
64bad3f5 404 for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
77332b21
MG
405 if ((strlen (scm_alt_charnames[c]) == charname_len)
406 && (!strncasecmp (scm_alt_charnames[c], charname, charname_len)))
407 return SCM_MAKE_CHAR (scm_alt_charnums[c]);
64bad3f5 408
77332b21
MG
409 return SCM_BOOL_F;
410}
0f2d19dd
JB
411
412\f
413
1cc91f1b 414
0f2d19dd
JB
415void
416scm_init_chars ()
0f2d19dd 417{
a0599745 418#include "libguile/chars.x"
0f2d19dd
JB
419}
420
89e00824
ML
421
422/*
423 Local Variables:
424 c-file-style: "gnu"
425 End:
426*/