Merge branch 'ossau-gds-dev'
[bpt/guile.git] / libguile / chars.c
1 /* Copyright (C) 1995,1996,1998, 2000, 2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
17 */
18
19
20 \f
21 #ifdef HAVE_CONFIG_H
22 # include <config.h>
23 #endif
24
25 #include <ctype.h>
26 #include <limits.h>
27 #include <unicase.h>
28
29 #include "libguile/_scm.h"
30 #include "libguile/validate.h"
31
32 #include "libguile/chars.h"
33 #include "libguile/srfi-14.h"
34
35 \f
36
37 SCM_DEFINE (scm_char_p, "char?", 1, 0, 0,
38 (SCM x),
39 "Return @code{#t} iff @var{x} is a character, else @code{#f}.")
40 #define FUNC_NAME s_scm_char_p
41 {
42 return scm_from_bool (SCM_CHARP(x));
43 }
44 #undef FUNC_NAME
45
46 SCM_DEFINE1 (scm_char_eq_p, "char=?", scm_tc7_rpsubr,
47 (SCM x, SCM y),
48 "Return @code{#t} iff @var{x} is the same character as @var{y}, else @code{#f}.")
49 #define FUNC_NAME s_scm_char_eq_p
50 {
51 SCM_VALIDATE_CHAR (1, x);
52 SCM_VALIDATE_CHAR (2, y);
53 return scm_from_bool (scm_is_eq (x, y));
54 }
55 #undef FUNC_NAME
56
57
58 SCM_DEFINE1 (scm_char_less_p, "char<?", scm_tc7_rpsubr,
59 (SCM x, SCM y),
60 "Return @code{#t} iff @var{x} is less than @var{y} in the Unicode sequence,\n"
61 "else @code{#f}.")
62 #define FUNC_NAME s_scm_char_less_p
63 {
64 SCM_VALIDATE_CHAR (1, x);
65 SCM_VALIDATE_CHAR (2, y);
66 return scm_from_bool (SCM_CHAR(x) < SCM_CHAR(y));
67 }
68 #undef FUNC_NAME
69
70 SCM_DEFINE1 (scm_char_leq_p, "char<=?", scm_tc7_rpsubr,
71 (SCM x, SCM y),
72 "Return @code{#t} iff @var{x} is less than or equal to @var{y} in the\n"
73 "Unicode sequence, else @code{#f}.")
74 #define FUNC_NAME s_scm_char_leq_p
75 {
76 SCM_VALIDATE_CHAR (1, x);
77 SCM_VALIDATE_CHAR (2, y);
78 return scm_from_bool (SCM_CHAR(x) <= SCM_CHAR(y));
79 }
80 #undef FUNC_NAME
81
82 SCM_DEFINE1 (scm_char_gr_p, "char>?", scm_tc7_rpsubr,
83 (SCM x, SCM y),
84 "Return @code{#t} iff @var{x} is greater than @var{y} in the Unicode\n"
85 "sequence, else @code{#f}.")
86 #define FUNC_NAME s_scm_char_gr_p
87 {
88 SCM_VALIDATE_CHAR (1, x);
89 SCM_VALIDATE_CHAR (2, y);
90 return scm_from_bool (SCM_CHAR(x) > SCM_CHAR(y));
91 }
92 #undef FUNC_NAME
93
94 SCM_DEFINE1 (scm_char_geq_p, "char>=?", scm_tc7_rpsubr,
95 (SCM x, SCM y),
96 "Return @code{#t} iff @var{x} is greater than or equal to @var{y} in the\n"
97 "Unicode sequence, else @code{#f}.")
98 #define FUNC_NAME s_scm_char_geq_p
99 {
100 SCM_VALIDATE_CHAR (1, x);
101 SCM_VALIDATE_CHAR (2, y);
102 return scm_from_bool (SCM_CHAR(x) >= SCM_CHAR(y));
103 }
104 #undef FUNC_NAME
105
106 SCM_DEFINE1 (scm_char_ci_eq_p, "char-ci=?", scm_tc7_rpsubr,
107 (SCM x, SCM y),
108 "Return @code{#t} iff @var{x} is the same character as @var{y} ignoring\n"
109 "case, else @code{#f}. Case is locale free and not context sensitive.")
110 #define FUNC_NAME s_scm_char_ci_eq_p
111 {
112 SCM_VALIDATE_CHAR (1, x);
113 SCM_VALIDATE_CHAR (2, y);
114 return scm_from_bool (scm_c_upcase(SCM_CHAR(x))==scm_c_upcase(SCM_CHAR(y)));
115 }
116 #undef FUNC_NAME
117
118 SCM_DEFINE1 (scm_char_ci_less_p, "char-ci<?", scm_tc7_rpsubr,
119 (SCM x, SCM y),
120 "Return @code{#t} iff the Unicode uppercase form of @var{x} is less\n"
121 "than the Unicode uppercase form @var{y} in the Unicode sequence,\n"
122 "else @code{#f}.")
123 #define FUNC_NAME s_scm_char_ci_less_p
124 {
125 SCM_VALIDATE_CHAR (1, x);
126 SCM_VALIDATE_CHAR (2, y);
127 return scm_from_bool ((scm_c_upcase(SCM_CHAR(x))) < scm_c_upcase(SCM_CHAR(y)));
128 }
129 #undef FUNC_NAME
130
131 SCM_DEFINE1 (scm_char_ci_leq_p, "char-ci<=?", scm_tc7_rpsubr,
132 (SCM x, SCM y),
133 "Return @code{#t} iff the Unicode uppercase form of @var{x} is less\n"
134 "than or equal to the Unicode uppercase form of @var{y} in the\n"
135 "Unicode sequence, else @code{#f}.")
136 #define FUNC_NAME s_scm_char_ci_leq_p
137 {
138 SCM_VALIDATE_CHAR (1, x);
139 SCM_VALIDATE_CHAR (2, y);
140 return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) <= scm_c_upcase(SCM_CHAR(y)));
141 }
142 #undef FUNC_NAME
143
144 SCM_DEFINE1 (scm_char_ci_gr_p, "char-ci>?", scm_tc7_rpsubr,
145 (SCM x, SCM y),
146 "Return @code{#t} iff the Unicode uppercase form of @var{x} is greater\n"
147 "than the Unicode uppercase form of @var{y} in the Unicode\n"
148 "sequence, else @code{#f}.")
149 #define FUNC_NAME s_scm_char_ci_gr_p
150 {
151 SCM_VALIDATE_CHAR (1, x);
152 SCM_VALIDATE_CHAR (2, y);
153 return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) > scm_c_upcase(SCM_CHAR(y)));
154 }
155 #undef FUNC_NAME
156
157 SCM_DEFINE1 (scm_char_ci_geq_p, "char-ci>=?", scm_tc7_rpsubr,
158 (SCM x, SCM y),
159 "Return @code{#t} iff the Unicode uppercase form of @var{x} is greater\n"
160 "than or equal to the Unicode uppercase form of @var{y} in the\n"
161 "Unicode sequence, else @code{#f}.")
162 #define FUNC_NAME s_scm_char_ci_geq_p
163 {
164 SCM_VALIDATE_CHAR (1, x);
165 SCM_VALIDATE_CHAR (2, y);
166 return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) >= scm_c_upcase(SCM_CHAR(y)));
167 }
168 #undef FUNC_NAME
169
170
171 SCM_DEFINE (scm_char_alphabetic_p, "char-alphabetic?", 1, 0, 0,
172 (SCM chr),
173 "Return @code{#t} iff @var{chr} is alphabetic, else @code{#f}.\n")
174 #define FUNC_NAME s_scm_char_alphabetic_p
175 {
176 return scm_char_set_contains_p (scm_char_set_letter, chr);
177 }
178 #undef FUNC_NAME
179
180 SCM_DEFINE (scm_char_numeric_p, "char-numeric?", 1, 0, 0,
181 (SCM chr),
182 "Return @code{#t} iff @var{chr} is numeric, else @code{#f}.\n")
183 #define FUNC_NAME s_scm_char_numeric_p
184 {
185 return scm_char_set_contains_p (scm_char_set_digit, chr);
186 }
187 #undef FUNC_NAME
188
189 SCM_DEFINE (scm_char_whitespace_p, "char-whitespace?", 1, 0, 0,
190 (SCM chr),
191 "Return @code{#t} iff @var{chr} is whitespace, else @code{#f}.\n")
192 #define FUNC_NAME s_scm_char_whitespace_p
193 {
194 return scm_char_set_contains_p (scm_char_set_whitespace, chr);
195 }
196 #undef FUNC_NAME
197
198
199
200 SCM_DEFINE (scm_char_upper_case_p, "char-upper-case?", 1, 0, 0,
201 (SCM chr),
202 "Return @code{#t} iff @var{chr} is uppercase, else @code{#f}.\n")
203 #define FUNC_NAME s_scm_char_upper_case_p
204 {
205 return scm_char_set_contains_p (scm_char_set_upper_case, chr);
206 }
207 #undef FUNC_NAME
208
209
210 SCM_DEFINE (scm_char_lower_case_p, "char-lower-case?", 1, 0, 0,
211 (SCM chr),
212 "Return @code{#t} iff @var{chr} is lowercase, else @code{#f}.\n")
213 #define FUNC_NAME s_scm_char_lower_case_p
214 {
215 return scm_char_set_contains_p (scm_char_set_lower_case, chr);
216 }
217 #undef FUNC_NAME
218
219
220
221 SCM_DEFINE (scm_char_is_both_p, "char-is-both?", 1, 0, 0,
222 (SCM chr),
223 "Return @code{#t} iff @var{chr} is either uppercase or lowercase, else @code{#f}.\n")
224 #define FUNC_NAME s_scm_char_is_both_p
225 {
226 if (scm_is_true (scm_char_set_contains_p (scm_char_set_lower_case, chr)))
227 return SCM_BOOL_T;
228 return scm_char_set_contains_p (scm_char_set_upper_case, chr);
229 }
230 #undef FUNC_NAME
231
232
233
234
235 SCM_DEFINE (scm_char_to_integer, "char->integer", 1, 0, 0,
236 (SCM chr),
237 "Return the number corresponding to ordinal position of @var{chr} in the\n"
238 "ASCII sequence.")
239 #define FUNC_NAME s_scm_char_to_integer
240 {
241 SCM_VALIDATE_CHAR (1, chr);
242 return scm_from_uint32 (SCM_CHAR(chr));
243 }
244 #undef FUNC_NAME
245
246
247
248 SCM_DEFINE (scm_integer_to_char, "integer->char", 1, 0, 0,
249 (SCM n),
250 "Return the character at position @var{n} in the ASCII sequence.")
251 #define FUNC_NAME s_scm_integer_to_char
252 {
253 scm_t_wchar cn;
254
255 cn = scm_to_wchar (n);
256
257 /* Avoid the surrogates. */
258 if (!SCM_IS_UNICODE_CHAR (cn))
259 scm_out_of_range (FUNC_NAME, n);
260
261 return SCM_MAKE_CHAR (cn);
262 }
263 #undef FUNC_NAME
264
265
266 SCM_DEFINE (scm_char_upcase, "char-upcase", 1, 0, 0,
267 (SCM chr),
268 "Return the uppercase character version of @var{chr}.")
269 #define FUNC_NAME s_scm_char_upcase
270 {
271 SCM_VALIDATE_CHAR (1, chr);
272 return SCM_MAKE_CHAR (scm_c_upcase (SCM_CHAR (chr)));
273 }
274 #undef FUNC_NAME
275
276
277 SCM_DEFINE (scm_char_downcase, "char-downcase", 1, 0, 0,
278 (SCM chr),
279 "Return the lowercase character version of @var{chr}.")
280 #define FUNC_NAME s_scm_char_downcase
281 {
282 SCM_VALIDATE_CHAR (1, chr);
283 return SCM_MAKE_CHAR (scm_c_downcase (SCM_CHAR(chr)));
284 }
285 #undef FUNC_NAME
286
287 \f
288
289
290
291 /*
292 TODO: change name to scm_i_.. ? --hwn
293 */
294
295
296 scm_t_wchar
297 scm_c_upcase (scm_t_wchar c)
298 {
299 return uc_toupper ((int) c);
300 }
301
302
303 scm_t_wchar
304 scm_c_downcase (scm_t_wchar c)
305 {
306 return uc_tolower ((int) c);
307 }
308
309 \f
310
311 /* There are a few sets of character names: R5RS, Guile
312 extensions for control characters, and leftover Guile extensions.
313 They are listed in order of precedence. */
314
315 static const char *const scm_r5rs_charnames[] = {
316 "space", "newline"
317 };
318
319 static const scm_t_uint32 const scm_r5rs_charnums[] = {
320 0x20, 0x0A
321 };
322
323 #define SCM_N_R5RS_CHARNAMES (sizeof (scm_r5rs_charnames) / sizeof (char *))
324
325 /* The abbreviated names for control characters. */
326 static const char *const scm_C0_control_charnames[] = {
327 /* C0 controls */
328 "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
329 "bs", "ht", "lf", "vt", "ff", "cr", "so", "si",
330 "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
331 "can", "em", "sub", "esc", "fs", "gs", "rs", "us",
332 "sp", "del"
333 };
334
335 static const scm_t_uint32 const scm_C0_control_charnums[] = {
336 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
337 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
338 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
339 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
340 0x20, 0x7f
341 };
342
343 #define SCM_N_C0_CONTROL_CHARNAMES (sizeof (scm_C0_control_charnames) / sizeof (char *))
344
345 static const char *const scm_alt_charnames[] = {
346 "null", "backspace", "tab", "nl", "newline", "np", "page", "return",
347 };
348
349 static const scm_t_uint32 const scm_alt_charnums[] = {
350 0x00, 0x08, 0x09, 0x0a, 0x0a, 0x0c, 0x0c, 0x0d
351 };
352
353 #define SCM_N_ALT_CHARNAMES (sizeof (scm_alt_charnames) / sizeof (char *))
354
355 /* Returns the string charname for a character if it exists, or NULL
356 otherwise. */
357 const char *
358 scm_i_charname (SCM chr)
359 {
360 size_t c;
361 scm_t_uint32 i = SCM_CHAR (chr);
362
363 for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
364 if (scm_r5rs_charnums[c] == i)
365 return scm_r5rs_charnames[c];
366
367 for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
368 if (scm_C0_control_charnums[c] == i)
369 return scm_C0_control_charnames[c];
370
371 for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
372 if (scm_alt_charnums[c] == i)
373 return scm_alt_charnames[i];
374
375 return NULL;
376 }
377
378 /* Return a character from a string charname. */
379 SCM
380 scm_i_charname_to_char (const char *charname, size_t charname_len)
381 {
382 size_t c;
383
384 /* The R5RS charnames. These are supposed to be case
385 insensitive. */
386 for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
387 if ((strlen (scm_r5rs_charnames[c]) == charname_len)
388 && (!strncasecmp (scm_r5rs_charnames[c], charname, charname_len)))
389 return SCM_MAKE_CHAR (scm_r5rs_charnums[c]);
390
391 /* Then come the controls. These are not case sensitive. */
392 for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
393 if ((strlen (scm_C0_control_charnames[c]) == charname_len)
394 && (!strncasecmp (scm_C0_control_charnames[c], charname, charname_len)))
395 return SCM_MAKE_CHAR (scm_C0_control_charnums[c]);
396
397 /* Lastly are some old names carried over for compatibility. */
398 for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
399 if ((strlen (scm_alt_charnames[c]) == charname_len)
400 && (!strncasecmp (scm_alt_charnames[c], charname, charname_len)))
401 return SCM_MAKE_CHAR (scm_alt_charnums[c]);
402
403 return SCM_BOOL_F;
404 }
405
406 \f
407
408
409 void
410 scm_init_chars ()
411 {
412 #include "libguile/chars.x"
413 }
414
415
416 /*
417 Local Variables:
418 c-file-style: "gnu"
419 End:
420 */