libguile/chars.c

   1 /*      Copyright (C) 1995,1996,1998, 2000, 2001, 2004, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
   2  *
   3  * This library is free software; you can redistribute it and/or
   4  * modify it under the terms of the GNU Lesser General Public License
   5  * as published by the Free Software Foundation; either version 3 of
   6  * the License, or (at your option) any later version.
   7  *
   8  * This library is distributed in the hope that it will be useful, but
   9  * WITHOUT ANY WARRANTY; without even the implied warranty of
  10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11  * Lesser General Public License for more details.
  12  *
  13  * You should have received a copy of the GNU Lesser General Public
  14  * License along with this library; if not, write to the Free Software
  15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  16  * 02110-1301 USA
  17  */
  18
  19
  20 \f
  21 #ifdef HAVE_CONFIG_H
  22 # include <config.h>
  23 #endif
  24
  25 #include <ctype.h>
  26 #include <limits.h>
  27 #include <unicase.h>
  28 #include <unictype.h>
  29
  30 #include "libguile/_scm.h"
  31 #include "libguile/validate.h"
  32
  33 #include "libguile/chars.h"
  34 #include "libguile/srfi-14.h"
  35
  36 \f
  37
  38 SCM_DEFINE (scm_char_p, "char?", 1, 0, 0,
  39             (SCM x),
  40             "Return @code{#t} iff @var{x} is a character, else @code{#f}.")
  41 #define FUNC_NAME s_scm_char_p
  42 {
  43   return scm_from_bool (SCM_CHARP(x));
  44 }
  45 #undef FUNC_NAME
  46
  47 static SCM scm_i_char_eq_p (SCM x, SCM y, SCM rest);
  48 SCM_DEFINE (scm_i_char_eq_p, "char=?", 0, 2, 1,
  49             (SCM x, SCM y, SCM rest),
  50             "Return @code{#t} if the Unicode code point of @var{x} is equal to the\n"
  51             "code point of @var{y}, else @code{#f}.\n")
  52 #define FUNC_NAME s_scm_i_char_eq_p
  53 {
  54   if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  55     return SCM_BOOL_T;
  56   while (!scm_is_null (rest))
  57     {
  58       if (scm_is_false (scm_char_eq_p (x, y)))
  59         return SCM_BOOL_F;
  60       x = y;
  61       y = scm_car (rest);
  62       rest = scm_cdr (rest);
  63     }
  64   return scm_char_eq_p (x, y);
  65 }
  66 #undef FUNC_NAME
  67
  68 SCM scm_char_eq_p (SCM x, SCM y)
  69 #define FUNC_NAME s_scm_i_char_eq_p
  70 {
  71   SCM_VALIDATE_CHAR (1, x);
  72   SCM_VALIDATE_CHAR (2, y);
  73   return scm_from_bool (scm_is_eq (x, y));
  74 }
  75 #undef FUNC_NAME
  76
  77
  78 static SCM scm_i_char_less_p (SCM x, SCM y, SCM rest);
  79 SCM_DEFINE (scm_i_char_less_p, "char<?", 0, 2, 1,
  80             (SCM x, SCM y, SCM rest),
  81             "Return @code{#t} iff the code point of @var{x} is less than the code\n"
  82             "point of @var{y}, else @code{#f}.")
  83 #define FUNC_NAME s_scm_i_char_less_p
  84 {
  85   if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  86     return SCM_BOOL_T;
  87   while (!scm_is_null (rest))
  88     {
  89       if (scm_is_false (scm_char_less_p (x, y)))
  90         return SCM_BOOL_F;
  91       x = y;
  92       y = scm_car (rest);
  93       rest = scm_cdr (rest);
  94     }
  95   return scm_char_less_p (x, y);
  96 }
  97 #undef FUNC_NAME
  98
  99 SCM scm_char_less_p (SCM x, SCM y)
 100 #define FUNC_NAME s_scm_i_char_less_p
 101 {
 102   SCM_VALIDATE_CHAR (1, x);
 103   SCM_VALIDATE_CHAR (2, y);
 104   return scm_from_bool (SCM_CHAR(x) < SCM_CHAR(y));
 105 }
 106 #undef FUNC_NAME
 107
 108 static SCM scm_i_char_leq_p (SCM x, SCM y, SCM rest);
 109 SCM_DEFINE (scm_i_char_leq_p, "char<=?", 0, 2, 1,
 110             (SCM x, SCM y, SCM rest),
 111             "Return @code{#t} if the Unicode code point of @var{x} is less than or\n"
 112             "equal to the code point of @var{y}, else @code{#f}.")
 113 #define FUNC_NAME s_scm_i_char_leq_p
 114 {
 115   if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
 116     return SCM_BOOL_T;
 117   while (!scm_is_null (rest))
 118     {
 119       if (scm_is_false (scm_char_leq_p (x, y)))
 120         return SCM_BOOL_F;
 121       x = y;
 122       y = scm_car (rest);
 123       rest = scm_cdr (rest);
 124     }
 125   return scm_char_leq_p (x, y);
 126 }
 127 #undef FUNC_NAME
 128
 129 SCM scm_char_leq_p (SCM x, SCM y)
 130 #define FUNC_NAME s_scm_i_char_leq_p
 131 {
 132   SCM_VALIDATE_CHAR (1, x);
 133   SCM_VALIDATE_CHAR (2, y);
 134   return scm_from_bool (SCM_CHAR(x) <= SCM_CHAR(y));
 135 }
 136 #undef FUNC_NAME
 137
 138 static SCM scm_i_char_gr_p (SCM x, SCM y, SCM rest);
 139 SCM_DEFINE (scm_i_char_gr_p, "char>?", 0, 2, 1,
 140             (SCM x, SCM y, SCM rest),
 141             "Return @code{#t} if the Unicode code point of @var{x} is greater than\n"
 142             "the code point of @var{y}, else @code{#f}.")
 143 #define FUNC_NAME s_scm_i_char_gr_p
 144 {
 145   if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
 146     return SCM_BOOL_T;
 147   while (!scm_is_null (rest))
 148     {
 149       if (scm_is_false (scm_char_gr_p (x, y)))
 150         return SCM_BOOL_F;
 151       x = y;
 152       y = scm_car (rest);
 153       rest = scm_cdr (rest);
 154     }
 155   return scm_char_gr_p (x, y);
 156 }
 157 #undef FUNC_NAME
 158
 159 SCM scm_char_gr_p (SCM x, SCM y)
 160 #define FUNC_NAME s_scm_i_char_gr_p
 161 {
 162   SCM_VALIDATE_CHAR (1, x);
 163   SCM_VALIDATE_CHAR (2, y);
 164   return scm_from_bool (SCM_CHAR(x) > SCM_CHAR(y));
 165 }
 166 #undef FUNC_NAME
 167
 168 static SCM scm_i_char_geq_p (SCM x, SCM y, SCM rest);
 169 SCM_DEFINE (scm_i_char_geq_p, "char>=?", 0, 2, 1,
 170             (SCM x, SCM y, SCM rest),
 171             "Return @code{#t} if the Unicode code point of @var{x} is greater than\n"
 172             "or equal to the code point of @var{y}, else @code{#f}.")
 173 #define FUNC_NAME s_scm_i_char_geq_p
 174 {
 175   if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
 176     return SCM_BOOL_T;
 177   while (!scm_is_null (rest))
 178     {
 179       if (scm_is_false (scm_char_geq_p (x, y)))
 180         return SCM_BOOL_F;
 181       x = y;
 182       y = scm_car (rest);
 183       rest = scm_cdr (rest);
 184     }
 185   return scm_char_geq_p (x, y);
 186 }
 187 #undef FUNC_NAME
 188
 189 SCM scm_char_geq_p (SCM x, SCM y)
 190 #define FUNC_NAME s_scm_i_char_geq_p
 191 {
 192   SCM_VALIDATE_CHAR (1, x);
 193   SCM_VALIDATE_CHAR (2, y);
 194   return scm_from_bool (SCM_CHAR(x) >= SCM_CHAR(y));
 195 }
 196 #undef FUNC_NAME
 197
 198 /* FIXME?: R6RS specifies that these comparisons are case-folded.
 199    This is the same thing as comparing the uppercase characters in
 200    practice, but, not in theory.  Unicode has table containing their
 201    definition of case-folded character mappings.  A more correct
 202    implementation would be to use that table and make a char-foldcase
 203    function.  */
 204
 205 static SCM scm_i_char_ci_eq_p (SCM x, SCM y, SCM rest);
 206 SCM_DEFINE (scm_i_char_ci_eq_p, "char-ci=?", 0, 2, 1,
 207             (SCM x, SCM y, SCM rest),
 208             "Return @code{#t} if the case-folded Unicode code point of @var{x} is\n"
 209             "the same as the case-folded code point of @var{y}, else @code{#f}.")
 210 #define FUNC_NAME s_scm_i_char_ci_eq_p
 211 {
 212   if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
 213     return SCM_BOOL_T;
 214   while (!scm_is_null (rest))
 215     {
 216       if (scm_is_false (scm_char_ci_eq_p (x, y)))
 217         return SCM_BOOL_F;
 218       x = y;
 219       y = scm_car (rest);
 220       rest = scm_cdr (rest);
 221     }
 222   return scm_char_ci_eq_p (x, y);
 223 }
 224 #undef FUNC_NAME
 225
 226 SCM scm_char_ci_eq_p (SCM x, SCM y)
 227 #define FUNC_NAME s_scm_i_char_ci_eq_p
 228 {
 229   SCM_VALIDATE_CHAR (1, x);
 230   SCM_VALIDATE_CHAR (2, y);
 231   return scm_from_bool (scm_c_upcase(SCM_CHAR(x))==scm_c_upcase(SCM_CHAR(y)));
 232 }
 233 #undef FUNC_NAME
 234
 235 static SCM scm_i_char_ci_less_p (SCM x, SCM y, SCM rest);
 236 SCM_DEFINE (scm_i_char_ci_less_p, "char-ci<?", 0, 2, 1,
 237             (SCM x, SCM y, SCM rest),
 238             "Return @code{#t} if the case-folded Unicode code point of @var{x} is\n"
 239             "less than the case-folded code point of @var{y}, else @code{#f}.")
 240 #define FUNC_NAME s_scm_i_char_ci_less_p
 241 {
 242   if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
 243     return SCM_BOOL_T;
 244   while (!scm_is_null (rest))
 245     {
 246       if (scm_is_false (scm_char_ci_less_p (x, y)))
 247         return SCM_BOOL_F;
 248       x = y;
 249       y = scm_car (rest);
 250       rest = scm_cdr (rest);
 251     }
 252   return scm_char_ci_less_p (x, y);
 253 }
 254 #undef FUNC_NAME
 255
 256 SCM scm_char_ci_less_p (SCM x, SCM y)
 257 #define FUNC_NAME s_scm_i_char_ci_less_p
 258 {
 259   SCM_VALIDATE_CHAR (1, x);
 260   SCM_VALIDATE_CHAR (2, y);
 261   return scm_from_bool ((scm_c_upcase(SCM_CHAR(x))) < scm_c_upcase(SCM_CHAR(y)));
 262 }
 263 #undef FUNC_NAME
 264
 265 static SCM scm_i_char_ci_leq_p (SCM x, SCM y, SCM rest);
 266 SCM_DEFINE (scm_i_char_ci_leq_p, "char-ci<=?", 0, 2, 1,
 267             (SCM x, SCM y, SCM rest),
 268             "Return @code{#t} iff the case-folded Unicode code point of @var{x} is\n"
 269             "less than or equal to the case-folded code point of @var{y}, else\n"
 270             "@code{#f}")
 271 #define FUNC_NAME s_scm_i_char_ci_leq_p
 272 {
 273   if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
 274     return SCM_BOOL_T;
 275   while (!scm_is_null (rest))
 276     {
 277       if (scm_is_false (scm_char_ci_leq_p (x, y)))
 278         return SCM_BOOL_F;
 279       x = y;
 280       y = scm_car (rest);
 281       rest = scm_cdr (rest);
 282     }
 283   return scm_char_ci_leq_p (x, y);
 284 }
 285 #undef FUNC_NAME
 286
 287 SCM scm_char_ci_leq_p (SCM x, SCM y)
 288 #define FUNC_NAME s_scm_i_char_ci_leq_p
 289 {
 290   SCM_VALIDATE_CHAR (1, x);
 291   SCM_VALIDATE_CHAR (2, y);
 292   return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) <= scm_c_upcase(SCM_CHAR(y)));
 293 }
 294 #undef FUNC_NAME
 295
 296 static SCM scm_i_char_ci_gr_p (SCM x, SCM y, SCM rest);
 297 SCM_DEFINE (scm_i_char_ci_gr_p, "char-ci>?", 0, 2, 1,
 298             (SCM x, SCM y, SCM rest),
 299             "Return @code{#t} iff the case-folded code point of @var{x} is greater\n"
 300             "than the case-folded code point of @var{y}, else @code{#f}.")
 301 #define FUNC_NAME s_scm_i_char_ci_gr_p
 302 {
 303   if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
 304     return SCM_BOOL_T;
 305   while (!scm_is_null (rest))
 306     {
 307       if (scm_is_false (scm_char_ci_gr_p (x, y)))
 308         return SCM_BOOL_F;
 309       x = y;
 310       y = scm_car (rest);
 311       rest = scm_cdr (rest);
 312     }
 313   return scm_char_ci_gr_p (x, y);
 314 }
 315 #undef FUNC_NAME
 316
 317 SCM scm_char_ci_gr_p (SCM x, SCM y)
 318 #define FUNC_NAME s_scm_i_char_ci_gr_p
 319 {
 320   SCM_VALIDATE_CHAR (1, x);
 321   SCM_VALIDATE_CHAR (2, y);
 322   return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) > scm_c_upcase(SCM_CHAR(y)));
 323 }
 324 #undef FUNC_NAME
 325
 326 static SCM scm_i_char_ci_geq_p (SCM x, SCM y, SCM rest);
 327 SCM_DEFINE (scm_i_char_ci_geq_p, "char-ci>=?", 0, 2, 1,
 328             (SCM x, SCM y, SCM rest),
 329             "Return @code{#t} iff the case-folded Unicode code point of @var{x} is\n"
 330             "greater than or equal to the case-folded code point of @var{y}, else\n"
 331             "@code{#f}.")
 332 #define FUNC_NAME s_scm_i_char_ci_geq_p
 333 {
 334   if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
 335     return SCM_BOOL_T;
 336   while (!scm_is_null (rest))
 337     {
 338       if (scm_is_false (scm_char_ci_geq_p (x, y)))
 339         return SCM_BOOL_F;
 340       x = y;
 341       y = scm_car (rest);
 342       rest = scm_cdr (rest);
 343     }
 344   return scm_char_ci_geq_p (x, y);
 345 }
 346 #undef FUNC_NAME
 347
 348 SCM scm_char_ci_geq_p (SCM x, SCM y)
 349 #define FUNC_NAME s_scm_i_char_ci_geq_p
 350 {
 351   SCM_VALIDATE_CHAR (1, x);
 352   SCM_VALIDATE_CHAR (2, y);
 353   return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) >= scm_c_upcase(SCM_CHAR(y)));
 354 }
 355 #undef FUNC_NAME
 356
 357
 358 SCM_DEFINE (scm_char_alphabetic_p, "char-alphabetic?", 1, 0, 0,
 359            (SCM chr),
 360             "Return @code{#t} iff @var{chr} is alphabetic, else @code{#f}.\n")
 361 #define FUNC_NAME s_scm_char_alphabetic_p
 362 {
 363   return scm_char_set_contains_p (scm_char_set_letter, chr);
 364 }
 365 #undef FUNC_NAME
 366
 367 SCM_DEFINE (scm_char_numeric_p, "char-numeric?", 1, 0, 0,
 368            (SCM chr),
 369             "Return @code{#t} iff @var{chr} is numeric, else @code{#f}.\n")
 370 #define FUNC_NAME s_scm_char_numeric_p
 371 {
 372   return scm_char_set_contains_p (scm_char_set_digit, chr);
 373 }
 374 #undef FUNC_NAME
 375
 376 SCM_DEFINE (scm_char_whitespace_p, "char-whitespace?", 1, 0, 0,
 377            (SCM chr),
 378             "Return @code{#t} iff @var{chr} is whitespace, else @code{#f}.\n")
 379 #define FUNC_NAME s_scm_char_whitespace_p
 380 {
 381   return scm_char_set_contains_p (scm_char_set_whitespace, chr);
 382 }
 383 #undef FUNC_NAME
 384
 385
 386 SCM_DEFINE (scm_char_upper_case_p, "char-upper-case?", 1, 0, 0,
 387            (SCM chr),
 388             "Return @code{#t} iff @var{chr} is uppercase, else @code{#f}.\n")
 389 #define FUNC_NAME s_scm_char_upper_case_p
 390 {
 391   return scm_char_set_contains_p (scm_char_set_upper_case, chr);
 392 }
 393 #undef FUNC_NAME
 394
 395
 396 SCM_DEFINE (scm_char_lower_case_p, "char-lower-case?", 1, 0, 0,
 397            (SCM chr),
 398             "Return @code{#t} iff @var{chr} is lowercase, else @code{#f}.\n")
 399 #define FUNC_NAME s_scm_char_lower_case_p
 400 {
 401   return scm_char_set_contains_p (scm_char_set_lower_case, chr);
 402 }
 403 #undef FUNC_NAME
 404
 405 SCM_DEFINE (scm_char_is_both_p, "char-is-both?", 1, 0, 0,
 406             (SCM chr),
 407             "Return @code{#t} iff @var{chr} is either uppercase or lowercase, else\n"
 408             "@code{#f}.\n")
 409 #define FUNC_NAME s_scm_char_is_both_p
 410 {
 411   if (scm_is_true (scm_char_set_contains_p (scm_char_set_lower_case, chr)))
 412     return SCM_BOOL_T;
 413   return scm_char_set_contains_p (scm_char_set_upper_case, chr);
 414 }
 415 #undef FUNC_NAME
 416
 417
 418 SCM_DEFINE (scm_char_to_integer, "char->integer", 1, 0, 0,
 419             (SCM chr),
 420             "Return the Unicode code point of @var{chr}.")
 421 #define FUNC_NAME s_scm_char_to_integer
 422 {
 423   SCM_VALIDATE_CHAR (1, chr);
 424   return scm_from_uint32 (SCM_CHAR(chr));
 425 }
 426 #undef FUNC_NAME
 427
 428
 429 SCM_DEFINE (scm_integer_to_char, "integer->char", 1, 0, 0,
 430            (SCM n),
 431             "Return the character that has Unicode code point @var{n}.  The integer\n"
 432             "@var{n} must be a valid code point.  Valid code points are in the\n"
 433             "ranges 0 to @code{#xD7FF} inclusive or @code{#xE000} to\n"
 434             "@code{#x10FFFF} inclusive.")
 435 #define FUNC_NAME s_scm_integer_to_char
 436 {
 437   scm_t_wchar cn;
 438
 439   cn = scm_to_wchar (n);
 440
 441   /* Avoid the surrogates.  */
 442   if (!SCM_IS_UNICODE_CHAR (cn))
 443     scm_out_of_range (FUNC_NAME, n);
 444
 445   return SCM_MAKE_CHAR (cn);
 446 }
 447 #undef FUNC_NAME
 448
 449
 450 SCM_DEFINE (scm_char_upcase, "char-upcase", 1, 0, 0,
 451            (SCM chr),
 452             "Return the uppercase character version of @var{chr}.")
 453 #define FUNC_NAME s_scm_char_upcase
 454 {
 455   SCM_VALIDATE_CHAR (1, chr);
 456   return SCM_MAKE_CHAR (scm_c_upcase (SCM_CHAR (chr)));
 457 }
 458 #undef FUNC_NAME
 459
 460
 461 SCM_DEFINE (scm_char_downcase, "char-downcase", 1, 0, 0,
 462            (SCM chr),
 463             "Return the lowercase character version of @var{chr}.")
 464 #define FUNC_NAME s_scm_char_downcase
 465 {
 466   SCM_VALIDATE_CHAR (1, chr);
 467   return SCM_MAKE_CHAR (scm_c_downcase (SCM_CHAR(chr)));
 468 }
 469 #undef FUNC_NAME
 470
 471 SCM_DEFINE (scm_char_titlecase, "char-titlecase", 1, 0, 0,
 472            (SCM chr),
 473             "Return the titlecase character version of @var{chr}.")
 474 #define FUNC_NAME s_scm_char_titlecase
 475 {
 476   SCM_VALIDATE_CHAR (1, chr);
 477   return SCM_MAKE_CHAR (scm_c_titlecase (SCM_CHAR(chr)));
 478 }
 479 #undef FUNC_NAME
 480
 481 SCM_DEFINE (scm_char_general_category, "char-general-category", 1, 0, 0,
 482            (SCM chr),
 483             "Return a symbol representing the Unicode general category of "
 484             "@var{chr} or @code{#f} if a named category cannot be found.")
 485 #define FUNC_NAME s_scm_char_general_category
 486 {
 487   const char *sym;
 488   uc_general_category_t cat;
 489
 490   SCM_VALIDATE_CHAR (1, chr);
 491   cat = uc_general_category (SCM_CHAR (chr));
 492   sym = uc_general_category_name (cat);
 493
 494   if (sym != NULL)
 495     return scm_from_utf8_symbol (sym);
 496   return SCM_BOOL_F;
 497 }
 498 #undef FUNC_NAME
 499
 500 \f
 501
 502
 503
 504 /*
 505 TODO: change name  to scm_i_.. ? --hwn
 506 */
 507
 508
 509 scm_t_wchar
 510 scm_c_upcase (scm_t_wchar c)
 511 {
 512   return uc_toupper ((int) c);
 513 }
 514
 515
 516 scm_t_wchar
 517 scm_c_downcase (scm_t_wchar c)
 518 {
 519   return uc_tolower ((int) c);
 520 }
 521
 522 scm_t_wchar
 523 scm_c_titlecase (scm_t_wchar c)
 524 {
 525   return uc_totitle ((int) c);
 526 }
 527
 528 \f
 529
 530 /* There are a few sets of character names: R5RS, Guile
 531    extensions for control characters, and leftover Guile extensions.
 532    They are listed in order of precedence.  */
 533
 534 static const char *const scm_r5rs_charnames[] = {
 535   "space", "newline"
 536 };
 537
 538 static const scm_t_uint32 const scm_r5rs_charnums[] = {
 539   0x20, 0x0a
 540 };
 541
 542 #define SCM_N_R5RS_CHARNAMES (sizeof (scm_r5rs_charnames) / sizeof (char *))
 543
 544 static const char *const scm_r6rs_charnames[] = {
 545   "nul", "alarm", "backspace", "tab", "linefeed", "vtab", "page",
 546   "return", "esc", "delete"
 547   /* 'space' and 'newline' are already included from the R5RS list.  */
 548 };
 549
 550 static const scm_t_uint32 const scm_r6rs_charnums[] = {
 551   0x00, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c,
 552   0x0d, 0x1b, 0x7f
 553 };
 554
 555 #define SCM_N_R6RS_CHARNAMES (sizeof (scm_r6rs_charnames) / sizeof (char *))
 556
 557 /* The abbreviated names for control characters.  */
 558 static const char *const scm_C0_control_charnames[] = {
 559   /* C0 controls */
 560   "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
 561   "bs",  "ht",  "lf",  "vt",  "ff",  "cr",  "so",  "si",
 562   "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
 563   "can", "em",  "sub", "esc", "fs",  "gs",  "rs",  "us",
 564   "sp", "del"
 565 };
 566
 567 static const scm_t_uint32 const scm_C0_control_charnums[] = {
 568   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 569   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
 570   0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
 571   0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
 572   0x20, 0x7f
 573 };
 574
 575 #define SCM_N_C0_CONTROL_CHARNAMES (sizeof (scm_C0_control_charnames) / sizeof (char *))
 576
 577 static const char *const scm_alt_charnames[] = {
 578   "null", "nl", "np"
 579 };
 580
 581 static const scm_t_uint32 const scm_alt_charnums[] = {
 582   0x00, 0x0a, 0x0c
 583 };
 584
 585 #define SCM_N_ALT_CHARNAMES (sizeof (scm_alt_charnames) / sizeof (char *))
 586
 587 /* Returns the string charname for a character if it exists, or NULL
 588    otherwise.  */
 589 const char *
 590 scm_i_charname (SCM chr)
 591 {
 592   size_t c;
 593   scm_t_uint32 i = SCM_CHAR (chr);
 594
 595   for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
 596     if (scm_r5rs_charnums[c] == i)
 597       return scm_r5rs_charnames[c];
 598
 599   for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++)
 600     if (scm_r6rs_charnums[c] == i)
 601       return scm_r6rs_charnames[c];
 602
 603   for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
 604     if (scm_C0_control_charnums[c] == i)
 605       return scm_C0_control_charnames[c];
 606
 607   /* Since the characters in scm_alt_charnums is a subset of
 608      scm_C0_control_charnums, this code is never reached.  */
 609   for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
 610     if (scm_alt_charnums[c] == i)
 611       return scm_alt_charnames[c];
 612
 613   return NULL;
 614 }
 615
 616 /* Return a character from a string charname.  */
 617 SCM
 618 scm_i_charname_to_char (const char *charname, size_t charname_len)
 619 {
 620   size_t c;
 621
 622   /* The R5RS charnames.  These are supposed to be case insensitive. */
 623   for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
 624     if ((strlen (scm_r5rs_charnames[c]) == charname_len)
 625         && (!strncasecmp (scm_r5rs_charnames[c], charname, charname_len)))
 626       return SCM_MAKE_CHAR (scm_r5rs_charnums[c]);
 627
 628   /* The R6RS charnames.  R6RS says that these should be case-sensitive.  They
 629      are left as case-insensitive to avoid confusion.  */
 630   for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++)
 631     if ((strlen (scm_r6rs_charnames[c]) == charname_len)
 632         && (!strncasecmp (scm_r6rs_charnames[c], charname, charname_len)))
 633       return SCM_MAKE_CHAR (scm_r6rs_charnums[c]);
 634
 635   /* Then come the controls.  By Guile convention, these are not case
 636      sensitive.  */
 637   for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
 638     if ((strlen (scm_C0_control_charnames[c]) == charname_len)
 639         && (!strncasecmp (scm_C0_control_charnames[c], charname, charname_len)))
 640       return SCM_MAKE_CHAR (scm_C0_control_charnums[c]);
 641
 642   /* Lastly are some old names carried over for compatibility.  */
 643   for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
 644     if ((strlen (scm_alt_charnames[c]) == charname_len)
 645         && (!strncasecmp (scm_alt_charnames[c], charname, charname_len)))
 646       return SCM_MAKE_CHAR (scm_alt_charnums[c]);
 647
 648   return SCM_BOOL_F;
 649 }
 650
 651 \f
 652
 653
 654 void
 655 scm_init_chars ()
 656 {
 657 #include "libguile/chars.x"
 658 }
 659
 660
 661 /*
 662   Local Variables:
 663   c-file-style: "gnu"
 664   End:
 665 */