libguile/chars.c

   1 /* Copyright (C) 1995,1996,1998, 2000, 2001, 2004, 2006, 2008, 2009,
   2  *   2010, 2014 Free Software Foundation, Inc.
   3  *
   4  * This library is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public License
   6  * as published by the Free Software Foundation; either version 3 of
   7  * the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful, but
  10  * WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with this library; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17  * 02110-1301 USA
  18  */
  19
  20
  21 \f
  22 #ifdef HAVE_CONFIG_H
  23 # include <config.h>
  24 #endif
  25
  26 #include <ctype.h>
  27 #include <limits.h>
  28 #include <unicase.h>
  29 #include <unictype.h>
  30
  31 #include "libguile/_scm.h"
  32 #include "libguile/validate.h"
  33
  34 #include "libguile/chars.h"
  35 #include "libguile/srfi-14.h"
  36
  37 \f
  38
  39 SCM_DEFINE (scm_char_p, "char?", 1, 0, 0,
  40             (SCM x),
  41             "Return @code{#t} iff @var{x} is a character, else @code{#f}.")
  42 #define FUNC_NAME s_scm_char_p
  43 {
  44   return scm_from_bool (SCM_CHARP(x));
  45 }
  46 #undef FUNC_NAME
  47
  48 static SCM scm_i_char_eq_p (SCM x, SCM y, SCM rest);
  49 SCM_DEFINE (scm_i_char_eq_p, "char=?", 0, 2, 1,
  50             (SCM x, SCM y, SCM rest),
  51             "Return @code{#t} if the Unicode code point of @var{x} is equal to the\n"
  52             "code point of @var{y}, else @code{#f}.\n")
  53 #define FUNC_NAME s_scm_i_char_eq_p
  54 {
  55   if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  56     return SCM_BOOL_T;
  57   while (!scm_is_null (rest))
  58     {
  59       if (scm_is_false (scm_char_eq_p (x, y)))
  60         return SCM_BOOL_F;
  61       x = y;
  62       y = scm_car (rest);
  63       rest = scm_cdr (rest);
  64     }
  65   return scm_char_eq_p (x, y);
  66 }
  67 #undef FUNC_NAME
  68
  69 SCM scm_char_eq_p (SCM x, SCM y)
  70 #define FUNC_NAME s_scm_i_char_eq_p
  71 {
  72   SCM_VALIDATE_CHAR (1, x);
  73   SCM_VALIDATE_CHAR (2, y);
  74   return scm_from_bool (scm_is_eq (x, y));
  75 }
  76 #undef FUNC_NAME
  77
  78
  79 static SCM scm_i_char_less_p (SCM x, SCM y, SCM rest);
  80 SCM_DEFINE (scm_i_char_less_p, "char<?", 0, 2, 1,
  81             (SCM x, SCM y, SCM rest),
  82             "Return @code{#t} iff the code point of @var{x} is less than the code\n"
  83             "point of @var{y}, else @code{#f}.")
  84 #define FUNC_NAME s_scm_i_char_less_p
  85 {
  86   if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
  87     return SCM_BOOL_T;
  88   while (!scm_is_null (rest))
  89     {
  90       if (scm_is_false (scm_char_less_p (x, y)))
  91         return SCM_BOOL_F;
  92       x = y;
  93       y = scm_car (rest);
  94       rest = scm_cdr (rest);
  95     }
  96   return scm_char_less_p (x, y);
  97 }
  98 #undef FUNC_NAME
  99
 100 SCM scm_char_less_p (SCM x, SCM y)
 101 #define FUNC_NAME s_scm_i_char_less_p
 102 {
 103   SCM_VALIDATE_CHAR (1, x);
 104   SCM_VALIDATE_CHAR (2, y);
 105   return scm_from_bool (SCM_CHAR(x) < SCM_CHAR(y));
 106 }
 107 #undef FUNC_NAME
 108
 109 static SCM scm_i_char_leq_p (SCM x, SCM y, SCM rest);
 110 SCM_DEFINE (scm_i_char_leq_p, "char<=?", 0, 2, 1,
 111             (SCM x, SCM y, SCM rest),
 112             "Return @code{#t} if the Unicode code point of @var{x} is less than or\n"
 113             "equal to the code point of @var{y}, else @code{#f}.")
 114 #define FUNC_NAME s_scm_i_char_leq_p
 115 {
 116   if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
 117     return SCM_BOOL_T;
 118   while (!scm_is_null (rest))
 119     {
 120       if (scm_is_false (scm_char_leq_p (x, y)))
 121         return SCM_BOOL_F;
 122       x = y;
 123       y = scm_car (rest);
 124       rest = scm_cdr (rest);
 125     }
 126   return scm_char_leq_p (x, y);
 127 }
 128 #undef FUNC_NAME
 129
 130 SCM scm_char_leq_p (SCM x, SCM y)
 131 #define FUNC_NAME s_scm_i_char_leq_p
 132 {
 133   SCM_VALIDATE_CHAR (1, x);
 134   SCM_VALIDATE_CHAR (2, y);
 135   return scm_from_bool (SCM_CHAR(x) <= SCM_CHAR(y));
 136 }
 137 #undef FUNC_NAME
 138
 139 static SCM scm_i_char_gr_p (SCM x, SCM y, SCM rest);
 140 SCM_DEFINE (scm_i_char_gr_p, "char>?", 0, 2, 1,
 141             (SCM x, SCM y, SCM rest),
 142             "Return @code{#t} if the Unicode code point of @var{x} is greater than\n"
 143             "the code point of @var{y}, else @code{#f}.")
 144 #define FUNC_NAME s_scm_i_char_gr_p
 145 {
 146   if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
 147     return SCM_BOOL_T;
 148   while (!scm_is_null (rest))
 149     {
 150       if (scm_is_false (scm_char_gr_p (x, y)))
 151         return SCM_BOOL_F;
 152       x = y;
 153       y = scm_car (rest);
 154       rest = scm_cdr (rest);
 155     }
 156   return scm_char_gr_p (x, y);
 157 }
 158 #undef FUNC_NAME
 159
 160 SCM scm_char_gr_p (SCM x, SCM y)
 161 #define FUNC_NAME s_scm_i_char_gr_p
 162 {
 163   SCM_VALIDATE_CHAR (1, x);
 164   SCM_VALIDATE_CHAR (2, y);
 165   return scm_from_bool (SCM_CHAR(x) > SCM_CHAR(y));
 166 }
 167 #undef FUNC_NAME
 168
 169 static SCM scm_i_char_geq_p (SCM x, SCM y, SCM rest);
 170 SCM_DEFINE (scm_i_char_geq_p, "char>=?", 0, 2, 1,
 171             (SCM x, SCM y, SCM rest),
 172             "Return @code{#t} if the Unicode code point of @var{x} is greater than\n"
 173             "or equal to the code point of @var{y}, else @code{#f}.")
 174 #define FUNC_NAME s_scm_i_char_geq_p
 175 {
 176   if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
 177     return SCM_BOOL_T;
 178   while (!scm_is_null (rest))
 179     {
 180       if (scm_is_false (scm_char_geq_p (x, y)))
 181         return SCM_BOOL_F;
 182       x = y;
 183       y = scm_car (rest);
 184       rest = scm_cdr (rest);
 185     }
 186   return scm_char_geq_p (x, y);
 187 }
 188 #undef FUNC_NAME
 189
 190 SCM scm_char_geq_p (SCM x, SCM y)
 191 #define FUNC_NAME s_scm_i_char_geq_p
 192 {
 193   SCM_VALIDATE_CHAR (1, x);
 194   SCM_VALIDATE_CHAR (2, y);
 195   return scm_from_bool (SCM_CHAR(x) >= SCM_CHAR(y));
 196 }
 197 #undef FUNC_NAME
 198
 199 /* FIXME?: R6RS specifies that these comparisons are case-folded.
 200    This is the same thing as comparing the uppercase characters in
 201    practice, but, not in theory.  Unicode has table containing their
 202    definition of case-folded character mappings.  A more correct
 203    implementation would be to use that table and make a char-foldcase
 204    function.  */
 205
 206 static SCM scm_i_char_ci_eq_p (SCM x, SCM y, SCM rest);
 207 SCM_DEFINE (scm_i_char_ci_eq_p, "char-ci=?", 0, 2, 1,
 208             (SCM x, SCM y, SCM rest),
 209             "Return @code{#t} if the case-folded Unicode code point of @var{x} is\n"
 210             "the same as the case-folded code point of @var{y}, else @code{#f}.")
 211 #define FUNC_NAME s_scm_i_char_ci_eq_p
 212 {
 213   if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
 214     return SCM_BOOL_T;
 215   while (!scm_is_null (rest))
 216     {
 217       if (scm_is_false (scm_char_ci_eq_p (x, y)))
 218         return SCM_BOOL_F;
 219       x = y;
 220       y = scm_car (rest);
 221       rest = scm_cdr (rest);
 222     }
 223   return scm_char_ci_eq_p (x, y);
 224 }
 225 #undef FUNC_NAME
 226
 227 SCM scm_char_ci_eq_p (SCM x, SCM y)
 228 #define FUNC_NAME s_scm_i_char_ci_eq_p
 229 {
 230   SCM_VALIDATE_CHAR (1, x);
 231   SCM_VALIDATE_CHAR (2, y);
 232   return scm_from_bool (scm_c_upcase(SCM_CHAR(x))==scm_c_upcase(SCM_CHAR(y)));
 233 }
 234 #undef FUNC_NAME
 235
 236 static SCM scm_i_char_ci_less_p (SCM x, SCM y, SCM rest);
 237 SCM_DEFINE (scm_i_char_ci_less_p, "char-ci<?", 0, 2, 1,
 238             (SCM x, SCM y, SCM rest),
 239             "Return @code{#t} if the case-folded Unicode code point of @var{x} is\n"
 240             "less than the case-folded code point of @var{y}, else @code{#f}.")
 241 #define FUNC_NAME s_scm_i_char_ci_less_p
 242 {
 243   if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
 244     return SCM_BOOL_T;
 245   while (!scm_is_null (rest))
 246     {
 247       if (scm_is_false (scm_char_ci_less_p (x, y)))
 248         return SCM_BOOL_F;
 249       x = y;
 250       y = scm_car (rest);
 251       rest = scm_cdr (rest);
 252     }
 253   return scm_char_ci_less_p (x, y);
 254 }
 255 #undef FUNC_NAME
 256
 257 SCM scm_char_ci_less_p (SCM x, SCM y)
 258 #define FUNC_NAME s_scm_i_char_ci_less_p
 259 {
 260   SCM_VALIDATE_CHAR (1, x);
 261   SCM_VALIDATE_CHAR (2, y);
 262   return scm_from_bool ((scm_c_upcase(SCM_CHAR(x))) < scm_c_upcase(SCM_CHAR(y)));
 263 }
 264 #undef FUNC_NAME
 265
 266 static SCM scm_i_char_ci_leq_p (SCM x, SCM y, SCM rest);
 267 SCM_DEFINE (scm_i_char_ci_leq_p, "char-ci<=?", 0, 2, 1,
 268             (SCM x, SCM y, SCM rest),
 269             "Return @code{#t} iff the case-folded Unicode code point of @var{x} is\n"
 270             "less than or equal to the case-folded code point of @var{y}, else\n"
 271             "@code{#f}")
 272 #define FUNC_NAME s_scm_i_char_ci_leq_p
 273 {
 274   if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
 275     return SCM_BOOL_T;
 276   while (!scm_is_null (rest))
 277     {
 278       if (scm_is_false (scm_char_ci_leq_p (x, y)))
 279         return SCM_BOOL_F;
 280       x = y;
 281       y = scm_car (rest);
 282       rest = scm_cdr (rest);
 283     }
 284   return scm_char_ci_leq_p (x, y);
 285 }
 286 #undef FUNC_NAME
 287
 288 SCM scm_char_ci_leq_p (SCM x, SCM y)
 289 #define FUNC_NAME s_scm_i_char_ci_leq_p
 290 {
 291   SCM_VALIDATE_CHAR (1, x);
 292   SCM_VALIDATE_CHAR (2, y);
 293   return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) <= scm_c_upcase(SCM_CHAR(y)));
 294 }
 295 #undef FUNC_NAME
 296
 297 static SCM scm_i_char_ci_gr_p (SCM x, SCM y, SCM rest);
 298 SCM_DEFINE (scm_i_char_ci_gr_p, "char-ci>?", 0, 2, 1,
 299             (SCM x, SCM y, SCM rest),
 300             "Return @code{#t} iff the case-folded code point of @var{x} is greater\n"
 301             "than the case-folded code point of @var{y}, else @code{#f}.")
 302 #define FUNC_NAME s_scm_i_char_ci_gr_p
 303 {
 304   if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
 305     return SCM_BOOL_T;
 306   while (!scm_is_null (rest))
 307     {
 308       if (scm_is_false (scm_char_ci_gr_p (x, y)))
 309         return SCM_BOOL_F;
 310       x = y;
 311       y = scm_car (rest);
 312       rest = scm_cdr (rest);
 313     }
 314   return scm_char_ci_gr_p (x, y);
 315 }
 316 #undef FUNC_NAME
 317
 318 SCM scm_char_ci_gr_p (SCM x, SCM y)
 319 #define FUNC_NAME s_scm_i_char_ci_gr_p
 320 {
 321   SCM_VALIDATE_CHAR (1, x);
 322   SCM_VALIDATE_CHAR (2, y);
 323   return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) > scm_c_upcase(SCM_CHAR(y)));
 324 }
 325 #undef FUNC_NAME
 326
 327 static SCM scm_i_char_ci_geq_p (SCM x, SCM y, SCM rest);
 328 SCM_DEFINE (scm_i_char_ci_geq_p, "char-ci>=?", 0, 2, 1,
 329             (SCM x, SCM y, SCM rest),
 330             "Return @code{#t} iff the case-folded Unicode code point of @var{x} is\n"
 331             "greater than or equal to the case-folded code point of @var{y}, else\n"
 332             "@code{#f}.")
 333 #define FUNC_NAME s_scm_i_char_ci_geq_p
 334 {
 335   if (SCM_UNBNDP (x) || SCM_UNBNDP (y))
 336     return SCM_BOOL_T;
 337   while (!scm_is_null (rest))
 338     {
 339       if (scm_is_false (scm_char_ci_geq_p (x, y)))
 340         return SCM_BOOL_F;
 341       x = y;
 342       y = scm_car (rest);
 343       rest = scm_cdr (rest);
 344     }
 345   return scm_char_ci_geq_p (x, y);
 346 }
 347 #undef FUNC_NAME
 348
 349 SCM scm_char_ci_geq_p (SCM x, SCM y)
 350 #define FUNC_NAME s_scm_i_char_ci_geq_p
 351 {
 352   SCM_VALIDATE_CHAR (1, x);
 353   SCM_VALIDATE_CHAR (2, y);
 354   return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) >= scm_c_upcase(SCM_CHAR(y)));
 355 }
 356 #undef FUNC_NAME
 357
 358
 359 SCM_DEFINE (scm_char_alphabetic_p, "char-alphabetic?", 1, 0, 0,
 360            (SCM chr),
 361             "Return @code{#t} iff @var{chr} is alphabetic, else @code{#f}.\n")
 362 #define FUNC_NAME s_scm_char_alphabetic_p
 363 {
 364   return scm_char_set_contains_p (scm_char_set_letter, chr);
 365 }
 366 #undef FUNC_NAME
 367
 368 SCM_DEFINE (scm_char_numeric_p, "char-numeric?", 1, 0, 0,
 369            (SCM chr),
 370             "Return @code{#t} iff @var{chr} is numeric, else @code{#f}.\n")
 371 #define FUNC_NAME s_scm_char_numeric_p
 372 {
 373   return scm_char_set_contains_p (scm_char_set_digit, chr);
 374 }
 375 #undef FUNC_NAME
 376
 377 SCM_DEFINE (scm_char_whitespace_p, "char-whitespace?", 1, 0, 0,
 378            (SCM chr),
 379             "Return @code{#t} iff @var{chr} is whitespace, else @code{#f}.\n")
 380 #define FUNC_NAME s_scm_char_whitespace_p
 381 {
 382   return scm_char_set_contains_p (scm_char_set_whitespace, chr);
 383 }
 384 #undef FUNC_NAME
 385
 386
 387 SCM_DEFINE (scm_char_upper_case_p, "char-upper-case?", 1, 0, 0,
 388            (SCM chr),
 389             "Return @code{#t} iff @var{chr} is uppercase, else @code{#f}.\n")
 390 #define FUNC_NAME s_scm_char_upper_case_p
 391 {
 392   return scm_char_set_contains_p (scm_char_set_upper_case, chr);
 393 }
 394 #undef FUNC_NAME
 395
 396
 397 SCM_DEFINE (scm_char_lower_case_p, "char-lower-case?", 1, 0, 0,
 398            (SCM chr),
 399             "Return @code{#t} iff @var{chr} is lowercase, else @code{#f}.\n")
 400 #define FUNC_NAME s_scm_char_lower_case_p
 401 {
 402   return scm_char_set_contains_p (scm_char_set_lower_case, chr);
 403 }
 404 #undef FUNC_NAME
 405
 406 SCM_DEFINE (scm_char_is_both_p, "char-is-both?", 1, 0, 0,
 407             (SCM chr),
 408             "Return @code{#t} iff @var{chr} is either uppercase or lowercase, else\n"
 409             "@code{#f}.\n")
 410 #define FUNC_NAME s_scm_char_is_both_p
 411 {
 412   if (scm_is_true (scm_char_set_contains_p (scm_char_set_lower_case, chr)))
 413     return SCM_BOOL_T;
 414   return scm_char_set_contains_p (scm_char_set_upper_case, chr);
 415 }
 416 #undef FUNC_NAME
 417
 418
 419 SCM_DEFINE (scm_char_to_integer, "char->integer", 1, 0, 0,
 420             (SCM chr),
 421             "Return the Unicode code point of @var{chr}.")
 422 #define FUNC_NAME s_scm_char_to_integer
 423 {
 424   SCM_VALIDATE_CHAR (1, chr);
 425   return scm_from_uint32 (SCM_CHAR(chr));
 426 }
 427 #undef FUNC_NAME
 428
 429
 430 SCM_DEFINE (scm_integer_to_char, "integer->char", 1, 0, 0,
 431            (SCM n),
 432             "Return the character that has Unicode code point @var{n}.  The integer\n"
 433             "@var{n} must be a valid code point.  Valid code points are in the\n"
 434             "ranges 0 to @code{#xD7FF} inclusive or @code{#xE000} to\n"
 435             "@code{#x10FFFF} inclusive.")
 436 #define FUNC_NAME s_scm_integer_to_char
 437 {
 438   scm_t_wchar cn;
 439
 440   cn = scm_to_wchar (n);
 441
 442   /* Avoid the surrogates.  */
 443   if (!SCM_IS_UNICODE_CHAR (cn))
 444     scm_out_of_range (FUNC_NAME, n);
 445
 446   return SCM_MAKE_CHAR (cn);
 447 }
 448 #undef FUNC_NAME
 449
 450
 451 SCM_DEFINE (scm_char_upcase, "char-upcase", 1, 0, 0,
 452            (SCM chr),
 453             "Return the uppercase character version of @var{chr}.")
 454 #define FUNC_NAME s_scm_char_upcase
 455 {
 456   SCM_VALIDATE_CHAR (1, chr);
 457   return SCM_MAKE_CHAR (scm_c_upcase (SCM_CHAR (chr)));
 458 }
 459 #undef FUNC_NAME
 460
 461
 462 SCM_DEFINE (scm_char_downcase, "char-downcase", 1, 0, 0,
 463            (SCM chr),
 464             "Return the lowercase character version of @var{chr}.")
 465 #define FUNC_NAME s_scm_char_downcase
 466 {
 467   SCM_VALIDATE_CHAR (1, chr);
 468   return SCM_MAKE_CHAR (scm_c_downcase (SCM_CHAR(chr)));
 469 }
 470 #undef FUNC_NAME
 471
 472 SCM_DEFINE (scm_char_titlecase, "char-titlecase", 1, 0, 0,
 473            (SCM chr),
 474             "Return the titlecase character version of @var{chr}.")
 475 #define FUNC_NAME s_scm_char_titlecase
 476 {
 477   SCM_VALIDATE_CHAR (1, chr);
 478   return SCM_MAKE_CHAR (scm_c_titlecase (SCM_CHAR(chr)));
 479 }
 480 #undef FUNC_NAME
 481
 482 SCM_DEFINE (scm_char_general_category, "char-general-category", 1, 0, 0,
 483            (SCM chr),
 484             "Return a symbol representing the Unicode general category of "
 485             "@var{chr} or @code{#f} if a named category cannot be found.")
 486 #define FUNC_NAME s_scm_char_general_category
 487 {
 488   const char *sym;
 489   uc_general_category_t cat;
 490
 491   SCM_VALIDATE_CHAR (1, chr);
 492   cat = uc_general_category (SCM_CHAR (chr));
 493   sym = uc_general_category_name (cat);
 494
 495   if (sym != NULL)
 496     return scm_from_locale_symbol (sym);
 497   return SCM_BOOL_F;
 498 }
 499 #undef FUNC_NAME
 500
 501 \f
 502
 503
 504
 505 /*
 506 TODO: change name  to scm_i_.. ? --hwn
 507 */
 508
 509
 510 scm_t_wchar
 511 scm_c_upcase (scm_t_wchar c)
 512 {
 513   return uc_toupper ((int) c);
 514 }
 515
 516
 517 scm_t_wchar
 518 scm_c_downcase (scm_t_wchar c)
 519 {
 520   return uc_tolower ((int) c);
 521 }
 522
 523 scm_t_wchar
 524 scm_c_titlecase (scm_t_wchar c)
 525 {
 526   return uc_totitle ((int) c);
 527 }
 528
 529 \f
 530
 531 /* There are a few sets of character names: R5RS, Guile
 532    extensions for control characters, and leftover Guile extensions.
 533    They are listed in order of precedence.  */
 534
 535 static const char *const scm_r5rs_charnames[] = {
 536   "space", "newline"
 537 };
 538
 539 static const scm_t_uint32 scm_r5rs_charnums[] = {
 540   0x20, 0x0a
 541 };
 542
 543 #define SCM_N_R5RS_CHARNAMES (sizeof (scm_r5rs_charnames) / sizeof (char *))
 544
 545 static const char *const scm_r6rs_charnames[] = {
 546   "nul", "alarm", "backspace", "tab", "linefeed", "vtab", "page",
 547   "return", "esc", "delete"
 548   /* 'space' and 'newline' are already included from the R5RS list.  */
 549 };
 550
 551 static const scm_t_uint32 scm_r6rs_charnums[] = {
 552   0x00, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c,
 553   0x0d, 0x1b, 0x7f
 554 };
 555
 556 #define SCM_N_R6RS_CHARNAMES (sizeof (scm_r6rs_charnames) / sizeof (char *))
 557
 558 static const char *const scm_r7rs_charnames[] = {
 559   "escape"
 560 };
 561
 562 static const scm_t_uint32 scm_r7rs_charnums[] = {
 563   0x1b
 564 };
 565
 566 #define SCM_N_R7RS_CHARNAMES (sizeof (scm_r7rs_charnames) / sizeof (char *))
 567
 568 /* The abbreviated names for control characters.  */
 569 static const char *const scm_C0_control_charnames[] = {
 570   /* C0 controls */
 571   "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
 572   "bs",  "ht",  "lf",  "vt",  "ff",  "cr",  "so",  "si",
 573   "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
 574   "can", "em",  "sub", "esc", "fs",  "gs",  "rs",  "us",
 575   "sp", "del"
 576 };
 577
 578 static const scm_t_uint32 scm_C0_control_charnums[] = {
 579   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
 580   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
 581   0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
 582   0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
 583   0x20, 0x7f
 584 };
 585
 586 #define SCM_N_C0_CONTROL_CHARNAMES (sizeof (scm_C0_control_charnames) / sizeof (char *))
 587
 588 static const char *const scm_alt_charnames[] = {
 589   "null", "nl", "np"
 590 };
 591
 592 static const scm_t_uint32 scm_alt_charnums[] = {
 593   0x00, 0x0a, 0x0c
 594 };
 595
 596 #define SCM_N_ALT_CHARNAMES (sizeof (scm_alt_charnames) / sizeof (char *))
 597
 598 /* Returns the string charname for a character if it exists, or NULL
 599    otherwise.  */
 600 const char *
 601 scm_i_charname (SCM chr)
 602 {
 603   size_t c;
 604   scm_t_uint32 i = SCM_CHAR (chr);
 605
 606   for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
 607     if (scm_r5rs_charnums[c] == i)
 608       return scm_r5rs_charnames[c];
 609
 610   for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++)
 611     if (scm_r6rs_charnums[c] == i)
 612       return scm_r6rs_charnames[c];
 613
 614   for (c = 0; c < SCM_N_R7RS_CHARNAMES; c++)
 615     if (scm_r7rs_charnums[c] == i)
 616       return scm_r7rs_charnames[c];
 617
 618   for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
 619     if (scm_C0_control_charnums[c] == i)
 620       return scm_C0_control_charnames[c];
 621
 622   /* Since the characters in scm_alt_charnums is a subset of
 623      scm_C0_control_charnums, this code is never reached.  */
 624   for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
 625     if (scm_alt_charnums[c] == i)
 626       return scm_alt_charnames[c];
 627
 628   return NULL;
 629 }
 630
 631 /* Return a character from a string charname.  */
 632 SCM
 633 scm_i_charname_to_char (const char *charname, size_t charname_len)
 634 {
 635   size_t c;
 636
 637   /* The R5RS charnames.  These are supposed to be case insensitive. */
 638   for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
 639     if ((strlen (scm_r5rs_charnames[c]) == charname_len)
 640         && (!strncasecmp (scm_r5rs_charnames[c], charname, charname_len)))
 641       return SCM_MAKE_CHAR (scm_r5rs_charnums[c]);
 642
 643   /* The R6RS charnames.  R6RS says that these should be case-sensitive.
 644      They are left as case-insensitive to avoid confusion.  */
 645   for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++)
 646     if ((strlen (scm_r6rs_charnames[c]) == charname_len)
 647         && (!strncasecmp (scm_r6rs_charnames[c], charname, charname_len)))
 648       return SCM_MAKE_CHAR (scm_r6rs_charnums[c]);
 649
 650   /* The R7RS charnames.  R7RS says that these should be case-sensitive.
 651      They are left as case-insensitive to avoid confusion.  */
 652   for (c = 0; c < SCM_N_R7RS_CHARNAMES; c++)
 653     if ((strlen (scm_r7rs_charnames[c]) == charname_len)
 654         && (!strncasecmp (scm_r7rs_charnames[c], charname, charname_len)))
 655       return SCM_MAKE_CHAR (scm_r7rs_charnums[c]);
 656
 657   /* Then come the controls.  By Guile convention, these are not case
 658      sensitive.  */
 659   for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
 660     if ((strlen (scm_C0_control_charnames[c]) == charname_len)
 661         && (!strncasecmp (scm_C0_control_charnames[c], charname, charname_len)))
 662       return SCM_MAKE_CHAR (scm_C0_control_charnums[c]);
 663
 664   /* Lastly are some old names carried over for compatibility.  */
 665   for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
 666     if ((strlen (scm_alt_charnames[c]) == charname_len)
 667         && (!strncasecmp (scm_alt_charnames[c], charname, charname_len)))
 668       return SCM_MAKE_CHAR (scm_alt_charnums[c]);
 669
 670   return SCM_BOOL_F;
 671 }
 672
 673 \f
 674
 675
 676 void
 677 scm_init_chars ()
 678 {
 679 #include "libguile/chars.x"
 680 }
 681
 682
 683 /*
 684   Local Variables:
 685   c-file-style: "gnu"
 686   End:
 687 */