libguile/read.c

   1 /* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007, 2008, 2009 Free Software
   2  * Foundation, Inc.
   3  *
   4  * This library is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public License
   6  * as published by the Free Software Foundation; either version 3 of
   7  * the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful, but
  10  * WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with this library; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17  * 02110-1301 USA
  18  */
  19
  20
  21 \f
  22
  23 #ifdef HAVE_CONFIG_H
  24 # include <config.h>
  25 #endif
  26
  27 #include <stdio.h>
  28 #include <ctype.h>
  29 #include <string.h>
  30 #include <unistd.h>
  31 #include <unicase.h>
  32
  33 #include "libguile/_scm.h"
  34 #include "libguile/bytevectors.h"
  35 #include "libguile/chars.h"
  36 #include "libguile/eval.h"
  37 #include "libguile/arrays.h"
  38 #include "libguile/bitvectors.h"
  39 #include "libguile/keywords.h"
  40 #include "libguile/alist.h"
  41 #include "libguile/srcprop.h"
  42 #include "libguile/hashtab.h"
  43 #include "libguile/hash.h"
  44 #include "libguile/ports.h"
  45 #include "libguile/root.h"
  46 #include "libguile/strings.h"
  47 #include "libguile/strports.h"
  48 #include "libguile/vectors.h"
  49 #include "libguile/validate.h"
  50 #include "libguile/srfi-4.h"
  51 #include "libguile/srfi-13.h"
  52
  53 #include "libguile/read.h"
  54 #include "libguile/private-options.h"
  55
  56
  57 \f
  58
  59 SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
  60 SCM_SYMBOL (scm_keyword_prefix, "prefix");
  61 SCM_SYMBOL (scm_keyword_postfix, "postfix");
  62
  63 scm_t_option scm_read_opts[] = {
  64   { SCM_OPTION_BOOLEAN, "copy", 0,
  65     "Copy source code expressions." },
  66   { SCM_OPTION_BOOLEAN, "positions", 0,
  67     "Record positions of source code expressions." },
  68   { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
  69     "Convert symbols to lower case."},
  70   { SCM_OPTION_SCM, "keywords", SCM_UNPACK (SCM_BOOL_F),
  71     "Style of keyword recognition: #f, 'prefix or 'postfix."},
  72 #if SCM_ENABLE_ELISP
  73   { SCM_OPTION_BOOLEAN, "elisp-vectors", 0,
  74     "Support Elisp vector syntax, namely `[...]'."},
  75   { SCM_OPTION_BOOLEAN, "elisp-strings", 0,
  76     "Support `\\(' and `\\)' in strings."},
  77 #endif
  78   { 0, },
  79 };
  80
  81 /*
  82   Give meaningful error messages for errors
  83
  84   We use the format
  85
  86   FILE:LINE:COL: MESSAGE
  87   This happened in ....
  88
  89   This is not standard GNU format, but the test-suite likes the real
  90   message to be in front.
  91
  92  */
  93
  94
  95 void
  96 scm_i_input_error (char const *function,
  97                    SCM port, const char *message, SCM arg)
  98 {
  99   SCM fn = (scm_is_string (SCM_FILENAME(port))
 100             ? SCM_FILENAME(port)
 101             : scm_from_locale_string ("#<unknown port>"));
 102
 103   SCM string_port = scm_open_output_string ();
 104   SCM string = SCM_EOL;
 105   scm_simple_format (string_port,
 106                      scm_from_locale_string ("~A:~S:~S: ~A"),
 107                      scm_list_4 (fn,
 108                                  scm_from_long (SCM_LINUM (port) + 1),
 109                                  scm_from_int (SCM_COL (port) + 1),
 110                                  scm_from_locale_string (message)));
 111
 112   string = scm_get_output_string (string_port);
 113   scm_close_output_port (string_port);
 114   scm_error_scm (scm_from_locale_symbol ("read-error"),
 115                  function? scm_from_locale_string (function) : SCM_BOOL_F,
 116                  string,
 117                  arg,
 118                  SCM_BOOL_F);
 119 }
 120
 121
 122 SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
 123             (SCM setting),
 124             "Option interface for the read options. Instead of using\n"
 125             "this procedure directly, use the procedures @code{read-enable},\n"
 126             "@code{read-disable}, @code{read-set!} and @code{read-options}.")
 127 #define FUNC_NAME s_scm_read_options
 128 {
 129   SCM ans = scm_options (setting,
 130                          scm_read_opts,
 131                          FUNC_NAME);
 132   if (SCM_COPY_SOURCE_P)
 133     SCM_RECORD_POSITIONS_P = 1;
 134   return ans;
 135 }
 136 #undef FUNC_NAME
 137
 138 /* An association list mapping extra hash characters to procedures.  */
 139 static SCM *scm_read_hash_procedures;
 140
 141
 142 \f
 143 /* Token readers.  */
 144
 145
 146 /* Size of the C buffer used to read symbols and numbers.  */
 147 #define READER_BUFFER_SIZE            128
 148
 149 /* Size of the C buffer used to read strings.  */
 150 #define READER_STRING_BUFFER_SIZE     512
 151
 152 /* The maximum size of Scheme character names.  */
 153 #define READER_CHAR_NAME_MAX_SIZE      50
 154
 155
 156 /* `isblank' is only in C99.  */
 157 #define CHAR_IS_BLANK_(_chr)                                    \
 158   (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n')      \
 159    || ((_chr) == '\f') || ((_chr) == '\r'))
 160
 161 #ifdef MSDOS
 162 # define CHAR_IS_BLANK(_chr)                    \
 163   ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
 164 #else
 165 # define CHAR_IS_BLANK CHAR_IS_BLANK_
 166 #endif
 167
 168
 169 /* R5RS one-character delimiters (see section 7.1.1, ``Lexical
 170    structure'').  */
 171 #define CHAR_IS_R5RS_DELIMITER(c)                               \
 172   (CHAR_IS_BLANK (c)                                            \
 173    || (c == ')') || (c == '(') || (c == ';') || (c == '"'))
 174
 175 #define CHAR_IS_DELIMITER  CHAR_IS_R5RS_DELIMITER
 176
 177 /* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
 178    Structure''.  */
 179 #define CHAR_IS_EXPONENT_MARKER(_chr)                           \
 180   (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f')        \
 181    || ((_chr) == 'd') || ((_chr) == 'l'))
 182
 183 /* Read an SCSH block comment.  */
 184 static inline SCM scm_read_scsh_block_comment (int chr, SCM port);
 185 static SCM scm_read_commented_expression (int chr, SCM port);
 186
 187 /* Read from PORT until a delimiter (e.g., a whitespace) is read.  Return
 188    zero if the whole token fits in BUF, non-zero otherwise.  */
 189 static inline int
 190 read_token (SCM port, SCM buf, size_t *read)
 191 {
 192   scm_t_wchar chr;
 193   *read = 0;
 194
 195   buf = scm_i_string_start_writing (buf);
 196   while (*read < scm_i_string_length (buf))
 197     {
 198       chr = scm_getc (port);
 199
 200       if (chr == EOF)
 201         {
 202           scm_i_string_stop_writing ();
 203           return 0;
 204         }
 205
 206       chr = (SCM_CASE_INSENSITIVE_P ? uc_tolower (chr) : chr);
 207
 208       if (CHAR_IS_DELIMITER (chr))
 209         {
 210           scm_i_string_stop_writing ();
 211           scm_ungetc (chr, port);
 212           return 0;
 213         }
 214
 215       scm_i_string_set_x (buf, *read, chr);
 216       (*read)++;
 217     }
 218   scm_i_string_stop_writing ();
 219
 220   return 1;
 221 }
 222
 223 static SCM
 224 read_complete_token (SCM port, size_t *read)
 225 {
 226   SCM buffer, str = SCM_EOL;
 227   size_t len;
 228   int overflow;
 229
 230   buffer = scm_i_make_string (READER_BUFFER_SIZE, NULL);
 231   overflow = read_token (port, buffer, read);
 232   if (!overflow)
 233     return scm_i_substring (buffer, 0, *read);
 234
 235   str = scm_string_copy (buffer);
 236   do
 237     {
 238       overflow = read_token (port, buffer, &len);
 239       str = scm_string_append (scm_list_2 (str, buffer));
 240       *read += len;
 241     }
 242   while (overflow);
 243
 244   return scm_i_substring (str, 0, *read);
 245 }
 246
 247 /* Skip whitespace from PORT and return the first non-whitespace character
 248    read.  Raise an error on end-of-file.  */
 249 static int
 250 flush_ws (SCM port, const char *eoferr)
 251 {
 252   register scm_t_wchar c;
 253   while (1)
 254     switch (c = scm_getc (port))
 255       {
 256       case EOF:
 257       goteof:
 258         if (eoferr)
 259           {
 260             scm_i_input_error (eoferr,
 261                                port,
 262                                "end of file",
 263                                SCM_EOL);
 264           }
 265         return c;
 266
 267       case ';':
 268       lp:
 269         switch (c = scm_getc (port))
 270           {
 271           case EOF:
 272             goto goteof;
 273           default:
 274             goto lp;
 275           case SCM_LINE_INCREMENTORS:
 276             break;
 277           }
 278         break;
 279
 280       case '#':
 281         switch (c = scm_getc (port))
 282           {
 283           case EOF:
 284             eoferr = "read_sharp";
 285             goto goteof;
 286           case '!':
 287             scm_read_scsh_block_comment (c, port);
 288             break;
 289           case ';':
 290             scm_read_commented_expression (c, port);
 291             break;
 292           default:
 293             scm_ungetc (c, port);
 294             return '#';
 295           }
 296         break;
 297
 298       case SCM_LINE_INCREMENTORS:
 299       case SCM_SINGLE_SPACES:
 300       case '\t':
 301         break;
 302
 303       default:
 304         return c;
 305       }
 306
 307   return 0;
 308 }
 309
 310
 311 \f
 312 /* Token readers.  */
 313
 314 static SCM scm_read_expression (SCM port);
 315 static SCM scm_read_sharp (int chr, SCM port);
 316 static SCM scm_get_hash_procedure (int c);
 317 static SCM recsexpr (SCM obj, long line, int column, SCM filename);
 318
 319
 320 static SCM
 321 scm_read_sexp (scm_t_wchar chr, SCM port)
 322 #define FUNC_NAME "scm_i_lreadparen"
 323 {
 324   register int c;
 325   register SCM tmp;
 326   register SCM tl, ans = SCM_EOL;
 327   SCM tl2 = SCM_EOL, ans2 = SCM_EOL, copy = SCM_BOOL_F;
 328   static const int terminating_char = ')';
 329
 330   /* Need to capture line and column numbers here. */
 331   long line = SCM_LINUM (port);
 332   int column = SCM_COL (port) - 1;
 333
 334
 335   c = flush_ws (port, FUNC_NAME);
 336   if (terminating_char == c)
 337     return SCM_EOL;
 338
 339   scm_ungetc (c, port);
 340   if (scm_is_eq (scm_sym_dot,
 341                  (tmp = scm_read_expression (port))))
 342     {
 343       ans = scm_read_expression (port);
 344       if (terminating_char != (c = flush_ws (port, FUNC_NAME)))
 345         scm_i_input_error (FUNC_NAME, port, "missing close paren",
 346                            SCM_EOL);
 347       return ans;
 348     }
 349
 350   /* Build the head of the list structure. */
 351   ans = tl = scm_cons (tmp, SCM_EOL);
 352
 353   if (SCM_COPY_SOURCE_P)
 354     ans2 = tl2 = scm_cons (scm_is_pair (tmp)
 355                            ? copy
 356                            : tmp,
 357                            SCM_EOL);
 358
 359   while (terminating_char != (c = flush_ws (port, FUNC_NAME)))
 360     {
 361       SCM new_tail;
 362
 363       scm_ungetc (c, port);
 364       if (scm_is_eq (scm_sym_dot,
 365                      (tmp = scm_read_expression (port))))
 366         {
 367           SCM_SETCDR (tl, tmp = scm_read_expression (port));
 368
 369           if (SCM_COPY_SOURCE_P)
 370             SCM_SETCDR (tl2, scm_cons (scm_is_pair (tmp) ? copy : tmp,
 371                                        SCM_EOL));
 372
 373           c = flush_ws (port, FUNC_NAME);
 374           if (terminating_char != c)
 375             scm_i_input_error (FUNC_NAME, port,
 376                                "in pair: missing close paren", SCM_EOL);
 377           goto exit;
 378         }
 379
 380       new_tail = scm_cons (tmp, SCM_EOL);
 381       SCM_SETCDR (tl, new_tail);
 382       tl = new_tail;
 383
 384       if (SCM_COPY_SOURCE_P)
 385         {
 386           SCM new_tail2 = scm_cons (scm_is_pair (tmp)
 387                                     ? copy
 388                                     : tmp, SCM_EOL);
 389           SCM_SETCDR (tl2, new_tail2);
 390           tl2 = new_tail2;
 391         }
 392     }
 393
 394  exit:
 395   if (SCM_RECORD_POSITIONS_P)
 396     scm_whash_insert (scm_source_whash,
 397                       ans,
 398                       scm_make_srcprops (line, column,
 399                                          SCM_FILENAME (port),
 400                                          SCM_COPY_SOURCE_P
 401                                          ? ans2
 402                                          : SCM_UNDEFINED,
 403                                          SCM_EOL));
 404   return ans;
 405 }
 406 #undef FUNC_NAME
 407
 408 static SCM
 409 scm_read_string (int chr, SCM port)
 410 #define FUNC_NAME "scm_lreadr"
 411 {
 412   /* For strings smaller than C_STR, this function creates only one Scheme
 413      object (the string returned).  */
 414
 415   SCM str = SCM_BOOL_F;
 416   unsigned c_str_len = 0;
 417   scm_t_wchar c;
 418
 419   str = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL);
 420   while ('"' != (c = scm_getc (port)))
 421     {
 422       if (c == EOF)
 423         {
 424         str_eof:
 425           scm_i_input_error (FUNC_NAME, port,
 426                              "end of file in string constant", SCM_EOL);
 427         }
 428
 429       if (c_str_len + 1 >= scm_i_string_length (str))
 430         {
 431           SCM addy = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL);
 432
 433           str = scm_string_append (scm_list_2 (str, addy));
 434         }
 435
 436       if (c == '\\')
 437         {
 438           switch (c = scm_getc (port))
 439             {
 440             case EOF:
 441               goto str_eof;
 442             case '"':
 443             case '\\':
 444               break;
 445 #if SCM_ENABLE_ELISP
 446             case '(':
 447             case ')':
 448               if (SCM_ESCAPED_PARENS_P)
 449                 break;
 450               goto bad_escaped;
 451 #endif
 452             case '\n':
 453               continue;
 454             case '0':
 455               c = '\0';
 456               break;
 457             case 'f':
 458               c = '\f';
 459               break;
 460             case 'n':
 461               c = '\n';
 462               break;
 463             case 'r':
 464               c = '\r';
 465               break;
 466             case 't':
 467               c = '\t';
 468               break;
 469             case 'a':
 470               c = '\007';
 471               break;
 472             case 'v':
 473               c = '\v';
 474               break;
 475             case 'x':
 476               {
 477                 scm_t_wchar a, b;
 478                 a = scm_getc (port);
 479                 if (a == EOF)
 480                   goto str_eof;
 481                 b = scm_getc (port);
 482                 if (b == EOF)
 483                   goto str_eof;
 484                 if ('0' <= a && a <= '9')
 485                   a -= '0';
 486                 else if ('A' <= a && a <= 'F')
 487                   a = a - 'A' + 10;
 488                 else if ('a' <= a && a <= 'f')
 489                   a = a - 'a' + 10;
 490                 else
 491                   {
 492                     c = a;
 493                     goto bad_escaped;
 494                   }
 495                 if ('0' <= b && b <= '9')
 496                   b -= '0';
 497                 else if ('A' <= b && b <= 'F')
 498                   b = b - 'A' + 10;
 499                 else if ('a' <= b && b <= 'f')
 500                   b = b - 'a' + 10;
 501                 else
 502                   {
 503                     c = b;
 504                     goto bad_escaped;
 505                   }
 506                 c = a * 16 + b;
 507                 break;
 508               }
 509             case 'u':
 510               {
 511                 scm_t_wchar a;
 512                 int i;
 513                 c = 0;
 514                 for (i = 0; i < 4; i++)
 515                   {
 516                     a = scm_getc (port);
 517                     if (a == EOF)
 518                       goto str_eof;
 519                     if ('0' <= a && a <= '9')
 520                       a -= '0';
 521                     else if ('A' <= a && a <= 'F')
 522                       a = a - 'A' + 10;
 523                     else if ('a' <= a && a <= 'f')
 524                       a = a - 'a' + 10;
 525                     else
 526                       {
 527                         c = a;
 528                         goto bad_escaped;
 529                       }
 530                     c = c * 16 + a;
 531                   }
 532                 break;
 533               }
 534             case 'U':
 535               {
 536                 scm_t_wchar a;
 537                 int i;
 538                 c = 0;
 539                 for (i = 0; i < 6; i++)
 540                   {
 541                     a = scm_getc (port);
 542                     if (a == EOF)
 543                       goto str_eof;
 544                     if ('0' <= a && a <= '9')
 545                       a -= '0';
 546                     else if ('A' <= a && a <= 'F')
 547                       a = a - 'A' + 10;
 548                     else if ('a' <= a && a <= 'f')
 549                       a = a - 'a' + 10;
 550                     else
 551                       {
 552                         c = a;
 553                         goto bad_escaped;
 554                       }
 555                     c = c * 16 + a;
 556                   }
 557                 break;
 558               }
 559             default:
 560             bad_escaped:
 561               scm_i_input_error (FUNC_NAME, port,
 562                                  "illegal character in escape sequence: ~S",
 563                                  scm_list_1 (SCM_MAKE_CHAR (c)));
 564             }
 565         }
 566       str = scm_i_string_start_writing (str);
 567       scm_i_string_set_x (str, c_str_len++, c);
 568       scm_i_string_stop_writing ();
 569     }
 570
 571   if (c_str_len > 0)
 572     {
 573       return scm_i_substring_copy (str, 0, c_str_len);
 574     }
 575
 576   return scm_nullstr;
 577 }
 578 #undef FUNC_NAME
 579
 580
 581 static SCM
 582 scm_read_number (scm_t_wchar chr, SCM port)
 583 {
 584   SCM result;
 585   SCM buffer;
 586   size_t read;
 587
 588   scm_ungetc (chr, port);
 589   buffer = read_complete_token (port, &read);
 590   result = scm_string_to_number (buffer, SCM_UNDEFINED);
 591   if (!scm_is_true (result))
 592     /* Return a symbol instead of a number.  */
 593     result = scm_string_to_symbol (buffer);
 594
 595   return result;
 596 }
 597
 598 static SCM
 599 scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port)
 600 {
 601   SCM result;
 602   int ends_with_colon = 0;
 603   SCM buffer;
 604   size_t read = 0;
 605   int postfix = scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_postfix);
 606
 607   scm_ungetc (chr, port);
 608   buffer = read_complete_token (port, &read);
 609   if (read > 0)
 610     ends_with_colon = scm_i_string_ref (buffer, read - 1) == ':';
 611
 612   if (postfix && ends_with_colon && (read > 1))
 613     result = scm_symbol_to_keyword (scm_string_to_symbol (scm_i_substring (buffer, 0, read - 1)));
 614   else
 615     result = scm_string_to_symbol (buffer);
 616
 617   return result;
 618 }
 619
 620 static SCM
 621 scm_read_number_and_radix (scm_t_wchar chr, SCM port)
 622 #define FUNC_NAME "scm_lreadr"
 623 {
 624   SCM result;
 625   size_t read;
 626   SCM buffer = scm_i_make_string (READER_BUFFER_SIZE, NULL);
 627   unsigned int radix;
 628
 629   switch (chr)
 630     {
 631     case 'B':
 632     case 'b':
 633       radix = 2;
 634       break;
 635
 636     case 'o':
 637     case 'O':
 638       radix = 8;
 639       break;
 640
 641     case 'd':
 642     case 'D':
 643       radix = 10;
 644       break;
 645
 646     case 'x':
 647     case 'X':
 648       radix = 16;
 649       break;
 650
 651     default:
 652       scm_ungetc (chr, port);
 653       scm_ungetc ('#', port);
 654       radix = 10;
 655     }
 656
 657   buffer = read_complete_token (port, &read);
 658   result = scm_string_to_number (buffer, scm_from_uint (radix));
 659
 660   if (scm_is_true (result))
 661     return result;
 662
 663   scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
 664
 665   return SCM_BOOL_F;
 666 }
 667 #undef FUNC_NAME
 668
 669 static SCM
 670 scm_read_quote (int chr, SCM port)
 671 {
 672   SCM p;
 673   long line = SCM_LINUM (port);
 674   int column = SCM_COL (port) - 1;
 675
 676   switch (chr)
 677     {
 678     case '`':
 679       p = scm_sym_quasiquote;
 680       break;
 681
 682     case '\'':
 683       p = scm_sym_quote;
 684       break;
 685
 686     case ',':
 687       {
 688         scm_t_wchar c;
 689
 690         c = scm_getc (port);
 691         if ('@' == c)
 692           p = scm_sym_uq_splicing;
 693         else
 694           {
 695             scm_ungetc (c, port);
 696             p = scm_sym_unquote;
 697           }
 698         break;
 699       }
 700
 701     default:
 702       fprintf (stderr, "%s: unhandled quote character (%i)\n",
 703                "scm_read_quote", chr);
 704       abort ();
 705     }
 706
 707   p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
 708   if (SCM_RECORD_POSITIONS_P)
 709     scm_whash_insert (scm_source_whash, p,
 710                       scm_make_srcprops (line, column,
 711                                          SCM_FILENAME (port),
 712                                          SCM_COPY_SOURCE_P
 713                                          ? (scm_cons2 (SCM_CAR (p),
 714                                                        SCM_CAR (SCM_CDR (p)),
 715                                                        SCM_EOL))
 716                                          : SCM_UNDEFINED,
 717                                          SCM_EOL));
 718
 719
 720   return p;
 721 }
 722
 723 SCM_SYMBOL (sym_syntax, "syntax");
 724 SCM_SYMBOL (sym_quasisyntax, "quasisyntax");
 725 SCM_SYMBOL (sym_unsyntax, "unsyntax");
 726 SCM_SYMBOL (sym_unsyntax_splicing, "unsyntax-splicing");
 727
 728 static SCM
 729 scm_read_syntax (int chr, SCM port)
 730 {
 731   SCM p;
 732   long line = SCM_LINUM (port);
 733   int column = SCM_COL (port) - 1;
 734
 735   switch (chr)
 736     {
 737     case '`':
 738       p = sym_quasisyntax;
 739       break;
 740
 741     case '\'':
 742       p = sym_syntax;
 743       break;
 744
 745     case ',':
 746       {
 747         int c;
 748
 749         c = scm_getc (port);
 750         if ('@' == c)
 751           p = sym_unsyntax_splicing;
 752         else
 753           {
 754             scm_ungetc (c, port);
 755             p = sym_unsyntax;
 756           }
 757         break;
 758       }
 759
 760     default:
 761       fprintf (stderr, "%s: unhandled syntax character (%i)\n",
 762                "scm_read_syntax", chr);
 763       abort ();
 764     }
 765
 766   p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
 767   if (SCM_RECORD_POSITIONS_P)
 768     scm_whash_insert (scm_source_whash, p,
 769                       scm_make_srcprops (line, column,
 770                                          SCM_FILENAME (port),
 771                                          SCM_COPY_SOURCE_P
 772                                          ? (scm_cons2 (SCM_CAR (p),
 773                                                        SCM_CAR (SCM_CDR (p)),
 774                                                        SCM_EOL))
 775                                          : SCM_UNDEFINED,
 776                                          SCM_EOL));
 777
 778
 779   return p;
 780 }
 781
 782 static inline SCM
 783 scm_read_semicolon_comment (int chr, SCM port)
 784 {
 785   int c;
 786
 787   /* We use the get_byte here because there is no need to get the
 788      locale correct with comment input. This presumes that newline
 789      always represents itself no matter what the encoding is.  */
 790   for (c = scm_get_byte_or_eof (port);
 791        (c != EOF) && (c != '\n');
 792        c = scm_getc (port));
 793
 794   return SCM_UNSPECIFIED;
 795 }
 796
 797 \f
 798 /* Sharp readers, i.e. readers called after a `#' sign has been read.  */
 799
 800 static SCM
 801 scm_read_boolean (int chr, SCM port)
 802 {
 803   switch (chr)
 804     {
 805     case 't':
 806     case 'T':
 807       return SCM_BOOL_T;
 808
 809     case 'f':
 810     case 'F':
 811       return SCM_BOOL_F;
 812     }
 813
 814   return SCM_UNSPECIFIED;
 815 }
 816
 817 static SCM
 818 scm_read_character (scm_t_wchar chr, SCM port)
 819 #define FUNC_NAME "scm_lreadr"
 820 {
 821   SCM charname = scm_i_make_string (READER_CHAR_NAME_MAX_SIZE, NULL);
 822   size_t charname_len;
 823   scm_t_wchar cp;
 824   int overflow;
 825
 826   overflow = read_token (port, charname, &charname_len);
 827   charname = scm_c_substring (charname, 0, charname_len);
 828
 829   if (overflow)
 830     goto char_error;
 831
 832   if (charname_len == 0)
 833     {
 834       chr = scm_getc (port);
 835       if (chr == EOF)
 836         scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
 837                            "while reading character", SCM_EOL);
 838
 839       /* CHR must be a token delimiter, like a whitespace.  */
 840       return (SCM_MAKE_CHAR (chr));
 841     }
 842
 843   if (charname_len == 1)
 844     return SCM_MAKE_CHAR (scm_i_string_ref (charname, 0));
 845
 846   cp = scm_i_string_ref (charname, 0);
 847   if (cp >= '0' && cp < '8')
 848     {
 849       /* Dirk:FIXME::  This type of character syntax is not R5RS
 850        * compliant.  Further, it should be verified that the constant
 851        * does only consist of octal digits.  Finally, it should be
 852        * checked whether the resulting fixnum is in the range of
 853        * characters.  */
 854       SCM p = scm_string_to_number (charname, scm_from_uint (8));
 855       if (SCM_I_INUMP (p))
 856         return SCM_MAKE_CHAR (SCM_I_INUM (p));
 857     }
 858
 859   /* The names of characters should never have non-Latin1
 860      characters.  */
 861   if (scm_i_is_narrow_string (charname)
 862       || scm_i_try_narrow_string (charname))
 863     { SCM ch = scm_i_charname_to_char (scm_i_string_chars (charname),
 864                                        charname_len);
 865       if (scm_is_true (ch))
 866         return ch;
 867     }
 868
 869  char_error:
 870   scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
 871                      scm_list_1 (charname));
 872
 873   return SCM_UNSPECIFIED;
 874 }
 875 #undef FUNC_NAME
 876
 877 static inline SCM
 878 scm_read_keyword (int chr, SCM port)
 879 {
 880   SCM symbol;
 881
 882   /* Read the symbol that comprises the keyword.  Doing this instead of
 883      invoking a specific symbol reader function allows `scm_read_keyword ()'
 884      to adapt to the delimiters currently valid of symbols.
 885
 886      XXX: This implementation allows sloppy syntaxes like `#:  key'.  */
 887   symbol = scm_read_expression (port);
 888   if (!scm_is_symbol (symbol))
 889     scm_i_input_error ("scm_read_keyword", port,
 890                        "keyword prefix `~a' not followed by a symbol: ~s",
 891                        scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
 892
 893   return (scm_symbol_to_keyword (symbol));
 894 }
 895
 896 static inline SCM
 897 scm_read_vector (int chr, SCM port)
 898 {
 899   /* Note: We call `scm_read_sexp ()' rather than READER here in order to
 900      guarantee that it's going to do what we want.  After all, this is an
 901      implementation detail of `scm_read_vector ()', not a desirable
 902      property.  */
 903   return (scm_vector (scm_read_sexp (chr, port)));
 904 }
 905
 906 static inline SCM
 907 scm_read_srfi4_vector (int chr, SCM port)
 908 {
 909   return scm_i_read_array (port, chr);
 910 }
 911
 912 static SCM
 913 scm_read_bytevector (scm_t_wchar chr, SCM port)
 914 {
 915   chr = scm_getc (port);
 916   if (chr != 'u')
 917     goto syntax;
 918
 919   chr = scm_getc (port);
 920   if (chr != '8')
 921     goto syntax;
 922
 923   chr = scm_getc (port);
 924   if (chr != '(')
 925     goto syntax;
 926
 927   return scm_u8_list_to_bytevector (scm_read_sexp (chr, port));
 928
 929  syntax:
 930   scm_i_input_error ("read_bytevector", port,
 931                      "invalid bytevector prefix",
 932                      SCM_MAKE_CHAR (chr));
 933   return SCM_UNSPECIFIED;
 934 }
 935
 936 static SCM
 937 scm_read_guile_bit_vector (scm_t_wchar chr, SCM port)
 938 {
 939   /* Read the `#*10101'-style read syntax for bit vectors in Guile.  This is
 940      terribly inefficient but who cares?  */
 941   SCM s_bits = SCM_EOL;
 942
 943   for (chr = scm_getc (port);
 944        (chr != EOF) && ((chr == '0') || (chr == '1'));
 945        chr = scm_getc (port))
 946     {
 947       s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
 948     }
 949
 950   if (chr != EOF)
 951     scm_ungetc (chr, port);
 952
 953   return scm_bitvector (scm_reverse_x (s_bits, SCM_EOL));
 954 }
 955
 956 static inline SCM
 957 scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
 958 {
 959   int bang_seen = 0;
 960
 961   /* We can use the get_byte here because there is no need to get the
 962      locale correct when reading comments. This presumes that
 963      hash and exclamation points always represent themselves no
 964      matter what the source encoding is.*/
 965   for (;;)
 966     {
 967       int c = scm_get_byte_or_eof (port);
 968
 969       if (c == EOF)
 970         scm_i_input_error ("skip_block_comment", port,
 971                            "unterminated `#! ... !#' comment", SCM_EOL);
 972
 973       if (c == '!')
 974         bang_seen = 1;
 975       else if (c == '#' && bang_seen)
 976         break;
 977       else
 978         bang_seen = 0;
 979     }
 980
 981   return SCM_UNSPECIFIED;
 982 }
 983
 984 static SCM
 985 scm_read_commented_expression (scm_t_wchar chr, SCM port)
 986 {
 987   scm_t_wchar c;
 988
 989   c = flush_ws (port, (char *) NULL);
 990   if (EOF == c)
 991     scm_i_input_error ("read_commented_expression", port,
 992                        "no expression after #; comment", SCM_EOL);
 993   scm_ungetc (c, port);
 994   scm_read_expression (port);
 995   return SCM_UNSPECIFIED;
 996 }
 997
 998 static SCM
 999 scm_read_extended_symbol (scm_t_wchar chr, SCM port)
1000 {
1001   /* Guile's extended symbol read syntax looks like this:
1002
1003        #{This is all a symbol name}#
1004
1005      So here, CHR is expected to be `{'.  */
1006   int saw_brace = 0, finished = 0;
1007   size_t len = 0;
1008   SCM buf = scm_i_make_string (1024, NULL);
1009
1010   buf = scm_i_string_start_writing (buf);
1011
1012   while ((chr = scm_getc (port)) != EOF)
1013     {
1014       if (saw_brace)
1015         {
1016           if (chr == '#')
1017             {
1018               finished = 1;
1019               break;
1020             }
1021           else
1022             {
1023               saw_brace = 0;
1024               scm_i_string_set_x (buf, len++, '}');
1025               scm_i_string_set_x (buf, len++, chr);
1026             }
1027         }
1028       else if (chr == '}')
1029         saw_brace = 1;
1030       else
1031         scm_i_string_set_x (buf, len++, chr);
1032
1033       if (len >= scm_i_string_length (buf) - 2)
1034         {
1035           scm_i_string_stop_writing ();
1036           SCM addy = scm_i_make_string (1024, NULL);
1037           buf = scm_string_append (scm_list_2 (buf, addy));
1038           len = 0;
1039           buf = scm_i_string_start_writing (buf);
1040         }
1041
1042       if (finished)
1043         break;
1044     }
1045   scm_i_string_stop_writing ();
1046
1047   return (scm_string_to_symbol (scm_c_substring (buf, 0, len)));
1048 }
1049
1050
1051 \f
1052 /* Top-level token readers, i.e., dispatchers.  */
1053
1054 static SCM
1055 scm_read_sharp_extension (int chr, SCM port)
1056 {
1057   SCM proc;
1058
1059   proc = scm_get_hash_procedure (chr);
1060   if (scm_is_true (scm_procedure_p (proc)))
1061     {
1062       long line = SCM_LINUM (port);
1063       int column = SCM_COL (port) - 2;
1064       SCM got;
1065
1066       got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
1067       if (!scm_is_eq (got, SCM_UNSPECIFIED))
1068         {
1069           if (SCM_RECORD_POSITIONS_P)
1070             return (recsexpr (got, line, column,
1071                               SCM_FILENAME (port)));
1072           else
1073             return got;
1074         }
1075     }
1076
1077   return SCM_UNSPECIFIED;
1078 }
1079
1080 /* The reader for the sharp `#' character.  It basically dispatches reads
1081    among the above token readers.   */
1082 static SCM
1083 scm_read_sharp (scm_t_wchar chr, SCM port)
1084 #define FUNC_NAME "scm_lreadr"
1085 {
1086   SCM result;
1087
1088   chr = scm_getc (port);
1089
1090   result = scm_read_sharp_extension (chr, port);
1091   if (!scm_is_eq (result, SCM_UNSPECIFIED))
1092     return result;
1093
1094   switch (chr)
1095     {
1096     case '\\':
1097       return (scm_read_character (chr, port));
1098     case '(':
1099       return (scm_read_vector (chr, port));
1100     case 's':
1101     case 'u':
1102     case 'f':
1103       /* This one may return either a boolean or an SRFI-4 vector.  */
1104       return (scm_read_srfi4_vector (chr, port));
1105     case 'v':
1106       return (scm_read_bytevector (chr, port));
1107     case '*':
1108       return (scm_read_guile_bit_vector (chr, port));
1109     case 't':
1110     case 'T':
1111     case 'F':
1112       /* This one may return either a boolean or an SRFI-4 vector.  */
1113       return (scm_read_boolean (chr, port));
1114     case ':':
1115       return (scm_read_keyword (chr, port));
1116     case '0': case '1': case '2': case '3': case '4':
1117     case '5': case '6': case '7': case '8': case '9':
1118     case '@':
1119 #if SCM_ENABLE_DEPRECATED
1120       /* See below for 'i' and 'e'. */
1121     case 'a':
1122     case 'c':
1123     case 'y':
1124     case 'h':
1125     case 'l':
1126 #endif
1127       return (scm_i_read_array (port, chr));
1128
1129     case 'i':
1130     case 'e':
1131 #if SCM_ENABLE_DEPRECATED
1132       {
1133         /* When next char is '(', it really is an old-style
1134            uniform array. */
1135         scm_t_wchar next_c = scm_getc (port);
1136         if (next_c != EOF)
1137           scm_ungetc (next_c, port);
1138         if (next_c == '(')
1139           return scm_i_read_array (port, chr);
1140         /* Fall through. */
1141       }
1142 #endif
1143     case 'b':
1144     case 'B':
1145     case 'o':
1146     case 'O':
1147     case 'd':
1148     case 'D':
1149     case 'x':
1150     case 'X':
1151     case 'I':
1152     case 'E':
1153       return (scm_read_number_and_radix (chr, port));
1154     case '{':
1155       return (scm_read_extended_symbol (chr, port));
1156     case '!':
1157       return (scm_read_scsh_block_comment (chr, port));
1158     case ';':
1159       return (scm_read_commented_expression (chr, port));
1160     case '`':
1161     case '\'':
1162     case ',':
1163       return (scm_read_syntax (chr, port));
1164     default:
1165       result = scm_read_sharp_extension (chr, port);
1166       if (scm_is_eq (result, SCM_UNSPECIFIED))
1167         scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
1168                            scm_list_1 (SCM_MAKE_CHAR (chr)));
1169       else
1170         return result;
1171     }
1172
1173   return SCM_UNSPECIFIED;
1174 }
1175 #undef FUNC_NAME
1176
1177 static SCM
1178 scm_read_expression (SCM port)
1179 #define FUNC_NAME "scm_read_expression"
1180 {
1181   while (1)
1182     {
1183       register scm_t_wchar chr;
1184
1185       chr = scm_getc (port);
1186
1187       switch (chr)
1188         {
1189         case SCM_WHITE_SPACES:
1190         case SCM_LINE_INCREMENTORS:
1191           break;
1192         case ';':
1193           (void) scm_read_semicolon_comment (chr, port);
1194           break;
1195         case '(':
1196           return (scm_read_sexp (chr, port));
1197         case '"':
1198           return (scm_read_string (chr, port));
1199         case '\'':
1200         case '`':
1201         case ',':
1202           return (scm_read_quote (chr, port));
1203         case '#':
1204           {
1205             SCM result;
1206             result = scm_read_sharp (chr, port);
1207             if (scm_is_eq (result, SCM_UNSPECIFIED))
1208               /* We read a comment or some such.  */
1209               break;
1210             else
1211               return result;
1212           }
1213         case ')':
1214           scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
1215           break;
1216         case EOF:
1217           return SCM_EOF_VAL;
1218         case ':':
1219           if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix))
1220             return scm_symbol_to_keyword (scm_read_expression (port));
1221           /* Fall through.  */
1222
1223         default:
1224           {
1225             if (((chr >= '0') && (chr <= '9'))
1226                 || (strchr ("+-.", chr)))
1227               return (scm_read_number (chr, port));
1228             else
1229               return (scm_read_mixed_case_symbol (chr, port));
1230           }
1231         }
1232     }
1233 }
1234 #undef FUNC_NAME
1235
1236 \f
1237 /* Actual reader.  */
1238
1239 SCM_DEFINE (scm_read, "read", 0, 1, 0,
1240             (SCM port),
1241             "Read an s-expression from the input port @var{port}, or from\n"
1242             "the current input port if @var{port} is not specified.\n"
1243             "Any whitespace before the next token is discarded.")
1244 #define FUNC_NAME s_scm_read
1245 {
1246   int c;
1247
1248   if (SCM_UNBNDP (port))
1249     port = scm_current_input_port ();
1250   SCM_VALIDATE_OPINPORT (1, port);
1251
1252   c = flush_ws (port, (char *) NULL);
1253   if (EOF == c)
1254     return SCM_EOF_VAL;
1255   scm_ungetc (c, port);
1256
1257   return (scm_read_expression (port));
1258 }
1259 #undef FUNC_NAME
1260
1261
1262 \f
1263
1264 /* Used when recording expressions constructed by `scm_read_sharp ()'.  */
1265 static SCM
1266 recsexpr (SCM obj, long line, int column, SCM filename)
1267 {
1268   if (!scm_is_pair(obj)) {
1269     return obj;
1270   } else {
1271     SCM tmp = obj, copy;
1272     /* If this sexpr is visible in the read:sharp source, we want to
1273        keep that information, so only record non-constant cons cells
1274        which haven't previously been read by the reader. */
1275     if (scm_is_false (scm_whash_lookup (scm_source_whash, obj)))
1276       {
1277         if (SCM_COPY_SOURCE_P)
1278           {
1279             copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename),
1280                              SCM_UNDEFINED);
1281             while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
1282               {
1283                 SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp),
1284                                                       line,
1285                                                       column,
1286                                                       filename),
1287                                             SCM_UNDEFINED));
1288                 copy = SCM_CDR (copy);
1289               }
1290             SCM_SETCDR (copy, tmp);
1291           }
1292         else
1293           {
1294             recsexpr (SCM_CAR (obj), line, column, filename);
1295             while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
1296               recsexpr (SCM_CAR (tmp), line, column, filename);
1297             copy = SCM_UNDEFINED;
1298           }
1299         scm_whash_insert (scm_source_whash,
1300                           obj,
1301                           scm_make_srcprops (line,
1302                                              column,
1303                                              filename,
1304                                              copy,
1305                                              SCM_EOL));
1306       }
1307     return obj;
1308   }
1309 }
1310
1311 /* Manipulate the read-hash-procedures alist.  This could be written in
1312    Scheme, but maybe it will also be used by C code during initialisation.  */
1313 SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
1314             (SCM chr, SCM proc),
1315             "Install the procedure @var{proc} for reading expressions\n"
1316             "starting with the character sequence @code{#} and @var{chr}.\n"
1317             "@var{proc} will be called with two arguments:  the character\n"
1318             "@var{chr} and the port to read further data from. The object\n"
1319             "returned will be the return value of @code{read}. \n"
1320             "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1321             )
1322 #define FUNC_NAME s_scm_read_hash_extend
1323 {
1324   SCM this;
1325   SCM prev;
1326
1327   SCM_VALIDATE_CHAR (1, chr);
1328   SCM_ASSERT (scm_is_false (proc)
1329               || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
1330               proc, SCM_ARG2, FUNC_NAME);
1331
1332   /* Check if chr is already in the alist.  */
1333   this = *scm_read_hash_procedures;
1334   prev = SCM_BOOL_F;
1335   while (1)
1336     {
1337       if (scm_is_null (this))
1338         {
1339           /* not found, so add it to the beginning.  */
1340           if (scm_is_true (proc))
1341             {
1342               *scm_read_hash_procedures =
1343                 scm_cons (scm_cons (chr, proc), *scm_read_hash_procedures);
1344             }
1345           break;
1346         }
1347       if (scm_is_eq (chr, SCM_CAAR (this)))
1348         {
1349           /* already in the alist.  */
1350           if (scm_is_false (proc))
1351             {
1352               /* remove it.  */
1353               if (scm_is_false (prev))
1354                 {
1355                   *scm_read_hash_procedures =
1356                     SCM_CDR (*scm_read_hash_procedures);
1357                 }
1358               else
1359                 scm_set_cdr_x (prev, SCM_CDR (this));
1360             }
1361           else
1362             {
1363               /* replace it.  */
1364               scm_set_cdr_x (SCM_CAR (this), proc);
1365             }
1366           break;
1367         }
1368       prev = this;
1369       this = SCM_CDR (this);
1370     }
1371
1372   return SCM_UNSPECIFIED;
1373 }
1374 #undef FUNC_NAME
1375
1376 /* Recover the read-hash procedure corresponding to char c.  */
1377 static SCM
1378 scm_get_hash_procedure (int c)
1379 {
1380   SCM rest = *scm_read_hash_procedures;
1381
1382   while (1)
1383     {
1384       if (scm_is_null (rest))
1385         return SCM_BOOL_F;
1386
1387       if (SCM_CHAR (SCM_CAAR (rest)) == c)
1388         return SCM_CDAR (rest);
1389
1390       rest = SCM_CDR (rest);
1391     }
1392 }
1393
1394 #define SCM_ENCODING_SEARCH_SIZE (500)
1395
1396 /* Search the first few hundred characters of a file for
1397    an emacs-like coding declaration.  */
1398 char *
1399 scm_scan_for_encoding (SCM port)
1400 {
1401   char header[SCM_ENCODING_SEARCH_SIZE+1];
1402   size_t bytes_read;
1403   char *encoding = NULL;
1404   int utf8_bom = 0;
1405   char *pos;
1406   int i;
1407   int in_comment;
1408
1409   bytes_read = scm_c_read (port, header, SCM_ENCODING_SEARCH_SIZE);
1410   scm_seek (port, scm_from_int (0), scm_from_int (SEEK_SET));
1411
1412   if (bytes_read > 3
1413       && header[0] == '\xef' && header[1] == '\xbb' && header[2] == '\xbf')
1414     utf8_bom = 1;
1415
1416   /* search past "coding[:=]" */
1417   pos = header;
1418   while (1)
1419     {
1420       if ((pos = strstr(pos, "coding")) == NULL)
1421         return NULL;
1422
1423       pos += strlen("coding");
1424       if (pos - header >= SCM_ENCODING_SEARCH_SIZE ||
1425           (*pos == ':' || *pos == '='))
1426         {
1427           pos ++;
1428           break;
1429         }
1430     }
1431
1432   /* skip spaces */
1433   while (pos - header <= SCM_ENCODING_SEARCH_SIZE &&
1434          (*pos == ' ' || *pos == '\t'))
1435     pos ++;
1436
1437   /* grab the next token */
1438   i = 0;
1439   while (pos + i - header <= SCM_ENCODING_SEARCH_SIZE
1440          && (isalnum(pos[i]) || pos[i] == '_' || pos[i] == '-' || pos[i] == '.'))
1441     i++;
1442
1443   if (i == 0)
1444     return NULL;
1445
1446   encoding = scm_malloc (i+1);
1447   memcpy (encoding, pos, i);
1448   encoding[i] ='\0';
1449   for (i = 0; i < strlen (encoding); i++)
1450     encoding[i] = toupper ((int) encoding[i]);
1451
1452   /* push backwards to make sure we were in a comment */
1453   in_comment = 0;
1454   while (pos - i - header > 0)
1455     {
1456       if (*(pos - i) == '\n')
1457         {
1458           /* This wasn't in a semicolon comment. Check for a
1459            hash-bang comment. */
1460           char *beg = strstr (header, "#!");
1461           char *end = strstr (header, "!#");
1462           if (beg < pos && pos < end)
1463             in_comment = 1;
1464           break;
1465         }
1466       if (*(pos - i) == ';')
1467         {
1468           in_comment = 1;
1469           break;
1470         }
1471       i ++;
1472     }
1473   if (!in_comment)
1474     {
1475       /* This wasn't in a comment */
1476       free (encoding);
1477       return NULL;
1478     }
1479   if (utf8_bom && strcmp(encoding, "UTF-8"))
1480     scm_misc_error (NULL,
1481                     "the port input declares the encoding ~s but is encoded as UTF-8",
1482                     scm_list_1 (scm_from_locale_string (encoding)));
1483
1484   return encoding;
1485 }
1486
1487 SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0,
1488             (SCM port),
1489             "Scans the port for an EMACS-like character coding declaration\n"
1490             "near the top of the contents of a port with random-acessible contents.\n"
1491             "The coding declaration is of the form\n"
1492             "@code{coding: XXXXX} and must appear in a scheme comment.\n"
1493             "\n"
1494             "Returns a string containing the character encoding of the file\n"
1495             "if a declaration was found, or @code{#f} otherwise.\n")
1496 #define FUNC_NAME s_scm_file_encoding
1497 {
1498   char *enc;
1499   SCM s_enc;
1500
1501   enc = scm_scan_for_encoding (port);
1502   if (enc == NULL)
1503     return SCM_BOOL_F;
1504   else
1505     {
1506       s_enc = scm_from_locale_string (enc);
1507       free (enc);
1508       return s_enc;
1509     }
1510
1511   return SCM_BOOL_F;
1512 }
1513 #undef FUNC_NAME
1514
1515 void
1516 scm_init_read ()
1517 {
1518   scm_read_hash_procedures =
1519     SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL));
1520
1521   scm_init_opts (scm_read_options, scm_read_opts);
1522 #include "libguile/read.x"
1523 }
1524
1525 /*
1526   Local Variables:
1527   c-file-style: "gnu"
1528   End:
1529 */