libguile/read.c

   1 /* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007, 2008, 2009 Free Software
   2  * Foundation, Inc.
   3  *
   4  * This library is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public License
   6  * as published by the Free Software Foundation; either version 3 of
   7  * the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful, but
  10  * WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with this library; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17  * 02110-1301 USA
  18  */
  19
  20
  21 \f
  22
  23 #ifdef HAVE_CONFIG_H
  24 # include <config.h>
  25 #endif
  26
  27 #include <stdio.h>
  28 #include <ctype.h>
  29 #include <string.h>
  30 #include <unistd.h>
  31 #include <unicase.h>
  32
  33 #include "libguile/_scm.h"
  34 #include "libguile/bytevectors.h"
  35 #include "libguile/chars.h"
  36 #include "libguile/eval.h"
  37 #include "libguile/arrays.h"
  38 #include "libguile/bitvectors.h"
  39 #include "libguile/keywords.h"
  40 #include "libguile/alist.h"
  41 #include "libguile/srcprop.h"
  42 #include "libguile/hashtab.h"
  43 #include "libguile/hash.h"
  44 #include "libguile/ports.h"
  45 #include "libguile/root.h"
  46 #include "libguile/strings.h"
  47 #include "libguile/strports.h"
  48 #include "libguile/vectors.h"
  49 #include "libguile/validate.h"
  50 #include "libguile/srfi-4.h"
  51 #include "libguile/srfi-13.h"
  52
  53 #include "libguile/read.h"
  54 #include "libguile/private-options.h"
  55
  56
  57 \f
  58
  59 SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
  60 SCM_SYMBOL (scm_keyword_prefix, "prefix");
  61 SCM_SYMBOL (scm_keyword_postfix, "postfix");
  62
  63 scm_t_option scm_read_opts[] = {
  64   { SCM_OPTION_BOOLEAN, "copy", 0,
  65     "Copy source code expressions." },
  66   { SCM_OPTION_BOOLEAN, "positions", 0,
  67     "Record positions of source code expressions." },
  68   { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
  69     "Convert symbols to lower case."},
  70   { SCM_OPTION_SCM, "keywords", SCM_UNPACK (SCM_BOOL_F),
  71     "Style of keyword recognition: #f, 'prefix or 'postfix."},
  72 #if SCM_ENABLE_ELISP
  73   { SCM_OPTION_BOOLEAN, "elisp-vectors", 0,
  74     "Support Elisp vector syntax, namely `[...]'."},
  75   { SCM_OPTION_BOOLEAN, "elisp-strings", 0,
  76     "Support `\\(' and `\\)' in strings."},
  77 #endif
  78   { 0, },
  79 };
  80
  81 /*
  82   Give meaningful error messages for errors
  83
  84   We use the format
  85
  86   FILE:LINE:COL: MESSAGE
  87   This happened in ....
  88
  89   This is not standard GNU format, but the test-suite likes the real
  90   message to be in front.
  91
  92  */
  93
  94
  95 void
  96 scm_i_input_error (char const *function,
  97                    SCM port, const char *message, SCM arg)
  98 {
  99   SCM fn = (scm_is_string (SCM_FILENAME(port))
 100             ? SCM_FILENAME(port)
 101             : scm_from_locale_string ("#<unknown port>"));
 102
 103   SCM string_port = scm_open_output_string ();
 104   SCM string = SCM_EOL;
 105   scm_simple_format (string_port,
 106                      scm_from_locale_string ("~A:~S:~S: ~A"),
 107                      scm_list_4 (fn,
 108                                  scm_from_long (SCM_LINUM (port) + 1),
 109                                  scm_from_int (SCM_COL (port) + 1),
 110                                  scm_from_locale_string (message)));
 111
 112   string = scm_get_output_string (string_port);
 113   scm_close_output_port (string_port);
 114   scm_error_scm (scm_from_locale_symbol ("read-error"),
 115                  function? scm_from_locale_string (function) : SCM_BOOL_F,
 116                  string,
 117                  arg,
 118                  SCM_BOOL_F);
 119 }
 120
 121
 122 SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
 123             (SCM setting),
 124             "Option interface for the read options. Instead of using\n"
 125             "this procedure directly, use the procedures @code{read-enable},\n"
 126             "@code{read-disable}, @code{read-set!} and @code{read-options}.")
 127 #define FUNC_NAME s_scm_read_options
 128 {
 129   SCM ans = scm_options (setting,
 130                          scm_read_opts,
 131                          FUNC_NAME);
 132   if (SCM_COPY_SOURCE_P)
 133     SCM_RECORD_POSITIONS_P = 1;
 134   return ans;
 135 }
 136 #undef FUNC_NAME
 137
 138 /* An association list mapping extra hash characters to procedures.  */
 139 static SCM *scm_read_hash_procedures;
 140
 141
 142 \f
 143 /* Token readers.  */
 144
 145
 146 /* Size of the C buffer used to read symbols and numbers.  */
 147 #define READER_BUFFER_SIZE            128
 148
 149 /* Size of the C buffer used to read strings.  */
 150 #define READER_STRING_BUFFER_SIZE     512
 151
 152 /* The maximum size of Scheme character names.  */
 153 #define READER_CHAR_NAME_MAX_SIZE      50
 154
 155
 156 /* `isblank' is only in C99.  */
 157 #define CHAR_IS_BLANK_(_chr)                                    \
 158   (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n')      \
 159    || ((_chr) == '\f') || ((_chr) == '\r'))
 160
 161 #ifdef MSDOS
 162 # define CHAR_IS_BLANK(_chr)                    \
 163   ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
 164 #else
 165 # define CHAR_IS_BLANK CHAR_IS_BLANK_
 166 #endif
 167
 168
 169 /* R5RS one-character delimiters (see section 7.1.1, ``Lexical
 170    structure'').  */
 171 #define CHAR_IS_R5RS_DELIMITER(c)                               \
 172   (CHAR_IS_BLANK (c)                                            \
 173    || (c == ')') || (c == '(') || (c == ';') || (c == '"'))
 174
 175 #define CHAR_IS_DELIMITER  CHAR_IS_R5RS_DELIMITER
 176
 177 /* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
 178    Structure''.  */
 179 #define CHAR_IS_EXPONENT_MARKER(_chr)                           \
 180   (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f')        \
 181    || ((_chr) == 'd') || ((_chr) == 'l'))
 182
 183 /* Read an SCSH block comment.  */
 184 static inline SCM scm_read_scsh_block_comment (int chr, SCM port);
 185 static SCM scm_read_commented_expression (int chr, SCM port);
 186
 187 /* Read from PORT until a delimiter (e.g., a whitespace) is read.  Return
 188    zero if the whole token fits in BUF, non-zero otherwise.  */
 189 static inline int
 190 read_token (SCM port, SCM buf, size_t *read)
 191 {
 192   scm_t_wchar chr;
 193   *read = 0;
 194
 195   buf = scm_i_string_start_writing (buf);
 196   while (*read < scm_i_string_length (buf))
 197     {
 198       chr = scm_getc (port);
 199
 200       if (chr == EOF)
 201         {
 202           scm_i_string_stop_writing ();
 203           return 0;
 204         }
 205
 206       chr = (SCM_CASE_INSENSITIVE_P ? uc_tolower (chr) : chr);
 207
 208       if (CHAR_IS_DELIMITER (chr))
 209         {
 210           scm_i_string_stop_writing ();
 211           scm_ungetc (chr, port);
 212           return 0;
 213         }
 214
 215       scm_i_string_set_x (buf, *read, chr);
 216       (*read)++;
 217     }
 218   scm_i_string_stop_writing ();
 219
 220   return 1;
 221 }
 222
 223 static SCM
 224 read_complete_token (SCM port, size_t *read)
 225 {
 226   SCM buffer, str = SCM_EOL;
 227   size_t len;
 228   int overflow;
 229
 230   buffer = scm_i_make_string (READER_BUFFER_SIZE, NULL);
 231   overflow = read_token (port, buffer, read);
 232   if (!overflow)
 233     return scm_i_substring (buffer, 0, *read);
 234
 235   str = scm_string_copy (buffer);
 236   do
 237     {
 238       overflow = read_token (port, buffer, &len);
 239       str = scm_string_append (scm_list_2 (str, buffer));
 240       *read += len;
 241     }
 242   while (overflow);
 243
 244   return scm_i_substring (str, 0, *read);
 245 }
 246
 247 /* Skip whitespace from PORT and return the first non-whitespace character
 248    read.  Raise an error on end-of-file.  */
 249 static int
 250 flush_ws (SCM port, const char *eoferr)
 251 {
 252   register scm_t_wchar c;
 253   while (1)
 254     switch (c = scm_getc (port))
 255       {
 256       case EOF:
 257       goteof:
 258         if (eoferr)
 259           {
 260             scm_i_input_error (eoferr,
 261                                port,
 262                                "end of file",
 263                                SCM_EOL);
 264           }
 265         return c;
 266
 267       case ';':
 268       lp:
 269         switch (c = scm_getc (port))
 270           {
 271           case EOF:
 272             goto goteof;
 273           default:
 274             goto lp;
 275           case SCM_LINE_INCREMENTORS:
 276             break;
 277           }
 278         break;
 279
 280       case '#':
 281         switch (c = scm_getc (port))
 282           {
 283           case EOF:
 284             eoferr = "read_sharp";
 285             goto goteof;
 286           case '!':
 287             scm_read_scsh_block_comment (c, port);
 288             break;
 289           case ';':
 290             scm_read_commented_expression (c, port);
 291             break;
 292           default:
 293             scm_ungetc (c, port);
 294             return '#';
 295           }
 296         break;
 297
 298       case SCM_LINE_INCREMENTORS:
 299       case SCM_SINGLE_SPACES:
 300       case '\t':
 301         break;
 302
 303       default:
 304         return c;
 305       }
 306
 307   return 0;
 308 }
 309
 310
 311 \f
 312 /* Token readers.  */
 313
 314 static SCM scm_read_expression (SCM port);
 315 static SCM scm_read_sharp (int chr, SCM port);
 316 static SCM scm_get_hash_procedure (int c);
 317 static SCM recsexpr (SCM obj, long line, int column, SCM filename);
 318
 319
 320 static SCM
 321 scm_read_sexp (scm_t_wchar chr, SCM port)
 322 #define FUNC_NAME "scm_i_lreadparen"
 323 {
 324   register int c;
 325   register SCM tmp;
 326   register SCM tl, ans = SCM_EOL;
 327   SCM tl2 = SCM_EOL, ans2 = SCM_EOL, copy = SCM_BOOL_F;
 328   static const int terminating_char = ')';
 329
 330   /* Need to capture line and column numbers here. */
 331   long line = SCM_LINUM (port);
 332   int column = SCM_COL (port) - 1;
 333
 334
 335   c = flush_ws (port, FUNC_NAME);
 336   if (terminating_char == c)
 337     return SCM_EOL;
 338
 339   scm_ungetc (c, port);
 340   if (scm_is_eq (scm_sym_dot,
 341                  (tmp = scm_read_expression (port))))
 342     {
 343       ans = scm_read_expression (port);
 344       if (terminating_char != (c = flush_ws (port, FUNC_NAME)))
 345         scm_i_input_error (FUNC_NAME, port, "missing close paren",
 346                            SCM_EOL);
 347       return ans;
 348     }
 349
 350   /* Build the head of the list structure. */
 351   ans = tl = scm_cons (tmp, SCM_EOL);
 352
 353   if (SCM_COPY_SOURCE_P)
 354     ans2 = tl2 = scm_cons (scm_is_pair (tmp)
 355                            ? copy
 356                            : tmp,
 357                            SCM_EOL);
 358
 359   while (terminating_char != (c = flush_ws (port, FUNC_NAME)))
 360     {
 361       SCM new_tail;
 362
 363       scm_ungetc (c, port);
 364       if (scm_is_eq (scm_sym_dot,
 365                      (tmp = scm_read_expression (port))))
 366         {
 367           SCM_SETCDR (tl, tmp = scm_read_expression (port));
 368
 369           if (SCM_COPY_SOURCE_P)
 370             SCM_SETCDR (tl2, scm_cons (scm_is_pair (tmp) ? copy : tmp,
 371                                        SCM_EOL));
 372
 373           c = flush_ws (port, FUNC_NAME);
 374           if (terminating_char != c)
 375             scm_i_input_error (FUNC_NAME, port,
 376                                "in pair: missing close paren", SCM_EOL);
 377           goto exit;
 378         }
 379
 380       new_tail = scm_cons (tmp, SCM_EOL);
 381       SCM_SETCDR (tl, new_tail);
 382       tl = new_tail;
 383
 384       if (SCM_COPY_SOURCE_P)
 385         {
 386           SCM new_tail2 = scm_cons (scm_is_pair (tmp)
 387                                     ? copy
 388                                     : tmp, SCM_EOL);
 389           SCM_SETCDR (tl2, new_tail2);
 390           tl2 = new_tail2;
 391         }
 392     }
 393
 394  exit:
 395   if (SCM_RECORD_POSITIONS_P)
 396     scm_whash_insert (scm_source_whash,
 397                       ans,
 398                       scm_make_srcprops (line, column,
 399                                          SCM_FILENAME (port),
 400                                          SCM_COPY_SOURCE_P
 401                                          ? ans2
 402                                          : SCM_UNDEFINED,
 403                                          SCM_EOL));
 404   return ans;
 405 }
 406 #undef FUNC_NAME
 407
 408 static SCM
 409 scm_read_string (int chr, SCM port)
 410 #define FUNC_NAME "scm_lreadr"
 411 {
 412   /* For strings smaller than C_STR, this function creates only one Scheme
 413      object (the string returned).  */
 414
 415   SCM str = SCM_BOOL_F;
 416   unsigned c_str_len = 0;
 417   scm_t_wchar c;
 418
 419   str = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL);
 420   while ('"' != (c = scm_getc (port)))
 421     {
 422       if (c == EOF)
 423         {
 424         str_eof:
 425           scm_i_input_error (FUNC_NAME, port,
 426                              "end of file in string constant", SCM_EOL);
 427         }
 428
 429       if (c_str_len + 1 >= scm_i_string_length (str))
 430         {
 431           SCM addy = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL);
 432
 433           str = scm_string_append (scm_list_2 (str, addy));
 434         }
 435
 436       if (c == '\\')
 437         {
 438           switch (c = scm_getc (port))
 439             {
 440             case EOF:
 441               goto str_eof;
 442             case '"':
 443             case '\\':
 444               break;
 445 #if SCM_ENABLE_ELISP
 446             case '(':
 447             case ')':
 448               if (SCM_ESCAPED_PARENS_P)
 449                 break;
 450               goto bad_escaped;
 451 #endif
 452             case '\n':
 453               continue;
 454             case '0':
 455               c = '\0';
 456               break;
 457             case 'f':
 458               c = '\f';
 459               break;
 460             case 'n':
 461               c = '\n';
 462               break;
 463             case 'r':
 464               c = '\r';
 465               break;
 466             case 't':
 467               c = '\t';
 468               break;
 469             case 'a':
 470               c = '\007';
 471               break;
 472             case 'v':
 473               c = '\v';
 474               break;
 475             case 'x':
 476               {
 477                 scm_t_wchar a, b;
 478                 a = scm_getc (port);
 479                 if (a == EOF)
 480                   goto str_eof;
 481                 b = scm_getc (port);
 482                 if (b == EOF)
 483                   goto str_eof;
 484                 if ('0' <= a && a <= '9')
 485                   a -= '0';
 486                 else if ('A' <= a && a <= 'F')
 487                   a = a - 'A' + 10;
 488                 else if ('a' <= a && a <= 'f')
 489                   a = a - 'a' + 10;
 490                 else
 491                   {
 492                     c = a;
 493                     goto bad_escaped;
 494                   }
 495                 if ('0' <= b && b <= '9')
 496                   b -= '0';
 497                 else if ('A' <= b && b <= 'F')
 498                   b = b - 'A' + 10;
 499                 else if ('a' <= b && b <= 'f')
 500                   b = b - 'a' + 10;
 501                 else
 502                   {
 503                     c = b;
 504                     goto bad_escaped;
 505                   }
 506                 c = a * 16 + b;
 507                 break;
 508               }
 509             case 'u':
 510               {
 511                 scm_t_wchar a;
 512                 int i;
 513                 c = 0;
 514                 for (i = 0; i < 4; i++)
 515                   {
 516                     a = scm_getc (port);
 517                     if (a == EOF)
 518                       goto str_eof;
 519                     if ('0' <= a && a <= '9')
 520                       a -= '0';
 521                     else if ('A' <= a && a <= 'F')
 522                       a = a - 'A' + 10;
 523                     else if ('a' <= a && a <= 'f')
 524                       a = a - 'a' + 10;
 525                     else
 526                       {
 527                         c = a;
 528                         goto bad_escaped;
 529                       }
 530                     c = c * 16 + a;
 531                   }
 532                 break;
 533               }
 534             case 'U':
 535               {
 536                 scm_t_wchar a;
 537                 int i;
 538                 c = 0;
 539                 for (i = 0; i < 6; i++)
 540                   {
 541                     a = scm_getc (port);
 542                     if (a == EOF)
 543                       goto str_eof;
 544                     if ('0' <= a && a <= '9')
 545                       a -= '0';
 546                     else if ('A' <= a && a <= 'F')
 547                       a = a - 'A' + 10;
 548                     else if ('a' <= a && a <= 'f')
 549                       a = a - 'a' + 10;
 550                     else
 551                       {
 552                         c = a;
 553                         goto bad_escaped;
 554                       }
 555                     c = c * 16 + a;
 556                   }
 557                 break;
 558               }
 559             default:
 560             bad_escaped:
 561               scm_i_input_error (FUNC_NAME, port,
 562                                  "illegal character in escape sequence: ~S",
 563                                  scm_list_1 (SCM_MAKE_CHAR (c)));
 564             }
 565         }
 566       str = scm_i_string_start_writing (str);
 567       scm_i_string_set_x (str, c_str_len++, c);
 568       scm_i_string_stop_writing ();
 569     }
 570
 571   if (c_str_len > 0)
 572     {
 573       return scm_i_substring_copy (str, 0, c_str_len);
 574     }
 575
 576   return scm_nullstr;
 577 }
 578 #undef FUNC_NAME
 579
 580
 581 static SCM
 582 scm_read_number (scm_t_wchar chr, SCM port)
 583 {
 584   SCM result;
 585   SCM buffer;
 586   size_t read;
 587
 588   scm_ungetc (chr, port);
 589   buffer = read_complete_token (port, &read);
 590   result = scm_string_to_number (buffer, SCM_UNDEFINED);
 591   if (!scm_is_true (result))
 592     /* Return a symbol instead of a number.  */
 593     result = scm_string_to_symbol (buffer);
 594
 595   return result;
 596 }
 597
 598 static SCM
 599 scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port)
 600 {
 601   SCM result;
 602   int ends_with_colon = 0;
 603   SCM buffer;
 604   size_t read = 0;
 605   int postfix = scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_postfix);
 606
 607   scm_ungetc (chr, port);
 608   buffer = read_complete_token (port, &read);
 609   if (read > 0)
 610     ends_with_colon = scm_i_string_ref (buffer, read - 1) == ':';
 611
 612   if (postfix && ends_with_colon && (read > 1))
 613     result = scm_symbol_to_keyword (scm_string_to_symbol (scm_i_substring (buffer, 0, read - 1)));
 614   else
 615     result = scm_string_to_symbol (buffer);
 616
 617   return result;
 618 }
 619
 620 static SCM
 621 scm_read_number_and_radix (scm_t_wchar chr, SCM port)
 622 #define FUNC_NAME "scm_lreadr"
 623 {
 624   SCM result;
 625   size_t read;
 626   SCM buffer = scm_i_make_string (READER_BUFFER_SIZE, NULL);
 627   unsigned int radix;
 628
 629   switch (chr)
 630     {
 631     case 'B':
 632     case 'b':
 633       radix = 2;
 634       break;
 635
 636     case 'o':
 637     case 'O':
 638       radix = 8;
 639       break;
 640
 641     case 'd':
 642     case 'D':
 643       radix = 10;
 644       break;
 645
 646     case 'x':
 647     case 'X':
 648       radix = 16;
 649       break;
 650
 651     default:
 652       scm_ungetc (chr, port);
 653       scm_ungetc ('#', port);
 654       radix = 10;
 655     }
 656
 657   buffer = read_complete_token (port, &read);
 658   result = scm_string_to_number (buffer, scm_from_uint (radix));
 659
 660   if (scm_is_true (result))
 661     return result;
 662
 663   scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
 664
 665   return SCM_BOOL_F;
 666 }
 667 #undef FUNC_NAME
 668
 669 static SCM
 670 scm_read_quote (int chr, SCM port)
 671 {
 672   SCM p;
 673   long line = SCM_LINUM (port);
 674   int column = SCM_COL (port) - 1;
 675
 676   switch (chr)
 677     {
 678     case '`':
 679       p = scm_sym_quasiquote;
 680       break;
 681
 682     case '\'':
 683       p = scm_sym_quote;
 684       break;
 685
 686     case ',':
 687       {
 688         scm_t_wchar c;
 689
 690         c = scm_getc (port);
 691         if ('@' == c)
 692           p = scm_sym_uq_splicing;
 693         else
 694           {
 695             scm_ungetc (c, port);
 696             p = scm_sym_unquote;
 697           }
 698         break;
 699       }
 700
 701     default:
 702       fprintf (stderr, "%s: unhandled quote character (%i)\n",
 703                "scm_read_quote", chr);
 704       abort ();
 705     }
 706
 707   p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
 708   if (SCM_RECORD_POSITIONS_P)
 709     scm_whash_insert (scm_source_whash, p,
 710                       scm_make_srcprops (line, column,
 711                                          SCM_FILENAME (port),
 712                                          SCM_COPY_SOURCE_P
 713                                          ? (scm_cons2 (SCM_CAR (p),
 714                                                        SCM_CAR (SCM_CDR (p)),
 715                                                        SCM_EOL))
 716                                          : SCM_UNDEFINED,
 717                                          SCM_EOL));
 718
 719
 720   return p;
 721 }
 722
 723 SCM_SYMBOL (sym_syntax, "syntax");
 724 SCM_SYMBOL (sym_quasisyntax, "quasisyntax");
 725 SCM_SYMBOL (sym_unsyntax, "unsyntax");
 726 SCM_SYMBOL (sym_unsyntax_splicing, "unsyntax-splicing");
 727
 728 static SCM
 729 scm_read_syntax (int chr, SCM port)
 730 {
 731   SCM p;
 732   long line = SCM_LINUM (port);
 733   int column = SCM_COL (port) - 1;
 734
 735   switch (chr)
 736     {
 737     case '`':
 738       p = sym_quasisyntax;
 739       break;
 740
 741     case '\'':
 742       p = sym_syntax;
 743       break;
 744
 745     case ',':
 746       {
 747         int c;
 748
 749         c = scm_getc (port);
 750         if ('@' == c)
 751           p = sym_unsyntax_splicing;
 752         else
 753           {
 754             scm_ungetc (c, port);
 755             p = sym_unsyntax;
 756           }
 757         break;
 758       }
 759
 760     default:
 761       fprintf (stderr, "%s: unhandled syntax character (%i)\n",
 762                "scm_read_syntax", chr);
 763       abort ();
 764     }
 765
 766   p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
 767   if (SCM_RECORD_POSITIONS_P)
 768     scm_whash_insert (scm_source_whash, p,
 769                       scm_make_srcprops (line, column,
 770                                          SCM_FILENAME (port),
 771                                          SCM_COPY_SOURCE_P
 772                                          ? (scm_cons2 (SCM_CAR (p),
 773                                                        SCM_CAR (SCM_CDR (p)),
 774                                                        SCM_EOL))
 775                                          : SCM_UNDEFINED,
 776                                          SCM_EOL));
 777
 778
 779   return p;
 780 }
 781
 782 static inline SCM
 783 scm_read_semicolon_comment (int chr, SCM port)
 784 {
 785   int c;
 786
 787   /* We use the get_byte here because there is no need to get the
 788      locale correct with comment input. This presumes that newline
 789      always represents itself no matter what the encoding is.  */
 790   for (c = scm_get_byte_or_eof (port);
 791        (c != EOF) && (c != '\n');
 792        c = scm_getc (port));
 793
 794   return SCM_UNSPECIFIED;
 795 }
 796
 797 \f
 798 /* Sharp readers, i.e. readers called after a `#' sign has been read.  */
 799
 800 static SCM
 801 scm_read_boolean (int chr, SCM port)
 802 {
 803   switch (chr)
 804     {
 805     case 't':
 806     case 'T':
 807       return SCM_BOOL_T;
 808
 809     case 'f':
 810     case 'F':
 811       return SCM_BOOL_F;
 812     }
 813
 814   return SCM_UNSPECIFIED;
 815 }
 816
 817 static SCM
 818 scm_read_character (scm_t_wchar chr, SCM port)
 819 #define FUNC_NAME "scm_lreadr"
 820 {
 821   SCM charname = scm_i_make_string (READER_CHAR_NAME_MAX_SIZE, NULL);
 822   size_t charname_len;
 823   scm_t_wchar cp;
 824   int overflow;
 825
 826   overflow = read_token (port, charname, &charname_len);
 827   charname = scm_c_substring (charname, 0, charname_len);
 828
 829   if (overflow)
 830     goto char_error;
 831
 832   if (charname_len == 0)
 833     {
 834       chr = scm_getc (port);
 835       if (chr == EOF)
 836         scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
 837                            "while reading character", SCM_EOL);
 838
 839       /* CHR must be a token delimiter, like a whitespace.  */
 840       return (SCM_MAKE_CHAR (chr));
 841     }
 842
 843   if (charname_len == 1)
 844     return SCM_MAKE_CHAR (scm_i_string_ref (charname, 0));
 845
 846   cp = scm_i_string_ref (charname, 0);
 847   if (cp == SCM_CODEPOINT_DOTTED_CIRCLE && charname_len == 2)
 848     return SCM_MAKE_CHAR (scm_i_string_ref (charname, 1));
 849
 850   if (cp >= '0' && cp < '8')
 851     {
 852       /* Dirk:FIXME::  This type of character syntax is not R5RS
 853        * compliant.  Further, it should be verified that the constant
 854        * does only consist of octal digits.  */
 855       SCM p = scm_string_to_number (charname, scm_from_uint (8));
 856       if (SCM_I_INUMP (p))
 857         {
 858           scm_t_wchar c = SCM_I_INUM (p);
 859           if (SCM_IS_UNICODE_CHAR (c))
 860             return SCM_MAKE_CHAR (c);
 861           else
 862             scm_i_input_error (FUNC_NAME, port,
 863                                "out-of-range octal character escape: ~a",
 864                                scm_list_1 (charname));
 865         }
 866     }
 867
 868   /* The names of characters should never have non-Latin1
 869      characters.  */
 870   if (scm_i_is_narrow_string (charname)
 871       || scm_i_try_narrow_string (charname))
 872     { SCM ch = scm_i_charname_to_char (scm_i_string_chars (charname),
 873                                        charname_len);
 874       if (scm_is_true (ch))
 875         return ch;
 876     }
 877
 878  char_error:
 879   scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
 880                      scm_list_1 (charname));
 881
 882   return SCM_UNSPECIFIED;
 883 }
 884 #undef FUNC_NAME
 885
 886 static inline SCM
 887 scm_read_keyword (int chr, SCM port)
 888 {
 889   SCM symbol;
 890
 891   /* Read the symbol that comprises the keyword.  Doing this instead of
 892      invoking a specific symbol reader function allows `scm_read_keyword ()'
 893      to adapt to the delimiters currently valid of symbols.
 894
 895      XXX: This implementation allows sloppy syntaxes like `#:  key'.  */
 896   symbol = scm_read_expression (port);
 897   if (!scm_is_symbol (symbol))
 898     scm_i_input_error ("scm_read_keyword", port,
 899                        "keyword prefix `~a' not followed by a symbol: ~s",
 900                        scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
 901
 902   return (scm_symbol_to_keyword (symbol));
 903 }
 904
 905 static inline SCM
 906 scm_read_vector (int chr, SCM port)
 907 {
 908   /* Note: We call `scm_read_sexp ()' rather than READER here in order to
 909      guarantee that it's going to do what we want.  After all, this is an
 910      implementation detail of `scm_read_vector ()', not a desirable
 911      property.  */
 912   return (scm_vector (scm_read_sexp (chr, port)));
 913 }
 914
 915 static inline SCM
 916 scm_read_srfi4_vector (int chr, SCM port)
 917 {
 918   return scm_i_read_array (port, chr);
 919 }
 920
 921 static SCM
 922 scm_read_bytevector (scm_t_wchar chr, SCM port)
 923 {
 924   chr = scm_getc (port);
 925   if (chr != 'u')
 926     goto syntax;
 927
 928   chr = scm_getc (port);
 929   if (chr != '8')
 930     goto syntax;
 931
 932   chr = scm_getc (port);
 933   if (chr != '(')
 934     goto syntax;
 935
 936   return scm_u8_list_to_bytevector (scm_read_sexp (chr, port));
 937
 938  syntax:
 939   scm_i_input_error ("read_bytevector", port,
 940                      "invalid bytevector prefix",
 941                      SCM_MAKE_CHAR (chr));
 942   return SCM_UNSPECIFIED;
 943 }
 944
 945 static SCM
 946 scm_read_guile_bit_vector (scm_t_wchar chr, SCM port)
 947 {
 948   /* Read the `#*10101'-style read syntax for bit vectors in Guile.  This is
 949      terribly inefficient but who cares?  */
 950   SCM s_bits = SCM_EOL;
 951
 952   for (chr = scm_getc (port);
 953        (chr != EOF) && ((chr == '0') || (chr == '1'));
 954        chr = scm_getc (port))
 955     {
 956       s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
 957     }
 958
 959   if (chr != EOF)
 960     scm_ungetc (chr, port);
 961
 962   return scm_bitvector (scm_reverse_x (s_bits, SCM_EOL));
 963 }
 964
 965 static inline SCM
 966 scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
 967 {
 968   int bang_seen = 0;
 969
 970   /* We can use the get_byte here because there is no need to get the
 971      locale correct when reading comments. This presumes that
 972      hash and exclamation points always represent themselves no
 973      matter what the source encoding is.*/
 974   for (;;)
 975     {
 976       int c = scm_get_byte_or_eof (port);
 977
 978       if (c == EOF)
 979         scm_i_input_error ("skip_block_comment", port,
 980                            "unterminated `#! ... !#' comment", SCM_EOL);
 981
 982       if (c == '!')
 983         bang_seen = 1;
 984       else if (c == '#' && bang_seen)
 985         break;
 986       else
 987         bang_seen = 0;
 988     }
 989
 990   return SCM_UNSPECIFIED;
 991 }
 992
 993 static SCM
 994 scm_read_commented_expression (scm_t_wchar chr, SCM port)
 995 {
 996   scm_t_wchar c;
 997
 998   c = flush_ws (port, (char *) NULL);
 999   if (EOF == c)
1000     scm_i_input_error ("read_commented_expression", port,
1001                        "no expression after #; comment", SCM_EOL);
1002   scm_ungetc (c, port);
1003   scm_read_expression (port);
1004   return SCM_UNSPECIFIED;
1005 }
1006
1007 static SCM
1008 scm_read_extended_symbol (scm_t_wchar chr, SCM port)
1009 {
1010   /* Guile's extended symbol read syntax looks like this:
1011
1012        #{This is all a symbol name}#
1013
1014      So here, CHR is expected to be `{'.  */
1015   int saw_brace = 0, finished = 0;
1016   size_t len = 0;
1017   SCM buf = scm_i_make_string (1024, NULL);
1018
1019   buf = scm_i_string_start_writing (buf);
1020
1021   while ((chr = scm_getc (port)) != EOF)
1022     {
1023       if (saw_brace)
1024         {
1025           if (chr == '#')
1026             {
1027               finished = 1;
1028               break;
1029             }
1030           else
1031             {
1032               saw_brace = 0;
1033               scm_i_string_set_x (buf, len++, '}');
1034               scm_i_string_set_x (buf, len++, chr);
1035             }
1036         }
1037       else if (chr == '}')
1038         saw_brace = 1;
1039       else
1040         scm_i_string_set_x (buf, len++, chr);
1041
1042       if (len >= scm_i_string_length (buf) - 2)
1043         {
1044           scm_i_string_stop_writing ();
1045           SCM addy = scm_i_make_string (1024, NULL);
1046           buf = scm_string_append (scm_list_2 (buf, addy));
1047           len = 0;
1048           buf = scm_i_string_start_writing (buf);
1049         }
1050
1051       if (finished)
1052         break;
1053     }
1054   scm_i_string_stop_writing ();
1055
1056   return (scm_string_to_symbol (scm_c_substring (buf, 0, len)));
1057 }
1058
1059
1060 \f
1061 /* Top-level token readers, i.e., dispatchers.  */
1062
1063 static SCM
1064 scm_read_sharp_extension (int chr, SCM port)
1065 {
1066   SCM proc;
1067
1068   proc = scm_get_hash_procedure (chr);
1069   if (scm_is_true (scm_procedure_p (proc)))
1070     {
1071       long line = SCM_LINUM (port);
1072       int column = SCM_COL (port) - 2;
1073       SCM got;
1074
1075       got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
1076       if (!scm_is_eq (got, SCM_UNSPECIFIED))
1077         {
1078           if (SCM_RECORD_POSITIONS_P)
1079             return (recsexpr (got, line, column,
1080                               SCM_FILENAME (port)));
1081           else
1082             return got;
1083         }
1084     }
1085
1086   return SCM_UNSPECIFIED;
1087 }
1088
1089 /* The reader for the sharp `#' character.  It basically dispatches reads
1090    among the above token readers.   */
1091 static SCM
1092 scm_read_sharp (scm_t_wchar chr, SCM port)
1093 #define FUNC_NAME "scm_lreadr"
1094 {
1095   SCM result;
1096
1097   chr = scm_getc (port);
1098
1099   result = scm_read_sharp_extension (chr, port);
1100   if (!scm_is_eq (result, SCM_UNSPECIFIED))
1101     return result;
1102
1103   switch (chr)
1104     {
1105     case '\\':
1106       return (scm_read_character (chr, port));
1107     case '(':
1108       return (scm_read_vector (chr, port));
1109     case 's':
1110     case 'u':
1111     case 'f':
1112       /* This one may return either a boolean or an SRFI-4 vector.  */
1113       return (scm_read_srfi4_vector (chr, port));
1114     case 'v':
1115       return (scm_read_bytevector (chr, port));
1116     case '*':
1117       return (scm_read_guile_bit_vector (chr, port));
1118     case 't':
1119     case 'T':
1120     case 'F':
1121       /* This one may return either a boolean or an SRFI-4 vector.  */
1122       return (scm_read_boolean (chr, port));
1123     case ':':
1124       return (scm_read_keyword (chr, port));
1125     case '0': case '1': case '2': case '3': case '4':
1126     case '5': case '6': case '7': case '8': case '9':
1127     case '@':
1128 #if SCM_ENABLE_DEPRECATED
1129       /* See below for 'i' and 'e'. */
1130     case 'a':
1131     case 'c':
1132     case 'y':
1133     case 'h':
1134     case 'l':
1135 #endif
1136       return (scm_i_read_array (port, chr));
1137
1138     case 'i':
1139     case 'e':
1140 #if SCM_ENABLE_DEPRECATED
1141       {
1142         /* When next char is '(', it really is an old-style
1143            uniform array. */
1144         scm_t_wchar next_c = scm_getc (port);
1145         if (next_c != EOF)
1146           scm_ungetc (next_c, port);
1147         if (next_c == '(')
1148           return scm_i_read_array (port, chr);
1149         /* Fall through. */
1150       }
1151 #endif
1152     case 'b':
1153     case 'B':
1154     case 'o':
1155     case 'O':
1156     case 'd':
1157     case 'D':
1158     case 'x':
1159     case 'X':
1160     case 'I':
1161     case 'E':
1162       return (scm_read_number_and_radix (chr, port));
1163     case '{':
1164       return (scm_read_extended_symbol (chr, port));
1165     case '!':
1166       return (scm_read_scsh_block_comment (chr, port));
1167     case ';':
1168       return (scm_read_commented_expression (chr, port));
1169     case '`':
1170     case '\'':
1171     case ',':
1172       return (scm_read_syntax (chr, port));
1173     default:
1174       result = scm_read_sharp_extension (chr, port);
1175       if (scm_is_eq (result, SCM_UNSPECIFIED))
1176         scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
1177                            scm_list_1 (SCM_MAKE_CHAR (chr)));
1178       else
1179         return result;
1180     }
1181
1182   return SCM_UNSPECIFIED;
1183 }
1184 #undef FUNC_NAME
1185
1186 static SCM
1187 scm_read_expression (SCM port)
1188 #define FUNC_NAME "scm_read_expression"
1189 {
1190   while (1)
1191     {
1192       register scm_t_wchar chr;
1193
1194       chr = scm_getc (port);
1195
1196       switch (chr)
1197         {
1198         case SCM_WHITE_SPACES:
1199         case SCM_LINE_INCREMENTORS:
1200           break;
1201         case ';':
1202           (void) scm_read_semicolon_comment (chr, port);
1203           break;
1204         case '(':
1205           return (scm_read_sexp (chr, port));
1206         case '"':
1207           return (scm_read_string (chr, port));
1208         case '\'':
1209         case '`':
1210         case ',':
1211           return (scm_read_quote (chr, port));
1212         case '#':
1213           {
1214             SCM result;
1215             result = scm_read_sharp (chr, port);
1216             if (scm_is_eq (result, SCM_UNSPECIFIED))
1217               /* We read a comment or some such.  */
1218               break;
1219             else
1220               return result;
1221           }
1222         case ')':
1223           scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
1224           break;
1225         case EOF:
1226           return SCM_EOF_VAL;
1227         case ':':
1228           if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix))
1229             return scm_symbol_to_keyword (scm_read_expression (port));
1230           /* Fall through.  */
1231
1232         default:
1233           {
1234             if (((chr >= '0') && (chr <= '9'))
1235                 || (strchr ("+-.", chr)))
1236               return (scm_read_number (chr, port));
1237             else
1238               return (scm_read_mixed_case_symbol (chr, port));
1239           }
1240         }
1241     }
1242 }
1243 #undef FUNC_NAME
1244
1245 \f
1246 /* Actual reader.  */
1247
1248 SCM_DEFINE (scm_read, "read", 0, 1, 0,
1249             (SCM port),
1250             "Read an s-expression from the input port @var{port}, or from\n"
1251             "the current input port if @var{port} is not specified.\n"
1252             "Any whitespace before the next token is discarded.")
1253 #define FUNC_NAME s_scm_read
1254 {
1255   int c;
1256
1257   if (SCM_UNBNDP (port))
1258     port = scm_current_input_port ();
1259   SCM_VALIDATE_OPINPORT (1, port);
1260
1261   c = flush_ws (port, (char *) NULL);
1262   if (EOF == c)
1263     return SCM_EOF_VAL;
1264   scm_ungetc (c, port);
1265
1266   return (scm_read_expression (port));
1267 }
1268 #undef FUNC_NAME
1269
1270
1271 \f
1272
1273 /* Used when recording expressions constructed by `scm_read_sharp ()'.  */
1274 static SCM
1275 recsexpr (SCM obj, long line, int column, SCM filename)
1276 {
1277   if (!scm_is_pair(obj)) {
1278     return obj;
1279   } else {
1280     SCM tmp = obj, copy;
1281     /* If this sexpr is visible in the read:sharp source, we want to
1282        keep that information, so only record non-constant cons cells
1283        which haven't previously been read by the reader. */
1284     if (scm_is_false (scm_whash_lookup (scm_source_whash, obj)))
1285       {
1286         if (SCM_COPY_SOURCE_P)
1287           {
1288             copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename),
1289                              SCM_UNDEFINED);
1290             while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
1291               {
1292                 SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp),
1293                                                       line,
1294                                                       column,
1295                                                       filename),
1296                                             SCM_UNDEFINED));
1297                 copy = SCM_CDR (copy);
1298               }
1299             SCM_SETCDR (copy, tmp);
1300           }
1301         else
1302           {
1303             recsexpr (SCM_CAR (obj), line, column, filename);
1304             while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
1305               recsexpr (SCM_CAR (tmp), line, column, filename);
1306             copy = SCM_UNDEFINED;
1307           }
1308         scm_whash_insert (scm_source_whash,
1309                           obj,
1310                           scm_make_srcprops (line,
1311                                              column,
1312                                              filename,
1313                                              copy,
1314                                              SCM_EOL));
1315       }
1316     return obj;
1317   }
1318 }
1319
1320 /* Manipulate the read-hash-procedures alist.  This could be written in
1321    Scheme, but maybe it will also be used by C code during initialisation.  */
1322 SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
1323             (SCM chr, SCM proc),
1324             "Install the procedure @var{proc} for reading expressions\n"
1325             "starting with the character sequence @code{#} and @var{chr}.\n"
1326             "@var{proc} will be called with two arguments:  the character\n"
1327             "@var{chr} and the port to read further data from. The object\n"
1328             "returned will be the return value of @code{read}. \n"
1329             "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1330             )
1331 #define FUNC_NAME s_scm_read_hash_extend
1332 {
1333   SCM this;
1334   SCM prev;
1335
1336   SCM_VALIDATE_CHAR (1, chr);
1337   SCM_ASSERT (scm_is_false (proc)
1338               || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
1339               proc, SCM_ARG2, FUNC_NAME);
1340
1341   /* Check if chr is already in the alist.  */
1342   this = *scm_read_hash_procedures;
1343   prev = SCM_BOOL_F;
1344   while (1)
1345     {
1346       if (scm_is_null (this))
1347         {
1348           /* not found, so add it to the beginning.  */
1349           if (scm_is_true (proc))
1350             {
1351               *scm_read_hash_procedures =
1352                 scm_cons (scm_cons (chr, proc), *scm_read_hash_procedures);
1353             }
1354           break;
1355         }
1356       if (scm_is_eq (chr, SCM_CAAR (this)))
1357         {
1358           /* already in the alist.  */
1359           if (scm_is_false (proc))
1360             {
1361               /* remove it.  */
1362               if (scm_is_false (prev))
1363                 {
1364                   *scm_read_hash_procedures =
1365                     SCM_CDR (*scm_read_hash_procedures);
1366                 }
1367               else
1368                 scm_set_cdr_x (prev, SCM_CDR (this));
1369             }
1370           else
1371             {
1372               /* replace it.  */
1373               scm_set_cdr_x (SCM_CAR (this), proc);
1374             }
1375           break;
1376         }
1377       prev = this;
1378       this = SCM_CDR (this);
1379     }
1380
1381   return SCM_UNSPECIFIED;
1382 }
1383 #undef FUNC_NAME
1384
1385 /* Recover the read-hash procedure corresponding to char c.  */
1386 static SCM
1387 scm_get_hash_procedure (int c)
1388 {
1389   SCM rest = *scm_read_hash_procedures;
1390
1391   while (1)
1392     {
1393       if (scm_is_null (rest))
1394         return SCM_BOOL_F;
1395
1396       if (SCM_CHAR (SCM_CAAR (rest)) == c)
1397         return SCM_CDAR (rest);
1398
1399       rest = SCM_CDR (rest);
1400     }
1401 }
1402
1403 #define SCM_ENCODING_SEARCH_SIZE (500)
1404
1405 /* Search the first few hundred characters of a file for
1406    an emacs-like coding declaration.  */
1407 char *
1408 scm_scan_for_encoding (SCM port)
1409 {
1410   char header[SCM_ENCODING_SEARCH_SIZE+1];
1411   size_t bytes_read;
1412   char *encoding = NULL;
1413   int utf8_bom = 0;
1414   char *pos;
1415   int i;
1416   int in_comment;
1417
1418   bytes_read = scm_c_read (port, header, SCM_ENCODING_SEARCH_SIZE);
1419   scm_seek (port, scm_from_int (0), scm_from_int (SEEK_SET));
1420
1421   if (bytes_read > 3
1422       && header[0] == '\xef' && header[1] == '\xbb' && header[2] == '\xbf')
1423     utf8_bom = 1;
1424
1425   /* search past "coding[:=]" */
1426   pos = header;
1427   while (1)
1428     {
1429       if ((pos = strstr(pos, "coding")) == NULL)
1430         return NULL;
1431
1432       pos += strlen("coding");
1433       if (pos - header >= SCM_ENCODING_SEARCH_SIZE ||
1434           (*pos == ':' || *pos == '='))
1435         {
1436           pos ++;
1437           break;
1438         }
1439     }
1440
1441   /* skip spaces */
1442   while (pos - header <= SCM_ENCODING_SEARCH_SIZE &&
1443          (*pos == ' ' || *pos == '\t'))
1444     pos ++;
1445
1446   /* grab the next token */
1447   i = 0;
1448   while (pos + i - header <= SCM_ENCODING_SEARCH_SIZE
1449          && (isalnum((int) pos[i]) || pos[i] == '_' || pos[i] == '-'
1450              || pos[i] == '.'))
1451     i++;
1452
1453   if (i == 0)
1454     return NULL;
1455
1456   encoding = scm_malloc (i+1);
1457   memcpy (encoding, pos, i);
1458   encoding[i] ='\0';
1459   for (i = 0; i < strlen (encoding); i++)
1460     encoding[i] = toupper ((int) encoding[i]);
1461
1462   /* push backwards to make sure we were in a comment */
1463   in_comment = 0;
1464   while (pos - i - header > 0)
1465     {
1466       if (*(pos - i) == '\n')
1467         {
1468           /* This wasn't in a semicolon comment. Check for a
1469            hash-bang comment. */
1470           char *beg = strstr (header, "#!");
1471           char *end = strstr (header, "!#");
1472           if (beg < pos && pos < end)
1473             in_comment = 1;
1474           break;
1475         }
1476       if (*(pos - i) == ';')
1477         {
1478           in_comment = 1;
1479           break;
1480         }
1481       i ++;
1482     }
1483   if (!in_comment)
1484     {
1485       /* This wasn't in a comment */
1486       free (encoding);
1487       return NULL;
1488     }
1489   if (utf8_bom && strcmp(encoding, "UTF-8"))
1490     scm_misc_error (NULL,
1491                     "the port input declares the encoding ~s but is encoded as UTF-8",
1492                     scm_list_1 (scm_from_locale_string (encoding)));
1493
1494   return encoding;
1495 }
1496
1497 SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0,
1498             (SCM port),
1499             "Scans the port for an EMACS-like character coding declaration\n"
1500             "near the top of the contents of a port with random-acessible contents.\n"
1501             "The coding declaration is of the form\n"
1502             "@code{coding: XXXXX} and must appear in a scheme comment.\n"
1503             "\n"
1504             "Returns a string containing the character encoding of the file\n"
1505             "if a declaration was found, or @code{#f} otherwise.\n")
1506 #define FUNC_NAME s_scm_file_encoding
1507 {
1508   char *enc;
1509   SCM s_enc;
1510
1511   enc = scm_scan_for_encoding (port);
1512   if (enc == NULL)
1513     return SCM_BOOL_F;
1514   else
1515     {
1516       s_enc = scm_from_locale_string (enc);
1517       free (enc);
1518       return s_enc;
1519     }
1520
1521   return SCM_BOOL_F;
1522 }
1523 #undef FUNC_NAME
1524
1525 void
1526 scm_init_read ()
1527 {
1528   scm_read_hash_procedures =
1529     SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL));
1530
1531   scm_init_opts (scm_read_options, scm_read_opts);
1532 #include "libguile/read.x"
1533 }
1534
1535 /*
1536   Local Variables:
1537   c-file-style: "gnu"
1538   End:
1539 */