-/* Copyright (C) 1995,1996,1997,1999,2000,2001 Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this software; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
- * Boston, MA 02111-1307 USA
- *
- * As a special exception, the Free Software Foundation gives permission
- * for additional uses of the text contained in its release of GUILE.
- *
- * The exception is that, if you link the GUILE library with other files
- * to produce an executable, this does not by itself cause the
- * resulting executable to be covered by the GNU General Public License.
- * Your use of that executable is in no way restricted on account of
- * linking the GUILE library code into it.
- *
- * This exception does not however invalidate any other reasons why
- * the executable file might be covered by the GNU General Public License.
- *
- * This exception applies only to the code released by the
- * Free Software Foundation under the name GUILE. If you copy
- * code from other Free Software Foundation releases into a copy of
- * GUILE, as the General Public License permits, the exception does
- * not apply to the code that you add in this way. To avoid misleading
- * anyone as to the status of such modified files, you must delete
- * this exception notice from them.
- *
- * If you write modifications of your own for GUILE, it is your choice
- * whether to permit this exception to apply to your modifications.
- * If you do not wish that, delete this exception notice. */
-
-/* Software engineering face-lift by Greg J. Badros, 11-Dec-1999,
- gjb@cs.washington.edu, http://www.cs.washington.edu/homes/gjb */
-
-\f
-
-#include <stdio.h>
-#include "libguile/_scm.h"
-#include "libguile/chars.h"
-#include "libguile/eval.h"
-#include "libguile/unif.h"
-#include "libguile/keywords.h"
-#include "libguile/alist.h"
-#include "libguile/srcprop.h"
-#include "libguile/hashtab.h"
-#include "libguile/hash.h"
-#include "libguile/ports.h"
-#include "libguile/root.h"
-#include "libguile/strings.h"
-#include "libguile/vectors.h"
-
-#include "libguile/validate.h"
-#include "libguile/read.h"
-
-\f
-
-SCM_SYMBOL (scm_keyword_prefix, "prefix");
-
-scm_t_option scm_read_opts[] = {
- { SCM_OPTION_BOOLEAN, "copy", 0,
- "Copy source code expressions." },
- { SCM_OPTION_BOOLEAN, "positions", 0,
- "Record positions of source code expressions." },
- { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
- "Convert symbols to lower case."},
- { SCM_OPTION_SCM, "keywords", SCM_UNPACK (SCM_BOOL_F),
- "Style of keyword recognition: #f or 'prefix"}
-};
-
-SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
- (SCM setting),
- "Option interface for the read options. Instead of using\n"
- "this procedure directly, use the procedures @code{read-enable},\n"
- "@code{read-disable}, @code{read-set!} and @var{read-options}.")
-#define FUNC_NAME s_scm_read_options
-{
- SCM ans = scm_options (setting,
- scm_read_opts,
- SCM_N_READ_OPTIONS,
- FUNC_NAME);
- if (SCM_COPY_SOURCE_P)
- SCM_RECORD_POSITIONS_P = 1;
- return ans;
-}
-#undef FUNC_NAME
-
-/* An association list mapping extra hash characters to procedures. */
-static SCM *scm_read_hash_procedures;
-
-SCM_DEFINE (scm_read, "read", 0, 1, 0,
- (SCM port),
- "Read an s-expression from the input port @var{port}, or from\n"
- "the current input port if @var{port} is not specified.\n"
- "Any whitespace before the next token is discarded.")
-#define FUNC_NAME s_scm_read
-{
- int c;
- SCM tok_buf, copy;
-
- if (SCM_UNBNDP (port))
- port = scm_cur_inp;
- SCM_VALIDATE_OPINPORT (1,port);
-
- c = scm_flush_ws (port, (char *) NULL);
- if (EOF == c)
- return SCM_EOF_VAL;
- scm_ungetc (c, port);
-
- tok_buf = scm_allocate_string (30);
- return scm_lreadr (&tok_buf, port, ©);
-}
-#undef FUNC_NAME
-
-
-
-char *
-scm_grow_tok_buf (SCM *tok_buf)
-{
- size_t oldlen = SCM_STRING_LENGTH (*tok_buf);
- SCM newstr = scm_allocate_string (2 * oldlen);
- size_t i;
-
- for (i = 0; i != oldlen; ++i)
- SCM_STRING_CHARS (newstr) [i] = SCM_STRING_CHARS (*tok_buf) [i];
-
- *tok_buf = newstr;
- return SCM_STRING_CHARS (newstr);
-}
-
-
-
-int
-scm_flush_ws (SCM port, const char *eoferr)
-{
- register int c;
- while (1)
- switch (c = scm_getc (port))
- {
- case EOF:
- goteof:
- if (eoferr)
- {
- if (!SCM_FALSEP (SCM_FILENAME (port)))
- scm_misc_error (eoferr,
- "end of file in ~A",
- SCM_LIST1 (SCM_FILENAME (port)));
- else
- scm_misc_error (eoferr, "end of file", SCM_EOL);
- }
- return c;
- case ';':
- lp:
- switch (c = scm_getc (port))
- {
- case EOF:
- goto goteof;
- default:
- goto lp;
- case SCM_LINE_INCREMENTORS:
- break;
- }
- break;
- case SCM_LINE_INCREMENTORS:
- case SCM_SINGLE_SPACES:
- case '\t':
- break;
- default:
- return c;
- }
-}
-
-
-
-int
-scm_casei_streq (char *s1, char *s2)
-{
- while (*s1 && *s2)
- if (scm_downcase((int)*s1) != scm_downcase((int)*s2))
- return 0;
- else
- {
- ++s1;
- ++s2;
- }
- return !(*s1 || *s2);
-}
-
-
-/* recsexpr is used when recording expressions
- * constructed by read:sharp.
- */
-#ifndef DEBUG_EXTENSIONS
-#define recsexpr(obj, line, column, filename) (obj)
-#else
-static SCM
-recsexpr (SCM obj, long line, int column, SCM filename)
-{
- if (!SCM_CONSP(obj)) {
- return obj;
- } else {
- SCM tmp = obj, copy;
- /* If this sexpr is visible in the read:sharp source, we want to
- keep that information, so only record non-constant cons cells
- which haven't previously been read by the reader. */
- if (SCM_FALSEP (scm_whash_lookup (scm_source_whash, obj)))
- {
- if (SCM_COPY_SOURCE_P)
- {
- copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename),
- SCM_UNDEFINED);
- while ((tmp = SCM_CDR (tmp)) && SCM_CONSP (tmp))
- {
- SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp),
- line,
- column,
- filename),
- SCM_UNDEFINED));
- copy = SCM_CDR (copy);
- }
- SCM_SETCDR (copy, tmp);
- }
- else
- {
- recsexpr (SCM_CAR (obj), line, column, filename);
- while ((tmp = SCM_CDR (tmp)) && SCM_CONSP (tmp))
- recsexpr (SCM_CAR (tmp), line, column, filename);
- copy = SCM_UNDEFINED;
- }
- scm_whash_insert (scm_source_whash,
- obj,
- scm_make_srcprops (line,
- column,
- filename,
- copy,
- SCM_EOL));
- }
- return obj;
- }
-}
-#endif
-
-/* Consume an SCSH-style block comment. Assume that we've already
- read the initial `#!', and eat characters until we get a
- newline/exclamation-point/sharp-sign/newline sequence. */
-
-static void
-skip_scsh_block_comment (SCM port)
-#define FUNC_NAME "skip_scsh_block_comment"
-{
- /* Is this portable? Dear God, spare me from the non-eight-bit
- characters. But is it tasteful? */
- long history = 0;
-
- for (;;)
- {
- int c = scm_getc (port);
-
- if (c == EOF)
- SCM_MISC_ERROR ("unterminated `#! ... !#' comment", SCM_EOL);
- history = ((history << 8) | (c & 0xff)) & 0xffffffff;
-
- /* Were the last four characters read "\n!#\n"? */
- if (history == (('\n' << 24) | ('!' << 16) | ('#' << 8) | '\n'))
- return;
- }
-}
-#undef FUNC_NAME
-
-
-static SCM scm_get_hash_procedure(int c);
-
-static char s_list[]="list";
-
-SCM
-scm_lreadr (SCM *tok_buf,SCM port,SCM *copy)
-#define FUNC_NAME "scm_lreadr"
-{
- int c;
- size_t j;
- SCM p;
-
- tryagain:
- c = scm_flush_ws (port, s_scm_read);
- tryagain_no_flush_ws:
- switch (c)
- {
- case EOF:
- return SCM_EOF_VAL;
-
- case '(':
- return SCM_RECORD_POSITIONS_P
- ? scm_lreadrecparen (tok_buf, port, s_list, copy)
- : scm_lreadparen (tok_buf, port, s_list, copy);
- case ')':
- SCM_MISC_ERROR ("unexpected \")\"", SCM_EOL);
- goto tryagain;
-
- case '\'':
- p = scm_sym_quote;
- goto recquote;
- case '`':
- p = scm_sym_quasiquote;
- goto recquote;
- case ',':
- c = scm_getc (port);
- if ('@' == c)
- p = scm_sym_uq_splicing;
- else
- {
- scm_ungetc (c, port);
- p = scm_sym_unquote;
- }
- recquote:
- p = scm_cons2 (p,
- scm_lreadr (tok_buf, port, copy),
- SCM_EOL);
- if (SCM_RECORD_POSITIONS_P)
- scm_whash_insert (scm_source_whash,
- p,
- scm_make_srcprops (SCM_LINUM (port),
- SCM_COL (port) - 1,
- SCM_FILENAME (port),
- SCM_COPY_SOURCE_P
- ? (*copy = scm_cons2 (SCM_CAR (p),
- SCM_CAR (SCM_CDR (p)),
- SCM_EOL))
- : SCM_UNDEFINED,
- SCM_EOL));
- return p;
- case '#':
- c = scm_getc (port);
-
- {
- /* Check for user-defined hash procedure first, to allow
- overriding of builtin hash read syntaxes. */
- SCM sharp = scm_get_hash_procedure (c);
- if (!SCM_FALSEP (sharp))
- {
- int line = SCM_LINUM (port);
- int column = SCM_COL (port) - 2;
- SCM got;
-
- got = scm_call_2 (sharp, SCM_MAKE_CHAR (c), port);
- if (SCM_EQ_P (got, SCM_UNSPECIFIED))
- goto unkshrp;
- if (SCM_RECORD_POSITIONS_P)
- return *copy = recsexpr (got, line, column,
- SCM_FILENAME (port));
- else
- return got;
- }
- }
- switch (c)
- {
- case '(':
- p = scm_lreadparen (tok_buf, port, "vector", copy);
- return SCM_NULLP (p) ? scm_nullvect : scm_vector (p);
-
- case 't':
- case 'T':
- return SCM_BOOL_T;
- case 'f':
- case 'F':
- return SCM_BOOL_F;
-
- case 'b':
- case 'B':
- case 'o':
- case 'O':
- case 'd':
- case 'D':
- case 'x':
- case 'X':
- case 'i':
- case 'I':
- case 'e':
- case 'E':
- scm_ungetc (c, port);
- c = '#';
- goto num;
-
- case '!':
- /* start of a shell script. Parse as a block comment,
- terminated by !#, just like SCSH. */
- skip_scsh_block_comment (port);
- /* EOF is not an error here */
- c = scm_flush_ws (port, (char *)NULL);
- goto tryagain_no_flush_ws;
-
-#ifdef HAVE_ARRAYS
- case '*':
- j = scm_read_token (c, tok_buf, port, 0);
- p = scm_istr2bve (SCM_STRING_CHARS (*tok_buf) + 1, (long) (j - 1));
- if (!SCM_FALSEP (p))
- return p;
- else
- goto unkshrp;
-#endif
-
- case '{':
- j = scm_read_token (c, tok_buf, port, 1);
- return scm_mem2symbol (SCM_STRING_CHARS (*tok_buf), j);
-
- case '\\':
- c = scm_getc (port);
- j = scm_read_token (c, tok_buf, port, 0);
- if (j == 1)
- return SCM_MAKE_CHAR (c);
- if (c >= '0' && c < '8')
- {
- p = scm_istr2int (SCM_STRING_CHARS (*tok_buf), (long) j, 8);
- if (!SCM_FALSEP (p))
- return SCM_MAKE_CHAR (SCM_INUM (p));
- }
- for (c = 0; c < scm_n_charnames; c++)
- if (scm_charnames[c]
- && (scm_casei_streq (scm_charnames[c], SCM_STRING_CHARS (*tok_buf))))
- return SCM_MAKE_CHAR (scm_charnums[c]);
- SCM_MISC_ERROR ("unknown # object", SCM_EOL);
-
- /* #:SYMBOL is a syntax for keywords supported in all contexts. */
- case ':':
- j = scm_read_token ('-', tok_buf, port, 0);
- p = scm_mem2symbol (SCM_STRING_CHARS (*tok_buf), j);
- return scm_make_keyword_from_dash_symbol (p);
-
- default:
- callshrp:
- {
- SCM sharp = scm_get_hash_procedure (c);
-
- if (!SCM_FALSEP (sharp))
- {
- int line = SCM_LINUM (port);
- int column = SCM_COL (port) - 2;
- SCM got;
-
- got = scm_call_2 (sharp, SCM_MAKE_CHAR (c), port);
- if (SCM_EQ_P (got, SCM_UNSPECIFIED))
- goto unkshrp;
- if (SCM_RECORD_POSITIONS_P)
- return *copy = recsexpr (got, line, column,
- SCM_FILENAME (port));
- else
- return got;
- }
- }
- unkshrp:
- scm_misc_error (s_scm_read, "Unknown # object: ~S",
- SCM_LIST1 (SCM_MAKE_CHAR (c)));
- }
-
- case '"':
- j = 0;
- while ('"' != (c = scm_getc (port)))
- {
- if (c == EOF)
- SCM_MISC_ERROR ("end of file in string constant", SCM_EOL);
-
- while (j + 2 >= SCM_STRING_LENGTH (*tok_buf))
- scm_grow_tok_buf (tok_buf);
-
- if (c == '\\')
- switch (c = scm_getc (port))
- {
- case '\n':
- continue;
- case '0':
- c = '\0';
- break;
- case 'f':
- c = '\f';
- break;
- case 'n':
- c = '\n';
- break;
- case 'r':
- c = '\r';
- break;
- case 't':
- c = '\t';
- break;
- case 'a':
- c = '\007';
- break;
- case 'v':
- c = '\v';
- break;
- }
- SCM_STRING_CHARS (*tok_buf)[j] = c;
- ++j;
- }
- if (j == 0)
- return scm_nullstr;
- SCM_STRING_CHARS (*tok_buf)[j] = 0;
- return scm_mem2string (SCM_STRING_CHARS (*tok_buf), j);
-
- case'0':case '1':case '2':case '3':case '4':
- case '5':case '6':case '7':case '8':case '9':
- case '.':
- case '-':
- case '+':
- num:
- j = scm_read_token (c, tok_buf, port, 0);
- p = scm_istring2number (SCM_STRING_CHARS (*tok_buf), (long) j, 10L);
- if (!SCM_FALSEP (p))
- return p;
- if (c == '#')
- {
- if ((j == 2) && (scm_getc (port) == '('))
- {
- scm_ungetc ('(', port);
- c = SCM_STRING_CHARS (*tok_buf)[1];
- goto callshrp;
- }
- SCM_MISC_ERROR ("unknown # object", SCM_EOL);
- }
- goto tok;
-
- case ':':
- if (SCM_EQ_P (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix))
- {
- j = scm_read_token ('-', tok_buf, port, 0);
- p = scm_mem2symbol (SCM_STRING_CHARS (*tok_buf), j);
- return scm_make_keyword_from_dash_symbol (p);
- }
- /* fallthrough */
- default:
- j = scm_read_token (c, tok_buf, port, 0);
- /* fallthrough */
-
- tok:
- return scm_mem2symbol (SCM_STRING_CHARS (*tok_buf), j);
- }
-}
-#undef FUNC_NAME
-
-
-#ifdef _UNICOS
-_Pragma ("noopt"); /* # pragma _CRI noopt */
-#endif
-
-size_t
-scm_read_token (int ic, SCM *tok_buf, SCM port, int weird)
-{
- register size_t j;
- register int c;
- register char *p;
-
- c = (SCM_CASE_INSENSITIVE_P ? scm_downcase(ic) : ic);
- p = SCM_STRING_CHARS (*tok_buf);
-
- if (weird)
- j = 0;
- else
- {
- j = 0;
- while (j + 2 >= SCM_STRING_LENGTH (*tok_buf))
- p = scm_grow_tok_buf (tok_buf);
- p[j] = c;
- ++j;
- }
-
- while (1)
- {
- while (j + 2 >= SCM_STRING_LENGTH (*tok_buf))
- p = scm_grow_tok_buf (tok_buf);
- c = scm_getc (port);
- switch (c)
- {
- case '(':
- case ')':
- case '"':
- case ';':
- case SCM_WHITE_SPACES:
- case SCM_LINE_INCREMENTORS:
- if (weird)
- goto default_case;
-
- scm_ungetc (c, port);
- case EOF:
- eof_case:
- p[j] = 0;
- return j;
- case '\\':
- if (!weird)
- goto default_case;
- else
- {
- c = scm_getc (port);
- if (c == EOF)
- goto eof_case;
- else
- goto default_case;
- }
- case '}':
- if (!weird)
- goto default_case;
-
- c = scm_getc (port);
- if (c == '#')
- {
- p[j] = 0;
- return j;
- }
- else
- {
- scm_ungetc (c, port);
- c = '}';
- goto default_case;
- }
-
- default:
- default_case:
- {
- c = (SCM_CASE_INSENSITIVE_P ? scm_downcase(c) : c);
- p[j] = c;
- ++j;
- }
-
- }
- }
-}
-
-#ifdef _UNICOS
-_Pragma ("opt"); /* # pragma _CRI opt */
-#endif
-
-SCM
-scm_lreadparen (SCM *tok_buf, SCM port, char *name, SCM *copy)
-#define FUNC_NAME "scm_lreadparen"
-{
- SCM tmp;
- SCM tl;
- SCM ans;
- int c;
-
- c = scm_flush_ws (port, name);
- if (')' == c)
- return SCM_EOL;
- scm_ungetc (c, port);
- if (SCM_EQ_P (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy))))
- {
- ans = scm_lreadr (tok_buf, port, copy);
- closeit:
- if (')' != (c = scm_flush_ws (port, name)))
- SCM_MISC_ERROR ("missing close paren", SCM_EOL);
- return ans;
- }
- ans = tl = scm_cons (tmp, SCM_EOL);
- while (')' != (c = scm_flush_ws (port, name)))
- {
- scm_ungetc (c, port);
- if (SCM_EQ_P (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy))))
- {
- SCM_SETCDR (tl, scm_lreadr (tok_buf, port, copy));
- goto closeit;
- }
- SCM_SETCDR (tl, scm_cons (tmp, SCM_EOL));
- tl = SCM_CDR (tl);
- }
- return ans;
-}
-#undef FUNC_NAME
-
-
-SCM
-scm_lreadrecparen (SCM *tok_buf, SCM port, char *name, SCM *copy)
-#define FUNC_NAME "scm_lreadrecparen"
-{
- register int c;
- register SCM tmp;
- register SCM tl, tl2 = SCM_EOL;
- SCM ans, ans2 = SCM_EOL;
- /* Need to capture line and column numbers here. */
- int line = SCM_LINUM (port);
- int column = SCM_COL (port) - 1;
-
- c = scm_flush_ws (port, name);
- if (')' == c)
- return SCM_EOL;
- scm_ungetc (c, port);
- if (SCM_EQ_P (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy))))
- {
- ans = scm_lreadr (tok_buf, port, copy);
- if (')' != (c = scm_flush_ws (port, name)))
- SCM_MISC_ERROR ("missing close paren", SCM_EOL);
- return ans;
- }
- /* Build the head of the list structure. */
- ans = tl = scm_cons (tmp, SCM_EOL);
- if (SCM_COPY_SOURCE_P)
- ans2 = tl2 = scm_cons (SCM_CONSP (tmp)
- ? *copy
- : tmp,
- SCM_EOL);
- while (')' != (c = scm_flush_ws (port, name)))
- {
- SCM new_tail;
-
- scm_ungetc (c, port);
- if (SCM_EQ_P (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy))))
- {
- SCM_SETCDR (tl, tmp = scm_lreadr (tok_buf, port, copy));
- if (SCM_COPY_SOURCE_P)
- SCM_SETCDR (tl2, scm_cons (SCM_CONSP (tmp)
- ? *copy
- : tmp,
- SCM_EOL));
- if (')' != (c = scm_flush_ws (port, name)))
- SCM_MISC_ERROR ("missing close paren", SCM_EOL);
- goto exit;
- }
-
- new_tail = scm_cons (tmp, SCM_EOL);
- SCM_SETCDR (tl, new_tail);
- tl = new_tail;
-
- if (SCM_COPY_SOURCE_P)
- {
- SCM new_tail2 = scm_cons (SCM_CONSP (tmp) ? *copy : tmp, SCM_EOL);
- SCM_SETCDR (tl2, new_tail2);
- tl2 = new_tail2;
- }
- }
-exit:
- scm_whash_insert (scm_source_whash,
- ans,
- scm_make_srcprops (line,
- column,
- SCM_FILENAME (port),
- SCM_COPY_SOURCE_P
- ? *copy = ans2
- : SCM_UNDEFINED,
- SCM_EOL));
- return ans;
-}
-#undef FUNC_NAME
-
-
-\f
-
-/* Manipulate the read-hash-procedures alist. This could be written in
- Scheme, but maybe it will also be used by C code during initialisation. */
-SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
- (SCM chr, SCM proc),
- "Install the procedure @var{proc} for reading expressions\n"
- "starting with the character sequence @code{#} and @var{chr}.\n"
- "@var{proc} will be called with two arguments: the character\n"
- "@var{chr} and the port to read further data from. The object\n"
- "returned will be the return value of @code{read}.")
-#define FUNC_NAME s_scm_read_hash_extend
-{
- SCM this;
- SCM prev;
-
- SCM_VALIDATE_CHAR (1, chr);
- SCM_ASSERT (SCM_FALSEP (proc)
- || SCM_EQ_P (scm_procedure_p (proc), SCM_BOOL_T),
- proc, SCM_ARG2, FUNC_NAME);
-
- /* Check if chr is already in the alist. */
- this = *scm_read_hash_procedures;
- prev = SCM_BOOL_F;
- while (1)
- {
- if (SCM_NULLP (this))
- {
- /* not found, so add it to the beginning. */
- if (!SCM_FALSEP (proc))
- {
- *scm_read_hash_procedures =
- scm_cons (scm_cons (chr, proc), *scm_read_hash_procedures);
- }
- break;
- }
- if (SCM_EQ_P (chr, SCM_CAAR (this)))
- {
- /* already in the alist. */
- if (SCM_FALSEP (proc))
- {
- /* remove it. */
- if (SCM_FALSEP (prev))
- {
- *scm_read_hash_procedures =
- SCM_CDR (*scm_read_hash_procedures);
- }
- else
- scm_set_cdr_x (prev, SCM_CDR (this));
- }
- else
- {
- /* replace it. */
- scm_set_cdr_x (SCM_CAR (this), proc);
- }
- break;
- }
- prev = this;
- this = SCM_CDR (this);
- }
-
- return SCM_UNSPECIFIED;
-}
-#undef FUNC_NAME
-
-/* Recover the read-hash procedure corresponding to char c. */
-static SCM
-scm_get_hash_procedure (int c)
-{
- SCM rest = *scm_read_hash_procedures;
-
- while (1)
- {
- if (SCM_NULLP (rest))
- return SCM_BOOL_F;
-
- if (SCM_CHAR (SCM_CAAR (rest)) == c)
- return SCM_CDAR (rest);
-
- rest = SCM_CDR (rest);
- }
-}
-
-void
-scm_init_read ()
-{
- scm_read_hash_procedures =
- SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL));
-
- scm_init_opts (scm_read_options, scm_read_opts, SCM_N_READ_OPTIONS);
-#ifndef SCM_MAGIC_SNARFER
-#include "libguile/read.x"
-#endif
-}
-
-/*
- Local Variables:
- c-file-style: "gnu"
- End:
-*/
+/* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007 Free Software
+ * Foundation, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+\f
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#endif
+
+#include "libguile/_scm.h"
+#include "libguile/chars.h"
+#include "libguile/eval.h"
+#include "libguile/unif.h"
+#include "libguile/keywords.h"
+#include "libguile/alist.h"
+#include "libguile/srcprop.h"
+#include "libguile/hashtab.h"
+#include "libguile/hash.h"
+#include "libguile/ports.h"
+#include "libguile/root.h"
+#include "libguile/strings.h"
+#include "libguile/strports.h"
+#include "libguile/vectors.h"
+#include "libguile/validate.h"
+#include "libguile/srfi-4.h"
+#include "libguile/srfi-13.h"
+
+#include "libguile/read.h"
+#include "libguile/private-options.h"
+
+
+\f
+
+SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
+SCM_SYMBOL (scm_keyword_prefix, "prefix");
+
+scm_t_option scm_read_opts[] = {
+ { SCM_OPTION_BOOLEAN, "copy", 0,
+ "Copy source code expressions." },
+ { SCM_OPTION_BOOLEAN, "positions", 0,
+ "Record positions of source code expressions." },
+ { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
+ "Convert symbols to lower case."},
+ { SCM_OPTION_SCM, "keywords", SCM_UNPACK (SCM_BOOL_F),
+ "Style of keyword recognition: #f or 'prefix."},
+#if SCM_ENABLE_ELISP
+ { SCM_OPTION_BOOLEAN, "elisp-vectors", 0,
+ "Support Elisp vector syntax, namely `[...]'."},
+ { SCM_OPTION_BOOLEAN, "elisp-strings", 0,
+ "Support `\\(' and `\\)' in strings."},
+#endif
+ { 0, },
+};
+
+/*
+ Give meaningful error messages for errors
+
+ We use the format
+
+ FILE:LINE:COL: MESSAGE
+ This happened in ....
+
+ This is not standard GNU format, but the test-suite likes the real
+ message to be in front.
+
+ */
+
+
+void
+scm_i_input_error (char const *function,
+ SCM port, const char *message, SCM arg)
+{
+ SCM fn = (scm_is_string (SCM_FILENAME(port))
+ ? SCM_FILENAME(port)
+ : scm_from_locale_string ("#<unknown port>"));
+
+ SCM string_port = scm_open_output_string ();
+ SCM string = SCM_EOL;
+ scm_simple_format (string_port,
+ scm_from_locale_string ("~A:~S:~S: ~A"),
+ scm_list_4 (fn,
+ scm_from_long (SCM_LINUM (port) + 1),
+ scm_from_int (SCM_COL (port) + 1),
+ scm_from_locale_string (message)));
+
+ string = scm_get_output_string (string_port);
+ scm_close_output_port (string_port);
+ scm_error_scm (scm_from_locale_symbol ("read-error"),
+ function? scm_from_locale_string (function) : SCM_BOOL_F,
+ string,
+ arg,
+ SCM_BOOL_F);
+}
+
+
+SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
+ (SCM setting),
+ "Option interface for the read options. Instead of using\n"
+ "this procedure directly, use the procedures @code{read-enable},\n"
+ "@code{read-disable}, @code{read-set!} and @code{read-options}.")
+#define FUNC_NAME s_scm_read_options
+{
+ SCM ans = scm_options (setting,
+ scm_read_opts,
+ FUNC_NAME);
+ if (SCM_COPY_SOURCE_P)
+ SCM_RECORD_POSITIONS_P = 1;
+ return ans;
+}
+#undef FUNC_NAME
+
+/* An association list mapping extra hash characters to procedures. */
+static SCM *scm_read_hash_procedures;
+
+
+\f
+/* Token readers. */
+
+
+/* Size of the C buffer used to read symbols and numbers. */
+#define READER_BUFFER_SIZE 128
+
+/* Size of the C buffer used to read strings. */
+#define READER_STRING_BUFFER_SIZE 512
+
+/* The maximum size of Scheme character names. */
+#define READER_CHAR_NAME_MAX_SIZE 50
+
+
+/* `isblank' is only in C99. */
+#define CHAR_IS_BLANK_(_chr) \
+ (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n') \
+ || ((_chr) == '\f'))
+
+#ifdef MSDOS
+# define CHAR_IS_BLANK(_chr) \
+ ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
+#else
+# define CHAR_IS_BLANK CHAR_IS_BLANK_
+#endif
+
+
+/* R5RS one-character delimiters (see section 7.1.1, ``Lexical
+ structure''). */
+#define CHAR_IS_R5RS_DELIMITER(c) \
+ (CHAR_IS_BLANK (c) \
+ || (c == ')') || (c == '(') || (c == ';') || (c == '"'))
+
+#define CHAR_IS_DELIMITER CHAR_IS_R5RS_DELIMITER
+
+/* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
+ Structure''. */
+#define CHAR_IS_EXPONENT_MARKER(_chr) \
+ (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
+ || ((_chr) == 'd') || ((_chr) == 'l'))
+
+/* An inlinable version of `scm_c_downcase ()'. */
+#define CHAR_DOWNCASE(_chr) \
+ (((_chr) <= UCHAR_MAX) ? tolower (_chr) : (_chr))
+
+
+#ifndef HAVE_STRNCASECMP
+/* XXX: Use Gnulib's `strncasecmp ()'. */
+
+static int
+strncasecmp (const char *s1, const char *s2, size_t len2)
+{
+ while (*s1 && *s2 && len2 > 0)
+ {
+ int c1 = *s1, c2 = *s2;
+
+ if (CHAR_DOWNCASE (c1) != CHAR_DOWNCASE (c2))
+ return 0;
+ else
+ {
+ ++s1;
+ ++s2;
+ --len2;
+ }
+ }
+ return !(*s1 || *s2 || len2 > 0);
+}
+#endif
+
+
+/* Helper function similar to `scm_read_token ()'. Read from PORT until a
+ whitespace is read. Return zero if the whole token could fit in BUF,
+ non-zero otherwise. */
+static inline int
+read_token (SCM port, char *buf, size_t buf_size, size_t *read)
+{
+ *read = 0;
+
+ while (*read < buf_size)
+ {
+ int chr;
+
+ chr = scm_getc (port);
+ chr = (SCM_CASE_INSENSITIVE_P ? CHAR_DOWNCASE (chr) : chr);
+
+ if (chr == EOF)
+ return 0;
+ else if (CHAR_IS_DELIMITER (chr))
+ {
+ scm_ungetc (chr, port);
+ return 0;
+ }
+ else
+ {
+ *buf = (char) chr;
+ buf++, (*read)++;
+ }
+ }
+
+ return 1;
+}
+
+
+/* Skip whitespace from PORT and return the first non-whitespace character
+ read. Raise an error on end-of-file. */
+static int
+flush_ws (SCM port, const char *eoferr)
+{
+ register int c;
+ while (1)
+ switch (c = scm_getc (port))
+ {
+ case EOF:
+ goteof:
+ if (eoferr)
+ {
+ scm_i_input_error (eoferr,
+ port,
+ "end of file",
+ SCM_EOL);
+ }
+ return c;
+
+ case ';':
+ lp:
+ switch (c = scm_getc (port))
+ {
+ case EOF:
+ goto goteof;
+ default:
+ goto lp;
+ case SCM_LINE_INCREMENTORS:
+ break;
+ }
+ break;
+
+ case SCM_LINE_INCREMENTORS:
+ case SCM_SINGLE_SPACES:
+ case '\t':
+ break;
+
+ default:
+ return c;
+ }
+
+ return 0;
+}
+
+
+\f
+/* Token readers. */
+
+static SCM scm_read_expression (SCM port);
+static SCM scm_read_sharp (int chr, SCM port);
+static SCM scm_get_hash_procedure (int c);
+static SCM recsexpr (SCM obj, long line, int column, SCM filename);
+
+
+static SCM
+scm_read_sexp (int chr, SCM port)
+#define FUNC_NAME "scm_i_lreadparen"
+{
+ register int c;
+ register SCM tmp;
+ register SCM tl, ans = SCM_EOL;
+ SCM tl2 = SCM_EOL, ans2 = SCM_EOL, copy = SCM_BOOL_F;;
+ static const int terminating_char = ')';
+
+ /* Need to capture line and column numbers here. */
+ long line = SCM_LINUM (port);
+ int column = SCM_COL (port) - 1;
+
+
+ c = flush_ws (port, FUNC_NAME);
+ if (terminating_char == c)
+ return SCM_EOL;
+
+ scm_ungetc (c, port);
+ if (scm_is_eq (scm_sym_dot,
+ (tmp = scm_read_expression (port))))
+ {
+ ans = scm_read_expression (port);
+ if (terminating_char != (c = flush_ws (port, FUNC_NAME)))
+ scm_i_input_error (FUNC_NAME, port, "missing close paren",
+ SCM_EOL);
+ return ans;
+ }
+
+ /* Build the head of the list structure. */
+ ans = tl = scm_cons (tmp, SCM_EOL);
+
+ if (SCM_COPY_SOURCE_P)
+ ans2 = tl2 = scm_cons (scm_is_pair (tmp)
+ ? copy
+ : tmp,
+ SCM_EOL);
+
+ while (terminating_char != (c = flush_ws (port, FUNC_NAME)))
+ {
+ SCM new_tail;
+
+ scm_ungetc (c, port);
+ if (scm_is_eq (scm_sym_dot,
+ (tmp = scm_read_expression (port))))
+ {
+ SCM_SETCDR (tl, tmp = scm_read_expression (port));
+
+ if (SCM_COPY_SOURCE_P)
+ SCM_SETCDR (tl2, scm_cons (scm_is_pair (tmp) ? copy : tmp,
+ SCM_EOL));
+
+ c = flush_ws (port, FUNC_NAME);
+ if (terminating_char != c)
+ scm_i_input_error (FUNC_NAME, port,
+ "in pair: missing close paren", SCM_EOL);
+ goto exit;
+ }
+
+ new_tail = scm_cons (tmp, SCM_EOL);
+ SCM_SETCDR (tl, new_tail);
+ tl = new_tail;
+
+ if (SCM_COPY_SOURCE_P)
+ {
+ SCM new_tail2 = scm_cons (scm_is_pair (tmp)
+ ? copy
+ : tmp, SCM_EOL);
+ SCM_SETCDR (tl2, new_tail2);
+ tl2 = new_tail2;
+ }
+ }
+
+ exit:
+ if (SCM_RECORD_POSITIONS_P)
+ scm_whash_insert (scm_source_whash,
+ ans,
+ scm_make_srcprops (line, column,
+ SCM_FILENAME (port),
+ SCM_COPY_SOURCE_P
+ ? ans2
+ : SCM_UNDEFINED,
+ SCM_EOL));
+ return ans;
+}
+#undef FUNC_NAME
+
+static SCM
+scm_read_string (int chr, SCM port)
+#define FUNC_NAME "scm_lreadr"
+{
+ /* For strings smaller than C_STR, this function creates only one Scheme
+ object (the string returned). */
+
+ SCM str = SCM_BOOL_F;
+ char c_str[READER_STRING_BUFFER_SIZE];
+ unsigned c_str_len = 0;
+ int c;
+
+ while ('"' != (c = scm_getc (port)))
+ {
+ if (c == EOF)
+ str_eof: scm_i_input_error (FUNC_NAME, port,
+ "end of file in string constant",
+ SCM_EOL);
+
+ if (c_str_len + 1 >= sizeof (c_str))
+ {
+ /* Flush the C buffer onto a Scheme string. */
+ SCM addy;
+
+ if (str == SCM_BOOL_F)
+ str = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
+
+ addy = scm_from_locale_stringn (c_str, c_str_len);
+ str = scm_string_append_shared (scm_list_2 (str, addy));
+
+ c_str_len = 0;
+ }
+
+ if (c == '\\')
+ switch (c = scm_getc (port))
+ {
+ case EOF:
+ goto str_eof;
+ case '"':
+ case '\\':
+ break;
+#if SCM_ENABLE_ELISP
+ case '(':
+ case ')':
+ if (SCM_ESCAPED_PARENS_P)
+ break;
+ goto bad_escaped;
+#endif
+ case '\n':
+ continue;
+ case '0':
+ c = '\0';
+ break;
+ case 'f':
+ c = '\f';
+ break;
+ case 'n':
+ c = '\n';
+ break;
+ case 'r':
+ c = '\r';
+ break;
+ case 't':
+ c = '\t';
+ break;
+ case 'a':
+ c = '\007';
+ break;
+ case 'v':
+ c = '\v';
+ break;
+ case 'x':
+ {
+ int a, b;
+ a = scm_getc (port);
+ if (a == EOF) goto str_eof;
+ b = scm_getc (port);
+ if (b == EOF) goto str_eof;
+ if ('0' <= a && a <= '9') a -= '0';
+ else if ('A' <= a && a <= 'F') a = a - 'A' + 10;
+ else if ('a' <= a && a <= 'f') a = a - 'a' + 10;
+ else goto bad_escaped;
+ if ('0' <= b && b <= '9') b -= '0';
+ else if ('A' <= b && b <= 'F') b = b - 'A' + 10;
+ else if ('a' <= b && b <= 'f') b = b - 'a' + 10;
+ else goto bad_escaped;
+ c = a * 16 + b;
+ break;
+ }
+ default:
+ bad_escaped:
+ scm_i_input_error (FUNC_NAME, port,
+ "illegal character in escape sequence: ~S",
+ scm_list_1 (SCM_MAKE_CHAR (c)));
+ }
+ c_str[c_str_len++] = c;
+ }
+
+ if (c_str_len > 0)
+ {
+ SCM addy;
+
+ addy = scm_from_locale_stringn (c_str, c_str_len);
+ if (str == SCM_BOOL_F)
+ str = addy;
+ else
+ str = scm_string_append_shared (scm_list_2 (str, addy));
+ }
+ else
+ str = (str == SCM_BOOL_F) ? scm_nullstr : str;
+
+ return str;
+}
+#undef FUNC_NAME
+
+
+static SCM
+scm_read_number (int chr, SCM port)
+{
+ SCM result, str = SCM_EOL;
+ char buffer[READER_BUFFER_SIZE];
+ size_t read;
+ int overflow = 0;
+
+ scm_ungetc (chr, port);
+ do
+ {
+ overflow = read_token (port, buffer, sizeof (buffer), &read);
+
+ if ((overflow) || (scm_is_pair (str)))
+ str = scm_cons (scm_from_locale_stringn (buffer, read), str);
+ }
+ while (overflow);
+
+ if (scm_is_pair (str))
+ {
+ /* The slow path. */
+
+ str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
+ result = scm_string_to_number (str, SCM_UNDEFINED);
+ if (!scm_is_true (result))
+ /* Return a symbol instead of a number. */
+ result = scm_string_to_symbol (str);
+ }
+ else
+ {
+ result = scm_c_locale_stringn_to_number (buffer, read, 10);
+ if (!scm_is_true (result))
+ /* Return a symbol instead of a number. */
+ result = scm_from_locale_symboln (buffer, read);
+ }
+
+ return result;
+}
+
+static SCM
+scm_read_mixed_case_symbol (int chr, SCM port)
+{
+ SCM result, str = SCM_EOL;
+ int overflow = 0;
+ char buffer[READER_BUFFER_SIZE];
+ size_t read = 0;
+
+ scm_ungetc (chr, port);
+ do
+ {
+ overflow = read_token (port, buffer, sizeof (buffer), &read);
+
+ if ((overflow) || (scm_is_pair (str)))
+ str = scm_cons (scm_from_locale_stringn (buffer, read), str);
+ }
+ while (overflow);
+
+ if (scm_is_pair (str))
+ {
+ str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
+ result = scm_string_to_symbol (str);
+ }
+ else
+ /* For symbols smaller than `sizeof (buffer)', we don't need to recur to
+ Scheme strings. Therefore, we only create one Scheme object (a
+ symbol) per symbol read. */
+ result = scm_from_locale_symboln (buffer, read);
+
+ return result;
+}
+
+static SCM
+scm_read_number_and_radix (int chr, SCM port)
+#define FUNC_NAME "scm_lreadr"
+{
+ SCM result, str = SCM_EOL;
+ size_t read;
+ char buffer[READER_BUFFER_SIZE];
+ unsigned int radix;
+ int overflow = 0;
+
+ switch (chr)
+ {
+ case 'B':
+ case 'b':
+ radix = 2;
+ break;
+
+ case 'o':
+ case 'O':
+ radix = 8;
+ break;
+
+ case 'd':
+ case 'D':
+ radix = 10;
+ break;
+
+ case 'x':
+ case 'X':
+ radix = 16;
+ break;
+
+ default:
+ scm_ungetc (chr, port);
+ scm_ungetc ('#', port);
+ radix = 10;
+ }
+
+ do
+ {
+ overflow = read_token (port, buffer, sizeof (buffer), &read);
+
+ if ((overflow) || (scm_is_pair (str)))
+ str = scm_cons (scm_from_locale_stringn (buffer, read), str);
+ }
+ while (overflow);
+
+ if (scm_is_pair (str))
+ {
+ str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
+ result = scm_string_to_number (str, scm_from_uint (radix));
+ }
+ else
+ result = scm_c_locale_stringn_to_number (buffer, read, radix);
+
+ if (scm_is_true (result))
+ return result;
+
+ scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
+
+ return SCM_BOOL_F;
+}
+#undef FUNC_NAME
+
+static SCM
+scm_read_quote (int chr, SCM port)
+{
+ SCM p;
+
+ switch (chr)
+ {
+ case '`':
+ p = scm_sym_quasiquote;
+ break;
+
+ case '\'':
+ p = scm_sym_quote;
+ break;
+
+ case ',':
+ {
+ int c;
+
+ c = scm_getc (port);
+ if ('@' == c)
+ p = scm_sym_uq_splicing;
+ else
+ {
+ scm_ungetc (c, port);
+ p = scm_sym_unquote;
+ }
+ break;
+ }
+
+ default:
+ fprintf (stderr, "%s: unhandled quote character (%i)\n",
+ __FUNCTION__, chr);
+ abort ();
+ }
+
+ p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
+
+ return p;
+}
+
+static inline SCM
+scm_read_semicolon_comment (int chr, SCM port)
+{
+ int c;
+
+ for (c = scm_getc (port);
+ (c != EOF) && (c != '\n');
+ c = scm_getc (port));
+
+ return SCM_UNSPECIFIED;
+}
+
+\f
+/* Sharp readers, i.e. readers called after a `#' sign has been read. */
+
+static SCM
+scm_read_boolean (int chr, SCM port)
+{
+ switch (chr)
+ {
+ case 't':
+ case 'T':
+ return SCM_BOOL_T;
+
+ case 'f':
+ case 'F':
+ return SCM_BOOL_F;
+ }
+
+ return SCM_UNSPECIFIED;
+}
+
+static SCM
+scm_read_character (int chr, SCM port)
+#define FUNC_NAME "scm_lreadr"
+{
+ unsigned c;
+ char charname[READER_CHAR_NAME_MAX_SIZE];
+ size_t charname_len;
+
+ if (read_token (port, charname, sizeof (charname), &charname_len))
+ goto char_error;
+
+ if (charname_len == 0)
+ {
+ chr = scm_getc (port);
+ if (chr == EOF)
+ scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
+ "while reading character", SCM_EOL);
+
+ /* CHR must be a token delimiter, like a whitespace. */
+ return (SCM_MAKE_CHAR (chr));
+ }
+
+ if (charname_len == 1)
+ return SCM_MAKE_CHAR (charname[0]);
+
+ if (*charname >= '0' && *charname < '8')
+ {
+ /* Dirk:FIXME:: This type of character syntax is not R5RS
+ * compliant. Further, it should be verified that the constant
+ * does only consist of octal digits. Finally, it should be
+ * checked whether the resulting fixnum is in the range of
+ * characters. */
+ SCM p = scm_c_locale_stringn_to_number (charname, charname_len, 8);
+ if (SCM_I_INUMP (p))
+ return SCM_MAKE_CHAR (SCM_I_INUM (p));
+ }
+
+ for (c = 0; c < scm_n_charnames; c++)
+ if (scm_charnames[c]
+ && (!strncasecmp (scm_charnames[c], charname, charname_len)))
+ return SCM_MAKE_CHAR (scm_charnums[c]);
+
+ char_error:
+ scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
+ scm_list_1 (scm_from_locale_stringn (charname,
+ charname_len)));
+
+ return SCM_UNSPECIFIED;
+}
+#undef FUNC_NAME
+
+static inline SCM
+scm_read_keyword (int chr, SCM port)
+{
+ SCM symbol;
+
+ /* Read the symbol that comprises the keyword. Doing this instead of
+ invoking a specific symbol reader function allows `scm_read_keyword ()'
+ to adapt to the delimiters currently valid of symbols.
+
+ XXX: This implementation allows sloppy syntaxes like `#: key'. */
+ symbol = scm_read_expression (port);
+ if (!scm_is_symbol (symbol))
+ scm_i_input_error (__FUNCTION__, port,
+ "keyword prefix `~a' not followed by a symbol: ~s",
+ scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
+
+ return (scm_symbol_to_keyword (symbol));
+}
+
+static inline SCM
+scm_read_vector (int chr, SCM port)
+{
+ /* Note: We call `scm_read_sexp ()' rather than READER here in order to
+ guarantee that it's going to do what we want. After all, this is an
+ implementation detail of `scm_read_vector ()', not a desirable
+ property. */
+ return (scm_vector (scm_read_sexp (chr, port)));
+}
+
+static inline SCM
+scm_read_srfi4_vector (int chr, SCM port)
+{
+ return scm_i_read_array (port, chr);
+}
+
+static SCM
+scm_read_guile_bit_vector (int chr, SCM port)
+{
+ /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
+ terribly inefficient but who cares? */
+ SCM s_bits = SCM_EOL;
+
+ for (chr = scm_getc (port);
+ (chr != EOF) && ((chr == '0') || (chr == '1'));
+ chr = scm_getc (port))
+ {
+ s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
+ }
+
+ if (chr != EOF)
+ scm_ungetc (chr, port);
+
+ return scm_bitvector (scm_reverse_x (s_bits, SCM_EOL));
+}
+
+static inline SCM
+scm_read_scsh_block_comment (int chr, SCM port)
+{
+ int bang_seen = 0;
+
+ for (;;)
+ {
+ int c = scm_getc (port);
+
+ if (c == EOF)
+ scm_i_input_error ("skip_block_comment", port,
+ "unterminated `#! ... !#' comment", SCM_EOL);
+
+ if (c == '!')
+ bang_seen = 1;
+ else if (c == '#' && bang_seen)
+ break;
+ else
+ bang_seen = 0;
+ }
+
+ return SCM_UNSPECIFIED;
+}
+
+static SCM
+scm_read_extended_symbol (int chr, SCM port)
+{
+ /* Guile's extended symbol read syntax looks like this:
+
+ #{This is all a symbol name}#
+
+ So here, CHR is expected to be `{'. */
+ SCM result;
+ int saw_brace = 0, finished = 0;
+ size_t len = 0;
+ char buf[1024];
+
+ result = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
+
+ while ((chr = scm_getc (port)) != EOF)
+ {
+ if (saw_brace)
+ {
+ if (chr == '#')
+ {
+ finished = 1;
+ break;
+ }
+ else
+ {
+ saw_brace = 0;
+ buf[len++] = '}';
+ buf[len++] = chr;
+ }
+ }
+ else if (chr == '}')
+ saw_brace = 1;
+ else
+ buf[len++] = chr;
+
+ if (len >= sizeof (buf) - 2)
+ {
+ scm_string_append (scm_list_2 (result,
+ scm_from_locale_stringn (buf, len)));
+ len = 0;
+ }
+
+ if (finished)
+ break;
+ }
+
+ if (len)
+ result = scm_string_append (scm_list_2
+ (result,
+ scm_from_locale_stringn (buf, len)));
+
+ return (scm_string_to_symbol (result));
+}
+
+
+\f
+/* Top-level token readers, i.e., dispatchers. */
+
+static SCM
+scm_read_sharp_extension (int chr, SCM port)
+{
+ SCM proc;
+
+ proc = scm_get_hash_procedure (chr);
+ if (scm_is_true (scm_procedure_p (proc)))
+ {
+ long line = SCM_LINUM (port);
+ int column = SCM_COL (port) - 2;
+ SCM got;
+
+ got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
+ if (!scm_is_eq (got, SCM_UNSPECIFIED))
+ {
+ if (SCM_RECORD_POSITIONS_P)
+ return (recsexpr (got, line, column,
+ SCM_FILENAME (port)));
+ else
+ return got;
+ }
+ }
+
+ return SCM_UNSPECIFIED;
+}
+
+/* The reader for the sharp `#' character. It basically dispatches reads
+ among the above token readers. */
+static SCM
+scm_read_sharp (int chr, SCM port)
+#define FUNC_NAME "scm_lreadr"
+{
+ SCM result;
+
+ chr = scm_getc (port);
+
+ result = scm_read_sharp_extension (chr, port);
+ if (!scm_is_eq (result, SCM_UNSPECIFIED))
+ return result;
+
+ switch (chr)
+ {
+ case '\\':
+ return (scm_read_character (chr, port));
+ case '(':
+ return (scm_read_vector (chr, port));
+ case 's':
+ case 'u':
+ case 'f':
+ /* This one may return either a boolean or an SRFI-4 vector. */
+ return (scm_read_srfi4_vector (chr, port));
+ case '*':
+ return (scm_read_guile_bit_vector (chr, port));
+ case 't':
+ case 'T':
+ case 'F':
+ /* This one may return either a boolean or an SRFI-4 vector. */
+ return (scm_read_boolean (chr, port));
+ case ':':
+ return (scm_read_keyword (chr, port));
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case '@':
+#if SCM_ENABLE_DEPRECATED
+ /* See below for 'i' and 'e'. */
+ case 'a':
+ case 'c':
+ case 'y':
+ case 'h':
+ case 'l':
+#endif
+ return (scm_i_read_array (port, chr));
+
+ case 'i':
+ case 'e':
+#if SCM_ENABLE_DEPRECATED
+ {
+ /* When next char is '(', it really is an old-style
+ uniform array. */
+ int next_c = scm_getc (port);
+ if (next_c != EOF)
+ scm_ungetc (next_c, port);
+ if (next_c == '(')
+ return scm_i_read_array (port, chr);
+ /* Fall through. */
+ }
+#endif
+ case 'b':
+ case 'B':
+ case 'o':
+ case 'O':
+ case 'd':
+ case 'D':
+ case 'x':
+ case 'X':
+ case 'I':
+ case 'E':
+ return (scm_read_number_and_radix (chr, port));
+ case '{':
+ return (scm_read_extended_symbol (chr, port));
+ case '!':
+ return (scm_read_scsh_block_comment (chr, port));
+ default:
+ result = scm_read_sharp_extension (chr, port);
+ if (scm_is_eq (result, SCM_UNSPECIFIED))
+ scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
+ scm_list_1 (SCM_MAKE_CHAR (chr)));
+ else
+ return result;
+ }
+
+ return SCM_UNSPECIFIED;
+}
+#undef FUNC_NAME
+
+static SCM
+scm_read_expression (SCM port)
+#define FUNC_NAME "scm_read_expression"
+{
+ while (1)
+ {
+ register int chr;
+
+ chr = scm_getc (port);
+
+ switch (chr)
+ {
+ case SCM_WHITE_SPACES:
+ case SCM_LINE_INCREMENTORS:
+ break;
+ case ';':
+ (void) scm_read_semicolon_comment (chr, port);
+ break;
+ case '(':
+ return (scm_read_sexp (chr, port));
+ case '"':
+ return (scm_read_string (chr, port));
+ case '\'':
+ case '`':
+ case ',':
+ return (scm_read_quote (chr, port));
+ case '#':
+ {
+ SCM result;
+ result = scm_read_sharp (chr, port);
+ if (scm_is_eq (result, SCM_UNSPECIFIED))
+ /* We read a comment or some such. */
+ break;
+ else
+ return result;
+ }
+ case ')':
+ scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
+ break;
+ case EOF:
+ return SCM_EOF_VAL;
+ case ':':
+ if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix))
+ return scm_symbol_to_keyword (scm_read_expression (port));
+ /* Fall through. */
+
+ default:
+ {
+ if (((chr >= '0') && (chr <= '9'))
+ || (strchr ("+-.", chr)))
+ return (scm_read_number (chr, port));
+ else
+ return (scm_read_mixed_case_symbol (chr, port));
+ }
+ }
+ }
+}
+#undef FUNC_NAME
+
+\f
+/* Actual reader. */
+
+SCM_DEFINE (scm_read, "read", 0, 1, 0,
+ (SCM port),
+ "Read an s-expression from the input port @var{port}, or from\n"
+ "the current input port if @var{port} is not specified.\n"
+ "Any whitespace before the next token is discarded.")
+#define FUNC_NAME s_scm_read
+{
+ int c;
+
+ if (SCM_UNBNDP (port))
+ port = scm_current_input_port ();
+ SCM_VALIDATE_OPINPORT (1, port);
+
+ c = flush_ws (port, (char *) NULL);
+ if (EOF == c)
+ return SCM_EOF_VAL;
+ scm_ungetc (c, port);
+
+ return (scm_read_expression (port));
+}
+#undef FUNC_NAME
+
+
+\f
+
+/* Used when recording expressions constructed by `scm_read_sharp ()'. */
+static SCM
+recsexpr (SCM obj, long line, int column, SCM filename)
+{
+ if (!scm_is_pair(obj)) {
+ return obj;
+ } else {
+ SCM tmp = obj, copy;
+ /* If this sexpr is visible in the read:sharp source, we want to
+ keep that information, so only record non-constant cons cells
+ which haven't previously been read by the reader. */
+ if (scm_is_false (scm_whash_lookup (scm_source_whash, obj)))
+ {
+ if (SCM_COPY_SOURCE_P)
+ {
+ copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename),
+ SCM_UNDEFINED);
+ while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
+ {
+ SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp),
+ line,
+ column,
+ filename),
+ SCM_UNDEFINED));
+ copy = SCM_CDR (copy);
+ }
+ SCM_SETCDR (copy, tmp);
+ }
+ else
+ {
+ recsexpr (SCM_CAR (obj), line, column, filename);
+ while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
+ recsexpr (SCM_CAR (tmp), line, column, filename);
+ copy = SCM_UNDEFINED;
+ }
+ scm_whash_insert (scm_source_whash,
+ obj,
+ scm_make_srcprops (line,
+ column,
+ filename,
+ copy,
+ SCM_EOL));
+ }
+ return obj;
+ }
+}
+
+/* Manipulate the read-hash-procedures alist. This could be written in
+ Scheme, but maybe it will also be used by C code during initialisation. */
+SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
+ (SCM chr, SCM proc),
+ "Install the procedure @var{proc} for reading expressions\n"
+ "starting with the character sequence @code{#} and @var{chr}.\n"
+ "@var{proc} will be called with two arguments: the character\n"
+ "@var{chr} and the port to read further data from. The object\n"
+ "returned will be the return value of @code{read}. \n"
+ "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
+ )
+#define FUNC_NAME s_scm_read_hash_extend
+{
+ SCM this;
+ SCM prev;
+
+ SCM_VALIDATE_CHAR (1, chr);
+ SCM_ASSERT (scm_is_false (proc)
+ || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
+ proc, SCM_ARG2, FUNC_NAME);
+
+ /* Check if chr is already in the alist. */
+ this = *scm_read_hash_procedures;
+ prev = SCM_BOOL_F;
+ while (1)
+ {
+ if (scm_is_null (this))
+ {
+ /* not found, so add it to the beginning. */
+ if (scm_is_true (proc))
+ {
+ *scm_read_hash_procedures =
+ scm_cons (scm_cons (chr, proc), *scm_read_hash_procedures);
+ }
+ break;
+ }
+ if (scm_is_eq (chr, SCM_CAAR (this)))
+ {
+ /* already in the alist. */
+ if (scm_is_false (proc))
+ {
+ /* remove it. */
+ if (scm_is_false (prev))
+ {
+ *scm_read_hash_procedures =
+ SCM_CDR (*scm_read_hash_procedures);
+ }
+ else
+ scm_set_cdr_x (prev, SCM_CDR (this));
+ }
+ else
+ {
+ /* replace it. */
+ scm_set_cdr_x (SCM_CAR (this), proc);
+ }
+ break;
+ }
+ prev = this;
+ this = SCM_CDR (this);
+ }
+
+ return SCM_UNSPECIFIED;
+}
+#undef FUNC_NAME
+
+/* Recover the read-hash procedure corresponding to char c. */
+static SCM
+scm_get_hash_procedure (int c)
+{
+ SCM rest = *scm_read_hash_procedures;
+
+ while (1)
+ {
+ if (scm_is_null (rest))
+ return SCM_BOOL_F;
+
+ if (SCM_CHAR (SCM_CAAR (rest)) == c)
+ return SCM_CDAR (rest);
+
+ rest = SCM_CDR (rest);
+ }
+}
+
+void
+scm_init_read ()
+{
+ scm_read_hash_procedures =
+ SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL));
+
+ scm_init_opts (scm_read_options, scm_read_opts);
+#include "libguile/read.x"
+}
+
+/*
+ Local Variables:
+ c-file-style: "gnu"
+ End:
+*/