-/* Copyright (C) 1995,1996,1997, 1999, 2000 Free Software Foundation, Inc.
+/* Copyright (C) 1995,1996,1997,1999,2000,2001 Free Software Foundation, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* whether to permit this exception to apply to your modifications.
* If you do not wish that, delete this exception notice. */
-/* Software engineering face-lift by Greg J. Badros, 11-Dec-1999,
- gjb@cs.washington.edu, http://www.cs.washington.edu/homes/gjb */
\f
#include "libguile/ports.h"
#include "libguile/root.h"
#include "libguile/strings.h"
+#include "libguile/strports.h"
#include "libguile/vectors.h"
-
#include "libguile/validate.h"
+
#include "libguile/read.h"
\f
SCM_SYMBOL (scm_keyword_prefix, "prefix");
-scm_option scm_read_opts[] = {
+scm_t_option scm_read_opts[] = {
{ SCM_OPTION_BOOLEAN, "copy", 0,
"Copy source code expressions." },
{ SCM_OPTION_BOOLEAN, "positions", 0,
{ SCM_OPTION_BOOLEAN, "case-insensitive", 0,
"Convert symbols to lower case."},
{ SCM_OPTION_SCM, "keywords", SCM_UNPACK (SCM_BOOL_F),
- "Style of keyword recognition: #f or 'prefix"}
+ "Style of keyword recognition: #f or 'prefix."}
};
+/*
+ Give meaningful error messages for errors
+
+ We use the format
+
+ FILE:LINE:COL: MESSAGE
+ This happened in ....
+
+ This is not standard GNU format, but the test-suite likes the real
+ message to be in front.
+
+ */
+
+
+static void
+scm_input_error(char const * function,
+ SCM port, const char * message, SCM arg)
+{
+ char *fn = SCM_STRINGP (SCM_FILENAME(port))
+ ? SCM_STRING_CHARS(SCM_FILENAME(port))
+ : "#<unknown port>";
+
+ SCM string_port = scm_open_output_string ();
+ SCM string = SCM_EOL;
+ scm_simple_format (string_port,
+ scm_makfrom0str ("~A:~S:~S: ~A"),
+ scm_list_4 (scm_makfrom0str (fn),
+ scm_int2num (SCM_LINUM (port) + 1),
+ scm_int2num (SCM_COL (port) + 1),
+ scm_makfrom0str (message)));
+
+
+ string = scm_get_output_string (string_port);
+ scm_close_output_port (string_port);
+ scm_error_scm (scm_str2symbol ("read-error"),
+ scm_makfrom0str (function),
+ string,
+ SCM_EOL,
+ SCM_BOOL_F);
+}
+
+
SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
(SCM setting),
-"")
+ "Option interface for the read options. Instead of using\n"
+ "this procedure directly, use the procedures @code{read-enable},\n"
+ "@code{read-disable}, @code{read-set!} and @code{read-options}.")
#define FUNC_NAME s_scm_read_options
{
SCM ans = scm_options (setting,
SCM_DEFINE (scm_read, "read", 0, 1, 0,
(SCM port),
-"")
+ "Read an s-expression from the input port @var{port}, or from\n"
+ "the current input port if @var{port} is not specified.\n"
+ "Any whitespace before the next token is discarded.")
#define FUNC_NAME s_scm_read
{
int c;
if (SCM_UNBNDP (port))
port = scm_cur_inp;
- SCM_VALIDATE_OPINPORT (1,port);
+ SCM_VALIDATE_OPINPORT (1, port);
+ /* Skip leading whitespace; hitting end of input here yields the EOF object. */
c = scm_flush_ws (port, (char *) NULL);
if (EOF == c)
return SCM_EOF_VAL;
+ /* Push the first non-whitespace character back for scm_lreadr to consume. */
scm_ungetc (c, port);
- tok_buf = scm_makstr (30L, 0);
+ tok_buf = scm_allocate_string (30);
return scm_lreadr (&tok_buf, port, &copy);
}
#undef FUNC_NAME
char *
scm_grow_tok_buf (SCM *tok_buf)
{
- scm_vector_set_length_x (*tok_buf, SCM_MAKINUM (2 * SCM_LENGTH (*tok_buf)));
- return SCM_CHARS (*tok_buf);
+ /* Replace *TOK_BUF with a freshly allocated string of twice the
+    length, copy the old characters into its front, and return a
+    pointer to the new string's characters.  */
+ size_t old_size = SCM_STRING_LENGTH (*tok_buf);
+ SCM grown = scm_allocate_string (2 * old_size);
+ char *dst = SCM_STRING_CHARS (grown);
+ const char *src = SCM_STRING_CHARS (*tok_buf);
+ size_t k = 0;
+
+ while (k < old_size)
+   {
+     dst[k] = src[k];
+     k++;
+   }
+
+ /* Publish the larger buffer to the caller only after the copy.  */
+ *tok_buf = grown;
+ return dst;
}
case EOF:
goteof:
if (eoferr)
- scm_wta (SCM_UNDEFINED, "end of file in ", eoferr);
+ {
+ scm_input_error (eoferr,
+ port,
+ "end of file",
+ SCM_EOL);
+ }
return c;
case ';':
lp:
#define recsexpr(obj, line, column, filename) (obj)
#else
static SCM
-recsexpr (SCM obj,int line,int column,SCM filename)
+recsexpr (SCM obj, long line, int column, SCM filename)
{
if (!SCM_CONSP(obj)) {
return obj;
static void
skip_scsh_block_comment (SCM port)
+#define FUNC_NAME "skip_scsh_block_comment"
{
/* Is this portable? Dear God, spare me from the non-eight-bit
characters. But is it tasteful? */
int c = scm_getc (port);
if (c == EOF)
- scm_wta (SCM_UNDEFINED,
- "unterminated `#! ... !#' comment", "read");
+ SCM_MISC_ERROR ("unterminated `#! ... !#' comment", SCM_EOL);
history = ((history << 8) | (c & 0xff)) & 0xffffffff;
/* Were the last four characters read "\n!#\n"? */
return;
}
}
+#undef FUNC_NAME
+
static SCM scm_get_hash_procedure(int c);
static char s_list[]="list";
SCM
-scm_lreadr (SCM *tok_buf,SCM port,SCM *copy)
+scm_lreadr (SCM *tok_buf, SCM port, SCM *copy)
+#define FUNC_NAME "scm_lreadr"
{
int c;
- scm_sizet j;
+ size_t j;
SCM p;
-tryagain:
+ tryagain:
c = scm_flush_ws (port, s_scm_read);
-tryagain_no_flush_ws:
+ tryagain_no_flush_ws:
switch (c)
{
case EOF:
case '(':
return SCM_RECORD_POSITIONS_P
- ? scm_lreadrecparen (tok_buf, port, s_list, copy)
- : scm_lreadparen (tok_buf, port, s_list, copy);
+ ? scm_lreadrecparen (tok_buf, port, s_list, copy)
+ : scm_lreadparen (tok_buf, port, s_list, copy SCM_ELISP_CLOSE);
case ')':
- scm_wta (SCM_UNDEFINED, "unexpected \")\"", "read");
+ scm_input_error (FUNC_NAME, port,"unexpected \")\"", SCM_EOL);
goto tryagain;
+#ifdef SCM_ELISP_READ_EXTENSIONS
+ case '[':
+ p = scm_lreadparen (tok_buf, port, "vector", copy, ']');
+ return SCM_NULLP (p) ? scm_nullvect : scm_vector (p);
+#endif
case '\'':
p = scm_sym_quote;
goto recquote;
return p;
case '#':
c = scm_getc (port);
+
+ {
+ /* Check for user-defined hash procedure first, to allow
+ overriding of builtin hash read syntaxes. */
+ SCM sharp = scm_get_hash_procedure (c);
+ if (!SCM_FALSEP (sharp))
+ {
+ int line = SCM_LINUM (port);
+ int column = SCM_COL (port) - 2;
+ SCM got;
+
+ got = scm_call_2 (sharp, SCM_MAKE_CHAR (c), port);
+ if (SCM_EQ_P (got, SCM_UNSPECIFIED))
+ goto handle_sharp;
+ if (SCM_RECORD_POSITIONS_P)
+ return *copy = recsexpr (got, line, column,
+ SCM_FILENAME (port));
+ else
+ return got;
+ }
+ }
+ handle_sharp:
switch (c)
{
case '(':
- p = scm_lreadparen (tok_buf, port, "vector", copy);
+ p = scm_lreadparen (tok_buf, port, "vector", copy SCM_ELISP_CLOSE);
return SCM_NULLP (p) ? scm_nullvect : scm_vector (p);
case 't':
c = scm_flush_ws (port, (char *)NULL);
goto tryagain_no_flush_ws;
-#ifdef HAVE_ARRAYS
+#ifdef SCM_HAVE_ARRAYS
case '*':
j = scm_read_token (c, tok_buf, port, 0);
- p = scm_istr2bve (SCM_CHARS (*tok_buf) + 1, (long) (j - 1));
- if (SCM_NFALSEP (p))
+ p = scm_istr2bve (SCM_STRING_CHARS (*tok_buf) + 1, (long) (j - 1));
+ if (!SCM_FALSEP (p))
return p;
else
goto unkshrp;
case '{':
j = scm_read_token (c, tok_buf, port, 1);
- p = scm_intern (SCM_CHARS (*tok_buf), j);
- return SCM_CAR (p);
+ return scm_mem2symbol (SCM_STRING_CHARS (*tok_buf), j);
case '\\':
c = scm_getc (port);
return SCM_MAKE_CHAR (c);
if (c >= '0' && c < '8')
{
- p = scm_istr2int (SCM_CHARS (*tok_buf), (long) j, 8);
- if (SCM_NFALSEP (p))
+ /* Dirk:FIXME:: This type of character syntax is not R5RS
+ * compliant. Further, it should be verified that the constant
+ * consists only of octal digits. Finally, it should be
+ * checked whether the resulting fixnum is in the range of
+ * characters. */
+ p = scm_i_mem2number (SCM_STRING_CHARS (*tok_buf), j, 8);
+ if (SCM_INUMP (p))
return SCM_MAKE_CHAR (SCM_INUM (p));
}
for (c = 0; c < scm_n_charnames; c++)
if (scm_charnames[c]
- && (scm_casei_streq (scm_charnames[c], SCM_CHARS (*tok_buf))))
+ && (scm_casei_streq (scm_charnames[c], SCM_STRING_CHARS (*tok_buf))))
return SCM_MAKE_CHAR (scm_charnums[c]);
- scm_wta (SCM_UNDEFINED, "unknown # object: #\\", SCM_CHARS (*tok_buf));
+ scm_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
/* #:SYMBOL is a syntax for keywords supported in all contexts. */
case ':':
j = scm_read_token ('-', tok_buf, port, 0);
- p = scm_intern (SCM_CHARS (*tok_buf), j);
- return scm_make_keyword_from_dash_symbol (SCM_CAR (p));
+ p = scm_mem2symbol (SCM_STRING_CHARS (*tok_buf), j);
+ return scm_make_keyword_from_dash_symbol (p);
default:
callshrp:
{
SCM sharp = scm_get_hash_procedure (c);
- if (SCM_NIMP (sharp))
+ if (!SCM_FALSEP (sharp))
{
int line = SCM_LINUM (port);
int column = SCM_COL (port) - 2;
SCM got;
- got = scm_apply (sharp,
- SCM_MAKE_CHAR (c),
- scm_acons (port, SCM_EOL, SCM_EOL));
+ got = scm_call_2 (sharp, SCM_MAKE_CHAR (c), port);
if (SCM_EQ_P (got, SCM_UNSPECIFIED))
goto unkshrp;
if (SCM_RECORD_POSITIONS_P)
}
}
unkshrp:
- scm_misc_error (s_scm_read, "Unknown # object: ~S",
- scm_listify (SCM_MAKE_CHAR (c), SCM_UNDEFINED));
+ scm_input_error (FUNC_NAME, port, "Unknown # object: ~S",
+ scm_list_1 (SCM_MAKE_CHAR (c)));
}
case '"':
j = 0;
while ('"' != (c = scm_getc (port)))
{
- SCM_ASSERT (EOF != c, SCM_UNDEFINED, "end of file in ", "string");
+ if (c == EOF)
+ scm_input_error (FUNC_NAME, port, "end of file in string constant", SCM_EOL);
- while (j + 2 >= SCM_LENGTH (*tok_buf))
+ while (j + 2 >= SCM_STRING_LENGTH (*tok_buf))
scm_grow_tok_buf (tok_buf);
if (c == '\\')
c = '\v';
break;
}
- SCM_CHARS (*tok_buf)[j] = c;
+ SCM_STRING_CHARS (*tok_buf)[j] = c;
++j;
}
if (j == 0)
return scm_nullstr;
- SCM_CHARS (*tok_buf)[j] = 0;
- {
- SCM str;
- str = scm_makfromstr (SCM_CHARS (*tok_buf), j, 0);
- return str;
- }
+ SCM_STRING_CHARS (*tok_buf)[j] = 0;
+ return scm_mem2string (SCM_STRING_CHARS (*tok_buf), j);
- case'0':case '1':case '2':case '3':case '4':
- case '5':case '6':case '7':case '8':case '9':
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
case '.':
case '-':
case '+':
num:
j = scm_read_token (c, tok_buf, port, 0);
- p = scm_istring2number (SCM_CHARS (*tok_buf), (long) j, 10L);
- if (SCM_NFALSEP (p))
+ if (j == 1 && (c == '+' || c == '-'))
+ /* Shortcut: Detected symbol '+ or '- */
+ goto tok;
+
+ p = scm_i_mem2number (SCM_STRING_CHARS (*tok_buf), j, 10);
+ if (!SCM_FALSEP (p))
return p;
if (c == '#')
{
if ((j == 2) && (scm_getc (port) == '('))
{
scm_ungetc ('(', port);
- c = SCM_CHARS (*tok_buf)[1];
+ c = SCM_STRING_CHARS (*tok_buf)[1];
goto callshrp;
}
- scm_wta (SCM_UNDEFINED, "unknown # object", SCM_CHARS (*tok_buf));
+ scm_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
}
goto tok;
if (SCM_EQ_P (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix))
{
j = scm_read_token ('-', tok_buf, port, 0);
- p = scm_intern (SCM_CHARS (*tok_buf), j);
- return scm_make_keyword_from_dash_symbol (SCM_CAR (p));
+ p = scm_mem2symbol (SCM_STRING_CHARS (*tok_buf), j);
+ return scm_make_keyword_from_dash_symbol (p);
}
/* fallthrough */
default:
/* fallthrough */
tok:
- p = scm_intern (SCM_CHARS (*tok_buf), j);
- return SCM_CAR (p);
+ return scm_mem2symbol (SCM_STRING_CHARS (*tok_buf), j);
}
}
+#undef FUNC_NAME
+
#ifdef _UNICOS
_Pragma ("noopt"); /* # pragma _CRI noopt */
#endif
-scm_sizet
+size_t
scm_read_token (int ic, SCM *tok_buf, SCM port, int weird)
{
- register scm_sizet j;
+ register size_t j;
register int c;
register char *p;
c = (SCM_CASE_INSENSITIVE_P ? scm_downcase(ic) : ic);
- p = SCM_CHARS (*tok_buf);
+ p = SCM_STRING_CHARS (*tok_buf);
if (weird)
j = 0;
else
{
j = 0;
- while (j + 2 >= SCM_LENGTH (*tok_buf))
+ while (j + 2 >= SCM_STRING_LENGTH (*tok_buf))
p = scm_grow_tok_buf (tok_buf);
p[j] = c;
++j;
while (1)
{
- while (j + 2 >= SCM_LENGTH (*tok_buf))
+ while (j + 2 >= SCM_STRING_LENGTH (*tok_buf))
p = scm_grow_tok_buf (tok_buf);
c = scm_getc (port);
switch (c)
{
case '(':
case ')':
+#ifdef SCM_ELISP_READ_EXTENSIONS
+ case '[':
+ case ']':
+#endif
case '"':
case ';':
case SCM_WHITE_SPACES:
#endif
SCM
-scm_lreadparen (SCM *tok_buf, SCM port, char *name, SCM *copy)
+scm_lreadparen (SCM *tok_buf, SCM port, char *name, SCM *copy
+#ifdef SCM_ELISP_READ_EXTENSIONS
+ , char term_char
+#else
+#define term_char ')'
+#endif
+ )
+#define FUNC_NAME "scm_lreadparen"
{
SCM tmp;
SCM tl;
int c;
c = scm_flush_ws (port, name);
- if (')' == c)
+ if (term_char == c)
return SCM_EOL;
scm_ungetc (c, port);
if (SCM_EQ_P (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy))))
{
ans = scm_lreadr (tok_buf, port, copy);
closeit:
- if (')' != (c = scm_flush_ws (port, name)))
- scm_wta (SCM_UNDEFINED, "missing close paren", "");
+ if (term_char != (c = scm_flush_ws (port, name)))
+ scm_input_error (FUNC_NAME, port, "missing close paren", SCM_EOL);
return ans;
}
ans = tl = scm_cons (tmp, SCM_EOL);
- while (')' != (c = scm_flush_ws (port, name)))
+ while (term_char != (c = scm_flush_ws (port, name)))
{
scm_ungetc (c, port);
if (SCM_EQ_P (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy))))
}
return ans;
}
+#undef FUNC_NAME
+#ifndef SCM_ELISP_READ_EXTENSIONS
+#undef term_char
+#endif
SCM
scm_lreadrecparen (SCM *tok_buf, SCM port, char *name, SCM *copy)
+#define FUNC_NAME "scm_lreadrecparen"
{
register int c;
register SCM tmp;
{
ans = scm_lreadr (tok_buf, port, copy);
if (')' != (c = scm_flush_ws (port, name)))
- scm_wta (SCM_UNDEFINED, "missing close paren", "");
+ scm_input_error (FUNC_NAME, port, "missing close paren", SCM_EOL);
return ans;
}
/* Build the head of the list structure. */
SCM_EOL);
while (')' != (c = scm_flush_ws (port, name)))
{
+ SCM new_tail;
+
scm_ungetc (c, port);
if (SCM_EQ_P (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy))))
{
: tmp,
SCM_EOL));
if (')' != (c = scm_flush_ws (port, name)))
- scm_wta (SCM_UNDEFINED, "missing close paren", "");
+ scm_input_error (FUNC_NAME, port, "missing close paren", SCM_EOL);
goto exit;
}
- tl = SCM_SETCDR (tl, scm_cons (tmp, SCM_EOL));
+
+ new_tail = scm_cons (tmp, SCM_EOL);
+ SCM_SETCDR (tl, new_tail);
+ tl = new_tail;
+
if (SCM_COPY_SOURCE_P)
- tl2 = SCM_SETCDR (tl2, scm_cons (SCM_CONSP (tmp)
- ? *copy
- : tmp,
- SCM_EOL));
+ {
+ SCM new_tail2 = scm_cons (SCM_CONSP (tmp) ? *copy : tmp, SCM_EOL);
+ SCM_SETCDR (tl2, new_tail2);
+ tl2 = new_tail2;
+ }
}
exit:
scm_whash_insert (scm_source_whash,
SCM_EOL));
return ans;
}
+#undef FUNC_NAME
\f
Scheme, but maybe it will also be used by C code during initialisation. */
SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
(SCM chr, SCM proc),
-"")
+ "Install the procedure @var{proc} for reading expressions\n"
+ "starting with the character sequence @code{#} and @var{chr}.\n"
+ "@var{proc} will be called with two arguments: the character\n"
+ "@var{chr} and the port to read further data from. The object\n"
+ "returned will be the return value of @code{read}.")
#define FUNC_NAME s_scm_read_hash_extend
{
SCM this;
SCM prev;
- SCM_VALIDATE_CHAR (1,chr);
- SCM_ASSERT (SCM_FALSEP (proc) || SCM_NIMP(proc), proc, SCM_ARG2,
- FUNC_NAME);
+ SCM_VALIDATE_CHAR (1, chr);
+ SCM_ASSERT (SCM_FALSEP (proc)
+ || SCM_EQ_P (scm_procedure_p (proc), SCM_BOOL_T),
+ proc, SCM_ARG2, FUNC_NAME);
/* Check if chr is already in the alist. */
this = *scm_read_hash_procedures;
if (SCM_NULLP (this))
{
/* not found, so add it to the beginning. */
- if (SCM_NFALSEP (proc))
+ if (!SCM_FALSEP (proc))
{
*scm_read_hash_procedures =
scm_cons (scm_cons (chr, proc), *scm_read_hash_procedures);
scm_init_read ()
{
scm_read_hash_procedures =
- SCM_CDRLOC (scm_sysintern ("read-hash-procedures", SCM_EOL));
+ SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL));
scm_init_opts (scm_read_options, scm_read_opts, SCM_N_READ_OPTIONS);
#include "libguile/read.x"