-/* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007, 2008, 2009, 2010 Free Software
- * Foundation, Inc.
+/* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006,
+ * 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
#include <string.h>
#include <unistd.h>
#include <unicase.h>
+#include <unictype.h>
#include "libguile/_scm.h"
#include "libguile/bytevectors.h"
SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
SCM_SYMBOL (scm_keyword_prefix, "prefix");
SCM_SYMBOL (scm_keyword_postfix, "postfix");
+SCM_SYMBOL (sym_nil, "nil");
scm_t_option scm_read_opts[] = {
{ SCM_OPTION_BOOLEAN, "copy", 0,
"Copy source code expressions." },
- { SCM_OPTION_BOOLEAN, "positions", 0,
+ { SCM_OPTION_BOOLEAN, "positions", 1,
"Record positions of source code expressions." },
{ SCM_OPTION_BOOLEAN, "case-insensitive", 0,
"Convert symbols to lower case."},
- { SCM_OPTION_SCM, "keywords", (unsigned long) SCM_BOOL_F,
+ { SCM_OPTION_SCM, "keywords", (scm_t_bits) SCM_BOOL_F_BITS,
"Style of keyword recognition: #f, 'prefix or 'postfix."},
- { SCM_OPTION_BOOLEAN, "elisp-vectors", 0,
- "Support Elisp vector syntax, namely `[...]'."},
- { SCM_OPTION_BOOLEAN, "elisp-strings", 0,
- "Support `\\(' and `\\)' in strings."},
{ SCM_OPTION_BOOLEAN, "r6rs-hex-escapes", 0,
"Use R6RS variable-length character and string hex escapes."},
{ SCM_OPTION_BOOLEAN, "square-brackets", 1,
"Treat `[' and `]' as parentheses, for R6RS compatibility."},
+ { SCM_OPTION_BOOLEAN, "hungry-eol-escapes", 0,
+ "In strings, consume leading whitespace after an escaped end-of-line."},
{ 0, },
};
string = scm_get_output_string (string_port);
scm_close_output_port (string_port);
- scm_error_scm (scm_from_locale_symbol ("read-error"),
+ scm_error_scm (scm_from_latin1_symbol ("read-error"),
function? scm_from_locale_string (function) : SCM_BOOL_F,
string,
arg,
}
#undef FUNC_NAME
-/* An association list mapping extra hash characters to procedures. */
-static SCM *scm_read_hash_procedures;
+/* A fluid referring to an association list mapping extra hash
+ characters to procedures. */
+static SCM *scm_i_read_hash_procedures;
+static SCM
+scm_i_read_hash_procedures_ref (void)
+{
+ return scm_fluid_ref (*scm_i_read_hash_procedures);
+}
+
+static void
+scm_i_read_hash_procedures_set_x (SCM value)
+{
+ scm_fluid_set_x (*scm_i_read_hash_procedures, value);
+}
\f
/* Token readers. */
|| ((_chr) == 'd') || ((_chr) == 'l'))
/* Read an SCSH block comment. */
-static inline SCM scm_read_scsh_block_comment (scm_t_wchar, SCM);
+static SCM scm_read_scsh_block_comment (scm_t_wchar, SCM);
static SCM scm_read_r6rs_block_comment (scm_t_wchar, SCM);
static SCM scm_read_commented_expression (scm_t_wchar, SCM);
+static SCM scm_read_shebang (scm_t_wchar, SCM);
static SCM scm_get_hash_procedure (int);
/* Read from PORT until a delimiter (e.g., a whitespace) is read. Put the
result in the pre-allocated buffer BUF. Return zero if the whole token has
fewer than BUF_SIZE bytes, non-zero otherwise. READ will be set the number of
bytes actually read. */
-static inline int
+static int
read_token (SCM port, char *buf, const size_t buf_size, size_t *read)
{
*read = 0;
{
int chr;
- chr = scm_get_byte_or_eof (port);
+ chr = scm_get_byte_or_eof_unlocked (port);
if (chr == EOF)
return 0;
else if (CHAR_IS_DELIMITER (chr))
{
- scm_unget_byte (chr, port);
+ scm_unget_byte_unlocked (chr, port);
return 0;
}
else
static int
flush_ws (SCM port, const char *eoferr)
{
- register scm_t_wchar c;
+ scm_t_wchar c;
while (1)
- switch (c = scm_getc (port))
+ switch (c = scm_getc_unlocked (port))
{
case EOF:
goteof:
case ';':
lp:
- switch (c = scm_getc (port))
+ switch (c = scm_getc_unlocked (port))
{
case EOF:
goto goteof;
break;
case '#':
- switch (c = scm_getc (port))
+ switch (c = scm_getc_unlocked (port))
{
case EOF:
eoferr = "read_sharp";
goto goteof;
case '!':
- scm_read_scsh_block_comment (c, port);
+ scm_read_shebang (c, port);
break;
case ';':
scm_read_commented_expression (c, port);
}
/* fall through */
default:
- scm_ungetc (c, port);
+ scm_ungetc_unlocked (c, port);
return '#';
}
break;
/* Token readers. */
static SCM scm_read_expression (SCM port);
-static SCM scm_read_sharp (int chr, SCM port);
-static SCM recsexpr (SCM obj, long line, int column, SCM filename);
+static SCM scm_read_sharp (int chr, SCM port, long line, int column);
+
+static SCM
+maybe_annotate_source (SCM x, SCM port, long line, int column)
+{
+ if (SCM_RECORD_POSITIONS_P)
+ scm_i_set_source_properties_x (x, line, column, SCM_FILENAME (port));
+ return x;
+}
static SCM
scm_read_sexp (scm_t_wchar chr, SCM port)
#define FUNC_NAME "scm_i_lreadparen"
{
- register int c;
- register SCM tmp;
- register SCM tl, ans = SCM_EOL;
- SCM tl2 = SCM_EOL, ans2 = SCM_EOL, copy = SCM_BOOL_F;
+ int c;
+ SCM tmp, tl, ans = SCM_EOL;
const int terminating_char = ((chr == '[') ? ']' : ')');
/* Need to capture line and column numbers here. */
long line = SCM_LINUM (port);
int column = SCM_COL (port) - 1;
-
c = flush_ws (port, FUNC_NAME);
if (terminating_char == c)
return SCM_EOL;
- scm_ungetc (c, port);
- if (scm_is_eq (scm_sym_dot,
- (tmp = scm_read_expression (port))))
+ scm_ungetc_unlocked (c, port);
+ tmp = scm_read_expression (port);
+
+ /* Note that it is possible for scm_read_expression to return
+ scm_sym_dot, but not as part of a dotted pair: as in #{.}#. So
+ check that it's a real dot by checking `c'. */
+ if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
{
ans = scm_read_expression (port);
if (terminating_char != (c = flush_ws (port, FUNC_NAME)))
/* Build the head of the list structure. */
ans = tl = scm_cons (tmp, SCM_EOL);
- if (SCM_COPY_SOURCE_P)
- ans2 = tl2 = scm_cons (scm_is_pair (tmp)
- ? copy
- : tmp,
- SCM_EOL);
-
while (terminating_char != (c = flush_ws (port, FUNC_NAME)))
{
SCM new_tail;
- scm_ungetc (c, port);
- if (scm_is_eq (scm_sym_dot,
- (tmp = scm_read_expression (port))))
- {
- SCM_SETCDR (tl, tmp = scm_read_expression (port));
+ if (c == ')' || (SCM_SQUARE_BRACKETS_P && c == ']'))
+ scm_i_input_error (FUNC_NAME, port,
+ "in pair: mismatched close paren: ~A",
+ scm_list_1 (SCM_MAKE_CHAR (c)));
- if (SCM_COPY_SOURCE_P)
- SCM_SETCDR (tl2, scm_cons (scm_is_pair (tmp) ? copy : tmp,
- SCM_EOL));
+ scm_ungetc_unlocked (c, port);
+ tmp = scm_read_expression (port);
+
+ /* See above note about scm_sym_dot. */
+ if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
+ {
+ SCM_SETCDR (tl, scm_read_expression (port));
c = flush_ws (port, FUNC_NAME);
if (terminating_char != c)
new_tail = scm_cons (tmp, SCM_EOL);
SCM_SETCDR (tl, new_tail);
tl = new_tail;
-
- if (SCM_COPY_SOURCE_P)
- {
- SCM new_tail2 = scm_cons (scm_is_pair (tmp)
- ? copy
- : tmp, SCM_EOL);
- SCM_SETCDR (tl2, new_tail2);
- tl2 = new_tail2;
- }
}
exit:
- if (SCM_RECORD_POSITIONS_P)
- scm_whash_insert (scm_source_whash,
- ans,
- scm_make_srcprops (line, column,
- SCM_FILENAME (port),
- SCM_COPY_SOURCE_P
- ? ans2
- : SCM_UNDEFINED,
- SCM_EOL));
- return ans;
+ return maybe_annotate_source (ans, port, line, column);
}
#undef FUNC_NAME
c = 0; \
while (i < ndigits) \
{ \
- a = scm_getc (port); \
+ a = scm_getc_unlocked (port); \
if (a == EOF) \
goto str_eof; \
if (terminator \
} \
} while (0)
+static void
+skip_intraline_whitespace (SCM port)
+{
+ scm_t_wchar c;
+
+ do
+ {
+ c = scm_getc_unlocked (port);
+ if (c == EOF)
+ return;
+ }
+ while (c == '\t' || uc_is_general_category (c, UC_SPACE_SEPARATOR));
+
+ scm_ungetc_unlocked (c, port);
+}
+
static SCM
scm_read_string (int chr, SCM port)
#define FUNC_NAME "scm_lreadr"
unsigned c_str_len = 0;
scm_t_wchar c;
- str = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL);
- while ('"' != (c = scm_getc (port)))
+ /* Need to capture line and column numbers here. */
+ long line = SCM_LINUM (port);
+ int column = SCM_COL (port) - 1;
+
+ str = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL, 0);
+ while ('"' != (c = scm_getc_unlocked (port)))
{
if (c == EOF)
{
if (c_str_len + 1 >= scm_i_string_length (str))
{
- SCM addy = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL);
+ SCM addy = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL, 0);
str = scm_string_append (scm_list_2 (str, addy));
}
if (c == '\\')
{
- switch (c = scm_getc (port))
+ switch (c = scm_getc_unlocked (port))
{
case EOF:
goto str_eof;
case '"':
case '\\':
break;
- case '(':
- case ')':
- if (SCM_ESCAPED_PARENS_P)
- break;
- goto bad_escaped;
case '\n':
+ if (SCM_HUNGRY_EOL_ESCAPES_P)
+ skip_intraline_whitespace (port);
continue;
case '0':
c = '\0';
scm_i_string_set_x (str, c_str_len++, c);
scm_i_string_stop_writing ();
}
-
- if (c_str_len > 0)
- {
- return scm_i_substring_copy (str, 0, c_str_len);
- }
-
- return scm_nullstr;
+ return maybe_annotate_source (scm_i_substring_copy (str, 0, c_str_len),
+ port, line, column);
}
#undef FUNC_NAME
int overflow;
scm_t_port *pt = SCM_PTAB_ENTRY (port);
- scm_ungetc (chr, port);
+ /* Need to capture line and column numbers here. */
+ long line = SCM_LINUM (port);
+ int column = SCM_COL (port) - 1;
+
+ scm_ungetc_unlocked (chr, port);
overflow = read_complete_token (port, buffer, sizeof (buffer),
&overflow_buffer, &bytes_read);
pt->ilseq_handler);
result = scm_string_to_number (str, SCM_UNDEFINED);
- if (!scm_is_true (result))
+ if (scm_is_false (result))
{
/* Return a symbol instead of a number */
if (SCM_CASE_INSENSITIVE_P)
str = scm_string_downcase_x (str);
result = scm_string_to_symbol (str);
}
+ else if (SCM_NIMP (result))
+ result = maybe_annotate_source (result, port, line, column);
if (overflow)
free (overflow_buffer);
scm_t_port *pt = SCM_PTAB_ENTRY (port);
SCM str;
- scm_ungetc (chr, port);
+ scm_ungetc_unlocked (chr, port);
overflow = read_complete_token (port, buffer, READER_BUFFER_SIZE,
&overflow_buffer, &bytes_read);
if (bytes_read > 0)
break;
default:
- scm_ungetc (chr, port);
- scm_ungetc ('#', port);
+ scm_ungetc_unlocked (chr, port);
+ scm_ungetc_unlocked ('#', port);
radix = 10;
}
{
scm_t_wchar c;
- c = scm_getc (port);
+ c = scm_getc_unlocked (port);
if ('@' == c)
p = scm_sym_uq_splicing;
else
{
- scm_ungetc (c, port);
+ scm_ungetc_unlocked (c, port);
p = scm_sym_unquote;
}
break;
}
p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
- if (SCM_RECORD_POSITIONS_P)
- scm_whash_insert (scm_source_whash, p,
- scm_make_srcprops (line, column,
- SCM_FILENAME (port),
- SCM_COPY_SOURCE_P
- ? (scm_cons2 (SCM_CAR (p),
- SCM_CAR (SCM_CDR (p)),
- SCM_EOL))
- : SCM_UNDEFINED,
- SCM_EOL));
-
-
- return p;
+ return maybe_annotate_source (p, port, line, column);
}
SCM_SYMBOL (sym_syntax, "syntax");
{
int c;
- c = scm_getc (port);
+ c = scm_getc_unlocked (port);
if ('@' == c)
p = sym_unsyntax_splicing;
else
{
- scm_ungetc (c, port);
+ scm_ungetc_unlocked (c, port);
p = sym_unsyntax;
}
break;
}
p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
- if (SCM_RECORD_POSITIONS_P)
- scm_whash_insert (scm_source_whash, p,
- scm_make_srcprops (line, column,
- SCM_FILENAME (port),
- SCM_COPY_SOURCE_P
- ? (scm_cons2 (SCM_CAR (p),
- SCM_CAR (SCM_CDR (p)),
- SCM_EOL))
- : SCM_UNDEFINED,
- SCM_EOL));
-
-
- return p;
+ return maybe_annotate_source (p, port, line, column);
}
-static inline SCM
+static SCM
+scm_read_nil (int chr, SCM port)
+{
+ SCM id = scm_read_mixed_case_symbol (chr, port);
+
+ if (!scm_is_eq (id, sym_nil))
+ scm_i_input_error ("scm_read_nil", port,
+ "unexpected input while reading #nil: ~a",
+ scm_list_1 (id));
+
+ return SCM_ELISP_NIL;
+}
+
+static SCM
scm_read_semicolon_comment (int chr, SCM port)
{
int c;
/* We use the get_byte here because there is no need to get the
locale correct with comment input. This presumes that newline
always represents itself no matter what the encoding is. */
- for (c = scm_get_byte_or_eof (port);
+ for (c = scm_get_byte_or_eof_unlocked (port);
(c != EOF) && (c != '\n');
- c = scm_get_byte_or_eof (port));
+ c = scm_get_byte_or_eof_unlocked (port));
return SCM_UNSPECIFIED;
}
overflow = read_token (port, buffer, READER_CHAR_NAME_MAX_SIZE, &bytes_read);
if (overflow)
- goto char_error;
+ scm_i_input_error (FUNC_NAME, port, "character name too long", SCM_EOL);
if (bytes_read == 0)
{
- chr = scm_getc (port);
+ chr = scm_getc_unlocked (port);
if (chr == EOF)
scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
"while reading character", SCM_EOL);
SCM p = scm_string_to_number (charname, scm_from_uint (8));
if (SCM_I_INUMP (p))
{
- scm_t_wchar c = SCM_I_INUM (p);
+ scm_t_wchar c = scm_to_uint32 (p);
if (SCM_IS_UNICODE_CHAR (c))
return SCM_MAKE_CHAR (c);
else
- scm_i_input_error (FUNC_NAME, port,
+ scm_i_input_error (FUNC_NAME, port,
"out-of-range octal character escape: ~a",
scm_list_1 (charname));
}
}
- if (cp == 'x' && (charname_len > 1) && SCM_R6RS_ESCAPES_P)
+ if (cp == 'x' && (charname_len > 1))
{
SCM p;
-
+
/* Convert from hex, skipping the initial 'x' character in CHARNAME */
p = scm_string_to_number (scm_c_substring (charname, 1, charname_len),
scm_from_uint (16));
if (SCM_I_INUMP (p))
{
- scm_t_wchar c = SCM_I_INUM (p);
+ scm_t_wchar c = scm_to_uint32 (p);
if (SCM_IS_UNICODE_CHAR (c))
return SCM_MAKE_CHAR (c);
else
return ch;
}
- char_error:
scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
scm_list_1 (charname));
}
#undef FUNC_NAME
-static inline SCM
+static SCM
scm_read_keyword (int chr, SCM port)
{
SCM symbol;
return (scm_symbol_to_keyword (symbol));
}
-static inline SCM
-scm_read_vector (int chr, SCM port)
+static SCM
+scm_read_vector (int chr, SCM port, long line, int column)
{
/* Note: We call `scm_read_sexp ()' rather than READER here in order to
guarantee that it's going to do what we want. After all, this is an
implementation detail of `scm_read_vector ()', not a desirable
property. */
- return (scm_vector (scm_read_sexp (chr, port)));
+ return maybe_annotate_source (scm_vector (scm_read_sexp (chr, port)),
+ port, line, column);
}
-static inline SCM
-scm_read_srfi4_vector (int chr, SCM port)
+static SCM
+scm_read_array (int chr, SCM port, long line, int column)
{
- return scm_i_read_array (port, chr);
+ SCM result = scm_i_read_array (port, chr);
+ if (scm_is_false (result))
+ return result;
+ else
+ return maybe_annotate_source (result, port, line, column);
}
static SCM
-scm_read_bytevector (scm_t_wchar chr, SCM port)
+scm_read_srfi4_vector (int chr, SCM port, long line, int column)
{
- chr = scm_getc (port);
+ return scm_read_array (chr, port, line, column);
+}
+
+static SCM
+scm_read_bytevector (scm_t_wchar chr, SCM port, long line, int column)
+{
+ chr = scm_getc_unlocked (port);
if (chr != 'u')
goto syntax;
- chr = scm_getc (port);
+ chr = scm_getc_unlocked (port);
if (chr != '8')
goto syntax;
- chr = scm_getc (port);
+ chr = scm_getc_unlocked (port);
if (chr != '(')
goto syntax;
- return scm_u8_list_to_bytevector (scm_read_sexp (chr, port));
+ return maybe_annotate_source
+ (scm_u8_list_to_bytevector (scm_read_sexp (chr, port)),
+ port, line, column);
syntax:
scm_i_input_error ("read_bytevector", port,
}
static SCM
-scm_read_guile_bit_vector (scm_t_wchar chr, SCM port)
+scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, long line, int column)
{
/* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
terribly inefficient but who cares? */
SCM s_bits = SCM_EOL;
- for (chr = scm_getc (port);
+ for (chr = scm_getc_unlocked (port);
(chr != EOF) && ((chr == '0') || (chr == '1'));
- chr = scm_getc (port))
+ chr = scm_getc_unlocked (port))
{
s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
}
if (chr != EOF)
- scm_ungetc (chr, port);
+ scm_ungetc_unlocked (chr, port);
- return scm_bitvector (scm_reverse_x (s_bits, SCM_EOL));
+ return maybe_annotate_source
+ (scm_bitvector (scm_reverse_x (s_bits, SCM_EOL)),
+ port, line, column);
}
-static inline SCM
+static SCM
scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
{
int bang_seen = 0;
- /* We can use the get_byte here because there is no need to get the
- locale correct when reading comments. This presumes that
- hash and exclamation points always represent themselves no
- matter what the source encoding is.*/
for (;;)
{
- int c = scm_get_byte_or_eof (port);
+ int c = scm_getc_unlocked (port);
if (c == EOF)
scm_i_input_error ("skip_block_comment", port,
return SCM_UNSPECIFIED;
}
+static SCM
+scm_read_shebang (scm_t_wchar chr, SCM port)
+{
+ int c = 0;
+ if ((c = scm_get_byte_or_eof_unlocked (port)) != 'r')
+ {
+ scm_ungetc_unlocked (c, port);
+ return scm_read_scsh_block_comment (chr, port);
+ }
+ if ((c = scm_get_byte_or_eof_unlocked (port)) != '6')
+ {
+ scm_ungetc_unlocked (c, port);
+ scm_ungetc_unlocked ('r', port);
+ return scm_read_scsh_block_comment (chr, port);
+ }
+ if ((c = scm_get_byte_or_eof_unlocked (port)) != 'r')
+ {
+ scm_ungetc_unlocked (c, port);
+ scm_ungetc_unlocked ('6', port);
+ scm_ungetc_unlocked ('r', port);
+ return scm_read_scsh_block_comment (chr, port);
+ }
+ if ((c = scm_get_byte_or_eof_unlocked (port)) != 's')
+ {
+ scm_ungetc_unlocked (c, port);
+ scm_ungetc_unlocked ('r', port);
+ scm_ungetc_unlocked ('6', port);
+ scm_ungetc_unlocked ('r', port);
+ return scm_read_scsh_block_comment (chr, port);
+ }
+
+ return SCM_UNSPECIFIED;
+}
+
static SCM
scm_read_r6rs_block_comment (scm_t_wchar chr, SCM port)
{
/* Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
nested. So care must be taken. */
int nesting_level = 1;
- int opening_seen = 0, closing_seen = 0;
+
+ int a = scm_getc_unlocked (port);
+
+ if (a == EOF)
+ scm_i_input_error ("scm_read_r6rs_block_comment", port,
+ "unterminated `#| ... |#' comment", SCM_EOL);
while (nesting_level > 0)
{
- int c = scm_getc (port);
+ int b = scm_getc_unlocked (port);
- if (c == EOF)
+ if (b == EOF)
scm_i_input_error ("scm_read_r6rs_block_comment", port,
"unterminated `#| ... |#' comment", SCM_EOL);
- if (opening_seen)
- {
- if (c == '|')
- nesting_level++;
- opening_seen = 0;
- }
- else if (closing_seen)
- {
- if (c == '#')
- nesting_level--;
- closing_seen = 0;
- }
- else if (c == '|')
- closing_seen = 1;
- else if (c == '#')
- opening_seen = 1;
- else
- opening_seen = closing_seen = 0;
+ if (a == '|' && b == '#')
+ {
+ nesting_level--;
+ b = EOF;
+ }
+ else if (a == '#' && b == '|')
+ {
+ nesting_level++;
+ b = EOF;
+ }
+
+ a = b;
}
return SCM_UNSPECIFIED;
if (EOF == c)
scm_i_input_error ("read_commented_expression", port,
"no expression after #; comment", SCM_EOL);
- scm_ungetc (c, port);
+ scm_ungetc_unlocked (c, port);
scm_read_expression (port);
return SCM_UNSPECIFIED;
}
#{This is all a symbol name}#
So here, CHR is expected to be `{'. */
- int saw_brace = 0, finished = 0;
+ int saw_brace = 0;
size_t len = 0;
- SCM buf = scm_i_make_string (1024, NULL);
+ SCM buf = scm_i_make_string (1024, NULL, 0);
buf = scm_i_string_start_writing (buf);
- while ((chr = scm_getc (port)) != EOF)
+ while ((chr = scm_getc_unlocked (port)) != EOF)
{
if (saw_brace)
{
if (chr == '#')
{
- finished = 1;
break;
}
else
{
saw_brace = 0;
scm_i_string_set_x (buf, len++, '}');
- scm_i_string_set_x (buf, len++, chr);
}
}
- else if (chr == '}')
+
+ if (chr == '}')
saw_brace = 1;
+ else if (chr == '\\')
+ {
+ /* It used to be that print.c would print extended-read-syntax
+ symbols with backslashes before "non-standard" chars, but
+ this routine wouldn't do anything with those escapes.
+ Bummer. What we've done is to change print.c to output
+ R6RS hex escapes for those characters, relying on the fact
+ that the extended read syntax would never put a `\' before
+ an `x'. For now, we just ignore other instances of
+ backslash in the string. */
+ switch ((chr = scm_getc_unlocked (port)))
+ {
+ case EOF:
+ goto done;
+ case 'x':
+ {
+ scm_t_wchar c;
+
+ SCM_READ_HEX_ESCAPE (10, ';');
+ scm_i_string_set_x (buf, len++, c);
+ break;
+
+ str_eof:
+ chr = EOF;
+ goto done;
+
+ bad_escaped:
+ scm_i_string_stop_writing ();
+ scm_i_input_error ("scm_read_extended_symbol", port,
+ "illegal character in escape sequence: ~S",
+ scm_list_1 (SCM_MAKE_CHAR (c)));
+ break;
+ }
+ default:
+ scm_i_string_set_x (buf, len++, chr);
+ break;
+ }
+ }
else
- scm_i_string_set_x (buf, len++, chr);
+ scm_i_string_set_x (buf, len++, chr);
if (len >= scm_i_string_length (buf) - 2)
{
SCM addy;
scm_i_string_stop_writing ();
- addy = scm_i_make_string (1024, NULL);
+ addy = scm_i_make_string (1024, NULL, 0);
buf = scm_string_append (scm_list_2 (buf, addy));
len = 0;
buf = scm_i_string_start_writing (buf);
}
-
- if (finished)
- break;
}
+
+ done:
scm_i_string_stop_writing ();
+ if (chr == EOF)
+ scm_i_input_error ("scm_read_extended_symbol", port,
+ "end of file while reading symbol", SCM_EOL);
return (scm_string_to_symbol (scm_c_substring (buf, 0, len)));
}
SCM got;
got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
- if (!scm_is_eq (got, SCM_UNSPECIFIED))
- {
- if (SCM_RECORD_POSITIONS_P)
- return (recsexpr (got, line, column,
- SCM_FILENAME (port)));
- else
- return got;
- }
+
+ if (scm_is_pair (got) && !scm_i_has_source_properties (got))
+ scm_i_set_source_properties_x (got, line, column, SCM_FILENAME (port));
+
+ return got;
}
return SCM_UNSPECIFIED;
/* The reader for the sharp `#' character. It basically dispatches reads
among the above token readers. */
static SCM
-scm_read_sharp (scm_t_wchar chr, SCM port)
+scm_read_sharp (scm_t_wchar chr, SCM port, long line, int column)
#define FUNC_NAME "scm_lreadr"
{
SCM result;
- chr = scm_getc (port);
+ chr = scm_getc_unlocked (port);
result = scm_read_sharp_extension (chr, port);
if (!scm_is_eq (result, SCM_UNSPECIFIED))
case '\\':
return (scm_read_character (chr, port));
case '(':
- return (scm_read_vector (chr, port));
+ return (scm_read_vector (chr, port, line, column));
case 's':
case 'u':
case 'f':
+ case 'c':
/* This one may return either a boolean or an SRFI-4 vector. */
- return (scm_read_srfi4_vector (chr, port));
+ return (scm_read_srfi4_vector (chr, port, line, column));
case 'v':
- return (scm_read_bytevector (chr, port));
+ return (scm_read_bytevector (chr, port, line, column));
case '*':
- return (scm_read_guile_bit_vector (chr, port));
+ return (scm_read_guile_bit_vector (chr, port, line, column));
case 't':
case 'T':
case 'F':
- /* This one may return either a boolean or an SRFI-4 vector. */
return (scm_read_boolean (chr, port));
case ':':
return (scm_read_keyword (chr, port));
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
case '@':
-#if SCM_ENABLE_DEPRECATED
- /* See below for 'i' and 'e'. */
- case 'a':
- case 'c':
- case 'y':
- case 'h':
- case 'l':
-#endif
- return (scm_i_read_array (port, chr));
+ return (scm_read_array (chr, port, line, column));
case 'i':
case 'e':
-#if SCM_ENABLE_DEPRECATED
- {
- /* When next char is '(', it really is an old-style
- uniform array. */
- scm_t_wchar next_c = scm_getc (port);
- if (next_c != EOF)
- scm_ungetc (next_c, port);
- if (next_c == '(')
- return scm_i_read_array (port, chr);
- /* Fall through. */
- }
-#endif
case 'b':
case 'B':
case 'o':
case '{':
return (scm_read_extended_symbol (chr, port));
case '!':
- return (scm_read_scsh_block_comment (chr, port));
+ return (scm_read_shebang (chr, port));
case ';':
return (scm_read_commented_expression (chr, port));
case '`':
case '\'':
case ',':
return (scm_read_syntax (chr, port));
+ case 'n':
+ return (scm_read_nil (chr, port));
default:
result = scm_read_sharp_extension (chr, port);
if (scm_is_eq (result, SCM_UNSPECIFIED))
{
while (1)
{
- register scm_t_wchar chr;
+ scm_t_wchar chr;
- chr = scm_getc (port);
+ chr = scm_getc_unlocked (port);
switch (chr)
{
return (scm_read_quote (chr, port));
case '#':
{
- SCM result;
- result = scm_read_sharp (chr, port);
+ long line = SCM_LINUM (port);
+ int column = SCM_COL (port) - 1;
+ SCM result = scm_read_sharp (chr, port, line, column);
if (scm_is_eq (result, SCM_UNSPECIFIED))
/* We read a comment or some such. */
break;
case ')':
scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
break;
+ case ']':
+ if (SCM_SQUARE_BRACKETS_P)
+ scm_i_input_error (FUNC_NAME, port, "unexpected \"]\"", SCM_EOL);
+ /* otherwise fall through */
case EOF:
return SCM_EOF_VAL;
case ':':
c = flush_ws (port, (char *) NULL);
if (EOF == c)
return SCM_EOF_VAL;
- scm_ungetc (c, port);
+ scm_ungetc_unlocked (c, port);
return (scm_read_expression (port));
}
\f
-/* Used when recording expressions constructed by `scm_read_sharp ()'. */
-static SCM
-recsexpr (SCM obj, long line, int column, SCM filename)
-{
- if (!scm_is_pair(obj)) {
- return obj;
- } else {
- SCM tmp = obj, copy;
- /* If this sexpr is visible in the read:sharp source, we want to
- keep that information, so only record non-constant cons cells
- which haven't previously been read by the reader. */
- if (scm_is_false (scm_whash_lookup (scm_source_whash, obj)))
- {
- if (SCM_COPY_SOURCE_P)
- {
- copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename),
- SCM_UNDEFINED);
- while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
- {
- SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp),
- line,
- column,
- filename),
- SCM_UNDEFINED));
- copy = SCM_CDR (copy);
- }
- SCM_SETCDR (copy, tmp);
- }
- else
- {
- recsexpr (SCM_CAR (obj), line, column, filename);
- while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
- recsexpr (SCM_CAR (tmp), line, column, filename);
- copy = SCM_UNDEFINED;
- }
- scm_whash_insert (scm_source_whash,
- obj,
- scm_make_srcprops (line,
- column,
- filename,
- copy,
- SCM_EOL));
- }
- return obj;
- }
-}
-
/* Manipulate the read-hash-procedures alist. This could be written in
Scheme, but maybe it will also be used by C code during initialisation. */
SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
proc, SCM_ARG2, FUNC_NAME);
/* Check if chr is already in the alist. */
- this = *scm_read_hash_procedures;
+ this = scm_i_read_hash_procedures_ref ();
prev = SCM_BOOL_F;
while (1)
{
/* not found, so add it to the beginning. */
if (scm_is_true (proc))
{
- *scm_read_hash_procedures =
- scm_cons (scm_cons (chr, proc), *scm_read_hash_procedures);
+ SCM new = scm_cons (scm_cons (chr, proc),
+ scm_i_read_hash_procedures_ref ());
+ scm_i_read_hash_procedures_set_x (new);
}
break;
}
/* remove it. */
if (scm_is_false (prev))
{
- *scm_read_hash_procedures =
- SCM_CDR (*scm_read_hash_procedures);
+ SCM rest = SCM_CDR (scm_i_read_hash_procedures_ref ());
+ scm_i_read_hash_procedures_set_x (rest);
}
else
scm_set_cdr_x (prev, SCM_CDR (this));
static SCM
scm_get_hash_procedure (int c)
{
- SCM rest = *scm_read_hash_procedures;
+ SCM rest = scm_i_read_hash_procedures_ref ();
while (1)
{
char *
scm_i_scan_for_encoding (SCM port)
{
+ scm_t_port *pt;
char header[SCM_ENCODING_SEARCH_SIZE+1];
- size_t bytes_read;
+ size_t bytes_read, encoding_length, i;
char *encoding = NULL;
int utf8_bom = 0;
- char *pos;
- int i;
+ char *pos, *encoding_start;
int in_comment;
- if (SCM_FPORTP (port) && !SCM_FDES_RANDOM_P (SCM_FPORT_FDES (port)))
- /* PORT is a non-seekable file port (e.g., as created by Bash when using
- "guile <(echo '(display "hello")')") so bail out. */
- return NULL;
+ pt = SCM_PTAB_ENTRY (port);
+
+ if (pt->rw_active == SCM_PORT_WRITE)
+ scm_flush_unlocked (port);
- bytes_read = scm_c_read (port, header, SCM_ENCODING_SEARCH_SIZE);
+ if (pt->rw_random)
+ pt->rw_active = SCM_PORT_READ;
- scm_seek (port, scm_from_int (0), scm_from_int (SEEK_SET));
+ if (pt->read_pos == pt->read_end)
+ {
+ /* We can use the read buffer, and thus avoid a seek. */
+ if (scm_fill_input_unlocked (port) == EOF)
+ return NULL;
+
+ bytes_read = pt->read_end - pt->read_pos;
+ if (bytes_read > SCM_ENCODING_SEARCH_SIZE)
+ bytes_read = SCM_ENCODING_SEARCH_SIZE;
+
+ if (bytes_read <= 1)
+ /* An unbuffered port -- don't scan. */
+ return NULL;
+
+ memcpy (header, pt->read_pos, bytes_read);
+ header[bytes_read] = '\0';
+ }
+ else
+ {
+ /* Try to read some bytes and then seek back. Not all ports
+ support seeking back; and indeed some file ports (like
+ /dev/urandom) will succeed on an lseek (fd, 0, SEEK_CUR)---the
+ check performed by SCM_FPORT_FDES---but fail to seek
+ backwards. Hence this block comes second. We prefer to use
+ the read buffer in-place. */
+ if (SCM_FPORTP (port) && !SCM_FDES_RANDOM_P (SCM_FPORT_FDES (port)))
+ return NULL;
+
+ bytes_read = scm_c_read_unlocked (port, header, SCM_ENCODING_SEARCH_SIZE);
+ header[bytes_read] = '\0';
+ scm_seek (port, scm_from_int (0), scm_from_int (SEEK_SET));
+ }
if (bytes_read > 3
&& header[0] == '\xef' && header[1] == '\xbb' && header[2] == '\xbf')
pos ++;
/* grab the next token */
+ encoding_start = pos;
i = 0;
- while (pos + i - header <= SCM_ENCODING_SEARCH_SIZE
- && pos + i - header < bytes_read
- && (isalnum ((int) pos[i]) || strchr ("_-.:/,+=()", pos[i]) != NULL))
+ while (encoding_start + i - header <= SCM_ENCODING_SEARCH_SIZE
+ && encoding_start + i - header < bytes_read
+ && (isalnum ((int) encoding_start[i])
+ || strchr ("_-.:/,+=()", encoding_start[i]) != NULL))
i++;
- if (i == 0)
+ encoding_length = i;
+ if (encoding_length == 0)
return NULL;
- encoding = scm_gc_strndup (pos, i, "encoding");
- for (i = 0; i < strlen (encoding); i++)
+ encoding = scm_gc_strndup (encoding_start, encoding_length, "encoding");
+ for (i = 0; i < encoding_length; i++)
encoding[i] = toupper ((int) encoding[i]);
/* push backwards to make sure we were in a comment */
in_comment = 0;
- while (pos - i - header > 0)
+ pos = encoding_start;
+ while (pos >= header)
{
- if (*(pos - i) == '\n')
+ if (*pos == ';')
+ {
+ in_comment = 1;
+ break;
+ }
+ else if (*pos == '\n' || pos == header)
{
/* This wasn't in a semicolon comment. Check for a
hash-bang comment. */
char *beg = strstr (header, "#!");
char *end = strstr (header, "!#");
- if (beg < pos && pos < end)
+ if (beg < encoding_start && encoding_start + encoding_length <= end)
in_comment = 1;
break;
}
- if (*(pos - i) == ';')
- {
- in_comment = 1;
- break;
- }
- i ++;
+ else
+ {
+ pos --;
+ continue;
+ }
}
if (!in_comment)
/* This wasn't in a comment */
return NULL;
if (utf8_bom && strcmp(encoding, "UTF-8"))
- scm_misc_error (NULL,
+ scm_misc_error (NULL,
"the port input declares the encoding ~s but is encoded as UTF-8",
scm_list_1 (scm_from_locale_string (encoding)));
SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0,
(SCM port),
"Scans the port for an Emacs-like character coding declaration\n"
- "near the top of the contents of a port with random-acessible contents.\n"
+ "near the top of the contents of a port with random-accessible contents.\n"
"The coding declaration is of the form\n"
"@code{coding: XXXXX} and must appear in a scheme comment.\n"
"\n"
char *enc;
SCM s_enc;
+ SCM_VALIDATE_OPINPORT (SCM_ARG1, port);
+
enc = scm_i_scan_for_encoding (port);
if (enc == NULL)
return SCM_BOOL_F;
void
scm_init_read ()
{
- scm_read_hash_procedures =
- SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL));
+ SCM read_hash_procs;
+
+ read_hash_procs = scm_make_fluid_with_default (SCM_EOL);
+
+ scm_i_read_hash_procedures =
+ SCM_VARIABLE_LOC (scm_c_define ("%read-hash-procedures", read_hash_procs));
scm_init_opts (scm_read_options, scm_read_opts);
#include "libguile/read.x"