-/* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006,
- * 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+/* Copyright (C) 1995-1997, 1999-2001, 2003, 2004, 2006-2012, 2014
+ * Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
#include <unicase.h>
#include <unictype.h>
#include <c-strcase.h>
+#include <c-ctype.h>
#include "libguile/_scm.h"
#include "libguile/bytevectors.h"
"In strings, consume leading whitespace after an escaped end-of-line."},
{ SCM_OPTION_BOOLEAN, "curly-infix", 0,
"Support SRFI-105 curly infix expressions."},
+ { SCM_OPTION_BOOLEAN, "r7rs-symbols", 0,
+ "Support R7RS |...| symbol notation."},
{ 0, },
};
unsigned int hungry_eol_escapes_p : 1;
unsigned int curly_infix_p : 1;
unsigned int neoteric_p : 1;
+ unsigned int r7rs_symbols_p : 1;
};
typedef struct t_read_opts scm_t_read_opts;
maybe_annotate_source (SCM x, SCM port, scm_t_read_opts *opts,
long line, int column)
{
+ /* This condition can be caused by a user calling
+ set-port-column!. */
+ if (line < 0 || column < 0)
+ return x;
+
if (opts->record_positions_p)
scm_i_set_source_properties_x (x, line, column, SCM_FILENAME (port));
return x;
scm_ungetc_unlocked (c, port);
}
+/* Read either a double-quoted string or an R7RS-style symbol delimited
+ by vertical lines, depending on the value of 'chr' ('"' or '|').
+ Regardless, the result is always returned as a string. */
static SCM
-scm_read_string (int chr, SCM port, scm_t_read_opts *opts)
+scm_read_string_like_syntax (int chr, SCM port, scm_t_read_opts *opts)
#define FUNC_NAME "scm_lreadr"
{
/* For strings smaller than C_STR, this function creates only one Scheme
long line = SCM_LINUM (port);
int column = SCM_COL (port) - 1;
- while ('"' != (c = scm_getc_unlocked (port)))
+ while (chr != (c = scm_getc_unlocked (port)))
{
if (c == EOF)
{
str_eof:
scm_i_input_error (FUNC_NAME, port,
- "end of file in string constant", SCM_EOL);
+ (chr == '|'
+ ? "end of file in symbol"
+ : "end of file in string constant"),
+ SCM_EOL);
}
if (c_str_len + 1 >= READER_STRING_BUFFER_SIZE)
{
case EOF:
goto str_eof;
- case '"':
+ case '|':
case '\\':
+ case '(': /* Accept "\(" for use at the beginning of lines
+ in multiline strings to avoid confusing emacs
+ lisp modes. */
break;
case '\n':
if (opts->hungry_eol_escapes_p)
c = '\010';
break;
case 'x':
- if (opts->r6rs_escapes_p)
+ if (opts->r6rs_escapes_p || chr == '|')
SCM_READ_HEX_ESCAPE (10, ';');
else
SCM_READ_HEX_ESCAPE (2, '\0');
break;
}
default:
+ if (c == chr)
+ break;
bad_escaped:
scm_i_input_error (FUNC_NAME, port,
"illegal character in escape sequence: ~S",
}
#undef FUNC_NAME
+static SCM
+scm_read_string (int chr, SCM port, scm_t_read_opts *opts)
+{
+ return scm_read_string_like_syntax (chr, port, opts);
+}
+
+static SCM
+scm_read_r7rs_symbol (int chr, SCM port, scm_t_read_opts *opts)
+{
+ return scm_string_to_symbol (scm_read_string_like_syntax (chr, port, opts));
+}
static SCM
scm_read_number (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
return SCM_UNSPECIFIED;
}
+/* If the EXPECTED_CHARS are the next ones available from PORT, then
+ consume them and return 1. Otherwise leave the port position where
+ it was and return 0. EXPECTED_CHARS should be all lowercase, and
+ will be matched case-insensitively against the characters read from
+ PORT. */
+static int
+try_read_ci_chars (SCM port, const char *expected_chars)
+{
+ int num_chars_wanted = strlen (expected_chars);
+ int num_chars_read = 0;
+ char *chars_read = alloca (num_chars_wanted);
+ int c;
+
+ while (num_chars_read < num_chars_wanted)
+ {
+ c = scm_getc_unlocked (port);
+ if (c == EOF)
+ break;
+ else if (c_tolower (c) != expected_chars[num_chars_read])
+ {
+ scm_ungetc_unlocked (c, port);
+ break;
+ }
+ else
+ chars_read[num_chars_read++] = c;
+ }
+
+ if (num_chars_read == num_chars_wanted)
+ return 1;
+ else
+ {
+ while (num_chars_read > 0)
+ scm_ungetc_unlocked (chars_read[--num_chars_read], port);
+ return 0;
+ }
+}
+
\f
/* Sharp readers, i.e. readers called after a `#' sign has been read. */
{
case 't':
case 'T':
+ try_read_ci_chars (port, "rue");
return SCM_BOOL_T;
case 'f':
case 'F':
+ try_read_ci_chars (port, "alse");
return SCM_BOOL_F;
}
while ('0' <= c && c <= '9')
{
+ if (((SSIZE_MAX - (c-'0')) / 10) <= res)
+ scm_i_input_error ("read_decimal_integer", port,
+ "number too large", SCM_EOL);
res = 10*res + c-'0';
got_it = 1;
c = scm_getc_unlocked (port);
c = scm_getc_unlocked (port);
if (c != '3' && c != '6')
{
- if (c != EOF)
- scm_ungetc_unlocked (c, port);
+ if (c == 'a' && try_read_ci_chars (port, "lse"))
+ return SCM_BOOL_F;
+ else if (c != EOF)
+ scm_ungetc_unlocked (c, port);
return SCM_BOOL_F;
}
rank = 1;
return (scm_read_sexp (chr, port, opts));
case '"':
return (scm_read_string (chr, port, opts));
+ case '|':
+ if (opts->r7rs_symbols_p)
+ return scm_read_r7rs_symbol (chr, port, opts);
+ else
+ return scm_read_mixed_case_symbol (chr, port, opts);
case '\'':
case '`':
case ',':
}
}
-#define SCM_ENCODING_SEARCH_SIZE (500)
-
static int
is_encoding_char (char c)
{
return strchr ("_-.:/,+=()", c) != NULL;
}
-/* Search the first few hundred characters of a file for an Emacs-like coding
- declaration. Returns either NULL or a string whose storage has been
- allocated with `scm_gc_malloc ()'. */
+/* Maximum size of an encoding name. This is a bit more than the
+ longest name listed at
+ <http://www.iana.org/assignments/character-sets> ("ISO-2022-JP-2", 13
+ characters.) */
+#define ENCODING_NAME_MAX_SIZE 20
+
+/* Number of bytes at the beginning or end of a file that are scanned
+ for a "coding:" declaration. */
+#define SCM_ENCODING_SEARCH_SIZE (500 + ENCODING_NAME_MAX_SIZE)
+
+
+/* Search the SCM_ENCODING_SEARCH_SIZE bytes of a file for an Emacs-like
+ coding declaration. Returns either NULL or a string whose storage
+ has been allocated with `scm_gc_malloc'. */
char *
scm_i_scan_for_encoding (SCM port)
{
if ((pos = strstr(pos, "coding")) == NULL)
return NULL;
- pos += strlen("coding");
- if (pos - header >= SCM_ENCODING_SEARCH_SIZE ||
+ pos += strlen ("coding");
+ if (pos - header >= SCM_ENCODING_SEARCH_SIZE ||
(*pos == ':' || *pos == '='))
{
pos ++;
}
/* skip spaces */
- while (pos - header <= SCM_ENCODING_SEARCH_SIZE &&
+ while (pos - header <= SCM_ENCODING_SEARCH_SIZE &&
(*pos == ' ' || *pos == '\t'))
pos ++;
+ if (pos - header >= SCM_ENCODING_SEARCH_SIZE - ENCODING_NAME_MAX_SIZE)
+ /* We found the "coding:" string, but there is probably not enough
+ room to store an encoding name in its entirety, so ignore it.
+ This makes sure we do not end up returning a truncated encoding
+ name. */
+ return NULL;
+
/* grab the next token */
encoding_start = pos;
i = 0;
\f
/* Per-port read options.
- We store per-port read options in the 'port-read-options' key of the
- port's alist, which is stored in the internal port structure. The
- value stored in the alist is a single integer that contains a two-bit
- field for each read option.
+ We store per-port read options in the 'port-read-options' port
+ property, which is stored in the internal port structure. The value
+ stored is a single integer that contains a two-bit field for each
+ read option.
If a bit field contains READ_OPTION_INHERIT (3), that indicates that
the applicable value should be inherited from the corresponding
read option has been set per-port, its possible values are those in
'enum t_keyword_style'. */
-/* Key to read options in per-port alists. */
+/* Key to read options in port properties. */
SCM_SYMBOL (sym_port_read_options, "port-read-options");
/* Offsets of bit fields for each per-port override */
#define READ_OPTION_SQUARE_BRACKETS_P 10
#define READ_OPTION_HUNGRY_EOL_ESCAPES_P 12
#define READ_OPTION_CURLY_INFIX_P 14
+#define READ_OPTION_R7RS_SYMBOLS_P 16
/* The total width in bits of the per-port overrides */
-#define READ_OPTIONS_NUM_BITS 16
+#define READ_OPTIONS_NUM_BITS 18
#define READ_OPTIONS_INHERIT_ALL ((1UL << READ_OPTIONS_NUM_BITS) - 1)
#define READ_OPTIONS_MAX_VALUE READ_OPTIONS_INHERIT_ALL
static void
set_port_read_option (SCM port, int option, int new_value)
{
- SCM alist, scm_read_options;
+ SCM scm_read_options;
unsigned int read_options;
new_value &= READ_OPTION_MASK;
- alist = scm_i_port_alist (port);
- scm_read_options = scm_assq_ref (alist, sym_port_read_options);
+
+ scm_dynwind_begin (0);
+ scm_dynwind_lock_port (port);
+
+ scm_read_options = scm_i_port_property (port, sym_port_read_options);
if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE))
read_options = scm_to_uint (scm_read_options);
else
read_options &= ~(READ_OPTION_MASK << option);
read_options |= new_value << option;
scm_read_options = scm_from_uint (read_options);
- alist = scm_assq_set_x (alist, sym_port_read_options, scm_read_options);
- scm_i_set_port_alist_x (port, alist);
+ scm_i_set_port_property_x (port, sym_port_read_options, scm_read_options);
+
+ scm_dynwind_end ();
}
/* Set OPTS and PORT's case-insensitivity according to VALUE. */
static void
init_read_options (SCM port, scm_t_read_opts *opts)
{
- SCM alist, val, scm_read_options;
+ SCM val, scm_read_options;
unsigned int read_options, x;
- alist = scm_i_port_alist (port);
- scm_read_options = scm_assq_ref (alist, sym_port_read_options);
+ scm_read_options = scm_i_port_property (port, sym_port_read_options);
if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE))
read_options = scm_to_uint (scm_read_options);
RESOLVE_BOOLEAN_OPTION (SQUARE_BRACKETS_P, square_brackets_p);
RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P, hungry_eol_escapes_p);
RESOLVE_BOOLEAN_OPTION (CURLY_INFIX_P, curly_infix_p);
+ RESOLVE_BOOLEAN_OPTION (R7RS_SYMBOLS_P, r7rs_symbols_p);
#undef RESOLVE_BOOLEAN_OPTION