X-Git-Url: http://git.hcoop.net/bpt/guile.git/blobdiff_plain/f6f4feb0a2222efcb297e634603621126542e63f..fcd3c8ccd3ea8f8e052c8e1957cb21004c32d912:/libguile/read.c diff --git a/libguile/read.c b/libguile/read.c index c8db81277..bcb40ee85 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -1,5 +1,5 @@ /* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006, - * 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + * 2007, 2008, 2009, 2010, 2011, 2012, 2014 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -30,6 +30,7 @@ #include #include #include +#include #include "libguile/_scm.h" #include "libguile/bytevectors.h" @@ -87,6 +88,8 @@ scm_t_option scm_read_opts[] = "In strings, consume leading whitespace after an escaped end-of-line."}, { SCM_OPTION_BOOLEAN, "curly-infix", 0, "Support SRFI-105 curly infix expressions."}, + { SCM_OPTION_BOOLEAN, "r7rs-symbols", 0, + "Support R7RS |...| symbol notation."}, { 0, }, }; @@ -112,6 +115,7 @@ struct t_read_opts unsigned int hungry_eol_escapes_p : 1; unsigned int curly_infix_p : 1; unsigned int neoteric_p : 1; + unsigned int r7rs_symbols_p : 1; }; typedef struct t_read_opts scm_t_read_opts; @@ -587,8 +591,11 @@ skip_intraline_whitespace (SCM port) scm_ungetc_unlocked (c, port); } +/* Read either a double-quoted string or an R7RS-style symbol delimited + by vertical lines, depending on the value of 'chr' ('"' or '|'). + Regardless, the result is always returned as a string. */ static SCM -scm_read_string (int chr, SCM port, scm_t_read_opts *opts) +scm_read_string_like_syntax (int chr, SCM port, scm_t_read_opts *opts) #define FUNC_NAME "scm_lreadr" { /* For strings smaller than C_STR, this function creates only one Scheme @@ -602,13 +609,16 @@ scm_read_string (int chr, SCM port, scm_t_read_opts *opts) long line = SCM_LINUM (port); int column = SCM_COL (port) - 1; - while ('"' != (c = scm_getc_unlocked (port))) + while (chr != (c = scm_getc_unlocked (port))) { if (c == EOF) { str_eof: scm_i_input_error (FUNC_NAME, port, - "end of file in string constant", SCM_EOL); + (chr == '|' + ? "end of file in symbol" + : "end of file in string constant"), + SCM_EOL); } if (c_str_len + 1 >= READER_STRING_BUFFER_SIZE) @@ -623,7 +633,7 @@ scm_read_string (int chr, SCM port, scm_t_read_opts *opts) { case EOF: goto str_eof; - case '"': + case '|': case '\\': break; case '\n': @@ -655,7 +665,7 @@ scm_read_string (int chr, SCM port, scm_t_read_opts *opts) c = '\010'; break; case 'x': - if (opts->r6rs_escapes_p) + if (opts->r6rs_escapes_p || chr == '|') SCM_READ_HEX_ESCAPE (10, ';'); else SCM_READ_HEX_ESCAPE (2, '\0'); @@ -673,6 +683,8 @@ scm_read_string (int chr, SCM port, scm_t_read_opts *opts) break; } default: + if (c == chr) + break; bad_escaped: scm_i_input_error (FUNC_NAME, port, "illegal character in escape sequence: ~S", @@ -698,6 +710,17 @@ scm_read_string (int chr, SCM port, scm_t_read_opts *opts) } #undef FUNC_NAME +static SCM +scm_read_string (int chr, SCM port, scm_t_read_opts *opts) +{ + return scm_read_string_like_syntax (chr, port, opts); +} + +static SCM +scm_read_r7rs_symbol (int chr, SCM port, scm_t_read_opts *opts) +{ + return scm_string_to_symbol (scm_read_string_like_syntax (chr, port, opts)); +} static SCM scm_read_number (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) @@ -941,6 +964,43 @@ scm_read_semicolon_comment (int chr, SCM port) return SCM_UNSPECIFIED; } +/* If the EXPECTED_CHARS are the next ones available from PORT, then + consume them and return 1. Otherwise leave the port position where + it was and return 0. EXPECTED_CHARS should be all lowercase, and + will be matched case-insensitively against the characters read from + PORT. */ +static int +try_read_ci_chars (SCM port, const char *expected_chars) +{ + int num_chars_wanted = strlen (expected_chars); + int num_chars_read = 0; + char *chars_read = alloca (num_chars_wanted); + int c; + + while (num_chars_read < num_chars_wanted) + { + c = scm_getc_unlocked (port); + if (c == EOF) + break; + else if (c_tolower (c) != expected_chars[num_chars_read]) + { + scm_ungetc_unlocked (c, port); + break; + } + else + chars_read[num_chars_read++] = c; + } + + if (num_chars_read == num_chars_wanted) + return 1; + else + { + while (num_chars_read > 0) + scm_ungetc_unlocked (chars_read[--num_chars_read], port); + return 0; + } +} + /* Sharp readers, i.e. readers called after a `#' sign has been read. */ @@ -951,10 +1011,12 @@ scm_read_boolean (int chr, SCM port) { case 't': case 'T': + try_read_ci_chars (port, "rue"); return SCM_BOOL_T; case 'f': case 'F': + try_read_ci_chars (port, "alse"); return SCM_BOOL_F; } @@ -1116,6 +1178,9 @@ read_decimal_integer (SCM port, int c, ssize_t *resp) while ('0' <= c && c <= '9') { + if (((SSIZE_MAX - (c-'0')) / 10) <= res) + scm_i_input_error ("read_decimal_integer", port, + "number too large", SCM_EOL); res = 10*res + c-'0'; got_it = 1; c = scm_getc_unlocked (port); @@ -1152,8 +1217,10 @@ scm_read_array (int c, SCM port, scm_t_read_opts *opts, long line, int column) c = scm_getc_unlocked (port); if (c != '3' && c != '6') { - if (c != EOF) - scm_ungetc_unlocked (c, port); + if (c == 'a' && try_read_ci_chars (port, "lse")) + return SCM_BOOL_F; + else if (c != EOF) + scm_ungetc_unlocked (c, port); return SCM_BOOL_F; } rank = 1; @@ -1718,6 +1785,11 @@ read_inner_expression (SCM port, scm_t_read_opts *opts) return (scm_read_sexp (chr, port, opts)); case '"': return (scm_read_string (chr, port, opts)); + case '|': + if (opts->r7rs_symbols_p) + return scm_read_r7rs_symbol (chr, port, opts); + else + return scm_read_mixed_case_symbol (chr, port, opts); case '\'': case '`': case ',': @@ -1950,8 +2022,6 @@ scm_get_hash_procedure (int c) } } -#define SCM_ENCODING_SEARCH_SIZE (500) - static int is_encoding_char (char c) { @@ -1961,9 +2031,20 @@ is_encoding_char (char c) return strchr ("_-.:/,+=()", c) != NULL; } -/* Search the first few hundred characters of a file for an Emacs-like coding - declaration. Returns either NULL or a string whose storage has been - allocated with `scm_gc_malloc ()'. */ +/* Maximum size of an encoding name. This is a bit more than the + longest name listed at + ("ISO-2022-JP-2", 13 + characters.) */ +#define ENCODING_NAME_MAX_SIZE 20 + +/* Number of bytes at the beginning or end of a file that are scanned + for a "coding:" declaration. */ +#define SCM_ENCODING_SEARCH_SIZE (500 + ENCODING_NAME_MAX_SIZE) + + +/* Search the SCM_ENCODING_SEARCH_SIZE bytes of a file for an Emacs-like + coding declaration. Returns either NULL or a string whose storage + has been allocated with `scm_gc_malloc'. */ char * scm_i_scan_for_encoding (SCM port) { @@ -2022,8 +2103,8 @@ scm_i_scan_for_encoding (SCM port) if ((pos = strstr(pos, "coding")) == NULL) return NULL; - pos += strlen("coding"); - if (pos - header >= SCM_ENCODING_SEARCH_SIZE || + pos += strlen ("coding"); + if (pos - header >= SCM_ENCODING_SEARCH_SIZE || (*pos == ':' || *pos == '=')) { pos ++; @@ -2032,10 +2113,17 @@ scm_i_scan_for_encoding (SCM port) } /* skip spaces */ - while (pos - header <= SCM_ENCODING_SEARCH_SIZE && + while (pos - header <= SCM_ENCODING_SEARCH_SIZE && (*pos == ' ' || *pos == '\t')) pos ++; + if (pos - header >= SCM_ENCODING_SEARCH_SIZE - ENCODING_NAME_MAX_SIZE) + /* We found the "coding:" string, but there is probably not enough + room to store an encoding name in its entirety, so ignore it. + This makes sure we do not end up returning a truncated encoding + name. */ + return NULL; + /* grab the next token */ encoding_start = pos; i = 0; @@ -2115,10 +2203,10 @@ SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0, /* Per-port read options. - We store per-port read options in the 'port-read-options' key of the - port's alist, which is stored in the internal port structure. The - value stored in the alist is a single integer that contains a two-bit - field for each read option. + We store per-port read options in the 'port-read-options' port + property, which is stored in the internal port structure. The value + stored is a single integer that contains a two-bit field for each + read option. If a bit field contains READ_OPTION_INHERIT (3), that indicates that the applicable value should be inherited from the corresponding @@ -2128,7 +2216,7 @@ SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0, read option has been set per-port, its possible values are those in 'enum t_keyword_style'. */ -/* Key to read options in per-port alists. */ +/* Key to read options in port properties. */ SCM_SYMBOL (sym_port_read_options, "port-read-options"); /* Offsets of bit fields for each per-port override */ @@ -2140,9 +2228,10 @@ SCM_SYMBOL (sym_port_read_options, "port-read-options"); #define READ_OPTION_SQUARE_BRACKETS_P 10 #define READ_OPTION_HUNGRY_EOL_ESCAPES_P 12 #define READ_OPTION_CURLY_INFIX_P 14 +#define READ_OPTION_R7RS_SYMBOLS_P 16 /* The total width in bits of the per-port overrides */ -#define READ_OPTIONS_NUM_BITS 16 +#define READ_OPTIONS_NUM_BITS 18 #define READ_OPTIONS_INHERIT_ALL ((1UL << READ_OPTIONS_NUM_BITS) - 1) #define READ_OPTIONS_MAX_VALUE READ_OPTIONS_INHERIT_ALL @@ -2153,12 +2242,15 @@ SCM_SYMBOL (sym_port_read_options, "port-read-options"); static void set_port_read_option (SCM port, int option, int new_value) { - SCM alist, scm_read_options; + SCM scm_read_options; unsigned int read_options; new_value &= READ_OPTION_MASK; - alist = scm_i_port_alist (port); - scm_read_options = scm_assq_ref (alist, sym_port_read_options); + + scm_dynwind_begin (0); + scm_dynwind_lock_port (port); + + scm_read_options = scm_i_port_property (port, sym_port_read_options); if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE)) read_options = scm_to_uint (scm_read_options); else @@ -2166,8 +2258,9 @@ set_port_read_option (SCM port, int option, int new_value) read_options &= ~(READ_OPTION_MASK << option); read_options |= new_value << option; scm_read_options = scm_from_uint (read_options); - alist = scm_assq_set_x (alist, sym_port_read_options, scm_read_options); - scm_i_set_port_alist_x (port, alist); + scm_i_set_port_property_x (port, sym_port_read_options, scm_read_options); + + scm_dynwind_end (); } /* Set OPTS and PORT's case-insensitivity according to VALUE. */ @@ -2202,11 +2295,10 @@ set_port_curly_infix_p (SCM port, scm_t_read_opts *opts, int value) static void init_read_options (SCM port, scm_t_read_opts *opts) { - SCM alist, val, scm_read_options; + SCM val, scm_read_options; unsigned int read_options, x; - alist = scm_i_port_alist (port); - scm_read_options = scm_assq_ref (alist, sym_port_read_options); + scm_read_options = scm_i_port_property (port, sym_port_read_options); if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE)) read_options = scm_to_uint (scm_read_options); @@ -2243,6 +2335,7 @@ init_read_options (SCM port, scm_t_read_opts *opts) RESOLVE_BOOLEAN_OPTION (SQUARE_BRACKETS_P, square_brackets_p); RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P, hungry_eol_escapes_p); RESOLVE_BOOLEAN_OPTION (CURLY_INFIX_P, curly_infix_p); + RESOLVE_BOOLEAN_OPTION (R7RS_SYMBOLS_P, r7rs_symbols_p); #undef RESOLVE_BOOLEAN_OPTION