X-Git-Url: http://git.hcoop.net/bpt/guile.git/blobdiff_plain/1df515a077f26d59510e48fad3d45a33d2c90e0f..a50eda40dfca0461d7ad6f013fa55d2c14f1d2c6:/libguile/read.c diff --git a/libguile/read.c b/libguile/read.c index 61addf3a5..ecf27ff6e 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -1,5 +1,5 @@ -/* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006, - * 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. +/* Copyright (C) 1995-1997, 1999-2001, 2003, 2004, 2006-2012, 2014 + * Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -30,6 +30,7 @@ #include #include #include +#include #include "libguile/_scm.h" #include "libguile/bytevectors.h" @@ -87,6 +88,8 @@ scm_t_option scm_read_opts[] = "In strings, consume leading whitespace after an escaped end-of-line."}, { SCM_OPTION_BOOLEAN, "curly-infix", 0, "Support SRFI-105 curly infix expressions."}, + { SCM_OPTION_BOOLEAN, "r7rs-symbols", 0, + "Support R7RS |...| symbol notation."}, { 0, }, }; @@ -112,6 +115,7 @@ struct t_read_opts unsigned int hungry_eol_escapes_p : 1; unsigned int curly_infix_p : 1; unsigned int neoteric_p : 1; + unsigned int r7rs_symbols_p : 1; }; typedef struct t_read_opts scm_t_read_opts; @@ -407,6 +411,11 @@ static SCM maybe_annotate_source (SCM x, SCM port, scm_t_read_opts *opts, long line, int column) { + /* This condition can be caused by a user calling + set-port-column!. */ + if (line < 0 || column < 0) + return x; + if (opts->record_positions_p) scm_i_set_source_properties_x (x, line, column, SCM_FILENAME (port)); return x; @@ -587,8 +596,11 @@ skip_intraline_whitespace (SCM port) scm_ungetc_unlocked (c, port); } +/* Read either a double-quoted string or an R7RS-style symbol delimited + by vertical lines, depending on the value of 'chr' ('"' or '|'). + Regardless, the result is always returned as a string. */ static SCM -scm_read_string (int chr, SCM port, scm_t_read_opts *opts) +scm_read_string_like_syntax (int chr, SCM port, scm_t_read_opts *opts) #define FUNC_NAME "scm_lreadr" { /* For strings smaller than C_STR, this function creates only one Scheme @@ -602,13 +614,16 @@ scm_read_string (int chr, SCM port, scm_t_read_opts *opts) long line = SCM_LINUM (port); int column = SCM_COL (port) - 1; - while ('"' != (c = scm_getc_unlocked (port))) + while (chr != (c = scm_getc_unlocked (port))) { if (c == EOF) { str_eof: scm_i_input_error (FUNC_NAME, port, - "end of file in string constant", SCM_EOL); + (chr == '|' + ? "end of file in symbol" + : "end of file in string constant"), + SCM_EOL); } if (c_str_len + 1 >= READER_STRING_BUFFER_SIZE) @@ -623,8 +638,11 @@ scm_read_string (int chr, SCM port, scm_t_read_opts *opts) { case EOF: goto str_eof; - case '"': + case '|': case '\\': + case '(': /* Accept "\(" for use at the beginning of lines + in multiline strings to avoid confusing emacs + lisp modes. */ break; case '\n': if (opts->hungry_eol_escapes_p) @@ -655,7 +673,7 @@ scm_read_string (int chr, SCM port, scm_t_read_opts *opts) c = '\010'; break; case 'x': - if (opts->r6rs_escapes_p) + if (opts->r6rs_escapes_p || chr == '|') SCM_READ_HEX_ESCAPE (10, ';'); else SCM_READ_HEX_ESCAPE (2, '\0'); @@ -673,6 +691,8 @@ scm_read_string (int chr, SCM port, scm_t_read_opts *opts) break; } default: + if (c == chr) + break; bad_escaped: scm_i_input_error (FUNC_NAME, port, "illegal character in escape sequence: ~S", @@ -698,6 +718,17 @@ scm_read_string (int chr, SCM port, scm_t_read_opts *opts) } #undef FUNC_NAME +static SCM +scm_read_string (int chr, SCM port, scm_t_read_opts *opts) +{ + return scm_read_string_like_syntax (chr, port, opts); +} + +static SCM +scm_read_r7rs_symbol (int chr, SCM port, scm_t_read_opts *opts) +{ + return scm_string_to_symbol (scm_read_string_like_syntax (chr, port, opts)); +} static SCM scm_read_number (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) @@ -941,6 +972,43 @@ scm_read_semicolon_comment (int chr, SCM port) return SCM_UNSPECIFIED; } +/* If the EXPECTED_CHARS are the next ones available from PORT, then + consume them and return 1. Otherwise leave the port position where + it was and return 0. EXPECTED_CHARS should be all lowercase, and + will be matched case-insensitively against the characters read from + PORT. */ +static int +try_read_ci_chars (SCM port, const char *expected_chars) +{ + int num_chars_wanted = strlen (expected_chars); + int num_chars_read = 0; + char *chars_read = alloca (num_chars_wanted); + int c; + + while (num_chars_read < num_chars_wanted) + { + c = scm_getc_unlocked (port); + if (c == EOF) + break; + else if (c_tolower (c) != expected_chars[num_chars_read]) + { + scm_ungetc_unlocked (c, port); + break; + } + else + chars_read[num_chars_read++] = c; + } + + if (num_chars_read == num_chars_wanted) + return 1; + else + { + while (num_chars_read > 0) + scm_ungetc_unlocked (chars_read[--num_chars_read], port); + return 0; + } +} + /* Sharp readers, i.e. readers called after a `#' sign has been read. */ @@ -951,10 +1019,12 @@ scm_read_boolean (int chr, SCM port) { case 't': case 'T': + try_read_ci_chars (port, "rue"); return SCM_BOOL_T; case 'f': case 'F': + try_read_ci_chars (port, "alse"); return SCM_BOOL_F; } @@ -1155,8 +1225,10 @@ scm_read_array (int c, SCM port, scm_t_read_opts *opts, long line, int column) c = scm_getc_unlocked (port); if (c != '3' && c != '6') { - if (c != EOF) - scm_ungetc_unlocked (c, port); + if (c == 'a' && try_read_ci_chars (port, "lse")) + return SCM_BOOL_F; + else if (c != EOF) + scm_ungetc_unlocked (c, port); return SCM_BOOL_F; } rank = 1; @@ -1721,6 +1793,11 @@ read_inner_expression (SCM port, scm_t_read_opts *opts) return (scm_read_sexp (chr, port, opts)); case '"': return (scm_read_string (chr, port, opts)); + case '|': + if (opts->r7rs_symbols_p) + return scm_read_r7rs_symbol (chr, port, opts); + else + return scm_read_mixed_case_symbol (chr, port, opts); case '\'': case '`': case ',': @@ -1953,8 +2030,6 @@ scm_get_hash_procedure (int c) } } -#define SCM_ENCODING_SEARCH_SIZE (500) - static int is_encoding_char (char c) { @@ -1964,9 +2039,20 @@ is_encoding_char (char c) return strchr ("_-.:/,+=()", c) != NULL; } -/* Search the first few hundred characters of a file for an Emacs-like coding - declaration. Returns either NULL or a string whose storage has been - allocated with `scm_gc_malloc ()'. */ +/* Maximum size of an encoding name. This is a bit more than the + longest name listed at + ("ISO-2022-JP-2", 13 + characters.) */ +#define ENCODING_NAME_MAX_SIZE 20 + +/* Number of bytes at the beginning or end of a file that are scanned + for a "coding:" declaration. */ +#define SCM_ENCODING_SEARCH_SIZE (500 + ENCODING_NAME_MAX_SIZE) + + +/* Search the SCM_ENCODING_SEARCH_SIZE bytes of a file for an Emacs-like + coding declaration. Returns either NULL or a string whose storage + has been allocated with `scm_gc_malloc'. */ char * scm_i_scan_for_encoding (SCM port) { @@ -2025,8 +2111,8 @@ scm_i_scan_for_encoding (SCM port) if ((pos = strstr(pos, "coding")) == NULL) return NULL; - pos += strlen("coding"); - if (pos - header >= SCM_ENCODING_SEARCH_SIZE || + pos += strlen ("coding"); + if (pos - header >= SCM_ENCODING_SEARCH_SIZE || (*pos == ':' || *pos == '=')) { pos ++; @@ -2035,10 +2121,17 @@ scm_i_scan_for_encoding (SCM port) } /* skip spaces */ - while (pos - header <= SCM_ENCODING_SEARCH_SIZE && + while (pos - header <= SCM_ENCODING_SEARCH_SIZE && (*pos == ' ' || *pos == '\t')) pos ++; + if (pos - header >= SCM_ENCODING_SEARCH_SIZE - ENCODING_NAME_MAX_SIZE) + /* We found the "coding:" string, but there is probably not enough + room to store an encoding name in its entirety, so ignore it. + This makes sure we do not end up returning a truncated encoding + name. */ + return NULL; + /* grab the next token */ encoding_start = pos; i = 0; @@ -2143,9 +2236,10 @@ SCM_SYMBOL (sym_port_read_options, "port-read-options"); #define READ_OPTION_SQUARE_BRACKETS_P 10 #define READ_OPTION_HUNGRY_EOL_ESCAPES_P 12 #define READ_OPTION_CURLY_INFIX_P 14 +#define READ_OPTION_R7RS_SYMBOLS_P 16 /* The total width in bits of the per-port overrides */ -#define READ_OPTIONS_NUM_BITS 16 +#define READ_OPTIONS_NUM_BITS 18 #define READ_OPTIONS_INHERIT_ALL ((1UL << READ_OPTIONS_NUM_BITS) - 1) #define READ_OPTIONS_MAX_VALUE READ_OPTIONS_INHERIT_ALL @@ -2249,6 +2343,7 @@ init_read_options (SCM port, scm_t_read_opts *opts) RESOLVE_BOOLEAN_OPTION (SQUARE_BRACKETS_P, square_brackets_p); RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P, hungry_eol_escapes_p); RESOLVE_BOOLEAN_OPTION (CURLY_INFIX_P, curly_infix_p); + RESOLVE_BOOLEAN_OPTION (R7RS_SYMBOLS_P, r7rs_symbols_p); #undef RESOLVE_BOOLEAN_OPTION