HCoop Git - bpt/guile.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006,
	2	* 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
	3	*
	4	* This library is free software; you can redistribute it and/or
	5	* modify it under the terms of the GNU Lesser General Public License
	6	* as published by the Free Software Foundation; either version 3 of
	7	* the License, or (at your option) any later version.
	8	*
	9	* This library is distributed in the hope that it will be useful, but
	10	* WITHOUT ANY WARRANTY; without even the implied warranty of
	11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	12	* Lesser General Public License for more details.
	13	*
	14	* You should have received a copy of the GNU Lesser General Public
	15	* License along with this library; if not, write to the Free Software
	16	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
	17	* 02110-1301 USA
	18	*/
	19
	20
	21	\f
	22
	23	#ifdef HAVE_CONFIG_H
	24	# include <config.h>
	25	#endif
	26
	27	#include <stdio.h>
	28	#include <ctype.h>
	29	#include <string.h>
	30	#include <unistd.h>
	31	#include <unicase.h>
	32	#include <unictype.h>
	33
	34	#include "libguile/_scm.h"
	35	#include "libguile/bytevectors.h"
	36	#include "libguile/chars.h"
	37	#include "libguile/eval.h"
	38	#include "libguile/arrays.h"
	39	#include "libguile/bitvectors.h"
	40	#include "libguile/keywords.h"
	41	#include "libguile/alist.h"
	42	#include "libguile/srcprop.h"
	43	#include "libguile/hashtab.h"
	44	#include "libguile/hash.h"
	45	#include "libguile/ports.h"
	46	#include "libguile/fports.h"
	47	#include "libguile/root.h"
	48	#include "libguile/strings.h"
	49	#include "libguile/strports.h"
	50	#include "libguile/vectors.h"
	51	#include "libguile/validate.h"
	52	#include "libguile/srfi-4.h"
	53	#include "libguile/srfi-13.h"
	54
	55	#include "libguile/read.h"
	56	#include "libguile/private-options.h"
	57
	58
	59	\f
	60
		/* Symbols used by the reader. scm_sym_dot is what scm_read_sexp compares
		   against to detect dotted pairs, and sym_nil is the identifier that
		   scm_read_nil expects. */
	61	SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
	62	SCM_SYMBOL (scm_keyword_prefix, "prefix");
	63	SCM_SYMBOL (scm_keyword_postfix, "postfix");
	64	SCM_SYMBOL (sym_nil, "nil");
	65
	66	/* SRFI-105 curly infix expression support */
		/* sym_nfx is prepended by scm_read_sexp to mixed curly-infix lists. */
	67	SCM_SYMBOL (sym_nfx, "$nfx$");
	68	SCM_SYMBOL (sym_bracket_list, "$bracket-list$");
	69	SCM_SYMBOL (sym_bracket_apply, "$bracket-apply$");
	70
		/* Table of reader options. Each entry lists the option's type, its name,
		   a value (presumably the initial setting -- confirm against
		   scm_t_option) and a one-line description. The zeroed entry ends
		   the table. */
	71	scm_t_option scm_read_opts[] =
	72	{
	73	{ SCM_OPTION_BOOLEAN, "copy", 0,
	74	"Copy source code expressions." },
	75	{ SCM_OPTION_BOOLEAN, "positions", 1,
	76	"Record positions of source code expressions." },
	77	{ SCM_OPTION_BOOLEAN, "case-insensitive", 0,
	78	"Convert symbols to lower case."},
	79	{ SCM_OPTION_SCM, "keywords", (scm_t_bits) SCM_BOOL_F_BITS,
	80	"Style of keyword recognition: #f, 'prefix or 'postfix."},
	81	{ SCM_OPTION_BOOLEAN, "r6rs-hex-escapes", 0,
	82	"Use R6RS variable-length character and string hex escapes."},
	83	{ SCM_OPTION_BOOLEAN, "square-brackets", 1,
	84	"Treat `[' and `]' as parentheses, for R6RS compatibility."},
	85	{ SCM_OPTION_BOOLEAN, "hungry-eol-escapes", 0,
	86	"In strings, consume leading whitespace after an escaped end-of-line."},
	87	{ SCM_OPTION_BOOLEAN, "curly-infix", 0,
	88	"Support SRFI-105 curly infix expressions."},
	89	{ 0, },
	90	};
	91
	92	/* Internal read options structure. This is initialized by 'scm_read'
	93	from the global and per-port read options, and a pointer is passed
	94	down to all helper functions. */
	95
		/* Keyword recognition styles. KEYWORD_STYLE_POSTFIX is the one checked by
		   scm_read_mixed_case_symbol to turn a token ending in `:' into a
		   keyword. */
	96	enum t_keyword_style
	97	{
	98	KEYWORD_STYLE_HASH_PREFIX,
	99	KEYWORD_STYLE_PREFIX,
	100	KEYWORD_STYLE_POSTFIX
	101	};
	102
		/* Reader settings consulted by the token readers through an `opts'
		   pointer: the keyword style plus a set of one-bit flags.
		   NOTE(review): most flags appear to mirror the entries of
		   scm_read_opts; the code that fills this structure, and the meaning
		   of neoteric_p, are not visible in this part of the file. */
	103	struct t_read_opts
	104	{
	105	enum t_keyword_style keyword_style;
	106	unsigned int copy_source_p : 1;
	107	unsigned int record_positions_p : 1;
	108	unsigned int case_insensitive_p : 1;
	109	unsigned int r6rs_escapes_p : 1;
	110	unsigned int square_brackets_p : 1;
	111	unsigned int hungry_eol_escapes_p : 1;
	112	unsigned int curly_infix_p : 1;
	113	unsigned int neoteric_p : 1;
	114	};
	115
	116	typedef struct t_read_opts scm_t_read_opts;
	117
	118
	119	/*
	120	Give meaningful error messages for errors
	121
	122	We use the format
	123
	124	FILE:LINE:COL: MESSAGE
	125	This happened in ....
	126
	127	This is not standard GNU format, but the test-suite likes the real
	128	message to be in front.
	129
	130	*/
	131
	132
	133	void
	134	scm_i_input_error (char const *function,
	135	SCM port, const char *message, SCM arg)
	136	{
	137	SCM fn = (scm_is_string (SCM_FILENAME(port))
	138	? SCM_FILENAME(port)
	139	: scm_from_locale_string ("#<unknown port>"));
	140
	141	SCM string_port = scm_open_output_string ();
	142	SCM string = SCM_EOL;
	143	scm_simple_format (string_port,
	144	scm_from_locale_string ("~A:~S:~S: ~A"),
	145	scm_list_4 (fn,
	146	scm_from_long (SCM_LINUM (port) + 1),
	147	scm_from_int (SCM_COL (port) + 1),
	148	scm_from_locale_string (message)));
	149
	150	string = scm_get_output_string (string_port);
	151	scm_close_output_port (string_port);
	152	scm_error_scm (scm_from_latin1_symbol ("read-error"),
	153	function? scm_from_locale_string (function) : SCM_BOOL_F,
	154	string,
	155	arg,
	156	SCM_BOOL_F);
	157	}
	158
	159
		/* Scheme procedure `read-options-interface': passes SETTING and the
		   scm_read_opts table to scm_options and returns its result. */
	160	SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
	161	(SCM setting),
	162	"Option interface for the read options. Instead of using\n"
	163	"this procedure directly, use the procedures @code{read-enable},\n"
	164	"@code{read-disable}, @code{read-set!} and @code{read-options}.")
	165	#define FUNC_NAME s_scm_read_options
	166	{
	167	SCM ans = scm_options (setting,
	168	scm_read_opts,
	169	FUNC_NAME);
		/* Copying source expressions forces position recording on as well. */
	170	if (SCM_COPY_SOURCE_P)
	171	SCM_RECORD_POSITIONS_P = 1;
	172	return ans;
	173	}
	174	#undef FUNC_NAME
	175
	176	/* A fluid referring to an association list mapping extra hash
	177	characters to procedures. */
	178	static SCM *scm_i_read_hash_procedures;
	179
		/* Return the current value of the fluid that scm_i_read_hash_procedures
		   points to. */
	180	static SCM
	181	scm_i_read_hash_procedures_ref (void)
	182	{
	183	return scm_fluid_ref (*scm_i_read_hash_procedures);
	184	}
	185
		/* Set the fluid that scm_i_read_hash_procedures points to to VALUE. */
	186	static void
	187	scm_i_read_hash_procedures_set_x (SCM value)
	188	{
	189	scm_fluid_set_x (*scm_i_read_hash_procedures, value);
	190	}
	191
	192	\f
	193	/* Token readers. */
	194
	195
	/* Size of the C buffer used to read symbols and numbers.  */
	#define READER_BUFFER_SIZE 128

	/* Number of 32-bit codepoints in the buffer used to read strings.  */
	#define READER_STRING_BUFFER_SIZE 128

	/* The maximum size of Scheme character names.  */
	#define READER_CHAR_NAME_MAX_SIZE 50

	/* The maximum size of reader directive names.  */
	#define READER_DIRECTIVE_NAME_MAX_SIZE 50


	/* `isblank' is only in C99.  Unlike `isblank', this also accepts
	   newline, form feed and carriage return.  */
	#define CHAR_IS_BLANK_(_chr)					\
	  (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n')	\
	   || ((_chr) == '\f') || ((_chr) == '\r'))

	#ifdef MSDOS
	/* On MSDOS, character 26 (Ctrl-Z) is blank too.  The argument must be
	   forwarded as `_chr': referring to a variable named `chr' only works
	   by accident when the caller happens to have one in scope.  */
	# define CHAR_IS_BLANK(_chr)			\
	  ((CHAR_IS_BLANK_ (_chr)) || ((_chr) == 26))
	#else
	# define CHAR_IS_BLANK CHAR_IS_BLANK_
	#endif


	/* R5RS one-character delimiters (see section 7.1.1, ``Lexical
	   structure'').  */
	#define CHAR_IS_R5RS_DELIMITER(c)				\
	  (CHAR_IS_BLANK (c)						\
	   || (c) == ')' || (c) == '(' || (c) == ';' || (c) == '"')

	/* Delimiters under the current read options.  This expects a variable
	   `opts' (pointer to the read options) in scope at the point of use:
	   square brackets delimit when either `square_brackets_p' or
	   `curly_infix_p' is set, curly braces only when `curly_infix_p' is.  */
	#define CHAR_IS_DELIMITER(c)					\
	  (CHAR_IS_R5RS_DELIMITER (c)					\
	   || (((c) == ']' || (c) == '[') && (opts->square_brackets_p	\
	                                      || opts->curly_infix_p))	\
	   || (((c) == '}' || (c) == '{') && opts->curly_infix_p))

	/* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
	   Structure''.  Only the lower-case letters are matched.  */
	#define CHAR_IS_EXPONENT_MARKER(_chr)				\
	  (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f')	\
	   || ((_chr) == 'd') || ((_chr) == 'l'))
	239
	240	/* Forward declarations of readers and helpers defined later in this file. */
	241	static SCM scm_read_scsh_block_comment (scm_t_wchar, SCM);
	242	static SCM scm_read_r6rs_block_comment (scm_t_wchar, SCM);
	243	static SCM scm_read_commented_expression (scm_t_wchar, SCM, scm_t_read_opts *);
	244	static SCM scm_read_shebang (scm_t_wchar, SCM, scm_t_read_opts *);
	245	static SCM scm_get_hash_procedure (int);
	246
	247	/* Read from PORT until a delimiter (e.g., a whitespace) is read. Put the
	248	result in the pre-allocated buffer BUF. Return zero if the whole token has
	249	fewer than BUF_SIZE bytes, non-zero otherwise. READ will be set the number of
	250	bytes actually read. */
	251	static int
	252	read_token (SCM port, scm_t_read_opts *opts,
	253	char buf, size_t buf_size, size_t read)
	254	{
	255	*read = 0;
	256
	257	while (*read < buf_size)
	258	{
	259	int chr;
	260
	261	chr = scm_get_byte_or_eof (port);
	262
	263	if (chr == EOF)
	264	return 0;
	265	else if (CHAR_IS_DELIMITER (chr))
	266	{
	267	scm_unget_byte (chr, port);
	268	return 0;
	269	}
	270	else
	271	{
	272	*buf = (char) chr;
	273	buf++, (*read)++;
	274	}
	275	}
	276
	277	return 1;
	278	}
	279
	280	/* Like `read_token', but return either BUFFER, or a GC-allocated buffer
	281	if the token doesn't fit in BUFFER_SIZE bytes. */
	282	static char *
	283	read_complete_token (SCM port, scm_t_read_opts *opts,
	284	char buffer, size_t buffer_size, size_t read)
	285	{
	286	int overflow = 0;
	287	size_t bytes_read, overflow_size = 0;
	288	char *overflow_buffer = NULL;
	289
	290	do
	291	{
	292	overflow = read_token (port, opts, buffer, buffer_size, &bytes_read);
	293	if (bytes_read == 0)
	294	break;
	295	if (overflow \|\| overflow_size != 0)
	296	{
	297	if (overflow_size == 0)
	298	{
	299	overflow_buffer = scm_gc_malloc_pointerless (bytes_read, "read");
	300	memcpy (overflow_buffer, buffer, bytes_read);
	301	overflow_size = bytes_read;
	302	}
	303	else
	304	{
	305	char *new_buf =
	306	scm_gc_malloc_pointerless (overflow_size + bytes_read, "read");
	307
	308	memcpy (new_buf, overflow_buffer, overflow_size);
	309	memcpy (new_buf + overflow_size, buffer, bytes_read);
	310
	311	overflow_buffer = new_buf;
	312	overflow_size += bytes_read;
	313	}
	314	}
	315	}
	316	while (overflow);
	317
	318	if (overflow_size)
	319	*read = overflow_size;
	320	else
	321	*read = bytes_read;
	322
	323	return (overflow_size > 0 ? overflow_buffer : buffer);
	324	}
	325
	326	/* Skip whitespace from PORT and return the first non-whitespace character
	327	read. Raise an error on end-of-file. */
	328	static int
	329	flush_ws (SCM port, scm_t_read_opts opts, const char eoferr)
	330	{
	331	scm_t_wchar c;
	332	while (1)
	333	switch (c = scm_getc (port))
	334	{
	335	case EOF:
	336	goteof:
	337	if (eoferr)
	338	{
	339	scm_i_input_error (eoferr,
	340	port,
	341	"end of file",
	342	SCM_EOL);
	343	}
	344	return c;
	345
	346	case ';':
	347	lp:
	348	switch (c = scm_getc (port))
	349	{
	350	case EOF:
	351	goto goteof;
	352	default:
	353	goto lp;
	354	case SCM_LINE_INCREMENTORS:
	355	break;
	356	}
	357	break;
	358
	359	case '#':
	360	switch (c = scm_getc (port))
	361	{
	362	case EOF:
	363	eoferr = "read_sharp";
	364	goto goteof;
	365	case '!':
	366	scm_read_shebang (c, port, opts);
	367	break;
	368	case ';':
	369	scm_read_commented_expression (c, port, opts);
	370	break;
	371	case '\|':
	372	if (scm_is_false (scm_get_hash_procedure (c)))
	373	{
	374	scm_read_r6rs_block_comment (c, port);
	375	break;
	376	}
	377	/* fall through */
	378	default:
	379	scm_ungetc (c, port);
	380	return '#';
	381	}
	382	break;
	383
	384	case SCM_LINE_INCREMENTORS:
	385	case SCM_SINGLE_SPACES:
	386	case '\t':
	387	break;
	388
	389	default:
	390	return c;
	391	}
	392
	393	return 0;
	394	}
	395
	396
	397	\f
	398	/* Token readers. */
	399
	400	static SCM scm_read_expression (SCM port, scm_t_read_opts *opts);
	401	static SCM scm_read_sharp (int chr, SCM port, scm_t_read_opts *opts,
	402	long line, int column);
	403
	404
	405	static SCM
	406	maybe_annotate_source (SCM x, SCM port, scm_t_read_opts *opts,
	407	long line, int column)
	408	{
	409	if (opts->record_positions_p)
	410	scm_i_set_source_properties_x (x, line, column, SCM_FILENAME (port));
	411	return x;
	412	}
	413
	414	static SCM
	415	scm_read_sexp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
	416	#define FUNC_NAME "scm_i_lreadparen"
	417	{
	418	int c;
	419	SCM tmp, tl, ans = SCM_EOL;
	420	const int curly_list_p = (chr == '{') && opts->curly_infix_p;
	421	const int terminating_char = ((chr == '{') ? '}'
	422	: ((chr == '[') ? ']'
	423	: ')'));
	424
	425	/* Need to capture line and column numbers here. */
	426	long line = SCM_LINUM (port);
	427	int column = SCM_COL (port) - 1;
	428
	429	c = flush_ws (port, opts, FUNC_NAME);
	430	if (terminating_char == c)
	431	return SCM_EOL;
	432
	433	scm_ungetc (c, port);
	434	tmp = scm_read_expression (port, opts);
	435
	436	/* Note that it is possible for scm_read_expression to return
	437	scm_sym_dot, but not as part of a dotted pair: as in #{.}#. So
	438	check that it's a real dot by checking `c'. */
	439	if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
	440	{
	441	ans = scm_read_expression (port, opts);
	442	if (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
	443	scm_i_input_error (FUNC_NAME, port, "missing close paren",
	444	SCM_EOL);
	445	return ans;
	446	}
	447
	448	/* Build the head of the list structure. */
	449	ans = tl = scm_cons (tmp, SCM_EOL);
	450
	451	while (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
	452	{
	453	SCM new_tail;
	454
	455	if (c == ')' \|\| (c == ']' && opts->square_brackets_p)
	456	\|\| ((c == '}' \|\| c == ']') && opts->curly_infix_p))
	457	scm_i_input_error (FUNC_NAME, port,
	458	"in pair: mismatched close paren: ~A",
	459	scm_list_1 (SCM_MAKE_CHAR (c)));
	460
	461	scm_ungetc (c, port);
	462	tmp = scm_read_expression (port, opts);
	463
	464	/* See above note about scm_sym_dot. */
	465	if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
	466	{
	467	SCM_SETCDR (tl, scm_read_expression (port, opts));
	468
	469	c = flush_ws (port, opts, FUNC_NAME);
	470	if (terminating_char != c)
	471	scm_i_input_error (FUNC_NAME, port,
	472	"in pair: missing close paren", SCM_EOL);
	473	break;
	474	}
	475
	476	new_tail = scm_cons (tmp, SCM_EOL);
	477	SCM_SETCDR (tl, new_tail);
	478	tl = new_tail;
	479	}
	480
	481	if (curly_list_p)
	482	{
	483	/* In addition to finding the length, 'scm_ilength' checks for
	484	improper or circular lists, in which case it returns -1. */
	485	int len = scm_ilength (ans);
	486
	487	/* The (len == 0) case is handled above */
	488	if (len == 1)
	489	/* Return directly to avoid re-annotating the element's source
	490	location with the position of the outer brace. Also, it
	491	might not be possible to annotate the element. */
	492	return scm_car (ans); /* {e} => e */
	493	else if (len == 2)
	494	; /* Leave the list unchanged: {e1 e2} => (e1 e2) */
	495	else if (len >= 3 && (len & 1))
	496	{
	497	/* It's a proper list whose length is odd and at least 3. If
	498	the elements at odd indices (the infix operator positions)
	499	are all 'equal?', then it's a simple curly-infix list.
	500	Otherwise it's a mixed curly-infix list. */
	501	SCM op = scm_cadr (ans);
	502
	503	/* Check to see if the elements at odd indices are 'equal?' */
	504	for (tl = scm_cdddr (ans); ; tl = scm_cddr (tl))
	505	{
	506	if (scm_is_null (tl))
	507	{
	508	/* Convert simple curly-infix list to prefix:
	509	{a <op> b <op> ...} => (<op> a b ...) */
	510	tl = ans;
	511	while (scm_is_pair (scm_cdr (tl)))
	512	{
	513	tmp = scm_cddr (tl);
	514	SCM_SETCDR (tl, tmp);
	515	tl = tmp;
	516	}
	517	ans = scm_cons (op, ans);
	518	break;
	519	}
	520	else if (scm_is_false (scm_equal_p (op, scm_car (tl))))
	521	{
	522	/* Mixed curly-infix list: {e ...} => ($nfx$ e ...) */
	523	ans = scm_cons (sym_nfx, ans);
	524	break;
	525	}
	526	}
	527	}
	528	else
	529	/* Mixed curly-infix (possibly improper) list:
	530	{e . tail} => ($nfx$ e . tail) */
	531	ans = scm_cons (sym_nfx, ans);
	532	}
	533
	534	return maybe_annotate_source (ans, port, opts, line, column);
	535	}
	536	#undef FUNC_NAME
	537
	538
	539	/* Read a hexadecimal number of at most NDIGITS digits from `port' and put
	540	its value into the variable C. If TERMINATOR is non-null, stop early when
	541	the TERMINATOR character is read after at least one digit.

		The macro relies on names from the place where it is expanded: the
		variables `port' and `c', and the labels `str_eof' (taken on end of
		file) and `bad_escaped' (taken on a non-hexadecimal character, with C
		set to that character). Upper- and lower-case digits are accepted. */
	542	#define SCM_READ_HEX_ESCAPE(ndigits, terminator) \
	543	do \
	544	{ \
	545	scm_t_wchar a; \
	546	size_t i = 0; \
	547	c = 0; \
	548	while (i < ndigits) \
	549	{ \
	550	a = scm_getc (port); \
	551	if (a == EOF) \
	552	goto str_eof; \
	553	if (terminator \
	554	&& (a == (scm_t_wchar) terminator) \
	555	&& (i > 0)) \
	556	break; \
	557	if ('0' <= a && a <= '9') \
	558	a -= '0'; \
	559	else if ('A' <= a && a <= 'F') \
	560	a = a - 'A' + 10; \
	561	else if ('a' <= a && a <= 'f') \
	562	a = a - 'a' + 10; \
	563	else \
	564	{ \
	565	c = a; \
	566	goto bad_escaped; \
	567	} \
	568	c = c * 16 + a; \
	569	i ++; \
	570	} \
	571	} while (0)
	572
	573	static void
	574	skip_intraline_whitespace (SCM port)
	575	{
	576	scm_t_wchar c;
	577
	578	do
	579	{
	580	c = scm_getc (port);
	581	if (c == EOF)
	582	return;
	583	}
	584	while (c == '\t' \|\| uc_is_general_category (c, UC_SPACE_SEPARATOR));
	585
	586	scm_ungetc (c, port);
	587	}
	588
		/* Read a string literal from PORT up to the closing double quote,
		   decoding backslash escapes, and return it as a Scheme string
		   (annotated with its source position when OPTS requests that). CHR
		   is not referenced by the body. A read error is signalled on end of
		   file inside the string and on an unrecognized escape sequence. */
	589	static SCM
	590	scm_read_string (int chr, SCM port, scm_t_read_opts *opts)
	591	#define FUNC_NAME "scm_lreadr"
	592	{
	593	/* For strings smaller than C_STR, this function creates only one Scheme
	594	object (the string returned). */
	595
	596	SCM str = SCM_EOL;
	597	size_t c_str_len = 0;
	598	scm_t_wchar c, c_str[READER_STRING_BUFFER_SIZE];
	599
	600	/* Need to capture line and column numbers here. */
	601	long line = SCM_LINUM (port);
	602	int column = SCM_COL (port) - 1;
	603
	604	while ('"' != (c = scm_getc (port)))
	605	{
	606	if (c == EOF)
	607	{
		/* Also the target of `goto str_eof' from the escape handling below and
		   from SCM_READ_HEX_ESCAPE. */
	608	str_eof:
	609	scm_i_input_error (FUNC_NAME, port,
	610	"end of file in string constant", SCM_EOL);
	611	}
	612
		/* C_STR is about to fill up: turn it into a Scheme string, push that
		   onto the list STR (newest chunk first) and start over. */
	613	if (c_str_len + 1 >= READER_STRING_BUFFER_SIZE)
	614	{
	615	str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
	616	c_str_len = 0;
	617	}
	618
	619	if (c == '\\')
	620	{
	621	switch (c = scm_getc (port))
	622	{
	623	case EOF:
	624	goto str_eof;
	625	case '"':
	626	case '\\':
	627	break;
		/* Backslash-newline adds no character to the string; with
		   hungry-eol-escapes the following intraline whitespace is dropped
		   too. */
	628	case '\n':
	629	if (opts->hungry_eol_escapes_p)
	630	skip_intraline_whitespace (port);
	631	continue;
	632	case '0':
	633	c = '\0';
	634	break;
	635	case 'f':
	636	c = '\f';
	637	break;
	638	case 'n':
	639	c = '\n';
	640	break;
	641	case 'r':
	642	c = '\r';
	643	break;
	644	case 't':
	645	c = '\t';
	646	break;
	647	case 'a':
	648	c = '\007';
	649	break;
	650	case 'v':
	651	c = '\v';
	652	break;
	653	case 'b':
	654	c = '\010';
	655	break;
		/* \x: with R6RS escapes, up to 10 hex digits ended by `;'; otherwise
		   exactly 2 hex digits. */
	656	case 'x':
	657	if (opts->r6rs_escapes_p)
	658	SCM_READ_HEX_ESCAPE (10, ';');
	659	else
	660	SCM_READ_HEX_ESCAPE (2, '\0');
	661	break;
		/* \u (4 digits) and \U (6 digits) are only accepted without R6RS
		   escapes; with them enabled, control falls through to the error
		   in the default case. */
	662	case 'u':
	663	if (!opts->r6rs_escapes_p)
	664	{
	665	SCM_READ_HEX_ESCAPE (4, '\0');
	666	break;
	667	}
	668	case 'U':
	669	if (!opts->r6rs_escapes_p)
	670	{
	671	SCM_READ_HEX_ESCAPE (6, '\0');
	672	break;
	673	}
	674	default:
	675	bad_escaped:
	676	scm_i_input_error (FUNC_NAME, port,
	677	"illegal character in escape sequence: ~S",
	678	scm_list_1 (SCM_MAKE_CHAR (c)));
	679	}
	680	}
	681
	682	c_str[c_str_len++] = c;
	683	}
	684
	685	if (scm_is_null (str))
	686	/* Fast path: we got a string that fits in C_STR. */
	687	str = scm_from_utf32_stringn (c_str, c_str_len);
	688	else
	689	{
		/* Push the last partial chunk, then join the chunks, which were
		   collected in reverse order. */
	690	if (c_str_len > 0)
	691	str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
	692
	693	str = scm_string_concatenate_reverse (str, SCM_UNDEFINED, SCM_UNDEFINED);
	694	}
	695
	696	return maybe_annotate_source (str, port, opts, line, column);
	697	}
	698	#undef FUNC_NAME
	699
	700
	701	static SCM
	702	scm_read_number (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
	703	{
	704	SCM result, str = SCM_EOL;
	705	char local_buffer[READER_BUFFER_SIZE], *buffer;
	706	size_t bytes_read;
	707	scm_t_port *pt = SCM_PTAB_ENTRY (port);
	708
	709	/* Need to capture line and column numbers here. */
	710	long line = SCM_LINUM (port);
	711	int column = SCM_COL (port) - 1;
	712
	713	scm_ungetc (chr, port);
	714	buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
	715	&bytes_read);
	716
	717	str = scm_from_stringn (buffer, bytes_read, pt->encoding, pt->ilseq_handler);
	718
	719	result = scm_string_to_number (str, SCM_UNDEFINED);
	720	if (scm_is_false (result))
	721	{
	722	/* Return a symbol instead of a number */
	723	if (opts->case_insensitive_p)
	724	str = scm_string_downcase_x (str);
	725	result = scm_string_to_symbol (str);
	726	}
	727	else if (SCM_NIMP (result))
	728	result = maybe_annotate_source (result, port, opts, line, column);
	729
	730	SCM_COL (port) += scm_i_string_length (str);
	731	return result;
	732	}
	733
	734	static SCM
	735	scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
	736	{
	737	SCM result;
	738	int ends_with_colon = 0;
	739	size_t bytes_read;
	740	int postfix = (opts->keyword_style == KEYWORD_STYLE_POSTFIX);
	741	char local_buffer[READER_BUFFER_SIZE], *buffer;
	742	scm_t_port *pt = SCM_PTAB_ENTRY (port);
	743	SCM str;
	744
	745	scm_ungetc (chr, port);
	746	buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
	747	&bytes_read);
	748	if (bytes_read > 0)
	749	ends_with_colon = buffer[bytes_read - 1] == ':';
	750
	751	if (postfix && ends_with_colon && (bytes_read > 1))
	752	{
	753	str = scm_from_stringn (buffer, bytes_read - 1,
	754	pt->encoding, pt->ilseq_handler);
	755
	756	if (opts->case_insensitive_p)
	757	str = scm_string_downcase_x (str);
	758	result = scm_symbol_to_keyword (scm_string_to_symbol (str));
	759	}
	760	else
	761	{
	762	str = scm_from_stringn (buffer, bytes_read,
	763	pt->encoding, pt->ilseq_handler);
	764
	765	if (opts->case_insensitive_p)
	766	str = scm_string_downcase_x (str);
	767	result = scm_string_to_symbol (str);
	768	}
	769
	770	SCM_COL (port) += scm_i_string_length (str);
	771	return result;
	772	}
	773
	774	static SCM
	775	scm_read_number_and_radix (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
	776	#define FUNC_NAME "scm_lreadr"
	777	{
	778	SCM result;
	779	size_t read;
	780	char local_buffer[READER_BUFFER_SIZE], *buffer;
	781	unsigned int radix;
	782	SCM str;
	783	scm_t_port *pt;
	784
	785	switch (chr)
	786	{
	787	case 'B':
	788	case 'b':
	789	radix = 2;
	790	break;
	791
	792	case 'o':
	793	case 'O':
	794	radix = 8;
	795	break;
	796
	797	case 'd':
	798	case 'D':
	799	radix = 10;
	800	break;
	801
	802	case 'x':
	803	case 'X':
	804	radix = 16;
	805	break;
	806
	807	default:
	808	scm_ungetc (chr, port);
	809	scm_ungetc ('#', port);
	810	radix = 10;
	811	}
	812
	813	buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
	814	&read);
	815
	816	pt = SCM_PTAB_ENTRY (port);
	817	str = scm_from_stringn (buffer, read, pt->encoding, pt->ilseq_handler);
	818
	819	result = scm_string_to_number (str, scm_from_uint (radix));
	820
	821	SCM_COL (port) += scm_i_string_length (str);
	822
	823	if (scm_is_true (result))
	824	return result;
	825
	826	scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
	827
	828	return SCM_BOOL_F;
	829	}
	830	#undef FUNC_NAME
	831
	832	static SCM
	833	scm_read_quote (int chr, SCM port, scm_t_read_opts *opts)
	834	{
	835	SCM p;
	836	long line = SCM_LINUM (port);
	837	int column = SCM_COL (port) - 1;
	838
	839	switch (chr)
	840	{
	841	case '`':
	842	p = scm_sym_quasiquote;
	843	break;
	844
	845	case '\'':
	846	p = scm_sym_quote;
	847	break;
	848
	849	case ',':
	850	{
	851	scm_t_wchar c;
	852
	853	c = scm_getc (port);
	854	if ('@' == c)
	855	p = scm_sym_uq_splicing;
	856	else
	857	{
	858	scm_ungetc (c, port);
	859	p = scm_sym_unquote;
	860	}
	861	break;
	862	}
	863
	864	default:
	865	fprintf (stderr, "%s: unhandled quote character (%i)\n",
	866	"scm_read_quote", chr);
	867	abort ();
	868	}
	869
	870	p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL);
	871	return maybe_annotate_source (p, port, opts, line, column);
	872	}
	873
	874	SCM_SYMBOL (sym_syntax, "syntax");
	875	SCM_SYMBOL (sym_quasisyntax, "quasisyntax");
	876	SCM_SYMBOL (sym_unsyntax, "unsyntax");
	877	SCM_SYMBOL (sym_unsyntax_splicing, "unsyntax-splicing");
	878
	879	static SCM
	880	scm_read_syntax (int chr, SCM port, scm_t_read_opts *opts)
	881	{
	882	SCM p;
	883	long line = SCM_LINUM (port);
	884	int column = SCM_COL (port) - 1;
	885
	886	switch (chr)
	887	{
	888	case '`':
	889	p = sym_quasisyntax;
	890	break;
	891
	892	case '\'':
	893	p = sym_syntax;
	894	break;
	895
	896	case ',':
	897	{
	898	int c;
	899
	900	c = scm_getc (port);
	901	if ('@' == c)
	902	p = sym_unsyntax_splicing;
	903	else
	904	{
	905	scm_ungetc (c, port);
	906	p = sym_unsyntax;
	907	}
	908	break;
	909	}
	910
	911	default:
	912	fprintf (stderr, "%s: unhandled syntax character (%i)\n",
	913	"scm_read_syntax", chr);
	914	abort ();
	915	}
	916
	917	p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL);
	918	return maybe_annotate_source (p, port, opts, line, column);
	919	}
	920
	921	static SCM
	922	scm_read_nil (int chr, SCM port, scm_t_read_opts *opts)
	923	{
	924	SCM id = scm_read_mixed_case_symbol (chr, port, opts);
	925
	926	if (!scm_is_eq (id, sym_nil))
	927	scm_i_input_error ("scm_read_nil", port,
	928	"unexpected input while reading #nil: ~a",
	929	scm_list_1 (id));
	930
	931	return SCM_ELISP_NIL;
	932	}
	933
	934	static SCM
	935	scm_read_semicolon_comment (int chr, SCM port)
	936	{
	937	int c;
	938
	939	/* We use the get_byte here because there is no need to get the
	940	locale correct with comment input. This presumes that newline
	941	always represents itself no matter what the encoding is. */
	942	for (c = scm_get_byte_or_eof (port);
	943	(c != EOF) && (c != '\n');
	944	c = scm_get_byte_or_eof (port));
	945
	946	return SCM_UNSPECIFIED;
	947	}
	948
	949	\f
	950	/* Sharp readers, i.e. readers called after a `#' sign has been read. */
	951
	952	static SCM
	953	scm_read_boolean (int chr, SCM port)
	954	{
	955	switch (chr)
	956	{
	957	case 't':
	958	case 'T':
	959	return SCM_BOOL_T;
	960
	961	case 'f':
	962	case 'F':
	963	return SCM_BOOL_F;
	964	}
	965
	966	return SCM_UNSPECIFIED;
	967	}
	968
	969	static SCM
	970	scm_read_character (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
	971	#define FUNC_NAME "scm_lreadr"
	972	{
	973	char buffer[READER_CHAR_NAME_MAX_SIZE];
	974	SCM charname;
	975	size_t charname_len, bytes_read;
	976	scm_t_wchar cp;
	977	int overflow;
	978	scm_t_port *pt;
	979
	980	overflow = read_token (port, opts, buffer, READER_CHAR_NAME_MAX_SIZE,
	981	&bytes_read);
	982	if (overflow)
	983	scm_i_input_error (FUNC_NAME, port, "character name too long", SCM_EOL);
	984
	985	if (bytes_read == 0)
	986	{
	987	chr = scm_getc (port);
	988	if (chr == EOF)
	989	scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
	990	"while reading character", SCM_EOL);
	991
	992	/* CHR must be a token delimiter, like a whitespace. */
	993	return (SCM_MAKE_CHAR (chr));
	994	}
	995
	996	pt = SCM_PTAB_ENTRY (port);
	997
	998	/* Simple ASCII characters can be processed immediately. Also, simple
	999	ISO-8859-1 characters can be processed immediately if the encoding for this
	1000	port is ISO-8859-1. */
	1001	if (bytes_read == 1 && ((unsigned char) buffer[0] <= 127 \|\| pt->encoding == NULL))
	1002	{
	1003	SCM_COL (port) += 1;
	1004	return SCM_MAKE_CHAR (buffer[0]);
	1005	}
	1006
	1007	/* Otherwise, convert the buffer into a proper scheme string for
	1008	processing. */
	1009	charname = scm_from_stringn (buffer, bytes_read, pt->encoding,
	1010	pt->ilseq_handler);
	1011	charname_len = scm_i_string_length (charname);
	1012	SCM_COL (port) += charname_len;
	1013	cp = scm_i_string_ref (charname, 0);
	1014	if (charname_len == 1)
	1015	return SCM_MAKE_CHAR (cp);
	1016
	1017	/* Ignore dotted circles, which may be used to keep combining characters from
	1018	combining with the backslash in #\charname. */
	1019	if (cp == SCM_CODEPOINT_DOTTED_CIRCLE && charname_len == 2)
	1020	return SCM_MAKE_CHAR (scm_i_string_ref (charname, 1));
	1021
	1022	if (cp >= '0' && cp < '8')
	1023	{
	1024	/* Dirk:FIXME:: This type of character syntax is not R5RS
	1025	* compliant. Further, it should be verified that the constant
	1026	* does only consist of octal digits. */
	1027	SCM p = scm_string_to_number (charname, scm_from_uint (8));
	1028	if (SCM_I_INUMP (p))
	1029	{
	1030	scm_t_wchar c = scm_to_uint32 (p);
	1031	if (SCM_IS_UNICODE_CHAR (c))
	1032	return SCM_MAKE_CHAR (c);
	1033	else
	1034	scm_i_input_error (FUNC_NAME, port,
	1035	"out-of-range octal character escape: ~a",
	1036	scm_list_1 (charname));
	1037	}
	1038	}
	1039
	1040	if (cp == 'x' && (charname_len > 1))
	1041	{
	1042	SCM p;
	1043
	1044	/* Convert from hex, skipping the initial 'x' character in CHARNAME */
	1045	p = scm_string_to_number (scm_c_substring (charname, 1, charname_len),
	1046	scm_from_uint (16));
	1047	if (SCM_I_INUMP (p))
	1048	{
	1049	scm_t_wchar c = scm_to_uint32 (p);
	1050	if (SCM_IS_UNICODE_CHAR (c))
	1051	return SCM_MAKE_CHAR (c);
	1052	else
	1053	scm_i_input_error (FUNC_NAME, port,
	1054	"out-of-range hex character escape: ~a",
	1055	scm_list_1 (charname));
	1056	}
	1057	}
	1058
	1059	/* The names of characters should never have non-Latin1
	1060	characters. */
	1061	if (scm_i_is_narrow_string (charname)
	1062	\|\| scm_i_try_narrow_string (charname))
	1063	{ SCM ch = scm_i_charname_to_char (scm_i_string_chars (charname),
	1064	charname_len);
	1065	if (scm_is_true (ch))
	1066	return ch;
	1067	}
	1068
	1069	scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
	1070	scm_list_1 (charname));
	1071
	1072	return SCM_UNSPECIFIED;
	1073	}
	1074	#undef FUNC_NAME
	1075
	1076	static SCM
	1077	scm_read_keyword (int chr, SCM port, scm_t_read_opts *opts)
	1078	{
	1079	SCM symbol;
	1080
	1081	/* Read the symbol that comprises the keyword. Doing this instead of
	1082	invoking a specific symbol reader function allows `scm_read_keyword ()'
	1083	to adapt to the delimiters currently valid of symbols.
	1084
	1085	XXX: This implementation allows sloppy syntaxes like `#: key'. */
	1086	symbol = scm_read_expression (port, opts);
	1087	if (!scm_is_symbol (symbol))
	1088	scm_i_input_error ("scm_read_keyword", port,
	1089	"keyword prefix `~a' not followed by a symbol: ~s",
	1090	scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
	1091
	1092	return (scm_symbol_to_keyword (symbol));
	1093	}
	1094
	1095	static SCM
	1096	scm_read_vector (int chr, SCM port, scm_t_read_opts *opts,
	1097	long line, int column)
	1098	{
	1099	/* Note: We call `scm_read_sexp ()' rather than READER here in order to
	1100	guarantee that it's going to do what we want. After all, this is an
	1101	implementation detail of `scm_read_vector ()', not a desirable
	1102	property. */
	1103	return maybe_annotate_source (scm_vector (scm_read_sexp (chr, port, opts)),
	1104	port, opts, line, column);
	1105	}
	1106
	1107	/* Helper used by scm_read_array */
	1108	static int
	1109	read_decimal_integer (SCM port, int c, ssize_t *resp)
	1110	{
	1111	ssize_t sign = 1;
	1112	ssize_t res = 0;
	1113	int got_it = 0;
	1114
	1115	if (c == '-')
	1116	{
	1117	sign = -1;
	1118	c = scm_getc (port);
	1119	}
	1120
	1121	while ('0' <= c && c <= '9')
	1122	{
	1123	res = 10*res + c-'0';
	1124	got_it = 1;
	1125	c = scm_getc (port);
	1126	}
	1127
	1128	if (got_it)
	1129	resp = sign res;
	1130	return c;
	1131	}
	1132
	1133	/* Read an array. This function can also read vectors and uniform
	1134	vectors. Also, the conflict between '#f' and '#f32' and '#f64' is
	1135	handled here.
	1136
	1137	C is the first character read after the '#'. */
	1138	static SCM
	1139	scm_read_array (int c, SCM port, scm_t_read_opts *opts, long line, int column)
	1140	{
	1141	ssize_t rank;
	1142	scm_t_wchar tag_buf[8];
	1143	int tag_len;
	1144
	1145	SCM tag, shape = SCM_BOOL_F, elements, array;
	1146
	1147	/* XXX - shortcut for ordinary vectors. Shouldn't be necessary but
	1148	the array code can not deal with zero-length dimensions yet, and
	1149	we want to allow zero-length vectors, of course. */
	1150	if (c == '(')
	1151	return scm_read_vector (c, port, opts, line, column);
	1152
	1153	/* Disambiguate between '#f' and uniform floating point vectors. */
	1154	if (c == 'f')
	1155	{
	1156	c = scm_getc (port);
	1157	if (c != '3' && c != '6')
	1158	{
	1159	if (c != EOF)
	1160	scm_ungetc (c, port);
	1161	return SCM_BOOL_F;
	1162	}
	1163	rank = 1;
	1164	tag_buf[0] = 'f';
	1165	tag_len = 1;
	1166	goto continue_reading_tag;
	1167	}
	1168
	1169	/* Read rank. */
	1170	rank = 1;
	1171	c = read_decimal_integer (port, c, &rank);
	1172	if (rank < 0)
	1173	scm_i_input_error (NULL, port, "array rank must be non-negative",
	1174	SCM_EOL);
	1175
	1176	/* Read tag. */
	1177	tag_len = 0;
	1178	continue_reading_tag:
	1179	while (c != EOF && c != '(' && c != '@' && c != ':'
	1180	&& tag_len < sizeof tag_buf / sizeof tag_buf[0])
	1181	{
	1182	tag_buf[tag_len++] = c;
	1183	c = scm_getc (port);
	1184	}
	1185	if (tag_len == 0)
	1186	tag = SCM_BOOL_T;
	1187	else
	1188	{
	1189	tag = scm_string_to_symbol (scm_from_utf32_stringn (tag_buf, tag_len));
	1190	if (tag_len == sizeof tag_buf / sizeof tag_buf[0])
	1191	scm_i_input_error (NULL, port, "invalid array tag, starting with: ~a",
	1192	scm_list_1 (tag));
	1193	}
	1194
	1195	/* Read shape. */
	1196	if (c == '@' \|\| c == ':')
	1197	{
	1198	shape = SCM_EOL;
	1199
	1200	do
	1201	{
	1202	ssize_t lbnd = 0, len = 0;
	1203	SCM s;
	1204
	1205	if (c == '@')
	1206	{
	1207	c = scm_getc (port);
	1208	c = read_decimal_integer (port, c, &lbnd);
	1209	}
	1210
	1211	s = scm_from_ssize_t (lbnd);
	1212
	1213	if (c == ':')
	1214	{
	1215	c = scm_getc (port);
	1216	c = read_decimal_integer (port, c, &len);
	1217	if (len < 0)
	1218	scm_i_input_error (NULL, port,
	1219	"array length must be non-negative",
	1220	SCM_EOL);
	1221
	1222	s = scm_list_2 (s, scm_from_ssize_t (lbnd+len-1));
	1223	}
	1224
	1225	shape = scm_cons (s, shape);
	1226	} while (c == '@' \|\| c == ':');
	1227
	1228	shape = scm_reverse_x (shape, SCM_EOL);
	1229	}
	1230
	1231	/* Read nested lists of elements. */
	1232	if (c != '(')
	1233	scm_i_input_error (NULL, port,
	1234	"missing '(' in vector or array literal",
	1235	SCM_EOL);
	1236	elements = scm_read_sexp (c, port, opts);
	1237
	1238	if (scm_is_false (shape))
	1239	shape = scm_from_ssize_t (rank);
	1240	else if (scm_ilength (shape) != rank)
	1241	scm_i_input_error
	1242	(NULL, port,
	1243	"the number of shape specifications must match the array rank",
	1244	SCM_EOL);
	1245
	1246	/* Handle special print syntax of rank zero arrays; see
	1247	scm_i_print_array for a rationale. */
	1248	if (rank == 0)
	1249	{
	1250	if (!scm_is_pair (elements))
	1251	scm_i_input_error (NULL, port,
	1252	"too few elements in array literal, need 1",
	1253	SCM_EOL);
	1254	if (!scm_is_null (SCM_CDR (elements)))
	1255	scm_i_input_error (NULL, port,
	1256	"too many elements in array literal, want 1",
	1257	SCM_EOL);
	1258	elements = SCM_CAR (elements);
	1259	}
	1260
	1261	/* Construct array, annotate with source location, and return. */
	1262	array = scm_list_to_typed_array (tag, shape, elements);
	1263	return maybe_annotate_source (array, port, opts, line, column);
	1264	}
	1265
	1266	static SCM
	1267	scm_read_srfi4_vector (int chr, SCM port, scm_t_read_opts *opts,
	1268	long line, int column)
	1269	{
	1270	return scm_read_array (chr, port, opts, line, column);
	1271	}
	1272
	1273	static SCM
	1274	scm_read_bytevector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
	1275	long line, int column)
	1276	{
	1277	chr = scm_getc (port);
	1278	if (chr != 'u')
	1279	goto syntax;
	1280
	1281	chr = scm_getc (port);
	1282	if (chr != '8')
	1283	goto syntax;
	1284
	1285	chr = scm_getc (port);
	1286	if (chr != '(')
	1287	goto syntax;
	1288
	1289	return maybe_annotate_source
	1290	(scm_u8_list_to_bytevector (scm_read_sexp (chr, port, opts)),
	1291	port, opts, line, column);
	1292
	1293	syntax:
	1294	scm_i_input_error ("read_bytevector", port,
	1295	"invalid bytevector prefix",
	1296	SCM_MAKE_CHAR (chr));
	1297	return SCM_UNSPECIFIED;
	1298	}
	1299
	1300	static SCM
	1301	scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
	1302	long line, int column)
	1303	{
	1304	/* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
	1305	terribly inefficient but who cares? */
	1306	SCM s_bits = SCM_EOL;
	1307
	1308	for (chr = scm_getc (port);
	1309	(chr != EOF) && ((chr == '0') \|\| (chr == '1'));
	1310	chr = scm_getc (port))
	1311	{
	1312	s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
	1313	}
	1314
	1315	if (chr != EOF)
	1316	scm_ungetc (chr, port);
	1317
	1318	return maybe_annotate_source
	1319	(scm_bitvector (scm_reverse_x (s_bits, SCM_EOL)),
	1320	port, opts, line, column);
	1321	}
	1322
	1323	static SCM
	1324	scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
	1325	{
	1326	int bang_seen = 0;
	1327
	1328	for (;;)
	1329	{
	1330	int c = scm_getc (port);
	1331
	1332	if (c == EOF)
	1333	scm_i_input_error ("skip_block_comment", port,
	1334	"unterminated `#! ... !#' comment", SCM_EOL);
	1335
	1336	if (c == '!')
	1337	bang_seen = 1;
	1338	else if (c == '#' && bang_seen)
	1339	break;
	1340	else
	1341	bang_seen = 0;
	1342	}
	1343
	1344	return SCM_UNSPECIFIED;
	1345	}
	1346
	1347	static void set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts,
	1348	int value);
	1349	static void set_port_square_brackets_p (SCM port, scm_t_read_opts *opts,
	1350	int value);
	1351	static void set_port_curly_infix_p (SCM port, scm_t_read_opts *opts,
	1352	int value);
	1353
	1354	static SCM
	1355	scm_read_shebang (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
	1356	{
	1357	char name[READER_DIRECTIVE_NAME_MAX_SIZE + 1];
	1358	int c;
	1359	int i = 0;
	1360
	1361	while (i <= READER_DIRECTIVE_NAME_MAX_SIZE)
	1362	{
	1363	c = scm_getc (port);
	1364	if (c == EOF)
	1365	scm_i_input_error ("skip_block_comment", port,
	1366	"unterminated `#! ... !#' comment", SCM_EOL);
	1367	else if (('a' <= c && c <= 'z') \|\| ('0' <= c && c <= '9') \|\| c == '-')
	1368	name[i++] = c;
	1369	else if (CHAR_IS_DELIMITER (c))
	1370	{
	1371	scm_ungetc (c, port);
	1372	name[i] = '\0';
	1373	if (0 == strcmp ("r6rs", name))
	1374	; /* Silently ignore */
	1375	else if (0 == strcmp ("fold-case", name))
	1376	set_port_case_insensitive_p (port, opts, 1);
	1377	else if (0 == strcmp ("no-fold-case", name))
	1378	set_port_case_insensitive_p (port, opts, 0);
	1379	else if (0 == strcmp ("curly-infix", name))
	1380	set_port_curly_infix_p (port, opts, 1);
	1381	else if (0 == strcmp ("curly-infix-and-bracket-lists", name))
	1382	{
	1383	set_port_curly_infix_p (port, opts, 1);
	1384	set_port_square_brackets_p (port, opts, 0);
	1385	}
	1386	else
	1387	break;
	1388
	1389	return SCM_UNSPECIFIED;
	1390	}
	1391	else
	1392	{
	1393	scm_ungetc (c, port);
	1394	break;
	1395	}
	1396	}
	1397	while (i > 0)
	1398	scm_ungetc (name[--i], port);
	1399	return scm_read_scsh_block_comment (chr, port);
	1400	}
	1401
	1402	static SCM
	1403	scm_read_r6rs_block_comment (scm_t_wchar chr, SCM port)
	1404	{
	1405	/* Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
	1406	nested. So care must be taken. */
	1407	int nesting_level = 1;
	1408
	1409	int a = scm_getc (port);
	1410
	1411	if (a == EOF)
	1412	scm_i_input_error ("scm_read_r6rs_block_comment", port,
	1413	"unterminated `#\| ... \|#' comment", SCM_EOL);
	1414
	1415	while (nesting_level > 0)
	1416	{
	1417	int b = scm_getc (port);
	1418
	1419	if (b == EOF)
	1420	scm_i_input_error ("scm_read_r6rs_block_comment", port,
	1421	"unterminated `#\| ... \|#' comment", SCM_EOL);
	1422
	1423	if (a == '\|' && b == '#')
	1424	{
	1425	nesting_level--;
	1426	b = EOF;
	1427	}
	1428	else if (a == '#' && b == '\|')
	1429	{
	1430	nesting_level++;
	1431	b = EOF;
	1432	}
	1433
	1434	a = b;
	1435	}
	1436
	1437	return SCM_UNSPECIFIED;
	1438	}
	1439
	1440	static SCM
	1441	scm_read_commented_expression (scm_t_wchar chr, SCM port,
	1442	scm_t_read_opts *opts)
	1443	{
	1444	scm_t_wchar c;
	1445
	1446	c = flush_ws (port, opts, (char *) NULL);
	1447	if (EOF == c)
	1448	scm_i_input_error ("read_commented_expression", port,
	1449	"no expression after #; comment", SCM_EOL);
	1450	scm_ungetc (c, port);
	1451	scm_read_expression (port, opts);
	1452	return SCM_UNSPECIFIED;
	1453	}
	1454
	1455	static SCM
	1456	scm_read_extended_symbol (scm_t_wchar chr, SCM port)
	1457	{
	1458	/* Guile's extended symbol read syntax looks like this:
	1459
	1460	#{This is all a symbol name}#
	1461
	1462	So here, CHR is expected to be `{'. */
	1463	int saw_brace = 0;
	1464	size_t len = 0;
	1465	SCM buf = scm_i_make_string (1024, NULL, 0);
	1466
	1467	buf = scm_i_string_start_writing (buf);
	1468
	1469	while ((chr = scm_getc (port)) != EOF)
	1470	{
	1471	if (saw_brace)
	1472	{
	1473	if (chr == '#')
	1474	{
	1475	break;
	1476	}
	1477	else
	1478	{
	1479	saw_brace = 0;
	1480	scm_i_string_set_x (buf, len++, '}');
	1481	}
	1482	}
	1483
	1484	if (chr == '}')
	1485	saw_brace = 1;
	1486	else if (chr == '\\')
	1487	{
	1488	/* It used to be that print.c would print extended-read-syntax
	1489	symbols with backslashes before "non-standard" chars, but
	1490	this routine wouldn't do anything with those escapes.
	1491	Bummer. What we've done is to change print.c to output
	1492	R6RS hex escapes for those characters, relying on the fact
	1493	that the extended read syntax would never put a `\' before
	1494	an `x'. For now, we just ignore other instances of
	1495	backslash in the string. */
	1496	switch ((chr = scm_getc (port)))
	1497	{
	1498	case EOF:
	1499	goto done;
	1500	case 'x':
	1501	{
	1502	scm_t_wchar c;
	1503
	1504	SCM_READ_HEX_ESCAPE (10, ';');
	1505	scm_i_string_set_x (buf, len++, c);
	1506	break;
	1507
	1508	str_eof:
	1509	chr = EOF;
	1510	goto done;
	1511
	1512	bad_escaped:
	1513	scm_i_string_stop_writing ();
	1514	scm_i_input_error ("scm_read_extended_symbol", port,
	1515	"illegal character in escape sequence: ~S",
	1516	scm_list_1 (SCM_MAKE_CHAR (c)));
	1517	break;
	1518	}
	1519	default:
	1520	scm_i_string_set_x (buf, len++, chr);
	1521	break;
	1522	}
	1523	}
	1524	else
	1525	scm_i_string_set_x (buf, len++, chr);
	1526
	1527	if (len >= scm_i_string_length (buf) - 2)
	1528	{
	1529	SCM addy;
	1530
	1531	scm_i_string_stop_writing ();
	1532	addy = scm_i_make_string (1024, NULL, 0);
	1533	buf = scm_string_append (scm_list_2 (buf, addy));
	1534	len = 0;
	1535	buf = scm_i_string_start_writing (buf);
	1536	}
	1537	}
	1538
	1539	done:
	1540	scm_i_string_stop_writing ();
	1541	if (chr == EOF)
	1542	scm_i_input_error ("scm_read_extended_symbol", port,
	1543	"end of file while reading symbol", SCM_EOL);
	1544
	1545	return (scm_string_to_symbol (scm_c_substring (buf, 0, len)));
	1546	}
	1547
	1548
	1549	\f
	1550	/* Top-level token readers, i.e., dispatchers. */
	1551
	1552	static SCM
	1553	scm_read_sharp_extension (int chr, SCM port, scm_t_read_opts *opts)
	1554	{
	1555	SCM proc;
	1556
	1557	proc = scm_get_hash_procedure (chr);
	1558	if (scm_is_true (scm_procedure_p (proc)))
	1559	{
	1560	long line = SCM_LINUM (port);
	1561	int column = SCM_COL (port) - 2;
	1562	SCM got;
	1563
	1564	got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
	1565
	1566	if (opts->record_positions_p && SCM_NIMP (got)
	1567	&& !scm_i_has_source_properties (got))
	1568	scm_i_set_source_properties_x (got, line, column, SCM_FILENAME (port));
	1569
	1570	return got;
	1571	}
	1572
	1573	return SCM_UNSPECIFIED;
	1574	}
	1575
	1576	/* The reader for the sharp `#' character. It basically dispatches reads
	1577	among the above token readers. */
	1578	static SCM
	1579	scm_read_sharp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
	1580	long line, int column)
	1581	#define FUNC_NAME "scm_lreadr"
	1582	{
	1583	SCM result;
	1584
	1585	chr = scm_getc (port);
	1586
	1587	result = scm_read_sharp_extension (chr, port, opts);
	1588	if (!scm_is_eq (result, SCM_UNSPECIFIED))
	1589	return result;
	1590
	1591	switch (chr)
	1592	{
	1593	case '\\':
	1594	return (scm_read_character (chr, port, opts));
	1595	case '(':
	1596	return (scm_read_vector (chr, port, opts, line, column));
	1597	case 's':
	1598	case 'u':
	1599	case 'f':
	1600	case 'c':
	1601	/* This one may return either a boolean or an SRFI-4 vector. */
	1602	return (scm_read_srfi4_vector (chr, port, opts, line, column));
	1603	case 'v':
	1604	return (scm_read_bytevector (chr, port, opts, line, column));
	1605	case '*':
	1606	return (scm_read_guile_bit_vector (chr, port, opts, line, column));
	1607	case 't':
	1608	case 'T':
	1609	case 'F':
	1610	return (scm_read_boolean (chr, port));
	1611	case ':':
	1612	return (scm_read_keyword (chr, port, opts));
	1613	case '0': case '1': case '2': case '3': case '4':
	1614	case '5': case '6': case '7': case '8': case '9':
	1615	case '@':
	1616	#if SCM_ENABLE_DEPRECATED
	1617	/* See below for 'i' and 'e'. */
	1618	case 'a':
	1619	case 'y':
	1620	case 'h':
	1621	case 'l':
	1622	#endif
	1623	return (scm_read_array (chr, port, opts, line, column));
	1624
	1625	case 'i':
	1626	case 'e':
	1627	#if SCM_ENABLE_DEPRECATED
	1628	{
	1629	/* When next char is '(', it really is an old-style
	1630	uniform array. */
	1631	scm_t_wchar next_c = scm_getc (port);
	1632	if (next_c != EOF)
	1633	scm_ungetc (next_c, port);
	1634	if (next_c == '(')
	1635	return scm_read_array (chr, port, opts, line, column);
	1636	/* Fall through. */
	1637	}
	1638	#endif
	1639	case 'b':
	1640	case 'B':
	1641	case 'o':
	1642	case 'O':
	1643	case 'd':
	1644	case 'D':
	1645	case 'x':
	1646	case 'X':
	1647	case 'I':
	1648	case 'E':
	1649	return (scm_read_number_and_radix (chr, port, opts));
	1650	case '{':
	1651	return (scm_read_extended_symbol (chr, port));
	1652	case '!':
	1653	return (scm_read_shebang (chr, port, opts));
	1654	case ';':
	1655	return (scm_read_commented_expression (chr, port, opts));
	1656	case '`':
	1657	case '\'':
	1658	case ',':
	1659	return (scm_read_syntax (chr, port, opts));
	1660	case 'n':
	1661	return (scm_read_nil (chr, port, opts));
	1662	default:
	1663	result = scm_read_sharp_extension (chr, port, opts);
	1664	if (scm_is_eq (result, SCM_UNSPECIFIED))
	1665	{
	1666	/* To remain compatible with 1.8 and earlier, the following
	1667	characters have lower precedence than `read-hash-extend'
	1668	characters. */
	1669	switch (chr)
	1670	{
	1671	case '\|':
	1672	return scm_read_r6rs_block_comment (chr, port);
	1673	default:
	1674	scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
	1675	scm_list_1 (SCM_MAKE_CHAR (chr)));
	1676	}
	1677	}
	1678	else
	1679	return result;
	1680	}
	1681
	1682	return SCM_UNSPECIFIED;
	1683	}
	1684	#undef FUNC_NAME
	1685
	1686	static SCM
	1687	read_inner_expression (SCM port, scm_t_read_opts *opts)
	1688	#define FUNC_NAME "read_inner_expression"
	1689	{
	1690	while (1)
	1691	{
	1692	scm_t_wchar chr;
	1693
	1694	chr = scm_getc (port);
	1695
	1696	switch (chr)
	1697	{
	1698	case SCM_WHITE_SPACES:
	1699	case SCM_LINE_INCREMENTORS:
	1700	break;
	1701	case ';':
	1702	(void) scm_read_semicolon_comment (chr, port);
	1703	break;
	1704	case '{':
	1705	if (opts->curly_infix_p)
	1706	{
	1707	if (opts->neoteric_p)
	1708	return scm_read_sexp (chr, port, opts);
	1709	else
	1710	{
	1711	SCM expr;
	1712
	1713	/* Enable neoteric expressions within curly braces */
	1714	opts->neoteric_p = 1;
	1715	expr = scm_read_sexp (chr, port, opts);
	1716	opts->neoteric_p = 0;
	1717	return expr;
	1718	}
	1719	}
	1720	else
	1721	return scm_read_mixed_case_symbol (chr, port, opts);
	1722	case '[':
	1723	if (opts->square_brackets_p)
	1724	return scm_read_sexp (chr, port, opts);
	1725	else if (opts->curly_infix_p)
	1726	{
	1727	/* The syntax of neoteric expressions requires that '[' be
	1728	a delimiter when curly-infix is enabled, so it cannot
	1729	be part of an unescaped symbol. We might as well do
	1730	something useful with it, so we adopt Kawa's convention:
	1731	[...] => ($bracket-list$ ...) */
	1732	long line = SCM_LINUM (port);
	1733	int column = SCM_COL (port) - 1;
	1734	return maybe_annotate_source
	1735	(scm_cons (sym_bracket_list, scm_read_sexp (chr, port, opts)),
	1736	port, opts, line, column);
	1737	}
	1738	else
	1739	return scm_read_mixed_case_symbol (chr, port, opts);
	1740	case '(':
	1741	return (scm_read_sexp (chr, port, opts));
	1742	case '"':
	1743	return (scm_read_string (chr, port, opts));
	1744	case '\'':
	1745	case '`':
	1746	case ',':
	1747	return (scm_read_quote (chr, port, opts));
	1748	case '#':
	1749	{
	1750	long line = SCM_LINUM (port);
	1751	int column = SCM_COL (port) - 1;
	1752	SCM result = scm_read_sharp (chr, port, opts, line, column);
	1753	if (scm_is_eq (result, SCM_UNSPECIFIED))
	1754	/* We read a comment or some such. */
	1755	break;
	1756	else
	1757	return result;
	1758	}
	1759	case ')':
	1760	scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
	1761	break;
	1762	case '}':
	1763	if (opts->curly_infix_p)
	1764	scm_i_input_error (FUNC_NAME, port, "unexpected \"}\"", SCM_EOL);
	1765	else
	1766	return scm_read_mixed_case_symbol (chr, port, opts);
	1767	case ']':
	1768	if (opts->square_brackets_p)
	1769	scm_i_input_error (FUNC_NAME, port, "unexpected \"]\"", SCM_EOL);
	1770	/* otherwise fall through */
	1771	case EOF:
	1772	return SCM_EOF_VAL;
	1773	case ':':
	1774	if (opts->keyword_style == KEYWORD_STYLE_PREFIX)
	1775	return scm_symbol_to_keyword (scm_read_expression (port, opts));
	1776	/* Fall through. */
	1777
	1778	default:
	1779	{
	1780	if (((chr >= '0') && (chr <= '9'))
	1781	\|\| (strchr ("+-.", chr)))
	1782	return (scm_read_number (chr, port, opts));
	1783	else
	1784	return (scm_read_mixed_case_symbol (chr, port, opts));
	1785	}
	1786	}
	1787	}
	1788	}
	1789	#undef FUNC_NAME
	1790
	1791	static SCM
	1792	scm_read_expression (SCM port, scm_t_read_opts *opts)
	1793	#define FUNC_NAME "scm_read_expression"
	1794	{
	1795	if (!opts->neoteric_p)
	1796	return read_inner_expression (port, opts);
	1797	else
	1798	{
	1799	long line = 0;
	1800	int column = 0;
	1801	SCM expr;
	1802
	1803	if (opts->record_positions_p)
	1804	{
	1805	/* We need to get the position of the first non-whitespace
	1806	character in order to correctly annotate neoteric
	1807	expressions. For example, for the expression 'f(x)', the
	1808	first call to 'read_inner_expression' reads the 'f' (which
	1809	cannot be annotated), and then we later read the '(x)' and
	1810	use it to construct the new list (f x). */
	1811	int c = flush_ws (port, opts, (char *) NULL);
	1812	if (c == EOF)
	1813	return SCM_EOF_VAL;
	1814	scm_ungetc (c, port);
	1815	line = SCM_LINUM (port);
	1816	column = SCM_COL (port);
	1817	}
	1818
	1819	expr = read_inner_expression (port, opts);
	1820
	1821	/* 'expr' is the first component of the neoteric expression. Now
	1822	we loop, and as long as the next character is '(', '[', or '{',
	1823	(without any intervening whitespace), we use it to construct a
	1824	new expression. For example, f{n - 1}(x) => ((f (- n 1)) x). */
	1825	for (;;)
	1826	{
	1827	int chr = scm_getc (port);
	1828
	1829	if (chr == '(')
	1830	/* e(...) => (e ...) */
	1831	expr = scm_cons (expr, scm_read_sexp (chr, port, opts));
	1832	else if (chr == '[')
	1833	/* e[...] => ($bracket-apply$ e ...) */
	1834	expr = scm_cons (sym_bracket_apply,
	1835	scm_cons (expr,
	1836	scm_read_sexp (chr, port, opts)));
	1837	else if (chr == '{')
	1838	{
	1839	SCM arg = scm_read_sexp (chr, port, opts);
	1840
	1841	if (scm_is_null (arg))
	1842	expr = scm_list_1 (expr); /* e{} => (e) */
	1843	else
	1844	expr = scm_list_2 (expr, arg); /* e{...} => (e {...}) */
	1845	}
	1846	else
	1847	{
	1848	if (chr != EOF)
	1849	scm_ungetc (chr, port);
	1850	break;
	1851	}
	1852	maybe_annotate_source (expr, port, opts, line, column);
	1853	}
	1854	return expr;
	1855	}
	1856	}
	1857	#undef FUNC_NAME
	1858
	1859	\f
	1860	/* Actual reader. */
	1861
	1862	static void init_read_options (SCM port, scm_t_read_opts *opts);
	1863
	1864	SCM_DEFINE (scm_read, "read", 0, 1, 0,
	1865	(SCM port),
	1866	"Read an s-expression from the input port @var{port}, or from\n"
	1867	"the current input port if @var{port} is not specified.\n"
	1868	"Any whitespace before the next token is discarded.")
	1869	#define FUNC_NAME s_scm_read
	1870	{
	1871	scm_t_read_opts opts;
	1872	int c;
	1873
	1874	if (SCM_UNBNDP (port))
	1875	port = scm_current_input_port ();
	1876	SCM_VALIDATE_OPINPORT (1, port);
	1877
	1878	init_read_options (port, &opts);
	1879
	1880	c = flush_ws (port, &opts, (char *) NULL);
	1881	if (EOF == c)
	1882	return SCM_EOF_VAL;
	1883	scm_ungetc (c, port);
	1884
	1885	return (scm_read_expression (port, &opts));
	1886	}
	1887	#undef FUNC_NAME
	1888
	1889
	1890	\f
	1891
	1892	/* Manipulate the read-hash-procedures alist. This could be written in
	1893	Scheme, but maybe it will also be used by C code during initialisation. */
	1894	SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
	1895	(SCM chr, SCM proc),
	1896	"Install the procedure @var{proc} for reading expressions\n"
	1897	"starting with the character sequence @code{#} and @var{chr}.\n"
	1898	"@var{proc} will be called with two arguments: the character\n"
	1899	"@var{chr} and the port to read further data from. The object\n"
	1900	"returned will be the return value of @code{read}. \n"
	1901	"Passing @code{#f} for @var{proc} will remove a previous setting. \n"
	1902	)
	1903	#define FUNC_NAME s_scm_read_hash_extend
	1904	{
	1905	SCM this;
	1906	SCM prev;
	1907
	1908	SCM_VALIDATE_CHAR (1, chr);
	1909	SCM_ASSERT (scm_is_false (proc)
	1910	\|\| scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
	1911	proc, SCM_ARG2, FUNC_NAME);
	1912
	1913	/* Check if chr is already in the alist. */
	1914	this = scm_i_read_hash_procedures_ref ();
	1915	prev = SCM_BOOL_F;
	1916	while (1)
	1917	{
	1918	if (scm_is_null (this))
	1919	{
	1920	/* not found, so add it to the beginning. */
	1921	if (scm_is_true (proc))
	1922	{
	1923	SCM new = scm_cons (scm_cons (chr, proc),
	1924	scm_i_read_hash_procedures_ref ());
	1925	scm_i_read_hash_procedures_set_x (new);
	1926	}
	1927	break;
	1928	}
	1929	if (scm_is_eq (chr, SCM_CAAR (this)))
	1930	{
	1931	/* already in the alist. */
	1932	if (scm_is_false (proc))
	1933	{
	1934	/* remove it. */
	1935	if (scm_is_false (prev))
	1936	{
	1937	SCM rest = SCM_CDR (scm_i_read_hash_procedures_ref ());
	1938	scm_i_read_hash_procedures_set_x (rest);
	1939	}
	1940	else
	1941	scm_set_cdr_x (prev, SCM_CDR (this));
	1942	}
	1943	else
	1944	{
	1945	/* replace it. */
	1946	scm_set_cdr_x (SCM_CAR (this), proc);
	1947	}
	1948	break;
	1949	}
	1950	prev = this;
	1951	this = SCM_CDR (this);
	1952	}
	1953
	1954	return SCM_UNSPECIFIED;
	1955	}
	1956	#undef FUNC_NAME
	1957
	1958	/* Recover the read-hash procedure corresponding to char c. */
	1959	static SCM
	1960	scm_get_hash_procedure (int c)
	1961	{
	1962	SCM rest = scm_i_read_hash_procedures_ref ();
	1963
	1964	while (1)
	1965	{
	1966	if (scm_is_null (rest))
	1967	return SCM_BOOL_F;
	1968
	1969	if (SCM_CHAR (SCM_CAAR (rest)) == c)
	1970	return SCM_CDAR (rest);
	1971
	1972	rest = SCM_CDR (rest);
	1973	}
	1974	}
	1975
	1976	#define SCM_ENCODING_SEARCH_SIZE (500)
	1977
	1978	/* Search the first few hundred characters of a file for an Emacs-like coding
	1979	declaration. Returns either NULL or a string whose storage has been
	1980	allocated with `scm_gc_malloc ()'. */
	1981	char *
	1982	scm_i_scan_for_encoding (SCM port)
	1983	{
	1984	scm_t_port *pt;
	1985	char header[SCM_ENCODING_SEARCH_SIZE+1];
	1986	size_t bytes_read, encoding_length, i;
	1987	char *encoding = NULL;
	1988	int utf8_bom = 0;
	1989	char pos, encoding_start;
	1990	int in_comment;
	1991
	1992	pt = SCM_PTAB_ENTRY (port);
	1993
	1994	if (pt->rw_active == SCM_PORT_WRITE)
	1995	scm_flush (port);
	1996
	1997	if (pt->rw_random)
	1998	pt->rw_active = SCM_PORT_READ;
	1999
	2000	if (pt->read_pos == pt->read_end)
	2001	{
	2002	/* We can use the read buffer, and thus avoid a seek. */
	2003	if (scm_fill_input (port) == EOF)
	2004	return NULL;
	2005
	2006	bytes_read = pt->read_end - pt->read_pos;
	2007	if (bytes_read > SCM_ENCODING_SEARCH_SIZE)
	2008	bytes_read = SCM_ENCODING_SEARCH_SIZE;
	2009
	2010	if (bytes_read <= 1)
	2011	/* An unbuffered port -- don't scan. */
	2012	return NULL;
	2013
	2014	memcpy (header, pt->read_pos, bytes_read);
	2015	header[bytes_read] = '\0';
	2016	}
	2017	else
	2018	{
	2019	/* Try to read some bytes and then seek back. Not all ports
	2020	support seeking back; and indeed some file ports (like
	2021	/dev/urandom) will succeed on an lseek (fd, 0, SEEK_CUR)---the
	2022	check performed by SCM_FPORT_FDES---but fail to seek
	2023	backwards. Hence this block comes second. We prefer to use
	2024	the read buffer in-place. */
	2025	if (SCM_FPORTP (port) && !SCM_FDES_RANDOM_P (SCM_FPORT_FDES (port)))
	2026	return NULL;
	2027
	2028	bytes_read = scm_c_read (port, header, SCM_ENCODING_SEARCH_SIZE);
	2029	header[bytes_read] = '\0';
	2030	scm_seek (port, scm_from_int (0), scm_from_int (SEEK_SET));
	2031	}
	2032
	2033	if (bytes_read > 3
	2034	&& header[0] == '\xef' && header[1] == '\xbb' && header[2] == '\xbf')
	2035	utf8_bom = 1;
	2036
	2037	/* search past "coding[:=]" */
	2038	pos = header;
	2039	while (1)
	2040	{
	2041	if ((pos = strstr(pos, "coding")) == NULL)
	2042	return NULL;
	2043
	2044	pos += strlen("coding");
	2045	if (pos - header >= SCM_ENCODING_SEARCH_SIZE \|\|
	2046	(pos == ':' \|\| pos == '='))
	2047	{
	2048	pos ++;
	2049	break;
	2050	}
	2051	}
	2052
	2053	/* skip spaces */
	2054	while (pos - header <= SCM_ENCODING_SEARCH_SIZE &&
	2055	(pos == ' ' \|\| pos == '\t'))
	2056	pos ++;
	2057
	2058	/* grab the next token */
	2059	encoding_start = pos;
	2060	i = 0;
	2061	while (encoding_start + i - header <= SCM_ENCODING_SEARCH_SIZE
	2062	&& encoding_start + i - header < bytes_read
	2063	&& (isalnum ((int) encoding_start[i])
	2064	\|\| strchr ("_-.:/,+=()", encoding_start[i]) != NULL))
	2065	i++;
	2066
	2067	encoding_length = i;
	2068	if (encoding_length == 0)
	2069	return NULL;
	2070
	2071	encoding = scm_gc_strndup (encoding_start, encoding_length, "encoding");
	2072	for (i = 0; i < encoding_length; i++)
	2073	encoding[i] = toupper ((int) encoding[i]);
	2074
	2075	/* push backwards to make sure we were in a comment */
	2076	in_comment = 0;
	2077	pos = encoding_start;
	2078	while (pos >= header)
	2079	{
	2080	if (*pos == ';')
	2081	{
	2082	in_comment = 1;
	2083	break;
	2084	}
	2085	else if (*pos == '\n' \|\| pos == header)
	2086	{
	2087	/* This wasn't in a semicolon comment. Check for a
	2088	hash-bang comment. */
	2089	char *beg = strstr (header, "#!");
	2090	char *end = strstr (header, "!#");
	2091	if (beg < encoding_start && encoding_start + encoding_length <= end)
	2092	in_comment = 1;
	2093	break;
	2094	}
	2095	else
	2096	{
	2097	pos --;
	2098	continue;
	2099	}
	2100	}
	2101	if (!in_comment)
	2102	/* This wasn't in a comment */
	2103	return NULL;
	2104
	2105	if (utf8_bom && strcasecmp(encoding, "UTF-8"))
	2106	scm_misc_error (NULL,
	2107	"the port input declares the encoding ~s but is encoded as UTF-8",
	2108	scm_list_1 (scm_from_locale_string (encoding)));
	2109
	2110	return encoding;
	2111	}
	2112
	2113	SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0,
	2114	(SCM port),
	2115	"Scans the port for an Emacs-like character coding declaration\n"
	2116	"near the top of the contents of a port with random-accessible contents.\n"
	2117	"The coding declaration is of the form\n"
	2118	"@code{coding: XXXXX} and must appear in a scheme comment.\n"
	2119	"\n"
	2120	"Returns a string containing the character encoding of the file\n"
	2121	"if a declaration was found, or @code{#f} otherwise.\n")
	2122	#define FUNC_NAME s_scm_file_encoding
	2123	{
	2124	char *enc;
	2125	SCM s_enc;
	2126
	2127	SCM_VALIDATE_OPINPORT (SCM_ARG1, port);
	2128
	2129	enc = scm_i_scan_for_encoding (port);
	2130	if (enc == NULL)
	2131	return SCM_BOOL_F;
	2132	else
	2133	{
	2134	s_enc = scm_from_locale_string (enc);
	2135	return s_enc;
	2136	}
	2137
	2138	return SCM_BOOL_F;
	2139	}
	2140	#undef FUNC_NAME
	2141
	2142	\f
	2143	/* Per-port read options.
	2144
	2145	We store per-port read options in the 'port-read-options' key of the
	2146	port's alist, which is stored in the internal port structure. The
	2147	value stored in the alist is a single integer that contains a two-bit
	2148	field for each read option.
	2149
	2150	If a bit field contains READ_OPTION_INHERIT (3), that indicates that
	2151	the applicable value should be inherited from the corresponding
	2152	global read option. Otherwise, the bit field contains the value of
	2153	the read option. For boolean read options that have been set
	2154	per-port, the possible values are 0 or 1. If the 'keyword_style'
	2155	read option has been set per-port, its possible values are those in
	2156	'enum t_keyword_style'. */
	2157
	2158	/* Key to read options in per-port alists. */
	2159	SCM_SYMBOL (sym_port_read_options, "port-read-options");
	2160
	2161	/* Offsets of bit fields for each per-port override */
	2162	#define READ_OPTION_COPY_SOURCE_P 0
	2163	#define READ_OPTION_RECORD_POSITIONS_P 2
	2164	#define READ_OPTION_CASE_INSENSITIVE_P 4
	2165	#define READ_OPTION_KEYWORD_STYLE 6
	2166	#define READ_OPTION_R6RS_ESCAPES_P 8
	2167	#define READ_OPTION_SQUARE_BRACKETS_P 10
	2168	#define READ_OPTION_HUNGRY_EOL_ESCAPES_P 12
	2169	#define READ_OPTION_CURLY_INFIX_P 14
	2170
	2171	/* The total width in bits of the per-port overrides */
	2172	#define READ_OPTIONS_NUM_BITS 16
	2173
	2174	#define READ_OPTIONS_INHERIT_ALL ((1UL << READ_OPTIONS_NUM_BITS) - 1)
	2175	#define READ_OPTIONS_MAX_VALUE READ_OPTIONS_INHERIT_ALL
	2176
	2177	#define READ_OPTION_MASK 3
	2178	#define READ_OPTION_INHERIT 3
	2179
	2180	static void
	2181	set_port_read_option (SCM port, int option, int new_value)
	2182	{
	2183	SCM alist, scm_read_options;
	2184	unsigned int read_options;
	2185
	2186	new_value &= READ_OPTION_MASK;
	2187	alist = scm_i_port_alist (port);
	2188	scm_read_options = scm_assq_ref (alist, sym_port_read_options);
	2189	if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE))
	2190	read_options = scm_to_uint (scm_read_options);
	2191	else
	2192	read_options = READ_OPTIONS_INHERIT_ALL;
	2193	read_options &= ~(READ_OPTION_MASK << option);
	2194	read_options \|= new_value << option;
	2195	scm_read_options = scm_from_uint (read_options);
	2196	alist = scm_assq_set_x (alist, sym_port_read_options, scm_read_options);
	2197	scm_i_set_port_alist_x (port, alist);
	2198	}
	2199
	2200	/* Set OPTS and PORT's case-insensitivity according to VALUE. */
	2201	static void
	2202	set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts, int value)
	2203	{
	2204	value = !!value;
	2205	opts->case_insensitive_p = value;
	2206	set_port_read_option (port, READ_OPTION_CASE_INSENSITIVE_P, value);
	2207	}
	2208
	2209	/* Set OPTS and PORT's square_brackets_p option according to VALUE. */
	2210	static void
	2211	set_port_square_brackets_p (SCM port, scm_t_read_opts *opts, int value)
	2212	{
	2213	value = !!value;
	2214	opts->square_brackets_p = value;
	2215	set_port_read_option (port, READ_OPTION_SQUARE_BRACKETS_P, value);
	2216	}
	2217
	2218	/* Set OPTS and PORT's curly_infix_p option according to VALUE. */
	2219	static void
	2220	set_port_curly_infix_p (SCM port, scm_t_read_opts *opts, int value)
	2221	{
	2222	value = !!value;
	2223	opts->curly_infix_p = value;
	2224	set_port_read_option (port, READ_OPTION_CURLY_INFIX_P, value);
	2225	}
	2226
	2227	/* Initialize OPTS based on PORT's read options and the global read
	2228	options. */
	2229	static void
	2230	init_read_options (SCM port, scm_t_read_opts *opts)
	2231	{
	2232	SCM alist, val, scm_read_options;
	2233	unsigned int read_options, x;
	2234
	2235	alist = scm_i_port_alist (port);
	2236	scm_read_options = scm_assq_ref (alist, sym_port_read_options);
	2237
	2238	if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE))
	2239	read_options = scm_to_uint (scm_read_options);
	2240	else
	2241	read_options = READ_OPTIONS_INHERIT_ALL;
	2242
	2243	x = READ_OPTION_MASK & (read_options >> READ_OPTION_KEYWORD_STYLE);
	2244	if (x == READ_OPTION_INHERIT)
	2245	{
	2246	val = SCM_PACK (SCM_KEYWORD_STYLE);
	2247	if (scm_is_eq (val, scm_keyword_prefix))
	2248	x = KEYWORD_STYLE_PREFIX;
	2249	else if (scm_is_eq (val, scm_keyword_postfix))
	2250	x = KEYWORD_STYLE_POSTFIX;
	2251	else
	2252	x = KEYWORD_STYLE_HASH_PREFIX;
	2253	}
	2254	opts->keyword_style = x;
	2255
	2256	#define RESOLVE_BOOLEAN_OPTION(NAME, name) \
	2257	do \
	2258	{ \
	2259	x = READ_OPTION_MASK & (read_options >> READ_OPTION_ ## NAME); \
	2260	if (x == READ_OPTION_INHERIT) \
	2261	x = !!SCM_ ## NAME; \
	2262	opts->name = x; \
	2263	} \
	2264	while (0)
	2265
	2266	RESOLVE_BOOLEAN_OPTION (COPY_SOURCE_P, copy_source_p);
	2267	RESOLVE_BOOLEAN_OPTION (RECORD_POSITIONS_P, record_positions_p);
	2268	RESOLVE_BOOLEAN_OPTION (CASE_INSENSITIVE_P, case_insensitive_p);
	2269	RESOLVE_BOOLEAN_OPTION (R6RS_ESCAPES_P, r6rs_escapes_p);
	2270	RESOLVE_BOOLEAN_OPTION (SQUARE_BRACKETS_P, square_brackets_p);
	2271	RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P, hungry_eol_escapes_p);
	2272	RESOLVE_BOOLEAN_OPTION (CURLY_INFIX_P, curly_infix_p);
	2273
	2274	#undef RESOLVE_BOOLEAN_OPTION
	2275
	2276	opts->neoteric_p = 0;
	2277	}
	2278
	2279	void
	2280	scm_init_read ()
	2281	{
	2282	SCM read_hash_procs;
	2283
	2284	read_hash_procs = scm_make_fluid_with_default (SCM_EOL);
	2285
	2286	scm_i_read_hash_procedures =
	2287	SCM_VARIABLE_LOC (scm_c_define ("%read-hash-procedures", read_hash_procs));
	2288
	2289	scm_init_opts (scm_read_options, scm_read_opts);
	2290	#include "libguile/read.x"
	2291	}
	2292
	2293	/*
	2294	Local Variables:
	2295	c-file-style: "gnu"
	2296	End:
	2297	*/