Make literal strings (i.e., returned by `read') read-only.
[bpt/guile.git] / libguile / read.c
CommitLineData
7f74cf9a 1/* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007, 2008 Free Software
dd72382c 2 * Foundation, Inc.
0f2d19dd 3 *
73be1d9e
MV
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
0f2d19dd 8 *
73be1d9e
MV
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
0f2d19dd 13 *
73be1d9e
MV
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
92205699 16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
73be1d9e 17 */
1bbd0b84 18
1bbd0b84 19
0f2d19dd
JB
20\f
21
7337d56d
LC
22#ifdef HAVE_CONFIG_H
23# include <config.h>
24#endif
25
0f2d19dd 26#include <stdio.h>
7337d56d
LC
27#include <ctype.h>
28#include <string.h>
7337d56d 29
a0599745
MD
30#include "libguile/_scm.h"
31#include "libguile/chars.h"
32#include "libguile/eval.h"
33#include "libguile/unif.h"
34#include "libguile/keywords.h"
35#include "libguile/alist.h"
36#include "libguile/srcprop.h"
37#include "libguile/hashtab.h"
38#include "libguile/hash.h"
39#include "libguile/ports.h"
40#include "libguile/root.h"
41#include "libguile/strings.h"
ba1b2226 42#include "libguile/strports.h"
a0599745 43#include "libguile/vectors.h"
a0599745 44#include "libguile/validate.h"
a4022e69 45#include "libguile/srfi-4.h"
7337d56d 46#include "libguile/srfi-13.h"
ba1b2226 47
a0599745 48#include "libguile/read.h"
22fc179a
HWN
49#include "libguile/private-options.h"
50
0f2d19dd
JB
51
52\f
53
5bf6a6f0 54SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
c7733771 55SCM_SYMBOL (scm_keyword_prefix, "prefix");
ef4cbc08 56SCM_SYMBOL (scm_keyword_postfix, "postfix");
c7733771 57
92c2555f 58scm_t_option scm_read_opts[] = {
b7ff98dd
MD
59 { SCM_OPTION_BOOLEAN, "copy", 0,
60 "Copy source code expressions." },
ac74fc22 61 { SCM_OPTION_BOOLEAN, "positions", 0,
deca31e1
GH
62 "Record positions of source code expressions." },
63 { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
c7733771 64 "Convert symbols to lower case."},
f1267706 65 { SCM_OPTION_SCM, "keywords", SCM_UNPACK (SCM_BOOL_F),
904fabb6 66 "Style of keyword recognition: #f, 'prefix or 'postfix."},
16353acc 67#if SCM_ENABLE_ELISP
16353acc
NJ
68 { SCM_OPTION_BOOLEAN, "elisp-vectors", 0,
69 "Support Elisp vector syntax, namely `[...]'."},
cd21f5eb 70 { SCM_OPTION_BOOLEAN, "elisp-strings", 0,
62560650 71 "Support `\\(' and `\\)' in strings."},
16353acc 72#endif
62560650 73 { 0, },
a16f6fe7
MD
74};
75
39e8f371
HWN
76/*
77 Give meaningful error messages for errors
78
79 We use the format
80
ba1b2226 81 FILE:LINE:COL: MESSAGE
39e8f371
HWN
82 This happened in ....
83
84 This is not standard GNU format, but the test-suite likes the real
85 message to be in front.
86
39e8f371
HWN
87 */
88
89
a4022e69
MV
90void
91scm_i_input_error (char const *function,
92 SCM port, const char *message, SCM arg)
ba1b2226 93{
29a837fd
MV
94 SCM fn = (scm_is_string (SCM_FILENAME(port))
95 ? SCM_FILENAME(port)
96 : scm_from_locale_string ("#<unknown port>"));
ba1b2226 97
29a837fd 98 SCM string_port = scm_open_output_string ();
ba1b2226
HWN
99 SCM string = SCM_EOL;
100 scm_simple_format (string_port,
272632a6 101 scm_from_locale_string ("~A:~S:~S: ~A"),
29a837fd 102 scm_list_4 (fn,
b3aa4626 103 scm_from_long (SCM_LINUM (port) + 1),
b9bd8526 104 scm_from_int (SCM_COL (port) + 1),
272632a6 105 scm_from_locale_string (message)));
ba1b2226
HWN
106
107 string = scm_get_output_string (string_port);
108 scm_close_output_port (string_port);
272632a6 109 scm_error_scm (scm_from_locale_symbol ("read-error"),
a4022e69 110 function? scm_from_locale_string (function) : SCM_BOOL_F,
ba1b2226 111 string,
dd72382c 112 arg,
ba1b2226
HWN
113 SCM_BOOL_F);
114}
39e8f371
HWN
115
116
a1ec6916 117SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
1bbd0b84 118 (SCM setting),
dc7fa443
MG
119 "Option interface for the read options. Instead of using\n"
120 "this procedure directly, use the procedures @code{read-enable},\n"
3939e9df 121 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
1bbd0b84 122#define FUNC_NAME s_scm_read_options
a16f6fe7 123{
b7ff98dd
MD
124 SCM ans = scm_options (setting,
125 scm_read_opts,
1bbd0b84 126 FUNC_NAME);
b7ff98dd
MD
127 if (SCM_COPY_SOURCE_P)
128 SCM_RECORD_POSITIONS_P = 1;
a16f6fe7
MD
129 return ans;
130}
1bbd0b84 131#undef FUNC_NAME
a16f6fe7 132
14de3b42
GH
133/* An association list mapping extra hash characters to procedures. */
134static SCM *scm_read_hash_procedures;
deca31e1 135
0f2d19dd 136
7337d56d
LC
137\f
138/* Token readers. */
0f2d19dd 139
0f2d19dd 140
7337d56d
LC
/* Capacity, in bytes, of the on-stack C buffer that symbol and number
   tokens are read into.  */
#define READER_BUFFER_SIZE            128

/* Capacity, in bytes, of the on-stack C buffer that string literals are
   accumulated in before being flushed to a Scheme string.  */
#define READER_STRING_BUFFER_SIZE     512

/* Longest Scheme character name (the token after `#\') accepted.  */
#define READER_CHAR_NAME_MAX_SIZE     50
1cc91f1b 149
94115ae3 150
7337d56d
LC
/* `isblank' is only in C99, so blanks are tested by hand: space, tab,
   newline, form feed and carriage return.  */
#define CHAR_IS_BLANK_(_chr)                                    \
  (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n')      \
   || ((_chr) == '\f') || ((_chr) == '\r'))

#ifdef MSDOS
/* On MS-DOS, character code 26 (Ctrl-Z) is treated as a blank too.  The
   macro argument must be `_chr' here, not whatever `chr' happens to be
   in the caller's scope.  */
# define CHAR_IS_BLANK(_chr)                    \
  ((CHAR_IS_BLANK_ (_chr)) || ((_chr) == 26))
#else
# define CHAR_IS_BLANK CHAR_IS_BLANK_
#endif


/* R5RS one-character delimiters (see section 7.1.1, ``Lexical
   structure'').  The argument is parenthesized at every use so that any
   expression may be passed.  */
#define CHAR_IS_R5RS_DELIMITER(c)                               \
  (CHAR_IS_BLANK (c)                                            \
   || ((c) == ')') || ((c) == '(') || ((c) == ';') || ((c) == '"'))

#define CHAR_IS_DELIMITER  CHAR_IS_R5RS_DELIMITER

/* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
   Structure''.  */
#define CHAR_IS_EXPONENT_MARKER(_chr)                           \
  (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f')        \
   || ((_chr) == 'd') || ((_chr) == 'l'))

/* An inlinable version of `scm_c_downcase ()': values above UCHAR_MAX
   are returned unchanged, everything else goes through `tolower ()'.  */
#define CHAR_DOWNCASE(_chr)                             \
  (((_chr) <= UCHAR_MAX) ? tolower (_chr) : (_chr))
181
182
454866e0
LC
183/* Read an SCSH block comment. */
184static inline SCM scm_read_scsh_block_comment (int chr, SCM port);
185
d41668fa
LC
186/* Read from PORT until a delimiter (e.g., a whitespace) is read. Return
187 zero if the whole token fits in BUF, non-zero otherwise. */
7337d56d
LC
188static inline int
189read_token (SCM port, char *buf, size_t buf_size, size_t *read)
0520c320 190{
7337d56d 191 *read = 0;
0520c320 192
7337d56d 193 while (*read < buf_size)
0520c320 194 {
7337d56d 195 int chr;
0520c320 196
7337d56d
LC
197 chr = scm_getc (port);
198 chr = (SCM_CASE_INSENSITIVE_P ? CHAR_DOWNCASE (chr) : chr);
199
200 if (chr == EOF)
201 return 0;
202 else if (CHAR_IS_DELIMITER (chr))
203 {
204 scm_ungetc (chr, port);
205 return 0;
206 }
0520c320 207 else
7337d56d
LC
208 {
209 *buf = (char) chr;
210 buf++, (*read)++;
211 }
0520c320 212 }
7337d56d
LC
213
214 return 1;
0520c320 215}
1cc91f1b 216
7337d56d
LC
217
218/* Skip whitespace from PORT and return the first non-whitespace character
219 read. Raise an error on end-of-file. */
220static int
221flush_ws (SCM port, const char *eoferr)
0f2d19dd
JB
222{
223 register int c;
224 while (1)
b7f3516f 225 switch (c = scm_getc (port))
0f2d19dd
JB
226 {
227 case EOF:
228 goteof:
229 if (eoferr)
d156d3b7 230 {
a4022e69
MV
231 scm_i_input_error (eoferr,
232 port,
233 "end of file",
234 SCM_EOL);
d156d3b7 235 }
0f2d19dd 236 return c;
7337d56d 237
0f2d19dd
JB
238 case ';':
239 lp:
b7f3516f 240 switch (c = scm_getc (port))
0f2d19dd
JB
241 {
242 case EOF:
243 goto goteof;
244 default:
245 goto lp;
246 case SCM_LINE_INCREMENTORS:
247 break;
248 }
249 break;
7337d56d 250
454866e0
LC
251 case '#':
252 switch (c = scm_getc (port))
253 {
254 case EOF:
255 eoferr = "read_sharp";
256 goto goteof;
257 case '!':
258 scm_read_scsh_block_comment (c, port);
259 break;
260 default:
261 scm_ungetc (c, port);
262 return '#';
263 }
264 break;
265
0f2d19dd 266 case SCM_LINE_INCREMENTORS:
0f2d19dd 267 case SCM_SINGLE_SPACES:
0f2d19dd 268 case '\t':
0f2d19dd 269 break;
7337d56d 270
0f2d19dd
JB
271 default:
272 return c;
273 }
7337d56d
LC
274
275 return 0;
0f2d19dd
JB
276}
277
278
7337d56d
LC
279\f
280/* Token readers. */
1cc91f1b 281
7337d56d
LC
282static SCM scm_read_expression (SCM port);
283static SCM scm_read_sharp (int chr, SCM port);
284static SCM scm_get_hash_procedure (int c);
285static SCM recsexpr (SCM obj, long line, int column, SCM filename);
0f2d19dd
JB
286
287
09a4f039 288static SCM
7337d56d
LC
289scm_read_sexp (int chr, SCM port)
290#define FUNC_NAME "scm_i_lreadparen"
09a4f039 291{
7337d56d
LC
292 register int c;
293 register SCM tmp;
294 register SCM tl, ans = SCM_EOL;
bd22f1c7 295 SCM tl2 = SCM_EOL, ans2 = SCM_EOL, copy = SCM_BOOL_F;
7337d56d
LC
296 static const int terminating_char = ')';
297
298 /* Need to capture line and column numbers here. */
299 long line = SCM_LINUM (port);
300 int column = SCM_COL (port) - 1;
f9c68a47 301
f9c68a47 302
7337d56d
LC
303 c = flush_ws (port, FUNC_NAME);
304 if (terminating_char == c)
305 return SCM_EOL;
f9c68a47 306
7337d56d
LC
307 scm_ungetc (c, port);
308 if (scm_is_eq (scm_sym_dot,
309 (tmp = scm_read_expression (port))))
310 {
311 ans = scm_read_expression (port);
312 if (terminating_char != (c = flush_ws (port, FUNC_NAME)))
313 scm_i_input_error (FUNC_NAME, port, "missing close paren",
314 SCM_EOL);
315 return ans;
316 }
1cc91f1b 317
7337d56d
LC
318 /* Build the head of the list structure. */
319 ans = tl = scm_cons (tmp, SCM_EOL);
320
321 if (SCM_COPY_SOURCE_P)
322 ans2 = tl2 = scm_cons (scm_is_pair (tmp)
323 ? copy
324 : tmp,
325 SCM_EOL);
326
327 while (terminating_char != (c = flush_ws (port, FUNC_NAME)))
0f2d19dd 328 {
7337d56d 329 SCM new_tail;
0f2d19dd 330
7337d56d
LC
331 scm_ungetc (c, port);
332 if (scm_is_eq (scm_sym_dot,
333 (tmp = scm_read_expression (port))))
0f2d19dd 334 {
7337d56d
LC
335 SCM_SETCDR (tl, tmp = scm_read_expression (port));
336
337 if (SCM_COPY_SOURCE_P)
338 SCM_SETCDR (tl2, scm_cons (scm_is_pair (tmp) ? copy : tmp,
339 SCM_EOL));
340
341 c = flush_ws (port, FUNC_NAME);
342 if (terminating_char != c)
343 scm_i_input_error (FUNC_NAME, port,
344 "in pair: missing close paren", SCM_EOL);
345 goto exit;
0f2d19dd 346 }
b858464a 347
7337d56d
LC
348 new_tail = scm_cons (tmp, SCM_EOL);
349 SCM_SETCDR (tl, new_tail);
350 tl = new_tail;
351
352 if (SCM_COPY_SOURCE_P)
0f2d19dd 353 {
7337d56d
LC
354 SCM new_tail2 = scm_cons (scm_is_pair (tmp)
355 ? copy
356 : tmp, SCM_EOL);
357 SCM_SETCDR (tl2, new_tail2);
358 tl2 = new_tail2;
359 }
360 }
0f2d19dd 361
7337d56d
LC
362 exit:
363 if (SCM_RECORD_POSITIONS_P)
364 scm_whash_insert (scm_source_whash,
365 ans,
366 scm_make_srcprops (line, column,
367 SCM_FILENAME (port),
368 SCM_COPY_SOURCE_P
369 ? ans2
370 : SCM_UNDEFINED,
371 SCM_EOL));
372 return ans;
373}
374#undef FUNC_NAME
a4022e69 375
7337d56d
LC
376static SCM
377scm_read_string (int chr, SCM port)
378#define FUNC_NAME "scm_lreadr"
379{
380 /* For strings smaller than C_STR, this function creates only one Scheme
381 object (the string returned). */
0f2d19dd 382
7337d56d
LC
383 SCM str = SCM_BOOL_F;
384 char c_str[READER_STRING_BUFFER_SIZE];
385 unsigned c_str_len = 0;
386 int c;
eb42ff25 387
7337d56d
LC
388 while ('"' != (c = scm_getc (port)))
389 {
390 if (c == EOF)
391 str_eof: scm_i_input_error (FUNC_NAME, port,
392 "end of file in string constant",
393 SCM_EOL);
0f2d19dd 394
7337d56d
LC
395 if (c_str_len + 1 >= sizeof (c_str))
396 {
397 /* Flush the C buffer onto a Scheme string. */
398 SCM addy;
deca31e1 399
7337d56d
LC
400 if (str == SCM_BOOL_F)
401 str = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
0f2d19dd 402
7337d56d
LC
403 addy = scm_from_locale_stringn (c_str, c_str_len);
404 str = scm_string_append_shared (scm_list_2 (str, addy));
0f2d19dd 405
7337d56d
LC
406 c_str_len = 0;
407 }
0f2d19dd 408
7337d56d
LC
409 if (c == '\\')
410 switch (c = scm_getc (port))
411 {
412 case EOF:
413 goto str_eof;
414 case '"':
415 case '\\':
416 break;
16353acc 417#if SCM_ENABLE_ELISP
7337d56d
LC
418 case '(':
419 case ')':
420 if (SCM_ESCAPED_PARENS_P)
421 break;
422 goto bad_escaped;
16353acc 423#endif
7337d56d
LC
424 case '\n':
425 continue;
426 case '0':
427 c = '\0';
428 break;
429 case 'f':
430 c = '\f';
431 break;
432 case 'n':
433 c = '\n';
434 break;
435 case 'r':
436 c = '\r';
437 break;
438 case 't':
439 c = '\t';
440 break;
441 case 'a':
442 c = '\007';
443 break;
444 case 'v':
445 c = '\v';
446 break;
447 case 'x':
3c9a524f 448 {
7337d56d
LC
449 int a, b;
450 a = scm_getc (port);
451 if (a == EOF) goto str_eof;
452 b = scm_getc (port);
453 if (b == EOF) goto str_eof;
454 if ('0' <= a && a <= '9') a -= '0';
455 else if ('A' <= a && a <= 'F') a = a - 'A' + 10;
456 else if ('a' <= a && a <= 'f') a = a - 'a' + 10;
457 else goto bad_escaped;
458 if ('0' <= b && b <= '9') b -= '0';
459 else if ('A' <= b && b <= 'F') b = b - 'A' + 10;
460 else if ('a' <= b && b <= 'f') b = b - 'a' + 10;
461 else goto bad_escaped;
462 c = a * 16 + b;
463 break;
3c9a524f 464 }
7337d56d
LC
465 default:
466 bad_escaped:
467 scm_i_input_error (FUNC_NAME, port,
468 "illegal character in escape sequence: ~S",
469 scm_list_1 (SCM_MAKE_CHAR (c)));
470 }
471 c_str[c_str_len++] = c;
472 }
f13b4400 473
7337d56d
LC
474 if (c_str_len > 0)
475 {
476 SCM addy;
0f2d19dd 477
7337d56d
LC
478 addy = scm_from_locale_stringn (c_str, c_str_len);
479 if (str == SCM_BOOL_F)
480 str = addy;
481 else
482 str = scm_string_append_shared (scm_list_2 (str, addy));
0f2d19dd 483 }
7337d56d
LC
484 else
485 str = (str == SCM_BOOL_F) ? scm_nullstr : str;
486
fb2f8886 487 return scm_i_make_read_only_string (str);
0f2d19dd 488}
db4b4ca6
DH
489#undef FUNC_NAME
490
0f2d19dd 491
7337d56d
LC
492static SCM
493scm_read_number (int chr, SCM port)
0f2d19dd 494{
7337d56d
LC
495 SCM result, str = SCM_EOL;
496 char buffer[READER_BUFFER_SIZE];
497 size_t read;
498 int overflow = 0;
0f2d19dd 499
7337d56d
LC
500 scm_ungetc (chr, port);
501 do
0f2d19dd 502 {
7337d56d
LC
503 overflow = read_token (port, buffer, sizeof (buffer), &read);
504
505 if ((overflow) || (scm_is_pair (str)))
506 str = scm_cons (scm_from_locale_stringn (buffer, read), str);
0f2d19dd 507 }
7337d56d 508 while (overflow);
0f2d19dd 509
7337d56d 510 if (scm_is_pair (str))
0f2d19dd 511 {
7337d56d 512 /* The slow path. */
0f2d19dd 513
7337d56d
LC
514 str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
515 result = scm_string_to_number (str, SCM_UNDEFINED);
516 if (!scm_is_true (result))
517 /* Return a symbol instead of a number. */
518 result = scm_string_to_symbol (str);
519 }
520 else
521 {
522 result = scm_c_locale_stringn_to_number (buffer, read, 10);
523 if (!scm_is_true (result))
524 /* Return a symbol instead of a number. */
525 result = scm_from_locale_symboln (buffer, read);
526 }
0f2d19dd 527
7337d56d
LC
528 return result;
529}
0f2d19dd 530
7337d56d
LC
531static SCM
532scm_read_mixed_case_symbol (int chr, SCM port)
533{
534 SCM result, str = SCM_EOL;
ef4cbc08 535 int overflow = 0, ends_with_colon = 0;
7337d56d
LC
536 char buffer[READER_BUFFER_SIZE];
537 size_t read = 0;
ef4cbc08 538 int postfix = scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_postfix);
7337d56d
LC
539
540 scm_ungetc (chr, port);
541 do
542 {
543 overflow = read_token (port, buffer, sizeof (buffer), &read);
544
ef4cbc08
LC
545 if (read > 0)
546 ends_with_colon = (buffer[read - 1] == ':');
547
7337d56d
LC
548 if ((overflow) || (scm_is_pair (str)))
549 str = scm_cons (scm_from_locale_stringn (buffer, read), str);
550 }
551 while (overflow);
552
553 if (scm_is_pair (str))
554 {
555 str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
556 result = scm_string_to_symbol (str);
ef4cbc08
LC
557
558 /* Per SRFI-88, `:' alone is an identifier, not a keyword. */
559 if (postfix && ends_with_colon && (scm_c_string_length (result) > 1))
560 result = scm_symbol_to_keyword (result);
7337d56d
LC
561 }
562 else
ef4cbc08
LC
563 {
564 /* For symbols smaller than `sizeof (buffer)', we don't need to recur
565 to Scheme strings. Therefore, we only create one Scheme object (a
566 symbol) per symbol read. */
567 if (postfix && ends_with_colon && (read > 1))
568 result = scm_from_locale_keywordn (buffer, read - 1);
569 else
570 result = scm_from_locale_symboln (buffer, read);
571 }
7337d56d
LC
572
573 return result;
574}
575
576static SCM
577scm_read_number_and_radix (int chr, SCM port)
578#define FUNC_NAME "scm_lreadr"
579{
580 SCM result, str = SCM_EOL;
581 size_t read;
582 char buffer[READER_BUFFER_SIZE];
583 unsigned int radix;
584 int overflow = 0;
585
586 switch (chr)
587 {
588 case 'B':
589 case 'b':
590 radix = 2;
591 break;
592
593 case 'o':
594 case 'O':
595 radix = 8;
596 break;
597
598 case 'd':
599 case 'D':
600 radix = 10;
601 break;
602
603 case 'x':
604 case 'X':
605 radix = 16;
606 break;
607
608 default:
609 scm_ungetc (chr, port);
610 scm_ungetc ('#', port);
611 radix = 10;
612 }
613
614 do
615 {
616 overflow = read_token (port, buffer, sizeof (buffer), &read);
617
618 if ((overflow) || (scm_is_pair (str)))
619 str = scm_cons (scm_from_locale_stringn (buffer, read), str);
620 }
621 while (overflow);
622
623 if (scm_is_pair (str))
624 {
625 str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
626 result = scm_string_to_number (str, scm_from_uint (radix));
627 }
628 else
629 result = scm_c_locale_stringn_to_number (buffer, read, radix);
630
631 if (scm_is_true (result))
632 return result;
633
634 scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
635
636 return SCM_BOOL_F;
637}
638#undef FUNC_NAME
639
640static SCM
641scm_read_quote (int chr, SCM port)
642{
643 SCM p;
492faee1
LC
644 long line = SCM_LINUM (port);
645 int column = SCM_COL (port) - 1;
7337d56d
LC
646
647 switch (chr)
648 {
649 case '`':
650 p = scm_sym_quasiquote;
651 break;
652
653 case '\'':
654 p = scm_sym_quote;
655 break;
656
657 case ',':
658 {
659 int c;
660
661 c = scm_getc (port);
662 if ('@' == c)
663 p = scm_sym_uq_splicing;
664 else
0f2d19dd 665 {
7337d56d
LC
666 scm_ungetc (c, port);
667 p = scm_sym_unquote;
0f2d19dd 668 }
7337d56d
LC
669 break;
670 }
0f2d19dd 671
7337d56d
LC
672 default:
673 fprintf (stderr, "%s: unhandled quote character (%i)\n",
7f74cf9a 674 "scm_read_quote", chr);
7337d56d 675 abort ();
0f2d19dd 676 }
1cc91f1b 677
7337d56d 678 p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
492faee1
LC
679 if (SCM_RECORD_POSITIONS_P)
680 scm_whash_insert (scm_source_whash, p,
681 scm_make_srcprops (line, column,
682 SCM_FILENAME (port),
683 SCM_COPY_SOURCE_P
684 ? (scm_cons2 (SCM_CAR (p),
685 SCM_CAR (SCM_CDR (p)),
686 SCM_EOL))
687 : SCM_UNDEFINED,
688 SCM_EOL));
689
0f2d19dd 690
7337d56d
LC
691 return p;
692}
693
694static inline SCM
695scm_read_semicolon_comment (int chr, SCM port)
0f2d19dd 696{
0f2d19dd
JB
697 int c;
698
7337d56d
LC
699 for (c = scm_getc (port);
700 (c != EOF) && (c != '\n');
701 c = scm_getc (port));
702
703 return SCM_UNSPECIFIED;
704}
705
706\f
707/* Sharp readers, i.e. readers called after a `#' sign has been read. */
708
709static SCM
710scm_read_boolean (int chr, SCM port)
711{
712 switch (chr)
0f2d19dd 713 {
7337d56d
LC
714 case 't':
715 case 'T':
716 return SCM_BOOL_T;
717
718 case 'f':
719 case 'F':
720 return SCM_BOOL_F;
0f2d19dd 721 }
7337d56d
LC
722
723 return SCM_UNSPECIFIED;
724}
725
726static SCM
727scm_read_character (int chr, SCM port)
728#define FUNC_NAME "scm_lreadr"
729{
730 unsigned c;
731 char charname[READER_CHAR_NAME_MAX_SIZE];
732 size_t charname_len;
733
734 if (read_token (port, charname, sizeof (charname), &charname_len))
735 goto char_error;
736
737 if (charname_len == 0)
0f2d19dd 738 {
7337d56d
LC
739 chr = scm_getc (port);
740 if (chr == EOF)
741 scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
742 "while reading character", SCM_EOL);
743
744 /* CHR must be a token delimiter, like a whitespace. */
745 return (SCM_MAKE_CHAR (chr));
0f2d19dd 746 }
7337d56d
LC
747
748 if (charname_len == 1)
749 return SCM_MAKE_CHAR (charname[0]);
750
751 if (*charname >= '0' && *charname < '8')
752 {
753 /* Dirk:FIXME:: This type of character syntax is not R5RS
754 * compliant. Further, it should be verified that the constant
755 * does only consist of octal digits. Finally, it should be
756 * checked whether the resulting fixnum is in the range of
757 * characters. */
758 SCM p = scm_c_locale_stringn_to_number (charname, charname_len, 8);
759 if (SCM_I_INUMP (p))
760 return SCM_MAKE_CHAR (SCM_I_INUM (p));
761 }
762
763 for (c = 0; c < scm_n_charnames; c++)
764 if (scm_charnames[c]
765 && (!strncasecmp (scm_charnames[c], charname, charname_len)))
766 return SCM_MAKE_CHAR (scm_charnums[c]);
767
768 char_error:
769 scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
770 scm_list_1 (scm_from_locale_stringn (charname,
771 charname_len)));
772
773 return SCM_UNSPECIFIED;
0f2d19dd 774}
db4b4ca6 775#undef FUNC_NAME
0f2d19dd 776
7337d56d
LC
777static inline SCM
778scm_read_keyword (int chr, SCM port)
779{
780 SCM symbol;
781
782 /* Read the symbol that comprises the keyword. Doing this instead of
783 invoking a specific symbol reader function allows `scm_read_keyword ()'
784 to adapt to the delimiters currently valid of symbols.
1cc91f1b 785
7337d56d
LC
786 XXX: This implementation allows sloppy syntaxes like `#: key'. */
787 symbol = scm_read_expression (port);
788 if (!scm_is_symbol (symbol))
7f74cf9a 789 scm_i_input_error ("scm_read_keyword", port,
7337d56d
LC
790 "keyword prefix `~a' not followed by a symbol: ~s",
791 scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
792
793 return (scm_symbol_to_keyword (symbol));
794}
795
796static inline SCM
797scm_read_vector (int chr, SCM port)
09a4f039 798{
7337d56d
LC
799 /* Note: We call `scm_read_sexp ()' rather than READER here in order to
800 guarantee that it's going to do what we want. After all, this is an
801 implementation detail of `scm_read_vector ()', not a desirable
802 property. */
803 return (scm_vector (scm_read_sexp (chr, port)));
804}
09a4f039 805
7337d56d
LC
806static inline SCM
807scm_read_srfi4_vector (int chr, SCM port)
808{
809 return scm_i_read_array (port, chr);
810}
811
812static SCM
813scm_read_guile_bit_vector (int chr, SCM port)
814{
815 /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
816 terribly inefficient but who cares? */
817 SCM s_bits = SCM_EOL;
818
819 for (chr = scm_getc (port);
820 (chr != EOF) && ((chr == '0') || (chr == '1'));
821 chr = scm_getc (port))
09a4f039 822 {
7337d56d 823 s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
09a4f039 824 }
7337d56d
LC
825
826 if (chr != EOF)
827 scm_ungetc (chr, port);
828
829 return scm_bitvector (scm_reverse_x (s_bits, SCM_EOL));
830}
831
832static inline SCM
833scm_read_scsh_block_comment (int chr, SCM port)
834{
835 int bang_seen = 0;
836
837 for (;;)
09a4f039 838 {
7337d56d 839 int c = scm_getc (port);
62850ef3 840
7337d56d
LC
841 if (c == EOF)
842 scm_i_input_error ("skip_block_comment", port,
843 "unterminated `#! ... !#' comment", SCM_EOL);
844
845 if (c == '!')
846 bang_seen = 1;
847 else if (c == '#' && bang_seen)
848 break;
849 else
850 bang_seen = 0;
851 }
852
853 return SCM_UNSPECIFIED;
854}
855
856static SCM
857scm_read_extended_symbol (int chr, SCM port)
858{
859 /* Guile's extended symbol read syntax looks like this:
860
861 #{This is all a symbol name}#
862
863 So here, CHR is expected to be `{'. */
864 SCM result;
865 int saw_brace = 0, finished = 0;
866 size_t len = 0;
867 char buf[1024];
868
869 result = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
870
871 while ((chr = scm_getc (port)) != EOF)
872 {
873 if (saw_brace)
09a4f039 874 {
7337d56d
LC
875 if (chr == '#')
876 {
877 finished = 1;
878 break;
879 }
880 else
881 {
882 saw_brace = 0;
883 buf[len++] = '}';
884 buf[len++] = chr;
885 }
09a4f039 886 }
7337d56d
LC
887 else if (chr == '}')
888 saw_brace = 1;
889 else
890 buf[len++] = chr;
62850ef3 891
7337d56d
LC
892 if (len >= sizeof (buf) - 2)
893 {
894 scm_string_append (scm_list_2 (result,
895 scm_from_locale_stringn (buf, len)));
896 len = 0;
897 }
62850ef3 898
7337d56d
LC
899 if (finished)
900 break;
901 }
902
903 if (len)
904 result = scm_string_append (scm_list_2
905 (result,
906 scm_from_locale_stringn (buf, len)));
907
908 return (scm_string_to_symbol (result));
909}
910
911
912\f
913/* Top-level token readers, i.e., dispatchers. */
914
915static SCM
916scm_read_sharp_extension (int chr, SCM port)
917{
918 SCM proc;
919
920 proc = scm_get_hash_procedure (chr);
921 if (scm_is_true (scm_procedure_p (proc)))
922 {
923 long line = SCM_LINUM (port);
924 int column = SCM_COL (port) - 2;
925 SCM got;
926
927 got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
928 if (!scm_is_eq (got, SCM_UNSPECIFIED))
62850ef3 929 {
7337d56d
LC
930 if (SCM_RECORD_POSITIONS_P)
931 return (recsexpr (got, line, column,
932 SCM_FILENAME (port)));
933 else
934 return got;
62850ef3 935 }
09a4f039 936 }
7337d56d
LC
937
938 return SCM_UNSPECIFIED;
939}
940
941/* The reader for the sharp `#' character. It basically dispatches reads
942 among the above token readers. */
943static SCM
944scm_read_sharp (int chr, SCM port)
945#define FUNC_NAME "scm_lreadr"
946{
947 SCM result;
948
949 chr = scm_getc (port);
950
951 result = scm_read_sharp_extension (chr, port);
952 if (!scm_is_eq (result, SCM_UNSPECIFIED))
953 return result;
954
955 switch (chr)
956 {
957 case '\\':
958 return (scm_read_character (chr, port));
959 case '(':
960 return (scm_read_vector (chr, port));
961 case 's':
962 case 'u':
963 case 'f':
964 /* This one may return either a boolean or an SRFI-4 vector. */
965 return (scm_read_srfi4_vector (chr, port));
966 case '*':
967 return (scm_read_guile_bit_vector (chr, port));
968 case 't':
969 case 'T':
970 case 'F':
971 /* This one may return either a boolean or an SRFI-4 vector. */
972 return (scm_read_boolean (chr, port));
973 case ':':
974 return (scm_read_keyword (chr, port));
975 case '0': case '1': case '2': case '3': case '4':
976 case '5': case '6': case '7': case '8': case '9':
977 case '@':
978#if SCM_ENABLE_DEPRECATED
979 /* See below for 'i' and 'e'. */
980 case 'a':
981 case 'c':
982 case 'y':
983 case 'h':
984 case 'l':
985#endif
986 return (scm_i_read_array (port, chr));
987
988 case 'i':
989 case 'e':
990#if SCM_ENABLE_DEPRECATED
991 {
992 /* When next char is '(', it really is an old-style
993 uniform array. */
994 int next_c = scm_getc (port);
995 if (next_c != EOF)
996 scm_ungetc (next_c, port);
997 if (next_c == '(')
998 return scm_i_read_array (port, chr);
999 /* Fall through. */
1000 }
1001#endif
1002 case 'b':
1003 case 'B':
1004 case 'o':
1005 case 'O':
1006 case 'd':
1007 case 'D':
1008 case 'x':
1009 case 'X':
1010 case 'I':
1011 case 'E':
1012 return (scm_read_number_and_radix (chr, port));
1013 case '{':
1014 return (scm_read_extended_symbol (chr, port));
1015 case '!':
1016 return (scm_read_scsh_block_comment (chr, port));
1017 default:
1018 result = scm_read_sharp_extension (chr, port);
1019 if (scm_is_eq (result, SCM_UNSPECIFIED))
1020 scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
1021 scm_list_1 (SCM_MAKE_CHAR (chr)));
1022 else
1023 return result;
1024 }
1025
1026 return SCM_UNSPECIFIED;
1027}
1028#undef FUNC_NAME
1029
1030static SCM
1031scm_read_expression (SCM port)
1032#define FUNC_NAME "scm_read_expression"
1033{
1034 while (1)
1035 {
1036 register int chr;
1037
1038 chr = scm_getc (port);
1039
1040 switch (chr)
1041 {
1042 case SCM_WHITE_SPACES:
1043 case SCM_LINE_INCREMENTORS:
1044 break;
1045 case ';':
1046 (void) scm_read_semicolon_comment (chr, port);
1047 break;
1048 case '(':
1049 return (scm_read_sexp (chr, port));
1050 case '"':
1051 return (scm_read_string (chr, port));
1052 case '\'':
1053 case '`':
1054 case ',':
1055 return (scm_read_quote (chr, port));
1056 case '#':
1057 {
1058 SCM result;
1059 result = scm_read_sharp (chr, port);
1060 if (scm_is_eq (result, SCM_UNSPECIFIED))
1061 /* We read a comment or some such. */
1062 break;
1063 else
1064 return result;
1065 }
1066 case ')':
1067 scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
1068 break;
1069 case EOF:
1070 return SCM_EOF_VAL;
1071 case ':':
1072 if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix))
1073 return scm_symbol_to_keyword (scm_read_expression (port));
1074 /* Fall through. */
1075
1076 default:
1077 {
1078 if (((chr >= '0') && (chr <= '9'))
1079 || (strchr ("+-.", chr)))
1080 return (scm_read_number (chr, port));
1081 else
1082 return (scm_read_mixed_case_symbol (chr, port));
1083 }
1084 }
1085 }
1086}
1087#undef FUNC_NAME
1088
1089\f
1090/* Actual reader. */
1091
1092SCM_DEFINE (scm_read, "read", 0, 1, 0,
1093 (SCM port),
1094 "Read an s-expression from the input port @var{port}, or from\n"
1095 "the current input port if @var{port} is not specified.\n"
1096 "Any whitespace before the next token is discarded.")
1097#define FUNC_NAME s_scm_read
1098{
1099 int c;
1100
1101 if (SCM_UNBNDP (port))
1102 port = scm_current_input_port ();
1103 SCM_VALIDATE_OPINPORT (1, port);
1104
1105 c = flush_ws (port, (char *) NULL);
1106 if (EOF == c)
1107 return SCM_EOF_VAL;
1108 scm_ungetc (c, port);
1109
1110 return (scm_read_expression (port));
09a4f039 1111}
db4b4ca6 1112#undef FUNC_NAME
09a4f039 1113
0f2d19dd
JB
1114
1115\f
1116
7337d56d
LC
1117/* Used when recording expressions constructed by `scm_read_sharp ()'. */
1118static SCM
1119recsexpr (SCM obj, long line, int column, SCM filename)
1120{
1121 if (!scm_is_pair(obj)) {
1122 return obj;
1123 } else {
1124 SCM tmp = obj, copy;
1125 /* If this sexpr is visible in the read:sharp source, we want to
1126 keep that information, so only record non-constant cons cells
1127 which haven't previously been read by the reader. */
1128 if (scm_is_false (scm_whash_lookup (scm_source_whash, obj)))
1129 {
1130 if (SCM_COPY_SOURCE_P)
1131 {
1132 copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename),
1133 SCM_UNDEFINED);
1134 while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
1135 {
1136 SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp),
1137 line,
1138 column,
1139 filename),
1140 SCM_UNDEFINED));
1141 copy = SCM_CDR (copy);
1142 }
1143 SCM_SETCDR (copy, tmp);
1144 }
1145 else
1146 {
1147 recsexpr (SCM_CAR (obj), line, column, filename);
1148 while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
1149 recsexpr (SCM_CAR (tmp), line, column, filename);
1150 copy = SCM_UNDEFINED;
1151 }
1152 scm_whash_insert (scm_source_whash,
1153 obj,
1154 scm_make_srcprops (line,
1155 column,
1156 filename,
1157 copy,
1158 SCM_EOL));
1159 }
1160 return obj;
1161 }
1162}
1163
14de3b42
GH
1164/* Manipulate the read-hash-procedures alist. This could be written in
1165 Scheme, but maybe it will also be used by C code during initialisation. */
a1ec6916 1166SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
1bbd0b84 1167 (SCM chr, SCM proc),
dc7fa443
MG
1168 "Install the procedure @var{proc} for reading expressions\n"
1169 "starting with the character sequence @code{#} and @var{chr}.\n"
1170 "@var{proc} will be called with two arguments: the character\n"
1171 "@var{chr} and the port to read further data from. The object\n"
391f57e6
HWN
1172 "returned will be the return value of @code{read}. \n"
1173 "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1174 )
1bbd0b84 1175#define FUNC_NAME s_scm_read_hash_extend
deca31e1 1176{
fed9c9a2
GH
1177 SCM this;
1178 SCM prev;
1179
36284627 1180 SCM_VALIDATE_CHAR (1, chr);
7888309b 1181 SCM_ASSERT (scm_is_false (proc)
bc36d050 1182 || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
36284627 1183 proc, SCM_ARG2, FUNC_NAME);
fed9c9a2 1184
14de3b42
GH
1185 /* Check if chr is already in the alist. */
1186 this = *scm_read_hash_procedures;
1187 prev = SCM_BOOL_F;
fed9c9a2
GH
1188 while (1)
1189 {
d2e53ed6 1190 if (scm_is_null (this))
fed9c9a2
GH
1191 {
1192 /* not found, so add it to the beginning. */
7888309b 1193 if (scm_is_true (proc))
fed9c9a2 1194 {
14de3b42
GH
1195 *scm_read_hash_procedures =
1196 scm_cons (scm_cons (chr, proc), *scm_read_hash_procedures);
fed9c9a2
GH
1197 }
1198 break;
1199 }
bc36d050 1200 if (scm_is_eq (chr, SCM_CAAR (this)))
fed9c9a2
GH
1201 {
1202 /* already in the alist. */
7888309b 1203 if (scm_is_false (proc))
14de3b42
GH
1204 {
1205 /* remove it. */
7888309b 1206 if (scm_is_false (prev))
14de3b42
GH
1207 {
1208 *scm_read_hash_procedures =
1209 SCM_CDR (*scm_read_hash_procedures);
1210 }
1211 else
1212 scm_set_cdr_x (prev, SCM_CDR (this));
1213 }
fed9c9a2 1214 else
14de3b42
GH
1215 {
1216 /* replace it. */
1217 scm_set_cdr_x (SCM_CAR (this), proc);
1218 }
fed9c9a2
GH
1219 break;
1220 }
1221 prev = this;
1222 this = SCM_CDR (this);
1223 }
deca31e1 1224
deca31e1
GH
1225 return SCM_UNSPECIFIED;
1226}
1bbd0b84 1227#undef FUNC_NAME
0f2d19dd 1228
deca31e1
GH
1229/* Recover the read-hash procedure corresponding to char c. */
1230static SCM
6e8d25a6 1231scm_get_hash_procedure (int c)
deca31e1 1232{
14de3b42 1233 SCM rest = *scm_read_hash_procedures;
fed9c9a2 1234
deca31e1
GH
1235 while (1)
1236 {
d2e53ed6 1237 if (scm_is_null (rest))
deca31e1
GH
1238 return SCM_BOOL_F;
1239
7866a09b 1240 if (SCM_CHAR (SCM_CAAR (rest)) == c)
deca31e1
GH
1241 return SCM_CDAR (rest);
1242
1243 rest = SCM_CDR (rest);
1244 }
1245}
1cc91f1b 1246
0f2d19dd
JB
1247void
1248scm_init_read ()
0f2d19dd 1249{
14de3b42 1250 scm_read_hash_procedures =
86d31dfe 1251 SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL));
fed9c9a2 1252
62560650 1253 scm_init_opts (scm_read_options, scm_read_opts);
a0599745 1254#include "libguile/read.x"
0f2d19dd 1255}
89e00824
ML
1256
1257/*
1258 Local Variables:
1259 c-file-style: "gnu"
1260 End:
1261*/