Changes from arch/CVS synchronization
[bpt/guile.git] / libguile / read.c
CommitLineData
b3aa4626 1/* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007 Free Software
dd72382c 2 * Foundation, Inc.
0f2d19dd 3 *
73be1d9e
MV
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
0f2d19dd 8 *
73be1d9e
MV
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
0f2d19dd 13 *
73be1d9e
MV
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
92205699 16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
73be1d9e 17 */
1bbd0b84 18
1bbd0b84 19
0f2d19dd
JB
20\f
21
7337d56d
LC
22#ifdef HAVE_CONFIG_H
23# include <config.h>
24#endif
25
0f2d19dd 26#include <stdio.h>
7337d56d
LC
27#include <ctype.h>
28#include <string.h>
7337d56d 29
a0599745
MD
30#include "libguile/_scm.h"
31#include "libguile/chars.h"
32#include "libguile/eval.h"
33#include "libguile/unif.h"
34#include "libguile/keywords.h"
35#include "libguile/alist.h"
36#include "libguile/srcprop.h"
37#include "libguile/hashtab.h"
38#include "libguile/hash.h"
39#include "libguile/ports.h"
40#include "libguile/root.h"
41#include "libguile/strings.h"
ba1b2226 42#include "libguile/strports.h"
a0599745 43#include "libguile/vectors.h"
a0599745 44#include "libguile/validate.h"
a4022e69 45#include "libguile/srfi-4.h"
7337d56d 46#include "libguile/srfi-13.h"
ba1b2226 47
a0599745 48#include "libguile/read.h"
22fc179a
HWN
49#include "libguile/private-options.h"
50
0f2d19dd
JB
51
52\f
53
5bf6a6f0 54SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
c7733771
GH
55SCM_SYMBOL (scm_keyword_prefix, "prefix");
56
92c2555f 57scm_t_option scm_read_opts[] = {
b7ff98dd
MD
58 { SCM_OPTION_BOOLEAN, "copy", 0,
59 "Copy source code expressions." },
ac74fc22 60 { SCM_OPTION_BOOLEAN, "positions", 0,
deca31e1
GH
61 "Record positions of source code expressions." },
62 { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
c7733771 63 "Convert symbols to lower case."},
f1267706 64 { SCM_OPTION_SCM, "keywords", SCM_UNPACK (SCM_BOOL_F),
62560650 65 "Style of keyword recognition: #f or 'prefix."},
16353acc 66#if SCM_ENABLE_ELISP
16353acc
NJ
67 { SCM_OPTION_BOOLEAN, "elisp-vectors", 0,
68 "Support Elisp vector syntax, namely `[...]'."},
cd21f5eb 69 { SCM_OPTION_BOOLEAN, "elisp-strings", 0,
62560650 70 "Support `\\(' and `\\)' in strings."},
16353acc 71#endif
62560650 72 { 0, },
a16f6fe7
MD
73};
74
39e8f371
HWN
75/*
76 Give meaningful error messages for errors
77
78 We use the format
79
ba1b2226 80 FILE:LINE:COL: MESSAGE
39e8f371
HWN
81 This happened in ....
82
83 This is not standard GNU format, but the test-suite likes the real
84 message to be in front.
85
39e8f371
HWN
86 */
87
88
a4022e69
MV
89void
90scm_i_input_error (char const *function,
91 SCM port, const char *message, SCM arg)
ba1b2226 92{
29a837fd
MV
93 SCM fn = (scm_is_string (SCM_FILENAME(port))
94 ? SCM_FILENAME(port)
95 : scm_from_locale_string ("#<unknown port>"));
ba1b2226 96
29a837fd 97 SCM string_port = scm_open_output_string ();
ba1b2226
HWN
98 SCM string = SCM_EOL;
99 scm_simple_format (string_port,
272632a6 100 scm_from_locale_string ("~A:~S:~S: ~A"),
29a837fd 101 scm_list_4 (fn,
b3aa4626 102 scm_from_long (SCM_LINUM (port) + 1),
b9bd8526 103 scm_from_int (SCM_COL (port) + 1),
272632a6 104 scm_from_locale_string (message)));
ba1b2226
HWN
105
106 string = scm_get_output_string (string_port);
107 scm_close_output_port (string_port);
272632a6 108 scm_error_scm (scm_from_locale_symbol ("read-error"),
a4022e69 109 function? scm_from_locale_string (function) : SCM_BOOL_F,
ba1b2226 110 string,
dd72382c 111 arg,
ba1b2226
HWN
112 SCM_BOOL_F);
113}
39e8f371
HWN
114
115
a1ec6916 116SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
1bbd0b84 117 (SCM setting),
dc7fa443
MG
118 "Option interface for the read options. Instead of using\n"
119 "this procedure directly, use the procedures @code{read-enable},\n"
3939e9df 120 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
1bbd0b84 121#define FUNC_NAME s_scm_read_options
a16f6fe7 122{
b7ff98dd
MD
123 SCM ans = scm_options (setting,
124 scm_read_opts,
1bbd0b84 125 FUNC_NAME);
b7ff98dd
MD
126 if (SCM_COPY_SOURCE_P)
127 SCM_RECORD_POSITIONS_P = 1;
a16f6fe7
MD
128 return ans;
129}
1bbd0b84 130#undef FUNC_NAME
a16f6fe7 131
14de3b42
GH
132/* An association list mapping extra hash characters to procedures. */
133static SCM *scm_read_hash_procedures;
deca31e1 134
0f2d19dd 135
7337d56d
LC
136\f
137/* Token readers. */
0f2d19dd 138
0f2d19dd 139
7337d56d
LC
140/* Size of the C buffer used to read symbols and numbers. */
141#define READER_BUFFER_SIZE 128
0f2d19dd 142
7337d56d
LC
143/* Size of the C buffer used to read strings. */
144#define READER_STRING_BUFFER_SIZE 512
0f2d19dd 145
7337d56d
LC
146/* The maximum size of Scheme character names. */
147#define READER_CHAR_NAME_MAX_SIZE 50
1cc91f1b 148
94115ae3 149
7337d56d
LC
150/* `isblank' is only in C99. */
151#define CHAR_IS_BLANK_(_chr) \
152 (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n') \
153 || ((_chr) == '\f'))
154
155#ifdef MSDOS
156# define CHAR_IS_BLANK(_chr) \
157 ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
158#else
159# define CHAR_IS_BLANK CHAR_IS_BLANK_
160#endif
161
162
163/* R5RS one-character delimiters (see section 7.1.1, ``Lexical
164 structure''). */
165#define CHAR_IS_R5RS_DELIMITER(c) \
166 (CHAR_IS_BLANK (c) \
167 || (c == ')') || (c == '(') || (c == ';') || (c == '"'))
168
169#define CHAR_IS_DELIMITER CHAR_IS_R5RS_DELIMITER
170
171/* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
172 Structure''. */
173#define CHAR_IS_EXPONENT_MARKER(_chr) \
174 (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
175 || ((_chr) == 'd') || ((_chr) == 'l'))
176
177/* An inlinable version of `scm_c_downcase ()'. */
178#define CHAR_DOWNCASE(_chr) \
179 (((_chr) <= UCHAR_MAX) ? tolower (_chr) : (_chr))
180
181
7337d56d
LC
182/* Helper function similar to `scm_read_token ()'. Read from PORT until a
183 whitespace is read. Return zero if the whole token could fit in BUF,
184 non-zero otherwise. */
185static inline int
186read_token (SCM port, char *buf, size_t buf_size, size_t *read)
0520c320 187{
7337d56d 188 *read = 0;
0520c320 189
7337d56d 190 while (*read < buf_size)
0520c320 191 {
7337d56d 192 int chr;
0520c320 193
7337d56d
LC
194 chr = scm_getc (port);
195 chr = (SCM_CASE_INSENSITIVE_P ? CHAR_DOWNCASE (chr) : chr);
196
197 if (chr == EOF)
198 return 0;
199 else if (CHAR_IS_DELIMITER (chr))
200 {
201 scm_ungetc (chr, port);
202 return 0;
203 }
0520c320 204 else
7337d56d
LC
205 {
206 *buf = (char) chr;
207 buf++, (*read)++;
208 }
0520c320 209 }
7337d56d
LC
210
211 return 1;
0520c320 212}
1cc91f1b 213
7337d56d
LC
214
215/* Skip whitespace from PORT and return the first non-whitespace character
216 read. Raise an error on end-of-file. */
217static int
218flush_ws (SCM port, const char *eoferr)
0f2d19dd
JB
219{
220 register int c;
221 while (1)
b7f3516f 222 switch (c = scm_getc (port))
0f2d19dd
JB
223 {
224 case EOF:
225 goteof:
226 if (eoferr)
d156d3b7 227 {
a4022e69
MV
228 scm_i_input_error (eoferr,
229 port,
230 "end of file",
231 SCM_EOL);
d156d3b7 232 }
0f2d19dd 233 return c;
7337d56d 234
0f2d19dd
JB
235 case ';':
236 lp:
b7f3516f 237 switch (c = scm_getc (port))
0f2d19dd
JB
238 {
239 case EOF:
240 goto goteof;
241 default:
242 goto lp;
243 case SCM_LINE_INCREMENTORS:
244 break;
245 }
246 break;
7337d56d 247
0f2d19dd 248 case SCM_LINE_INCREMENTORS:
0f2d19dd 249 case SCM_SINGLE_SPACES:
0f2d19dd 250 case '\t':
0f2d19dd 251 break;
7337d56d 252
0f2d19dd
JB
253 default:
254 return c;
255 }
7337d56d
LC
256
257 return 0;
0f2d19dd
JB
258}
259
260
7337d56d
LC
261\f
262/* Token readers. */
1cc91f1b 263
7337d56d
LC
264static SCM scm_read_expression (SCM port);
265static SCM scm_read_sharp (int chr, SCM port);
266static SCM scm_get_hash_procedure (int c);
267static SCM recsexpr (SCM obj, long line, int column, SCM filename);
0f2d19dd
JB
268
269
09a4f039 270static SCM
7337d56d
LC
271scm_read_sexp (int chr, SCM port)
272#define FUNC_NAME "scm_i_lreadparen"
09a4f039 273{
7337d56d
LC
274 register int c;
275 register SCM tmp;
276 register SCM tl, ans = SCM_EOL;
277 SCM tl2 = SCM_EOL, ans2 = SCM_EOL, copy = SCM_BOOL_F;;
278 static const int terminating_char = ')';
279
280 /* Need to capture line and column numbers here. */
281 long line = SCM_LINUM (port);
282 int column = SCM_COL (port) - 1;
f9c68a47 283
f9c68a47 284
7337d56d
LC
285 c = flush_ws (port, FUNC_NAME);
286 if (terminating_char == c)
287 return SCM_EOL;
f9c68a47 288
7337d56d
LC
289 scm_ungetc (c, port);
290 if (scm_is_eq (scm_sym_dot,
291 (tmp = scm_read_expression (port))))
292 {
293 ans = scm_read_expression (port);
294 if (terminating_char != (c = flush_ws (port, FUNC_NAME)))
295 scm_i_input_error (FUNC_NAME, port, "missing close paren",
296 SCM_EOL);
297 return ans;
298 }
1cc91f1b 299
7337d56d
LC
300 /* Build the head of the list structure. */
301 ans = tl = scm_cons (tmp, SCM_EOL);
302
303 if (SCM_COPY_SOURCE_P)
304 ans2 = tl2 = scm_cons (scm_is_pair (tmp)
305 ? copy
306 : tmp,
307 SCM_EOL);
308
309 while (terminating_char != (c = flush_ws (port, FUNC_NAME)))
0f2d19dd 310 {
7337d56d 311 SCM new_tail;
0f2d19dd 312
7337d56d
LC
313 scm_ungetc (c, port);
314 if (scm_is_eq (scm_sym_dot,
315 (tmp = scm_read_expression (port))))
0f2d19dd 316 {
7337d56d
LC
317 SCM_SETCDR (tl, tmp = scm_read_expression (port));
318
319 if (SCM_COPY_SOURCE_P)
320 SCM_SETCDR (tl2, scm_cons (scm_is_pair (tmp) ? copy : tmp,
321 SCM_EOL));
322
323 c = flush_ws (port, FUNC_NAME);
324 if (terminating_char != c)
325 scm_i_input_error (FUNC_NAME, port,
326 "in pair: missing close paren", SCM_EOL);
327 goto exit;
0f2d19dd 328 }
b858464a 329
7337d56d
LC
330 new_tail = scm_cons (tmp, SCM_EOL);
331 SCM_SETCDR (tl, new_tail);
332 tl = new_tail;
333
334 if (SCM_COPY_SOURCE_P)
0f2d19dd 335 {
7337d56d
LC
336 SCM new_tail2 = scm_cons (scm_is_pair (tmp)
337 ? copy
338 : tmp, SCM_EOL);
339 SCM_SETCDR (tl2, new_tail2);
340 tl2 = new_tail2;
341 }
342 }
0f2d19dd 343
7337d56d
LC
344 exit:
345 if (SCM_RECORD_POSITIONS_P)
346 scm_whash_insert (scm_source_whash,
347 ans,
348 scm_make_srcprops (line, column,
349 SCM_FILENAME (port),
350 SCM_COPY_SOURCE_P
351 ? ans2
352 : SCM_UNDEFINED,
353 SCM_EOL));
354 return ans;
355}
356#undef FUNC_NAME
a4022e69 357
7337d56d
LC
358static SCM
359scm_read_string (int chr, SCM port)
360#define FUNC_NAME "scm_lreadr"
361{
362 /* For strings smaller than C_STR, this function creates only one Scheme
363 object (the string returned). */
0f2d19dd 364
7337d56d
LC
365 SCM str = SCM_BOOL_F;
366 char c_str[READER_STRING_BUFFER_SIZE];
367 unsigned c_str_len = 0;
368 int c;
eb42ff25 369
7337d56d
LC
370 while ('"' != (c = scm_getc (port)))
371 {
372 if (c == EOF)
373 str_eof: scm_i_input_error (FUNC_NAME, port,
374 "end of file in string constant",
375 SCM_EOL);
0f2d19dd 376
7337d56d
LC
377 if (c_str_len + 1 >= sizeof (c_str))
378 {
379 /* Flush the C buffer onto a Scheme string. */
380 SCM addy;
deca31e1 381
7337d56d
LC
382 if (str == SCM_BOOL_F)
383 str = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
0f2d19dd 384
7337d56d
LC
385 addy = scm_from_locale_stringn (c_str, c_str_len);
386 str = scm_string_append_shared (scm_list_2 (str, addy));
0f2d19dd 387
7337d56d
LC
388 c_str_len = 0;
389 }
0f2d19dd 390
7337d56d
LC
391 if (c == '\\')
392 switch (c = scm_getc (port))
393 {
394 case EOF:
395 goto str_eof;
396 case '"':
397 case '\\':
398 break;
16353acc 399#if SCM_ENABLE_ELISP
7337d56d
LC
400 case '(':
401 case ')':
402 if (SCM_ESCAPED_PARENS_P)
403 break;
404 goto bad_escaped;
16353acc 405#endif
7337d56d
LC
406 case '\n':
407 continue;
408 case '0':
409 c = '\0';
410 break;
411 case 'f':
412 c = '\f';
413 break;
414 case 'n':
415 c = '\n';
416 break;
417 case 'r':
418 c = '\r';
419 break;
420 case 't':
421 c = '\t';
422 break;
423 case 'a':
424 c = '\007';
425 break;
426 case 'v':
427 c = '\v';
428 break;
429 case 'x':
3c9a524f 430 {
7337d56d
LC
431 int a, b;
432 a = scm_getc (port);
433 if (a == EOF) goto str_eof;
434 b = scm_getc (port);
435 if (b == EOF) goto str_eof;
436 if ('0' <= a && a <= '9') a -= '0';
437 else if ('A' <= a && a <= 'F') a = a - 'A' + 10;
438 else if ('a' <= a && a <= 'f') a = a - 'a' + 10;
439 else goto bad_escaped;
440 if ('0' <= b && b <= '9') b -= '0';
441 else if ('A' <= b && b <= 'F') b = b - 'A' + 10;
442 else if ('a' <= b && b <= 'f') b = b - 'a' + 10;
443 else goto bad_escaped;
444 c = a * 16 + b;
445 break;
3c9a524f 446 }
7337d56d
LC
447 default:
448 bad_escaped:
449 scm_i_input_error (FUNC_NAME, port,
450 "illegal character in escape sequence: ~S",
451 scm_list_1 (SCM_MAKE_CHAR (c)));
452 }
453 c_str[c_str_len++] = c;
454 }
f13b4400 455
7337d56d
LC
456 if (c_str_len > 0)
457 {
458 SCM addy;
0f2d19dd 459
7337d56d
LC
460 addy = scm_from_locale_stringn (c_str, c_str_len);
461 if (str == SCM_BOOL_F)
462 str = addy;
463 else
464 str = scm_string_append_shared (scm_list_2 (str, addy));
0f2d19dd 465 }
7337d56d
LC
466 else
467 str = (str == SCM_BOOL_F) ? scm_nullstr : str;
468
469 return str;
0f2d19dd 470}
db4b4ca6
DH
471#undef FUNC_NAME
472
0f2d19dd 473
7337d56d
LC
474static SCM
475scm_read_number (int chr, SCM port)
0f2d19dd 476{
7337d56d
LC
477 SCM result, str = SCM_EOL;
478 char buffer[READER_BUFFER_SIZE];
479 size_t read;
480 int overflow = 0;
0f2d19dd 481
7337d56d
LC
482 scm_ungetc (chr, port);
483 do
0f2d19dd 484 {
7337d56d
LC
485 overflow = read_token (port, buffer, sizeof (buffer), &read);
486
487 if ((overflow) || (scm_is_pair (str)))
488 str = scm_cons (scm_from_locale_stringn (buffer, read), str);
0f2d19dd 489 }
7337d56d 490 while (overflow);
0f2d19dd 491
7337d56d 492 if (scm_is_pair (str))
0f2d19dd 493 {
7337d56d 494 /* The slow path. */
0f2d19dd 495
7337d56d
LC
496 str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
497 result = scm_string_to_number (str, SCM_UNDEFINED);
498 if (!scm_is_true (result))
499 /* Return a symbol instead of a number. */
500 result = scm_string_to_symbol (str);
501 }
502 else
503 {
504 result = scm_c_locale_stringn_to_number (buffer, read, 10);
505 if (!scm_is_true (result))
506 /* Return a symbol instead of a number. */
507 result = scm_from_locale_symboln (buffer, read);
508 }
0f2d19dd 509
7337d56d
LC
510 return result;
511}
0f2d19dd 512
7337d56d
LC
513static SCM
514scm_read_mixed_case_symbol (int chr, SCM port)
515{
516 SCM result, str = SCM_EOL;
517 int overflow = 0;
518 char buffer[READER_BUFFER_SIZE];
519 size_t read = 0;
520
521 scm_ungetc (chr, port);
522 do
523 {
524 overflow = read_token (port, buffer, sizeof (buffer), &read);
525
526 if ((overflow) || (scm_is_pair (str)))
527 str = scm_cons (scm_from_locale_stringn (buffer, read), str);
528 }
529 while (overflow);
530
531 if (scm_is_pair (str))
532 {
533 str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
534 result = scm_string_to_symbol (str);
535 }
536 else
537 /* For symbols smaller than `sizeof (buffer)', we don't need to recur to
538 Scheme strings. Therefore, we only create one Scheme object (a
539 symbol) per symbol read. */
540 result = scm_from_locale_symboln (buffer, read);
541
542 return result;
543}
544
545static SCM
546scm_read_number_and_radix (int chr, SCM port)
547#define FUNC_NAME "scm_lreadr"
548{
549 SCM result, str = SCM_EOL;
550 size_t read;
551 char buffer[READER_BUFFER_SIZE];
552 unsigned int radix;
553 int overflow = 0;
554
555 switch (chr)
556 {
557 case 'B':
558 case 'b':
559 radix = 2;
560 break;
561
562 case 'o':
563 case 'O':
564 radix = 8;
565 break;
566
567 case 'd':
568 case 'D':
569 radix = 10;
570 break;
571
572 case 'x':
573 case 'X':
574 radix = 16;
575 break;
576
577 default:
578 scm_ungetc (chr, port);
579 scm_ungetc ('#', port);
580 radix = 10;
581 }
582
583 do
584 {
585 overflow = read_token (port, buffer, sizeof (buffer), &read);
586
587 if ((overflow) || (scm_is_pair (str)))
588 str = scm_cons (scm_from_locale_stringn (buffer, read), str);
589 }
590 while (overflow);
591
592 if (scm_is_pair (str))
593 {
594 str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
595 result = scm_string_to_number (str, scm_from_uint (radix));
596 }
597 else
598 result = scm_c_locale_stringn_to_number (buffer, read, radix);
599
600 if (scm_is_true (result))
601 return result;
602
603 scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
604
605 return SCM_BOOL_F;
606}
607#undef FUNC_NAME
608
609static SCM
610scm_read_quote (int chr, SCM port)
611{
612 SCM p;
613
614 switch (chr)
615 {
616 case '`':
617 p = scm_sym_quasiquote;
618 break;
619
620 case '\'':
621 p = scm_sym_quote;
622 break;
623
624 case ',':
625 {
626 int c;
627
628 c = scm_getc (port);
629 if ('@' == c)
630 p = scm_sym_uq_splicing;
631 else
0f2d19dd 632 {
7337d56d
LC
633 scm_ungetc (c, port);
634 p = scm_sym_unquote;
0f2d19dd 635 }
7337d56d
LC
636 break;
637 }
0f2d19dd 638
7337d56d
LC
639 default:
640 fprintf (stderr, "%s: unhandled quote character (%i)\n",
641 __FUNCTION__, chr);
642 abort ();
0f2d19dd 643 }
1cc91f1b 644
7337d56d 645 p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
0f2d19dd 646
7337d56d
LC
647 return p;
648}
649
650static inline SCM
651scm_read_semicolon_comment (int chr, SCM port)
0f2d19dd 652{
0f2d19dd
JB
653 int c;
654
7337d56d
LC
655 for (c = scm_getc (port);
656 (c != EOF) && (c != '\n');
657 c = scm_getc (port));
658
659 return SCM_UNSPECIFIED;
660}
661
662\f
663/* Sharp readers, i.e. readers called after a `#' sign has been read. */
664
665static SCM
666scm_read_boolean (int chr, SCM port)
667{
668 switch (chr)
0f2d19dd 669 {
7337d56d
LC
670 case 't':
671 case 'T':
672 return SCM_BOOL_T;
673
674 case 'f':
675 case 'F':
676 return SCM_BOOL_F;
0f2d19dd 677 }
7337d56d
LC
678
679 return SCM_UNSPECIFIED;
680}
681
682static SCM
683scm_read_character (int chr, SCM port)
684#define FUNC_NAME "scm_lreadr"
685{
686 unsigned c;
687 char charname[READER_CHAR_NAME_MAX_SIZE];
688 size_t charname_len;
689
690 if (read_token (port, charname, sizeof (charname), &charname_len))
691 goto char_error;
692
693 if (charname_len == 0)
0f2d19dd 694 {
7337d56d
LC
695 chr = scm_getc (port);
696 if (chr == EOF)
697 scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
698 "while reading character", SCM_EOL);
699
700 /* CHR must be a token delimiter, like a whitespace. */
701 return (SCM_MAKE_CHAR (chr));
0f2d19dd 702 }
7337d56d
LC
703
704 if (charname_len == 1)
705 return SCM_MAKE_CHAR (charname[0]);
706
707 if (*charname >= '0' && *charname < '8')
708 {
709 /* Dirk:FIXME:: This type of character syntax is not R5RS
710 * compliant. Further, it should be verified that the constant
711 * does only consist of octal digits. Finally, it should be
712 * checked whether the resulting fixnum is in the range of
713 * characters. */
714 SCM p = scm_c_locale_stringn_to_number (charname, charname_len, 8);
715 if (SCM_I_INUMP (p))
716 return SCM_MAKE_CHAR (SCM_I_INUM (p));
717 }
718
719 for (c = 0; c < scm_n_charnames; c++)
720 if (scm_charnames[c]
721 && (!strncasecmp (scm_charnames[c], charname, charname_len)))
722 return SCM_MAKE_CHAR (scm_charnums[c]);
723
724 char_error:
725 scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
726 scm_list_1 (scm_from_locale_stringn (charname,
727 charname_len)));
728
729 return SCM_UNSPECIFIED;
0f2d19dd 730}
db4b4ca6 731#undef FUNC_NAME
0f2d19dd 732
7337d56d
LC
733static inline SCM
734scm_read_keyword (int chr, SCM port)
735{
736 SCM symbol;
737
738 /* Read the symbol that comprises the keyword. Doing this instead of
739 invoking a specific symbol reader function allows `scm_read_keyword ()'
740 to adapt to the delimiters currently valid of symbols.
1cc91f1b 741
7337d56d
LC
742 XXX: This implementation allows sloppy syntaxes like `#: key'. */
743 symbol = scm_read_expression (port);
744 if (!scm_is_symbol (symbol))
745 scm_i_input_error (__FUNCTION__, port,
746 "keyword prefix `~a' not followed by a symbol: ~s",
747 scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
748
749 return (scm_symbol_to_keyword (symbol));
750}
751
752static inline SCM
753scm_read_vector (int chr, SCM port)
09a4f039 754{
7337d56d
LC
755 /* Note: We call `scm_read_sexp ()' rather than READER here in order to
756 guarantee that it's going to do what we want. After all, this is an
757 implementation detail of `scm_read_vector ()', not a desirable
758 property. */
759 return (scm_vector (scm_read_sexp (chr, port)));
760}
09a4f039 761
7337d56d
LC
762static inline SCM
763scm_read_srfi4_vector (int chr, SCM port)
764{
765 return scm_i_read_array (port, chr);
766}
767
768static SCM
769scm_read_guile_bit_vector (int chr, SCM port)
770{
771 /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
772 terribly inefficient but who cares? */
773 SCM s_bits = SCM_EOL;
774
775 for (chr = scm_getc (port);
776 (chr != EOF) && ((chr == '0') || (chr == '1'));
777 chr = scm_getc (port))
09a4f039 778 {
7337d56d 779 s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
09a4f039 780 }
7337d56d
LC
781
782 if (chr != EOF)
783 scm_ungetc (chr, port);
784
785 return scm_bitvector (scm_reverse_x (s_bits, SCM_EOL));
786}
787
788static inline SCM
789scm_read_scsh_block_comment (int chr, SCM port)
790{
791 int bang_seen = 0;
792
793 for (;;)
09a4f039 794 {
7337d56d 795 int c = scm_getc (port);
62850ef3 796
7337d56d
LC
797 if (c == EOF)
798 scm_i_input_error ("skip_block_comment", port,
799 "unterminated `#! ... !#' comment", SCM_EOL);
800
801 if (c == '!')
802 bang_seen = 1;
803 else if (c == '#' && bang_seen)
804 break;
805 else
806 bang_seen = 0;
807 }
808
809 return SCM_UNSPECIFIED;
810}
811
812static SCM
813scm_read_extended_symbol (int chr, SCM port)
814{
815 /* Guile's extended symbol read syntax looks like this:
816
817 #{This is all a symbol name}#
818
819 So here, CHR is expected to be `{'. */
820 SCM result;
821 int saw_brace = 0, finished = 0;
822 size_t len = 0;
823 char buf[1024];
824
825 result = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
826
827 while ((chr = scm_getc (port)) != EOF)
828 {
829 if (saw_brace)
09a4f039 830 {
7337d56d
LC
831 if (chr == '#')
832 {
833 finished = 1;
834 break;
835 }
836 else
837 {
838 saw_brace = 0;
839 buf[len++] = '}';
840 buf[len++] = chr;
841 }
09a4f039 842 }
7337d56d
LC
843 else if (chr == '}')
844 saw_brace = 1;
845 else
846 buf[len++] = chr;
62850ef3 847
7337d56d
LC
848 if (len >= sizeof (buf) - 2)
849 {
850 scm_string_append (scm_list_2 (result,
851 scm_from_locale_stringn (buf, len)));
852 len = 0;
853 }
62850ef3 854
7337d56d
LC
855 if (finished)
856 break;
857 }
858
859 if (len)
860 result = scm_string_append (scm_list_2
861 (result,
862 scm_from_locale_stringn (buf, len)));
863
864 return (scm_string_to_symbol (result));
865}
866
867
868\f
869/* Top-level token readers, i.e., dispatchers. */
870
871static SCM
872scm_read_sharp_extension (int chr, SCM port)
873{
874 SCM proc;
875
876 proc = scm_get_hash_procedure (chr);
877 if (scm_is_true (scm_procedure_p (proc)))
878 {
879 long line = SCM_LINUM (port);
880 int column = SCM_COL (port) - 2;
881 SCM got;
882
883 got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
884 if (!scm_is_eq (got, SCM_UNSPECIFIED))
62850ef3 885 {
7337d56d
LC
886 if (SCM_RECORD_POSITIONS_P)
887 return (recsexpr (got, line, column,
888 SCM_FILENAME (port)));
889 else
890 return got;
62850ef3 891 }
09a4f039 892 }
7337d56d
LC
893
894 return SCM_UNSPECIFIED;
895}
896
897/* The reader for the sharp `#' character. It basically dispatches reads
898 among the above token readers. */
899static SCM
900scm_read_sharp (int chr, SCM port)
901#define FUNC_NAME "scm_lreadr"
902{
903 SCM result;
904
905 chr = scm_getc (port);
906
907 result = scm_read_sharp_extension (chr, port);
908 if (!scm_is_eq (result, SCM_UNSPECIFIED))
909 return result;
910
911 switch (chr)
912 {
913 case '\\':
914 return (scm_read_character (chr, port));
915 case '(':
916 return (scm_read_vector (chr, port));
917 case 's':
918 case 'u':
919 case 'f':
920 /* This one may return either a boolean or an SRFI-4 vector. */
921 return (scm_read_srfi4_vector (chr, port));
922 case '*':
923 return (scm_read_guile_bit_vector (chr, port));
924 case 't':
925 case 'T':
926 case 'F':
927 /* This one may return either a boolean or an SRFI-4 vector. */
928 return (scm_read_boolean (chr, port));
929 case ':':
930 return (scm_read_keyword (chr, port));
931 case '0': case '1': case '2': case '3': case '4':
932 case '5': case '6': case '7': case '8': case '9':
933 case '@':
934#if SCM_ENABLE_DEPRECATED
935 /* See below for 'i' and 'e'. */
936 case 'a':
937 case 'c':
938 case 'y':
939 case 'h':
940 case 'l':
941#endif
942 return (scm_i_read_array (port, chr));
943
944 case 'i':
945 case 'e':
946#if SCM_ENABLE_DEPRECATED
947 {
948 /* When next char is '(', it really is an old-style
949 uniform array. */
950 int next_c = scm_getc (port);
951 if (next_c != EOF)
952 scm_ungetc (next_c, port);
953 if (next_c == '(')
954 return scm_i_read_array (port, chr);
955 /* Fall through. */
956 }
957#endif
958 case 'b':
959 case 'B':
960 case 'o':
961 case 'O':
962 case 'd':
963 case 'D':
964 case 'x':
965 case 'X':
966 case 'I':
967 case 'E':
968 return (scm_read_number_and_radix (chr, port));
969 case '{':
970 return (scm_read_extended_symbol (chr, port));
971 case '!':
972 return (scm_read_scsh_block_comment (chr, port));
973 default:
974 result = scm_read_sharp_extension (chr, port);
975 if (scm_is_eq (result, SCM_UNSPECIFIED))
976 scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
977 scm_list_1 (SCM_MAKE_CHAR (chr)));
978 else
979 return result;
980 }
981
982 return SCM_UNSPECIFIED;
983}
984#undef FUNC_NAME
985
986static SCM
987scm_read_expression (SCM port)
988#define FUNC_NAME "scm_read_expression"
989{
990 while (1)
991 {
992 register int chr;
993
994 chr = scm_getc (port);
995
996 switch (chr)
997 {
998 case SCM_WHITE_SPACES:
999 case SCM_LINE_INCREMENTORS:
1000 break;
1001 case ';':
1002 (void) scm_read_semicolon_comment (chr, port);
1003 break;
1004 case '(':
1005 return (scm_read_sexp (chr, port));
1006 case '"':
1007 return (scm_read_string (chr, port));
1008 case '\'':
1009 case '`':
1010 case ',':
1011 return (scm_read_quote (chr, port));
1012 case '#':
1013 {
1014 SCM result;
1015 result = scm_read_sharp (chr, port);
1016 if (scm_is_eq (result, SCM_UNSPECIFIED))
1017 /* We read a comment or some such. */
1018 break;
1019 else
1020 return result;
1021 }
1022 case ')':
1023 scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
1024 break;
1025 case EOF:
1026 return SCM_EOF_VAL;
1027 case ':':
1028 if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix))
1029 return scm_symbol_to_keyword (scm_read_expression (port));
1030 /* Fall through. */
1031
1032 default:
1033 {
1034 if (((chr >= '0') && (chr <= '9'))
1035 || (strchr ("+-.", chr)))
1036 return (scm_read_number (chr, port));
1037 else
1038 return (scm_read_mixed_case_symbol (chr, port));
1039 }
1040 }
1041 }
1042}
1043#undef FUNC_NAME
1044
1045\f
1046/* Actual reader. */
1047
1048SCM_DEFINE (scm_read, "read", 0, 1, 0,
1049 (SCM port),
1050 "Read an s-expression from the input port @var{port}, or from\n"
1051 "the current input port if @var{port} is not specified.\n"
1052 "Any whitespace before the next token is discarded.")
1053#define FUNC_NAME s_scm_read
1054{
1055 int c;
1056
1057 if (SCM_UNBNDP (port))
1058 port = scm_current_input_port ();
1059 SCM_VALIDATE_OPINPORT (1, port);
1060
1061 c = flush_ws (port, (char *) NULL);
1062 if (EOF == c)
1063 return SCM_EOF_VAL;
1064 scm_ungetc (c, port);
1065
1066 return (scm_read_expression (port));
09a4f039 1067}
db4b4ca6 1068#undef FUNC_NAME
09a4f039 1069
0f2d19dd
JB
1070
1071\f
1072
7337d56d
LC
1073/* Used when recording expressions constructed by `scm_read_sharp ()'. */
1074static SCM
1075recsexpr (SCM obj, long line, int column, SCM filename)
1076{
1077 if (!scm_is_pair(obj)) {
1078 return obj;
1079 } else {
1080 SCM tmp = obj, copy;
1081 /* If this sexpr is visible in the read:sharp source, we want to
1082 keep that information, so only record non-constant cons cells
1083 which haven't previously been read by the reader. */
1084 if (scm_is_false (scm_whash_lookup (scm_source_whash, obj)))
1085 {
1086 if (SCM_COPY_SOURCE_P)
1087 {
1088 copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename),
1089 SCM_UNDEFINED);
1090 while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
1091 {
1092 SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp),
1093 line,
1094 column,
1095 filename),
1096 SCM_UNDEFINED));
1097 copy = SCM_CDR (copy);
1098 }
1099 SCM_SETCDR (copy, tmp);
1100 }
1101 else
1102 {
1103 recsexpr (SCM_CAR (obj), line, column, filename);
1104 while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
1105 recsexpr (SCM_CAR (tmp), line, column, filename);
1106 copy = SCM_UNDEFINED;
1107 }
1108 scm_whash_insert (scm_source_whash,
1109 obj,
1110 scm_make_srcprops (line,
1111 column,
1112 filename,
1113 copy,
1114 SCM_EOL));
1115 }
1116 return obj;
1117 }
1118}
1119
14de3b42
GH
1120/* Manipulate the read-hash-procedures alist. This could be written in
1121 Scheme, but maybe it will also be used by C code during initialisation. */
a1ec6916 1122SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
1bbd0b84 1123 (SCM chr, SCM proc),
dc7fa443
MG
1124 "Install the procedure @var{proc} for reading expressions\n"
1125 "starting with the character sequence @code{#} and @var{chr}.\n"
1126 "@var{proc} will be called with two arguments: the character\n"
1127 "@var{chr} and the port to read further data from. The object\n"
391f57e6
HWN
1128 "returned will be the return value of @code{read}. \n"
1129 "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1130 )
1bbd0b84 1131#define FUNC_NAME s_scm_read_hash_extend
deca31e1 1132{
fed9c9a2
GH
1133 SCM this;
1134 SCM prev;
1135
36284627 1136 SCM_VALIDATE_CHAR (1, chr);
7888309b 1137 SCM_ASSERT (scm_is_false (proc)
bc36d050 1138 || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
36284627 1139 proc, SCM_ARG2, FUNC_NAME);
fed9c9a2 1140
14de3b42
GH
1141 /* Check if chr is already in the alist. */
1142 this = *scm_read_hash_procedures;
1143 prev = SCM_BOOL_F;
fed9c9a2
GH
1144 while (1)
1145 {
d2e53ed6 1146 if (scm_is_null (this))
fed9c9a2
GH
1147 {
1148 /* not found, so add it to the beginning. */
7888309b 1149 if (scm_is_true (proc))
fed9c9a2 1150 {
14de3b42
GH
1151 *scm_read_hash_procedures =
1152 scm_cons (scm_cons (chr, proc), *scm_read_hash_procedures);
fed9c9a2
GH
1153 }
1154 break;
1155 }
bc36d050 1156 if (scm_is_eq (chr, SCM_CAAR (this)))
fed9c9a2
GH
1157 {
1158 /* already in the alist. */
7888309b 1159 if (scm_is_false (proc))
14de3b42
GH
1160 {
1161 /* remove it. */
7888309b 1162 if (scm_is_false (prev))
14de3b42
GH
1163 {
1164 *scm_read_hash_procedures =
1165 SCM_CDR (*scm_read_hash_procedures);
1166 }
1167 else
1168 scm_set_cdr_x (prev, SCM_CDR (this));
1169 }
fed9c9a2 1170 else
14de3b42
GH
1171 {
1172 /* replace it. */
1173 scm_set_cdr_x (SCM_CAR (this), proc);
1174 }
fed9c9a2
GH
1175 break;
1176 }
1177 prev = this;
1178 this = SCM_CDR (this);
1179 }
deca31e1 1180
deca31e1
GH
1181 return SCM_UNSPECIFIED;
1182}
1bbd0b84 1183#undef FUNC_NAME
0f2d19dd 1184
deca31e1
GH
1185/* Recover the read-hash procedure corresponding to char c. */
1186static SCM
6e8d25a6 1187scm_get_hash_procedure (int c)
deca31e1 1188{
14de3b42 1189 SCM rest = *scm_read_hash_procedures;
fed9c9a2 1190
deca31e1
GH
1191 while (1)
1192 {
d2e53ed6 1193 if (scm_is_null (rest))
deca31e1
GH
1194 return SCM_BOOL_F;
1195
7866a09b 1196 if (SCM_CHAR (SCM_CAAR (rest)) == c)
deca31e1
GH
1197 return SCM_CDAR (rest);
1198
1199 rest = SCM_CDR (rest);
1200 }
1201}
1cc91f1b 1202
0f2d19dd
JB
1203void
1204scm_init_read ()
0f2d19dd 1205{
14de3b42 1206 scm_read_hash_procedures =
86d31dfe 1207 SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL));
fed9c9a2 1208
62560650 1209 scm_init_opts (scm_read_options, scm_read_opts);
a0599745 1210#include "libguile/read.x"
0f2d19dd 1211}
89e00824
ML
1212
1213/*
1214 Local Variables:
1215 c-file-style: "gnu"
1216 End:
1217*/