More compilation fixes with Sun CC (bug #21378).
[bpt/guile.git] / libguile / read.c
CommitLineData
7f74cf9a 1/* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007, 2008 Free Software
dd72382c 2 * Foundation, Inc.
0f2d19dd 3 *
73be1d9e
MV
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
0f2d19dd 8 *
73be1d9e
MV
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
0f2d19dd 13 *
73be1d9e
MV
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
92205699 16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
73be1d9e 17 */
1bbd0b84 18
1bbd0b84 19
0f2d19dd
JB
20\f
21
7337d56d
LC
22#ifdef HAVE_CONFIG_H
23# include <config.h>
24#endif
25
0f2d19dd 26#include <stdio.h>
7337d56d
LC
27#include <ctype.h>
28#include <string.h>
7337d56d 29
a0599745
MD
30#include "libguile/_scm.h"
31#include "libguile/chars.h"
32#include "libguile/eval.h"
33#include "libguile/unif.h"
34#include "libguile/keywords.h"
35#include "libguile/alist.h"
36#include "libguile/srcprop.h"
37#include "libguile/hashtab.h"
38#include "libguile/hash.h"
39#include "libguile/ports.h"
40#include "libguile/root.h"
41#include "libguile/strings.h"
ba1b2226 42#include "libguile/strports.h"
a0599745 43#include "libguile/vectors.h"
a0599745 44#include "libguile/validate.h"
a4022e69 45#include "libguile/srfi-4.h"
7337d56d 46#include "libguile/srfi-13.h"
ba1b2226 47
a0599745 48#include "libguile/read.h"
22fc179a
HWN
49#include "libguile/private-options.h"
50
0f2d19dd
JB
51
52\f
53
5bf6a6f0 54SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
c7733771
GH
55SCM_SYMBOL (scm_keyword_prefix, "prefix");
56
92c2555f 57scm_t_option scm_read_opts[] = {
b7ff98dd
MD
58 { SCM_OPTION_BOOLEAN, "copy", 0,
59 "Copy source code expressions." },
ac74fc22 60 { SCM_OPTION_BOOLEAN, "positions", 0,
deca31e1
GH
61 "Record positions of source code expressions." },
62 { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
c7733771 63 "Convert symbols to lower case."},
f1267706 64 { SCM_OPTION_SCM, "keywords", SCM_UNPACK (SCM_BOOL_F),
62560650 65 "Style of keyword recognition: #f or 'prefix."},
16353acc 66#if SCM_ENABLE_ELISP
16353acc
NJ
67 { SCM_OPTION_BOOLEAN, "elisp-vectors", 0,
68 "Support Elisp vector syntax, namely `[...]'."},
cd21f5eb 69 { SCM_OPTION_BOOLEAN, "elisp-strings", 0,
62560650 70 "Support `\\(' and `\\)' in strings."},
16353acc 71#endif
62560650 72 { 0, },
a16f6fe7
MD
73};
74
39e8f371
HWN
75/*
76 Give meaningful error messages for errors
77
78 We use the format
79
ba1b2226 80 FILE:LINE:COL: MESSAGE
39e8f371
HWN
81 This happened in ....
82
83 This is not standard GNU format, but the test-suite likes the real
84 message to be in front.
85
39e8f371
HWN
86 */
87
88
a4022e69
MV
89void
90scm_i_input_error (char const *function,
91 SCM port, const char *message, SCM arg)
ba1b2226 92{
29a837fd
MV
93 SCM fn = (scm_is_string (SCM_FILENAME(port))
94 ? SCM_FILENAME(port)
95 : scm_from_locale_string ("#<unknown port>"));
ba1b2226 96
29a837fd 97 SCM string_port = scm_open_output_string ();
ba1b2226
HWN
98 SCM string = SCM_EOL;
99 scm_simple_format (string_port,
272632a6 100 scm_from_locale_string ("~A:~S:~S: ~A"),
29a837fd 101 scm_list_4 (fn,
b3aa4626 102 scm_from_long (SCM_LINUM (port) + 1),
b9bd8526 103 scm_from_int (SCM_COL (port) + 1),
272632a6 104 scm_from_locale_string (message)));
ba1b2226
HWN
105
106 string = scm_get_output_string (string_port);
107 scm_close_output_port (string_port);
272632a6 108 scm_error_scm (scm_from_locale_symbol ("read-error"),
a4022e69 109 function? scm_from_locale_string (function) : SCM_BOOL_F,
ba1b2226 110 string,
dd72382c 111 arg,
ba1b2226
HWN
112 SCM_BOOL_F);
113}
39e8f371
HWN
114
115
a1ec6916 116SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
1bbd0b84 117 (SCM setting),
dc7fa443
MG
118 "Option interface for the read options. Instead of using\n"
119 "this procedure directly, use the procedures @code{read-enable},\n"
3939e9df 120 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
1bbd0b84 121#define FUNC_NAME s_scm_read_options
a16f6fe7 122{
b7ff98dd
MD
123 SCM ans = scm_options (setting,
124 scm_read_opts,
1bbd0b84 125 FUNC_NAME);
b7ff98dd
MD
126 if (SCM_COPY_SOURCE_P)
127 SCM_RECORD_POSITIONS_P = 1;
a16f6fe7
MD
128 return ans;
129}
1bbd0b84 130#undef FUNC_NAME
a16f6fe7 131
14de3b42
GH
132/* An association list mapping extra hash characters to procedures. */
133static SCM *scm_read_hash_procedures;
deca31e1 134
0f2d19dd 135
7337d56d
LC
136\f
137/* Token readers. */
0f2d19dd 138
0f2d19dd 139
7337d56d
LC
140/* Size of the C buffer used to read symbols and numbers. */
141#define READER_BUFFER_SIZE 128
0f2d19dd 142
7337d56d
LC
143/* Size of the C buffer used to read strings. */
144#define READER_STRING_BUFFER_SIZE 512
0f2d19dd 145
7337d56d
LC
146/* The maximum size of Scheme character names. */
147#define READER_CHAR_NAME_MAX_SIZE 50
1cc91f1b 148
94115ae3 149
7337d56d
LC
150/* `isblank' is only in C99. */
151#define CHAR_IS_BLANK_(_chr) \
152 (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n') \
d41668fa 153 || ((_chr) == '\f') || ((_chr) == '\r'))
7337d56d
LC
154
155#ifdef MSDOS
156# define CHAR_IS_BLANK(_chr) \
157 ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
158#else
159# define CHAR_IS_BLANK CHAR_IS_BLANK_
160#endif
161
162
163/* R5RS one-character delimiters (see section 7.1.1, ``Lexical
164 structure''). */
165#define CHAR_IS_R5RS_DELIMITER(c) \
166 (CHAR_IS_BLANK (c) \
167 || (c == ')') || (c == '(') || (c == ';') || (c == '"'))
168
169#define CHAR_IS_DELIMITER CHAR_IS_R5RS_DELIMITER
170
171/* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
172 Structure''. */
173#define CHAR_IS_EXPONENT_MARKER(_chr) \
174 (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
175 || ((_chr) == 'd') || ((_chr) == 'l'))
176
177/* An inlinable version of `scm_c_downcase ()'. */
178#define CHAR_DOWNCASE(_chr) \
179 (((_chr) <= UCHAR_MAX) ? tolower (_chr) : (_chr))
180
181
454866e0
LC
182/* Read an SCSH block comment. */
183static inline SCM scm_read_scsh_block_comment (int chr, SCM port);
184
d41668fa
LC
185/* Read from PORT until a delimiter (e.g., a whitespace) is read. Return
186 zero if the whole token fits in BUF, non-zero otherwise. */
7337d56d
LC
187static inline int
188read_token (SCM port, char *buf, size_t buf_size, size_t *read)
0520c320 189{
7337d56d 190 *read = 0;
0520c320 191
7337d56d 192 while (*read < buf_size)
0520c320 193 {
7337d56d 194 int chr;
0520c320 195
7337d56d
LC
196 chr = scm_getc (port);
197 chr = (SCM_CASE_INSENSITIVE_P ? CHAR_DOWNCASE (chr) : chr);
198
199 if (chr == EOF)
200 return 0;
201 else if (CHAR_IS_DELIMITER (chr))
202 {
203 scm_ungetc (chr, port);
204 return 0;
205 }
0520c320 206 else
7337d56d
LC
207 {
208 *buf = (char) chr;
209 buf++, (*read)++;
210 }
0520c320 211 }
7337d56d
LC
212
213 return 1;
0520c320 214}
1cc91f1b 215
7337d56d
LC
216
217/* Skip whitespace from PORT and return the first non-whitespace character
218 read. Raise an error on end-of-file. */
219static int
220flush_ws (SCM port, const char *eoferr)
0f2d19dd
JB
221{
222 register int c;
223 while (1)
b7f3516f 224 switch (c = scm_getc (port))
0f2d19dd
JB
225 {
226 case EOF:
227 goteof:
228 if (eoferr)
d156d3b7 229 {
a4022e69
MV
230 scm_i_input_error (eoferr,
231 port,
232 "end of file",
233 SCM_EOL);
d156d3b7 234 }
0f2d19dd 235 return c;
7337d56d 236
0f2d19dd
JB
237 case ';':
238 lp:
b7f3516f 239 switch (c = scm_getc (port))
0f2d19dd
JB
240 {
241 case EOF:
242 goto goteof;
243 default:
244 goto lp;
245 case SCM_LINE_INCREMENTORS:
246 break;
247 }
248 break;
7337d56d 249
454866e0
LC
250 case '#':
251 switch (c = scm_getc (port))
252 {
253 case EOF:
254 eoferr = "read_sharp";
255 goto goteof;
256 case '!':
257 scm_read_scsh_block_comment (c, port);
258 break;
259 default:
260 scm_ungetc (c, port);
261 return '#';
262 }
263 break;
264
0f2d19dd 265 case SCM_LINE_INCREMENTORS:
0f2d19dd 266 case SCM_SINGLE_SPACES:
0f2d19dd 267 case '\t':
0f2d19dd 268 break;
7337d56d 269
0f2d19dd
JB
270 default:
271 return c;
272 }
7337d56d
LC
273
274 return 0;
0f2d19dd
JB
275}
276
277
7337d56d
LC
278\f
279/* Token readers. */
1cc91f1b 280
7337d56d
LC
281static SCM scm_read_expression (SCM port);
282static SCM scm_read_sharp (int chr, SCM port);
283static SCM scm_get_hash_procedure (int c);
284static SCM recsexpr (SCM obj, long line, int column, SCM filename);
0f2d19dd
JB
285
286
09a4f039 287static SCM
7337d56d
LC
288scm_read_sexp (int chr, SCM port)
289#define FUNC_NAME "scm_i_lreadparen"
09a4f039 290{
7337d56d
LC
291 register int c;
292 register SCM tmp;
293 register SCM tl, ans = SCM_EOL;
294 SCM tl2 = SCM_EOL, ans2 = SCM_EOL, copy = SCM_BOOL_F;;
295 static const int terminating_char = ')';
296
297 /* Need to capture line and column numbers here. */
298 long line = SCM_LINUM (port);
299 int column = SCM_COL (port) - 1;
f9c68a47 300
f9c68a47 301
7337d56d
LC
302 c = flush_ws (port, FUNC_NAME);
303 if (terminating_char == c)
304 return SCM_EOL;
f9c68a47 305
7337d56d
LC
306 scm_ungetc (c, port);
307 if (scm_is_eq (scm_sym_dot,
308 (tmp = scm_read_expression (port))))
309 {
310 ans = scm_read_expression (port);
311 if (terminating_char != (c = flush_ws (port, FUNC_NAME)))
312 scm_i_input_error (FUNC_NAME, port, "missing close paren",
313 SCM_EOL);
314 return ans;
315 }
1cc91f1b 316
7337d56d
LC
317 /* Build the head of the list structure. */
318 ans = tl = scm_cons (tmp, SCM_EOL);
319
320 if (SCM_COPY_SOURCE_P)
321 ans2 = tl2 = scm_cons (scm_is_pair (tmp)
322 ? copy
323 : tmp,
324 SCM_EOL);
325
326 while (terminating_char != (c = flush_ws (port, FUNC_NAME)))
0f2d19dd 327 {
7337d56d 328 SCM new_tail;
0f2d19dd 329
7337d56d
LC
330 scm_ungetc (c, port);
331 if (scm_is_eq (scm_sym_dot,
332 (tmp = scm_read_expression (port))))
0f2d19dd 333 {
7337d56d
LC
334 SCM_SETCDR (tl, tmp = scm_read_expression (port));
335
336 if (SCM_COPY_SOURCE_P)
337 SCM_SETCDR (tl2, scm_cons (scm_is_pair (tmp) ? copy : tmp,
338 SCM_EOL));
339
340 c = flush_ws (port, FUNC_NAME);
341 if (terminating_char != c)
342 scm_i_input_error (FUNC_NAME, port,
343 "in pair: missing close paren", SCM_EOL);
344 goto exit;
0f2d19dd 345 }
b858464a 346
7337d56d
LC
347 new_tail = scm_cons (tmp, SCM_EOL);
348 SCM_SETCDR (tl, new_tail);
349 tl = new_tail;
350
351 if (SCM_COPY_SOURCE_P)
0f2d19dd 352 {
7337d56d
LC
353 SCM new_tail2 = scm_cons (scm_is_pair (tmp)
354 ? copy
355 : tmp, SCM_EOL);
356 SCM_SETCDR (tl2, new_tail2);
357 tl2 = new_tail2;
358 }
359 }
0f2d19dd 360
7337d56d
LC
361 exit:
362 if (SCM_RECORD_POSITIONS_P)
363 scm_whash_insert (scm_source_whash,
364 ans,
365 scm_make_srcprops (line, column,
366 SCM_FILENAME (port),
367 SCM_COPY_SOURCE_P
368 ? ans2
369 : SCM_UNDEFINED,
370 SCM_EOL));
371 return ans;
372}
373#undef FUNC_NAME
a4022e69 374
7337d56d
LC
375static SCM
376scm_read_string (int chr, SCM port)
377#define FUNC_NAME "scm_lreadr"
378{
379 /* For strings smaller than C_STR, this function creates only one Scheme
380 object (the string returned). */
0f2d19dd 381
7337d56d
LC
382 SCM str = SCM_BOOL_F;
383 char c_str[READER_STRING_BUFFER_SIZE];
384 unsigned c_str_len = 0;
385 int c;
eb42ff25 386
7337d56d
LC
387 while ('"' != (c = scm_getc (port)))
388 {
389 if (c == EOF)
390 str_eof: scm_i_input_error (FUNC_NAME, port,
391 "end of file in string constant",
392 SCM_EOL);
0f2d19dd 393
7337d56d
LC
394 if (c_str_len + 1 >= sizeof (c_str))
395 {
396 /* Flush the C buffer onto a Scheme string. */
397 SCM addy;
deca31e1 398
7337d56d
LC
399 if (str == SCM_BOOL_F)
400 str = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
0f2d19dd 401
7337d56d
LC
402 addy = scm_from_locale_stringn (c_str, c_str_len);
403 str = scm_string_append_shared (scm_list_2 (str, addy));
0f2d19dd 404
7337d56d
LC
405 c_str_len = 0;
406 }
0f2d19dd 407
7337d56d
LC
408 if (c == '\\')
409 switch (c = scm_getc (port))
410 {
411 case EOF:
412 goto str_eof;
413 case '"':
414 case '\\':
415 break;
16353acc 416#if SCM_ENABLE_ELISP
7337d56d
LC
417 case '(':
418 case ')':
419 if (SCM_ESCAPED_PARENS_P)
420 break;
421 goto bad_escaped;
16353acc 422#endif
7337d56d
LC
423 case '\n':
424 continue;
425 case '0':
426 c = '\0';
427 break;
428 case 'f':
429 c = '\f';
430 break;
431 case 'n':
432 c = '\n';
433 break;
434 case 'r':
435 c = '\r';
436 break;
437 case 't':
438 c = '\t';
439 break;
440 case 'a':
441 c = '\007';
442 break;
443 case 'v':
444 c = '\v';
445 break;
446 case 'x':
3c9a524f 447 {
7337d56d
LC
448 int a, b;
449 a = scm_getc (port);
450 if (a == EOF) goto str_eof;
451 b = scm_getc (port);
452 if (b == EOF) goto str_eof;
453 if ('0' <= a && a <= '9') a -= '0';
454 else if ('A' <= a && a <= 'F') a = a - 'A' + 10;
455 else if ('a' <= a && a <= 'f') a = a - 'a' + 10;
456 else goto bad_escaped;
457 if ('0' <= b && b <= '9') b -= '0';
458 else if ('A' <= b && b <= 'F') b = b - 'A' + 10;
459 else if ('a' <= b && b <= 'f') b = b - 'a' + 10;
460 else goto bad_escaped;
461 c = a * 16 + b;
462 break;
3c9a524f 463 }
7337d56d
LC
464 default:
465 bad_escaped:
466 scm_i_input_error (FUNC_NAME, port,
467 "illegal character in escape sequence: ~S",
468 scm_list_1 (SCM_MAKE_CHAR (c)));
469 }
470 c_str[c_str_len++] = c;
471 }
f13b4400 472
7337d56d
LC
473 if (c_str_len > 0)
474 {
475 SCM addy;
0f2d19dd 476
7337d56d
LC
477 addy = scm_from_locale_stringn (c_str, c_str_len);
478 if (str == SCM_BOOL_F)
479 str = addy;
480 else
481 str = scm_string_append_shared (scm_list_2 (str, addy));
0f2d19dd 482 }
7337d56d
LC
483 else
484 str = (str == SCM_BOOL_F) ? scm_nullstr : str;
485
486 return str;
0f2d19dd 487}
db4b4ca6
DH
488#undef FUNC_NAME
489
0f2d19dd 490
7337d56d
LC
491static SCM
492scm_read_number (int chr, SCM port)
0f2d19dd 493{
7337d56d
LC
494 SCM result, str = SCM_EOL;
495 char buffer[READER_BUFFER_SIZE];
496 size_t read;
497 int overflow = 0;
0f2d19dd 498
7337d56d
LC
499 scm_ungetc (chr, port);
500 do
0f2d19dd 501 {
7337d56d
LC
502 overflow = read_token (port, buffer, sizeof (buffer), &read);
503
504 if ((overflow) || (scm_is_pair (str)))
505 str = scm_cons (scm_from_locale_stringn (buffer, read), str);
0f2d19dd 506 }
7337d56d 507 while (overflow);
0f2d19dd 508
7337d56d 509 if (scm_is_pair (str))
0f2d19dd 510 {
7337d56d 511 /* The slow path. */
0f2d19dd 512
7337d56d
LC
513 str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
514 result = scm_string_to_number (str, SCM_UNDEFINED);
515 if (!scm_is_true (result))
516 /* Return a symbol instead of a number. */
517 result = scm_string_to_symbol (str);
518 }
519 else
520 {
521 result = scm_c_locale_stringn_to_number (buffer, read, 10);
522 if (!scm_is_true (result))
523 /* Return a symbol instead of a number. */
524 result = scm_from_locale_symboln (buffer, read);
525 }
0f2d19dd 526
7337d56d
LC
527 return result;
528}
0f2d19dd 529
7337d56d
LC
530static SCM
531scm_read_mixed_case_symbol (int chr, SCM port)
532{
533 SCM result, str = SCM_EOL;
534 int overflow = 0;
535 char buffer[READER_BUFFER_SIZE];
536 size_t read = 0;
537
538 scm_ungetc (chr, port);
539 do
540 {
541 overflow = read_token (port, buffer, sizeof (buffer), &read);
542
543 if ((overflow) || (scm_is_pair (str)))
544 str = scm_cons (scm_from_locale_stringn (buffer, read), str);
545 }
546 while (overflow);
547
548 if (scm_is_pair (str))
549 {
550 str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
551 result = scm_string_to_symbol (str);
552 }
553 else
554 /* For symbols smaller than `sizeof (buffer)', we don't need to recur to
555 Scheme strings. Therefore, we only create one Scheme object (a
556 symbol) per symbol read. */
557 result = scm_from_locale_symboln (buffer, read);
558
559 return result;
560}
561
562static SCM
563scm_read_number_and_radix (int chr, SCM port)
564#define FUNC_NAME "scm_lreadr"
565{
566 SCM result, str = SCM_EOL;
567 size_t read;
568 char buffer[READER_BUFFER_SIZE];
569 unsigned int radix;
570 int overflow = 0;
571
572 switch (chr)
573 {
574 case 'B':
575 case 'b':
576 radix = 2;
577 break;
578
579 case 'o':
580 case 'O':
581 radix = 8;
582 break;
583
584 case 'd':
585 case 'D':
586 radix = 10;
587 break;
588
589 case 'x':
590 case 'X':
591 radix = 16;
592 break;
593
594 default:
595 scm_ungetc (chr, port);
596 scm_ungetc ('#', port);
597 radix = 10;
598 }
599
600 do
601 {
602 overflow = read_token (port, buffer, sizeof (buffer), &read);
603
604 if ((overflow) || (scm_is_pair (str)))
605 str = scm_cons (scm_from_locale_stringn (buffer, read), str);
606 }
607 while (overflow);
608
609 if (scm_is_pair (str))
610 {
611 str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
612 result = scm_string_to_number (str, scm_from_uint (radix));
613 }
614 else
615 result = scm_c_locale_stringn_to_number (buffer, read, radix);
616
617 if (scm_is_true (result))
618 return result;
619
620 scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
621
622 return SCM_BOOL_F;
623}
624#undef FUNC_NAME
625
626static SCM
627scm_read_quote (int chr, SCM port)
628{
629 SCM p;
492faee1
LC
630 long line = SCM_LINUM (port);
631 int column = SCM_COL (port) - 1;
7337d56d
LC
632
633 switch (chr)
634 {
635 case '`':
636 p = scm_sym_quasiquote;
637 break;
638
639 case '\'':
640 p = scm_sym_quote;
641 break;
642
643 case ',':
644 {
645 int c;
646
647 c = scm_getc (port);
648 if ('@' == c)
649 p = scm_sym_uq_splicing;
650 else
0f2d19dd 651 {
7337d56d
LC
652 scm_ungetc (c, port);
653 p = scm_sym_unquote;
0f2d19dd 654 }
7337d56d
LC
655 break;
656 }
0f2d19dd 657
7337d56d
LC
658 default:
659 fprintf (stderr, "%s: unhandled quote character (%i)\n",
7f74cf9a 660 "scm_read_quote", chr);
7337d56d 661 abort ();
0f2d19dd 662 }
1cc91f1b 663
7337d56d 664 p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
492faee1
LC
665 if (SCM_RECORD_POSITIONS_P)
666 scm_whash_insert (scm_source_whash, p,
667 scm_make_srcprops (line, column,
668 SCM_FILENAME (port),
669 SCM_COPY_SOURCE_P
670 ? (scm_cons2 (SCM_CAR (p),
671 SCM_CAR (SCM_CDR (p)),
672 SCM_EOL))
673 : SCM_UNDEFINED,
674 SCM_EOL));
675
0f2d19dd 676
7337d56d
LC
677 return p;
678}
679
680static inline SCM
681scm_read_semicolon_comment (int chr, SCM port)
0f2d19dd 682{
0f2d19dd
JB
683 int c;
684
7337d56d
LC
685 for (c = scm_getc (port);
686 (c != EOF) && (c != '\n');
687 c = scm_getc (port));
688
689 return SCM_UNSPECIFIED;
690}
691
692\f
693/* Sharp readers, i.e. readers called after a `#' sign has been read. */
694
695static SCM
696scm_read_boolean (int chr, SCM port)
697{
698 switch (chr)
0f2d19dd 699 {
7337d56d
LC
700 case 't':
701 case 'T':
702 return SCM_BOOL_T;
703
704 case 'f':
705 case 'F':
706 return SCM_BOOL_F;
0f2d19dd 707 }
7337d56d
LC
708
709 return SCM_UNSPECIFIED;
710}
711
712static SCM
713scm_read_character (int chr, SCM port)
714#define FUNC_NAME "scm_lreadr"
715{
716 unsigned c;
717 char charname[READER_CHAR_NAME_MAX_SIZE];
718 size_t charname_len;
719
720 if (read_token (port, charname, sizeof (charname), &charname_len))
721 goto char_error;
722
723 if (charname_len == 0)
0f2d19dd 724 {
7337d56d
LC
725 chr = scm_getc (port);
726 if (chr == EOF)
727 scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
728 "while reading character", SCM_EOL);
729
730 /* CHR must be a token delimiter, like a whitespace. */
731 return (SCM_MAKE_CHAR (chr));
0f2d19dd 732 }
7337d56d
LC
733
734 if (charname_len == 1)
735 return SCM_MAKE_CHAR (charname[0]);
736
737 if (*charname >= '0' && *charname < '8')
738 {
739 /* Dirk:FIXME:: This type of character syntax is not R5RS
740 * compliant. Further, it should be verified that the constant
741 * does only consist of octal digits. Finally, it should be
742 * checked whether the resulting fixnum is in the range of
743 * characters. */
744 SCM p = scm_c_locale_stringn_to_number (charname, charname_len, 8);
745 if (SCM_I_INUMP (p))
746 return SCM_MAKE_CHAR (SCM_I_INUM (p));
747 }
748
749 for (c = 0; c < scm_n_charnames; c++)
750 if (scm_charnames[c]
751 && (!strncasecmp (scm_charnames[c], charname, charname_len)))
752 return SCM_MAKE_CHAR (scm_charnums[c]);
753
754 char_error:
755 scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
756 scm_list_1 (scm_from_locale_stringn (charname,
757 charname_len)));
758
759 return SCM_UNSPECIFIED;
0f2d19dd 760}
db4b4ca6 761#undef FUNC_NAME
0f2d19dd 762
7337d56d
LC
763static inline SCM
764scm_read_keyword (int chr, SCM port)
765{
766 SCM symbol;
767
768 /* Read the symbol that comprises the keyword. Doing this instead of
769 invoking a specific symbol reader function allows `scm_read_keyword ()'
770 to adapt to the delimiters currently valid of symbols.
1cc91f1b 771
7337d56d
LC
772 XXX: This implementation allows sloppy syntaxes like `#: key'. */
773 symbol = scm_read_expression (port);
774 if (!scm_is_symbol (symbol))
7f74cf9a 775 scm_i_input_error ("scm_read_keyword", port,
7337d56d
LC
776 "keyword prefix `~a' not followed by a symbol: ~s",
777 scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
778
779 return (scm_symbol_to_keyword (symbol));
780}
781
782static inline SCM
783scm_read_vector (int chr, SCM port)
09a4f039 784{
7337d56d
LC
785 /* Note: We call `scm_read_sexp ()' rather than READER here in order to
786 guarantee that it's going to do what we want. After all, this is an
787 implementation detail of `scm_read_vector ()', not a desirable
788 property. */
789 return (scm_vector (scm_read_sexp (chr, port)));
790}
09a4f039 791
7337d56d
LC
792static inline SCM
793scm_read_srfi4_vector (int chr, SCM port)
794{
795 return scm_i_read_array (port, chr);
796}
797
798static SCM
799scm_read_guile_bit_vector (int chr, SCM port)
800{
801 /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
802 terribly inefficient but who cares? */
803 SCM s_bits = SCM_EOL;
804
805 for (chr = scm_getc (port);
806 (chr != EOF) && ((chr == '0') || (chr == '1'));
807 chr = scm_getc (port))
09a4f039 808 {
7337d56d 809 s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
09a4f039 810 }
7337d56d
LC
811
812 if (chr != EOF)
813 scm_ungetc (chr, port);
814
815 return scm_bitvector (scm_reverse_x (s_bits, SCM_EOL));
816}
817
818static inline SCM
819scm_read_scsh_block_comment (int chr, SCM port)
820{
821 int bang_seen = 0;
822
823 for (;;)
09a4f039 824 {
7337d56d 825 int c = scm_getc (port);
62850ef3 826
7337d56d
LC
827 if (c == EOF)
828 scm_i_input_error ("skip_block_comment", port,
829 "unterminated `#! ... !#' comment", SCM_EOL);
830
831 if (c == '!')
832 bang_seen = 1;
833 else if (c == '#' && bang_seen)
834 break;
835 else
836 bang_seen = 0;
837 }
838
839 return SCM_UNSPECIFIED;
840}
841
842static SCM
843scm_read_extended_symbol (int chr, SCM port)
844{
845 /* Guile's extended symbol read syntax looks like this:
846
847 #{This is all a symbol name}#
848
849 So here, CHR is expected to be `{'. */
850 SCM result;
851 int saw_brace = 0, finished = 0;
852 size_t len = 0;
853 char buf[1024];
854
855 result = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
856
857 while ((chr = scm_getc (port)) != EOF)
858 {
859 if (saw_brace)
09a4f039 860 {
7337d56d
LC
861 if (chr == '#')
862 {
863 finished = 1;
864 break;
865 }
866 else
867 {
868 saw_brace = 0;
869 buf[len++] = '}';
870 buf[len++] = chr;
871 }
09a4f039 872 }
7337d56d
LC
873 else if (chr == '}')
874 saw_brace = 1;
875 else
876 buf[len++] = chr;
62850ef3 877
7337d56d
LC
878 if (len >= sizeof (buf) - 2)
879 {
880 scm_string_append (scm_list_2 (result,
881 scm_from_locale_stringn (buf, len)));
882 len = 0;
883 }
62850ef3 884
7337d56d
LC
885 if (finished)
886 break;
887 }
888
889 if (len)
890 result = scm_string_append (scm_list_2
891 (result,
892 scm_from_locale_stringn (buf, len)));
893
894 return (scm_string_to_symbol (result));
895}
896
897
898\f
899/* Top-level token readers, i.e., dispatchers. */
900
901static SCM
902scm_read_sharp_extension (int chr, SCM port)
903{
904 SCM proc;
905
906 proc = scm_get_hash_procedure (chr);
907 if (scm_is_true (scm_procedure_p (proc)))
908 {
909 long line = SCM_LINUM (port);
910 int column = SCM_COL (port) - 2;
911 SCM got;
912
913 got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
914 if (!scm_is_eq (got, SCM_UNSPECIFIED))
62850ef3 915 {
7337d56d
LC
916 if (SCM_RECORD_POSITIONS_P)
917 return (recsexpr (got, line, column,
918 SCM_FILENAME (port)));
919 else
920 return got;
62850ef3 921 }
09a4f039 922 }
7337d56d
LC
923
924 return SCM_UNSPECIFIED;
925}
926
927/* The reader for the sharp `#' character. It basically dispatches reads
928 among the above token readers. */
929static SCM
930scm_read_sharp (int chr, SCM port)
931#define FUNC_NAME "scm_lreadr"
932{
933 SCM result;
934
935 chr = scm_getc (port);
936
937 result = scm_read_sharp_extension (chr, port);
938 if (!scm_is_eq (result, SCM_UNSPECIFIED))
939 return result;
940
941 switch (chr)
942 {
943 case '\\':
944 return (scm_read_character (chr, port));
945 case '(':
946 return (scm_read_vector (chr, port));
947 case 's':
948 case 'u':
949 case 'f':
950 /* This one may return either a boolean or an SRFI-4 vector. */
951 return (scm_read_srfi4_vector (chr, port));
952 case '*':
953 return (scm_read_guile_bit_vector (chr, port));
954 case 't':
955 case 'T':
956 case 'F':
957 /* This one may return either a boolean or an SRFI-4 vector. */
958 return (scm_read_boolean (chr, port));
959 case ':':
960 return (scm_read_keyword (chr, port));
961 case '0': case '1': case '2': case '3': case '4':
962 case '5': case '6': case '7': case '8': case '9':
963 case '@':
964#if SCM_ENABLE_DEPRECATED
965 /* See below for 'i' and 'e'. */
966 case 'a':
967 case 'c':
968 case 'y':
969 case 'h':
970 case 'l':
971#endif
972 return (scm_i_read_array (port, chr));
973
974 case 'i':
975 case 'e':
976#if SCM_ENABLE_DEPRECATED
977 {
978 /* When next char is '(', it really is an old-style
979 uniform array. */
980 int next_c = scm_getc (port);
981 if (next_c != EOF)
982 scm_ungetc (next_c, port);
983 if (next_c == '(')
984 return scm_i_read_array (port, chr);
985 /* Fall through. */
986 }
987#endif
988 case 'b':
989 case 'B':
990 case 'o':
991 case 'O':
992 case 'd':
993 case 'D':
994 case 'x':
995 case 'X':
996 case 'I':
997 case 'E':
998 return (scm_read_number_and_radix (chr, port));
999 case '{':
1000 return (scm_read_extended_symbol (chr, port));
1001 case '!':
1002 return (scm_read_scsh_block_comment (chr, port));
1003 default:
1004 result = scm_read_sharp_extension (chr, port);
1005 if (scm_is_eq (result, SCM_UNSPECIFIED))
1006 scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
1007 scm_list_1 (SCM_MAKE_CHAR (chr)));
1008 else
1009 return result;
1010 }
1011
1012 return SCM_UNSPECIFIED;
1013}
1014#undef FUNC_NAME
1015
1016static SCM
1017scm_read_expression (SCM port)
1018#define FUNC_NAME "scm_read_expression"
1019{
1020 while (1)
1021 {
1022 register int chr;
1023
1024 chr = scm_getc (port);
1025
1026 switch (chr)
1027 {
1028 case SCM_WHITE_SPACES:
1029 case SCM_LINE_INCREMENTORS:
1030 break;
1031 case ';':
1032 (void) scm_read_semicolon_comment (chr, port);
1033 break;
1034 case '(':
1035 return (scm_read_sexp (chr, port));
1036 case '"':
1037 return (scm_read_string (chr, port));
1038 case '\'':
1039 case '`':
1040 case ',':
1041 return (scm_read_quote (chr, port));
1042 case '#':
1043 {
1044 SCM result;
1045 result = scm_read_sharp (chr, port);
1046 if (scm_is_eq (result, SCM_UNSPECIFIED))
1047 /* We read a comment or some such. */
1048 break;
1049 else
1050 return result;
1051 }
1052 case ')':
1053 scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
1054 break;
1055 case EOF:
1056 return SCM_EOF_VAL;
1057 case ':':
1058 if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix))
1059 return scm_symbol_to_keyword (scm_read_expression (port));
1060 /* Fall through. */
1061
1062 default:
1063 {
1064 if (((chr >= '0') && (chr <= '9'))
1065 || (strchr ("+-.", chr)))
1066 return (scm_read_number (chr, port));
1067 else
1068 return (scm_read_mixed_case_symbol (chr, port));
1069 }
1070 }
1071 }
1072}
1073#undef FUNC_NAME
1074
1075\f
1076/* Actual reader. */
1077
1078SCM_DEFINE (scm_read, "read", 0, 1, 0,
1079 (SCM port),
1080 "Read an s-expression from the input port @var{port}, or from\n"
1081 "the current input port if @var{port} is not specified.\n"
1082 "Any whitespace before the next token is discarded.")
1083#define FUNC_NAME s_scm_read
1084{
1085 int c;
1086
1087 if (SCM_UNBNDP (port))
1088 port = scm_current_input_port ();
1089 SCM_VALIDATE_OPINPORT (1, port);
1090
1091 c = flush_ws (port, (char *) NULL);
1092 if (EOF == c)
1093 return SCM_EOF_VAL;
1094 scm_ungetc (c, port);
1095
1096 return (scm_read_expression (port));
09a4f039 1097}
db4b4ca6 1098#undef FUNC_NAME
09a4f039 1099
0f2d19dd
JB
1100
1101\f
1102
7337d56d
LC
1103/* Used when recording expressions constructed by `scm_read_sharp ()'. */
1104static SCM
1105recsexpr (SCM obj, long line, int column, SCM filename)
1106{
1107 if (!scm_is_pair(obj)) {
1108 return obj;
1109 } else {
1110 SCM tmp = obj, copy;
1111 /* If this sexpr is visible in the read:sharp source, we want to
1112 keep that information, so only record non-constant cons cells
1113 which haven't previously been read by the reader. */
1114 if (scm_is_false (scm_whash_lookup (scm_source_whash, obj)))
1115 {
1116 if (SCM_COPY_SOURCE_P)
1117 {
1118 copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename),
1119 SCM_UNDEFINED);
1120 while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
1121 {
1122 SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp),
1123 line,
1124 column,
1125 filename),
1126 SCM_UNDEFINED));
1127 copy = SCM_CDR (copy);
1128 }
1129 SCM_SETCDR (copy, tmp);
1130 }
1131 else
1132 {
1133 recsexpr (SCM_CAR (obj), line, column, filename);
1134 while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
1135 recsexpr (SCM_CAR (tmp), line, column, filename);
1136 copy = SCM_UNDEFINED;
1137 }
1138 scm_whash_insert (scm_source_whash,
1139 obj,
1140 scm_make_srcprops (line,
1141 column,
1142 filename,
1143 copy,
1144 SCM_EOL));
1145 }
1146 return obj;
1147 }
1148}
1149
14de3b42
GH
1150/* Manipulate the read-hash-procedures alist. This could be written in
1151 Scheme, but maybe it will also be used by C code during initialisation. */
a1ec6916 1152SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
1bbd0b84 1153 (SCM chr, SCM proc),
dc7fa443
MG
1154 "Install the procedure @var{proc} for reading expressions\n"
1155 "starting with the character sequence @code{#} and @var{chr}.\n"
1156 "@var{proc} will be called with two arguments: the character\n"
1157 "@var{chr} and the port to read further data from. The object\n"
391f57e6
HWN
1158 "returned will be the return value of @code{read}. \n"
1159 "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1160 )
1bbd0b84 1161#define FUNC_NAME s_scm_read_hash_extend
deca31e1 1162{
fed9c9a2
GH
1163 SCM this;
1164 SCM prev;
1165
36284627 1166 SCM_VALIDATE_CHAR (1, chr);
7888309b 1167 SCM_ASSERT (scm_is_false (proc)
bc36d050 1168 || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
36284627 1169 proc, SCM_ARG2, FUNC_NAME);
fed9c9a2 1170
14de3b42
GH
1171 /* Check if chr is already in the alist. */
1172 this = *scm_read_hash_procedures;
1173 prev = SCM_BOOL_F;
fed9c9a2
GH
1174 while (1)
1175 {
d2e53ed6 1176 if (scm_is_null (this))
fed9c9a2
GH
1177 {
1178 /* not found, so add it to the beginning. */
7888309b 1179 if (scm_is_true (proc))
fed9c9a2 1180 {
14de3b42
GH
1181 *scm_read_hash_procedures =
1182 scm_cons (scm_cons (chr, proc), *scm_read_hash_procedures);
fed9c9a2
GH
1183 }
1184 break;
1185 }
bc36d050 1186 if (scm_is_eq (chr, SCM_CAAR (this)))
fed9c9a2
GH
1187 {
1188 /* already in the alist. */
7888309b 1189 if (scm_is_false (proc))
14de3b42
GH
1190 {
1191 /* remove it. */
7888309b 1192 if (scm_is_false (prev))
14de3b42
GH
1193 {
1194 *scm_read_hash_procedures =
1195 SCM_CDR (*scm_read_hash_procedures);
1196 }
1197 else
1198 scm_set_cdr_x (prev, SCM_CDR (this));
1199 }
fed9c9a2 1200 else
14de3b42
GH
1201 {
1202 /* replace it. */
1203 scm_set_cdr_x (SCM_CAR (this), proc);
1204 }
fed9c9a2
GH
1205 break;
1206 }
1207 prev = this;
1208 this = SCM_CDR (this);
1209 }
deca31e1 1210
deca31e1
GH
1211 return SCM_UNSPECIFIED;
1212}
1bbd0b84 1213#undef FUNC_NAME
0f2d19dd 1214
deca31e1
GH
1215/* Recover the read-hash procedure corresponding to char c. */
1216static SCM
6e8d25a6 1217scm_get_hash_procedure (int c)
deca31e1 1218{
14de3b42 1219 SCM rest = *scm_read_hash_procedures;
fed9c9a2 1220
deca31e1
GH
1221 while (1)
1222 {
d2e53ed6 1223 if (scm_is_null (rest))
deca31e1
GH
1224 return SCM_BOOL_F;
1225
7866a09b 1226 if (SCM_CHAR (SCM_CAAR (rest)) == c)
deca31e1
GH
1227 return SCM_CDAR (rest);
1228
1229 rest = SCM_CDR (rest);
1230 }
1231}
1cc91f1b 1232
0f2d19dd
JB
1233void
1234scm_init_read ()
0f2d19dd 1235{
14de3b42 1236 scm_read_hash_procedures =
86d31dfe 1237 SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL));
fed9c9a2 1238
62560650 1239 scm_init_opts (scm_read_options, scm_read_opts);
a0599745 1240#include "libguile/read.x"
0f2d19dd 1241}
89e00824
ML
1242
1243/*
1244 Local Variables:
1245 c-file-style: "gnu"
1246 End:
1247*/