Change Guile license to LGPLv3+
[bpt/guile.git] / libguile / read.c
CommitLineData
7f74cf9a 1/* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007, 2008 Free Software
dd72382c 2 * Foundation, Inc.
0f2d19dd 3 *
73be1d9e 4 * This library is free software; you can redistribute it and/or
53befeb7
NJ
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
0f2d19dd 8 *
53befeb7
NJ
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
0f2d19dd 13 *
73be1d9e
MV
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
53befeb7
NJ
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
73be1d9e 18 */
1bbd0b84 19
1bbd0b84 20
0f2d19dd
JB
21\f
22
7337d56d
LC
23#ifdef HAVE_CONFIG_H
24# include <config.h>
25#endif
26
0f2d19dd 27#include <stdio.h>
7337d56d
LC
28#include <ctype.h>
29#include <string.h>
7337d56d 30
a0599745
MD
31#include "libguile/_scm.h"
32#include "libguile/chars.h"
33#include "libguile/eval.h"
34#include "libguile/unif.h"
35#include "libguile/keywords.h"
36#include "libguile/alist.h"
37#include "libguile/srcprop.h"
38#include "libguile/hashtab.h"
39#include "libguile/hash.h"
40#include "libguile/ports.h"
41#include "libguile/root.h"
42#include "libguile/strings.h"
ba1b2226 43#include "libguile/strports.h"
a0599745 44#include "libguile/vectors.h"
a0599745 45#include "libguile/validate.h"
a4022e69 46#include "libguile/srfi-4.h"
7337d56d 47#include "libguile/srfi-13.h"
ba1b2226 48
a0599745 49#include "libguile/read.h"
22fc179a
HWN
50#include "libguile/private-options.h"
51
0f2d19dd
JB
52
53\f
54
5bf6a6f0 55SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
c7733771 56SCM_SYMBOL (scm_keyword_prefix, "prefix");
ef4cbc08 57SCM_SYMBOL (scm_keyword_postfix, "postfix");
c7733771 58
92c2555f 59scm_t_option scm_read_opts[] = {
b7ff98dd
MD
60 { SCM_OPTION_BOOLEAN, "copy", 0,
61 "Copy source code expressions." },
ac74fc22 62 { SCM_OPTION_BOOLEAN, "positions", 0,
deca31e1
GH
63 "Record positions of source code expressions." },
64 { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
c7733771 65 "Convert symbols to lower case."},
f1267706 66 { SCM_OPTION_SCM, "keywords", SCM_UNPACK (SCM_BOOL_F),
904fabb6 67 "Style of keyword recognition: #f, 'prefix or 'postfix."},
16353acc 68#if SCM_ENABLE_ELISP
16353acc
NJ
69 { SCM_OPTION_BOOLEAN, "elisp-vectors", 0,
70 "Support Elisp vector syntax, namely `[...]'."},
cd21f5eb 71 { SCM_OPTION_BOOLEAN, "elisp-strings", 0,
62560650 72 "Support `\\(' and `\\)' in strings."},
16353acc 73#endif
62560650 74 { 0, },
a16f6fe7
MD
75};
76
39e8f371
HWN
77/*
78 Give meaningful error messages for errors
79
80 We use the format
81
ba1b2226 82 FILE:LINE:COL: MESSAGE
39e8f371
HWN
83 This happened in ....
84
85 This is not standard GNU format, but the test-suite likes the real
86 message to be in front.
87
39e8f371
HWN
88 */
89
90
a4022e69
MV
91void
92scm_i_input_error (char const *function,
93 SCM port, const char *message, SCM arg)
ba1b2226 94{
29a837fd
MV
95 SCM fn = (scm_is_string (SCM_FILENAME(port))
96 ? SCM_FILENAME(port)
97 : scm_from_locale_string ("#<unknown port>"));
ba1b2226 98
29a837fd 99 SCM string_port = scm_open_output_string ();
ba1b2226
HWN
100 SCM string = SCM_EOL;
101 scm_simple_format (string_port,
272632a6 102 scm_from_locale_string ("~A:~S:~S: ~A"),
29a837fd 103 scm_list_4 (fn,
b3aa4626 104 scm_from_long (SCM_LINUM (port) + 1),
b9bd8526 105 scm_from_int (SCM_COL (port) + 1),
272632a6 106 scm_from_locale_string (message)));
ba1b2226
HWN
107
108 string = scm_get_output_string (string_port);
109 scm_close_output_port (string_port);
272632a6 110 scm_error_scm (scm_from_locale_symbol ("read-error"),
a4022e69 111 function? scm_from_locale_string (function) : SCM_BOOL_F,
ba1b2226 112 string,
dd72382c 113 arg,
ba1b2226
HWN
114 SCM_BOOL_F);
115}
39e8f371
HWN
116
117
a1ec6916 118SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
1bbd0b84 119 (SCM setting),
dc7fa443
MG
120 "Option interface for the read options. Instead of using\n"
121 "this procedure directly, use the procedures @code{read-enable},\n"
3939e9df 122 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
1bbd0b84 123#define FUNC_NAME s_scm_read_options
a16f6fe7 124{
b7ff98dd
MD
125 SCM ans = scm_options (setting,
126 scm_read_opts,
1bbd0b84 127 FUNC_NAME);
b7ff98dd
MD
128 if (SCM_COPY_SOURCE_P)
129 SCM_RECORD_POSITIONS_P = 1;
a16f6fe7
MD
130 return ans;
131}
1bbd0b84 132#undef FUNC_NAME
a16f6fe7 133
14de3b42
GH
134/* An association list mapping extra hash characters to procedures. */
135static SCM *scm_read_hash_procedures;
deca31e1 136
0f2d19dd 137
7337d56d
LC
138\f
139/* Token readers. */
0f2d19dd 140
0f2d19dd 141
7337d56d
LC
/* Capacity, in bytes, of the on-stack C buffer that symbols and numbers
   are read into.  */
#define READER_BUFFER_SIZE            128

/* Capacity, in bytes, of the on-stack C buffer that string literals are
   read into.  */
#define READER_STRING_BUFFER_SIZE     512

/* Longest Scheme character name the reader accepts.  */
#define READER_CHAR_NAME_MAX_SIZE      50
1cc91f1b 150
94115ae3 151
7337d56d
LC
/* Whitespace test.  `isblank' is only in C99, so spell it out.  */
#define CHAR_IS_BLANK_(_chr)                                    \
  (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n')      \
   || ((_chr) == '\f') || ((_chr) == '\r'))

#ifdef MSDOS
/* On MS-DOS, character code 26 also counts as blank.  The macro must
   expand its own parameter `_chr'; it used to name `chr', silently
   capturing whatever variable of that name was in scope at the call
   site.  */
# define CHAR_IS_BLANK(_chr)                                    \
  ((CHAR_IS_BLANK_ (_chr)) || ((_chr) == 26))
#else
# define CHAR_IS_BLANK CHAR_IS_BLANK_
#endif


/* R5RS one-character delimiters (see section 7.1.1, ``Lexical
   structure'').  The argument is parenthesized so that expression
   arguments such as `x & 0xff' are compared as a whole.  */
#define CHAR_IS_R5RS_DELIMITER(c)                               \
  (CHAR_IS_BLANK (c)                                            \
   || ((c) == ')') || ((c) == '(') || ((c) == ';') || ((c) == '"'))

#define CHAR_IS_DELIMITER  CHAR_IS_R5RS_DELIMITER

/* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
   Structure''.  Only the lower-case letters are recognized.  */
#define CHAR_IS_EXPONENT_MARKER(_chr)                           \
  (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f')        \
   || ((_chr) == 'd') || ((_chr) == 'l'))

/* An inlinable version of `scm_c_downcase ()'.  Values above UCHAR_MAX
   are returned unchanged; everything else goes through `tolower ()'.  */
#define CHAR_DOWNCASE(_chr)                                     \
  (((_chr) <= UCHAR_MAX) ? tolower (_chr) : (_chr))
182
183
454866e0
LC
/* Forward declarations of the comment readers called by `flush_ws ()'.  */
185static inline SCM scm_read_scsh_block_comment (int chr, SCM port);
34f3d47d 186static SCM scm_read_commented_expression (int chr, SCM port);
454866e0 187
d41668fa
LC
188/* Read from PORT until a delimiter (e.g., a whitespace) is read. Return
189 zero if the whole token fits in BUF, non-zero otherwise. */
7337d56d
LC
190static inline int
191read_token (SCM port, char *buf, size_t buf_size, size_t *read)
0520c320 192{
7337d56d 193 *read = 0;
0520c320 194
7337d56d 195 while (*read < buf_size)
0520c320 196 {
7337d56d 197 int chr;
0520c320 198
7337d56d
LC
199 chr = scm_getc (port);
200 chr = (SCM_CASE_INSENSITIVE_P ? CHAR_DOWNCASE (chr) : chr);
201
202 if (chr == EOF)
203 return 0;
204 else if (CHAR_IS_DELIMITER (chr))
205 {
206 scm_ungetc (chr, port);
207 return 0;
208 }
0520c320 209 else
7337d56d
LC
210 {
211 *buf = (char) chr;
212 buf++, (*read)++;
213 }
0520c320 214 }
7337d56d
LC
215
216 return 1;
0520c320 217}
1cc91f1b 218
7337d56d
LC
219
220/* Skip whitespace from PORT and return the first non-whitespace character
221 read. Raise an error on end-of-file. */
222static int
223flush_ws (SCM port, const char *eoferr)
0f2d19dd
JB
224{
225 register int c;
226 while (1)
b7f3516f 227 switch (c = scm_getc (port))
0f2d19dd
JB
228 {
229 case EOF:
230 goteof:
231 if (eoferr)
d156d3b7 232 {
a4022e69
MV
233 scm_i_input_error (eoferr,
234 port,
235 "end of file",
236 SCM_EOL);
d156d3b7 237 }
0f2d19dd 238 return c;
7337d56d 239
0f2d19dd
JB
240 case ';':
241 lp:
b7f3516f 242 switch (c = scm_getc (port))
0f2d19dd
JB
243 {
244 case EOF:
245 goto goteof;
246 default:
247 goto lp;
248 case SCM_LINE_INCREMENTORS:
249 break;
250 }
251 break;
7337d56d 252
454866e0
LC
253 case '#':
254 switch (c = scm_getc (port))
255 {
256 case EOF:
257 eoferr = "read_sharp";
258 goto goteof;
259 case '!':
260 scm_read_scsh_block_comment (c, port);
261 break;
34f3d47d
AW
262 case ';':
263 scm_read_commented_expression (c, port);
264 break;
454866e0
LC
265 default:
266 scm_ungetc (c, port);
267 return '#';
268 }
269 break;
270
0f2d19dd 271 case SCM_LINE_INCREMENTORS:
0f2d19dd 272 case SCM_SINGLE_SPACES:
0f2d19dd 273 case '\t':
0f2d19dd 274 break;
7337d56d 275
0f2d19dd
JB
276 default:
277 return c;
278 }
7337d56d
LC
279
280 return 0;
0f2d19dd
JB
281}
282
283
7337d56d
LC
284\f
285/* Token readers. */
1cc91f1b 286
7337d56d
LC
287static SCM scm_read_expression (SCM port);
288static SCM scm_read_sharp (int chr, SCM port);
289static SCM scm_get_hash_procedure (int c);
290static SCM recsexpr (SCM obj, long line, int column, SCM filename);
0f2d19dd
JB
291
292
09a4f039 293static SCM
7337d56d
LC
294scm_read_sexp (int chr, SCM port)
295#define FUNC_NAME "scm_i_lreadparen"
09a4f039 296{
7337d56d
LC
297 register int c;
298 register SCM tmp;
299 register SCM tl, ans = SCM_EOL;
bd22f1c7 300 SCM tl2 = SCM_EOL, ans2 = SCM_EOL, copy = SCM_BOOL_F;
7337d56d
LC
301 static const int terminating_char = ')';
302
303 /* Need to capture line and column numbers here. */
304 long line = SCM_LINUM (port);
305 int column = SCM_COL (port) - 1;
f9c68a47 306
f9c68a47 307
7337d56d
LC
308 c = flush_ws (port, FUNC_NAME);
309 if (terminating_char == c)
310 return SCM_EOL;
f9c68a47 311
7337d56d
LC
312 scm_ungetc (c, port);
313 if (scm_is_eq (scm_sym_dot,
314 (tmp = scm_read_expression (port))))
315 {
316 ans = scm_read_expression (port);
317 if (terminating_char != (c = flush_ws (port, FUNC_NAME)))
318 scm_i_input_error (FUNC_NAME, port, "missing close paren",
319 SCM_EOL);
320 return ans;
321 }
1cc91f1b 322
7337d56d
LC
323 /* Build the head of the list structure. */
324 ans = tl = scm_cons (tmp, SCM_EOL);
325
326 if (SCM_COPY_SOURCE_P)
327 ans2 = tl2 = scm_cons (scm_is_pair (tmp)
328 ? copy
329 : tmp,
330 SCM_EOL);
331
332 while (terminating_char != (c = flush_ws (port, FUNC_NAME)))
0f2d19dd 333 {
7337d56d 334 SCM new_tail;
0f2d19dd 335
7337d56d
LC
336 scm_ungetc (c, port);
337 if (scm_is_eq (scm_sym_dot,
338 (tmp = scm_read_expression (port))))
0f2d19dd 339 {
7337d56d
LC
340 SCM_SETCDR (tl, tmp = scm_read_expression (port));
341
342 if (SCM_COPY_SOURCE_P)
343 SCM_SETCDR (tl2, scm_cons (scm_is_pair (tmp) ? copy : tmp,
344 SCM_EOL));
345
346 c = flush_ws (port, FUNC_NAME);
347 if (terminating_char != c)
348 scm_i_input_error (FUNC_NAME, port,
349 "in pair: missing close paren", SCM_EOL);
350 goto exit;
0f2d19dd 351 }
b858464a 352
7337d56d
LC
353 new_tail = scm_cons (tmp, SCM_EOL);
354 SCM_SETCDR (tl, new_tail);
355 tl = new_tail;
356
357 if (SCM_COPY_SOURCE_P)
0f2d19dd 358 {
7337d56d
LC
359 SCM new_tail2 = scm_cons (scm_is_pair (tmp)
360 ? copy
361 : tmp, SCM_EOL);
362 SCM_SETCDR (tl2, new_tail2);
363 tl2 = new_tail2;
364 }
365 }
0f2d19dd 366
7337d56d
LC
367 exit:
368 if (SCM_RECORD_POSITIONS_P)
369 scm_whash_insert (scm_source_whash,
370 ans,
371 scm_make_srcprops (line, column,
372 SCM_FILENAME (port),
373 SCM_COPY_SOURCE_P
374 ? ans2
375 : SCM_UNDEFINED,
376 SCM_EOL));
377 return ans;
378}
379#undef FUNC_NAME
a4022e69 380
7337d56d
LC
381static SCM
382scm_read_string (int chr, SCM port)
383#define FUNC_NAME "scm_lreadr"
384{
385 /* For strings smaller than C_STR, this function creates only one Scheme
386 object (the string returned). */
0f2d19dd 387
7337d56d
LC
388 SCM str = SCM_BOOL_F;
389 char c_str[READER_STRING_BUFFER_SIZE];
390 unsigned c_str_len = 0;
391 int c;
eb42ff25 392
7337d56d
LC
393 while ('"' != (c = scm_getc (port)))
394 {
395 if (c == EOF)
396 str_eof: scm_i_input_error (FUNC_NAME, port,
397 "end of file in string constant",
398 SCM_EOL);
0f2d19dd 399
7337d56d
LC
400 if (c_str_len + 1 >= sizeof (c_str))
401 {
402 /* Flush the C buffer onto a Scheme string. */
403 SCM addy;
deca31e1 404
7337d56d
LC
405 if (str == SCM_BOOL_F)
406 str = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
0f2d19dd 407
7337d56d
LC
408 addy = scm_from_locale_stringn (c_str, c_str_len);
409 str = scm_string_append_shared (scm_list_2 (str, addy));
0f2d19dd 410
7337d56d
LC
411 c_str_len = 0;
412 }
0f2d19dd 413
7337d56d
LC
414 if (c == '\\')
415 switch (c = scm_getc (port))
416 {
417 case EOF:
418 goto str_eof;
419 case '"':
420 case '\\':
421 break;
16353acc 422#if SCM_ENABLE_ELISP
7337d56d
LC
423 case '(':
424 case ')':
425 if (SCM_ESCAPED_PARENS_P)
426 break;
427 goto bad_escaped;
16353acc 428#endif
7337d56d
LC
429 case '\n':
430 continue;
431 case '0':
432 c = '\0';
433 break;
434 case 'f':
435 c = '\f';
436 break;
437 case 'n':
438 c = '\n';
439 break;
440 case 'r':
441 c = '\r';
442 break;
443 case 't':
444 c = '\t';
445 break;
446 case 'a':
447 c = '\007';
448 break;
449 case 'v':
450 c = '\v';
451 break;
452 case 'x':
3c9a524f 453 {
7337d56d
LC
454 int a, b;
455 a = scm_getc (port);
456 if (a == EOF) goto str_eof;
457 b = scm_getc (port);
458 if (b == EOF) goto str_eof;
459 if ('0' <= a && a <= '9') a -= '0';
460 else if ('A' <= a && a <= 'F') a = a - 'A' + 10;
461 else if ('a' <= a && a <= 'f') a = a - 'a' + 10;
462 else goto bad_escaped;
463 if ('0' <= b && b <= '9') b -= '0';
464 else if ('A' <= b && b <= 'F') b = b - 'A' + 10;
465 else if ('a' <= b && b <= 'f') b = b - 'a' + 10;
466 else goto bad_escaped;
467 c = a * 16 + b;
468 break;
3c9a524f 469 }
7337d56d
LC
470 default:
471 bad_escaped:
472 scm_i_input_error (FUNC_NAME, port,
473 "illegal character in escape sequence: ~S",
474 scm_list_1 (SCM_MAKE_CHAR (c)));
475 }
476 c_str[c_str_len++] = c;
477 }
f13b4400 478
7337d56d
LC
479 if (c_str_len > 0)
480 {
481 SCM addy;
0f2d19dd 482
7337d56d
LC
483 addy = scm_from_locale_stringn (c_str, c_str_len);
484 if (str == SCM_BOOL_F)
485 str = addy;
486 else
487 str = scm_string_append_shared (scm_list_2 (str, addy));
0f2d19dd 488 }
7337d56d
LC
489 else
490 str = (str == SCM_BOOL_F) ? scm_nullstr : str;
491
45a9f430 492 return str;
0f2d19dd 493}
db4b4ca6
DH
494#undef FUNC_NAME
495
0f2d19dd 496
7337d56d
LC
497static SCM
498scm_read_number (int chr, SCM port)
0f2d19dd 499{
7337d56d
LC
500 SCM result, str = SCM_EOL;
501 char buffer[READER_BUFFER_SIZE];
502 size_t read;
503 int overflow = 0;
0f2d19dd 504
7337d56d
LC
505 scm_ungetc (chr, port);
506 do
0f2d19dd 507 {
7337d56d
LC
508 overflow = read_token (port, buffer, sizeof (buffer), &read);
509
510 if ((overflow) || (scm_is_pair (str)))
511 str = scm_cons (scm_from_locale_stringn (buffer, read), str);
0f2d19dd 512 }
7337d56d 513 while (overflow);
0f2d19dd 514
7337d56d 515 if (scm_is_pair (str))
0f2d19dd 516 {
7337d56d 517 /* The slow path. */
0f2d19dd 518
7337d56d
LC
519 str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
520 result = scm_string_to_number (str, SCM_UNDEFINED);
521 if (!scm_is_true (result))
522 /* Return a symbol instead of a number. */
523 result = scm_string_to_symbol (str);
524 }
525 else
526 {
527 result = scm_c_locale_stringn_to_number (buffer, read, 10);
528 if (!scm_is_true (result))
529 /* Return a symbol instead of a number. */
530 result = scm_from_locale_symboln (buffer, read);
531 }
0f2d19dd 532
7337d56d
LC
533 return result;
534}
0f2d19dd 535
7337d56d
LC
536static SCM
537scm_read_mixed_case_symbol (int chr, SCM port)
538{
539 SCM result, str = SCM_EOL;
ef4cbc08 540 int overflow = 0, ends_with_colon = 0;
7337d56d
LC
541 char buffer[READER_BUFFER_SIZE];
542 size_t read = 0;
ef4cbc08 543 int postfix = scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_postfix);
7337d56d
LC
544
545 scm_ungetc (chr, port);
546 do
547 {
548 overflow = read_token (port, buffer, sizeof (buffer), &read);
549
ef4cbc08
LC
550 if (read > 0)
551 ends_with_colon = (buffer[read - 1] == ':');
552
7337d56d
LC
553 if ((overflow) || (scm_is_pair (str)))
554 str = scm_cons (scm_from_locale_stringn (buffer, read), str);
555 }
556 while (overflow);
557
558 if (scm_is_pair (str))
559 {
5d660052
MG
560 size_t len;
561
7337d56d 562 str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
5d660052 563 len = scm_c_string_length (str);
ef4cbc08
LC
564
565 /* Per SRFI-88, `:' alone is an identifier, not a keyword. */
5d660052
MG
566 if (postfix && ends_with_colon && (len > 1))
567 {
568 /* Strip off colon. */
569 str = scm_c_substring (str, 0, len-1);
570 result = scm_string_to_symbol (str);
571 result = scm_symbol_to_keyword (result);
572 }
573 else
574 result = scm_string_to_symbol (str);
7337d56d
LC
575 }
576 else
ef4cbc08
LC
577 {
578 /* For symbols smaller than `sizeof (buffer)', we don't need to recur
579 to Scheme strings. Therefore, we only create one Scheme object (a
580 symbol) per symbol read. */
581 if (postfix && ends_with_colon && (read > 1))
582 result = scm_from_locale_keywordn (buffer, read - 1);
583 else
584 result = scm_from_locale_symboln (buffer, read);
585 }
7337d56d
LC
586
587 return result;
588}
589
590static SCM
591scm_read_number_and_radix (int chr, SCM port)
592#define FUNC_NAME "scm_lreadr"
593{
594 SCM result, str = SCM_EOL;
595 size_t read;
596 char buffer[READER_BUFFER_SIZE];
597 unsigned int radix;
598 int overflow = 0;
599
600 switch (chr)
601 {
602 case 'B':
603 case 'b':
604 radix = 2;
605 break;
606
607 case 'o':
608 case 'O':
609 radix = 8;
610 break;
611
612 case 'd':
613 case 'D':
614 radix = 10;
615 break;
616
617 case 'x':
618 case 'X':
619 radix = 16;
620 break;
621
622 default:
623 scm_ungetc (chr, port);
624 scm_ungetc ('#', port);
625 radix = 10;
626 }
627
628 do
629 {
630 overflow = read_token (port, buffer, sizeof (buffer), &read);
631
632 if ((overflow) || (scm_is_pair (str)))
633 str = scm_cons (scm_from_locale_stringn (buffer, read), str);
634 }
635 while (overflow);
636
637 if (scm_is_pair (str))
638 {
639 str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
640 result = scm_string_to_number (str, scm_from_uint (radix));
641 }
642 else
643 result = scm_c_locale_stringn_to_number (buffer, read, radix);
644
645 if (scm_is_true (result))
646 return result;
647
648 scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
649
650 return SCM_BOOL_F;
651}
652#undef FUNC_NAME
653
654static SCM
655scm_read_quote (int chr, SCM port)
656{
657 SCM p;
492faee1
LC
658 long line = SCM_LINUM (port);
659 int column = SCM_COL (port) - 1;
7337d56d
LC
660
661 switch (chr)
662 {
663 case '`':
664 p = scm_sym_quasiquote;
665 break;
666
667 case '\'':
668 p = scm_sym_quote;
669 break;
670
671 case ',':
672 {
673 int c;
674
675 c = scm_getc (port);
676 if ('@' == c)
677 p = scm_sym_uq_splicing;
678 else
0f2d19dd 679 {
7337d56d
LC
680 scm_ungetc (c, port);
681 p = scm_sym_unquote;
0f2d19dd 682 }
7337d56d
LC
683 break;
684 }
0f2d19dd 685
7337d56d
LC
686 default:
687 fprintf (stderr, "%s: unhandled quote character (%i)\n",
7f74cf9a 688 "scm_read_quote", chr);
7337d56d 689 abort ();
0f2d19dd 690 }
1cc91f1b 691
7337d56d 692 p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
492faee1
LC
693 if (SCM_RECORD_POSITIONS_P)
694 scm_whash_insert (scm_source_whash, p,
695 scm_make_srcprops (line, column,
696 SCM_FILENAME (port),
697 SCM_COPY_SOURCE_P
698 ? (scm_cons2 (SCM_CAR (p),
699 SCM_CAR (SCM_CDR (p)),
700 SCM_EOL))
701 : SCM_UNDEFINED,
702 SCM_EOL));
703
0f2d19dd 704
7337d56d
LC
705 return p;
706}
707
34f3d47d
AW
708SCM_SYMBOL (sym_syntax, "syntax");
709SCM_SYMBOL (sym_quasisyntax, "quasisyntax");
710SCM_SYMBOL (sym_unsyntax, "unsyntax");
711SCM_SYMBOL (sym_unsyntax_splicing, "unsyntax-splicing");
712
713static SCM
714scm_read_syntax (int chr, SCM port)
715{
716 SCM p;
717 long line = SCM_LINUM (port);
718 int column = SCM_COL (port) - 1;
719
720 switch (chr)
721 {
722 case '`':
723 p = sym_quasisyntax;
724 break;
725
726 case '\'':
727 p = sym_syntax;
728 break;
729
730 case ',':
731 {
732 int c;
733
734 c = scm_getc (port);
735 if ('@' == c)
736 p = sym_unsyntax_splicing;
737 else
738 {
739 scm_ungetc (c, port);
740 p = sym_unsyntax;
741 }
742 break;
743 }
744
745 default:
746 fprintf (stderr, "%s: unhandled syntax character (%i)\n",
747 "scm_read_syntax", chr);
748 abort ();
749 }
750
751 p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
752 if (SCM_RECORD_POSITIONS_P)
753 scm_whash_insert (scm_source_whash, p,
754 scm_make_srcprops (line, column,
755 SCM_FILENAME (port),
756 SCM_COPY_SOURCE_P
757 ? (scm_cons2 (SCM_CAR (p),
758 SCM_CAR (SCM_CDR (p)),
759 SCM_EOL))
760 : SCM_UNDEFINED,
761 SCM_EOL));
762
763
764 return p;
765}
766
7337d56d
LC
767static inline SCM
768scm_read_semicolon_comment (int chr, SCM port)
0f2d19dd 769{
0f2d19dd
JB
770 int c;
771
7337d56d
LC
772 for (c = scm_getc (port);
773 (c != EOF) && (c != '\n');
774 c = scm_getc (port));
775
776 return SCM_UNSPECIFIED;
777}
778
779\f
780/* Sharp readers, i.e. readers called after a `#' sign has been read. */
781
782static SCM
783scm_read_boolean (int chr, SCM port)
784{
785 switch (chr)
0f2d19dd 786 {
7337d56d
LC
787 case 't':
788 case 'T':
789 return SCM_BOOL_T;
790
791 case 'f':
792 case 'F':
793 return SCM_BOOL_F;
0f2d19dd 794 }
7337d56d
LC
795
796 return SCM_UNSPECIFIED;
797}
798
799static SCM
800scm_read_character (int chr, SCM port)
801#define FUNC_NAME "scm_lreadr"
802{
803 unsigned c;
804 char charname[READER_CHAR_NAME_MAX_SIZE];
805 size_t charname_len;
806
807 if (read_token (port, charname, sizeof (charname), &charname_len))
808 goto char_error;
809
810 if (charname_len == 0)
0f2d19dd 811 {
7337d56d
LC
812 chr = scm_getc (port);
813 if (chr == EOF)
814 scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
815 "while reading character", SCM_EOL);
816
817 /* CHR must be a token delimiter, like a whitespace. */
818 return (SCM_MAKE_CHAR (chr));
0f2d19dd 819 }
7337d56d
LC
820
821 if (charname_len == 1)
822 return SCM_MAKE_CHAR (charname[0]);
823
824 if (*charname >= '0' && *charname < '8')
825 {
826 /* Dirk:FIXME:: This type of character syntax is not R5RS
827 * compliant. Further, it should be verified that the constant
828 * does only consist of octal digits. Finally, it should be
829 * checked whether the resulting fixnum is in the range of
830 * characters. */
831 SCM p = scm_c_locale_stringn_to_number (charname, charname_len, 8);
832 if (SCM_I_INUMP (p))
833 return SCM_MAKE_CHAR (SCM_I_INUM (p));
834 }
835
836 for (c = 0; c < scm_n_charnames; c++)
837 if (scm_charnames[c]
838 && (!strncasecmp (scm_charnames[c], charname, charname_len)))
839 return SCM_MAKE_CHAR (scm_charnums[c]);
840
841 char_error:
842 scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
843 scm_list_1 (scm_from_locale_stringn (charname,
844 charname_len)));
845
846 return SCM_UNSPECIFIED;
0f2d19dd 847}
db4b4ca6 848#undef FUNC_NAME
0f2d19dd 849
7337d56d
LC
850static inline SCM
851scm_read_keyword (int chr, SCM port)
852{
853 SCM symbol;
854
855 /* Read the symbol that comprises the keyword. Doing this instead of
856 invoking a specific symbol reader function allows `scm_read_keyword ()'
857 to adapt to the delimiters currently valid of symbols.
1cc91f1b 858
7337d56d
LC
859 XXX: This implementation allows sloppy syntaxes like `#: key'. */
860 symbol = scm_read_expression (port);
861 if (!scm_is_symbol (symbol))
7f74cf9a 862 scm_i_input_error ("scm_read_keyword", port,
7337d56d
LC
863 "keyword prefix `~a' not followed by a symbol: ~s",
864 scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
865
866 return (scm_symbol_to_keyword (symbol));
867}
868
869static inline SCM
870scm_read_vector (int chr, SCM port)
09a4f039 871{
7337d56d
LC
872 /* Note: We call `scm_read_sexp ()' rather than READER here in order to
873 guarantee that it's going to do what we want. After all, this is an
874 implementation detail of `scm_read_vector ()', not a desirable
875 property. */
876 return (scm_vector (scm_read_sexp (chr, port)));
877}
09a4f039 878
7337d56d
LC
879static inline SCM
880scm_read_srfi4_vector (int chr, SCM port)
881{
882 return scm_i_read_array (port, chr);
883}
884
885static SCM
886scm_read_guile_bit_vector (int chr, SCM port)
887{
888 /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
889 terribly inefficient but who cares? */
890 SCM s_bits = SCM_EOL;
891
892 for (chr = scm_getc (port);
893 (chr != EOF) && ((chr == '0') || (chr == '1'));
894 chr = scm_getc (port))
09a4f039 895 {
7337d56d 896 s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
09a4f039 897 }
7337d56d
LC
898
899 if (chr != EOF)
900 scm_ungetc (chr, port);
901
902 return scm_bitvector (scm_reverse_x (s_bits, SCM_EOL));
903}
904
905static inline SCM
906scm_read_scsh_block_comment (int chr, SCM port)
907{
908 int bang_seen = 0;
909
910 for (;;)
09a4f039 911 {
7337d56d 912 int c = scm_getc (port);
62850ef3 913
7337d56d
LC
914 if (c == EOF)
915 scm_i_input_error ("skip_block_comment", port,
916 "unterminated `#! ... !#' comment", SCM_EOL);
917
918 if (c == '!')
919 bang_seen = 1;
920 else if (c == '#' && bang_seen)
921 break;
922 else
923 bang_seen = 0;
924 }
925
926 return SCM_UNSPECIFIED;
927}
928
34f3d47d
AW
929static SCM
930scm_read_commented_expression (int chr, SCM port)
931{
932 int c;
933
934 c = flush_ws (port, (char *) NULL);
935 if (EOF == c)
936 scm_i_input_error ("read_commented_expression", port,
937 "no expression after #; comment", SCM_EOL);
938 scm_ungetc (c, port);
939 scm_read_expression (port);
940 return SCM_UNSPECIFIED;
941}
942
7337d56d
LC
943static SCM
944scm_read_extended_symbol (int chr, SCM port)
945{
946 /* Guile's extended symbol read syntax looks like this:
947
948 #{This is all a symbol name}#
949
950 So here, CHR is expected to be `{'. */
951 SCM result;
952 int saw_brace = 0, finished = 0;
953 size_t len = 0;
954 char buf[1024];
955
956 result = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
957
958 while ((chr = scm_getc (port)) != EOF)
959 {
960 if (saw_brace)
09a4f039 961 {
7337d56d
LC
962 if (chr == '#')
963 {
964 finished = 1;
965 break;
966 }
967 else
968 {
969 saw_brace = 0;
970 buf[len++] = '}';
971 buf[len++] = chr;
972 }
09a4f039 973 }
7337d56d
LC
974 else if (chr == '}')
975 saw_brace = 1;
976 else
977 buf[len++] = chr;
62850ef3 978
7337d56d
LC
979 if (len >= sizeof (buf) - 2)
980 {
981 scm_string_append (scm_list_2 (result,
982 scm_from_locale_stringn (buf, len)));
983 len = 0;
984 }
62850ef3 985
7337d56d
LC
986 if (finished)
987 break;
988 }
989
990 if (len)
991 result = scm_string_append (scm_list_2
992 (result,
993 scm_from_locale_stringn (buf, len)));
994
995 return (scm_string_to_symbol (result));
996}
997
998
999\f
1000/* Top-level token readers, i.e., dispatchers. */
1001
1002static SCM
1003scm_read_sharp_extension (int chr, SCM port)
1004{
1005 SCM proc;
1006
1007 proc = scm_get_hash_procedure (chr);
1008 if (scm_is_true (scm_procedure_p (proc)))
1009 {
1010 long line = SCM_LINUM (port);
1011 int column = SCM_COL (port) - 2;
1012 SCM got;
1013
1014 got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
1015 if (!scm_is_eq (got, SCM_UNSPECIFIED))
62850ef3 1016 {
7337d56d
LC
1017 if (SCM_RECORD_POSITIONS_P)
1018 return (recsexpr (got, line, column,
1019 SCM_FILENAME (port)));
1020 else
1021 return got;
62850ef3 1022 }
09a4f039 1023 }
7337d56d
LC
1024
1025 return SCM_UNSPECIFIED;
1026}
1027
1028/* The reader for the sharp `#' character. It basically dispatches reads
1029 among the above token readers. */
1030static SCM
1031scm_read_sharp (int chr, SCM port)
1032#define FUNC_NAME "scm_lreadr"
1033{
1034 SCM result;
1035
1036 chr = scm_getc (port);
1037
1038 result = scm_read_sharp_extension (chr, port);
1039 if (!scm_is_eq (result, SCM_UNSPECIFIED))
1040 return result;
1041
1042 switch (chr)
1043 {
1044 case '\\':
1045 return (scm_read_character (chr, port));
1046 case '(':
1047 return (scm_read_vector (chr, port));
1048 case 's':
1049 case 'u':
1050 case 'f':
1051 /* This one may return either a boolean or an SRFI-4 vector. */
1052 return (scm_read_srfi4_vector (chr, port));
1053 case '*':
1054 return (scm_read_guile_bit_vector (chr, port));
1055 case 't':
1056 case 'T':
1057 case 'F':
1058 /* This one may return either a boolean or an SRFI-4 vector. */
1059 return (scm_read_boolean (chr, port));
1060 case ':':
1061 return (scm_read_keyword (chr, port));
1062 case '0': case '1': case '2': case '3': case '4':
1063 case '5': case '6': case '7': case '8': case '9':
1064 case '@':
1065#if SCM_ENABLE_DEPRECATED
1066 /* See below for 'i' and 'e'. */
1067 case 'a':
1068 case 'c':
1069 case 'y':
1070 case 'h':
1071 case 'l':
1072#endif
1073 return (scm_i_read_array (port, chr));
1074
1075 case 'i':
1076 case 'e':
1077#if SCM_ENABLE_DEPRECATED
1078 {
1079 /* When next char is '(', it really is an old-style
1080 uniform array. */
1081 int next_c = scm_getc (port);
1082 if (next_c != EOF)
1083 scm_ungetc (next_c, port);
1084 if (next_c == '(')
1085 return scm_i_read_array (port, chr);
1086 /* Fall through. */
1087 }
1088#endif
1089 case 'b':
1090 case 'B':
1091 case 'o':
1092 case 'O':
1093 case 'd':
1094 case 'D':
1095 case 'x':
1096 case 'X':
1097 case 'I':
1098 case 'E':
1099 return (scm_read_number_and_radix (chr, port));
1100 case '{':
1101 return (scm_read_extended_symbol (chr, port));
1102 case '!':
1103 return (scm_read_scsh_block_comment (chr, port));
34f3d47d
AW
1104 case ';':
1105 return (scm_read_commented_expression (chr, port));
1106 case '`':
1107 case '\'':
1108 case ',':
1109 return (scm_read_syntax (chr, port));
7337d56d
LC
1110 default:
1111 result = scm_read_sharp_extension (chr, port);
1112 if (scm_is_eq (result, SCM_UNSPECIFIED))
1113 scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
1114 scm_list_1 (SCM_MAKE_CHAR (chr)));
1115 else
1116 return result;
1117 }
1118
1119 return SCM_UNSPECIFIED;
1120}
1121#undef FUNC_NAME
1122
1123static SCM
1124scm_read_expression (SCM port)
1125#define FUNC_NAME "scm_read_expression"
1126{
1127 while (1)
1128 {
1129 register int chr;
1130
1131 chr = scm_getc (port);
1132
1133 switch (chr)
1134 {
1135 case SCM_WHITE_SPACES:
1136 case SCM_LINE_INCREMENTORS:
1137 break;
1138 case ';':
1139 (void) scm_read_semicolon_comment (chr, port);
1140 break;
1141 case '(':
1142 return (scm_read_sexp (chr, port));
1143 case '"':
1144 return (scm_read_string (chr, port));
1145 case '\'':
1146 case '`':
1147 case ',':
1148 return (scm_read_quote (chr, port));
1149 case '#':
1150 {
1151 SCM result;
1152 result = scm_read_sharp (chr, port);
1153 if (scm_is_eq (result, SCM_UNSPECIFIED))
1154 /* We read a comment or some such. */
1155 break;
1156 else
1157 return result;
1158 }
1159 case ')':
1160 scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
1161 break;
1162 case EOF:
1163 return SCM_EOF_VAL;
1164 case ':':
1165 if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix))
1166 return scm_symbol_to_keyword (scm_read_expression (port));
1167 /* Fall through. */
1168
1169 default:
1170 {
1171 if (((chr >= '0') && (chr <= '9'))
1172 || (strchr ("+-.", chr)))
1173 return (scm_read_number (chr, port));
1174 else
1175 return (scm_read_mixed_case_symbol (chr, port));
1176 }
1177 }
1178 }
1179}
1180#undef FUNC_NAME
1181
1182\f
1183/* Actual reader. */
1184
1185SCM_DEFINE (scm_read, "read", 0, 1, 0,
1186 (SCM port),
1187 "Read an s-expression from the input port @var{port}, or from\n"
1188 "the current input port if @var{port} is not specified.\n"
1189 "Any whitespace before the next token is discarded.")
1190#define FUNC_NAME s_scm_read
1191{
1192 int c;
1193
1194 if (SCM_UNBNDP (port))
1195 port = scm_current_input_port ();
1196 SCM_VALIDATE_OPINPORT (1, port);
1197
1198 c = flush_ws (port, (char *) NULL);
1199 if (EOF == c)
1200 return SCM_EOF_VAL;
1201 scm_ungetc (c, port);
1202
1203 return (scm_read_expression (port));
09a4f039 1204}
db4b4ca6 1205#undef FUNC_NAME
09a4f039 1206
0f2d19dd
JB
1207
1208\f
1209
7337d56d
LC
1210/* Used when recording expressions constructed by `scm_read_sharp ()'. */
1211static SCM
1212recsexpr (SCM obj, long line, int column, SCM filename)
1213{
1214 if (!scm_is_pair(obj)) {
1215 return obj;
1216 } else {
1217 SCM tmp = obj, copy;
1218 /* If this sexpr is visible in the read:sharp source, we want to
1219 keep that information, so only record non-constant cons cells
1220 which haven't previously been read by the reader. */
1221 if (scm_is_false (scm_whash_lookup (scm_source_whash, obj)))
1222 {
1223 if (SCM_COPY_SOURCE_P)
1224 {
1225 copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename),
1226 SCM_UNDEFINED);
1227 while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
1228 {
1229 SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp),
1230 line,
1231 column,
1232 filename),
1233 SCM_UNDEFINED));
1234 copy = SCM_CDR (copy);
1235 }
1236 SCM_SETCDR (copy, tmp);
1237 }
1238 else
1239 {
1240 recsexpr (SCM_CAR (obj), line, column, filename);
1241 while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
1242 recsexpr (SCM_CAR (tmp), line, column, filename);
1243 copy = SCM_UNDEFINED;
1244 }
1245 scm_whash_insert (scm_source_whash,
1246 obj,
1247 scm_make_srcprops (line,
1248 column,
1249 filename,
1250 copy,
1251 SCM_EOL));
1252 }
1253 return obj;
1254 }
1255}
1256
14de3b42
GH
1257/* Manipulate the read-hash-procedures alist. This could be written in
1258 Scheme, but maybe it will also be used by C code during initialisation. */
a1ec6916 1259SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
1bbd0b84 1260 (SCM chr, SCM proc),
dc7fa443
MG
1261 "Install the procedure @var{proc} for reading expressions\n"
1262 "starting with the character sequence @code{#} and @var{chr}.\n"
1263 "@var{proc} will be called with two arguments: the character\n"
1264 "@var{chr} and the port to read further data from. The object\n"
391f57e6
HWN
1265 "returned will be the return value of @code{read}. \n"
1266 "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1267 )
1bbd0b84 1268#define FUNC_NAME s_scm_read_hash_extend
deca31e1 1269{
fed9c9a2
GH
1270 SCM this;
1271 SCM prev;
1272
36284627 1273 SCM_VALIDATE_CHAR (1, chr);
7888309b 1274 SCM_ASSERT (scm_is_false (proc)
bc36d050 1275 || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
36284627 1276 proc, SCM_ARG2, FUNC_NAME);
fed9c9a2 1277
14de3b42
GH
1278 /* Check if chr is already in the alist. */
1279 this = *scm_read_hash_procedures;
1280 prev = SCM_BOOL_F;
fed9c9a2
GH
1281 while (1)
1282 {
d2e53ed6 1283 if (scm_is_null (this))
fed9c9a2
GH
1284 {
1285 /* not found, so add it to the beginning. */
7888309b 1286 if (scm_is_true (proc))
fed9c9a2 1287 {
14de3b42
GH
1288 *scm_read_hash_procedures =
1289 scm_cons (scm_cons (chr, proc), *scm_read_hash_procedures);
fed9c9a2
GH
1290 }
1291 break;
1292 }
bc36d050 1293 if (scm_is_eq (chr, SCM_CAAR (this)))
fed9c9a2
GH
1294 {
1295 /* already in the alist. */
7888309b 1296 if (scm_is_false (proc))
14de3b42
GH
1297 {
1298 /* remove it. */
7888309b 1299 if (scm_is_false (prev))
14de3b42
GH
1300 {
1301 *scm_read_hash_procedures =
1302 SCM_CDR (*scm_read_hash_procedures);
1303 }
1304 else
1305 scm_set_cdr_x (prev, SCM_CDR (this));
1306 }
fed9c9a2 1307 else
14de3b42
GH
1308 {
1309 /* replace it. */
1310 scm_set_cdr_x (SCM_CAR (this), proc);
1311 }
fed9c9a2
GH
1312 break;
1313 }
1314 prev = this;
1315 this = SCM_CDR (this);
1316 }
deca31e1 1317
deca31e1
GH
1318 return SCM_UNSPECIFIED;
1319}
1bbd0b84 1320#undef FUNC_NAME
0f2d19dd 1321
deca31e1
GH
1322/* Recover the read-hash procedure corresponding to char c. */
1323static SCM
6e8d25a6 1324scm_get_hash_procedure (int c)
deca31e1 1325{
14de3b42 1326 SCM rest = *scm_read_hash_procedures;
fed9c9a2 1327
deca31e1
GH
1328 while (1)
1329 {
d2e53ed6 1330 if (scm_is_null (rest))
deca31e1
GH
1331 return SCM_BOOL_F;
1332
7866a09b 1333 if (SCM_CHAR (SCM_CAAR (rest)) == c)
deca31e1
GH
1334 return SCM_CDAR (rest);
1335
1336 rest = SCM_CDR (rest);
1337 }
1338}
1cc91f1b 1339
0f2d19dd
JB
1340void
1341scm_init_read ()
0f2d19dd 1342{
14de3b42 1343 scm_read_hash_procedures =
86d31dfe 1344 SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL));
fed9c9a2 1345
62560650 1346 scm_init_opts (scm_read_options, scm_read_opts);
a0599745 1347#include "libguile/read.x"
0f2d19dd 1348}
89e00824
ML
1349
1350/*
1351 Local Variables:
1352 c-file-style: "gnu"
1353 End:
1354*/