Implementation of SRFI-98 (An interface to access environment variables).
[bpt/guile.git] / libguile / read.c
1 /* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007, 2008 Free Software
2 * Foundation, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19
20 \f
21
22 #ifdef HAVE_CONFIG_H
23 # include <config.h>
24 #endif
25
26 #include <stdio.h>
27 #include <ctype.h>
28 #include <string.h>
29
30 #include "libguile/_scm.h"
31 #include "libguile/chars.h"
32 #include "libguile/eval.h"
33 #include "libguile/unif.h"
34 #include "libguile/keywords.h"
35 #include "libguile/alist.h"
36 #include "libguile/srcprop.h"
37 #include "libguile/hashtab.h"
38 #include "libguile/hash.h"
39 #include "libguile/ports.h"
40 #include "libguile/root.h"
41 #include "libguile/strings.h"
42 #include "libguile/strports.h"
43 #include "libguile/vectors.h"
44 #include "libguile/validate.h"
45 #include "libguile/srfi-4.h"
46 #include "libguile/srfi-13.h"
47
48 #include "libguile/read.h"
49 #include "libguile/private-options.h"
50
51
52 \f
53
/* Symbols used by the reader: `.' (the dotted-pair marker, exported) and
   the two symbolic values accepted by the `keywords' read option.  */
SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
SCM_SYMBOL (scm_keyword_prefix, "prefix");
SCM_SYMBOL (scm_keyword_postfix, "postfix");

/* The read options table.  It is passed to `scm_options ()' and
   `scm_init_opts ()' further down in this file.  The Elisp-related
   entries only exist when SCM_ENABLE_ELISP is set.  The final zeroed
   entry is presumably the table terminator -- confirm against the
   options implementation.  */
scm_t_option scm_read_opts[] = {
  { SCM_OPTION_BOOLEAN, "copy", 0,
    "Copy source code expressions." },
  { SCM_OPTION_BOOLEAN, "positions", 0,
    "Record positions of source code expressions." },
  { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
    "Convert symbols to lower case."},
  { SCM_OPTION_SCM, "keywords", SCM_UNPACK (SCM_BOOL_F),
    "Style of keyword recognition: #f, 'prefix or 'postfix."},
#if SCM_ENABLE_ELISP
  { SCM_OPTION_BOOLEAN, "elisp-vectors", 0,
    "Support Elisp vector syntax, namely `[...]'."},
  { SCM_OPTION_BOOLEAN, "elisp-strings", 0,
    "Support `\\(' and `\\)' in strings."},
#endif
  { 0, },
};
75
76 /*
77 Give meaningful error messages for errors
78
79 We use the format
80
81 FILE:LINE:COL: MESSAGE
82 This happened in ....
83
84 This is not standard GNU format, but the test-suite likes the real
85 message to be in front.
86
87 */
88
89
90 void
91 scm_i_input_error (char const *function,
92 SCM port, const char *message, SCM arg)
93 {
94 SCM fn = (scm_is_string (SCM_FILENAME(port))
95 ? SCM_FILENAME(port)
96 : scm_from_locale_string ("#<unknown port>"));
97
98 SCM string_port = scm_open_output_string ();
99 SCM string = SCM_EOL;
100 scm_simple_format (string_port,
101 scm_from_locale_string ("~A:~S:~S: ~A"),
102 scm_list_4 (fn,
103 scm_from_long (SCM_LINUM (port) + 1),
104 scm_from_int (SCM_COL (port) + 1),
105 scm_from_locale_string (message)));
106
107 string = scm_get_output_string (string_port);
108 scm_close_output_port (string_port);
109 scm_error_scm (scm_from_locale_symbol ("read-error"),
110 function? scm_from_locale_string (function) : SCM_BOOL_F,
111 string,
112 arg,
113 SCM_BOOL_F);
114 }
115
116
117 SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
118 (SCM setting),
119 "Option interface for the read options. Instead of using\n"
120 "this procedure directly, use the procedures @code{read-enable},\n"
121 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
122 #define FUNC_NAME s_scm_read_options
123 {
124 SCM ans = scm_options (setting,
125 scm_read_opts,
126 FUNC_NAME);
127 if (SCM_COPY_SOURCE_P)
128 SCM_RECORD_POSITIONS_P = 1;
129 return ans;
130 }
131 #undef FUNC_NAME
132
/* Location of the association list mapping extra hash (`#') characters to
   reader procedures.  It is set in `scm_init_read ()' to the value slot of
   the Scheme variable `read-hash-procedures', so it must not be
   dereferenced before that function has run.  */
static SCM *scm_read_hash_procedures;
135
136
137 \f
138 /* Token readers. */
139
140
/* Size of the C buffer used to read symbols and numbers.  */
#define READER_BUFFER_SIZE            128

/* Size of the C buffer used to read strings.  */
#define READER_STRING_BUFFER_SIZE     512

/* The maximum size of Scheme character names.  */
#define READER_CHAR_NAME_MAX_SIZE      50


/* `isblank' is only in C99.  Note that, unlike `isblank', this also
   accepts newline, form feed and carriage return.  */
#define CHAR_IS_BLANK_(_chr)					\
  (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n')	\
   || ((_chr) == '\f') || ((_chr) == '\r'))

#ifdef MSDOS
/* On MS-DOS, character 26 (Ctrl-Z) is additionally treated as blank.
   The macro must expand its own parameter `_chr', not whatever `chr'
   happens to be in scope at the point of use.  */
# define CHAR_IS_BLANK(_chr)					\
  ((CHAR_IS_BLANK_ (_chr)) || ((_chr) == 26))
#else
# define CHAR_IS_BLANK CHAR_IS_BLANK_
#endif


/* R5RS one-character delimiters (see section 7.1.1, ``Lexical
   structure'').  The argument is parenthesized at every use so that
   arbitrary expressions may be passed.  */
#define CHAR_IS_R5RS_DELIMITER(c)				\
  (CHAR_IS_BLANK (c)						\
   || ((c) == ')') || ((c) == '(') || ((c) == ';') || ((c) == '"'))

#define CHAR_IS_DELIMITER  CHAR_IS_R5RS_DELIMITER

/* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
   Structure''.  */
#define CHAR_IS_EXPONENT_MARKER(_chr)				\
  (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f')	\
   || ((_chr) == 'd') || ((_chr) == 'l'))

/* An inlinable version of `scm_c_downcase ()'.  Values above UCHAR_MAX
   are returned unchanged.  */
#define CHAR_DOWNCASE(_chr)				\
  (((_chr) <= UCHAR_MAX) ? tolower (_chr) : (_chr))
181
182
/* Read (and discard) an SCSH-style `#! ... !#' block comment.  Declared
   here because `flush_ws ()' calls it before its definition appears.  */
static inline SCM scm_read_scsh_block_comment (int chr, SCM port);
185
186 /* Read from PORT until a delimiter (e.g., a whitespace) is read. Return
187 zero if the whole token fits in BUF, non-zero otherwise. */
188 static inline int
189 read_token (SCM port, char *buf, size_t buf_size, size_t *read)
190 {
191 *read = 0;
192
193 while (*read < buf_size)
194 {
195 int chr;
196
197 chr = scm_getc (port);
198 chr = (SCM_CASE_INSENSITIVE_P ? CHAR_DOWNCASE (chr) : chr);
199
200 if (chr == EOF)
201 return 0;
202 else if (CHAR_IS_DELIMITER (chr))
203 {
204 scm_ungetc (chr, port);
205 return 0;
206 }
207 else
208 {
209 *buf = (char) chr;
210 buf++, (*read)++;
211 }
212 }
213
214 return 1;
215 }
216
217
/* Skip whitespace and comments from PORT and return the first character
   that is neither.  `;' line comments and `#! ... !#' block comments are
   skipped; for any other `#X' sequence, X is pushed back and '#' is
   returned.  On end-of-file, a read error is raised when EOFERR is
   non-NULL (EOFERR is passed as the function name to
   `scm_i_input_error ()'); otherwise EOF is returned.  */
static int
flush_ws (SCM port, const char *eoferr)
{
  register int c;
  while (1)
    switch (c = scm_getc (port))
      {
      case EOF:
      goteof:
        /* Shared end-of-file exit, also reached by `goto' from the
           comment and `#' cases below.  */
        if (eoferr)
          {
            scm_i_input_error (eoferr,
                               port,
                               "end of file",
                               SCM_EOL);
          }
        return c;

      case ';':
      lp:
        /* Line comment: discard characters up to the end of the line.  */
        switch (c = scm_getc (port))
          {
          case EOF:
            goto goteof;
          default:
            goto lp;
          case SCM_LINE_INCREMENTORS:
            break;
          }
        break;

      case '#':
        switch (c = scm_getc (port))
          {
          case EOF:
            /* End-of-file right after `#' is always reported as an error,
               even when the caller passed a NULL EOFERR.  */
            eoferr = "read_sharp";
            goto goteof;
          case '!':
            scm_read_scsh_block_comment (c, port);
            break;
          default:
            /* Not a comment: restore the character after `#' and let the
               caller handle the sharp syntax.  */
            scm_ungetc (c, port);
            return '#';
          }
        break;

      case SCM_LINE_INCREMENTORS:
      case SCM_SINGLE_SPACES:
      case '\t':
        break;

      default:
        return c;
      }

  /* Not reached: the loop above only exits through `return'.  */
  return 0;
}
277
278
279 \f
280 /* Token readers. */
281
/* Forward declarations: the token readers below call these before their
   definitions appear later in the file.  */
static SCM scm_read_expression (SCM port);
static SCM scm_read_sharp (int chr, SCM port);
static SCM scm_get_hash_procedure (int c);
static SCM recsexpr (SCM obj, long line, int column, SCM filename);
286
287
/* Read the elements of a list from PORT up to the closing `)' and return
   the list; CHR is not used.  An immediate `)' yields the empty list.
   Dot notation is supported: the expression following a `.' becomes the
   tail, and it must be followed by `)'.  When positions are recorded,
   the line and column captured on entry are stored for the resulting
   list in `scm_source_whash', together with a copy of the list when
   source copying is enabled.  */
static SCM
scm_read_sexp (int chr, SCM port)
#define FUNC_NAME "scm_i_lreadparen"
{
  register int c;
  register SCM tmp;
  register SCM tl, ans = SCM_EOL;
  /* NOTE(review): COPY is never assigned after this initialisation, so
     nested pairs show up as #f in the source copy (ANS2) -- confirm that
     this is intended.  */
  SCM tl2 = SCM_EOL, ans2 = SCM_EOL, copy = SCM_BOOL_F;
  static const int terminating_char = ')';

  /* Need to capture line and column numbers here. */
  long line = SCM_LINUM (port);
  int column = SCM_COL (port) - 1;


  c = flush_ws (port, FUNC_NAME);
  if (terminating_char == c)
    return SCM_EOL;

  scm_ungetc (c, port);
  if (scm_is_eq (scm_sym_dot,
                 (tmp = scm_read_expression (port))))
    {
      /* A dot right after the opening paren: the following expression
         itself is returned, and no source properties are recorded.  */
      ans = scm_read_expression (port);
      if (terminating_char != (c = flush_ws (port, FUNC_NAME)))
        scm_i_input_error (FUNC_NAME, port, "missing close paren",
                           SCM_EOL);
      return ans;
    }

  /* Build the head of the list structure. */
  ans = tl = scm_cons (tmp, SCM_EOL);

  if (SCM_COPY_SOURCE_P)
    ans2 = tl2 = scm_cons (scm_is_pair (tmp)
                           ? copy
                           : tmp,
                           SCM_EOL);

  while (terminating_char != (c = flush_ws (port, FUNC_NAME)))
    {
      SCM new_tail;

      scm_ungetc (c, port);
      if (scm_is_eq (scm_sym_dot,
                     (tmp = scm_read_expression (port))))
        {
          /* Dotted tail: attach the next expression as the final cdr.  */
          SCM_SETCDR (tl, tmp = scm_read_expression (port));

          /* NOTE(review): the copy receives a one-element list here
             rather than the dotted tail itself -- confirm.  */
          if (SCM_COPY_SOURCE_P)
            SCM_SETCDR (tl2, scm_cons (scm_is_pair (tmp) ? copy : tmp,
                                       SCM_EOL));

          c = flush_ws (port, FUNC_NAME);
          if (terminating_char != c)
            scm_i_input_error (FUNC_NAME, port,
                               "in pair: missing close paren", SCM_EOL);
          goto exit;
        }

      /* Ordinary element: append it to the list being built.  */
      new_tail = scm_cons (tmp, SCM_EOL);
      SCM_SETCDR (tl, new_tail);
      tl = new_tail;

      if (SCM_COPY_SOURCE_P)
        {
          SCM new_tail2 = scm_cons (scm_is_pair (tmp)
                                    ? copy
                                    : tmp, SCM_EOL);
          SCM_SETCDR (tl2, new_tail2);
          tl2 = new_tail2;
        }
    }

 exit:
  if (SCM_RECORD_POSITIONS_P)
    scm_whash_insert (scm_source_whash,
                      ans,
                      scm_make_srcprops (line, column,
                                         SCM_FILENAME (port),
                                         SCM_COPY_SOURCE_P
                                         ? ans2
                                         : SCM_UNDEFINED,
                                         SCM_EOL));
  return ans;
}
#undef FUNC_NAME
375
376 static SCM
377 scm_read_string (int chr, SCM port)
378 #define FUNC_NAME "scm_lreadr"
379 {
380 /* For strings smaller than C_STR, this function creates only one Scheme
381 object (the string returned). */
382
383 SCM str = SCM_BOOL_F;
384 char c_str[READER_STRING_BUFFER_SIZE];
385 unsigned c_str_len = 0;
386 int c;
387
388 while ('"' != (c = scm_getc (port)))
389 {
390 if (c == EOF)
391 str_eof: scm_i_input_error (FUNC_NAME, port,
392 "end of file in string constant",
393 SCM_EOL);
394
395 if (c_str_len + 1 >= sizeof (c_str))
396 {
397 /* Flush the C buffer onto a Scheme string. */
398 SCM addy;
399
400 if (str == SCM_BOOL_F)
401 str = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
402
403 addy = scm_from_locale_stringn (c_str, c_str_len);
404 str = scm_string_append_shared (scm_list_2 (str, addy));
405
406 c_str_len = 0;
407 }
408
409 if (c == '\\')
410 switch (c = scm_getc (port))
411 {
412 case EOF:
413 goto str_eof;
414 case '"':
415 case '\\':
416 break;
417 #if SCM_ENABLE_ELISP
418 case '(':
419 case ')':
420 if (SCM_ESCAPED_PARENS_P)
421 break;
422 goto bad_escaped;
423 #endif
424 case '\n':
425 continue;
426 case '0':
427 c = '\0';
428 break;
429 case 'f':
430 c = '\f';
431 break;
432 case 'n':
433 c = '\n';
434 break;
435 case 'r':
436 c = '\r';
437 break;
438 case 't':
439 c = '\t';
440 break;
441 case 'a':
442 c = '\007';
443 break;
444 case 'v':
445 c = '\v';
446 break;
447 case 'x':
448 {
449 int a, b;
450 a = scm_getc (port);
451 if (a == EOF) goto str_eof;
452 b = scm_getc (port);
453 if (b == EOF) goto str_eof;
454 if ('0' <= a && a <= '9') a -= '0';
455 else if ('A' <= a && a <= 'F') a = a - 'A' + 10;
456 else if ('a' <= a && a <= 'f') a = a - 'a' + 10;
457 else goto bad_escaped;
458 if ('0' <= b && b <= '9') b -= '0';
459 else if ('A' <= b && b <= 'F') b = b - 'A' + 10;
460 else if ('a' <= b && b <= 'f') b = b - 'a' + 10;
461 else goto bad_escaped;
462 c = a * 16 + b;
463 break;
464 }
465 default:
466 bad_escaped:
467 scm_i_input_error (FUNC_NAME, port,
468 "illegal character in escape sequence: ~S",
469 scm_list_1 (SCM_MAKE_CHAR (c)));
470 }
471 c_str[c_str_len++] = c;
472 }
473
474 if (c_str_len > 0)
475 {
476 SCM addy;
477
478 addy = scm_from_locale_stringn (c_str, c_str_len);
479 if (str == SCM_BOOL_F)
480 str = addy;
481 else
482 str = scm_string_append_shared (scm_list_2 (str, addy));
483 }
484 else
485 str = (str == SCM_BOOL_F) ? scm_nullstr : str;
486
487 return str;
488 }
489 #undef FUNC_NAME
490
491
492 static SCM
493 scm_read_number (int chr, SCM port)
494 {
495 SCM result, str = SCM_EOL;
496 char buffer[READER_BUFFER_SIZE];
497 size_t read;
498 int overflow = 0;
499
500 scm_ungetc (chr, port);
501 do
502 {
503 overflow = read_token (port, buffer, sizeof (buffer), &read);
504
505 if ((overflow) || (scm_is_pair (str)))
506 str = scm_cons (scm_from_locale_stringn (buffer, read), str);
507 }
508 while (overflow);
509
510 if (scm_is_pair (str))
511 {
512 /* The slow path. */
513
514 str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
515 result = scm_string_to_number (str, SCM_UNDEFINED);
516 if (!scm_is_true (result))
517 /* Return a symbol instead of a number. */
518 result = scm_string_to_symbol (str);
519 }
520 else
521 {
522 result = scm_c_locale_stringn_to_number (buffer, read, 10);
523 if (!scm_is_true (result))
524 /* Return a symbol instead of a number. */
525 result = scm_from_locale_symboln (buffer, read);
526 }
527
528 return result;
529 }
530
531 static SCM
532 scm_read_mixed_case_symbol (int chr, SCM port)
533 {
534 SCM result, str = SCM_EOL;
535 int overflow = 0, ends_with_colon = 0;
536 char buffer[READER_BUFFER_SIZE];
537 size_t read = 0;
538 int postfix = scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_postfix);
539
540 scm_ungetc (chr, port);
541 do
542 {
543 overflow = read_token (port, buffer, sizeof (buffer), &read);
544
545 if (read > 0)
546 ends_with_colon = (buffer[read - 1] == ':');
547
548 if ((overflow) || (scm_is_pair (str)))
549 str = scm_cons (scm_from_locale_stringn (buffer, read), str);
550 }
551 while (overflow);
552
553 if (scm_is_pair (str))
554 {
555 size_t len;
556
557 str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
558 len = scm_c_string_length (str);
559
560 /* Per SRFI-88, `:' alone is an identifier, not a keyword. */
561 if (postfix && ends_with_colon && (len > 1))
562 {
563 /* Strip off colon. */
564 str = scm_c_substring (str, 0, len-1);
565 result = scm_string_to_symbol (str);
566 result = scm_symbol_to_keyword (result);
567 }
568 else
569 result = scm_string_to_symbol (str);
570 }
571 else
572 {
573 /* For symbols smaller than `sizeof (buffer)', we don't need to recur
574 to Scheme strings. Therefore, we only create one Scheme object (a
575 symbol) per symbol read. */
576 if (postfix && ends_with_colon && (read > 1))
577 result = scm_from_locale_keywordn (buffer, read - 1);
578 else
579 result = scm_from_locale_symboln (buffer, read);
580 }
581
582 return result;
583 }
584
585 static SCM
586 scm_read_number_and_radix (int chr, SCM port)
587 #define FUNC_NAME "scm_lreadr"
588 {
589 SCM result, str = SCM_EOL;
590 size_t read;
591 char buffer[READER_BUFFER_SIZE];
592 unsigned int radix;
593 int overflow = 0;
594
595 switch (chr)
596 {
597 case 'B':
598 case 'b':
599 radix = 2;
600 break;
601
602 case 'o':
603 case 'O':
604 radix = 8;
605 break;
606
607 case 'd':
608 case 'D':
609 radix = 10;
610 break;
611
612 case 'x':
613 case 'X':
614 radix = 16;
615 break;
616
617 default:
618 scm_ungetc (chr, port);
619 scm_ungetc ('#', port);
620 radix = 10;
621 }
622
623 do
624 {
625 overflow = read_token (port, buffer, sizeof (buffer), &read);
626
627 if ((overflow) || (scm_is_pair (str)))
628 str = scm_cons (scm_from_locale_stringn (buffer, read), str);
629 }
630 while (overflow);
631
632 if (scm_is_pair (str))
633 {
634 str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
635 result = scm_string_to_number (str, scm_from_uint (radix));
636 }
637 else
638 result = scm_c_locale_stringn_to_number (buffer, read, radix);
639
640 if (scm_is_true (result))
641 return result;
642
643 scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
644
645 return SCM_BOOL_F;
646 }
647 #undef FUNC_NAME
648
649 static SCM
650 scm_read_quote (int chr, SCM port)
651 {
652 SCM p;
653 long line = SCM_LINUM (port);
654 int column = SCM_COL (port) - 1;
655
656 switch (chr)
657 {
658 case '`':
659 p = scm_sym_quasiquote;
660 break;
661
662 case '\'':
663 p = scm_sym_quote;
664 break;
665
666 case ',':
667 {
668 int c;
669
670 c = scm_getc (port);
671 if ('@' == c)
672 p = scm_sym_uq_splicing;
673 else
674 {
675 scm_ungetc (c, port);
676 p = scm_sym_unquote;
677 }
678 break;
679 }
680
681 default:
682 fprintf (stderr, "%s: unhandled quote character (%i)\n",
683 "scm_read_quote", chr);
684 abort ();
685 }
686
687 p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
688 if (SCM_RECORD_POSITIONS_P)
689 scm_whash_insert (scm_source_whash, p,
690 scm_make_srcprops (line, column,
691 SCM_FILENAME (port),
692 SCM_COPY_SOURCE_P
693 ? (scm_cons2 (SCM_CAR (p),
694 SCM_CAR (SCM_CDR (p)),
695 SCM_EOL))
696 : SCM_UNDEFINED,
697 SCM_EOL));
698
699
700 return p;
701 }
702
703 static inline SCM
704 scm_read_semicolon_comment (int chr, SCM port)
705 {
706 int c;
707
708 for (c = scm_getc (port);
709 (c != EOF) && (c != '\n');
710 c = scm_getc (port));
711
712 return SCM_UNSPECIFIED;
713 }
714
715 \f
716 /* Sharp readers, i.e. readers called after a `#' sign has been read. */
717
718 static SCM
719 scm_read_boolean (int chr, SCM port)
720 {
721 switch (chr)
722 {
723 case 't':
724 case 'T':
725 return SCM_BOOL_T;
726
727 case 'f':
728 case 'F':
729 return SCM_BOOL_F;
730 }
731
732 return SCM_UNSPECIFIED;
733 }
734
735 static SCM
736 scm_read_character (int chr, SCM port)
737 #define FUNC_NAME "scm_lreadr"
738 {
739 unsigned c;
740 char charname[READER_CHAR_NAME_MAX_SIZE];
741 size_t charname_len;
742
743 if (read_token (port, charname, sizeof (charname), &charname_len))
744 goto char_error;
745
746 if (charname_len == 0)
747 {
748 chr = scm_getc (port);
749 if (chr == EOF)
750 scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
751 "while reading character", SCM_EOL);
752
753 /* CHR must be a token delimiter, like a whitespace. */
754 return (SCM_MAKE_CHAR (chr));
755 }
756
757 if (charname_len == 1)
758 return SCM_MAKE_CHAR (charname[0]);
759
760 if (*charname >= '0' && *charname < '8')
761 {
762 /* Dirk:FIXME:: This type of character syntax is not R5RS
763 * compliant. Further, it should be verified that the constant
764 * does only consist of octal digits. Finally, it should be
765 * checked whether the resulting fixnum is in the range of
766 * characters. */
767 SCM p = scm_c_locale_stringn_to_number (charname, charname_len, 8);
768 if (SCM_I_INUMP (p))
769 return SCM_MAKE_CHAR (SCM_I_INUM (p));
770 }
771
772 for (c = 0; c < scm_n_charnames; c++)
773 if (scm_charnames[c]
774 && (!strncasecmp (scm_charnames[c], charname, charname_len)))
775 return SCM_MAKE_CHAR (scm_charnums[c]);
776
777 char_error:
778 scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
779 scm_list_1 (scm_from_locale_stringn (charname,
780 charname_len)));
781
782 return SCM_UNSPECIFIED;
783 }
784 #undef FUNC_NAME
785
786 static inline SCM
787 scm_read_keyword (int chr, SCM port)
788 {
789 SCM symbol;
790
791 /* Read the symbol that comprises the keyword. Doing this instead of
792 invoking a specific symbol reader function allows `scm_read_keyword ()'
793 to adapt to the delimiters currently valid of symbols.
794
795 XXX: This implementation allows sloppy syntaxes like `#: key'. */
796 symbol = scm_read_expression (port);
797 if (!scm_is_symbol (symbol))
798 scm_i_input_error ("scm_read_keyword", port,
799 "keyword prefix `~a' not followed by a symbol: ~s",
800 scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
801
802 return (scm_symbol_to_keyword (symbol));
803 }
804
805 static inline SCM
806 scm_read_vector (int chr, SCM port)
807 {
808 /* Note: We call `scm_read_sexp ()' rather than READER here in order to
809 guarantee that it's going to do what we want. After all, this is an
810 implementation detail of `scm_read_vector ()', not a desirable
811 property. */
812 return (scm_vector (scm_read_sexp (chr, port)));
813 }
814
815 static inline SCM
816 scm_read_srfi4_vector (int chr, SCM port)
817 {
818 return scm_i_read_array (port, chr);
819 }
820
821 static SCM
822 scm_read_guile_bit_vector (int chr, SCM port)
823 {
824 /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
825 terribly inefficient but who cares? */
826 SCM s_bits = SCM_EOL;
827
828 for (chr = scm_getc (port);
829 (chr != EOF) && ((chr == '0') || (chr == '1'));
830 chr = scm_getc (port))
831 {
832 s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
833 }
834
835 if (chr != EOF)
836 scm_ungetc (chr, port);
837
838 return scm_bitvector (scm_reverse_x (s_bits, SCM_EOL));
839 }
840
841 static inline SCM
842 scm_read_scsh_block_comment (int chr, SCM port)
843 {
844 int bang_seen = 0;
845
846 for (;;)
847 {
848 int c = scm_getc (port);
849
850 if (c == EOF)
851 scm_i_input_error ("skip_block_comment", port,
852 "unterminated `#! ... !#' comment", SCM_EOL);
853
854 if (c == '!')
855 bang_seen = 1;
856 else if (c == '#' && bang_seen)
857 break;
858 else
859 bang_seen = 0;
860 }
861
862 return SCM_UNSPECIFIED;
863 }
864
865 static SCM
866 scm_read_extended_symbol (int chr, SCM port)
867 {
868 /* Guile's extended symbol read syntax looks like this:
869
870 #{This is all a symbol name}#
871
872 So here, CHR is expected to be `{'. */
873 SCM result;
874 int saw_brace = 0, finished = 0;
875 size_t len = 0;
876 char buf[1024];
877
878 result = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
879
880 while ((chr = scm_getc (port)) != EOF)
881 {
882 if (saw_brace)
883 {
884 if (chr == '#')
885 {
886 finished = 1;
887 break;
888 }
889 else
890 {
891 saw_brace = 0;
892 buf[len++] = '}';
893 buf[len++] = chr;
894 }
895 }
896 else if (chr == '}')
897 saw_brace = 1;
898 else
899 buf[len++] = chr;
900
901 if (len >= sizeof (buf) - 2)
902 {
903 scm_string_append (scm_list_2 (result,
904 scm_from_locale_stringn (buf, len)));
905 len = 0;
906 }
907
908 if (finished)
909 break;
910 }
911
912 if (len)
913 result = scm_string_append (scm_list_2
914 (result,
915 scm_from_locale_stringn (buf, len)));
916
917 return (scm_string_to_symbol (result));
918 }
919
920
921 \f
922 /* Top-level token readers, i.e., dispatchers. */
923
924 static SCM
925 scm_read_sharp_extension (int chr, SCM port)
926 {
927 SCM proc;
928
929 proc = scm_get_hash_procedure (chr);
930 if (scm_is_true (scm_procedure_p (proc)))
931 {
932 long line = SCM_LINUM (port);
933 int column = SCM_COL (port) - 2;
934 SCM got;
935
936 got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
937 if (!scm_is_eq (got, SCM_UNSPECIFIED))
938 {
939 if (SCM_RECORD_POSITIONS_P)
940 return (recsexpr (got, line, column,
941 SCM_FILENAME (port)));
942 else
943 return got;
944 }
945 }
946
947 return SCM_UNSPECIFIED;
948 }
949
/* The reader for the sharp `#' character.  It basically dispatches reads
   among the above token readers.  The incoming CHR is ignored: it is
   overwritten with the next character from PORT, which selects the
   reader.  User-installed hash procedures are tried first.  Returns
   SCM_UNSPECIFIED when nothing was produced (e.g. a block comment), and
   raises a read error for an unknown `#' syntax.  */
static SCM
scm_read_sharp (int chr, SCM port)
#define FUNC_NAME "scm_lreadr"
{
  SCM result;

  chr = scm_getc (port);

  /* Extensions take precedence over the built-in syntaxes below.  */
  result = scm_read_sharp_extension (chr, port);
  if (!scm_is_eq (result, SCM_UNSPECIFIED))
    return result;

  switch (chr)
    {
    case '\\':
      return (scm_read_character (chr, port));
    case '(':
      return (scm_read_vector (chr, port));
    case 's':
    case 'u':
    case 'f':
      /* This one may return either a boolean or an SRFI-4 vector.  */
      return (scm_read_srfi4_vector (chr, port));
    case '*':
      return (scm_read_guile_bit_vector (chr, port));
    case 't':
    case 'T':
    case 'F':
      /* Booleans.  Lowercase `f' is not listed here because it is
         handled by the SRFI-4 case above.  */
      return (scm_read_boolean (chr, port));
    case ':':
      return (scm_read_keyword (chr, port));
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case '@':
#if SCM_ENABLE_DEPRECATED
      /* See below for 'i' and 'e'. */
    case 'a':
    case 'c':
    case 'y':
    case 'h':
    case 'l':
#endif
      return (scm_i_read_array (port, chr));

    case 'i':
    case 'e':
#if SCM_ENABLE_DEPRECATED
      {
        /* When next char is '(', it really is an old-style
           uniform array. */
        int next_c = scm_getc (port);
        if (next_c != EOF)
          scm_ungetc (next_c, port);
        if (next_c == '(')
          return scm_i_read_array (port, chr);
        /* Fall through. */
      }
#endif
      /* Without the deprecated syntax, `i' and `e' fall straight through
         to the number prefixes below.  */
    case 'b':
    case 'B':
    case 'o':
    case 'O':
    case 'd':
    case 'D':
    case 'x':
    case 'X':
    case 'I':
    case 'E':
      return (scm_read_number_and_radix (chr, port));
    case '{':
      return (scm_read_extended_symbol (chr, port));
    case '!':
      return (scm_read_scsh_block_comment (chr, port));
    default:
      /* NOTE(review): the extension reader was already tried above, so
         an installed procedure that returned SCM_UNSPECIFIED is invoked
         a second time here -- confirm that this is intended.  */
      result = scm_read_sharp_extension (chr, port);
      if (scm_is_eq (result, SCM_UNSPECIFIED))
        scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
                           scm_list_1 (SCM_MAKE_CHAR (chr)));
      else
        return result;
    }

  return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
1038
/* Read one expression from PORT and return it, dispatching on its first
   character.  Whitespace, `;' comments and sharp syntaxes that produce
   no value are skipped.  Returns SCM_EOF_VAL at end-of-file and raises a
   read error on a stray `)'.  */
static SCM
scm_read_expression (SCM port)
#define FUNC_NAME "scm_read_expression"
{
  while (1)
    {
      register int chr;

      chr = scm_getc (port);

      switch (chr)
        {
        case SCM_WHITE_SPACES:
        case SCM_LINE_INCREMENTORS:
          break;
        case ';':
          (void) scm_read_semicolon_comment (chr, port);
          break;
        case '(':
          return (scm_read_sexp (chr, port));
        case '"':
          return (scm_read_string (chr, port));
        case '\'':
        case '`':
        case ',':
          return (scm_read_quote (chr, port));
        case '#':
          {
            SCM result;
            result = scm_read_sharp (chr, port);
            if (scm_is_eq (result, SCM_UNSPECIFIED))
              /* We read a comment or some such.  */
              break;
            else
              return result;
          }
        case ')':
          scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
          break;
        case EOF:
          return SCM_EOF_VAL;
        case ':':
          /* With the 'prefix keyword style, `:NAME' reads as a keyword;
             otherwise the colon starts an ordinary token.  */
          if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix))
            return scm_symbol_to_keyword (scm_read_expression (port));
          /* Fall through.  */

        default:
          {
            /* Tokens starting with a digit, sign or dot are tried as
               numbers first; everything else is read as a symbol.  */
            if (((chr >= '0') && (chr <= '9'))
                || (strchr ("+-.", chr)))
              return (scm_read_number (chr, port));
            else
              return (scm_read_mixed_case_symbol (chr, port));
          }
        }
    }
}
#undef FUNC_NAME
1097
1098 \f
1099 /* Actual reader. */
1100
1101 SCM_DEFINE (scm_read, "read", 0, 1, 0,
1102 (SCM port),
1103 "Read an s-expression from the input port @var{port}, or from\n"
1104 "the current input port if @var{port} is not specified.\n"
1105 "Any whitespace before the next token is discarded.")
1106 #define FUNC_NAME s_scm_read
1107 {
1108 int c;
1109
1110 if (SCM_UNBNDP (port))
1111 port = scm_current_input_port ();
1112 SCM_VALIDATE_OPINPORT (1, port);
1113
1114 c = flush_ws (port, (char *) NULL);
1115 if (EOF == c)
1116 return SCM_EOF_VAL;
1117 scm_ungetc (c, port);
1118
1119 return (scm_read_expression (port));
1120 }
1121 #undef FUNC_NAME
1122
1123
1124 \f
1125
1126 /* Used when recording expressions constructed by `scm_read_sharp ()'. */
1127 static SCM
1128 recsexpr (SCM obj, long line, int column, SCM filename)
1129 {
1130 if (!scm_is_pair(obj)) {
1131 return obj;
1132 } else {
1133 SCM tmp = obj, copy;
1134 /* If this sexpr is visible in the read:sharp source, we want to
1135 keep that information, so only record non-constant cons cells
1136 which haven't previously been read by the reader. */
1137 if (scm_is_false (scm_whash_lookup (scm_source_whash, obj)))
1138 {
1139 if (SCM_COPY_SOURCE_P)
1140 {
1141 copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename),
1142 SCM_UNDEFINED);
1143 while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
1144 {
1145 SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp),
1146 line,
1147 column,
1148 filename),
1149 SCM_UNDEFINED));
1150 copy = SCM_CDR (copy);
1151 }
1152 SCM_SETCDR (copy, tmp);
1153 }
1154 else
1155 {
1156 recsexpr (SCM_CAR (obj), line, column, filename);
1157 while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
1158 recsexpr (SCM_CAR (tmp), line, column, filename);
1159 copy = SCM_UNDEFINED;
1160 }
1161 scm_whash_insert (scm_source_whash,
1162 obj,
1163 scm_make_srcprops (line,
1164 column,
1165 filename,
1166 copy,
1167 SCM_EOL));
1168 }
1169 return obj;
1170 }
1171 }
1172
1173 /* Manipulate the read-hash-procedures alist. This could be written in
1174 Scheme, but maybe it will also be used by C code during initialisation. */
1175 SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
1176 (SCM chr, SCM proc),
1177 "Install the procedure @var{proc} for reading expressions\n"
1178 "starting with the character sequence @code{#} and @var{chr}.\n"
1179 "@var{proc} will be called with two arguments: the character\n"
1180 "@var{chr} and the port to read further data from. The object\n"
1181 "returned will be the return value of @code{read}. \n"
1182 "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1183 )
1184 #define FUNC_NAME s_scm_read_hash_extend
1185 {
1186 SCM this;
1187 SCM prev;
1188
1189 SCM_VALIDATE_CHAR (1, chr);
1190 SCM_ASSERT (scm_is_false (proc)
1191 || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
1192 proc, SCM_ARG2, FUNC_NAME);
1193
1194 /* Check if chr is already in the alist. */
1195 this = *scm_read_hash_procedures;
1196 prev = SCM_BOOL_F;
1197 while (1)
1198 {
1199 if (scm_is_null (this))
1200 {
1201 /* not found, so add it to the beginning. */
1202 if (scm_is_true (proc))
1203 {
1204 *scm_read_hash_procedures =
1205 scm_cons (scm_cons (chr, proc), *scm_read_hash_procedures);
1206 }
1207 break;
1208 }
1209 if (scm_is_eq (chr, SCM_CAAR (this)))
1210 {
1211 /* already in the alist. */
1212 if (scm_is_false (proc))
1213 {
1214 /* remove it. */
1215 if (scm_is_false (prev))
1216 {
1217 *scm_read_hash_procedures =
1218 SCM_CDR (*scm_read_hash_procedures);
1219 }
1220 else
1221 scm_set_cdr_x (prev, SCM_CDR (this));
1222 }
1223 else
1224 {
1225 /* replace it. */
1226 scm_set_cdr_x (SCM_CAR (this), proc);
1227 }
1228 break;
1229 }
1230 prev = this;
1231 this = SCM_CDR (this);
1232 }
1233
1234 return SCM_UNSPECIFIED;
1235 }
1236 #undef FUNC_NAME
1237
1238 /* Recover the read-hash procedure corresponding to char c. */
1239 static SCM
1240 scm_get_hash_procedure (int c)
1241 {
1242 SCM rest = *scm_read_hash_procedures;
1243
1244 while (1)
1245 {
1246 if (scm_is_null (rest))
1247 return SCM_BOOL_F;
1248
1249 if (SCM_CHAR (SCM_CAAR (rest)) == c)
1250 return SCM_CDAR (rest);
1251
1252 rest = SCM_CDR (rest);
1253 }
1254 }
1255
1256 void
1257 scm_init_read ()
1258 {
1259 scm_read_hash_procedures =
1260 SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL));
1261
1262 scm_init_opts (scm_read_options, scm_read_opts);
1263 #include "libguile/read.x"
1264 }
1265
1266 /*
1267 Local Variables:
1268 c-file-style: "gnu"
1269 End:
1270 */