Changes from arch/CVS synchronization
[bpt/guile.git] / libguile / read.c
1 /* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007 Free Software
2 * Foundation, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19
20 \f
21
22 #ifdef HAVE_CONFIG_H
23 # include <config.h>
24 #endif
25
26 #include <stdio.h>
27 #include <ctype.h>
28 #include <string.h>
29
30 #include "libguile/_scm.h"
31 #include "libguile/chars.h"
32 #include "libguile/eval.h"
33 #include "libguile/unif.h"
34 #include "libguile/keywords.h"
35 #include "libguile/alist.h"
36 #include "libguile/srcprop.h"
37 #include "libguile/hashtab.h"
38 #include "libguile/hash.h"
39 #include "libguile/ports.h"
40 #include "libguile/root.h"
41 #include "libguile/strings.h"
42 #include "libguile/strports.h"
43 #include "libguile/vectors.h"
44 #include "libguile/validate.h"
45 #include "libguile/srfi-4.h"
46 #include "libguile/srfi-13.h"
47
48 #include "libguile/read.h"
49 #include "libguile/private-options.h"
50
51
52 \f
53
54 SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
55 SCM_SYMBOL (scm_keyword_prefix, "prefix");
56
57 scm_t_option scm_read_opts[] = {
58 { SCM_OPTION_BOOLEAN, "copy", 0,
59 "Copy source code expressions." },
60 { SCM_OPTION_BOOLEAN, "positions", 0,
61 "Record positions of source code expressions." },
62 { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
63 "Convert symbols to lower case."},
64 { SCM_OPTION_SCM, "keywords", SCM_UNPACK (SCM_BOOL_F),
65 "Style of keyword recognition: #f or 'prefix."},
66 #if SCM_ENABLE_ELISP
67 { SCM_OPTION_BOOLEAN, "elisp-vectors", 0,
68 "Support Elisp vector syntax, namely `[...]'."},
69 { SCM_OPTION_BOOLEAN, "elisp-strings", 0,
70 "Support `\\(' and `\\)' in strings."},
71 #endif
72 { 0, },
73 };
74
75 /*
76 Give meaningful error messages for errors
77
78 We use the format
79
80 FILE:LINE:COL: MESSAGE
81 This happened in ....
82
83 This is not standard GNU format, but the test-suite likes the real
84 message to be in front.
85
86 */
87
88
89 void
90 scm_i_input_error (char const *function,
91 SCM port, const char *message, SCM arg)
92 {
93 SCM fn = (scm_is_string (SCM_FILENAME(port))
94 ? SCM_FILENAME(port)
95 : scm_from_locale_string ("#<unknown port>"));
96
97 SCM string_port = scm_open_output_string ();
98 SCM string = SCM_EOL;
99 scm_simple_format (string_port,
100 scm_from_locale_string ("~A:~S:~S: ~A"),
101 scm_list_4 (fn,
102 scm_from_long (SCM_LINUM (port) + 1),
103 scm_from_int (SCM_COL (port) + 1),
104 scm_from_locale_string (message)));
105
106 string = scm_get_output_string (string_port);
107 scm_close_output_port (string_port);
108 scm_error_scm (scm_from_locale_symbol ("read-error"),
109 function? scm_from_locale_string (function) : SCM_BOOL_F,
110 string,
111 arg,
112 SCM_BOOL_F);
113 }
114
115
116 SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
117 (SCM setting),
118 "Option interface for the read options. Instead of using\n"
119 "this procedure directly, use the procedures @code{read-enable},\n"
120 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
121 #define FUNC_NAME s_scm_read_options
122 {
123 SCM ans = scm_options (setting,
124 scm_read_opts,
125 FUNC_NAME);
126 if (SCM_COPY_SOURCE_P)
127 SCM_RECORD_POSITIONS_P = 1;
128 return ans;
129 }
130 #undef FUNC_NAME
131
132 /* An association list mapping extra hash characters to procedures. */
133 static SCM *scm_read_hash_procedures;
134
135
136 \f
137 /* Token readers. */
138
139
140 /* Size of the C buffer used to read symbols and numbers. */
141 #define READER_BUFFER_SIZE 128
142
143 /* Size of the C buffer used to read strings. */
144 #define READER_STRING_BUFFER_SIZE 512
145
146 /* The maximum size of Scheme character names. */
147 #define READER_CHAR_NAME_MAX_SIZE 50
148
149
150 /* `isblank' is only in C99. */
151 #define CHAR_IS_BLANK_(_chr) \
152 (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n') \
153 || ((_chr) == '\f'))
154
155 #ifdef MSDOS
156 # define CHAR_IS_BLANK(_chr) \
157 ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
158 #else
159 # define CHAR_IS_BLANK CHAR_IS_BLANK_
160 #endif
161
162
163 /* R5RS one-character delimiters (see section 7.1.1, ``Lexical
164 structure''). */
165 #define CHAR_IS_R5RS_DELIMITER(c) \
166 (CHAR_IS_BLANK (c) \
167 || (c == ')') || (c == '(') || (c == ';') || (c == '"'))
168
169 #define CHAR_IS_DELIMITER CHAR_IS_R5RS_DELIMITER
170
171 /* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
172 Structure''. */
173 #define CHAR_IS_EXPONENT_MARKER(_chr) \
174 (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
175 || ((_chr) == 'd') || ((_chr) == 'l'))
176
177 /* An inlinable version of `scm_c_downcase ()'. */
178 #define CHAR_DOWNCASE(_chr) \
179 (((_chr) <= UCHAR_MAX) ? tolower (_chr) : (_chr))
180
181
182 /* Helper function similar to `scm_read_token ()'. Read from PORT until a
183 whitespace is read. Return zero if the whole token could fit in BUF,
184 non-zero otherwise. */
185 static inline int
186 read_token (SCM port, char *buf, size_t buf_size, size_t *read)
187 {
188 *read = 0;
189
190 while (*read < buf_size)
191 {
192 int chr;
193
194 chr = scm_getc (port);
195 chr = (SCM_CASE_INSENSITIVE_P ? CHAR_DOWNCASE (chr) : chr);
196
197 if (chr == EOF)
198 return 0;
199 else if (CHAR_IS_DELIMITER (chr))
200 {
201 scm_ungetc (chr, port);
202 return 0;
203 }
204 else
205 {
206 *buf = (char) chr;
207 buf++, (*read)++;
208 }
209 }
210
211 return 1;
212 }
213
214
215 /* Skip whitespace from PORT and return the first non-whitespace character
216 read. Raise an error on end-of-file. */
217 static int
218 flush_ws (SCM port, const char *eoferr)
219 {
220 register int c;
221 while (1)
222 switch (c = scm_getc (port))
223 {
224 case EOF:
225 goteof:
226 if (eoferr)
227 {
228 scm_i_input_error (eoferr,
229 port,
230 "end of file",
231 SCM_EOL);
232 }
233 return c;
234
235 case ';':
236 lp:
237 switch (c = scm_getc (port))
238 {
239 case EOF:
240 goto goteof;
241 default:
242 goto lp;
243 case SCM_LINE_INCREMENTORS:
244 break;
245 }
246 break;
247
248 case SCM_LINE_INCREMENTORS:
249 case SCM_SINGLE_SPACES:
250 case '\t':
251 break;
252
253 default:
254 return c;
255 }
256
257 return 0;
258 }
259
260
261 \f
262 /* Token readers. */
263
264 static SCM scm_read_expression (SCM port);
265 static SCM scm_read_sharp (int chr, SCM port);
266 static SCM scm_get_hash_procedure (int c);
267 static SCM recsexpr (SCM obj, long line, int column, SCM filename);
268
269
270 static SCM
271 scm_read_sexp (int chr, SCM port)
272 #define FUNC_NAME "scm_i_lreadparen"
273 {
274 register int c;
275 register SCM tmp;
276 register SCM tl, ans = SCM_EOL;
277 SCM tl2 = SCM_EOL, ans2 = SCM_EOL, copy = SCM_BOOL_F;;
278 static const int terminating_char = ')';
279
280 /* Need to capture line and column numbers here. */
281 long line = SCM_LINUM (port);
282 int column = SCM_COL (port) - 1;
283
284
285 c = flush_ws (port, FUNC_NAME);
286 if (terminating_char == c)
287 return SCM_EOL;
288
289 scm_ungetc (c, port);
290 if (scm_is_eq (scm_sym_dot,
291 (tmp = scm_read_expression (port))))
292 {
293 ans = scm_read_expression (port);
294 if (terminating_char != (c = flush_ws (port, FUNC_NAME)))
295 scm_i_input_error (FUNC_NAME, port, "missing close paren",
296 SCM_EOL);
297 return ans;
298 }
299
300 /* Build the head of the list structure. */
301 ans = tl = scm_cons (tmp, SCM_EOL);
302
303 if (SCM_COPY_SOURCE_P)
304 ans2 = tl2 = scm_cons (scm_is_pair (tmp)
305 ? copy
306 : tmp,
307 SCM_EOL);
308
309 while (terminating_char != (c = flush_ws (port, FUNC_NAME)))
310 {
311 SCM new_tail;
312
313 scm_ungetc (c, port);
314 if (scm_is_eq (scm_sym_dot,
315 (tmp = scm_read_expression (port))))
316 {
317 SCM_SETCDR (tl, tmp = scm_read_expression (port));
318
319 if (SCM_COPY_SOURCE_P)
320 SCM_SETCDR (tl2, scm_cons (scm_is_pair (tmp) ? copy : tmp,
321 SCM_EOL));
322
323 c = flush_ws (port, FUNC_NAME);
324 if (terminating_char != c)
325 scm_i_input_error (FUNC_NAME, port,
326 "in pair: missing close paren", SCM_EOL);
327 goto exit;
328 }
329
330 new_tail = scm_cons (tmp, SCM_EOL);
331 SCM_SETCDR (tl, new_tail);
332 tl = new_tail;
333
334 if (SCM_COPY_SOURCE_P)
335 {
336 SCM new_tail2 = scm_cons (scm_is_pair (tmp)
337 ? copy
338 : tmp, SCM_EOL);
339 SCM_SETCDR (tl2, new_tail2);
340 tl2 = new_tail2;
341 }
342 }
343
344 exit:
345 if (SCM_RECORD_POSITIONS_P)
346 scm_whash_insert (scm_source_whash,
347 ans,
348 scm_make_srcprops (line, column,
349 SCM_FILENAME (port),
350 SCM_COPY_SOURCE_P
351 ? ans2
352 : SCM_UNDEFINED,
353 SCM_EOL));
354 return ans;
355 }
356 #undef FUNC_NAME
357
358 static SCM
359 scm_read_string (int chr, SCM port)
360 #define FUNC_NAME "scm_lreadr"
361 {
362 /* For strings smaller than C_STR, this function creates only one Scheme
363 object (the string returned). */
364
365 SCM str = SCM_BOOL_F;
366 char c_str[READER_STRING_BUFFER_SIZE];
367 unsigned c_str_len = 0;
368 int c;
369
370 while ('"' != (c = scm_getc (port)))
371 {
372 if (c == EOF)
373 str_eof: scm_i_input_error (FUNC_NAME, port,
374 "end of file in string constant",
375 SCM_EOL);
376
377 if (c_str_len + 1 >= sizeof (c_str))
378 {
379 /* Flush the C buffer onto a Scheme string. */
380 SCM addy;
381
382 if (str == SCM_BOOL_F)
383 str = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
384
385 addy = scm_from_locale_stringn (c_str, c_str_len);
386 str = scm_string_append_shared (scm_list_2 (str, addy));
387
388 c_str_len = 0;
389 }
390
391 if (c == '\\')
392 switch (c = scm_getc (port))
393 {
394 case EOF:
395 goto str_eof;
396 case '"':
397 case '\\':
398 break;
399 #if SCM_ENABLE_ELISP
400 case '(':
401 case ')':
402 if (SCM_ESCAPED_PARENS_P)
403 break;
404 goto bad_escaped;
405 #endif
406 case '\n':
407 continue;
408 case '0':
409 c = '\0';
410 break;
411 case 'f':
412 c = '\f';
413 break;
414 case 'n':
415 c = '\n';
416 break;
417 case 'r':
418 c = '\r';
419 break;
420 case 't':
421 c = '\t';
422 break;
423 case 'a':
424 c = '\007';
425 break;
426 case 'v':
427 c = '\v';
428 break;
429 case 'x':
430 {
431 int a, b;
432 a = scm_getc (port);
433 if (a == EOF) goto str_eof;
434 b = scm_getc (port);
435 if (b == EOF) goto str_eof;
436 if ('0' <= a && a <= '9') a -= '0';
437 else if ('A' <= a && a <= 'F') a = a - 'A' + 10;
438 else if ('a' <= a && a <= 'f') a = a - 'a' + 10;
439 else goto bad_escaped;
440 if ('0' <= b && b <= '9') b -= '0';
441 else if ('A' <= b && b <= 'F') b = b - 'A' + 10;
442 else if ('a' <= b && b <= 'f') b = b - 'a' + 10;
443 else goto bad_escaped;
444 c = a * 16 + b;
445 break;
446 }
447 default:
448 bad_escaped:
449 scm_i_input_error (FUNC_NAME, port,
450 "illegal character in escape sequence: ~S",
451 scm_list_1 (SCM_MAKE_CHAR (c)));
452 }
453 c_str[c_str_len++] = c;
454 }
455
456 if (c_str_len > 0)
457 {
458 SCM addy;
459
460 addy = scm_from_locale_stringn (c_str, c_str_len);
461 if (str == SCM_BOOL_F)
462 str = addy;
463 else
464 str = scm_string_append_shared (scm_list_2 (str, addy));
465 }
466 else
467 str = (str == SCM_BOOL_F) ? scm_nullstr : str;
468
469 return str;
470 }
471 #undef FUNC_NAME
472
473
474 static SCM
475 scm_read_number (int chr, SCM port)
476 {
477 SCM result, str = SCM_EOL;
478 char buffer[READER_BUFFER_SIZE];
479 size_t read;
480 int overflow = 0;
481
482 scm_ungetc (chr, port);
483 do
484 {
485 overflow = read_token (port, buffer, sizeof (buffer), &read);
486
487 if ((overflow) || (scm_is_pair (str)))
488 str = scm_cons (scm_from_locale_stringn (buffer, read), str);
489 }
490 while (overflow);
491
492 if (scm_is_pair (str))
493 {
494 /* The slow path. */
495
496 str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
497 result = scm_string_to_number (str, SCM_UNDEFINED);
498 if (!scm_is_true (result))
499 /* Return a symbol instead of a number. */
500 result = scm_string_to_symbol (str);
501 }
502 else
503 {
504 result = scm_c_locale_stringn_to_number (buffer, read, 10);
505 if (!scm_is_true (result))
506 /* Return a symbol instead of a number. */
507 result = scm_from_locale_symboln (buffer, read);
508 }
509
510 return result;
511 }
512
513 static SCM
514 scm_read_mixed_case_symbol (int chr, SCM port)
515 {
516 SCM result, str = SCM_EOL;
517 int overflow = 0;
518 char buffer[READER_BUFFER_SIZE];
519 size_t read = 0;
520
521 scm_ungetc (chr, port);
522 do
523 {
524 overflow = read_token (port, buffer, sizeof (buffer), &read);
525
526 if ((overflow) || (scm_is_pair (str)))
527 str = scm_cons (scm_from_locale_stringn (buffer, read), str);
528 }
529 while (overflow);
530
531 if (scm_is_pair (str))
532 {
533 str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
534 result = scm_string_to_symbol (str);
535 }
536 else
537 /* For symbols smaller than `sizeof (buffer)', we don't need to recur to
538 Scheme strings. Therefore, we only create one Scheme object (a
539 symbol) per symbol read. */
540 result = scm_from_locale_symboln (buffer, read);
541
542 return result;
543 }
544
545 static SCM
546 scm_read_number_and_radix (int chr, SCM port)
547 #define FUNC_NAME "scm_lreadr"
548 {
549 SCM result, str = SCM_EOL;
550 size_t read;
551 char buffer[READER_BUFFER_SIZE];
552 unsigned int radix;
553 int overflow = 0;
554
555 switch (chr)
556 {
557 case 'B':
558 case 'b':
559 radix = 2;
560 break;
561
562 case 'o':
563 case 'O':
564 radix = 8;
565 break;
566
567 case 'd':
568 case 'D':
569 radix = 10;
570 break;
571
572 case 'x':
573 case 'X':
574 radix = 16;
575 break;
576
577 default:
578 scm_ungetc (chr, port);
579 scm_ungetc ('#', port);
580 radix = 10;
581 }
582
583 do
584 {
585 overflow = read_token (port, buffer, sizeof (buffer), &read);
586
587 if ((overflow) || (scm_is_pair (str)))
588 str = scm_cons (scm_from_locale_stringn (buffer, read), str);
589 }
590 while (overflow);
591
592 if (scm_is_pair (str))
593 {
594 str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
595 result = scm_string_to_number (str, scm_from_uint (radix));
596 }
597 else
598 result = scm_c_locale_stringn_to_number (buffer, read, radix);
599
600 if (scm_is_true (result))
601 return result;
602
603 scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
604
605 return SCM_BOOL_F;
606 }
607 #undef FUNC_NAME
608
609 static SCM
610 scm_read_quote (int chr, SCM port)
611 {
612 SCM p;
613 long line = SCM_LINUM (port);
614 int column = SCM_COL (port) - 1;
615
616 switch (chr)
617 {
618 case '`':
619 p = scm_sym_quasiquote;
620 break;
621
622 case '\'':
623 p = scm_sym_quote;
624 break;
625
626 case ',':
627 {
628 int c;
629
630 c = scm_getc (port);
631 if ('@' == c)
632 p = scm_sym_uq_splicing;
633 else
634 {
635 scm_ungetc (c, port);
636 p = scm_sym_unquote;
637 }
638 break;
639 }
640
641 default:
642 fprintf (stderr, "%s: unhandled quote character (%i)\n",
643 __FUNCTION__, chr);
644 abort ();
645 }
646
647 p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
648 if (SCM_RECORD_POSITIONS_P)
649 scm_whash_insert (scm_source_whash, p,
650 scm_make_srcprops (line, column,
651 SCM_FILENAME (port),
652 SCM_COPY_SOURCE_P
653 ? (scm_cons2 (SCM_CAR (p),
654 SCM_CAR (SCM_CDR (p)),
655 SCM_EOL))
656 : SCM_UNDEFINED,
657 SCM_EOL));
658
659
660 return p;
661 }
662
663 static inline SCM
664 scm_read_semicolon_comment (int chr, SCM port)
665 {
666 int c;
667
668 for (c = scm_getc (port);
669 (c != EOF) && (c != '\n');
670 c = scm_getc (port));
671
672 return SCM_UNSPECIFIED;
673 }
674
675 \f
676 /* Sharp readers, i.e. readers called after a `#' sign has been read. */
677
678 static SCM
679 scm_read_boolean (int chr, SCM port)
680 {
681 switch (chr)
682 {
683 case 't':
684 case 'T':
685 return SCM_BOOL_T;
686
687 case 'f':
688 case 'F':
689 return SCM_BOOL_F;
690 }
691
692 return SCM_UNSPECIFIED;
693 }
694
695 static SCM
696 scm_read_character (int chr, SCM port)
697 #define FUNC_NAME "scm_lreadr"
698 {
699 unsigned c;
700 char charname[READER_CHAR_NAME_MAX_SIZE];
701 size_t charname_len;
702
703 if (read_token (port, charname, sizeof (charname), &charname_len))
704 goto char_error;
705
706 if (charname_len == 0)
707 {
708 chr = scm_getc (port);
709 if (chr == EOF)
710 scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
711 "while reading character", SCM_EOL);
712
713 /* CHR must be a token delimiter, like a whitespace. */
714 return (SCM_MAKE_CHAR (chr));
715 }
716
717 if (charname_len == 1)
718 return SCM_MAKE_CHAR (charname[0]);
719
720 if (*charname >= '0' && *charname < '8')
721 {
722 /* Dirk:FIXME:: This type of character syntax is not R5RS
723 * compliant. Further, it should be verified that the constant
724 * does only consist of octal digits. Finally, it should be
725 * checked whether the resulting fixnum is in the range of
726 * characters. */
727 SCM p = scm_c_locale_stringn_to_number (charname, charname_len, 8);
728 if (SCM_I_INUMP (p))
729 return SCM_MAKE_CHAR (SCM_I_INUM (p));
730 }
731
732 for (c = 0; c < scm_n_charnames; c++)
733 if (scm_charnames[c]
734 && (!strncasecmp (scm_charnames[c], charname, charname_len)))
735 return SCM_MAKE_CHAR (scm_charnums[c]);
736
737 char_error:
738 scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
739 scm_list_1 (scm_from_locale_stringn (charname,
740 charname_len)));
741
742 return SCM_UNSPECIFIED;
743 }
744 #undef FUNC_NAME
745
746 static inline SCM
747 scm_read_keyword (int chr, SCM port)
748 {
749 SCM symbol;
750
751 /* Read the symbol that comprises the keyword. Doing this instead of
752 invoking a specific symbol reader function allows `scm_read_keyword ()'
753 to adapt to the delimiters currently valid of symbols.
754
755 XXX: This implementation allows sloppy syntaxes like `#: key'. */
756 symbol = scm_read_expression (port);
757 if (!scm_is_symbol (symbol))
758 scm_i_input_error (__FUNCTION__, port,
759 "keyword prefix `~a' not followed by a symbol: ~s",
760 scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
761
762 return (scm_symbol_to_keyword (symbol));
763 }
764
765 static inline SCM
766 scm_read_vector (int chr, SCM port)
767 {
768 /* Note: We call `scm_read_sexp ()' rather than READER here in order to
769 guarantee that it's going to do what we want. After all, this is an
770 implementation detail of `scm_read_vector ()', not a desirable
771 property. */
772 return (scm_vector (scm_read_sexp (chr, port)));
773 }
774
775 static inline SCM
776 scm_read_srfi4_vector (int chr, SCM port)
777 {
778 return scm_i_read_array (port, chr);
779 }
780
781 static SCM
782 scm_read_guile_bit_vector (int chr, SCM port)
783 {
784 /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
785 terribly inefficient but who cares? */
786 SCM s_bits = SCM_EOL;
787
788 for (chr = scm_getc (port);
789 (chr != EOF) && ((chr == '0') || (chr == '1'));
790 chr = scm_getc (port))
791 {
792 s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
793 }
794
795 if (chr != EOF)
796 scm_ungetc (chr, port);
797
798 return scm_bitvector (scm_reverse_x (s_bits, SCM_EOL));
799 }
800
801 static inline SCM
802 scm_read_scsh_block_comment (int chr, SCM port)
803 {
804 int bang_seen = 0;
805
806 for (;;)
807 {
808 int c = scm_getc (port);
809
810 if (c == EOF)
811 scm_i_input_error ("skip_block_comment", port,
812 "unterminated `#! ... !#' comment", SCM_EOL);
813
814 if (c == '!')
815 bang_seen = 1;
816 else if (c == '#' && bang_seen)
817 break;
818 else
819 bang_seen = 0;
820 }
821
822 return SCM_UNSPECIFIED;
823 }
824
825 static SCM
826 scm_read_extended_symbol (int chr, SCM port)
827 {
828 /* Guile's extended symbol read syntax looks like this:
829
830 #{This is all a symbol name}#
831
832 So here, CHR is expected to be `{'. */
833 SCM result;
834 int saw_brace = 0, finished = 0;
835 size_t len = 0;
836 char buf[1024];
837
838 result = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
839
840 while ((chr = scm_getc (port)) != EOF)
841 {
842 if (saw_brace)
843 {
844 if (chr == '#')
845 {
846 finished = 1;
847 break;
848 }
849 else
850 {
851 saw_brace = 0;
852 buf[len++] = '}';
853 buf[len++] = chr;
854 }
855 }
856 else if (chr == '}')
857 saw_brace = 1;
858 else
859 buf[len++] = chr;
860
861 if (len >= sizeof (buf) - 2)
862 {
863 scm_string_append (scm_list_2 (result,
864 scm_from_locale_stringn (buf, len)));
865 len = 0;
866 }
867
868 if (finished)
869 break;
870 }
871
872 if (len)
873 result = scm_string_append (scm_list_2
874 (result,
875 scm_from_locale_stringn (buf, len)));
876
877 return (scm_string_to_symbol (result));
878 }
879
880
881 \f
882 /* Top-level token readers, i.e., dispatchers. */
883
884 static SCM
885 scm_read_sharp_extension (int chr, SCM port)
886 {
887 SCM proc;
888
889 proc = scm_get_hash_procedure (chr);
890 if (scm_is_true (scm_procedure_p (proc)))
891 {
892 long line = SCM_LINUM (port);
893 int column = SCM_COL (port) - 2;
894 SCM got;
895
896 got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
897 if (!scm_is_eq (got, SCM_UNSPECIFIED))
898 {
899 if (SCM_RECORD_POSITIONS_P)
900 return (recsexpr (got, line, column,
901 SCM_FILENAME (port)));
902 else
903 return got;
904 }
905 }
906
907 return SCM_UNSPECIFIED;
908 }
909
910 /* The reader for the sharp `#' character. It basically dispatches reads
911 among the above token readers. */
912 static SCM
913 scm_read_sharp (int chr, SCM port)
914 #define FUNC_NAME "scm_lreadr"
915 {
916 SCM result;
917
918 chr = scm_getc (port);
919
920 result = scm_read_sharp_extension (chr, port);
921 if (!scm_is_eq (result, SCM_UNSPECIFIED))
922 return result;
923
924 switch (chr)
925 {
926 case '\\':
927 return (scm_read_character (chr, port));
928 case '(':
929 return (scm_read_vector (chr, port));
930 case 's':
931 case 'u':
932 case 'f':
933 /* This one may return either a boolean or an SRFI-4 vector. */
934 return (scm_read_srfi4_vector (chr, port));
935 case '*':
936 return (scm_read_guile_bit_vector (chr, port));
937 case 't':
938 case 'T':
939 case 'F':
940 /* This one may return either a boolean or an SRFI-4 vector. */
941 return (scm_read_boolean (chr, port));
942 case ':':
943 return (scm_read_keyword (chr, port));
944 case '0': case '1': case '2': case '3': case '4':
945 case '5': case '6': case '7': case '8': case '9':
946 case '@':
947 #if SCM_ENABLE_DEPRECATED
948 /* See below for 'i' and 'e'. */
949 case 'a':
950 case 'c':
951 case 'y':
952 case 'h':
953 case 'l':
954 #endif
955 return (scm_i_read_array (port, chr));
956
957 case 'i':
958 case 'e':
959 #if SCM_ENABLE_DEPRECATED
960 {
961 /* When next char is '(', it really is an old-style
962 uniform array. */
963 int next_c = scm_getc (port);
964 if (next_c != EOF)
965 scm_ungetc (next_c, port);
966 if (next_c == '(')
967 return scm_i_read_array (port, chr);
968 /* Fall through. */
969 }
970 #endif
971 case 'b':
972 case 'B':
973 case 'o':
974 case 'O':
975 case 'd':
976 case 'D':
977 case 'x':
978 case 'X':
979 case 'I':
980 case 'E':
981 return (scm_read_number_and_radix (chr, port));
982 case '{':
983 return (scm_read_extended_symbol (chr, port));
984 case '!':
985 return (scm_read_scsh_block_comment (chr, port));
986 default:
987 result = scm_read_sharp_extension (chr, port);
988 if (scm_is_eq (result, SCM_UNSPECIFIED))
989 scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
990 scm_list_1 (SCM_MAKE_CHAR (chr)));
991 else
992 return result;
993 }
994
995 return SCM_UNSPECIFIED;
996 }
997 #undef FUNC_NAME
998
999 static SCM
1000 scm_read_expression (SCM port)
1001 #define FUNC_NAME "scm_read_expression"
1002 {
1003 while (1)
1004 {
1005 register int chr;
1006
1007 chr = scm_getc (port);
1008
1009 switch (chr)
1010 {
1011 case SCM_WHITE_SPACES:
1012 case SCM_LINE_INCREMENTORS:
1013 break;
1014 case ';':
1015 (void) scm_read_semicolon_comment (chr, port);
1016 break;
1017 case '(':
1018 return (scm_read_sexp (chr, port));
1019 case '"':
1020 return (scm_read_string (chr, port));
1021 case '\'':
1022 case '`':
1023 case ',':
1024 return (scm_read_quote (chr, port));
1025 case '#':
1026 {
1027 SCM result;
1028 result = scm_read_sharp (chr, port);
1029 if (scm_is_eq (result, SCM_UNSPECIFIED))
1030 /* We read a comment or some such. */
1031 break;
1032 else
1033 return result;
1034 }
1035 case ')':
1036 scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
1037 break;
1038 case EOF:
1039 return SCM_EOF_VAL;
1040 case ':':
1041 if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix))
1042 return scm_symbol_to_keyword (scm_read_expression (port));
1043 /* Fall through. */
1044
1045 default:
1046 {
1047 if (((chr >= '0') && (chr <= '9'))
1048 || (strchr ("+-.", chr)))
1049 return (scm_read_number (chr, port));
1050 else
1051 return (scm_read_mixed_case_symbol (chr, port));
1052 }
1053 }
1054 }
1055 }
1056 #undef FUNC_NAME
1057
1058 \f
1059 /* Actual reader. */
1060
1061 SCM_DEFINE (scm_read, "read", 0, 1, 0,
1062 (SCM port),
1063 "Read an s-expression from the input port @var{port}, or from\n"
1064 "the current input port if @var{port} is not specified.\n"
1065 "Any whitespace before the next token is discarded.")
1066 #define FUNC_NAME s_scm_read
1067 {
1068 int c;
1069
1070 if (SCM_UNBNDP (port))
1071 port = scm_current_input_port ();
1072 SCM_VALIDATE_OPINPORT (1, port);
1073
1074 c = flush_ws (port, (char *) NULL);
1075 if (EOF == c)
1076 return SCM_EOF_VAL;
1077 scm_ungetc (c, port);
1078
1079 return (scm_read_expression (port));
1080 }
1081 #undef FUNC_NAME
1082
1083
1084 \f
1085
1086 /* Used when recording expressions constructed by `scm_read_sharp ()'. */
1087 static SCM
1088 recsexpr (SCM obj, long line, int column, SCM filename)
1089 {
1090 if (!scm_is_pair(obj)) {
1091 return obj;
1092 } else {
1093 SCM tmp = obj, copy;
1094 /* If this sexpr is visible in the read:sharp source, we want to
1095 keep that information, so only record non-constant cons cells
1096 which haven't previously been read by the reader. */
1097 if (scm_is_false (scm_whash_lookup (scm_source_whash, obj)))
1098 {
1099 if (SCM_COPY_SOURCE_P)
1100 {
1101 copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename),
1102 SCM_UNDEFINED);
1103 while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
1104 {
1105 SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp),
1106 line,
1107 column,
1108 filename),
1109 SCM_UNDEFINED));
1110 copy = SCM_CDR (copy);
1111 }
1112 SCM_SETCDR (copy, tmp);
1113 }
1114 else
1115 {
1116 recsexpr (SCM_CAR (obj), line, column, filename);
1117 while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
1118 recsexpr (SCM_CAR (tmp), line, column, filename);
1119 copy = SCM_UNDEFINED;
1120 }
1121 scm_whash_insert (scm_source_whash,
1122 obj,
1123 scm_make_srcprops (line,
1124 column,
1125 filename,
1126 copy,
1127 SCM_EOL));
1128 }
1129 return obj;
1130 }
1131 }
1132
1133 /* Manipulate the read-hash-procedures alist. This could be written in
1134 Scheme, but maybe it will also be used by C code during initialisation. */
1135 SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
1136 (SCM chr, SCM proc),
1137 "Install the procedure @var{proc} for reading expressions\n"
1138 "starting with the character sequence @code{#} and @var{chr}.\n"
1139 "@var{proc} will be called with two arguments: the character\n"
1140 "@var{chr} and the port to read further data from. The object\n"
1141 "returned will be the return value of @code{read}. \n"
1142 "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1143 )
1144 #define FUNC_NAME s_scm_read_hash_extend
1145 {
1146 SCM this;
1147 SCM prev;
1148
1149 SCM_VALIDATE_CHAR (1, chr);
1150 SCM_ASSERT (scm_is_false (proc)
1151 || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
1152 proc, SCM_ARG2, FUNC_NAME);
1153
1154 /* Check if chr is already in the alist. */
1155 this = *scm_read_hash_procedures;
1156 prev = SCM_BOOL_F;
1157 while (1)
1158 {
1159 if (scm_is_null (this))
1160 {
1161 /* not found, so add it to the beginning. */
1162 if (scm_is_true (proc))
1163 {
1164 *scm_read_hash_procedures =
1165 scm_cons (scm_cons (chr, proc), *scm_read_hash_procedures);
1166 }
1167 break;
1168 }
1169 if (scm_is_eq (chr, SCM_CAAR (this)))
1170 {
1171 /* already in the alist. */
1172 if (scm_is_false (proc))
1173 {
1174 /* remove it. */
1175 if (scm_is_false (prev))
1176 {
1177 *scm_read_hash_procedures =
1178 SCM_CDR (*scm_read_hash_procedures);
1179 }
1180 else
1181 scm_set_cdr_x (prev, SCM_CDR (this));
1182 }
1183 else
1184 {
1185 /* replace it. */
1186 scm_set_cdr_x (SCM_CAR (this), proc);
1187 }
1188 break;
1189 }
1190 prev = this;
1191 this = SCM_CDR (this);
1192 }
1193
1194 return SCM_UNSPECIFIED;
1195 }
1196 #undef FUNC_NAME
1197
1198 /* Recover the read-hash procedure corresponding to char c. */
1199 static SCM
1200 scm_get_hash_procedure (int c)
1201 {
1202 SCM rest = *scm_read_hash_procedures;
1203
1204 while (1)
1205 {
1206 if (scm_is_null (rest))
1207 return SCM_BOOL_F;
1208
1209 if (SCM_CHAR (SCM_CAAR (rest)) == c)
1210 return SCM_CDAR (rest);
1211
1212 rest = SCM_CDR (rest);
1213 }
1214 }
1215
1216 void
1217 scm_init_read ()
1218 {
1219 scm_read_hash_procedures =
1220 SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL));
1221
1222 scm_init_opts (scm_read_options, scm_read_opts);
1223 #include "libguile/read.x"
1224 }
1225
1226 /*
1227 Local Variables:
1228 c-file-style: "gnu"
1229 End:
1230 */