1 /* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007, 2008, 2009 Free Software
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31 #include "libguile/_scm.h"
32 #include "libguile/bytevectors.h"
33 #include "libguile/chars.h"
34 #include "libguile/eval.h"
35 #include "libguile/unif.h"
36 #include "libguile/keywords.h"
37 #include "libguile/alist.h"
38 #include "libguile/srcprop.h"
39 #include "libguile/hashtab.h"
40 #include "libguile/hash.h"
41 #include "libguile/ports.h"
42 #include "libguile/root.h"
43 #include "libguile/strings.h"
44 #include "libguile/strports.h"
45 #include "libguile/vectors.h"
46 #include "libguile/validate.h"
47 #include "libguile/srfi-4.h"
48 #include "libguile/srfi-13.h"
50 #include "libguile/read.h"
51 #include "libguile/private-options.h"
/* Symbols the reader compares against by identity (scm_is_eq).  */

/* The symbol `.': scm_read_sexp compares every expression it reads with
   this symbol in order to recognise dotted-pair syntax.  */
56 SCM_GLOBAL_SYMBOL (scm_sym_dot
, ".");
/* Keyword styles.  SCM_KEYWORD_STYLE is compared with `prefix' in
   scm_read_expression and with `postfix' in scm_read_mixed_case_symbol.
   NOTE(review): SCM_KEYWORD_STYLE presumably reflects the `keywords' read
   option declared in scm_read_opts -- confirm against the options
   header.  */
57 SCM_SYMBOL (scm_keyword_prefix
, "prefix");
58 SCM_SYMBOL (scm_keyword_postfix
, "postfix");
60 scm_t_option scm_read_opts
[] = {
61 { SCM_OPTION_BOOLEAN
, "copy", 0,
62 "Copy source code expressions." },
63 { SCM_OPTION_BOOLEAN
, "positions", 0,
64 "Record positions of source code expressions." },
65 { SCM_OPTION_BOOLEAN
, "case-insensitive", 0,
66 "Convert symbols to lower case."},
67 { SCM_OPTION_SCM
, "keywords", SCM_UNPACK (SCM_BOOL_F
),
68 "Style of keyword recognition: #f, 'prefix or 'postfix."},
70 { SCM_OPTION_BOOLEAN
, "elisp-vectors", 0,
71 "Support Elisp vector syntax, namely `[...]'."},
72 { SCM_OPTION_BOOLEAN
, "elisp-strings", 0,
73 "Support `\\(' and `\\)' in strings."},
/* Give meaningful error messages for errors, using the format

     FILE:LINE:COL: MESSAGE

   This is not standard GNU format, but the test-suite likes the real
   message to be in front.  */
93 scm_i_input_error (char const *function
,
94 SCM port
, const char *message
, SCM arg
)
96 SCM fn
= (scm_is_string (SCM_FILENAME(port
))
98 : scm_from_locale_string ("#<unknown port>"));
100 SCM string_port
= scm_open_output_string ();
101 SCM string
= SCM_EOL
;
102 scm_simple_format (string_port
,
103 scm_from_locale_string ("~A:~S:~S: ~A"),
105 scm_from_long (SCM_LINUM (port
) + 1),
106 scm_from_int (SCM_COL (port
) + 1),
107 scm_from_locale_string (message
)));
109 string
= scm_get_output_string (string_port
);
110 scm_close_output_port (string_port
);
111 scm_error_scm (scm_from_locale_symbol ("read-error"),
112 function
? scm_from_locale_string (function
) : SCM_BOOL_F
,
119 SCM_DEFINE (scm_read_options
, "read-options-interface", 0, 1, 0,
121 "Option interface for the read options. Instead of using\n"
122 "this procedure directly, use the procedures @code{read-enable},\n"
123 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
124 #define FUNC_NAME s_scm_read_options
126 SCM ans
= scm_options (setting
,
129 if (SCM_COPY_SOURCE_P
)
130 SCM_RECORD_POSITIONS_P
= 1;
135 /* An association list mapping extra hash characters to procedures. */
/* Note that this is a pointer to the SCM cell that holds the alist, not
   the alist itself: the initialisation code points it at the location of
   the Scheme variable `read-hash-procedures' (via SCM_VARIABLE_LOC), and
   every reader/writer dereferences it.  Entries are (CHR . PROC) pairs as
   consed up by scm_read_hash_extend.  */
136 static SCM
*scm_read_hash_procedures
;
143 /* Size of the C buffer used to read symbols and numbers.  Tokens that do
   not fit are read in several chunks; the chunks are collected as Scheme
   strings and concatenated afterwards (see scm_read_number,
   scm_read_mixed_case_symbol and scm_read_number_and_radix). */
144 #define READER_BUFFER_SIZE 128
146 /* Size of the C buffer used to read strings.  scm_read_string creates its
   working string with this length and appends another string of the same
   length whenever it is about to run out of room. */
147 #define READER_STRING_BUFFER_SIZE 512
149 /* The maximum size of Scheme character names.  This is the size of the
   buffer that scm_read_character hands to read_token. */
150 #define READER_CHAR_NAME_MAX_SIZE 50
153 /* `isblank' is only in C99. */
/* Non-zero when _CHR is a space, horizontal tab, newline, form feed or
   carriage return.  Vertical tab is not in the set.  _CHR is evaluated up
   to five times, so it must not have side effects.  */
154 #define CHAR_IS_BLANK_(_chr) \
155 (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n') \
156 || ((_chr) == '\f') || ((_chr) == '\r'))
/* Blank test that additionally accepts character code 26 (ASCII SUB,
   i.e. Ctrl-Z).  The argument is forwarded to CHAR_IS_BLANK_ under its own
   parameter name `_chr'; the macro must not rely on the caller happening
   to have a variable called `chr' in scope.  _CHR is evaluated more than
   once, so it must not have side effects.  */
# define CHAR_IS_BLANK(_chr)					\
  ((CHAR_IS_BLANK_ (_chr)) || ((_chr) == 26))
/* Variant without the extra code-26 test: a plain alias of CHAR_IS_BLANK_.
   NOTE(review): the preprocessor conditional that selects between the two
   CHAR_IS_BLANK definitions is not visible in this listing.  */
162 # define CHAR_IS_BLANK CHAR_IS_BLANK_
166 /* R5RS one-character delimiters (see section 7.1.1, ``Lexical structure'').  */
168 #define CHAR_IS_R5RS_DELIMITER(c) \
170 || (c == ')') || (c == '(') || (c == ';') || (c == '"'))
172 #define CHAR_IS_DELIMITER CHAR_IS_R5RS_DELIMITER
174 /* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical structure''.  */
/* Non-zero when _CHR is one of the lower-case letters `e', `s', `f', `d'
   or `l'.  Upper-case letters are not matched by this macro.  _CHR is
   evaluated up to five times, so it must not have side effects.  */
176 #define CHAR_IS_EXPONENT_MARKER(_chr) \
177 (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
178 || ((_chr) == 'd') || ((_chr) == 'l'))
/* An inlinable version of `scm_c_downcase ()'.

   Only values in the range 0..UCHAR_MAX are handed to `tolower'; anything
   else (values above UCHAR_MAX, EOF or any other negative value) is
   returned unchanged.  Passing `tolower' a negative value other than EOF is
   undefined behaviour in C, hence the lower bound.  _CHR is evaluated more
   than once, so it must not have side effects.  */
#define CHAR_DOWNCASE(_chr)						\
  ((((_chr) >= 0) && ((_chr) <= UCHAR_MAX)) ? tolower (_chr) : (_chr))
/* Forward declarations: flush_ws calls both of these readers before their
   definitions appear further down in the file.  */
185 /* Read an SCSH block comment. */
186 static inline SCM
scm_read_scsh_block_comment (int chr
, SCM port
);
/* Reader for the `#;' comment syntax: it reads the following expression
   with scm_read_expression and discards it, returning SCM_UNSPECIFIED.  */
187 static SCM
scm_read_commented_expression (int chr
, SCM port
);
189 /* Read from PORT until a delimiter (e.g., a whitespace) is read. Return
190 zero if the whole token fits in BUF, non-zero otherwise. */
192 read_token (SCM port
, char *buf
, size_t buf_size
, size_t *read
)
196 while (*read
< buf_size
)
200 chr
= scm_getc (port
);
201 chr
= (SCM_CASE_INSENSITIVE_P
? CHAR_DOWNCASE (chr
) : chr
);
205 else if (CHAR_IS_DELIMITER (chr
))
207 scm_ungetc (chr
, port
);
221 /* Skip whitespace from PORT and return the first non-whitespace character
222 read. Raise an error on end-of-file. */
224 flush_ws (SCM port
, const char *eoferr
)
228 switch (c
= scm_getc (port
))
234 scm_i_input_error (eoferr
,
243 switch (c
= scm_getc (port
))
249 case SCM_LINE_INCREMENTORS
:
255 switch (c
= scm_getc (port
))
258 eoferr
= "read_sharp";
261 scm_read_scsh_block_comment (c
, port
);
264 scm_read_commented_expression (c
, port
);
267 scm_ungetc (c
, port
);
272 case SCM_LINE_INCREMENTORS
:
273 case SCM_SINGLE_SPACES
:
/* Forward declarations of helpers that are defined later in the file but
   are needed by the readers that follow.  */

/* Top-level expression reader; scm_read_sexp calls it for every list
   element.  */
288 static SCM
scm_read_expression (SCM port
);
/* The reader for the sharp `#' character; it dispatches among the sharp
   token readers.  */
289 static SCM
scm_read_sharp (int chr
, SCM port
);
/* Look up the read-hash procedure registered for character C.  */
290 static SCM
scm_get_hash_procedure (int c
);
/* Used by scm_read_sharp_extension, when positions are being recorded, on
   the object returned by a read-hash procedure.  */
291 static SCM
recsexpr (SCM obj
, long line
, int column
, SCM filename
);
295 scm_read_sexp (int chr
, SCM port
)
296 #define FUNC_NAME "scm_i_lreadparen"
300 register SCM tl
, ans
= SCM_EOL
;
301 SCM tl2
= SCM_EOL
, ans2
= SCM_EOL
, copy
= SCM_BOOL_F
;
302 static const int terminating_char
= ')';
304 /* Need to capture line and column numbers here. */
305 long line
= SCM_LINUM (port
);
306 int column
= SCM_COL (port
) - 1;
309 c
= flush_ws (port
, FUNC_NAME
);
310 if (terminating_char
== c
)
313 scm_ungetc (c
, port
);
314 if (scm_is_eq (scm_sym_dot
,
315 (tmp
= scm_read_expression (port
))))
317 ans
= scm_read_expression (port
);
318 if (terminating_char
!= (c
= flush_ws (port
, FUNC_NAME
)))
319 scm_i_input_error (FUNC_NAME
, port
, "missing close paren",
324 /* Build the head of the list structure. */
325 ans
= tl
= scm_cons (tmp
, SCM_EOL
);
327 if (SCM_COPY_SOURCE_P
)
328 ans2
= tl2
= scm_cons (scm_is_pair (tmp
)
333 while (terminating_char
!= (c
= flush_ws (port
, FUNC_NAME
)))
337 scm_ungetc (c
, port
);
338 if (scm_is_eq (scm_sym_dot
,
339 (tmp
= scm_read_expression (port
))))
341 SCM_SETCDR (tl
, tmp
= scm_read_expression (port
));
343 if (SCM_COPY_SOURCE_P
)
344 SCM_SETCDR (tl2
, scm_cons (scm_is_pair (tmp
) ? copy
: tmp
,
347 c
= flush_ws (port
, FUNC_NAME
);
348 if (terminating_char
!= c
)
349 scm_i_input_error (FUNC_NAME
, port
,
350 "in pair: missing close paren", SCM_EOL
);
354 new_tail
= scm_cons (tmp
, SCM_EOL
);
355 SCM_SETCDR (tl
, new_tail
);
358 if (SCM_COPY_SOURCE_P
)
360 SCM new_tail2
= scm_cons (scm_is_pair (tmp
)
363 SCM_SETCDR (tl2
, new_tail2
);
369 if (SCM_RECORD_POSITIONS_P
)
370 scm_whash_insert (scm_source_whash
,
372 scm_make_srcprops (line
, column
,
383 scm_read_string (int chr
, SCM port
)
384 #define FUNC_NAME "scm_lreadr"
386 /* For strings smaller than C_STR, this function creates only one Scheme
387 object (the string returned). */
389 SCM str
= SCM_BOOL_F
;
390 unsigned c_str_len
= 0;
393 str
= scm_i_make_string (READER_STRING_BUFFER_SIZE
, NULL
);
394 while ('"' != (c
= scm_getc (port
)))
399 scm_i_input_error (FUNC_NAME
, port
,
400 "end of file in string constant", SCM_EOL
);
403 if (c_str_len
+ 1 >= scm_i_string_length (str
))
405 SCM addy
= scm_i_make_string (READER_STRING_BUFFER_SIZE
, NULL
);
407 str
= scm_string_append (scm_list_2 (str
, addy
));
412 switch (c
= scm_getc (port
))
422 if (SCM_ESCAPED_PARENS_P
)
458 if ('0' <= a
&& a
<= '9')
460 else if ('A' <= a
&& a
<= 'F')
462 else if ('a' <= a
&& a
<= 'f')
469 if ('0' <= b
&& b
<= '9')
471 else if ('A' <= b
&& b
<= 'F')
473 else if ('a' <= b
&& b
<= 'f')
488 for (i
= 0; i
< 4; i
++)
493 if ('0' <= a
&& a
<= '9')
495 else if ('A' <= a
&& a
<= 'F')
497 else if ('a' <= a
&& a
<= 'f')
513 for (i
= 0; i
< 6; i
++)
518 if ('0' <= a
&& a
<= '9')
520 else if ('A' <= a
&& a
<= 'F')
522 else if ('a' <= a
&& a
<= 'f')
535 scm_i_input_error (FUNC_NAME
, port
,
536 "illegal character in escape sequence: ~S",
537 scm_list_1 (SCM_MAKE_CHAR (c
)));
540 str
= scm_i_string_start_writing (str
);
541 scm_i_string_set_x (str
, c_str_len
++, c
);
542 scm_i_string_stop_writing ();
547 return scm_i_substring_copy (str
, 0, c_str_len
);
556 scm_read_number (int chr
, SCM port
)
558 SCM result
, str
= SCM_EOL
;
559 char buffer
[READER_BUFFER_SIZE
];
563 scm_ungetc (chr
, port
);
566 overflow
= read_token (port
, buffer
, sizeof (buffer
), &read
);
568 if ((overflow
) || (scm_is_pair (str
)))
569 str
= scm_cons (scm_from_locale_stringn (buffer
, read
), str
);
573 if (scm_is_pair (str
))
577 str
= scm_string_concatenate (scm_reverse_x (str
, SCM_EOL
));
578 result
= scm_string_to_number (str
, SCM_UNDEFINED
);
579 if (!scm_is_true (result
))
580 /* Return a symbol instead of a number. */
581 result
= scm_string_to_symbol (str
);
585 result
= scm_c_locale_stringn_to_number (buffer
, read
, 10);
586 if (!scm_is_true (result
))
587 /* Return a symbol instead of a number. */
588 result
= scm_from_locale_symboln (buffer
, read
);
595 scm_read_mixed_case_symbol (int chr
, SCM port
)
597 SCM result
, str
= SCM_EOL
;
598 int overflow
= 0, ends_with_colon
= 0;
599 char buffer
[READER_BUFFER_SIZE
];
601 int postfix
= scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE
), scm_keyword_postfix
);
603 scm_ungetc (chr
, port
);
606 overflow
= read_token (port
, buffer
, sizeof (buffer
), &read
);
609 ends_with_colon
= (buffer
[read
- 1] == ':');
611 if ((overflow
) || (scm_is_pair (str
)))
612 str
= scm_cons (scm_from_locale_stringn (buffer
, read
), str
);
616 if (scm_is_pair (str
))
620 str
= scm_string_concatenate (scm_reverse_x (str
, SCM_EOL
));
621 len
= scm_c_string_length (str
);
623 /* Per SRFI-88, `:' alone is an identifier, not a keyword. */
624 if (postfix
&& ends_with_colon
&& (len
> 1))
626 /* Strip off colon. */
627 str
= scm_c_substring (str
, 0, len
-1);
628 result
= scm_string_to_symbol (str
);
629 result
= scm_symbol_to_keyword (result
);
632 result
= scm_string_to_symbol (str
);
636 /* For symbols smaller than `sizeof (buffer)', we don't need to recur
637 to Scheme strings. Therefore, we only create one Scheme object (a
638 symbol) per symbol read. */
639 if (postfix
&& ends_with_colon
&& (read
> 1))
640 result
= scm_from_locale_keywordn (buffer
, read
- 1);
642 result
= scm_from_locale_symboln (buffer
, read
);
649 scm_read_number_and_radix (int chr
, SCM port
)
650 #define FUNC_NAME "scm_lreadr"
652 SCM result
, str
= SCM_EOL
;
654 char buffer
[READER_BUFFER_SIZE
];
681 scm_ungetc (chr
, port
);
682 scm_ungetc ('#', port
);
688 overflow
= read_token (port
, buffer
, sizeof (buffer
), &read
);
690 if ((overflow
) || (scm_is_pair (str
)))
691 str
= scm_cons (scm_from_locale_stringn (buffer
, read
), str
);
695 if (scm_is_pair (str
))
697 str
= scm_string_concatenate (scm_reverse_x (str
, SCM_EOL
));
698 result
= scm_string_to_number (str
, scm_from_uint (radix
));
701 result
= scm_c_locale_stringn_to_number (buffer
, read
, radix
);
703 if (scm_is_true (result
))
706 scm_i_input_error (FUNC_NAME
, port
, "unknown # object", SCM_EOL
);
713 scm_read_quote (int chr
, SCM port
)
716 long line
= SCM_LINUM (port
);
717 int column
= SCM_COL (port
) - 1;
722 p
= scm_sym_quasiquote
;
735 p
= scm_sym_uq_splicing
;
738 scm_ungetc (c
, port
);
745 fprintf (stderr
, "%s: unhandled quote character (%i)\n",
746 "scm_read_quote", chr
);
750 p
= scm_cons2 (p
, scm_read_expression (port
), SCM_EOL
);
751 if (SCM_RECORD_POSITIONS_P
)
752 scm_whash_insert (scm_source_whash
, p
,
753 scm_make_srcprops (line
, column
,
756 ? (scm_cons2 (SCM_CAR (p
),
757 SCM_CAR (SCM_CDR (p
)),
/* Symbols for the syntax-quoting forms produced by scm_read_syntax, which
   conses the selected symbol in front of the expression it reads.
   NOTE(review): only the assignment of sym_unsyntax_splicing is visible in
   this listing; the other three are presumably selected the same way --
   confirm against the full function.  */
766 SCM_SYMBOL (sym_syntax
, "syntax");
767 SCM_SYMBOL (sym_quasisyntax
, "quasisyntax");
768 SCM_SYMBOL (sym_unsyntax
, "unsyntax");
769 SCM_SYMBOL (sym_unsyntax_splicing
, "unsyntax-splicing");
772 scm_read_syntax (int chr
, SCM port
)
775 long line
= SCM_LINUM (port
);
776 int column
= SCM_COL (port
) - 1;
794 p
= sym_unsyntax_splicing
;
797 scm_ungetc (c
, port
);
804 fprintf (stderr
, "%s: unhandled syntax character (%i)\n",
805 "scm_read_syntax", chr
);
809 p
= scm_cons2 (p
, scm_read_expression (port
), SCM_EOL
);
810 if (SCM_RECORD_POSITIONS_P
)
811 scm_whash_insert (scm_source_whash
, p
,
812 scm_make_srcprops (line
, column
,
815 ? (scm_cons2 (SCM_CAR (p
),
816 SCM_CAR (SCM_CDR (p
)),
826 scm_read_semicolon_comment (int chr
, SCM port
)
830 for (c
= scm_getc (port
);
831 (c
!= EOF
) && (c
!= '\n');
832 c
= scm_getc (port
));
834 return SCM_UNSPECIFIED
;
838 /* Sharp readers, i.e. readers called after a `#' sign has been read. */
841 scm_read_boolean (int chr
, SCM port
)
854 return SCM_UNSPECIFIED
;
858 scm_read_character (int chr
, SCM port
)
859 #define FUNC_NAME "scm_lreadr"
862 char charname
[READER_CHAR_NAME_MAX_SIZE
];
865 if (read_token (port
, charname
, sizeof (charname
), &charname_len
))
868 if (charname_len
== 0)
870 chr
= scm_getc (port
);
872 scm_i_input_error (FUNC_NAME
, port
, "unexpected end of file "
873 "while reading character", SCM_EOL
);
875 /* CHR must be a token delimiter, like a whitespace. */
876 return (SCM_MAKE_CHAR (chr
));
879 if (charname_len
== 1)
880 return SCM_MAKE_CHAR (charname
[0]);
882 if (*charname
>= '0' && *charname
< '8')
884 /* Dirk:FIXME:: This type of character syntax is not R5RS
885 * compliant. Further, it should be verified that the constant
886 * does only consist of octal digits. Finally, it should be
887 * checked whether the resulting fixnum is in the range of
889 SCM p
= scm_c_locale_stringn_to_number (charname
, charname_len
, 8);
891 return SCM_MAKE_CHAR (SCM_I_INUM (p
));
894 ch
= scm_i_charname_to_char (charname
, charname_len
);
895 if (scm_is_true (ch
))
899 scm_i_input_error (FUNC_NAME
, port
, "unknown character name ~a",
900 scm_list_1 (scm_from_locale_stringn (charname
,
903 return SCM_UNSPECIFIED
;
908 scm_read_keyword (int chr
, SCM port
)
912 /* Read the symbol that comprises the keyword. Doing this instead of
913 invoking a specific symbol reader function allows `scm_read_keyword ()'
914 to adapt to the delimiters currently valid of symbols.
916 XXX: This implementation allows sloppy syntaxes like `#: key'. */
917 symbol
= scm_read_expression (port
);
918 if (!scm_is_symbol (symbol
))
919 scm_i_input_error ("scm_read_keyword", port
,
920 "keyword prefix `~a' not followed by a symbol: ~s",
921 scm_list_2 (SCM_MAKE_CHAR (chr
), symbol
));
923 return (scm_symbol_to_keyword (symbol
));
927 scm_read_vector (int chr
, SCM port
)
929 /* Note: We call `scm_read_sexp ()' rather than READER here in order to
930 guarantee that it's going to do what we want. After all, this is an
931 implementation detail of `scm_read_vector ()', not a desirable
933 return (scm_vector (scm_read_sexp (chr
, port
)));
937 scm_read_srfi4_vector (int chr
, SCM port
)
939 return scm_i_read_array (port
, chr
);
943 scm_read_bytevector (int chr
, SCM port
)
945 chr
= scm_getc (port
);
949 chr
= scm_getc (port
);
953 chr
= scm_getc (port
);
957 return scm_u8_list_to_bytevector (scm_read_sexp (chr
, port
));
960 scm_i_input_error ("read_bytevector", port
,
961 "invalid bytevector prefix",
962 SCM_MAKE_CHAR (chr
));
963 return SCM_UNSPECIFIED
;
967 scm_read_guile_bit_vector (int chr
, SCM port
)
969 /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
970 terribly inefficient but who cares? */
971 SCM s_bits
= SCM_EOL
;
973 for (chr
= scm_getc (port
);
974 (chr
!= EOF
) && ((chr
== '0') || (chr
== '1'));
975 chr
= scm_getc (port
))
977 s_bits
= scm_cons ((chr
== '0') ? SCM_BOOL_F
: SCM_BOOL_T
, s_bits
);
981 scm_ungetc (chr
, port
);
983 return scm_bitvector (scm_reverse_x (s_bits
, SCM_EOL
));
987 scm_read_scsh_block_comment (int chr
, SCM port
)
993 int c
= scm_getc (port
);
996 scm_i_input_error ("skip_block_comment", port
,
997 "unterminated `#! ... !#' comment", SCM_EOL
);
1001 else if (c
== '#' && bang_seen
)
1007 return SCM_UNSPECIFIED
;
1011 scm_read_commented_expression (int chr
, SCM port
)
1015 c
= flush_ws (port
, (char *) NULL
);
1017 scm_i_input_error ("read_commented_expression", port
,
1018 "no expression after #; comment", SCM_EOL
);
1019 scm_ungetc (c
, port
);
1020 scm_read_expression (port
);
1021 return SCM_UNSPECIFIED
;
1025 scm_read_extended_symbol (int chr
, SCM port
)
1027 /* Guile's extended symbol read syntax looks like this:
1029 #{This is all a symbol name}#
1031 So here, CHR is expected to be `{'. */
1033 int saw_brace
= 0, finished
= 0;
1037 result
= scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
1039 while ((chr
= scm_getc (port
)) != EOF
)
1055 else if (chr
== '}')
1060 if (len
>= sizeof (buf
) - 2)
1062 scm_string_append (scm_list_2 (result
,
1063 scm_from_locale_stringn (buf
, len
)));
1072 result
= scm_string_append (scm_list_2
1074 scm_from_locale_stringn (buf
, len
)));
1076 return (scm_string_to_symbol (result
));
1081 /* Top-level token readers, i.e., dispatchers. */
1084 scm_read_sharp_extension (int chr
, SCM port
)
1088 proc
= scm_get_hash_procedure (chr
);
1089 if (scm_is_true (scm_procedure_p (proc
)))
1091 long line
= SCM_LINUM (port
);
1092 int column
= SCM_COL (port
) - 2;
1095 got
= scm_call_2 (proc
, SCM_MAKE_CHAR (chr
), port
);
1096 if (!scm_is_eq (got
, SCM_UNSPECIFIED
))
1098 if (SCM_RECORD_POSITIONS_P
)
1099 return (recsexpr (got
, line
, column
,
1100 SCM_FILENAME (port
)));
1106 return SCM_UNSPECIFIED
;
1109 /* The reader for the sharp `#' character. It basically dispatches reads
1110 among the above token readers. */
1112 scm_read_sharp (int chr
, SCM port
)
1113 #define FUNC_NAME "scm_lreadr"
1117 chr
= scm_getc (port
);
1119 result
= scm_read_sharp_extension (chr
, port
);
1120 if (!scm_is_eq (result
, SCM_UNSPECIFIED
))
1126 return (scm_read_character (chr
, port
));
1128 return (scm_read_vector (chr
, port
));
1132 /* This one may return either a boolean or an SRFI-4 vector. */
1133 return (scm_read_srfi4_vector (chr
, port
));
1135 return (scm_read_bytevector (chr
, port
));
1137 return (scm_read_guile_bit_vector (chr
, port
));
1141 /* This one may return either a boolean or an SRFI-4 vector. */
1142 return (scm_read_boolean (chr
, port
));
1144 return (scm_read_keyword (chr
, port
));
1145 case '0': case '1': case '2': case '3': case '4':
1146 case '5': case '6': case '7': case '8': case '9':
1148 #if SCM_ENABLE_DEPRECATED
1149 /* See below for 'i' and 'e'. */
1156 return (scm_i_read_array (port
, chr
));
1160 #if SCM_ENABLE_DEPRECATED
1162 /* When next char is '(', it really is an old-style
1164 int next_c
= scm_getc (port
);
1166 scm_ungetc (next_c
, port
);
1168 return scm_i_read_array (port
, chr
);
1182 return (scm_read_number_and_radix (chr
, port
));
1184 return (scm_read_extended_symbol (chr
, port
));
1186 return (scm_read_scsh_block_comment (chr
, port
));
1188 return (scm_read_commented_expression (chr
, port
));
1192 return (scm_read_syntax (chr
, port
));
1194 result
= scm_read_sharp_extension (chr
, port
);
1195 if (scm_is_eq (result
, SCM_UNSPECIFIED
))
1196 scm_i_input_error (FUNC_NAME
, port
, "Unknown # object: ~S",
1197 scm_list_1 (SCM_MAKE_CHAR (chr
)));
1202 return SCM_UNSPECIFIED
;
1207 scm_read_expression (SCM port
)
1208 #define FUNC_NAME "scm_read_expression"
1214 chr
= scm_getc (port
);
1218 case SCM_WHITE_SPACES
:
1219 case SCM_LINE_INCREMENTORS
:
1222 (void) scm_read_semicolon_comment (chr
, port
);
1225 return (scm_read_sexp (chr
, port
));
1227 return (scm_read_string (chr
, port
));
1231 return (scm_read_quote (chr
, port
));
1235 result
= scm_read_sharp (chr
, port
);
1236 if (scm_is_eq (result
, SCM_UNSPECIFIED
))
1237 /* We read a comment or some such. */
1243 scm_i_input_error (FUNC_NAME
, port
, "unexpected \")\"", SCM_EOL
);
1248 if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE
), scm_keyword_prefix
))
1249 return scm_symbol_to_keyword (scm_read_expression (port
));
1254 if (((chr
>= '0') && (chr
<= '9'))
1255 || (strchr ("+-.", chr
)))
1256 return (scm_read_number (chr
, port
));
1258 return (scm_read_mixed_case_symbol (chr
, port
));
1266 /* Actual reader. */
1268 SCM_DEFINE (scm_read
, "read", 0, 1, 0,
1270 "Read an s-expression from the input port @var{port}, or from\n"
1271 "the current input port if @var{port} is not specified.\n"
1272 "Any whitespace before the next token is discarded.")
1273 #define FUNC_NAME s_scm_read
1277 if (SCM_UNBNDP (port
))
1278 port
= scm_current_input_port ();
1279 SCM_VALIDATE_OPINPORT (1, port
);
1281 c
= flush_ws (port
, (char *) NULL
);
1284 scm_ungetc (c
, port
);
1286 return (scm_read_expression (port
));
1293 /* Used when recording expressions constructed by `scm_read_sharp ()'. */
1295 recsexpr (SCM obj
, long line
, int column
, SCM filename
)
1297 if (!scm_is_pair(obj
)) {
1300 SCM tmp
= obj
, copy
;
1301 /* If this sexpr is visible in the read:sharp source, we want to
1302 keep that information, so only record non-constant cons cells
1303 which haven't previously been read by the reader. */
1304 if (scm_is_false (scm_whash_lookup (scm_source_whash
, obj
)))
1306 if (SCM_COPY_SOURCE_P
)
1308 copy
= scm_cons (recsexpr (SCM_CAR (obj
), line
, column
, filename
),
1310 while ((tmp
= SCM_CDR (tmp
)) && scm_is_pair (tmp
))
1312 SCM_SETCDR (copy
, scm_cons (recsexpr (SCM_CAR (tmp
),
1317 copy
= SCM_CDR (copy
);
1319 SCM_SETCDR (copy
, tmp
);
1323 recsexpr (SCM_CAR (obj
), line
, column
, filename
);
1324 while ((tmp
= SCM_CDR (tmp
)) && scm_is_pair (tmp
))
1325 recsexpr (SCM_CAR (tmp
), line
, column
, filename
);
1326 copy
= SCM_UNDEFINED
;
1328 scm_whash_insert (scm_source_whash
,
1330 scm_make_srcprops (line
,
1340 /* Manipulate the read-hash-procedures alist. This could be written in
1341 Scheme, but maybe it will also be used by C code during initialisation. */
1342 SCM_DEFINE (scm_read_hash_extend
, "read-hash-extend", 2, 0, 0,
1343 (SCM chr
, SCM proc
),
1344 "Install the procedure @var{proc} for reading expressions\n"
1345 "starting with the character sequence @code{#} and @var{chr}.\n"
1346 "@var{proc} will be called with two arguments: the character\n"
1347 "@var{chr} and the port to read further data from. The object\n"
1348 "returned will be the return value of @code{read}. \n"
1349 "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1351 #define FUNC_NAME s_scm_read_hash_extend
1356 SCM_VALIDATE_CHAR (1, chr
);
1357 SCM_ASSERT (scm_is_false (proc
)
1358 || scm_is_eq (scm_procedure_p (proc
), SCM_BOOL_T
),
1359 proc
, SCM_ARG2
, FUNC_NAME
);
1361 /* Check if chr is already in the alist. */
1362 this = *scm_read_hash_procedures
;
1366 if (scm_is_null (this))
1368 /* not found, so add it to the beginning. */
1369 if (scm_is_true (proc
))
1371 *scm_read_hash_procedures
=
1372 scm_cons (scm_cons (chr
, proc
), *scm_read_hash_procedures
);
1376 if (scm_is_eq (chr
, SCM_CAAR (this)))
1378 /* already in the alist. */
1379 if (scm_is_false (proc
))
1382 if (scm_is_false (prev
))
1384 *scm_read_hash_procedures
=
1385 SCM_CDR (*scm_read_hash_procedures
);
1388 scm_set_cdr_x (prev
, SCM_CDR (this));
1393 scm_set_cdr_x (SCM_CAR (this), proc
);
1398 this = SCM_CDR (this);
1401 return SCM_UNSPECIFIED
;
1405 /* Recover the read-hash procedure corresponding to char c. */
1407 scm_get_hash_procedure (int c
)
1409 SCM rest
= *scm_read_hash_procedures
;
1413 if (scm_is_null (rest
))
1416 if (SCM_CHAR (SCM_CAAR (rest
)) == c
)
1417 return SCM_CDAR (rest
);
1419 rest
= SCM_CDR (rest
);
1426 scm_read_hash_procedures
=
1427 SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL
));
1429 scm_init_opts (scm_read_options
, scm_read_opts
);
1430 #include "libguile/read.x"