1 /* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007, 2008, 2009 Free Software
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
33 #include "libguile/_scm.h"
34 #include "libguile/bytevectors.h"
35 #include "libguile/chars.h"
36 #include "libguile/eval.h"
37 #include "libguile/arrays.h"
38 #include "libguile/bitvectors.h"
39 #include "libguile/keywords.h"
40 #include "libguile/alist.h"
41 #include "libguile/srcprop.h"
42 #include "libguile/hashtab.h"
43 #include "libguile/hash.h"
44 #include "libguile/ports.h"
45 #include "libguile/fports.h"
46 #include "libguile/root.h"
47 #include "libguile/strings.h"
48 #include "libguile/strports.h"
49 #include "libguile/vectors.h"
50 #include "libguile/validate.h"
51 #include "libguile/srfi-4.h"
52 #include "libguile/srfi-13.h"
54 #include "libguile/read.h"
55 #include "libguile/private-options.h"
60 SCM_GLOBAL_SYMBOL (scm_sym_dot
, ".");
61 SCM_SYMBOL (scm_keyword_prefix
, "prefix");
62 SCM_SYMBOL (scm_keyword_postfix
, "postfix");
64 scm_t_option scm_read_opts
[] = {
65 { SCM_OPTION_BOOLEAN
, "copy", 0,
66 "Copy source code expressions." },
67 { SCM_OPTION_BOOLEAN
, "positions", 0,
68 "Record positions of source code expressions." },
69 { SCM_OPTION_BOOLEAN
, "case-insensitive", 0,
70 "Convert symbols to lower case."},
71 { SCM_OPTION_SCM
, "keywords", (unsigned long) SCM_BOOL_F
,
72 "Style of keyword recognition: #f, 'prefix or 'postfix."},
74 { SCM_OPTION_BOOLEAN
, "elisp-vectors", 0,
75 "Support Elisp vector syntax, namely `[...]'."},
76 { SCM_OPTION_BOOLEAN
, "elisp-strings", 0,
77 "Support `\\(' and `\\)' in strings."},
83 Give meaningful error messages for errors
87 FILE:LINE:COL: MESSAGE
90 This is not standard GNU format, but the test-suite likes the real
91 message to be in front.
97 scm_i_input_error (char const *function
,
98 SCM port
, const char *message
, SCM arg
)
100 SCM fn
= (scm_is_string (SCM_FILENAME(port
))
102 : scm_from_locale_string ("#<unknown port>"));
104 SCM string_port
= scm_open_output_string ();
105 SCM string
= SCM_EOL
;
106 scm_simple_format (string_port
,
107 scm_from_locale_string ("~A:~S:~S: ~A"),
109 scm_from_long (SCM_LINUM (port
) + 1),
110 scm_from_int (SCM_COL (port
) + 1),
111 scm_from_locale_string (message
)));
113 string
= scm_get_output_string (string_port
);
114 scm_close_output_port (string_port
);
115 scm_error_scm (scm_from_locale_symbol ("read-error"),
116 function
? scm_from_locale_string (function
) : SCM_BOOL_F
,
123 SCM_DEFINE (scm_read_options
, "read-options-interface", 0, 1, 0,
125 "Option interface for the read options. Instead of using\n"
126 "this procedure directly, use the procedures @code{read-enable},\n"
127 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
128 #define FUNC_NAME s_scm_read_options
130 SCM ans
= scm_options (setting
,
133 if (SCM_COPY_SOURCE_P
)
134 SCM_RECORD_POSITIONS_P
= 1;
139 /* An association list mapping extra hash characters to procedures. */
140 static SCM
*scm_read_hash_procedures
;
147 /* Size of the C buffer used to read symbols and numbers. */
148 #define READER_BUFFER_SIZE 128
150 /* Size of the C buffer used to read strings. */
151 #define READER_STRING_BUFFER_SIZE 512
153 /* The maximum size of Scheme character names. */
154 #define READER_CHAR_NAME_MAX_SIZE 50
157 /* `isblank' is only in C99. */
158 #define CHAR_IS_BLANK_(_chr) \
159 (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n') \
160 || ((_chr) == '\f') || ((_chr) == '\r'))
163 # define CHAR_IS_BLANK(_chr) \
164 ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
166 # define CHAR_IS_BLANK CHAR_IS_BLANK_
170 /* R5RS one-character delimiters (see section 7.1.1, ``Lexical
172 #define CHAR_IS_R5RS_DELIMITER(c) \
174 || (c == ')') || (c == '(') || (c == ';') || (c == '"'))
176 #define CHAR_IS_DELIMITER CHAR_IS_R5RS_DELIMITER
178 /* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
180 #define CHAR_IS_EXPONENT_MARKER(_chr) \
181 (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
182 || ((_chr) == 'd') || ((_chr) == 'l'))
184 /* Read an SCSH block comment. */
185 static inline SCM
scm_read_scsh_block_comment (scm_t_wchar
, SCM
);
186 static SCM
scm_read_r6rs_block_comment (scm_t_wchar
, SCM
);
187 static SCM
scm_read_commented_expression (scm_t_wchar
, SCM
);
188 static SCM
scm_get_hash_procedure (int);
190 /* Read from PORT until a delimiter (e.g., a whitespace) is read. Return
191 zero if the whole token fits in BUF, non-zero otherwise. */
193 read_token (SCM port
, SCM buf
, size_t *read
)
198 while (*read
< scm_i_string_length (buf
))
200 chr
= scm_getc (port
);
205 chr
= (SCM_CASE_INSENSITIVE_P
? uc_tolower (chr
) : chr
);
207 if (CHAR_IS_DELIMITER (chr
))
209 scm_ungetc (chr
, port
);
213 scm_i_string_set_x (buf
, *read
, chr
);
221 read_complete_token (SCM port
, size_t *read
)
225 size_t overflow_read
;
228 buffer
= scm_i_make_string (READER_BUFFER_SIZE
, NULL
);
229 overflow
= read_token (port
, buffer
, read
);
232 tail
= scm_cons (buffer
, tail
);
233 buffer
= scm_i_make_string (READER_BUFFER_SIZE
, NULL
);
234 overflow
= read_token (port
, buffer
, &overflow_read
);
235 *read
+= overflow_read
;
238 if (scm_is_null (tail
))
239 return scm_i_substring (buffer
, 0, *read
);
241 return scm_string_append
242 (scm_reverse (scm_cons (scm_i_substring (buffer
, 0, overflow_read
),
246 /* Skip whitespace from PORT and return the first non-whitespace character
247 read. Raise an error on end-of-file. */
249 flush_ws (SCM port
, const char *eoferr
)
251 register scm_t_wchar c
;
253 switch (c
= scm_getc (port
))
259 scm_i_input_error (eoferr
,
268 switch (c
= scm_getc (port
))
274 case SCM_LINE_INCREMENTORS
:
280 switch (c
= scm_getc (port
))
283 eoferr
= "read_sharp";
286 scm_read_scsh_block_comment (c
, port
);
289 scm_read_commented_expression (c
, port
);
292 if (scm_is_false (scm_get_hash_procedure (c
)))
294 scm_read_r6rs_block_comment (c
, port
);
299 scm_ungetc (c
, port
);
304 case SCM_LINE_INCREMENTORS
:
305 case SCM_SINGLE_SPACES
:
320 static SCM
scm_read_expression (SCM port
);
321 static SCM
scm_read_sharp (int chr
, SCM port
);
322 static SCM
recsexpr (SCM obj
, long line
, int column
, SCM filename
);
326 scm_read_sexp (scm_t_wchar chr
, SCM port
)
327 #define FUNC_NAME "scm_i_lreadparen"
331 register SCM tl
, ans
= SCM_EOL
;
332 SCM tl2
= SCM_EOL
, ans2
= SCM_EOL
, copy
= SCM_BOOL_F
;
333 static const int terminating_char
= ')';
335 /* Need to capture line and column numbers here. */
336 long line
= SCM_LINUM (port
);
337 int column
= SCM_COL (port
) - 1;
340 c
= flush_ws (port
, FUNC_NAME
);
341 if (terminating_char
== c
)
344 scm_ungetc (c
, port
);
345 if (scm_is_eq (scm_sym_dot
,
346 (tmp
= scm_read_expression (port
))))
348 ans
= scm_read_expression (port
);
349 if (terminating_char
!= (c
= flush_ws (port
, FUNC_NAME
)))
350 scm_i_input_error (FUNC_NAME
, port
, "missing close paren",
355 /* Build the head of the list structure. */
356 ans
= tl
= scm_cons (tmp
, SCM_EOL
);
358 if (SCM_COPY_SOURCE_P
)
359 ans2
= tl2
= scm_cons (scm_is_pair (tmp
)
364 while (terminating_char
!= (c
= flush_ws (port
, FUNC_NAME
)))
368 scm_ungetc (c
, port
);
369 if (scm_is_eq (scm_sym_dot
,
370 (tmp
= scm_read_expression (port
))))
372 SCM_SETCDR (tl
, tmp
= scm_read_expression (port
));
374 if (SCM_COPY_SOURCE_P
)
375 SCM_SETCDR (tl2
, scm_cons (scm_is_pair (tmp
) ? copy
: tmp
,
378 c
= flush_ws (port
, FUNC_NAME
);
379 if (terminating_char
!= c
)
380 scm_i_input_error (FUNC_NAME
, port
,
381 "in pair: missing close paren", SCM_EOL
);
385 new_tail
= scm_cons (tmp
, SCM_EOL
);
386 SCM_SETCDR (tl
, new_tail
);
389 if (SCM_COPY_SOURCE_P
)
391 SCM new_tail2
= scm_cons (scm_is_pair (tmp
)
394 SCM_SETCDR (tl2
, new_tail2
);
400 if (SCM_RECORD_POSITIONS_P
)
401 scm_whash_insert (scm_source_whash
,
403 scm_make_srcprops (line
, column
,
414 scm_read_string (int chr
, SCM port
)
415 #define FUNC_NAME "scm_lreadr"
417 /* For strings smaller than C_STR, this function creates only one Scheme
418 object (the string returned). */
420 SCM str
= SCM_BOOL_F
;
421 unsigned c_str_len
= 0;
424 str
= scm_i_make_string (READER_STRING_BUFFER_SIZE
, NULL
);
425 while ('"' != (c
= scm_getc (port
)))
430 scm_i_input_error (FUNC_NAME
, port
,
431 "end of file in string constant", SCM_EOL
);
434 if (c_str_len
+ 1 >= scm_i_string_length (str
))
436 SCM addy
= scm_i_make_string (READER_STRING_BUFFER_SIZE
, NULL
);
438 str
= scm_string_append (scm_list_2 (str
, addy
));
443 switch (c
= scm_getc (port
))
453 if (SCM_ESCAPED_PARENS_P
)
489 if ('0' <= a
&& a
<= '9')
491 else if ('A' <= a
&& a
<= 'F')
493 else if ('a' <= a
&& a
<= 'f')
500 if ('0' <= b
&& b
<= '9')
502 else if ('A' <= b
&& b
<= 'F')
504 else if ('a' <= b
&& b
<= 'f')
519 for (i
= 0; i
< 4; i
++)
524 if ('0' <= a
&& a
<= '9')
526 else if ('A' <= a
&& a
<= 'F')
528 else if ('a' <= a
&& a
<= 'f')
544 for (i
= 0; i
< 6; i
++)
549 if ('0' <= a
&& a
<= '9')
551 else if ('A' <= a
&& a
<= 'F')
553 else if ('a' <= a
&& a
<= 'f')
566 scm_i_input_error (FUNC_NAME
, port
,
567 "illegal character in escape sequence: ~S",
568 scm_list_1 (SCM_MAKE_CHAR (c
)));
571 str
= scm_i_string_start_writing (str
);
572 scm_i_string_set_x (str
, c_str_len
++, c
);
573 scm_i_string_stop_writing ();
578 return scm_i_substring_copy (str
, 0, c_str_len
);
587 scm_read_number (scm_t_wchar chr
, SCM port
)
593 scm_ungetc (chr
, port
);
594 buffer
= read_complete_token (port
, &read
);
595 result
= scm_string_to_number (buffer
, SCM_UNDEFINED
);
596 if (!scm_is_true (result
))
597 /* Return a symbol instead of a number. */
598 result
= scm_string_to_symbol (buffer
);
604 scm_read_mixed_case_symbol (scm_t_wchar chr
, SCM port
)
607 int ends_with_colon
= 0;
610 int postfix
= scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE
), scm_keyword_postfix
);
612 scm_ungetc (chr
, port
);
613 buffer
= read_complete_token (port
, &read
);
615 ends_with_colon
= scm_i_string_ref (buffer
, read
- 1) == ':';
617 if (postfix
&& ends_with_colon
&& (read
> 1))
618 result
= scm_symbol_to_keyword (scm_string_to_symbol (scm_i_substring (buffer
, 0, read
- 1)));
620 result
= scm_string_to_symbol (buffer
);
626 scm_read_number_and_radix (scm_t_wchar chr
, SCM port
)
627 #define FUNC_NAME "scm_lreadr"
631 SCM buffer
= scm_i_make_string (READER_BUFFER_SIZE
, NULL
);
657 scm_ungetc (chr
, port
);
658 scm_ungetc ('#', port
);
662 buffer
= read_complete_token (port
, &read
);
663 result
= scm_string_to_number (buffer
, scm_from_uint (radix
));
665 if (scm_is_true (result
))
668 scm_i_input_error (FUNC_NAME
, port
, "unknown # object", SCM_EOL
);
675 scm_read_quote (int chr
, SCM port
)
678 long line
= SCM_LINUM (port
);
679 int column
= SCM_COL (port
) - 1;
684 p
= scm_sym_quasiquote
;
697 p
= scm_sym_uq_splicing
;
700 scm_ungetc (c
, port
);
707 fprintf (stderr
, "%s: unhandled quote character (%i)\n",
708 "scm_read_quote", chr
);
712 p
= scm_cons2 (p
, scm_read_expression (port
), SCM_EOL
);
713 if (SCM_RECORD_POSITIONS_P
)
714 scm_whash_insert (scm_source_whash
, p
,
715 scm_make_srcprops (line
, column
,
718 ? (scm_cons2 (SCM_CAR (p
),
719 SCM_CAR (SCM_CDR (p
)),
728 SCM_SYMBOL (sym_syntax
, "syntax");
729 SCM_SYMBOL (sym_quasisyntax
, "quasisyntax");
730 SCM_SYMBOL (sym_unsyntax
, "unsyntax");
731 SCM_SYMBOL (sym_unsyntax_splicing
, "unsyntax-splicing");
734 scm_read_syntax (int chr
, SCM port
)
737 long line
= SCM_LINUM (port
);
738 int column
= SCM_COL (port
) - 1;
756 p
= sym_unsyntax_splicing
;
759 scm_ungetc (c
, port
);
766 fprintf (stderr
, "%s: unhandled syntax character (%i)\n",
767 "scm_read_syntax", chr
);
771 p
= scm_cons2 (p
, scm_read_expression (port
), SCM_EOL
);
772 if (SCM_RECORD_POSITIONS_P
)
773 scm_whash_insert (scm_source_whash
, p
,
774 scm_make_srcprops (line
, column
,
777 ? (scm_cons2 (SCM_CAR (p
),
778 SCM_CAR (SCM_CDR (p
)),
788 scm_read_semicolon_comment (int chr
, SCM port
)
792 /* We use the get_byte here because there is no need to get the
793 locale correct with comment input. This presumes that newline
794 always represents itself no matter what the encoding is. */
795 for (c
= scm_get_byte_or_eof (port
);
796 (c
!= EOF
) && (c
!= '\n');
797 c
= scm_getc (port
));
799 return SCM_UNSPECIFIED
;
803 /* Sharp readers, i.e. readers called after a `#' sign has been read. */
806 scm_read_boolean (int chr
, SCM port
)
819 return SCM_UNSPECIFIED
;
823 scm_read_character (scm_t_wchar chr
, SCM port
)
824 #define FUNC_NAME "scm_lreadr"
826 SCM charname
= scm_i_make_string (READER_CHAR_NAME_MAX_SIZE
, NULL
);
831 overflow
= read_token (port
, charname
, &charname_len
);
832 charname
= scm_c_substring (charname
, 0, charname_len
);
837 if (charname_len
== 0)
839 chr
= scm_getc (port
);
841 scm_i_input_error (FUNC_NAME
, port
, "unexpected end of file "
842 "while reading character", SCM_EOL
);
844 /* CHR must be a token delimiter, like a whitespace. */
845 return (SCM_MAKE_CHAR (chr
));
848 if (charname_len
== 1)
849 return SCM_MAKE_CHAR (scm_i_string_ref (charname
, 0));
851 cp
= scm_i_string_ref (charname
, 0);
852 if (cp
== SCM_CODEPOINT_DOTTED_CIRCLE
&& charname_len
== 2)
853 return SCM_MAKE_CHAR (scm_i_string_ref (charname
, 1));
855 if (cp
>= '0' && cp
< '8')
857 /* Dirk:FIXME:: This type of character syntax is not R5RS
858 * compliant. Further, it should be verified that the constant
859 * does only consist of octal digits. */
860 SCM p
= scm_string_to_number (charname
, scm_from_uint (8));
863 scm_t_wchar c
= SCM_I_INUM (p
);
864 if (SCM_IS_UNICODE_CHAR (c
))
865 return SCM_MAKE_CHAR (c
);
867 scm_i_input_error (FUNC_NAME
, port
,
868 "out-of-range octal character escape: ~a",
869 scm_list_1 (charname
));
873 /* The names of characters should never have non-Latin1
875 if (scm_i_is_narrow_string (charname
)
876 || scm_i_try_narrow_string (charname
))
877 { SCM ch
= scm_i_charname_to_char (scm_i_string_chars (charname
),
879 if (scm_is_true (ch
))
884 scm_i_input_error (FUNC_NAME
, port
, "unknown character name ~a",
885 scm_list_1 (charname
));
887 return SCM_UNSPECIFIED
;
892 scm_read_keyword (int chr
, SCM port
)
896 /* Read the symbol that comprises the keyword. Doing this instead of
897 invoking a specific symbol reader function allows `scm_read_keyword ()'
898 to adapt to the delimiters currently valid of symbols.
900 XXX: This implementation allows sloppy syntaxes like `#: key'. */
901 symbol
= scm_read_expression (port
);
902 if (!scm_is_symbol (symbol
))
903 scm_i_input_error ("scm_read_keyword", port
,
904 "keyword prefix `~a' not followed by a symbol: ~s",
905 scm_list_2 (SCM_MAKE_CHAR (chr
), symbol
));
907 return (scm_symbol_to_keyword (symbol
));
911 scm_read_vector (int chr
, SCM port
)
913 /* Note: We call `scm_read_sexp ()' rather than READER here in order to
914 guarantee that it's going to do what we want. After all, this is an
915 implementation detail of `scm_read_vector ()', not a desirable
917 return (scm_vector (scm_read_sexp (chr
, port
)));
921 scm_read_srfi4_vector (int chr
, SCM port
)
923 return scm_i_read_array (port
, chr
);
927 scm_read_bytevector (scm_t_wchar chr
, SCM port
)
929 chr
= scm_getc (port
);
933 chr
= scm_getc (port
);
937 chr
= scm_getc (port
);
941 return scm_u8_list_to_bytevector (scm_read_sexp (chr
, port
));
944 scm_i_input_error ("read_bytevector", port
,
945 "invalid bytevector prefix",
946 SCM_MAKE_CHAR (chr
));
947 return SCM_UNSPECIFIED
;
951 scm_read_guile_bit_vector (scm_t_wchar chr
, SCM port
)
953 /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
954 terribly inefficient but who cares? */
955 SCM s_bits
= SCM_EOL
;
957 for (chr
= scm_getc (port
);
958 (chr
!= EOF
) && ((chr
== '0') || (chr
== '1'));
959 chr
= scm_getc (port
))
961 s_bits
= scm_cons ((chr
== '0') ? SCM_BOOL_F
: SCM_BOOL_T
, s_bits
);
965 scm_ungetc (chr
, port
);
967 return scm_bitvector (scm_reverse_x (s_bits
, SCM_EOL
));
971 scm_read_scsh_block_comment (scm_t_wchar chr
, SCM port
)
975 /* We can use the get_byte here because there is no need to get the
976 locale correct when reading comments. This presumes that
977 hash and exclamation points always represent themselves no
978 matter what the source encoding is.*/
981 int c
= scm_get_byte_or_eof (port
);
984 scm_i_input_error ("skip_block_comment", port
,
985 "unterminated `#! ... !#' comment", SCM_EOL
);
989 else if (c
== '#' && bang_seen
)
995 return SCM_UNSPECIFIED
;
999 scm_read_r6rs_block_comment (scm_t_wchar chr
, SCM port
)
1001 /* Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
1002 nested. So care must be taken. */
1003 int nesting_level
= 1;
1004 int opening_seen
= 0, closing_seen
= 0;
1006 while (nesting_level
> 0)
1008 int c
= scm_getc (port
);
1011 scm_i_input_error ("scm_read_r6rs_block_comment", port
,
1012 "unterminated `#| ... |#' comment", SCM_EOL
);
1020 else if (closing_seen
)
1031 opening_seen
= closing_seen
= 0;
1034 return SCM_UNSPECIFIED
;
1038 scm_read_commented_expression (scm_t_wchar chr
, SCM port
)
1042 c
= flush_ws (port
, (char *) NULL
);
1044 scm_i_input_error ("read_commented_expression", port
,
1045 "no expression after #; comment", SCM_EOL
);
1046 scm_ungetc (c
, port
);
1047 scm_read_expression (port
);
1048 return SCM_UNSPECIFIED
;
1052 scm_read_extended_symbol (scm_t_wchar chr
, SCM port
)
1054 /* Guile's extended symbol read syntax looks like this:
1056 #{This is all a symbol name}#
1058 So here, CHR is expected to be `{'. */
1059 int saw_brace
= 0, finished
= 0;
1061 SCM buf
= scm_i_make_string (1024, NULL
);
1063 buf
= scm_i_string_start_writing (buf
);
1065 while ((chr
= scm_getc (port
)) != EOF
)
1077 scm_i_string_set_x (buf
, len
++, '}');
1078 scm_i_string_set_x (buf
, len
++, chr
);
1081 else if (chr
== '}')
1084 scm_i_string_set_x (buf
, len
++, chr
);
1086 if (len
>= scm_i_string_length (buf
) - 2)
1090 scm_i_string_stop_writing ();
1091 addy
= scm_i_make_string (1024, NULL
);
1092 buf
= scm_string_append (scm_list_2 (buf
, addy
));
1094 buf
= scm_i_string_start_writing (buf
);
1100 scm_i_string_stop_writing ();
1102 return (scm_string_to_symbol (scm_c_substring (buf
, 0, len
)));
1107 /* Top-level token readers, i.e., dispatchers. */
1110 scm_read_sharp_extension (int chr
, SCM port
)
1114 proc
= scm_get_hash_procedure (chr
);
1115 if (scm_is_true (scm_procedure_p (proc
)))
1117 long line
= SCM_LINUM (port
);
1118 int column
= SCM_COL (port
) - 2;
1121 got
= scm_call_2 (proc
, SCM_MAKE_CHAR (chr
), port
);
1122 if (!scm_is_eq (got
, SCM_UNSPECIFIED
))
1124 if (SCM_RECORD_POSITIONS_P
)
1125 return (recsexpr (got
, line
, column
,
1126 SCM_FILENAME (port
)));
1132 return SCM_UNSPECIFIED
;
1135 /* The reader for the sharp `#' character. It basically dispatches reads
1136 among the above token readers. */
1138 scm_read_sharp (scm_t_wchar chr
, SCM port
)
1139 #define FUNC_NAME "scm_lreadr"
1143 chr
= scm_getc (port
);
1145 result
= scm_read_sharp_extension (chr
, port
);
1146 if (!scm_is_eq (result
, SCM_UNSPECIFIED
))
1152 return (scm_read_character (chr
, port
));
1154 return (scm_read_vector (chr
, port
));
1158 /* This one may return either a boolean or an SRFI-4 vector. */
1159 return (scm_read_srfi4_vector (chr
, port
));
1161 return (scm_read_bytevector (chr
, port
));
1163 return (scm_read_guile_bit_vector (chr
, port
));
1167 /* This one may return either a boolean or an SRFI-4 vector. */
1168 return (scm_read_boolean (chr
, port
));
1170 return (scm_read_keyword (chr
, port
));
1171 case '0': case '1': case '2': case '3': case '4':
1172 case '5': case '6': case '7': case '8': case '9':
1174 #if SCM_ENABLE_DEPRECATED
1175 /* See below for 'i' and 'e'. */
1182 return (scm_i_read_array (port
, chr
));
1186 #if SCM_ENABLE_DEPRECATED
1188 /* When next char is '(', it really is an old-style
1190 scm_t_wchar next_c
= scm_getc (port
);
1192 scm_ungetc (next_c
, port
);
1194 return scm_i_read_array (port
, chr
);
1208 return (scm_read_number_and_radix (chr
, port
));
1210 return (scm_read_extended_symbol (chr
, port
));
1212 return (scm_read_scsh_block_comment (chr
, port
));
1214 return (scm_read_commented_expression (chr
, port
));
1218 return (scm_read_syntax (chr
, port
));
1220 result
= scm_read_sharp_extension (chr
, port
);
1221 if (scm_is_eq (result
, SCM_UNSPECIFIED
))
1223 /* To remain compatible with 1.8 and earlier, the following
1224 characters have lower precedence than `read-hash-extend'
1229 return scm_read_r6rs_block_comment (chr
, port
);
1231 scm_i_input_error (FUNC_NAME
, port
, "Unknown # object: ~S",
1232 scm_list_1 (SCM_MAKE_CHAR (chr
)));
1239 return SCM_UNSPECIFIED
;
1244 scm_read_expression (SCM port
)
1245 #define FUNC_NAME "scm_read_expression"
1249 register scm_t_wchar chr
;
1251 chr
= scm_getc (port
);
1255 case SCM_WHITE_SPACES
:
1256 case SCM_LINE_INCREMENTORS
:
1259 (void) scm_read_semicolon_comment (chr
, port
);
1262 return (scm_read_sexp (chr
, port
));
1264 return (scm_read_string (chr
, port
));
1268 return (scm_read_quote (chr
, port
));
1272 result
= scm_read_sharp (chr
, port
);
1273 if (scm_is_eq (result
, SCM_UNSPECIFIED
))
1274 /* We read a comment or some such. */
1280 scm_i_input_error (FUNC_NAME
, port
, "unexpected \")\"", SCM_EOL
);
1285 if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE
), scm_keyword_prefix
))
1286 return scm_symbol_to_keyword (scm_read_expression (port
));
1291 if (((chr
>= '0') && (chr
<= '9'))
1292 || (strchr ("+-.", chr
)))
1293 return (scm_read_number (chr
, port
));
1295 return (scm_read_mixed_case_symbol (chr
, port
));
1303 /* Actual reader. */
1305 SCM_DEFINE (scm_read
, "read", 0, 1, 0,
1307 "Read an s-expression from the input port @var{port}, or from\n"
1308 "the current input port if @var{port} is not specified.\n"
1309 "Any whitespace before the next token is discarded.")
1310 #define FUNC_NAME s_scm_read
1314 if (SCM_UNBNDP (port
))
1315 port
= scm_current_input_port ();
1316 SCM_VALIDATE_OPINPORT (1, port
);
1318 c
= flush_ws (port
, (char *) NULL
);
1321 scm_ungetc (c
, port
);
1323 return (scm_read_expression (port
));
1330 /* Used when recording expressions constructed by `scm_read_sharp ()'. */
1332 recsexpr (SCM obj
, long line
, int column
, SCM filename
)
1334 if (!scm_is_pair(obj
)) {
1337 SCM tmp
= obj
, copy
;
1338 /* If this sexpr is visible in the read:sharp source, we want to
1339 keep that information, so only record non-constant cons cells
1340 which haven't previously been read by the reader. */
1341 if (scm_is_false (scm_whash_lookup (scm_source_whash
, obj
)))
1343 if (SCM_COPY_SOURCE_P
)
1345 copy
= scm_cons (recsexpr (SCM_CAR (obj
), line
, column
, filename
),
1347 while ((tmp
= SCM_CDR (tmp
)) && scm_is_pair (tmp
))
1349 SCM_SETCDR (copy
, scm_cons (recsexpr (SCM_CAR (tmp
),
1354 copy
= SCM_CDR (copy
);
1356 SCM_SETCDR (copy
, tmp
);
1360 recsexpr (SCM_CAR (obj
), line
, column
, filename
);
1361 while ((tmp
= SCM_CDR (tmp
)) && scm_is_pair (tmp
))
1362 recsexpr (SCM_CAR (tmp
), line
, column
, filename
);
1363 copy
= SCM_UNDEFINED
;
1365 scm_whash_insert (scm_source_whash
,
1367 scm_make_srcprops (line
,
1377 /* Manipulate the read-hash-procedures alist. This could be written in
1378 Scheme, but maybe it will also be used by C code during initialisation. */
1379 SCM_DEFINE (scm_read_hash_extend
, "read-hash-extend", 2, 0, 0,
1380 (SCM chr
, SCM proc
),
1381 "Install the procedure @var{proc} for reading expressions\n"
1382 "starting with the character sequence @code{#} and @var{chr}.\n"
1383 "@var{proc} will be called with two arguments: the character\n"
1384 "@var{chr} and the port to read further data from. The object\n"
1385 "returned will be the return value of @code{read}. \n"
1386 "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1388 #define FUNC_NAME s_scm_read_hash_extend
1393 SCM_VALIDATE_CHAR (1, chr
);
1394 SCM_ASSERT (scm_is_false (proc
)
1395 || scm_is_eq (scm_procedure_p (proc
), SCM_BOOL_T
),
1396 proc
, SCM_ARG2
, FUNC_NAME
);
1398 /* Check if chr is already in the alist. */
1399 this = *scm_read_hash_procedures
;
1403 if (scm_is_null (this))
1405 /* not found, so add it to the beginning. */
1406 if (scm_is_true (proc
))
1408 *scm_read_hash_procedures
=
1409 scm_cons (scm_cons (chr
, proc
), *scm_read_hash_procedures
);
1413 if (scm_is_eq (chr
, SCM_CAAR (this)))
1415 /* already in the alist. */
1416 if (scm_is_false (proc
))
1419 if (scm_is_false (prev
))
1421 *scm_read_hash_procedures
=
1422 SCM_CDR (*scm_read_hash_procedures
);
1425 scm_set_cdr_x (prev
, SCM_CDR (this));
1430 scm_set_cdr_x (SCM_CAR (this), proc
);
1435 this = SCM_CDR (this);
1438 return SCM_UNSPECIFIED
;
1442 /* Recover the read-hash procedure corresponding to char c. */
1444 scm_get_hash_procedure (int c
)
1446 SCM rest
= *scm_read_hash_procedures
;
1450 if (scm_is_null (rest
))
1453 if (SCM_CHAR (SCM_CAAR (rest
)) == c
)
1454 return SCM_CDAR (rest
);
1456 rest
= SCM_CDR (rest
);
1460 #define SCM_ENCODING_SEARCH_SIZE (500)
1462 /* Search the first few hundred characters of a file for an Emacs-like coding
1463 declaration. Returns either NULL or a string whose storage has been
1464 allocated with `scm_gc_malloc ()'. */
1466 scm_i_scan_for_encoding (SCM port
)
1468 char header
[SCM_ENCODING_SEARCH_SIZE
+1];
1470 char *encoding
= NULL
;
1476 if (SCM_FPORTP (port
) && !SCM_FDES_RANDOM_P (SCM_FPORT_FDES (port
)))
1477 /* PORT is a non-seekable file port (e.g., as created by Bash when using
1478 "guile <(echo '(display "hello")')") so bail out. */
1481 bytes_read
= scm_c_read (port
, header
, SCM_ENCODING_SEARCH_SIZE
);
1483 scm_seek (port
, scm_from_int (0), scm_from_int (SEEK_SET
));
1486 && header
[0] == '\xef' && header
[1] == '\xbb' && header
[2] == '\xbf')
1489 /* search past "coding[:=]" */
1493 if ((pos
= strstr(pos
, "coding")) == NULL
)
1496 pos
+= strlen("coding");
1497 if (pos
- header
>= SCM_ENCODING_SEARCH_SIZE
||
1498 (*pos
== ':' || *pos
== '='))
1506 while (pos
- header
<= SCM_ENCODING_SEARCH_SIZE
&&
1507 (*pos
== ' ' || *pos
== '\t'))
1510 /* grab the next token */
1512 while (pos
+ i
- header
<= SCM_ENCODING_SEARCH_SIZE
1513 && pos
+ i
- header
< bytes_read
1514 && (isalnum ((int) pos
[i
]) || strchr ("_-.:/,+=()", pos
[i
]) != NULL
))
1520 encoding
= scm_gc_strndup (pos
, i
, "encoding");
1521 for (i
= 0; i
< strlen (encoding
); i
++)
1522 encoding
[i
] = toupper ((int) encoding
[i
]);
1524 /* push backwards to make sure we were in a comment */
1526 while (pos
- i
- header
> 0)
1528 if (*(pos
- i
) == '\n')
1530 /* This wasn't in a semicolon comment. Check for a
1531 hash-bang comment. */
1532 char *beg
= strstr (header
, "#!");
1533 char *end
= strstr (header
, "!#");
1534 if (beg
< pos
&& pos
< end
)
1538 if (*(pos
- i
) == ';')
1546 /* This wasn't in a comment */
1549 if (utf8_bom
&& strcmp(encoding
, "UTF-8"))
1550 scm_misc_error (NULL
,
1551 "the port input declares the encoding ~s but is encoded as UTF-8",
1552 scm_list_1 (scm_from_locale_string (encoding
)));
1557 SCM_DEFINE (scm_file_encoding
, "file-encoding", 1, 0, 0,
1559 "Scans the port for an Emacs-like character coding declaration\n"
1560 "near the top of the contents of a port with random-acessible contents.\n"
1561 "The coding declaration is of the form\n"
1562 "@code{coding: XXXXX} and must appear in a scheme comment.\n"
1564 "Returns a string containing the character encoding of the file\n"
1565 "if a declaration was found, or @code{#f} otherwise.\n")
1566 #define FUNC_NAME s_scm_file_encoding
1571 enc
= scm_i_scan_for_encoding (port
);
1576 s_enc
= scm_from_locale_string (enc
);
1587 scm_read_hash_procedures
=
1588 SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL
));
1590 scm_init_opts (scm_read_options
, scm_read_opts
);
1591 #include "libguile/read.x"