1 /* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007, 2008, 2009 Free Software
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
33 #include "libguile/_scm.h"
34 #include "libguile/bytevectors.h"
35 #include "libguile/chars.h"
36 #include "libguile/eval.h"
37 #include "libguile/arrays.h"
38 #include "libguile/bitvectors.h"
39 #include "libguile/keywords.h"
40 #include "libguile/alist.h"
41 #include "libguile/srcprop.h"
42 #include "libguile/hashtab.h"
43 #include "libguile/hash.h"
44 #include "libguile/ports.h"
45 #include "libguile/root.h"
46 #include "libguile/strings.h"
47 #include "libguile/strports.h"
48 #include "libguile/vectors.h"
49 #include "libguile/validate.h"
50 #include "libguile/srfi-4.h"
51 #include "libguile/srfi-13.h"
53 #include "libguile/read.h"
54 #include "libguile/private-options.h"
59 SCM_GLOBAL_SYMBOL (scm_sym_dot
, ".");
60 SCM_SYMBOL (scm_keyword_prefix
, "prefix");
61 SCM_SYMBOL (scm_keyword_postfix
, "postfix");
63 scm_t_option scm_read_opts
[] = {
64 { SCM_OPTION_BOOLEAN
, "copy", 0,
65 "Copy source code expressions." },
66 { SCM_OPTION_BOOLEAN
, "positions", 0,
67 "Record positions of source code expressions." },
68 { SCM_OPTION_BOOLEAN
, "case-insensitive", 0,
69 "Convert symbols to lower case."},
70 { SCM_OPTION_SCM
, "keywords", SCM_UNPACK (SCM_BOOL_F
),
71 "Style of keyword recognition: #f, 'prefix or 'postfix."},
73 { SCM_OPTION_BOOLEAN
, "elisp-vectors", 0,
74 "Support Elisp vector syntax, namely `[...]'."},
75 { SCM_OPTION_BOOLEAN
, "elisp-strings", 0,
76 "Support `\\(' and `\\)' in strings."},
82 Give meaningful error messages for errors
86 FILE:LINE:COL: MESSAGE
89 This is not standard GNU format, but the test-suite likes the real
90 message to be in front.
96 scm_i_input_error (char const *function
,
97 SCM port
, const char *message
, SCM arg
)
99 SCM fn
= (scm_is_string (SCM_FILENAME(port
))
101 : scm_from_locale_string ("#<unknown port>"));
103 SCM string_port
= scm_open_output_string ();
104 SCM string
= SCM_EOL
;
105 scm_simple_format (string_port
,
106 scm_from_locale_string ("~A:~S:~S: ~A"),
108 scm_from_long (SCM_LINUM (port
) + 1),
109 scm_from_int (SCM_COL (port
) + 1),
110 scm_from_locale_string (message
)));
112 string
= scm_get_output_string (string_port
);
113 scm_close_output_port (string_port
);
114 scm_error_scm (scm_from_locale_symbol ("read-error"),
115 function
? scm_from_locale_string (function
) : SCM_BOOL_F
,
122 SCM_DEFINE (scm_read_options
, "read-options-interface", 0, 1, 0,
124 "Option interface for the read options. Instead of using\n"
125 "this procedure directly, use the procedures @code{read-enable},\n"
126 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
127 #define FUNC_NAME s_scm_read_options
129 SCM ans
= scm_options (setting
,
132 if (SCM_COPY_SOURCE_P
)
133 SCM_RECORD_POSITIONS_P
= 1;
138 /* An association list mapping extra hash characters to procedures. */
139 static SCM
*scm_read_hash_procedures
;
146 /* Size of the C buffer used to read symbols and numbers. */
147 #define READER_BUFFER_SIZE 128
149 /* Size of the C buffer used to read strings. */
150 #define READER_STRING_BUFFER_SIZE 512
152 /* The maximum size of Scheme character names. */
153 #define READER_CHAR_NAME_MAX_SIZE 50
156 /* `isblank' is only in C99. */
157 #define CHAR_IS_BLANK_(_chr) \
158 (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n') \
159 || ((_chr) == '\f') || ((_chr) == '\r'))
162 # define CHAR_IS_BLANK(_chr) \
163 ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
165 # define CHAR_IS_BLANK CHAR_IS_BLANK_
169 /* R5RS one-character delimiters (see section 7.1.1, ``Lexical
171 #define CHAR_IS_R5RS_DELIMITER(c) \
173 || (c == ')') || (c == '(') || (c == ';') || (c == '"'))
175 #define CHAR_IS_DELIMITER CHAR_IS_R5RS_DELIMITER
177 /* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
179 #define CHAR_IS_EXPONENT_MARKER(_chr) \
180 (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
181 || ((_chr) == 'd') || ((_chr) == 'l'))
183 /* Read an SCSH block comment. */
184 static inline SCM
scm_read_scsh_block_comment (scm_t_wchar
, SCM
);
185 static SCM
scm_read_r6rs_block_comment (scm_t_wchar
, SCM
);
186 static SCM
scm_read_commented_expression (scm_t_wchar
, SCM
);
187 static SCM
scm_get_hash_procedure (int);
189 /* Read from PORT until a delimiter (e.g., a whitespace) is read. Return
190 zero if the whole token fits in BUF, non-zero otherwise. */
192 read_token (SCM port
, SCM buf
, size_t *read
)
197 buf
= scm_i_string_start_writing (buf
);
198 while (*read
< scm_i_string_length (buf
))
200 chr
= scm_getc (port
);
204 scm_i_string_stop_writing ();
208 chr
= (SCM_CASE_INSENSITIVE_P
? uc_tolower (chr
) : chr
);
210 if (CHAR_IS_DELIMITER (chr
))
212 scm_i_string_stop_writing ();
213 scm_ungetc (chr
, port
);
217 scm_i_string_set_x (buf
, *read
, chr
);
220 scm_i_string_stop_writing ();
226 read_complete_token (SCM port
, size_t *read
)
228 SCM buffer
, str
= SCM_EOL
;
232 buffer
= scm_i_make_string (READER_BUFFER_SIZE
, NULL
);
233 overflow
= read_token (port
, buffer
, read
);
235 return scm_i_substring (buffer
, 0, *read
);
237 str
= scm_string_copy (buffer
);
240 overflow
= read_token (port
, buffer
, &len
);
241 str
= scm_string_append (scm_list_2 (str
, buffer
));
246 return scm_i_substring (str
, 0, *read
);
249 /* Skip whitespace from PORT and return the first non-whitespace character
250 read. Raise an error on end-of-file. */
252 flush_ws (SCM port
, const char *eoferr
)
254 register scm_t_wchar c
;
256 switch (c
= scm_getc (port
))
262 scm_i_input_error (eoferr
,
271 switch (c
= scm_getc (port
))
277 case SCM_LINE_INCREMENTORS
:
283 switch (c
= scm_getc (port
))
286 eoferr
= "read_sharp";
289 scm_read_scsh_block_comment (c
, port
);
292 scm_read_commented_expression (c
, port
);
295 if (scm_is_false (scm_get_hash_procedure (c
)))
297 scm_read_r6rs_block_comment (c
, port
);
302 scm_ungetc (c
, port
);
307 case SCM_LINE_INCREMENTORS
:
308 case SCM_SINGLE_SPACES
:
323 static SCM
scm_read_expression (SCM port
);
324 static SCM
scm_read_sharp (int chr
, SCM port
);
325 static SCM
recsexpr (SCM obj
, long line
, int column
, SCM filename
);
329 scm_read_sexp (scm_t_wchar chr
, SCM port
)
330 #define FUNC_NAME "scm_i_lreadparen"
334 register SCM tl
, ans
= SCM_EOL
;
335 SCM tl2
= SCM_EOL
, ans2
= SCM_EOL
, copy
= SCM_BOOL_F
;
336 static const int terminating_char
= ')';
338 /* Need to capture line and column numbers here. */
339 long line
= SCM_LINUM (port
);
340 int column
= SCM_COL (port
) - 1;
343 c
= flush_ws (port
, FUNC_NAME
);
344 if (terminating_char
== c
)
347 scm_ungetc (c
, port
);
348 if (scm_is_eq (scm_sym_dot
,
349 (tmp
= scm_read_expression (port
))))
351 ans
= scm_read_expression (port
);
352 if (terminating_char
!= (c
= flush_ws (port
, FUNC_NAME
)))
353 scm_i_input_error (FUNC_NAME
, port
, "missing close paren",
358 /* Build the head of the list structure. */
359 ans
= tl
= scm_cons (tmp
, SCM_EOL
);
361 if (SCM_COPY_SOURCE_P
)
362 ans2
= tl2
= scm_cons (scm_is_pair (tmp
)
367 while (terminating_char
!= (c
= flush_ws (port
, FUNC_NAME
)))
371 scm_ungetc (c
, port
);
372 if (scm_is_eq (scm_sym_dot
,
373 (tmp
= scm_read_expression (port
))))
375 SCM_SETCDR (tl
, tmp
= scm_read_expression (port
));
377 if (SCM_COPY_SOURCE_P
)
378 SCM_SETCDR (tl2
, scm_cons (scm_is_pair (tmp
) ? copy
: tmp
,
381 c
= flush_ws (port
, FUNC_NAME
);
382 if (terminating_char
!= c
)
383 scm_i_input_error (FUNC_NAME
, port
,
384 "in pair: missing close paren", SCM_EOL
);
388 new_tail
= scm_cons (tmp
, SCM_EOL
);
389 SCM_SETCDR (tl
, new_tail
);
392 if (SCM_COPY_SOURCE_P
)
394 SCM new_tail2
= scm_cons (scm_is_pair (tmp
)
397 SCM_SETCDR (tl2
, new_tail2
);
403 if (SCM_RECORD_POSITIONS_P
)
404 scm_whash_insert (scm_source_whash
,
406 scm_make_srcprops (line
, column
,
417 scm_read_string (int chr
, SCM port
)
418 #define FUNC_NAME "scm_lreadr"
420 /* For strings smaller than C_STR, this function creates only one Scheme
421 object (the string returned). */
423 SCM str
= SCM_BOOL_F
;
424 unsigned c_str_len
= 0;
427 str
= scm_i_make_string (READER_STRING_BUFFER_SIZE
, NULL
);
428 while ('"' != (c
= scm_getc (port
)))
433 scm_i_input_error (FUNC_NAME
, port
,
434 "end of file in string constant", SCM_EOL
);
437 if (c_str_len
+ 1 >= scm_i_string_length (str
))
439 SCM addy
= scm_i_make_string (READER_STRING_BUFFER_SIZE
, NULL
);
441 str
= scm_string_append (scm_list_2 (str
, addy
));
446 switch (c
= scm_getc (port
))
456 if (SCM_ESCAPED_PARENS_P
)
492 if ('0' <= a
&& a
<= '9')
494 else if ('A' <= a
&& a
<= 'F')
496 else if ('a' <= a
&& a
<= 'f')
503 if ('0' <= b
&& b
<= '9')
505 else if ('A' <= b
&& b
<= 'F')
507 else if ('a' <= b
&& b
<= 'f')
522 for (i
= 0; i
< 4; i
++)
527 if ('0' <= a
&& a
<= '9')
529 else if ('A' <= a
&& a
<= 'F')
531 else if ('a' <= a
&& a
<= 'f')
547 for (i
= 0; i
< 6; i
++)
552 if ('0' <= a
&& a
<= '9')
554 else if ('A' <= a
&& a
<= 'F')
556 else if ('a' <= a
&& a
<= 'f')
569 scm_i_input_error (FUNC_NAME
, port
,
570 "illegal character in escape sequence: ~S",
571 scm_list_1 (SCM_MAKE_CHAR (c
)));
574 str
= scm_i_string_start_writing (str
);
575 scm_i_string_set_x (str
, c_str_len
++, c
);
576 scm_i_string_stop_writing ();
581 return scm_i_substring_copy (str
, 0, c_str_len
);
590 scm_read_number (scm_t_wchar chr
, SCM port
)
596 scm_ungetc (chr
, port
);
597 buffer
= read_complete_token (port
, &read
);
598 result
= scm_string_to_number (buffer
, SCM_UNDEFINED
);
599 if (!scm_is_true (result
))
600 /* Return a symbol instead of a number. */
601 result
= scm_string_to_symbol (buffer
);
607 scm_read_mixed_case_symbol (scm_t_wchar chr
, SCM port
)
610 int ends_with_colon
= 0;
613 int postfix
= scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE
), scm_keyword_postfix
);
615 scm_ungetc (chr
, port
);
616 buffer
= read_complete_token (port
, &read
);
618 ends_with_colon
= scm_i_string_ref (buffer
, read
- 1) == ':';
620 if (postfix
&& ends_with_colon
&& (read
> 1))
621 result
= scm_symbol_to_keyword (scm_string_to_symbol (scm_i_substring (buffer
, 0, read
- 1)));
623 result
= scm_string_to_symbol (buffer
);
629 scm_read_number_and_radix (scm_t_wchar chr
, SCM port
)
630 #define FUNC_NAME "scm_lreadr"
634 SCM buffer
= scm_i_make_string (READER_BUFFER_SIZE
, NULL
);
660 scm_ungetc (chr
, port
);
661 scm_ungetc ('#', port
);
665 buffer
= read_complete_token (port
, &read
);
666 result
= scm_string_to_number (buffer
, scm_from_uint (radix
));
668 if (scm_is_true (result
))
671 scm_i_input_error (FUNC_NAME
, port
, "unknown # object", SCM_EOL
);
678 scm_read_quote (int chr
, SCM port
)
681 long line
= SCM_LINUM (port
);
682 int column
= SCM_COL (port
) - 1;
687 p
= scm_sym_quasiquote
;
700 p
= scm_sym_uq_splicing
;
703 scm_ungetc (c
, port
);
710 fprintf (stderr
, "%s: unhandled quote character (%i)\n",
711 "scm_read_quote", chr
);
715 p
= scm_cons2 (p
, scm_read_expression (port
), SCM_EOL
);
716 if (SCM_RECORD_POSITIONS_P
)
717 scm_whash_insert (scm_source_whash
, p
,
718 scm_make_srcprops (line
, column
,
721 ? (scm_cons2 (SCM_CAR (p
),
722 SCM_CAR (SCM_CDR (p
)),
731 SCM_SYMBOL (sym_syntax
, "syntax");
732 SCM_SYMBOL (sym_quasisyntax
, "quasisyntax");
733 SCM_SYMBOL (sym_unsyntax
, "unsyntax");
734 SCM_SYMBOL (sym_unsyntax_splicing
, "unsyntax-splicing");
737 scm_read_syntax (int chr
, SCM port
)
740 long line
= SCM_LINUM (port
);
741 int column
= SCM_COL (port
) - 1;
759 p
= sym_unsyntax_splicing
;
762 scm_ungetc (c
, port
);
769 fprintf (stderr
, "%s: unhandled syntax character (%i)\n",
770 "scm_read_syntax", chr
);
774 p
= scm_cons2 (p
, scm_read_expression (port
), SCM_EOL
);
775 if (SCM_RECORD_POSITIONS_P
)
776 scm_whash_insert (scm_source_whash
, p
,
777 scm_make_srcprops (line
, column
,
780 ? (scm_cons2 (SCM_CAR (p
),
781 SCM_CAR (SCM_CDR (p
)),
791 scm_read_semicolon_comment (int chr
, SCM port
)
795 /* We use the get_byte here because there is no need to get the
796 locale correct with comment input. This presumes that newline
797 always represents itself no matter what the encoding is. */
798 for (c
= scm_get_byte_or_eof (port
);
799 (c
!= EOF
) && (c
!= '\n');
800 c
= scm_getc (port
));
802 return SCM_UNSPECIFIED
;
806 /* Sharp readers, i.e. readers called after a `#' sign has been read. */
809 scm_read_boolean (int chr
, SCM port
)
822 return SCM_UNSPECIFIED
;
826 scm_read_character (scm_t_wchar chr
, SCM port
)
827 #define FUNC_NAME "scm_lreadr"
829 SCM charname
= scm_i_make_string (READER_CHAR_NAME_MAX_SIZE
, NULL
);
834 overflow
= read_token (port
, charname
, &charname_len
);
835 charname
= scm_c_substring (charname
, 0, charname_len
);
840 if (charname_len
== 0)
842 chr
= scm_getc (port
);
844 scm_i_input_error (FUNC_NAME
, port
, "unexpected end of file "
845 "while reading character", SCM_EOL
);
847 /* CHR must be a token delimiter, like a whitespace. */
848 return (SCM_MAKE_CHAR (chr
));
851 if (charname_len
== 1)
852 return SCM_MAKE_CHAR (scm_i_string_ref (charname
, 0));
854 cp
= scm_i_string_ref (charname
, 0);
855 if (cp
== SCM_CODEPOINT_DOTTED_CIRCLE
&& charname_len
== 2)
856 return SCM_MAKE_CHAR (scm_i_string_ref (charname
, 1));
858 if (cp
>= '0' && cp
< '8')
860 /* Dirk:FIXME:: This type of character syntax is not R5RS
861 * compliant. Further, it should be verified that the constant
862 * does only consist of octal digits. */
863 SCM p
= scm_string_to_number (charname
, scm_from_uint (8));
866 scm_t_wchar c
= SCM_I_INUM (p
);
867 if (SCM_IS_UNICODE_CHAR (c
))
868 return SCM_MAKE_CHAR (c
);
870 scm_i_input_error (FUNC_NAME
, port
,
871 "out-of-range octal character escape: ~a",
872 scm_list_1 (charname
));
876 /* The names of characters should never have non-Latin1
878 if (scm_i_is_narrow_string (charname
)
879 || scm_i_try_narrow_string (charname
))
880 { SCM ch
= scm_i_charname_to_char (scm_i_string_chars (charname
),
882 if (scm_is_true (ch
))
887 scm_i_input_error (FUNC_NAME
, port
, "unknown character name ~a",
888 scm_list_1 (charname
));
890 return SCM_UNSPECIFIED
;
895 scm_read_keyword (int chr
, SCM port
)
899 /* Read the symbol that comprises the keyword. Doing this instead of
900 invoking a specific symbol reader function allows `scm_read_keyword ()'
901 to adapt to the delimiters currently valid of symbols.
903 XXX: This implementation allows sloppy syntaxes like `#: key'. */
904 symbol
= scm_read_expression (port
);
905 if (!scm_is_symbol (symbol
))
906 scm_i_input_error ("scm_read_keyword", port
,
907 "keyword prefix `~a' not followed by a symbol: ~s",
908 scm_list_2 (SCM_MAKE_CHAR (chr
), symbol
));
910 return (scm_symbol_to_keyword (symbol
));
914 scm_read_vector (int chr
, SCM port
)
916 /* Note: We call `scm_read_sexp ()' rather than READER here in order to
917 guarantee that it's going to do what we want. After all, this is an
918 implementation detail of `scm_read_vector ()', not a desirable
920 return (scm_vector (scm_read_sexp (chr
, port
)));
924 scm_read_srfi4_vector (int chr
, SCM port
)
926 return scm_i_read_array (port
, chr
);
930 scm_read_bytevector (scm_t_wchar chr
, SCM port
)
932 chr
= scm_getc (port
);
936 chr
= scm_getc (port
);
940 chr
= scm_getc (port
);
944 return scm_u8_list_to_bytevector (scm_read_sexp (chr
, port
));
947 scm_i_input_error ("read_bytevector", port
,
948 "invalid bytevector prefix",
949 SCM_MAKE_CHAR (chr
));
950 return SCM_UNSPECIFIED
;
954 scm_read_guile_bit_vector (scm_t_wchar chr
, SCM port
)
956 /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
957 terribly inefficient but who cares? */
958 SCM s_bits
= SCM_EOL
;
960 for (chr
= scm_getc (port
);
961 (chr
!= EOF
) && ((chr
== '0') || (chr
== '1'));
962 chr
= scm_getc (port
))
964 s_bits
= scm_cons ((chr
== '0') ? SCM_BOOL_F
: SCM_BOOL_T
, s_bits
);
968 scm_ungetc (chr
, port
);
970 return scm_bitvector (scm_reverse_x (s_bits
, SCM_EOL
));
974 scm_read_scsh_block_comment (scm_t_wchar chr
, SCM port
)
978 /* We can use the get_byte here because there is no need to get the
979 locale correct when reading comments. This presumes that
980 hash and exclamation points always represent themselves no
981 matter what the source encoding is.*/
984 int c
= scm_get_byte_or_eof (port
);
987 scm_i_input_error ("skip_block_comment", port
,
988 "unterminated `#! ... !#' comment", SCM_EOL
);
992 else if (c
== '#' && bang_seen
)
998 return SCM_UNSPECIFIED
;
1002 scm_read_r6rs_block_comment (scm_t_wchar chr
, SCM port
)
1004 /* Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
1005 nested. So care must be taken. */
1006 int nesting_level
= 1;
1007 int opening_seen
= 0, closing_seen
= 0;
1009 while (nesting_level
> 0)
1011 int c
= scm_getc (port
);
1014 scm_i_input_error (__FUNCTION__
, port
,
1015 "unterminated `#| ... |#' comment", SCM_EOL
);
1023 else if (closing_seen
)
1034 opening_seen
= closing_seen
= 0;
1037 return SCM_UNSPECIFIED
;
1041 scm_read_commented_expression (scm_t_wchar chr
, SCM port
)
1045 c
= flush_ws (port
, (char *) NULL
);
1047 scm_i_input_error ("read_commented_expression", port
,
1048 "no expression after #; comment", SCM_EOL
);
1049 scm_ungetc (c
, port
);
1050 scm_read_expression (port
);
1051 return SCM_UNSPECIFIED
;
1055 scm_read_extended_symbol (scm_t_wchar chr
, SCM port
)
1057 /* Guile's extended symbol read syntax looks like this:
1059 #{This is all a symbol name}#
1061 So here, CHR is expected to be `{'. */
1062 int saw_brace
= 0, finished
= 0;
1064 SCM buf
= scm_i_make_string (1024, NULL
);
1066 buf
= scm_i_string_start_writing (buf
);
1068 while ((chr
= scm_getc (port
)) != EOF
)
1080 scm_i_string_set_x (buf
, len
++, '}');
1081 scm_i_string_set_x (buf
, len
++, chr
);
1084 else if (chr
== '}')
1087 scm_i_string_set_x (buf
, len
++, chr
);
1089 if (len
>= scm_i_string_length (buf
) - 2)
1091 scm_i_string_stop_writing ();
1092 SCM addy
= scm_i_make_string (1024, NULL
);
1093 buf
= scm_string_append (scm_list_2 (buf
, addy
));
1095 buf
= scm_i_string_start_writing (buf
);
1101 scm_i_string_stop_writing ();
1103 return (scm_string_to_symbol (scm_c_substring (buf
, 0, len
)));
1108 /* Top-level token readers, i.e., dispatchers. */
1111 scm_read_sharp_extension (int chr
, SCM port
)
1115 proc
= scm_get_hash_procedure (chr
);
1116 if (scm_is_true (scm_procedure_p (proc
)))
1118 long line
= SCM_LINUM (port
);
1119 int column
= SCM_COL (port
) - 2;
1122 got
= scm_call_2 (proc
, SCM_MAKE_CHAR (chr
), port
);
1123 if (!scm_is_eq (got
, SCM_UNSPECIFIED
))
1125 if (SCM_RECORD_POSITIONS_P
)
1126 return (recsexpr (got
, line
, column
,
1127 SCM_FILENAME (port
)));
1133 return SCM_UNSPECIFIED
;
1136 /* The reader for the sharp `#' character. It basically dispatches reads
1137 among the above token readers. */
1139 scm_read_sharp (scm_t_wchar chr
, SCM port
)
1140 #define FUNC_NAME "scm_lreadr"
1144 chr
= scm_getc (port
);
1146 result
= scm_read_sharp_extension (chr
, port
);
1147 if (!scm_is_eq (result
, SCM_UNSPECIFIED
))
1153 return (scm_read_character (chr
, port
));
1155 return (scm_read_vector (chr
, port
));
1159 /* This one may return either a boolean or an SRFI-4 vector. */
1160 return (scm_read_srfi4_vector (chr
, port
));
1162 return (scm_read_bytevector (chr
, port
));
1164 return (scm_read_guile_bit_vector (chr
, port
));
1168 /* This one may return either a boolean or an SRFI-4 vector. */
1169 return (scm_read_boolean (chr
, port
));
1171 return (scm_read_keyword (chr
, port
));
1172 case '0': case '1': case '2': case '3': case '4':
1173 case '5': case '6': case '7': case '8': case '9':
1175 #if SCM_ENABLE_DEPRECATED
1176 /* See below for 'i' and 'e'. */
1183 return (scm_i_read_array (port
, chr
));
1187 #if SCM_ENABLE_DEPRECATED
1189 /* When next char is '(', it really is an old-style
1191 scm_t_wchar next_c
= scm_getc (port
);
1193 scm_ungetc (next_c
, port
);
1195 return scm_i_read_array (port
, chr
);
1209 return (scm_read_number_and_radix (chr
, port
));
1211 return (scm_read_extended_symbol (chr
, port
));
1213 return (scm_read_scsh_block_comment (chr
, port
));
1215 return (scm_read_commented_expression (chr
, port
));
1219 return (scm_read_syntax (chr
, port
));
1221 result
= scm_read_sharp_extension (chr
, port
);
1222 if (scm_is_eq (result
, SCM_UNSPECIFIED
))
1224 /* To remain compatible with 1.8 and earlier, the following
1225 characters have lower precedence than `read-hash-extend'
1230 return scm_read_r6rs_block_comment (chr
, port
);
1232 scm_i_input_error (FUNC_NAME
, port
, "Unknown # object: ~S",
1233 scm_list_1 (SCM_MAKE_CHAR (chr
)));
1240 return SCM_UNSPECIFIED
;
1245 scm_read_expression (SCM port
)
1246 #define FUNC_NAME "scm_read_expression"
1250 register scm_t_wchar chr
;
1252 chr
= scm_getc (port
);
1256 case SCM_WHITE_SPACES
:
1257 case SCM_LINE_INCREMENTORS
:
1260 (void) scm_read_semicolon_comment (chr
, port
);
1263 return (scm_read_sexp (chr
, port
));
1265 return (scm_read_string (chr
, port
));
1269 return (scm_read_quote (chr
, port
));
1273 result
= scm_read_sharp (chr
, port
);
1274 if (scm_is_eq (result
, SCM_UNSPECIFIED
))
1275 /* We read a comment or some such. */
1281 scm_i_input_error (FUNC_NAME
, port
, "unexpected \")\"", SCM_EOL
);
1286 if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE
), scm_keyword_prefix
))
1287 return scm_symbol_to_keyword (scm_read_expression (port
));
1292 if (((chr
>= '0') && (chr
<= '9'))
1293 || (strchr ("+-.", chr
)))
1294 return (scm_read_number (chr
, port
));
1296 return (scm_read_mixed_case_symbol (chr
, port
));
1304 /* Actual reader. */
1306 SCM_DEFINE (scm_read
, "read", 0, 1, 0,
1308 "Read an s-expression from the input port @var{port}, or from\n"
1309 "the current input port if @var{port} is not specified.\n"
1310 "Any whitespace before the next token is discarded.")
1311 #define FUNC_NAME s_scm_read
1315 if (SCM_UNBNDP (port
))
1316 port
= scm_current_input_port ();
1317 SCM_VALIDATE_OPINPORT (1, port
);
1319 c
= flush_ws (port
, (char *) NULL
);
1322 scm_ungetc (c
, port
);
1324 return (scm_read_expression (port
));
1331 /* Used when recording expressions constructed by `scm_read_sharp ()'. */
1333 recsexpr (SCM obj
, long line
, int column
, SCM filename
)
1335 if (!scm_is_pair(obj
)) {
1338 SCM tmp
= obj
, copy
;
1339 /* If this sexpr is visible in the read:sharp source, we want to
1340 keep that information, so only record non-constant cons cells
1341 which haven't previously been read by the reader. */
1342 if (scm_is_false (scm_whash_lookup (scm_source_whash
, obj
)))
1344 if (SCM_COPY_SOURCE_P
)
1346 copy
= scm_cons (recsexpr (SCM_CAR (obj
), line
, column
, filename
),
1348 while ((tmp
= SCM_CDR (tmp
)) && scm_is_pair (tmp
))
1350 SCM_SETCDR (copy
, scm_cons (recsexpr (SCM_CAR (tmp
),
1355 copy
= SCM_CDR (copy
);
1357 SCM_SETCDR (copy
, tmp
);
1361 recsexpr (SCM_CAR (obj
), line
, column
, filename
);
1362 while ((tmp
= SCM_CDR (tmp
)) && scm_is_pair (tmp
))
1363 recsexpr (SCM_CAR (tmp
), line
, column
, filename
);
1364 copy
= SCM_UNDEFINED
;
1366 scm_whash_insert (scm_source_whash
,
1368 scm_make_srcprops (line
,
1378 /* Manipulate the read-hash-procedures alist. This could be written in
1379 Scheme, but maybe it will also be used by C code during initialisation. */
1380 SCM_DEFINE (scm_read_hash_extend
, "read-hash-extend", 2, 0, 0,
1381 (SCM chr
, SCM proc
),
1382 "Install the procedure @var{proc} for reading expressions\n"
1383 "starting with the character sequence @code{#} and @var{chr}.\n"
1384 "@var{proc} will be called with two arguments: the character\n"
1385 "@var{chr} and the port to read further data from. The object\n"
1386 "returned will be the return value of @code{read}. \n"
1387 "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1389 #define FUNC_NAME s_scm_read_hash_extend
1394 SCM_VALIDATE_CHAR (1, chr
);
1395 SCM_ASSERT (scm_is_false (proc
)
1396 || scm_is_eq (scm_procedure_p (proc
), SCM_BOOL_T
),
1397 proc
, SCM_ARG2
, FUNC_NAME
);
1399 /* Check if chr is already in the alist. */
1400 this = *scm_read_hash_procedures
;
1404 if (scm_is_null (this))
1406 /* not found, so add it to the beginning. */
1407 if (scm_is_true (proc
))
1409 *scm_read_hash_procedures
=
1410 scm_cons (scm_cons (chr
, proc
), *scm_read_hash_procedures
);
1414 if (scm_is_eq (chr
, SCM_CAAR (this)))
1416 /* already in the alist. */
1417 if (scm_is_false (proc
))
1420 if (scm_is_false (prev
))
1422 *scm_read_hash_procedures
=
1423 SCM_CDR (*scm_read_hash_procedures
);
1426 scm_set_cdr_x (prev
, SCM_CDR (this));
1431 scm_set_cdr_x (SCM_CAR (this), proc
);
1436 this = SCM_CDR (this);
1439 return SCM_UNSPECIFIED
;
1443 /* Recover the read-hash procedure corresponding to char c. */
1445 scm_get_hash_procedure (int c
)
1447 SCM rest
= *scm_read_hash_procedures
;
1451 if (scm_is_null (rest
))
1454 if (SCM_CHAR (SCM_CAAR (rest
)) == c
)
1455 return SCM_CDAR (rest
);
1457 rest
= SCM_CDR (rest
);
1461 #define SCM_ENCODING_SEARCH_SIZE (500)
1463 /* Search the first few hundred characters of a file for an Emacs-like coding
1464 declaration. Returns either NULL or a string whose storage has been
1465 allocated with `scm_gc_malloc ()'. */
1467 scm_i_scan_for_encoding (SCM port
)
1469 char header
[SCM_ENCODING_SEARCH_SIZE
+1];
1471 char *encoding
= NULL
;
1477 bytes_read
= scm_c_read (port
, header
, SCM_ENCODING_SEARCH_SIZE
);
1478 scm_seek (port
, scm_from_int (0), scm_from_int (SEEK_SET
));
1481 && header
[0] == '\xef' && header
[1] == '\xbb' && header
[2] == '\xbf')
1484 /* search past "coding[:=]" */
1488 if ((pos
= strstr(pos
, "coding")) == NULL
)
1491 pos
+= strlen("coding");
1492 if (pos
- header
>= SCM_ENCODING_SEARCH_SIZE
||
1493 (*pos
== ':' || *pos
== '='))
1501 while (pos
- header
<= SCM_ENCODING_SEARCH_SIZE
&&
1502 (*pos
== ' ' || *pos
== '\t'))
1505 /* grab the next token */
1507 while (pos
+ i
- header
<= SCM_ENCODING_SEARCH_SIZE
1508 && pos
+ i
- header
< bytes_read
1509 && (isalnum((int) pos
[i
]) || pos
[i
] == '_' || pos
[i
] == '-'
1516 encoding
= scm_gc_strndup (pos
, i
+ 1, "encoding");
1517 for (i
= 0; i
< strlen (encoding
); i
++)
1518 encoding
[i
] = toupper ((int) encoding
[i
]);
1520 /* push backwards to make sure we were in a comment */
1522 while (pos
- i
- header
> 0)
1524 if (*(pos
- i
) == '\n')
1526 /* This wasn't in a semicolon comment. Check for a
1527 hash-bang comment. */
1528 char *beg
= strstr (header
, "#!");
1529 char *end
= strstr (header
, "!#");
1530 if (beg
< pos
&& pos
< end
)
1534 if (*(pos
- i
) == ';')
1542 /* This wasn't in a comment */
1545 if (utf8_bom
&& strcmp(encoding
, "UTF-8"))
1546 scm_misc_error (NULL
,
1547 "the port input declares the encoding ~s but is encoded as UTF-8",
1548 scm_list_1 (scm_from_locale_string (encoding
)));
1553 SCM_DEFINE (scm_file_encoding
, "file-encoding", 1, 0, 0,
1555 "Scans the port for an EMACS-like character coding declaration\n"
1556 "near the top of the contents of a port with random-acessible contents.\n"
1557 "The coding declaration is of the form\n"
1558 "@code{coding: XXXXX} and must appear in a scheme comment.\n"
1560 "Returns a string containing the character encoding of the file\n"
1561 "if a declaration was found, or @code{#f} otherwise.\n")
1562 #define FUNC_NAME s_scm_file_encoding
1567 enc
= scm_i_scan_for_encoding (port
);
1572 s_enc
= scm_from_locale_string (enc
);
1583 scm_read_hash_procedures
=
1584 SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL
));
1586 scm_init_opts (scm_read_options
, scm_read_opts
);
1587 #include "libguile/read.x"