1 /* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007, 2008, 2009 Free Software
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
33 #include "libguile/_scm.h"
34 #include "libguile/bytevectors.h"
35 #include "libguile/chars.h"
36 #include "libguile/eval.h"
37 #include "libguile/arrays.h"
38 #include "libguile/bitvectors.h"
39 #include "libguile/keywords.h"
40 #include "libguile/alist.h"
41 #include "libguile/srcprop.h"
42 #include "libguile/hashtab.h"
43 #include "libguile/hash.h"
44 #include "libguile/ports.h"
45 #include "libguile/root.h"
46 #include "libguile/strings.h"
47 #include "libguile/strports.h"
48 #include "libguile/vectors.h"
49 #include "libguile/validate.h"
50 #include "libguile/srfi-4.h"
51 #include "libguile/srfi-13.h"
53 #include "libguile/read.h"
54 #include "libguile/private-options.h"
59 SCM_GLOBAL_SYMBOL (scm_sym_dot
, ".");
60 SCM_SYMBOL (scm_keyword_prefix
, "prefix");
61 SCM_SYMBOL (scm_keyword_postfix
, "postfix");
63 scm_t_option scm_read_opts
[] = {
64 { SCM_OPTION_BOOLEAN
, "copy", 0,
65 "Copy source code expressions." },
66 { SCM_OPTION_BOOLEAN
, "positions", 0,
67 "Record positions of source code expressions." },
68 { SCM_OPTION_BOOLEAN
, "case-insensitive", 0,
69 "Convert symbols to lower case."},
70 { SCM_OPTION_SCM
, "keywords", SCM_UNPACK (SCM_BOOL_F
),
71 "Style of keyword recognition: #f, 'prefix or 'postfix."},
73 { SCM_OPTION_BOOLEAN
, "elisp-vectors", 0,
74 "Support Elisp vector syntax, namely `[...]'."},
75 { SCM_OPTION_BOOLEAN
, "elisp-strings", 0,
76 "Support `\\(' and `\\)' in strings."},
82 Give meaningful error messages for errors
86 FILE:LINE:COL: MESSAGE
89 This is not standard GNU format, but the test-suite likes the real
90 message to be in front.
96 scm_i_input_error (char const *function
,
97 SCM port
, const char *message
, SCM arg
)
99 SCM fn
= (scm_is_string (SCM_FILENAME(port
))
101 : scm_from_locale_string ("#<unknown port>"));
103 SCM string_port
= scm_open_output_string ();
104 SCM string
= SCM_EOL
;
105 scm_simple_format (string_port
,
106 scm_from_locale_string ("~A:~S:~S: ~A"),
108 scm_from_long (SCM_LINUM (port
) + 1),
109 scm_from_int (SCM_COL (port
) + 1),
110 scm_from_locale_string (message
)));
112 string
= scm_get_output_string (string_port
);
113 scm_close_output_port (string_port
);
114 scm_error_scm (scm_from_locale_symbol ("read-error"),
115 function
? scm_from_locale_string (function
) : SCM_BOOL_F
,
122 SCM_DEFINE (scm_read_options
, "read-options-interface", 0, 1, 0,
124 "Option interface for the read options. Instead of using\n"
125 "this procedure directly, use the procedures @code{read-enable},\n"
126 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
127 #define FUNC_NAME s_scm_read_options
129 SCM ans
= scm_options (setting
,
132 if (SCM_COPY_SOURCE_P
)
133 SCM_RECORD_POSITIONS_P
= 1;
138 /* An association list mapping extra hash characters to procedures. */
139 static SCM
*scm_read_hash_procedures
;
146 /* Size of the C buffer used to read symbols and numbers. */
147 #define READER_BUFFER_SIZE 128
149 /* Size of the C buffer used to read strings. */
150 #define READER_STRING_BUFFER_SIZE 512
152 /* The maximum size of Scheme character names. */
153 #define READER_CHAR_NAME_MAX_SIZE 50
156 /* `isblank' is only in C99. */
157 #define CHAR_IS_BLANK_(_chr) \
158 (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n') \
159 || ((_chr) == '\f') || ((_chr) == '\r'))
162 # define CHAR_IS_BLANK(_chr) \
163 ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
165 # define CHAR_IS_BLANK CHAR_IS_BLANK_
169 /* R5RS one-character delimiters (see section 7.1.1, ``Lexical
171 #define CHAR_IS_R5RS_DELIMITER(c) \
173 || (c == ')') || (c == '(') || (c == ';') || (c == '"'))
175 #define CHAR_IS_DELIMITER CHAR_IS_R5RS_DELIMITER
177 /* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
179 #define CHAR_IS_EXPONENT_MARKER(_chr) \
180 (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
181 || ((_chr) == 'd') || ((_chr) == 'l'))
183 /* Read an SCSH block comment. */
184 static inline SCM
scm_read_scsh_block_comment (int chr
, SCM port
);
185 static SCM
scm_read_commented_expression (int chr
, SCM port
);
187 /* Read from PORT until a delimiter (e.g., a whitespace) is read. Return
188 zero if the whole token fits in BUF, non-zero otherwise. */
190 read_token (SCM port
, SCM buf
, size_t *read
)
195 buf
= scm_i_string_start_writing (buf
);
196 while (*read
< scm_i_string_length (buf
))
198 chr
= scm_getc (port
);
202 scm_i_string_stop_writing ();
206 chr
= (SCM_CASE_INSENSITIVE_P
? uc_tolower (chr
) : chr
);
208 if (CHAR_IS_DELIMITER (chr
))
210 scm_i_string_stop_writing ();
211 scm_ungetc (chr
, port
);
215 scm_i_string_set_x (buf
, *read
, chr
);
218 scm_i_string_stop_writing ();
224 read_complete_token (SCM port
, size_t *read
)
226 SCM buffer
, str
= SCM_EOL
;
230 buffer
= scm_i_make_string (READER_BUFFER_SIZE
, NULL
);
231 overflow
= read_token (port
, buffer
, read
);
233 return scm_i_substring (buffer
, 0, *read
);
235 str
= scm_string_copy (buffer
);
238 overflow
= read_token (port
, buffer
, &len
);
239 str
= scm_string_append (scm_list_2 (str
, buffer
));
244 return scm_i_substring (str
, 0, *read
);
247 /* Skip whitespace from PORT and return the first non-whitespace character
248 read. Raise an error on end-of-file. */
250 flush_ws (SCM port
, const char *eoferr
)
252 register scm_t_wchar c
;
254 switch (c
= scm_getc (port
))
260 scm_i_input_error (eoferr
,
269 switch (c
= scm_getc (port
))
275 case SCM_LINE_INCREMENTORS
:
281 switch (c
= scm_getc (port
))
284 eoferr
= "read_sharp";
287 scm_read_scsh_block_comment (c
, port
);
290 scm_read_commented_expression (c
, port
);
293 scm_ungetc (c
, port
);
298 case SCM_LINE_INCREMENTORS
:
299 case SCM_SINGLE_SPACES
:
314 static SCM
scm_read_expression (SCM port
);
315 static SCM
scm_read_sharp (int chr
, SCM port
);
316 static SCM
scm_get_hash_procedure (int c
);
317 static SCM
recsexpr (SCM obj
, long line
, int column
, SCM filename
);
321 scm_read_sexp (scm_t_wchar chr
, SCM port
)
322 #define FUNC_NAME "scm_i_lreadparen"
326 register SCM tl
, ans
= SCM_EOL
;
327 SCM tl2
= SCM_EOL
, ans2
= SCM_EOL
, copy
= SCM_BOOL_F
;
328 static const int terminating_char
= ')';
330 /* Need to capture line and column numbers here. */
331 long line
= SCM_LINUM (port
);
332 int column
= SCM_COL (port
) - 1;
335 c
= flush_ws (port
, FUNC_NAME
);
336 if (terminating_char
== c
)
339 scm_ungetc (c
, port
);
340 if (scm_is_eq (scm_sym_dot
,
341 (tmp
= scm_read_expression (port
))))
343 ans
= scm_read_expression (port
);
344 if (terminating_char
!= (c
= flush_ws (port
, FUNC_NAME
)))
345 scm_i_input_error (FUNC_NAME
, port
, "missing close paren",
350 /* Build the head of the list structure. */
351 ans
= tl
= scm_cons (tmp
, SCM_EOL
);
353 if (SCM_COPY_SOURCE_P
)
354 ans2
= tl2
= scm_cons (scm_is_pair (tmp
)
359 while (terminating_char
!= (c
= flush_ws (port
, FUNC_NAME
)))
363 scm_ungetc (c
, port
);
364 if (scm_is_eq (scm_sym_dot
,
365 (tmp
= scm_read_expression (port
))))
367 SCM_SETCDR (tl
, tmp
= scm_read_expression (port
));
369 if (SCM_COPY_SOURCE_P
)
370 SCM_SETCDR (tl2
, scm_cons (scm_is_pair (tmp
) ? copy
: tmp
,
373 c
= flush_ws (port
, FUNC_NAME
);
374 if (terminating_char
!= c
)
375 scm_i_input_error (FUNC_NAME
, port
,
376 "in pair: missing close paren", SCM_EOL
);
380 new_tail
= scm_cons (tmp
, SCM_EOL
);
381 SCM_SETCDR (tl
, new_tail
);
384 if (SCM_COPY_SOURCE_P
)
386 SCM new_tail2
= scm_cons (scm_is_pair (tmp
)
389 SCM_SETCDR (tl2
, new_tail2
);
395 if (SCM_RECORD_POSITIONS_P
)
396 scm_whash_insert (scm_source_whash
,
398 scm_make_srcprops (line
, column
,
409 scm_read_string (int chr
, SCM port
)
410 #define FUNC_NAME "scm_lreadr"
412 /* For strings smaller than C_STR, this function creates only one Scheme
413 object (the string returned). */
415 SCM str
= SCM_BOOL_F
;
416 unsigned c_str_len
= 0;
419 str
= scm_i_make_string (READER_STRING_BUFFER_SIZE
, NULL
);
420 while ('"' != (c
= scm_getc (port
)))
425 scm_i_input_error (FUNC_NAME
, port
,
426 "end of file in string constant", SCM_EOL
);
429 if (c_str_len
+ 1 >= scm_i_string_length (str
))
431 SCM addy
= scm_i_make_string (READER_STRING_BUFFER_SIZE
, NULL
);
433 str
= scm_string_append (scm_list_2 (str
, addy
));
438 switch (c
= scm_getc (port
))
448 if (SCM_ESCAPED_PARENS_P
)
484 if ('0' <= a
&& a
<= '9')
486 else if ('A' <= a
&& a
<= 'F')
488 else if ('a' <= a
&& a
<= 'f')
495 if ('0' <= b
&& b
<= '9')
497 else if ('A' <= b
&& b
<= 'F')
499 else if ('a' <= b
&& b
<= 'f')
514 for (i
= 0; i
< 4; i
++)
519 if ('0' <= a
&& a
<= '9')
521 else if ('A' <= a
&& a
<= 'F')
523 else if ('a' <= a
&& a
<= 'f')
539 for (i
= 0; i
< 6; i
++)
544 if ('0' <= a
&& a
<= '9')
546 else if ('A' <= a
&& a
<= 'F')
548 else if ('a' <= a
&& a
<= 'f')
561 scm_i_input_error (FUNC_NAME
, port
,
562 "illegal character in escape sequence: ~S",
563 scm_list_1 (SCM_MAKE_CHAR (c
)));
566 str
= scm_i_string_start_writing (str
);
567 scm_i_string_set_x (str
, c_str_len
++, c
);
568 scm_i_string_stop_writing ();
573 return scm_i_substring_copy (str
, 0, c_str_len
);
582 scm_read_number (scm_t_wchar chr
, SCM port
)
588 scm_ungetc (chr
, port
);
589 buffer
= read_complete_token (port
, &read
);
590 result
= scm_string_to_number (buffer
, SCM_UNDEFINED
);
591 if (!scm_is_true (result
))
592 /* Return a symbol instead of a number. */
593 result
= scm_string_to_symbol (buffer
);
599 scm_read_mixed_case_symbol (scm_t_wchar chr
, SCM port
)
602 int ends_with_colon
= 0;
605 int postfix
= scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE
), scm_keyword_postfix
);
607 scm_ungetc (chr
, port
);
608 buffer
= read_complete_token (port
, &read
);
610 ends_with_colon
= scm_i_string_ref (buffer
, read
- 1) == ':';
612 if (postfix
&& ends_with_colon
&& (read
> 1))
613 result
= scm_symbol_to_keyword (scm_string_to_symbol (scm_i_substring (buffer
, 0, read
- 1)));
615 result
= scm_string_to_symbol (buffer
);
621 scm_read_number_and_radix (scm_t_wchar chr
, SCM port
)
622 #define FUNC_NAME "scm_lreadr"
626 SCM buffer
= scm_i_make_string (READER_BUFFER_SIZE
, NULL
);
652 scm_ungetc (chr
, port
);
653 scm_ungetc ('#', port
);
657 buffer
= read_complete_token (port
, &read
);
658 result
= scm_string_to_number (buffer
, scm_from_uint (radix
));
660 if (scm_is_true (result
))
663 scm_i_input_error (FUNC_NAME
, port
, "unknown # object", SCM_EOL
);
670 scm_read_quote (int chr
, SCM port
)
673 long line
= SCM_LINUM (port
);
674 int column
= SCM_COL (port
) - 1;
679 p
= scm_sym_quasiquote
;
692 p
= scm_sym_uq_splicing
;
695 scm_ungetc (c
, port
);
702 fprintf (stderr
, "%s: unhandled quote character (%i)\n",
703 "scm_read_quote", chr
);
707 p
= scm_cons2 (p
, scm_read_expression (port
), SCM_EOL
);
708 if (SCM_RECORD_POSITIONS_P
)
709 scm_whash_insert (scm_source_whash
, p
,
710 scm_make_srcprops (line
, column
,
713 ? (scm_cons2 (SCM_CAR (p
),
714 SCM_CAR (SCM_CDR (p
)),
723 SCM_SYMBOL (sym_syntax
, "syntax");
724 SCM_SYMBOL (sym_quasisyntax
, "quasisyntax");
725 SCM_SYMBOL (sym_unsyntax
, "unsyntax");
726 SCM_SYMBOL (sym_unsyntax_splicing
, "unsyntax-splicing");
729 scm_read_syntax (int chr
, SCM port
)
732 long line
= SCM_LINUM (port
);
733 int column
= SCM_COL (port
) - 1;
751 p
= sym_unsyntax_splicing
;
754 scm_ungetc (c
, port
);
761 fprintf (stderr
, "%s: unhandled syntax character (%i)\n",
762 "scm_read_syntax", chr
);
766 p
= scm_cons2 (p
, scm_read_expression (port
), SCM_EOL
);
767 if (SCM_RECORD_POSITIONS_P
)
768 scm_whash_insert (scm_source_whash
, p
,
769 scm_make_srcprops (line
, column
,
772 ? (scm_cons2 (SCM_CAR (p
),
773 SCM_CAR (SCM_CDR (p
)),
783 scm_read_semicolon_comment (int chr
, SCM port
)
787 /* We use the get_byte here because there is no need to get the
788 locale correct with comment input. This presumes that newline
789 always represents itself no matter what the encoding is. */
790 for (c
= scm_get_byte_or_eof (port
);
791 (c
!= EOF
) && (c
!= '\n');
792 c
= scm_getc (port
));
794 return SCM_UNSPECIFIED
;
798 /* Sharp readers, i.e. readers called after a `#' sign has been read. */
801 scm_read_boolean (int chr
, SCM port
)
814 return SCM_UNSPECIFIED
;
818 scm_read_character (scm_t_wchar chr
, SCM port
)
819 #define FUNC_NAME "scm_lreadr"
821 SCM charname
= scm_i_make_string (READER_CHAR_NAME_MAX_SIZE
, NULL
);
826 overflow
= read_token (port
, charname
, &charname_len
);
827 charname
= scm_c_substring (charname
, 0, charname_len
);
832 if (charname_len
== 0)
834 chr
= scm_getc (port
);
836 scm_i_input_error (FUNC_NAME
, port
, "unexpected end of file "
837 "while reading character", SCM_EOL
);
839 /* CHR must be a token delimiter, like a whitespace. */
840 return (SCM_MAKE_CHAR (chr
));
843 if (charname_len
== 1)
844 return SCM_MAKE_CHAR (scm_i_string_ref (charname
, 0));
846 cp
= scm_i_string_ref (charname
, 0);
847 if (cp
== SCM_CODEPOINT_DOTTED_CIRCLE
&& charname_len
== 2)
848 return SCM_MAKE_CHAR (scm_i_string_ref (charname
, 1));
850 if (cp
>= '0' && cp
< '8')
852 /* Dirk:FIXME:: This type of character syntax is not R5RS
853 * compliant. Further, it should be verified that the constant
854 * does only consist of octal digits. */
855 SCM p
= scm_string_to_number (charname
, scm_from_uint (8));
858 scm_t_wchar c
= SCM_I_INUM (p
);
859 if (SCM_IS_UNICODE_CHAR (c
))
860 return SCM_MAKE_CHAR (c
);
862 scm_i_input_error (FUNC_NAME
, port
,
863 "out-of-range octal character escape: ~a",
864 scm_list_1 (charname
));
868 /* The names of characters should never have non-Latin1
870 if (scm_i_is_narrow_string (charname
)
871 || scm_i_try_narrow_string (charname
))
872 { SCM ch
= scm_i_charname_to_char (scm_i_string_chars (charname
),
874 if (scm_is_true (ch
))
879 scm_i_input_error (FUNC_NAME
, port
, "unknown character name ~a",
880 scm_list_1 (charname
));
882 return SCM_UNSPECIFIED
;
887 scm_read_keyword (int chr
, SCM port
)
891 /* Read the symbol that comprises the keyword. Doing this instead of
892 invoking a specific symbol reader function allows `scm_read_keyword ()'
893 to adapt to the delimiters currently valid of symbols.
895 XXX: This implementation allows sloppy syntaxes like `#: key'. */
896 symbol
= scm_read_expression (port
);
897 if (!scm_is_symbol (symbol
))
898 scm_i_input_error ("scm_read_keyword", port
,
899 "keyword prefix `~a' not followed by a symbol: ~s",
900 scm_list_2 (SCM_MAKE_CHAR (chr
), symbol
));
902 return (scm_symbol_to_keyword (symbol
));
906 scm_read_vector (int chr
, SCM port
)
908 /* Note: We call `scm_read_sexp ()' rather than READER here in order to
909 guarantee that it's going to do what we want. After all, this is an
910 implementation detail of `scm_read_vector ()', not a desirable
912 return (scm_vector (scm_read_sexp (chr
, port
)));
916 scm_read_srfi4_vector (int chr
, SCM port
)
918 return scm_i_read_array (port
, chr
);
922 scm_read_bytevector (scm_t_wchar chr
, SCM port
)
924 chr
= scm_getc (port
);
928 chr
= scm_getc (port
);
932 chr
= scm_getc (port
);
936 return scm_u8_list_to_bytevector (scm_read_sexp (chr
, port
));
939 scm_i_input_error ("read_bytevector", port
,
940 "invalid bytevector prefix",
941 SCM_MAKE_CHAR (chr
));
942 return SCM_UNSPECIFIED
;
946 scm_read_guile_bit_vector (scm_t_wchar chr
, SCM port
)
948 /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
949 terribly inefficient but who cares? */
950 SCM s_bits
= SCM_EOL
;
952 for (chr
= scm_getc (port
);
953 (chr
!= EOF
) && ((chr
== '0') || (chr
== '1'));
954 chr
= scm_getc (port
))
956 s_bits
= scm_cons ((chr
== '0') ? SCM_BOOL_F
: SCM_BOOL_T
, s_bits
);
960 scm_ungetc (chr
, port
);
962 return scm_bitvector (scm_reverse_x (s_bits
, SCM_EOL
));
966 scm_read_scsh_block_comment (scm_t_wchar chr
, SCM port
)
970 /* We can use the get_byte here because there is no need to get the
971 locale correct when reading comments. This presumes that
972 hash and exclamation points always represent themselves no
973 matter what the source encoding is.*/
976 int c
= scm_get_byte_or_eof (port
);
979 scm_i_input_error ("skip_block_comment", port
,
980 "unterminated `#! ... !#' comment", SCM_EOL
);
984 else if (c
== '#' && bang_seen
)
990 return SCM_UNSPECIFIED
;
994 scm_read_commented_expression (scm_t_wchar chr
, SCM port
)
998 c
= flush_ws (port
, (char *) NULL
);
1000 scm_i_input_error ("read_commented_expression", port
,
1001 "no expression after #; comment", SCM_EOL
);
1002 scm_ungetc (c
, port
);
1003 scm_read_expression (port
);
1004 return SCM_UNSPECIFIED
;
1008 scm_read_extended_symbol (scm_t_wchar chr
, SCM port
)
1010 /* Guile's extended symbol read syntax looks like this:
1012 #{This is all a symbol name}#
1014 So here, CHR is expected to be `{'. */
1015 int saw_brace
= 0, finished
= 0;
1017 SCM buf
= scm_i_make_string (1024, NULL
);
1019 buf
= scm_i_string_start_writing (buf
);
1021 while ((chr
= scm_getc (port
)) != EOF
)
1033 scm_i_string_set_x (buf
, len
++, '}');
1034 scm_i_string_set_x (buf
, len
++, chr
);
1037 else if (chr
== '}')
1040 scm_i_string_set_x (buf
, len
++, chr
);
1042 if (len
>= scm_i_string_length (buf
) - 2)
1044 scm_i_string_stop_writing ();
1045 SCM addy
= scm_i_make_string (1024, NULL
);
1046 buf
= scm_string_append (scm_list_2 (buf
, addy
));
1048 buf
= scm_i_string_start_writing (buf
);
1054 scm_i_string_stop_writing ();
1056 return (scm_string_to_symbol (scm_c_substring (buf
, 0, len
)));
1061 /* Top-level token readers, i.e., dispatchers. */
1064 scm_read_sharp_extension (int chr
, SCM port
)
1068 proc
= scm_get_hash_procedure (chr
);
1069 if (scm_is_true (scm_procedure_p (proc
)))
1071 long line
= SCM_LINUM (port
);
1072 int column
= SCM_COL (port
) - 2;
1075 got
= scm_call_2 (proc
, SCM_MAKE_CHAR (chr
), port
);
1076 if (!scm_is_eq (got
, SCM_UNSPECIFIED
))
1078 if (SCM_RECORD_POSITIONS_P
)
1079 return (recsexpr (got
, line
, column
,
1080 SCM_FILENAME (port
)));
1086 return SCM_UNSPECIFIED
;
1089 /* The reader for the sharp `#' character. It basically dispatches reads
1090 among the above token readers. */
1092 scm_read_sharp (scm_t_wchar chr
, SCM port
)
1093 #define FUNC_NAME "scm_lreadr"
1097 chr
= scm_getc (port
);
1099 result
= scm_read_sharp_extension (chr
, port
);
1100 if (!scm_is_eq (result
, SCM_UNSPECIFIED
))
1106 return (scm_read_character (chr
, port
));
1108 return (scm_read_vector (chr
, port
));
1112 /* This one may return either a boolean or an SRFI-4 vector. */
1113 return (scm_read_srfi4_vector (chr
, port
));
1115 return (scm_read_bytevector (chr
, port
));
1117 return (scm_read_guile_bit_vector (chr
, port
));
1121 /* This one may return either a boolean or an SRFI-4 vector. */
1122 return (scm_read_boolean (chr
, port
));
1124 return (scm_read_keyword (chr
, port
));
1125 case '0': case '1': case '2': case '3': case '4':
1126 case '5': case '6': case '7': case '8': case '9':
1128 #if SCM_ENABLE_DEPRECATED
1129 /* See below for 'i' and 'e'. */
1136 return (scm_i_read_array (port
, chr
));
1140 #if SCM_ENABLE_DEPRECATED
1142 /* When next char is '(', it really is an old-style
1144 scm_t_wchar next_c
= scm_getc (port
);
1146 scm_ungetc (next_c
, port
);
1148 return scm_i_read_array (port
, chr
);
1162 return (scm_read_number_and_radix (chr
, port
));
1164 return (scm_read_extended_symbol (chr
, port
));
1166 return (scm_read_scsh_block_comment (chr
, port
));
1168 return (scm_read_commented_expression (chr
, port
));
1172 return (scm_read_syntax (chr
, port
));
1174 result
= scm_read_sharp_extension (chr
, port
);
1175 if (scm_is_eq (result
, SCM_UNSPECIFIED
))
1176 scm_i_input_error (FUNC_NAME
, port
, "Unknown # object: ~S",
1177 scm_list_1 (SCM_MAKE_CHAR (chr
)));
1182 return SCM_UNSPECIFIED
;
1187 scm_read_expression (SCM port
)
1188 #define FUNC_NAME "scm_read_expression"
1192 register scm_t_wchar chr
;
1194 chr
= scm_getc (port
);
1198 case SCM_WHITE_SPACES
:
1199 case SCM_LINE_INCREMENTORS
:
1202 (void) scm_read_semicolon_comment (chr
, port
);
1205 return (scm_read_sexp (chr
, port
));
1207 return (scm_read_string (chr
, port
));
1211 return (scm_read_quote (chr
, port
));
1215 result
= scm_read_sharp (chr
, port
);
1216 if (scm_is_eq (result
, SCM_UNSPECIFIED
))
1217 /* We read a comment or some such. */
1223 scm_i_input_error (FUNC_NAME
, port
, "unexpected \")\"", SCM_EOL
);
1228 if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE
), scm_keyword_prefix
))
1229 return scm_symbol_to_keyword (scm_read_expression (port
));
1234 if (((chr
>= '0') && (chr
<= '9'))
1235 || (strchr ("+-.", chr
)))
1236 return (scm_read_number (chr
, port
));
1238 return (scm_read_mixed_case_symbol (chr
, port
));
1246 /* Actual reader. */
1248 SCM_DEFINE (scm_read
, "read", 0, 1, 0,
1250 "Read an s-expression from the input port @var{port}, or from\n"
1251 "the current input port if @var{port} is not specified.\n"
1252 "Any whitespace before the next token is discarded.")
1253 #define FUNC_NAME s_scm_read
1257 if (SCM_UNBNDP (port
))
1258 port
= scm_current_input_port ();
1259 SCM_VALIDATE_OPINPORT (1, port
);
1261 c
= flush_ws (port
, (char *) NULL
);
1264 scm_ungetc (c
, port
);
1266 return (scm_read_expression (port
));
1273 /* Used when recording expressions constructed by `scm_read_sharp ()'. */
1275 recsexpr (SCM obj
, long line
, int column
, SCM filename
)
1277 if (!scm_is_pair(obj
)) {
1280 SCM tmp
= obj
, copy
;
1281 /* If this sexpr is visible in the read:sharp source, we want to
1282 keep that information, so only record non-constant cons cells
1283 which haven't previously been read by the reader. */
1284 if (scm_is_false (scm_whash_lookup (scm_source_whash
, obj
)))
1286 if (SCM_COPY_SOURCE_P
)
1288 copy
= scm_cons (recsexpr (SCM_CAR (obj
), line
, column
, filename
),
1290 while ((tmp
= SCM_CDR (tmp
)) && scm_is_pair (tmp
))
1292 SCM_SETCDR (copy
, scm_cons (recsexpr (SCM_CAR (tmp
),
1297 copy
= SCM_CDR (copy
);
1299 SCM_SETCDR (copy
, tmp
);
1303 recsexpr (SCM_CAR (obj
), line
, column
, filename
);
1304 while ((tmp
= SCM_CDR (tmp
)) && scm_is_pair (tmp
))
1305 recsexpr (SCM_CAR (tmp
), line
, column
, filename
);
1306 copy
= SCM_UNDEFINED
;
1308 scm_whash_insert (scm_source_whash
,
1310 scm_make_srcprops (line
,
1320 /* Manipulate the read-hash-procedures alist. This could be written in
1321 Scheme, but maybe it will also be used by C code during initialisation. */
1322 SCM_DEFINE (scm_read_hash_extend
, "read-hash-extend", 2, 0, 0,
1323 (SCM chr
, SCM proc
),
1324 "Install the procedure @var{proc} for reading expressions\n"
1325 "starting with the character sequence @code{#} and @var{chr}.\n"
1326 "@var{proc} will be called with two arguments: the character\n"
1327 "@var{chr} and the port to read further data from. The object\n"
1328 "returned will be the return value of @code{read}. \n"
1329 "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1331 #define FUNC_NAME s_scm_read_hash_extend
1336 SCM_VALIDATE_CHAR (1, chr
);
1337 SCM_ASSERT (scm_is_false (proc
)
1338 || scm_is_eq (scm_procedure_p (proc
), SCM_BOOL_T
),
1339 proc
, SCM_ARG2
, FUNC_NAME
);
1341 /* Check if chr is already in the alist. */
1342 this = *scm_read_hash_procedures
;
1346 if (scm_is_null (this))
1348 /* not found, so add it to the beginning. */
1349 if (scm_is_true (proc
))
1351 *scm_read_hash_procedures
=
1352 scm_cons (scm_cons (chr
, proc
), *scm_read_hash_procedures
);
1356 if (scm_is_eq (chr
, SCM_CAAR (this)))
1358 /* already in the alist. */
1359 if (scm_is_false (proc
))
1362 if (scm_is_false (prev
))
1364 *scm_read_hash_procedures
=
1365 SCM_CDR (*scm_read_hash_procedures
);
1368 scm_set_cdr_x (prev
, SCM_CDR (this));
1373 scm_set_cdr_x (SCM_CAR (this), proc
);
1378 this = SCM_CDR (this);
1381 return SCM_UNSPECIFIED
;
1385 /* Recover the read-hash procedure corresponding to char c. */
1387 scm_get_hash_procedure (int c
)
1389 SCM rest
= *scm_read_hash_procedures
;
1393 if (scm_is_null (rest
))
1396 if (SCM_CHAR (SCM_CAAR (rest
)) == c
)
1397 return SCM_CDAR (rest
);
1399 rest
= SCM_CDR (rest
);
1403 #define SCM_ENCODING_SEARCH_SIZE (500)
1405 /* Search the first few hundred characters of a file for
1406 an emacs-like coding declaration. */
1408 scm_scan_for_encoding (SCM port
)
1410 char header
[SCM_ENCODING_SEARCH_SIZE
+1];
1412 char *encoding
= NULL
;
1418 bytes_read
= scm_c_read (port
, header
, SCM_ENCODING_SEARCH_SIZE
);
1419 scm_seek (port
, scm_from_int (0), scm_from_int (SEEK_SET
));
1422 && header
[0] == '\xef' && header
[1] == '\xbb' && header
[2] == '\xbf')
1425 /* search past "coding[:=]" */
1429 if ((pos
= strstr(pos
, "coding")) == NULL
)
1432 pos
+= strlen("coding");
1433 if (pos
- header
>= SCM_ENCODING_SEARCH_SIZE
||
1434 (*pos
== ':' || *pos
== '='))
1442 while (pos
- header
<= SCM_ENCODING_SEARCH_SIZE
&&
1443 (*pos
== ' ' || *pos
== '\t'))
1446 /* grab the next token */
1448 while (pos
+ i
- header
<= SCM_ENCODING_SEARCH_SIZE
1449 && pos
+ i
- header
< bytes_read
1450 && (isalnum((int) pos
[i
]) || pos
[i
] == '_' || pos
[i
] == '-'
1457 encoding
= scm_malloc (i
+1);
1458 memcpy (encoding
, pos
, i
);
1460 for (i
= 0; i
< strlen (encoding
); i
++)
1461 encoding
[i
] = toupper ((int) encoding
[i
]);
1463 /* push backwards to make sure we were in a comment */
1465 while (pos
- i
- header
> 0)
1467 if (*(pos
- i
) == '\n')
1469 /* This wasn't in a semicolon comment. Check for a
1470 hash-bang comment. */
1471 char *beg
= strstr (header
, "#!");
1472 char *end
= strstr (header
, "!#");
1473 if (beg
< pos
&& pos
< end
)
1477 if (*(pos
- i
) == ';')
1486 /* This wasn't in a comment */
1490 if (utf8_bom
&& strcmp(encoding
, "UTF-8"))
1491 scm_misc_error (NULL
,
1492 "the port input declares the encoding ~s but is encoded as UTF-8",
1493 scm_list_1 (scm_from_locale_string (encoding
)));
1498 SCM_DEFINE (scm_file_encoding
, "file-encoding", 1, 0, 0,
1500 "Scans the port for an EMACS-like character coding declaration\n"
1501 "near the top of the contents of a port with random-acessible contents.\n"
1502 "The coding declaration is of the form\n"
1503 "@code{coding: XXXXX} and must appear in a scheme comment.\n"
1505 "Returns a string containing the character encoding of the file\n"
1506 "if a declaration was found, or @code{#f} otherwise.\n")
1507 #define FUNC_NAME s_scm_file_encoding
1512 enc
= scm_scan_for_encoding (port
);
1517 s_enc
= scm_from_locale_string (enc
);
1529 scm_read_hash_procedures
=
1530 SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL
));
1532 scm_init_opts (scm_read_options
, scm_read_opts
);
1533 #include "libguile/read.x"