1 /* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007, 2008, 2009 Free Software
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
33 #include "libguile/_scm.h"
34 #include "libguile/bytevectors.h"
35 #include "libguile/chars.h"
36 #include "libguile/eval.h"
37 #include "libguile/arrays.h"
38 #include "libguile/bitvectors.h"
39 #include "libguile/keywords.h"
40 #include "libguile/alist.h"
41 #include "libguile/srcprop.h"
42 #include "libguile/hashtab.h"
43 #include "libguile/hash.h"
44 #include "libguile/ports.h"
45 #include "libguile/fports.h"
46 #include "libguile/root.h"
47 #include "libguile/strings.h"
48 #include "libguile/strports.h"
49 #include "libguile/vectors.h"
50 #include "libguile/validate.h"
51 #include "libguile/srfi-4.h"
52 #include "libguile/srfi-13.h"
54 #include "libguile/read.h"
55 #include "libguile/private-options.h"
60 SCM_GLOBAL_SYMBOL (scm_sym_dot
, ".");
61 SCM_SYMBOL (scm_keyword_prefix
, "prefix");
62 SCM_SYMBOL (scm_keyword_postfix
, "postfix");
64 scm_t_option scm_read_opts
[] = {
65 { SCM_OPTION_BOOLEAN
, "copy", 0,
66 "Copy source code expressions." },
67 { SCM_OPTION_BOOLEAN
, "positions", 0,
68 "Record positions of source code expressions." },
69 { SCM_OPTION_BOOLEAN
, "case-insensitive", 0,
70 "Convert symbols to lower case."},
71 { SCM_OPTION_SCM
, "keywords", (unsigned long) SCM_BOOL_F
,
72 "Style of keyword recognition: #f, 'prefix or 'postfix."},
74 { SCM_OPTION_BOOLEAN
, "elisp-vectors", 0,
75 "Support Elisp vector syntax, namely `[...]'."},
76 { SCM_OPTION_BOOLEAN
, "elisp-strings", 0,
77 "Support `\\(' and `\\)' in strings."},
83 Give meaningful error messages for errors
87 FILE:LINE:COL: MESSAGE
90 This is not standard GNU format, but the test-suite likes the real
91 message to be in front.
97 scm_i_input_error (char const *function
,
98 SCM port
, const char *message
, SCM arg
)
100 SCM fn
= (scm_is_string (SCM_FILENAME(port
))
102 : scm_from_locale_string ("#<unknown port>"));
104 SCM string_port
= scm_open_output_string ();
105 SCM string
= SCM_EOL
;
106 scm_simple_format (string_port
,
107 scm_from_locale_string ("~A:~S:~S: ~A"),
109 scm_from_long (SCM_LINUM (port
) + 1),
110 scm_from_int (SCM_COL (port
) + 1),
111 scm_from_locale_string (message
)));
113 string
= scm_get_output_string (string_port
);
114 scm_close_output_port (string_port
);
115 scm_error_scm (scm_from_locale_symbol ("read-error"),
116 function
? scm_from_locale_string (function
) : SCM_BOOL_F
,
123 SCM_DEFINE (scm_read_options
, "read-options-interface", 0, 1, 0,
125 "Option interface for the read options. Instead of using\n"
126 "this procedure directly, use the procedures @code{read-enable},\n"
127 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
128 #define FUNC_NAME s_scm_read_options
130 SCM ans
= scm_options (setting
,
133 if (SCM_COPY_SOURCE_P
)
134 SCM_RECORD_POSITIONS_P
= 1;
139 /* An association list mapping extra hash characters to procedures. */
140 static SCM
*scm_read_hash_procedures
;
147 /* Size of the C buffer used to read symbols and numbers. */
148 #define READER_BUFFER_SIZE 128
150 /* Size of the C buffer used to read strings. */
151 #define READER_STRING_BUFFER_SIZE 512
153 /* The maximum size of Scheme character names. */
154 #define READER_CHAR_NAME_MAX_SIZE 50
157 /* `isblank' is only in C99. */
158 #define CHAR_IS_BLANK_(_chr) \
159 (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n') \
160 || ((_chr) == '\f') || ((_chr) == '\r'))
163 # define CHAR_IS_BLANK(_chr) \
164 ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
166 # define CHAR_IS_BLANK CHAR_IS_BLANK_
170 /* R5RS one-character delimiters (see section 7.1.1, ``Lexical
172 #define CHAR_IS_R5RS_DELIMITER(c) \
174 || (c == ')') || (c == '(') || (c == ';') || (c == '"'))
176 #define CHAR_IS_DELIMITER CHAR_IS_R5RS_DELIMITER
178 /* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
180 #define CHAR_IS_EXPONENT_MARKER(_chr) \
181 (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
182 || ((_chr) == 'd') || ((_chr) == 'l'))
184 /* Read an SCSH block comment. */
185 static inline SCM
scm_read_scsh_block_comment (scm_t_wchar
, SCM
);
186 static SCM
scm_read_r6rs_block_comment (scm_t_wchar
, SCM
);
187 static SCM
scm_read_commented_expression (scm_t_wchar
, SCM
);
188 static SCM
scm_get_hash_procedure (int);
190 /* Read from PORT until a delimiter (e.g., a whitespace) is read. Return
191 zero if the whole token fits in BUF, non-zero otherwise. */
193 read_token (SCM port
, SCM buf
, size_t *read
)
198 while (*read
< scm_i_string_length (buf
))
200 chr
= scm_getc (port
);
204 scm_i_string_stop_writing ();
208 chr
= (SCM_CASE_INSENSITIVE_P
? uc_tolower (chr
) : chr
);
210 if (CHAR_IS_DELIMITER (chr
))
212 scm_i_string_stop_writing ();
213 scm_ungetc (chr
, port
);
217 scm_i_string_set_x (buf
, *read
, chr
);
225 read_complete_token (SCM port
, size_t *read
)
227 SCM buffer
, str
= SCM_EOL
;
231 buffer
= scm_i_make_string (READER_BUFFER_SIZE
, NULL
);
232 overflow
= read_token (port
, buffer
, read
);
234 return scm_i_substring (buffer
, 0, *read
);
236 str
= scm_string_copy (buffer
);
239 overflow
= read_token (port
, buffer
, &len
);
240 str
= scm_string_append (scm_list_2 (str
, buffer
));
245 return scm_i_substring (str
, 0, *read
);
248 /* Skip whitespace from PORT and return the first non-whitespace character
249 read. Raise an error on end-of-file. */
251 flush_ws (SCM port
, const char *eoferr
)
253 register scm_t_wchar c
;
255 switch (c
= scm_getc (port
))
261 scm_i_input_error (eoferr
,
270 switch (c
= scm_getc (port
))
276 case SCM_LINE_INCREMENTORS
:
282 switch (c
= scm_getc (port
))
285 eoferr
= "read_sharp";
288 scm_read_scsh_block_comment (c
, port
);
291 scm_read_commented_expression (c
, port
);
294 if (scm_is_false (scm_get_hash_procedure (c
)))
296 scm_read_r6rs_block_comment (c
, port
);
301 scm_ungetc (c
, port
);
306 case SCM_LINE_INCREMENTORS
:
307 case SCM_SINGLE_SPACES
:
322 static SCM
scm_read_expression (SCM port
);
323 static SCM
scm_read_sharp (int chr
, SCM port
);
324 static SCM
recsexpr (SCM obj
, long line
, int column
, SCM filename
);
328 scm_read_sexp (scm_t_wchar chr
, SCM port
)
329 #define FUNC_NAME "scm_i_lreadparen"
333 register SCM tl
, ans
= SCM_EOL
;
334 SCM tl2
= SCM_EOL
, ans2
= SCM_EOL
, copy
= SCM_BOOL_F
;
335 static const int terminating_char
= ')';
337 /* Need to capture line and column numbers here. */
338 long line
= SCM_LINUM (port
);
339 int column
= SCM_COL (port
) - 1;
342 c
= flush_ws (port
, FUNC_NAME
);
343 if (terminating_char
== c
)
346 scm_ungetc (c
, port
);
347 if (scm_is_eq (scm_sym_dot
,
348 (tmp
= scm_read_expression (port
))))
350 ans
= scm_read_expression (port
);
351 if (terminating_char
!= (c
= flush_ws (port
, FUNC_NAME
)))
352 scm_i_input_error (FUNC_NAME
, port
, "missing close paren",
357 /* Build the head of the list structure. */
358 ans
= tl
= scm_cons (tmp
, SCM_EOL
);
360 if (SCM_COPY_SOURCE_P
)
361 ans2
= tl2
= scm_cons (scm_is_pair (tmp
)
366 while (terminating_char
!= (c
= flush_ws (port
, FUNC_NAME
)))
370 scm_ungetc (c
, port
);
371 if (scm_is_eq (scm_sym_dot
,
372 (tmp
= scm_read_expression (port
))))
374 SCM_SETCDR (tl
, tmp
= scm_read_expression (port
));
376 if (SCM_COPY_SOURCE_P
)
377 SCM_SETCDR (tl2
, scm_cons (scm_is_pair (tmp
) ? copy
: tmp
,
380 c
= flush_ws (port
, FUNC_NAME
);
381 if (terminating_char
!= c
)
382 scm_i_input_error (FUNC_NAME
, port
,
383 "in pair: missing close paren", SCM_EOL
);
387 new_tail
= scm_cons (tmp
, SCM_EOL
);
388 SCM_SETCDR (tl
, new_tail
);
391 if (SCM_COPY_SOURCE_P
)
393 SCM new_tail2
= scm_cons (scm_is_pair (tmp
)
396 SCM_SETCDR (tl2
, new_tail2
);
402 if (SCM_RECORD_POSITIONS_P
)
403 scm_whash_insert (scm_source_whash
,
405 scm_make_srcprops (line
, column
,
416 scm_read_string (int chr
, SCM port
)
417 #define FUNC_NAME "scm_lreadr"
419 /* For strings smaller than C_STR, this function creates only one Scheme
420 object (the string returned). */
422 SCM str
= SCM_BOOL_F
;
423 unsigned c_str_len
= 0;
426 str
= scm_i_make_string (READER_STRING_BUFFER_SIZE
, NULL
);
427 while ('"' != (c
= scm_getc (port
)))
432 scm_i_input_error (FUNC_NAME
, port
,
433 "end of file in string constant", SCM_EOL
);
436 if (c_str_len
+ 1 >= scm_i_string_length (str
))
438 SCM addy
= scm_i_make_string (READER_STRING_BUFFER_SIZE
, NULL
);
440 str
= scm_string_append (scm_list_2 (str
, addy
));
445 switch (c
= scm_getc (port
))
455 if (SCM_ESCAPED_PARENS_P
)
491 if ('0' <= a
&& a
<= '9')
493 else if ('A' <= a
&& a
<= 'F')
495 else if ('a' <= a
&& a
<= 'f')
502 if ('0' <= b
&& b
<= '9')
504 else if ('A' <= b
&& b
<= 'F')
506 else if ('a' <= b
&& b
<= 'f')
521 for (i
= 0; i
< 4; i
++)
526 if ('0' <= a
&& a
<= '9')
528 else if ('A' <= a
&& a
<= 'F')
530 else if ('a' <= a
&& a
<= 'f')
546 for (i
= 0; i
< 6; i
++)
551 if ('0' <= a
&& a
<= '9')
553 else if ('A' <= a
&& a
<= 'F')
555 else if ('a' <= a
&& a
<= 'f')
568 scm_i_input_error (FUNC_NAME
, port
,
569 "illegal character in escape sequence: ~S",
570 scm_list_1 (SCM_MAKE_CHAR (c
)));
573 str
= scm_i_string_start_writing (str
);
574 scm_i_string_set_x (str
, c_str_len
++, c
);
575 scm_i_string_stop_writing ();
580 return scm_i_substring_copy (str
, 0, c_str_len
);
589 scm_read_number (scm_t_wchar chr
, SCM port
)
595 scm_ungetc (chr
, port
);
596 buffer
= read_complete_token (port
, &read
);
597 result
= scm_string_to_number (buffer
, SCM_UNDEFINED
);
598 if (!scm_is_true (result
))
599 /* Return a symbol instead of a number. */
600 result
= scm_string_to_symbol (buffer
);
606 scm_read_mixed_case_symbol (scm_t_wchar chr
, SCM port
)
609 int ends_with_colon
= 0;
612 int postfix
= scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE
), scm_keyword_postfix
);
614 scm_ungetc (chr
, port
);
615 buffer
= read_complete_token (port
, &read
);
617 ends_with_colon
= scm_i_string_ref (buffer
, read
- 1) == ':';
619 if (postfix
&& ends_with_colon
&& (read
> 1))
620 result
= scm_symbol_to_keyword (scm_string_to_symbol (scm_i_substring (buffer
, 0, read
- 1)));
622 result
= scm_string_to_symbol (buffer
);
628 scm_read_number_and_radix (scm_t_wchar chr
, SCM port
)
629 #define FUNC_NAME "scm_lreadr"
633 SCM buffer
= scm_i_make_string (READER_BUFFER_SIZE
, NULL
);
659 scm_ungetc (chr
, port
);
660 scm_ungetc ('#', port
);
664 buffer
= read_complete_token (port
, &read
);
665 result
= scm_string_to_number (buffer
, scm_from_uint (radix
));
667 if (scm_is_true (result
))
670 scm_i_input_error (FUNC_NAME
, port
, "unknown # object", SCM_EOL
);
677 scm_read_quote (int chr
, SCM port
)
680 long line
= SCM_LINUM (port
);
681 int column
= SCM_COL (port
) - 1;
686 p
= scm_sym_quasiquote
;
699 p
= scm_sym_uq_splicing
;
702 scm_ungetc (c
, port
);
709 fprintf (stderr
, "%s: unhandled quote character (%i)\n",
710 "scm_read_quote", chr
);
714 p
= scm_cons2 (p
, scm_read_expression (port
), SCM_EOL
);
715 if (SCM_RECORD_POSITIONS_P
)
716 scm_whash_insert (scm_source_whash
, p
,
717 scm_make_srcprops (line
, column
,
720 ? (scm_cons2 (SCM_CAR (p
),
721 SCM_CAR (SCM_CDR (p
)),
730 SCM_SYMBOL (sym_syntax
, "syntax");
731 SCM_SYMBOL (sym_quasisyntax
, "quasisyntax");
732 SCM_SYMBOL (sym_unsyntax
, "unsyntax");
733 SCM_SYMBOL (sym_unsyntax_splicing
, "unsyntax-splicing");
736 scm_read_syntax (int chr
, SCM port
)
739 long line
= SCM_LINUM (port
);
740 int column
= SCM_COL (port
) - 1;
758 p
= sym_unsyntax_splicing
;
761 scm_ungetc (c
, port
);
768 fprintf (stderr
, "%s: unhandled syntax character (%i)\n",
769 "scm_read_syntax", chr
);
773 p
= scm_cons2 (p
, scm_read_expression (port
), SCM_EOL
);
774 if (SCM_RECORD_POSITIONS_P
)
775 scm_whash_insert (scm_source_whash
, p
,
776 scm_make_srcprops (line
, column
,
779 ? (scm_cons2 (SCM_CAR (p
),
780 SCM_CAR (SCM_CDR (p
)),
790 scm_read_semicolon_comment (int chr
, SCM port
)
794 /* We use the get_byte here because there is no need to get the
795 locale correct with comment input. This presumes that newline
796 always represents itself no matter what the encoding is. */
797 for (c
= scm_get_byte_or_eof (port
);
798 (c
!= EOF
) && (c
!= '\n');
799 c
= scm_getc (port
));
801 return SCM_UNSPECIFIED
;
805 /* Sharp readers, i.e. readers called after a `#' sign has been read. */
808 scm_read_boolean (int chr
, SCM port
)
821 return SCM_UNSPECIFIED
;
825 scm_read_character (scm_t_wchar chr
, SCM port
)
826 #define FUNC_NAME "scm_lreadr"
828 SCM charname
= scm_i_make_string (READER_CHAR_NAME_MAX_SIZE
, NULL
);
833 overflow
= read_token (port
, charname
, &charname_len
);
834 charname
= scm_c_substring (charname
, 0, charname_len
);
839 if (charname_len
== 0)
841 chr
= scm_getc (port
);
843 scm_i_input_error (FUNC_NAME
, port
, "unexpected end of file "
844 "while reading character", SCM_EOL
);
846 /* CHR must be a token delimiter, like a whitespace. */
847 return (SCM_MAKE_CHAR (chr
));
850 if (charname_len
== 1)
851 return SCM_MAKE_CHAR (scm_i_string_ref (charname
, 0));
853 cp
= scm_i_string_ref (charname
, 0);
854 if (cp
== SCM_CODEPOINT_DOTTED_CIRCLE
&& charname_len
== 2)
855 return SCM_MAKE_CHAR (scm_i_string_ref (charname
, 1));
857 if (cp
>= '0' && cp
< '8')
859 /* Dirk:FIXME:: This type of character syntax is not R5RS
860 * compliant. Further, it should be verified that the constant
861 * does only consist of octal digits. */
862 SCM p
= scm_string_to_number (charname
, scm_from_uint (8));
865 scm_t_wchar c
= SCM_I_INUM (p
);
866 if (SCM_IS_UNICODE_CHAR (c
))
867 return SCM_MAKE_CHAR (c
);
869 scm_i_input_error (FUNC_NAME
, port
,
870 "out-of-range octal character escape: ~a",
871 scm_list_1 (charname
));
875 /* The names of characters should never have non-Latin1
877 if (scm_i_is_narrow_string (charname
)
878 || scm_i_try_narrow_string (charname
))
879 { SCM ch
= scm_i_charname_to_char (scm_i_string_chars (charname
),
881 if (scm_is_true (ch
))
886 scm_i_input_error (FUNC_NAME
, port
, "unknown character name ~a",
887 scm_list_1 (charname
));
889 return SCM_UNSPECIFIED
;
894 scm_read_keyword (int chr
, SCM port
)
898 /* Read the symbol that comprises the keyword. Doing this instead of
899 invoking a specific symbol reader function allows `scm_read_keyword ()'
900 to adapt to the delimiters currently valid of symbols.
902 XXX: This implementation allows sloppy syntaxes like `#: key'. */
903 symbol
= scm_read_expression (port
);
904 if (!scm_is_symbol (symbol
))
905 scm_i_input_error ("scm_read_keyword", port
,
906 "keyword prefix `~a' not followed by a symbol: ~s",
907 scm_list_2 (SCM_MAKE_CHAR (chr
), symbol
));
909 return (scm_symbol_to_keyword (symbol
));
913 scm_read_vector (int chr
, SCM port
)
915 /* Note: We call `scm_read_sexp ()' rather than READER here in order to
916 guarantee that it's going to do what we want. After all, this is an
917 implementation detail of `scm_read_vector ()', not a desirable
919 return (scm_vector (scm_read_sexp (chr
, port
)));
923 scm_read_srfi4_vector (int chr
, SCM port
)
925 return scm_i_read_array (port
, chr
);
929 scm_read_bytevector (scm_t_wchar chr
, SCM port
)
931 chr
= scm_getc (port
);
935 chr
= scm_getc (port
);
939 chr
= scm_getc (port
);
943 return scm_u8_list_to_bytevector (scm_read_sexp (chr
, port
));
946 scm_i_input_error ("read_bytevector", port
,
947 "invalid bytevector prefix",
948 SCM_MAKE_CHAR (chr
));
949 return SCM_UNSPECIFIED
;
953 scm_read_guile_bit_vector (scm_t_wchar chr
, SCM port
)
955 /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
956 terribly inefficient but who cares? */
957 SCM s_bits
= SCM_EOL
;
959 for (chr
= scm_getc (port
);
960 (chr
!= EOF
) && ((chr
== '0') || (chr
== '1'));
961 chr
= scm_getc (port
))
963 s_bits
= scm_cons ((chr
== '0') ? SCM_BOOL_F
: SCM_BOOL_T
, s_bits
);
967 scm_ungetc (chr
, port
);
969 return scm_bitvector (scm_reverse_x (s_bits
, SCM_EOL
));
973 scm_read_scsh_block_comment (scm_t_wchar chr
, SCM port
)
977 /* We can use the get_byte here because there is no need to get the
978 locale correct when reading comments. This presumes that
979 hash and exclamation points always represent themselves no
980 matter what the source encoding is.*/
983 int c
= scm_get_byte_or_eof (port
);
986 scm_i_input_error ("skip_block_comment", port
,
987 "unterminated `#! ... !#' comment", SCM_EOL
);
991 else if (c
== '#' && bang_seen
)
997 return SCM_UNSPECIFIED
;
1001 scm_read_r6rs_block_comment (scm_t_wchar chr
, SCM port
)
1003 /* Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
1004 nested. So care must be taken. */
1005 int nesting_level
= 1;
1006 int opening_seen
= 0, closing_seen
= 0;
1008 while (nesting_level
> 0)
1010 int c
= scm_getc (port
);
1013 scm_i_input_error ("scm_read_r6rs_block_comment", port
,
1014 "unterminated `#| ... |#' comment", SCM_EOL
);
1022 else if (closing_seen
)
1033 opening_seen
= closing_seen
= 0;
1036 return SCM_UNSPECIFIED
;
1040 scm_read_commented_expression (scm_t_wchar chr
, SCM port
)
1044 c
= flush_ws (port
, (char *) NULL
);
1046 scm_i_input_error ("read_commented_expression", port
,
1047 "no expression after #; comment", SCM_EOL
);
1048 scm_ungetc (c
, port
);
1049 scm_read_expression (port
);
1050 return SCM_UNSPECIFIED
;
1054 scm_read_extended_symbol (scm_t_wchar chr
, SCM port
)
1056 /* Guile's extended symbol read syntax looks like this:
1058 #{This is all a symbol name}#
1060 So here, CHR is expected to be `{'. */
1061 int saw_brace
= 0, finished
= 0;
1063 SCM buf
= scm_i_make_string (1024, NULL
);
1065 buf
= scm_i_string_start_writing (buf
);
1067 while ((chr
= scm_getc (port
)) != EOF
)
1079 scm_i_string_set_x (buf
, len
++, '}');
1080 scm_i_string_set_x (buf
, len
++, chr
);
1083 else if (chr
== '}')
1086 scm_i_string_set_x (buf
, len
++, chr
);
1088 if (len
>= scm_i_string_length (buf
) - 2)
1092 scm_i_string_stop_writing ();
1093 addy
= scm_i_make_string (1024, NULL
);
1094 buf
= scm_string_append (scm_list_2 (buf
, addy
));
1096 buf
= scm_i_string_start_writing (buf
);
1102 scm_i_string_stop_writing ();
1104 return (scm_string_to_symbol (scm_c_substring (buf
, 0, len
)));
1109 /* Top-level token readers, i.e., dispatchers. */
1112 scm_read_sharp_extension (int chr
, SCM port
)
1116 proc
= scm_get_hash_procedure (chr
);
1117 if (scm_is_true (scm_procedure_p (proc
)))
1119 long line
= SCM_LINUM (port
);
1120 int column
= SCM_COL (port
) - 2;
1123 got
= scm_call_2 (proc
, SCM_MAKE_CHAR (chr
), port
);
1124 if (!scm_is_eq (got
, SCM_UNSPECIFIED
))
1126 if (SCM_RECORD_POSITIONS_P
)
1127 return (recsexpr (got
, line
, column
,
1128 SCM_FILENAME (port
)));
1134 return SCM_UNSPECIFIED
;
1137 /* The reader for the sharp `#' character. It basically dispatches reads
1138 among the above token readers. */
1140 scm_read_sharp (scm_t_wchar chr
, SCM port
)
1141 #define FUNC_NAME "scm_lreadr"
1145 chr
= scm_getc (port
);
1147 result
= scm_read_sharp_extension (chr
, port
);
1148 if (!scm_is_eq (result
, SCM_UNSPECIFIED
))
1154 return (scm_read_character (chr
, port
));
1156 return (scm_read_vector (chr
, port
));
1160 /* This one may return either a boolean or an SRFI-4 vector. */
1161 return (scm_read_srfi4_vector (chr
, port
));
1163 return (scm_read_bytevector (chr
, port
));
1165 return (scm_read_guile_bit_vector (chr
, port
));
1169 /* This one may return either a boolean or an SRFI-4 vector. */
1170 return (scm_read_boolean (chr
, port
));
1172 return (scm_read_keyword (chr
, port
));
1173 case '0': case '1': case '2': case '3': case '4':
1174 case '5': case '6': case '7': case '8': case '9':
1176 #if SCM_ENABLE_DEPRECATED
1177 /* See below for 'i' and 'e'. */
1184 return (scm_i_read_array (port
, chr
));
1188 #if SCM_ENABLE_DEPRECATED
1190 /* When next char is '(', it really is an old-style
1192 scm_t_wchar next_c
= scm_getc (port
);
1194 scm_ungetc (next_c
, port
);
1196 return scm_i_read_array (port
, chr
);
1210 return (scm_read_number_and_radix (chr
, port
));
1212 return (scm_read_extended_symbol (chr
, port
));
1214 return (scm_read_scsh_block_comment (chr
, port
));
1216 return (scm_read_commented_expression (chr
, port
));
1220 return (scm_read_syntax (chr
, port
));
1222 result
= scm_read_sharp_extension (chr
, port
);
1223 if (scm_is_eq (result
, SCM_UNSPECIFIED
))
1225 /* To remain compatible with 1.8 and earlier, the following
1226 characters have lower precedence than `read-hash-extend'
1231 return scm_read_r6rs_block_comment (chr
, port
);
1233 scm_i_input_error (FUNC_NAME
, port
, "Unknown # object: ~S",
1234 scm_list_1 (SCM_MAKE_CHAR (chr
)));
1241 return SCM_UNSPECIFIED
;
1246 scm_read_expression (SCM port
)
1247 #define FUNC_NAME "scm_read_expression"
1251 register scm_t_wchar chr
;
1253 chr
= scm_getc (port
);
1257 case SCM_WHITE_SPACES
:
1258 case SCM_LINE_INCREMENTORS
:
1261 (void) scm_read_semicolon_comment (chr
, port
);
1264 return (scm_read_sexp (chr
, port
));
1266 return (scm_read_string (chr
, port
));
1270 return (scm_read_quote (chr
, port
));
1274 result
= scm_read_sharp (chr
, port
);
1275 if (scm_is_eq (result
, SCM_UNSPECIFIED
))
1276 /* We read a comment or some such. */
1282 scm_i_input_error (FUNC_NAME
, port
, "unexpected \")\"", SCM_EOL
);
1287 if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE
), scm_keyword_prefix
))
1288 return scm_symbol_to_keyword (scm_read_expression (port
));
1293 if (((chr
>= '0') && (chr
<= '9'))
1294 || (strchr ("+-.", chr
)))
1295 return (scm_read_number (chr
, port
));
1297 return (scm_read_mixed_case_symbol (chr
, port
));
1305 /* Actual reader. */
1307 SCM_DEFINE (scm_read
, "read", 0, 1, 0,
1309 "Read an s-expression from the input port @var{port}, or from\n"
1310 "the current input port if @var{port} is not specified.\n"
1311 "Any whitespace before the next token is discarded.")
1312 #define FUNC_NAME s_scm_read
1316 if (SCM_UNBNDP (port
))
1317 port
= scm_current_input_port ();
1318 SCM_VALIDATE_OPINPORT (1, port
);
1320 c
= flush_ws (port
, (char *) NULL
);
1323 scm_ungetc (c
, port
);
1325 return (scm_read_expression (port
));
1332 /* Used when recording expressions constructed by `scm_read_sharp ()'. */
1334 recsexpr (SCM obj
, long line
, int column
, SCM filename
)
1336 if (!scm_is_pair(obj
)) {
1339 SCM tmp
= obj
, copy
;
1340 /* If this sexpr is visible in the read:sharp source, we want to
1341 keep that information, so only record non-constant cons cells
1342 which haven't previously been read by the reader. */
1343 if (scm_is_false (scm_whash_lookup (scm_source_whash
, obj
)))
1345 if (SCM_COPY_SOURCE_P
)
1347 copy
= scm_cons (recsexpr (SCM_CAR (obj
), line
, column
, filename
),
1349 while ((tmp
= SCM_CDR (tmp
)) && scm_is_pair (tmp
))
1351 SCM_SETCDR (copy
, scm_cons (recsexpr (SCM_CAR (tmp
),
1356 copy
= SCM_CDR (copy
);
1358 SCM_SETCDR (copy
, tmp
);
1362 recsexpr (SCM_CAR (obj
), line
, column
, filename
);
1363 while ((tmp
= SCM_CDR (tmp
)) && scm_is_pair (tmp
))
1364 recsexpr (SCM_CAR (tmp
), line
, column
, filename
);
1365 copy
= SCM_UNDEFINED
;
1367 scm_whash_insert (scm_source_whash
,
1369 scm_make_srcprops (line
,
1379 /* Manipulate the read-hash-procedures alist. This could be written in
1380 Scheme, but maybe it will also be used by C code during initialisation. */
1381 SCM_DEFINE (scm_read_hash_extend
, "read-hash-extend", 2, 0, 0,
1382 (SCM chr
, SCM proc
),
1383 "Install the procedure @var{proc} for reading expressions\n"
1384 "starting with the character sequence @code{#} and @var{chr}.\n"
1385 "@var{proc} will be called with two arguments: the character\n"
1386 "@var{chr} and the port to read further data from. The object\n"
1387 "returned will be the return value of @code{read}. \n"
1388 "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1390 #define FUNC_NAME s_scm_read_hash_extend
1395 SCM_VALIDATE_CHAR (1, chr
);
1396 SCM_ASSERT (scm_is_false (proc
)
1397 || scm_is_eq (scm_procedure_p (proc
), SCM_BOOL_T
),
1398 proc
, SCM_ARG2
, FUNC_NAME
);
1400 /* Check if chr is already in the alist. */
1401 this = *scm_read_hash_procedures
;
1405 if (scm_is_null (this))
1407 /* not found, so add it to the beginning. */
1408 if (scm_is_true (proc
))
1410 *scm_read_hash_procedures
=
1411 scm_cons (scm_cons (chr
, proc
), *scm_read_hash_procedures
);
1415 if (scm_is_eq (chr
, SCM_CAAR (this)))
1417 /* already in the alist. */
1418 if (scm_is_false (proc
))
1421 if (scm_is_false (prev
))
1423 *scm_read_hash_procedures
=
1424 SCM_CDR (*scm_read_hash_procedures
);
1427 scm_set_cdr_x (prev
, SCM_CDR (this));
1432 scm_set_cdr_x (SCM_CAR (this), proc
);
1437 this = SCM_CDR (this);
1440 return SCM_UNSPECIFIED
;
1444 /* Recover the read-hash procedure corresponding to char c. */
1446 scm_get_hash_procedure (int c
)
1448 SCM rest
= *scm_read_hash_procedures
;
1452 if (scm_is_null (rest
))
1455 if (SCM_CHAR (SCM_CAAR (rest
)) == c
)
1456 return SCM_CDAR (rest
);
1458 rest
= SCM_CDR (rest
);
1462 #define SCM_ENCODING_SEARCH_SIZE (500)
1464 /* Search the first few hundred characters of a file for an Emacs-like coding
1465 declaration. Returns either NULL or a string whose storage has been
1466 allocated with `scm_gc_malloc ()'. */
1468 scm_i_scan_for_encoding (SCM port
)
1470 char header
[SCM_ENCODING_SEARCH_SIZE
+1];
1472 char *encoding
= NULL
;
1478 if (SCM_FPORTP (port
) && !SCM_FDES_RANDOM_P (SCM_FPORT_FDES (port
)))
1479 /* PORT is a non-seekable file port (e.g., as created by Bash when using
1480 "guile <(echo '(display "hello")')") so bail out. */
1483 bytes_read
= scm_c_read (port
, header
, SCM_ENCODING_SEARCH_SIZE
);
1485 scm_seek (port
, scm_from_int (0), scm_from_int (SEEK_SET
));
1488 && header
[0] == '\xef' && header
[1] == '\xbb' && header
[2] == '\xbf')
1491 /* search past "coding[:=]" */
1495 if ((pos
= strstr(pos
, "coding")) == NULL
)
1498 pos
+= strlen("coding");
1499 if (pos
- header
>= SCM_ENCODING_SEARCH_SIZE
||
1500 (*pos
== ':' || *pos
== '='))
1508 while (pos
- header
<= SCM_ENCODING_SEARCH_SIZE
&&
1509 (*pos
== ' ' || *pos
== '\t'))
1512 /* grab the next token */
1514 while (pos
+ i
- header
<= SCM_ENCODING_SEARCH_SIZE
1515 && pos
+ i
- header
< bytes_read
1516 && (isalnum ((int) pos
[i
]) || strchr ("_-.:/,+=()", pos
[i
]) != NULL
))
1522 encoding
= scm_gc_strndup (pos
, i
, "encoding");
1523 for (i
= 0; i
< strlen (encoding
); i
++)
1524 encoding
[i
] = toupper ((int) encoding
[i
]);
1526 /* push backwards to make sure we were in a comment */
1528 while (pos
- i
- header
> 0)
1530 if (*(pos
- i
) == '\n')
1532 /* This wasn't in a semicolon comment. Check for a
1533 hash-bang comment. */
1534 char *beg
= strstr (header
, "#!");
1535 char *end
= strstr (header
, "!#");
1536 if (beg
< pos
&& pos
< end
)
1540 if (*(pos
- i
) == ';')
1548 /* This wasn't in a comment */
1551 if (utf8_bom
&& strcmp(encoding
, "UTF-8"))
1552 scm_misc_error (NULL
,
1553 "the port input declares the encoding ~s but is encoded as UTF-8",
1554 scm_list_1 (scm_from_locale_string (encoding
)));
1559 SCM_DEFINE (scm_file_encoding
, "file-encoding", 1, 0, 0,
1561 "Scans the port for an Emacs-like character coding declaration\n"
1562 "near the top of the contents of a port with random-acessible contents.\n"
1563 "The coding declaration is of the form\n"
1564 "@code{coding: XXXXX} and must appear in a scheme comment.\n"
1566 "Returns a string containing the character encoding of the file\n"
1567 "if a declaration was found, or @code{#f} otherwise.\n")
1568 #define FUNC_NAME s_scm_file_encoding
1573 enc
= scm_i_scan_for_encoding (port
);
1578 s_enc
= scm_from_locale_string (enc
);
1589 scm_read_hash_procedures
=
1590 SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL
));
1592 scm_init_opts (scm_read_options
, scm_read_opts
);
1593 #include "libguile/read.x"