1 /* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007 Free Software
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "libguile/_scm.h"
24 #include "libguile/chars.h"
25 #include "libguile/eval.h"
26 #include "libguile/unif.h"
27 #include "libguile/keywords.h"
28 #include "libguile/alist.h"
29 #include "libguile/srcprop.h"
30 #include "libguile/hashtab.h"
31 #include "libguile/hash.h"
32 #include "libguile/ports.h"
33 #include "libguile/root.h"
34 #include "libguile/strings.h"
35 #include "libguile/strports.h"
36 #include "libguile/vectors.h"
37 #include "libguile/validate.h"
38 #include "libguile/srfi-4.h"
40 #include "libguile/read.h"
41 #include "libguile/private-options.h"
/* Symbols the reader compares against.  scm_sym_dot is tested with
   scm_is_eq against freshly read objects in the list readers, and
   scm_keyword_prefix is tested against SCM_KEYWORD_STYLE in scm_lreadr.  */
46 SCM_GLOBAL_SYMBOL (scm_sym_dot
, ".");
47 SCM_SYMBOL (scm_keyword_prefix
, "prefix");
/* Table of reader options.  Each visible entry gives an option type, the
   option name, a third field (0, or the unpacked SCM_BOOL_F for
   `keywords'; presumably the initial value -- confirm against
   scm_t_option) and a one-line description.  The table is handed to
   scm_init_opts together with scm_read_options.  */
49 scm_t_option scm_read_opts
[] = {
50 { SCM_OPTION_BOOLEAN
, "copy", 0,
51 "Copy source code expressions." },
52 { SCM_OPTION_BOOLEAN
, "positions", 0,
53 "Record positions of source code expressions." },
54 { SCM_OPTION_BOOLEAN
, "case-insensitive", 0,
55 "Convert symbols to lower case."},
56 { SCM_OPTION_SCM
, "keywords", SCM_UNPACK (SCM_BOOL_F
),
57 "Style of keyword recognition: #f or 'prefix."},
59 { SCM_OPTION_BOOLEAN
, "elisp-vectors", 0,
60 "Support Elisp vector syntax, namely `[...]'."},
61 { SCM_OPTION_BOOLEAN
, "elisp-strings", 0,
62 "Support `\\(' and `\\)' in strings."},
68 Give meaningful error messages for errors
72 FILE:LINE:COL: MESSAGE
75 This is not standard GNU format, but the test-suite likes the real
76 message to be in front.
/* Signal a `read-error' for PORT.  The error text is assembled in a
   temporary output string port using the format string shown below.
   FN is the port's file name when that is a string, otherwise the
   string "#<unknown port>".  The line and column taken from PORT are
   each incremented by one before being formatted, and MESSAGE is
   converted with scm_from_locale_string.  FUNCTION, when non-NULL, is
   passed to scm_error_scm as a string; otherwise SCM_BOOL_F is passed.
   NOTE(review): the use of ARG is on lines not visible in this
   excerpt.  */
82 scm_i_input_error (char const *function
,
83 SCM port
, const char *message
, SCM arg
)
85 SCM fn
= (scm_is_string (SCM_FILENAME(port
))
87 : scm_from_locale_string ("#<unknown port>"));
/* Build the message in a scratch string port.  */
89 SCM string_port
= scm_open_output_string ();
91 scm_simple_format (string_port
,
92 scm_from_locale_string ("~A:~S:~S: ~A"),
94 scm_from_long (SCM_LINUM (port
) + 1),
95 scm_from_int (SCM_COL (port
) + 1),
96 scm_from_locale_string (message
)));
/* Extract the text, then release the scratch port before raising.  */
98 string
= scm_get_output_string (string_port
);
99 scm_close_output_port (string_port
);
100 scm_error_scm (scm_from_locale_symbol ("read-error"),
101 function
? scm_from_locale_string (function
) : SCM_BOOL_F
,
/* Scheme procedure `read-options-interface'.  The work is delegated to
   scm_options, whose result is kept in ANS.  Afterwards, if
   SCM_COPY_SOURCE_P is set, SCM_RECORD_POSITIONS_P is forced to 1.  */
108 SCM_DEFINE (scm_read_options
, "read-options-interface", 0, 1, 0,
110 "Option interface for the read options. Instead of using\n"
111 "this procedure directly, use the procedures @code{read-enable},\n"
112 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
113 #define FUNC_NAME s_scm_read_options
115 SCM ans
= scm_options (setting
,
/* Whenever source copying is on, position recording is switched on
   as well.  */
118 if (SCM_COPY_SOURCE_P
)
119 SCM_RECORD_POSITIONS_P
= 1;
124 /* Pointer to an association list mapping extra hash characters to
   procedures.  The pointer is assigned during initialisation from
   SCM_VARIABLE_LOC of the `read-hash-procedures' variable created with
   scm_c_define, so the alist itself is read and written through it.  */
125 static SCM
*scm_read_hash_procedures
;
/* Scheme procedure `read'.  If PORT is unbound the current input port is
   used; PORT is then validated as an open input port.  Leading
   whitespace is consumed with scm_flush_ws (with a NULL function name),
   the character that stopped the scan is pushed back with scm_ungetc,
   and the datum is read by scm_lreadr using a fresh 30-character token
   buffer.  */
127 SCM_DEFINE (scm_read
, "read", 0, 1, 0,
129 "Read an s-expression from the input port @var{port}, or from\n"
130 "the current input port if @var{port} is not specified.\n"
131 "Any whitespace before the next token is discarded.")
132 #define FUNC_NAME s_scm_read
137 if (SCM_UNBNDP (port
))
138 port
= scm_current_input_port ();
139 SCM_VALIDATE_OPINPORT (1, port
);
141 c
= scm_flush_ws (port
, (char *) NULL
);
144 scm_ungetc (c
, port
);
146 tok_buf
= scm_c_make_string (30, SCM_UNDEFINED
);
/* scm_lreadr's third parameter is `SCM *copy', so an address must be
   passed here, just as for the token buffer.  */
147 return scm_lreadr (&tok_buf
, port
, &copy
);
/* Grow the token buffer.  A new string of twice the current length of
   *TOK_BUF is created with scm_i_make_string, and the OLDLEN existing
   characters are copied into its storage one at a time.
   NOTE(review): the statement that installs NEWSTR in *TOK_BUF is not
   visible in this excerpt -- confirm against the full source.  */
154 scm_grow_tok_buf (SCM
*tok_buf
)
156 size_t oldlen
= scm_i_string_length (*tok_buf
);
157 const char *olddata
= scm_i_string_chars (*tok_buf
);
159 SCM newstr
= scm_i_make_string (2 * oldlen
, &newdata
);
/* Copy exactly the old contents; the second half of the new storage
   is not written by this loop.  */
162 for (i
= 0; i
!= oldlen
; ++i
)
163 newdata
[i
] = olddata
[i
];
169 /* Consume an SCSH-style block comment. Assume that we've already
170 read the initial `#!', and eat characters until we get an
171 exclamation-point/sharp-sign sequence.
/* Read characters from PORT with scm_getc.  One branch reports
   "unterminated `#! ... !#' comment" through scm_i_input_error; its
   guarding condition is on a line not shown here (presumably end of
   file -- confirm).  A `#' read while BANG_SEEN is set is treated
   specially; presumably that ends the comment.
   NOTE(review): the function name given to scm_i_input_error is
   "skip_block_comment", which differs from this function's name.  */
175 skip_scsh_block_comment (SCM port
)
181 int c
= scm_getc (port
);
184 scm_i_input_error ("skip_block_comment", port
,
185 "unterminated `#! ... !#' comment", SCM_EOL
);
189 else if (c
== '#' && bang_seen
)
/* Scan PORT with scm_getc and hand a character back to the caller.
   Callers in this file either push that character back with scm_ungetc
   or compare it with a closing delimiter.  EOFERR is forwarded to
   scm_i_input_error as the function name; scm_read passes NULL for it.
   Inside the nested switches EOFERR is replaced by "read_sharp" on one
   path and skip_scsh_block_comment is called on another.
   NOTE(review): most case labels are on lines not visible here, so the
   exact set of skipped characters cannot be stated from this excerpt.  */
197 scm_flush_ws (SCM port
, const char *eoferr
)
201 switch (c
= scm_getc (port
))
207 scm_i_input_error (eoferr
,
215 switch (c
= scm_getc (port
))
221 case SCM_LINE_INCREMENTORS
:
226 switch (c
= scm_getc (port
))
229 eoferr
= "read_sharp";
232 skip_scsh_block_comment (port
);
/* Not something handled in this switch: give the character back.  */
235 scm_ungetc (c
, port
);
239 case SCM_LINE_INCREMENTORS
:
240 case SCM_SINGLE_SPACES
:
/* Compare S1 and S2 character by character after mapping each through
   scm_c_downcase.  The final return yields 1 only when both pointers are
   at a terminating NUL.
   NOTE(review): the mismatch branch, the loop header and the pointer
   increments are on lines not shown in this excerpt.  */
251 scm_casei_streq (char *s1
, char *s2
)
254 if (scm_c_downcase((int)*s1
) != scm_c_downcase((int)*s2
))
261 return !(*s1
|| *s2
);
/* Case-insensitive comparison of the NUL-terminated S1 with the buffer
   S2, whose length is given by LEN2.  The loop runs while S1 still has
   characters and LEN2 is positive, comparing characters after
   scm_c_downcase.  The final return yields 1 only when S1 is exhausted
   and LEN2 is no longer positive.
   NOTE(review): the mismatch branch and the updates of S1, S2 and LEN2
   are on lines not shown in this excerpt.  */
265 scm_i_casei_streq (const char *s1
, const char *s2
, size_t len2
)
267 while (*s1
&& len2
> 0)
268 if (scm_c_downcase((int)*s1
) != scm_c_downcase((int)*s2
))
276 return !(*s1
|| len2
> 0);
279 /* recsexpr is used when recording expressions
280 * constructed by read:sharp.
/* Walk OBJ and record source properties built from LINE (and, on lines
   not shown, presumably COLUMN and FILENAME) in scm_source_whash.
   Non-pairs take the first branch.  A pair is processed only when it has
   no entry in scm_source_whash yet.  With SCM_COPY_SOURCE_P a copy of
   the list is built while recursing into each car; otherwise the cars
   are visited without copying and COPY is set to SCM_UNDEFINED.  */
283 recsexpr (SCM obj
, long line
, int column
, SCM filename
)
285 if (!scm_is_pair(obj
)) {
289 /* If this sexpr is visible in the read:sharp source, we want to
290 keep that information, so only record non-constant cons cells
291 which haven't previously been read by the reader. */
292 if (scm_is_false (scm_whash_lookup (scm_source_whash
, obj
)))
294 if (SCM_COPY_SOURCE_P
)
/* Copying variant: build a parallel list, recursing into each car.  */
296 copy
= scm_cons (recsexpr (SCM_CAR (obj
), line
, column
, filename
),
298 while ((tmp
= SCM_CDR (tmp
)) && scm_is_pair (tmp
))
300 SCM_SETCDR (copy
, scm_cons (recsexpr (SCM_CAR (tmp
),
305 copy
= SCM_CDR (copy
);
/* Attach whatever non-pair tail remains to the end of the copy.  */
307 SCM_SETCDR (copy
, tmp
);
/* Non-copying variant: visit the cars only.  */
311 recsexpr (SCM_CAR (obj
), line
, column
, filename
);
312 while ((tmp
= SCM_CDR (tmp
)) && scm_is_pair (tmp
))
313 recsexpr (SCM_CAR (tmp
), line
, column
, filename
);
314 copy
= SCM_UNDEFINED
;
316 scm_whash_insert (scm_source_whash
,
318 scm_make_srcprops (line
,
/* Forward declarations for helpers defined later in this file.  */
329 static SCM
scm_get_hash_procedure(int c
);
330 static SCM
scm_i_lreadparen (SCM
*, SCM
, char *, SCM
*, char);
/* Names passed as the NAME argument of the list readers when reading
   a list and an Elisp-style vector respectively.  */
332 static char s_list
[]="list";
334 static char s_vector
[]="vector";
/* Read one datum from PORT, using *TOK_BUF as the token buffer and COPY
   as the location for the source copy.  After scm_flush_ws the visible
   branches do the following:
     - lists go to scm_lreadrecparen when SCM_RECORD_POSITIONS_P is set,
       otherwise to scm_i_lreadparen with `)' as terminator;
     - a stray `)' raises an "unexpected" error;
     - with SCM_ELISP_VECTORS_P a sequence terminated by `]' is read and
       converted with scm_vector (scm_nullvect when empty);
     - quasiquote and unquote-splicing forms recurse into scm_lreadr;
     - hash syntax first consults scm_get_hash_procedure, then handles
       arrays, bit vectors, characters and `#:' keywords;
     - strings are accumulated in *TOK_BUF with escape processing;
     - other tokens go through scm_read_token and become numbers
       (scm_c_locale_stringn_to_number), keywords or symbols.
   NOTE(review): the case labels and several statements are on lines not
   visible in this excerpt; the summary covers only what is shown.  */
338 scm_lreadr (SCM
*tok_buf
, SCM port
, SCM
*copy
)
339 #define FUNC_NAME "scm_lreadr"
346 c
= scm_flush_ws (port
, s_scm_read
);
353 return SCM_RECORD_POSITIONS_P
354 ? scm_lreadrecparen (tok_buf
, port
, s_list
, copy
)
355 : scm_i_lreadparen (tok_buf
, port
, s_list
, copy
, ')');
357 scm_i_input_error (FUNC_NAME
, port
,"unexpected \")\"", SCM_EOL
);
362 if (SCM_ELISP_VECTORS_P
)
364 p
= scm_i_lreadparen (tok_buf
, port
, s_vector
, copy
, ']');
365 return scm_is_null (p
) ? scm_nullvect
: scm_vector (p
);
373 p
= scm_sym_quasiquote
;
378 p
= scm_sym_uq_splicing
;
381 scm_ungetc (c
, port
);
386 scm_lreadr (tok_buf
, port
, copy
),
/* When positions are being recorded, register source properties for
   the form that was just built.  */
388 if (SCM_RECORD_POSITIONS_P
)
389 scm_whash_insert (scm_source_whash
,
391 scm_make_srcprops (SCM_LINUM (port
),
395 ? (*copy
= scm_cons2 (SCM_CAR (p
),
396 SCM_CAR (SCM_CDR (p
)),
405 /* Check for user-defined hash procedure first, to allow
406 overriding of builtin hash read syntaxes. */
407 SCM sharp
= scm_get_hash_procedure (c
);
408 if (scm_is_true (sharp
))
410 long line
= SCM_LINUM (port
);
411 int column
= SCM_COL (port
) - 2;
414 got
= scm_call_2 (sharp
, SCM_MAKE_CHAR (c
), port
);
415 if (scm_is_eq (got
, SCM_UNSPECIFIED
))
417 if (SCM_RECORD_POSITIONS_P
)
418 return *copy
= recsexpr (got
, line
, column
,
419 SCM_FILENAME (port
));
427 /* Vector, arrays, both uniform and not are handled by this
428 one function. It also disambiguates between '#f' and
431 case '0': case '1': case '2': case '3': case '4':
432 case '5': case '6': case '7': case '8': case '9':
433 case 'u': case 's': case 'f':
436 #if SCM_ENABLE_DEPRECATED
437 /* See below for 'i' and 'e'. */
444 return scm_i_read_array (port
, c
);
451 /* See above for lower case 'f'. */
457 #if SCM_ENABLE_DEPRECATED
459 /* When next char is '(', it really is an old-style
461 int next_c
= scm_getc (port
);
463 scm_ungetc (next_c
, port
);
465 return scm_i_read_array (port
, c
);
479 scm_ungetc (c
, port
);
484 /* should never happen, #!...!# block comments are skipped
485 over in scm_flush_ws. */
489 j
= scm_read_token (c
, tok_buf
, port
, 0);
490 p
= scm_istr2bve (scm_c_substring_shared (*tok_buf
, 1, j
));
497 j
= scm_read_token (c
, tok_buf
, port
, 1);
498 return scm_string_to_symbol (scm_c_substring_copy (*tok_buf
, 0, j
));
502 j
= scm_read_token (c
, tok_buf
, port
, 0);
504 return SCM_MAKE_CHAR (c
);
505 if (c
>= '0' && c
< '8')
507 /* Dirk:FIXME:: This type of character syntax is not R5RS
508 * compliant. Further, it should be verified that the constant
509 * does only consist of octal digits. Finally, it should be
510 * checked whether the resulting fixnum is in the range of
512 p
= scm_c_locale_stringn_to_number (scm_i_string_chars (*tok_buf
),
515 return SCM_MAKE_CHAR (SCM_I_INUM (p
));
517 for (c
= 0; c
< scm_n_charnames
; c
++)
519 && (scm_i_casei_streq (scm_charnames
[c
],
520 scm_i_string_chars (*tok_buf
), j
)))
521 return SCM_MAKE_CHAR (scm_charnums
[c
]);
522 scm_i_input_error (FUNC_NAME
, port
, "unknown character name ~a",
523 scm_list_1 (scm_c_substring (*tok_buf
, 0, j
)));
525 /* #:SYMBOL is a syntax for keywords supported in all contexts. */
527 return scm_symbol_to_keyword (scm_read (port
));
/* No built-in syntax matched: consult the user-defined hash
   procedures again before reporting an unknown object.  */
532 SCM sharp
= scm_get_hash_procedure (c
);
534 if (scm_is_true (sharp
))
536 long line
= SCM_LINUM (port
);
537 int column
= SCM_COL (port
) - 2;
540 got
= scm_call_2 (sharp
, SCM_MAKE_CHAR (c
), port
);
541 if (scm_is_eq (got
, SCM_UNSPECIFIED
))
543 if (SCM_RECORD_POSITIONS_P
)
544 return *copy
= recsexpr (got
, line
, column
,
545 SCM_FILENAME (port
));
551 scm_i_input_error (FUNC_NAME
, port
, "Unknown # object: ~S",
552 scm_list_1 (SCM_MAKE_CHAR (c
)));
/* String constant: read characters until the closing double quote.  */
557 while ('"' != (c
= scm_getc (port
)))
560 str_eof
: scm_i_input_error (FUNC_NAME
, port
,
561 "end of file in string constant",
/* Keep growing the buffer while index j + 2 is not inside it.  */
564 while (j
+ 2 >= scm_i_string_length (*tok_buf
))
565 scm_grow_tok_buf (tok_buf
);
568 switch (c
= scm_getc (port
))
578 if (SCM_ESCAPED_PARENS_P
)
/* Two hexadecimal digits A and B: end of file aborts the string, and
   any non-hex character is rejected as a bad escape.  */
609 if (a
== EOF
) goto str_eof
;
611 if (b
== EOF
) goto str_eof
;
612 if ('0' <= a
&& a
<= '9') a
-= '0';
613 else if ('A' <= a
&& a
<= 'F') a
= a
- 'A' + 10;
614 else if ('a' <= a
&& a
<= 'f') a
= a
- 'a' + 10;
615 else goto bad_escaped
;
616 if ('0' <= b
&& b
<= '9') b
-= '0';
617 else if ('A' <= b
&& b
<= 'F') b
= b
- 'A' + 10;
618 else if ('a' <= b
&& b
<= 'f') b
= b
- 'a' + 10;
619 else goto bad_escaped
;
625 scm_i_input_error(FUNC_NAME
, port
,
626 "illegal character in escape sequence: ~S",
627 scm_list_1 (SCM_MAKE_CHAR (c
)));
629 scm_c_string_set_x (*tok_buf
, j
, SCM_MAKE_CHAR (c
));
635 /* Change this to scm_c_substring_read_only when
636 SCM_STRING_CHARS has been removed.
638 return scm_c_substring_copy (*tok_buf
, 0, j
);
640 case '0': case '1': case '2': case '3': case '4':
641 case '5': case '6': case '7': case '8': case '9':
646 j
= scm_read_token (c
, tok_buf
, port
, 0);
647 if (j
== 1 && (c
== '+' || c
== '-'))
648 /* Shortcut: Detected symbol '+ or '- */
651 p
= scm_c_locale_stringn_to_number (scm_i_string_chars (*tok_buf
), j
, 10);
656 if ((j
== 2) && (scm_getc (port
) == '('))
658 scm_ungetc ('(', port
);
659 c
= scm_i_string_chars (*tok_buf
)[1];
662 scm_i_input_error (FUNC_NAME
, port
, "unknown # object", SCM_EOL
);
/* With the `prefix' keyword style the following datum is read and
   turned into a keyword.  */
667 if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE
), scm_keyword_prefix
))
668 return scm_symbol_to_keyword (scm_read (port
));
675 j
= scm_read_token (c
, tok_buf
, port
, 0);
679 return scm_string_to_symbol (scm_c_substring (*tok_buf
, 0, j
));
686 _Pragma ("noopt"); /* # pragma _CRI noopt */
/* Read a token whose first character is IC into *TOK_BUF, reading
   further characters from PORT.  Characters are passed through
   scm_c_downcase when SCM_CASE_INSENSITIVE_P is set.  The buffer is
   grown while index j + 2 is not inside it, and a delimiter that ends
   the token is pushed back with scm_ungetc.  `[' and `]' are involved
   in the delimiter test only when SCM_ELISP_VECTORS_P is off.  Callers
   in this file use the return value as the number of characters stored
   in *TOK_BUF.
   NOTE(review): the effect of WEIRD is on lines not visible in this
   excerpt; callers pass 0 or 1.  */
690 scm_read_token (int ic
, SCM
*tok_buf
, SCM port
, int weird
)
695 c
= (SCM_CASE_INSENSITIVE_P
? scm_c_downcase(ic
) : ic
);
702 while (j
+ 2 >= scm_i_string_length (*tok_buf
))
703 scm_grow_tok_buf (tok_buf
);
704 scm_c_string_set_x (*tok_buf
, j
, SCM_MAKE_CHAR (c
));
710 while (j
+ 2 >= scm_i_string_length (*tok_buf
))
711 scm_grow_tok_buf (tok_buf
);
723 case SCM_WHITE_SPACES
:
724 case SCM_LINE_INCREMENTORS
:
727 || ((!SCM_ELISP_VECTORS_P
) && ((c
== '[') || (c
== ']')))
732 scm_ungetc (c
, port
);
758 scm_ungetc (c
, port
);
/* Ordinary token character: fold case if requested and store it.  */
766 c
= (SCM_CASE_INSENSITIVE_P
? scm_c_downcase(c
) : c
);
767 scm_c_string_set_x (*tok_buf
, j
, SCM_MAKE_CHAR (c
));
776 _Pragma ("opt"); /* # pragma _CRI opt */
/* Read the elements of a sequence from PORT up to the closing character
   TERM_CHAR and build a list from them.  NAME is forwarded to
   scm_flush_ws.  Each element is read with scm_lreadr; when an element
   is the symbol scm_sym_dot, one more datum is read as the tail.  In the
   first-element case the tail must be followed by TERM_CHAR, otherwise
   "missing close paren" is reported.
   NOTE(review): the return statements are on lines not visible here.  */
780 scm_i_lreadparen (SCM
*tok_buf
, SCM port
, char *name
, SCM
*copy
, char term_char
)
781 #define FUNC_NAME "scm_i_lreadparen"
788 c
= scm_flush_ws (port
, name
);
791 scm_ungetc (c
, port
);
/* A leading dot: the whole value is the datum that follows it.  */
792 if (scm_is_eq (scm_sym_dot
, (tmp
= scm_lreadr (tok_buf
, port
, copy
))))
794 ans
= scm_lreadr (tok_buf
, port
, copy
);
796 if (term_char
!= (c
= scm_flush_ws (port
, name
)))
797 scm_i_input_error (FUNC_NAME
, port
, "missing close paren", SCM_EOL
);
/* Start the list with the first element; TL tracks the last pair.  */
800 ans
= tl
= scm_cons (tmp
, SCM_EOL
);
801 while (term_char
!= (c
= scm_flush_ws (port
, name
)))
803 scm_ungetc (c
, port
);
804 if (scm_is_eq (scm_sym_dot
, (tmp
= scm_lreadr (tok_buf
, port
, copy
))))
/* Dotted tail: the next datum becomes the cdr of the last pair.  */
806 SCM_SETCDR (tl
, scm_lreadr (tok_buf
, port
, copy
));
809 SCM_SETCDR (tl
, scm_cons (tmp
, SCM_EOL
));
/* Variant of the list reader that also records source information.
   LINE and COLUMN are captured from PORT on entry (the column minus
   one).  Elements are read with scm_lreadr up to a closing `)', with the
   same scm_sym_dot handling for dotted tails and the same "missing close
   paren" error.  When SCM_COPY_SOURCE_P is set a second list (ANS2/TL2)
   is built in parallel, using *COPY in place of an element that is a
   pair.  Source properties made from LINE are finally stored in
   scm_source_whash.
   NOTE(review): the return statements and the remaining arguments of
   scm_make_srcprops are on lines not visible in this excerpt.  */
818 scm_lreadrecparen (SCM
*tok_buf
, SCM port
, char *name
, SCM
*copy
)
819 #define FUNC_NAME "scm_lreadrecparen"
823 register SCM tl
, tl2
= SCM_EOL
;
824 SCM ans
, ans2
= SCM_EOL
;
825 /* Need to capture line and column numbers here. */
826 long line
= SCM_LINUM (port
);
827 int column
= SCM_COL (port
) - 1;
829 c
= scm_flush_ws (port
, name
);
832 scm_ungetc (c
, port
);
/* A leading dot: the whole value is the datum that follows it.  */
833 if (scm_is_eq (scm_sym_dot
, (tmp
= scm_lreadr (tok_buf
, port
, copy
))))
835 ans
= scm_lreadr (tok_buf
, port
, copy
);
836 if (')' != (c
= scm_flush_ws (port
, name
)))
837 scm_i_input_error (FUNC_NAME
, port
, "missing close paren", SCM_EOL
);
840 /* Build the head of the list structure. */
841 ans
= tl
= scm_cons (tmp
, SCM_EOL
);
842 if (SCM_COPY_SOURCE_P
)
843 ans2
= tl2
= scm_cons (scm_is_pair (tmp
)
847 while (')' != (c
= scm_flush_ws (port
, name
)))
851 scm_ungetc (c
, port
);
852 if (scm_is_eq (scm_sym_dot
, (tmp
= scm_lreadr (tok_buf
, port
, copy
))))
/* Dotted tail: store it in both lists, then require the closing
   parenthesis.  */
854 SCM_SETCDR (tl
, tmp
= scm_lreadr (tok_buf
, port
, copy
));
855 if (SCM_COPY_SOURCE_P
)
856 SCM_SETCDR (tl2
, scm_cons (scm_is_pair (tmp
)
860 if (')' != (c
= scm_flush_ws (port
, name
)))
861 scm_i_input_error (FUNC_NAME
, port
,
862 "missing close paren", SCM_EOL
);
/* Ordinary element: append a new pair to the main list.  */
866 new_tail
= scm_cons (tmp
, SCM_EOL
);
867 SCM_SETCDR (tl
, new_tail
);
870 if (SCM_COPY_SOURCE_P
)
872 SCM new_tail2
= scm_cons (scm_is_pair (tmp
) ? *copy
: tmp
, SCM_EOL
);
873 SCM_SETCDR (tl2
, new_tail2
);
878 scm_whash_insert (scm_source_whash
,
880 scm_make_srcprops (line
,
894 /* Manipulate the read-hash-procedures alist. This could be written in
895 Scheme, but maybe it will also be used by C code during initialisation.
   CHR must be a character and PROC either #f or a procedure.  The alist
   reached through scm_read_hash_procedures is searched for CHR:
     - not found and PROC is true: a new (CHR . PROC) entry is consed
       onto the front;
     - found and PROC is #f: the entry is removed, from the head when
       PREV is #f, otherwise by relinking PREV;
     - found and PROC is true: the entry's cdr is replaced by PROC.
   The result is SCM_UNSPECIFIED.  */
896 SCM_DEFINE (scm_read_hash_extend
, "read-hash-extend", 2, 0, 0,
898 "Install the procedure @var{proc} for reading expressions\n"
899 "starting with the character sequence @code{#} and @var{chr}.\n"
900 "@var{proc} will be called with two arguments: the character\n"
901 "@var{chr} and the port to read further data from. The object\n"
902 "returned will be the return value of @code{read}. \n"
903 "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
905 #define FUNC_NAME s_scm_read_hash_extend
910 SCM_VALIDATE_CHAR (1, chr
);
911 SCM_ASSERT (scm_is_false (proc
)
912 || scm_is_eq (scm_procedure_p (proc
), SCM_BOOL_T
),
913 proc
, SCM_ARG2
, FUNC_NAME
);
915 /* Check if chr is already in the alist. */
916 this = *scm_read_hash_procedures
;
920 if (scm_is_null (this))
922 /* not found, so add it to the beginning. */
923 if (scm_is_true (proc
))
925 *scm_read_hash_procedures
=
926 scm_cons (scm_cons (chr
, proc
), *scm_read_hash_procedures
);
930 if (scm_is_eq (chr
, SCM_CAAR (this)))
932 /* already in the alist. */
933 if (scm_is_false (proc
))
/* Removal: PREV is #f when the match is the first entry.  */
936 if (scm_is_false (prev
))
938 *scm_read_hash_procedures
=
939 SCM_CDR (*scm_read_hash_procedures
);
942 scm_set_cdr_x (prev
, SCM_CDR (this));
/* Replacement: overwrite the procedure stored in the entry.  */
947 scm_set_cdr_x (SCM_CAR (this), proc
);
952 this = SCM_CDR (this);
955 return SCM_UNSPECIFIED
;
959 /* Recover the read-hash procedure corresponding to char c.
   The alist reached through scm_read_hash_procedures is walked from the
   front; the cdr of the first entry whose character equals C is
   returned.
   NOTE(review): the value returned when the list is exhausted is on a
   line not visible here; callers test the result with scm_is_true.  */
961 scm_get_hash_procedure (int c
)
963 SCM rest
= *scm_read_hash_procedures
;
967 if (scm_is_null (rest
))
970 if (SCM_CHAR (SCM_CAAR (rest
)) == c
)
971 return SCM_CDAR (rest
);
973 rest
= SCM_CDR (rest
);
/* Initialisation statements.  NOTE(review): the header of the enclosing
   function is not visible in this excerpt.  The `read-hash-procedures'
   variable is defined with an empty list and its location is remembered
   in scm_read_hash_procedures; the reader option table is then
   registered with scm_init_opts.  */
980 scm_read_hash_procedures
=
981 SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL
));
983 scm_init_opts (scm_read_options
, scm_read_opts
);
984 #include "libguile/read.x"