1 /* Copyright (C) 1995,1996,1997,1999,2000,2001,2003 Free Software
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include "libguile/_scm.h"
24 #include "libguile/chars.h"
25 #include "libguile/eval.h"
26 #include "libguile/unif.h"
27 #include "libguile/keywords.h"
28 #include "libguile/alist.h"
29 #include "libguile/srcprop.h"
30 #include "libguile/hashtab.h"
31 #include "libguile/hash.h"
32 #include "libguile/ports.h"
33 #include "libguile/root.h"
34 #include "libguile/strings.h"
35 #include "libguile/strports.h"
36 #include "libguile/vectors.h"
37 #include "libguile/validate.h"
39 #include "libguile/read.h"
/* Symbol objects used by the reader.  scm_sym_dot is compared against
   freshly read data in the list readers to detect a dotted tail, and
   scm_keyword_prefix is compared against the keyword style option when
   deciding whether to read a keyword.  */
43 SCM_GLOBAL_SYMBOL (scm_sym_dot
, ".");
44 SCM_SYMBOL (scm_keyword_prefix
, "prefix");
/* Table of reader options handed to scm_init_opts and scm_options.
   Each entry gives an option kind, its name, a value field and a
   documentation string.  Three entries are boolean (copy, positions,
   case-insensitive) with value field 0, and one holds a Scheme value
   (keywords) with value field SCM_BOOL_F.
   NOTE(review): the value field is presumably the initial setting;
   confirm against the scm_t_option declaration.  */
46 scm_t_option scm_read_opts
[] = {
47 { SCM_OPTION_BOOLEAN
, "copy", 0,
48 "Copy source code expressions." },
49 { SCM_OPTION_BOOLEAN
, "positions", 0,
50 "Record positions of source code expressions." },
51 { SCM_OPTION_BOOLEAN
, "case-insensitive", 0,
52 "Convert symbols to lower case."},
53 { SCM_OPTION_SCM
, "keywords", SCM_UNPACK (SCM_BOOL_F
),
54 "Style of keyword recognition: #f or 'prefix."}
58 Give meaningful error messages for errors
62 FILE:LINE:COL: MESSAGE
65 This is not standard GNU format, but the test-suite likes the real
66 message to be in front.
/* Signal a read-error for PORT.

   FUNCTION is the name reported as the origin of the error, MESSAGE is
   the text placed after the location prefix, and ARG is forwarded to
   the error call (its exact use is on lines not shown here).

   The location prefix has the shape FILE:LINE:COL: MESSAGE.  LINE and
   COL are the values of SCM_LINUM and SCM_COL for PORT plus one.  The
   prefix is produced by formatting into a temporary string output port,
   which is closed again before the error is raised through
   scm_error_scm with the key read-error.  */
72 scm_input_error(char const * function
,
73 SCM port
, const char * message
, SCM arg
)
/* Use the port file name when it is a string; the alternative value
   is on a line not shown here.  */
75 char *fn
= SCM_STRINGP (SCM_FILENAME(port
))
76 ? SCM_STRING_CHARS(SCM_FILENAME(port
))
79 SCM string_port
= scm_open_output_string ();
/* Build the full message text in the temporary string port.  */
81 scm_simple_format (string_port
,
82 scm_makfrom0str ("~A:~S:~S: ~A"),
83 scm_list_4 (scm_makfrom0str (fn
),
84 scm_int2num (SCM_LINUM (port
) + 1),
85 scm_int2num (SCM_COL (port
) + 1),
86 scm_makfrom0str (message
)));
89 string
= scm_get_output_string (string_port
);
90 scm_close_output_port (string_port
);
91 scm_error_scm (scm_str2symbol ("read-error"),
92 scm_makfrom0str (function
),
/* Scheme-visible procedure read-options-interface, declared with no
   required and one optional argument.  It passes SETTING on to
   scm_options and keeps the result in ANS.  Afterwards, if copying of
   source is enabled, position recording is switched on as well, so the
   copy option never runs without the positions option.  */
99 SCM_DEFINE (scm_read_options
, "read-options-interface", 0, 1, 0,
101 "Option interface for the read options. Instead of using\n"
102 "this procedure directly, use the procedures @code{read-enable},\n"
103 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
104 #define FUNC_NAME s_scm_read_options
106 SCM ans
= scm_options (setting
,
/* Copying source implies recording positions.  */
110 if (SCM_COPY_SOURCE_P
)
111 SCM_RECORD_POSITIONS_P
= 1;
/* This is a pointer to the storage location of the list, not the list
   itself.  It is set during initialisation from the Scheme variable
   read-hash-procedures, and it is dereferenced by scm_read_hash_extend
   and scm_get_hash_procedure.  */
116 /* An association list mapping extra hash characters to procedures. */
117 static SCM
*scm_read_hash_procedures
;
/* Scheme-visible procedure read, declared with no required and one
   optional argument (PORT).

   Visible steps: PORT is checked for being unbound (the replacement
   value is on a line not shown here) and validated as an open input
   port.  Leading whitespace is skipped with scm_flush_ws, passing a
   null function name.  The first significant character is pushed back
   with scm_ungetc, a token buffer of 30 characters is allocated, and
   the actual parsing is delegated to scm_lreadr.

   NOTE(review): the last argument of the scm_lreadr call below appears
   as a copyright sign.  scm_lreadr takes a pointer to SCM named copy as
   its third parameter, so this is almost certainly the text ampersand
   followed by copy that was mis-decoded as an HTML entity when this
   listing was produced.  Confirm against a pristine copy of the file
   before building.  */
119 SCM_DEFINE (scm_read
, "read", 0, 1, 0,
121 "Read an s-expression from the input port @var{port}, or from\n"
122 "the current input port if @var{port} is not specified.\n"
123 "Any whitespace before the next token is discarded.")
124 #define FUNC_NAME s_scm_read
129 if (SCM_UNBNDP (port
))
131 SCM_VALIDATE_OPINPORT (1, port
);
133 c
= scm_flush_ws (port
, (char *) NULL
);
136 scm_ungetc (c
, port
);
138 tok_buf
= scm_allocate_string (30);
139 return scm_lreadr (&tok_buf
, port
, ©
);
/* Enlarge the token buffer that TOK_BUF points to.

   A new string of twice the old length is allocated and the old
   contents are copied into it character by character.  The return
   value is a pointer to the characters of the new string, which
   callers use to refresh their own character pointer.
   NOTE(review): the statement that stores the new string back through
   TOK_BUF is not visible in this listing; confirm it exists.  */
146 scm_grow_tok_buf (SCM
*tok_buf
)
148 size_t oldlen
= SCM_STRING_LENGTH (*tok_buf
);
149 SCM newstr
= scm_allocate_string (2 * oldlen
);
/* Copy the existing characters into the larger string.  */
152 for (i
= 0; i
!= oldlen
; ++i
)
153 SCM_STRING_CHARS (newstr
) [i
] = SCM_STRING_CHARS (*tok_buf
) [i
];
156 return SCM_STRING_CHARS (newstr
);
/* Read characters from PORT, discarding insignificant ones, and return
   the first significant character (callers assign the result to their
   current-character variable).

   EOFERR is passed as the function name to scm_input_error on one of
   the branches.  Callers that tolerate end of file pass a null pointer
   instead.
   NOTE(review): the outer switch dispatches on each character read and
   a nested switch reads further characters; the nested one presumably
   consumes a comment up to a line incrementor.  The case labels that
   would confirm this are on lines not shown here.  */
162 scm_flush_ws (SCM port
, const char *eoferr
)
166 switch (c
= scm_getc (port
))
172 scm_input_error (eoferr
,
180 switch (c
= scm_getc (port
))
186 case SCM_LINE_INCREMENTORS
:
190 case SCM_LINE_INCREMENTORS
:
191 case SCM_SINGLE_SPACES
:
/* Compare the strings S1 and S2 without regard to case.

   Characters are compared after passing each through scm_downcase.
   The final return yields true only when both strings are at their
   terminating zero character, so strings of different length do not
   compare equal.
   NOTE(review): the loop header and the early return for a mismatch
   are on lines not shown here.  */
202 scm_casei_streq (char *s1
, char *s2
)
205 if (scm_downcase((int)*s1
) != scm_downcase((int)*s2
))
212 return !(*s1
|| *s2
);
/* Summary of recsexpr (added in review): OBJ is the datum to process,
   and LINE, COLUMN and FILENAME describe the position to record for it.
   Objects that are not pairs take an early branch.  A pair that has no
   entry yet in scm_source_whash is walked.  With source copying enabled
   a copy of the list is built element by element, with recsexpr applied
   to each car and the final cdr attached unchanged.  Without copying,
   the elements are only visited recursively and the copy is set to
   SCM_UNDEFINED.  Finally an entry made with scm_make_srcprops is
   inserted into scm_source_whash.  */
216 /* recsexpr is used when recording expressions
217 * constructed by read:sharp.
220 recsexpr (SCM obj
, long line
, int column
, SCM filename
)
222 if (!SCM_CONSP(obj
)) {
226 /* If this sexpr is visible in the read:sharp source, we want to
227 keep that information, so only record non-constant cons cells
228 which haven't previously been read by the reader. */
229 if (SCM_FALSEP (scm_whash_lookup (scm_source_whash
, obj
)))
231 if (SCM_COPY_SOURCE_P
)
233 copy
= scm_cons (recsexpr (SCM_CAR (obj
), line
, column
, filename
),
235 while ((tmp
= SCM_CDR (tmp
)) && SCM_CONSP (tmp
))
237 SCM_SETCDR (copy
, scm_cons (recsexpr (SCM_CAR (tmp
),
242 copy
= SCM_CDR (copy
);
/* Attach whatever terminated the walk as the final cdr.  */
244 SCM_SETCDR (copy
, tmp
);
248 recsexpr (SCM_CAR (obj
), line
, column
, filename
);
249 while ((tmp
= SCM_CDR (tmp
)) && SCM_CONSP (tmp
))
250 recsexpr (SCM_CAR (tmp
), line
, column
, filename
);
251 copy
= SCM_UNDEFINED
;
253 scm_whash_insert (scm_source_whash
,
255 scm_make_srcprops (line
,
/* Implementation note (added in review): the terminator is detected
   with a sliding window.  HISTORY holds the most recent four characters
   read from PORT packed into 32 bits, with the newest character in the
   low byte.  After every character the window is compared against the
   four-character terminator sequence.  One branch raises a misc error
   about an unterminated comment; presumably that is the end-of-file
   case, but the test guarding it is on a line not shown here.  */
265 /* Consume an SCSH-style block comment. Assume that we've already
266 read the initial `#!', and eat characters until we get a
267 newline/exclamation-point/sharp-sign/newline sequence. */
270 skip_scsh_block_comment (SCM port
)
271 #define FUNC_NAME "skip_scsh_block_comment"
273 /* Is this portable? Dear God, spare me from the non-eight-bit
274 characters. But is it tasteful? */
279 int c
= scm_getc (port
);
282 SCM_MISC_ERROR ("unterminated `#! ... !#' comment", SCM_EOL
);
/* Shift the new character into the low byte of the window and keep
   only the low 32 bits.  */
283 history
= ((history
<< 8) | (c
& 0xff)) & 0xffffffff;
285 /* Were the last four characters read "\n!#\n"? */
286 if (history
== (('\n' << 24) | ('!' << 16) | ('#' << 8) | '\n'))
/* Forward declaration: scm_get_hash_procedure is defined near the end
   of the file but is called from scm_lreadr.  */
293 static SCM
scm_get_hash_procedure(int c
);
/* Name handed to the list readers when reading an ordinary list.  The
   list readers pass it on to scm_flush_ws.  */
295 static char s_list
[]="list";
/* Read one datum from PORT and return it.

   TOK_BUF points to the growable token buffer shared by all readers.
   COPY points to a location that receives a second version of the datum
   on the paths that record source positions.

   The function skips whitespace and then dispatches on the character
   obtained.  Most case labels are on lines not shown in this listing,
   so only the following behaviour can be read off the visible code:

   - Lists are read by scm_lreadrecparen when position recording is
     enabled and by scm_lreadparen otherwise.
   - One branch raises an input error about an unexpected closing
     parenthesis.
   - Vectors are read as a list and converted with scm_vector, with
     scm_nullvect used for the empty case.
   - For the quoting abbreviations a symbol such as scm_sym_quasiquote
     or scm_sym_uq_splicing is selected, the following datum is read
     recursively, and with position recording enabled an entry is
     added to scm_source_whash.
   - For hash syntax a user-defined procedure obtained from
     scm_get_hash_procedure is consulted before the builtin forms.  Its
     result is compared against SCM_UNSPECIFIED, and with position
     recording enabled the result is passed through recsexpr.
   - A shell-script style block comment is skipped and reading resumes
     at the label tryagain_no_flush_ws.
   - String constants are accumulated in the token buffer, which is
     grown as needed.  An escape made of two hexadecimal digits is
     combined into one character; if either character is not a
     hexadecimal digit both are pushed back.  End of file inside a
     string raises an input error.
   - Numeric tokens are converted with scm_i_mem2number in radix 10.
   - When the keyword style option equals scm_keyword_prefix, a token
     is read with a dash as its first character and turned into a
     keyword.
   - Other tokens become symbols through scm_mem2symbol.  */
298 scm_lreadr (SCM
*tok_buf
, SCM port
, SCM
*copy
)
299 #define FUNC_NAME "scm_lreadr"
306 c
= scm_flush_ws (port
, s_scm_read
);
307 tryagain_no_flush_ws
:
314 return SCM_RECORD_POSITIONS_P
315 ? scm_lreadrecparen (tok_buf
, port
, s_list
, copy
)
316 : scm_lreadparen (tok_buf
, port
, s_list
, copy SCM_ELISP_CLOSE
);
318 scm_input_error (FUNC_NAME
, port
,"unexpected \")\"", SCM_EOL
);
321 #ifdef SCM_ELISP_READ_EXTENSIONS
323 p
= scm_lreadparen (tok_buf
, port
, "vector", copy
, ']');
324 return SCM_NULLP (p
) ? scm_nullvect
: scm_vector (p
);
330 p
= scm_sym_quasiquote
;
335 p
= scm_sym_uq_splicing
;
338 scm_ungetc (c
, port
);
343 scm_lreadr (tok_buf
, port
, copy
),
345 if (SCM_RECORD_POSITIONS_P
)
346 scm_whash_insert (scm_source_whash
,
348 scm_make_srcprops (SCM_LINUM (port
),
352 ? (*copy
= scm_cons2 (SCM_CAR (p
),
353 SCM_CAR (SCM_CDR (p
)),
362 /* Check for user-defined hash procedure first, to allow
363 overriding of builtin hash read syntaxes. */
364 SCM sharp
= scm_get_hash_procedure (c
);
365 if (!SCM_FALSEP (sharp
))
/* Remember where the hash syntax began before the user procedure
   consumes more input.  The column is moved back by two, presumably
   over the two characters already read; confirm.  */
367 int line
= SCM_LINUM (port
);
368 int column
= SCM_COL (port
) - 2;
371 got
= scm_call_2 (sharp
, SCM_MAKE_CHAR (c
), port
);
372 if (SCM_EQ_P (got
, SCM_UNSPECIFIED
))
374 if (SCM_RECORD_POSITIONS_P
)
375 return *copy
= recsexpr (got
, line
, column
,
376 SCM_FILENAME (port
));
385 p
= scm_lreadparen (tok_buf
, port
, "vector", copy SCM_ELISP_CLOSE
);
386 return SCM_NULLP (p
) ? scm_nullvect
: scm_vector (p
);
407 scm_ungetc (c
, port
);
412 /* start of a shell script. Parse as a block comment,
413 terminated by !#, just like SCSH. */
414 skip_scsh_block_comment (port
);
415 /* EOF is not an error here */
416 c
= scm_flush_ws (port
, (char *)NULL
);
417 goto tryagain_no_flush_ws
;
421 j
= scm_read_token (c
, tok_buf
, port
, 0);
422 p
= scm_istr2bve (SCM_STRING_CHARS (*tok_buf
) + 1, (long) (j
- 1));
430 j
= scm_read_token (c
, tok_buf
, port
, 1);
431 return scm_mem2symbol (SCM_STRING_CHARS (*tok_buf
), j
);
435 j
= scm_read_token (c
, tok_buf
, port
, 0);
437 return SCM_MAKE_CHAR (c
);
438 if (c
>= '0' && c
< '8')
440 /* Dirk:FIXME:: This type of character syntax is not R5RS
441 * compliant. Further, it should be verified that the constant
442 * does only consist of octal digits. Finally, it should be
443 * checked whether the resulting fixnum is in the range of
445 p
= scm_i_mem2number (SCM_STRING_CHARS (*tok_buf
), j
, 8);
447 return SCM_MAKE_CHAR (SCM_INUM (p
));
449 for (c
= 0; c
< scm_n_charnames
; c
++)
451 && (scm_casei_streq (scm_charnames
[c
], SCM_STRING_CHARS (*tok_buf
))))
452 return SCM_MAKE_CHAR (scm_charnums
[c
]);
453 scm_input_error (FUNC_NAME
, port
, "unknown # object", SCM_EOL
);
455 /* #:SYMBOL is a syntax for keywords supported in all contexts. */
457 j
= scm_read_token ('-', tok_buf
, port
, 0);
458 p
= scm_mem2symbol (SCM_STRING_CHARS (*tok_buf
), j
);
459 return scm_make_keyword_from_dash_symbol (p
);
464 SCM sharp
= scm_get_hash_procedure (c
);
466 if (!SCM_FALSEP (sharp
))
468 int line
= SCM_LINUM (port
);
469 int column
= SCM_COL (port
) - 2;
472 got
= scm_call_2 (sharp
, SCM_MAKE_CHAR (c
), port
);
473 if (SCM_EQ_P (got
, SCM_UNSPECIFIED
))
475 if (SCM_RECORD_POSITIONS_P
)
476 return *copy
= recsexpr (got
, line
, column
,
477 SCM_FILENAME (port
));
483 scm_input_error (FUNC_NAME
, port
, "Unknown # object: ~S",
484 scm_list_1 (SCM_MAKE_CHAR (c
)));
489 while ('"' != (c
= scm_getc (port
)))
492 str_eof
: scm_input_error (FUNC_NAME
, port
, "end of file in string constant", SCM_EOL
);
494 while (j
+ 2 >= SCM_STRING_LENGTH (*tok_buf
))
495 scm_grow_tok_buf (tok_buf
);
498 switch (c
= scm_getc (port
))
527 int a
, b
, a_09
= 0, b_09
= 0, a_AF
= 0, b_AF
= 0, a_af
= 0,
530 if (a
== EOF
) goto str_eof
;
532 if (b
== EOF
) goto str_eof
;
533 if ('0' <= a
&& a
<= '9') a_09
= 1;
534 else if ('A' <= a
&& a
<= 'F') a_AF
= 1;
535 else if ('a' <= a
&& a
<= 'f') a_af
= 1;
536 if ('0' <= b
&& b
<= '9') b_09
= 1;
537 else if ('A' <= b
&& b
<= 'F') b_AF
= 1;
538 else if ('a' <= b
&& b
<= 'f') b_af
= 1;
539 if ((a_09
|| a_AF
|| a_af
) && (b_09
|| b_AF
|| b_af
))
540 c
= (a_09
? a
- '0': a_AF
? a
- 'A' + 10: a
- 'a' + 10) * 16
541 + (b_09
? b
- '0': b_AF
? b
- 'A' + 10: b
- 'a' + 10);
544 scm_ungetc (b
, port
);
545 scm_ungetc (a
, port
);
550 SCM_STRING_CHARS (*tok_buf
)[j
] = c
;
555 SCM_STRING_CHARS (*tok_buf
)[j
] = 0;
556 return scm_mem2string (SCM_STRING_CHARS (*tok_buf
), j
);
558 case '0': case '1': case '2': case '3': case '4':
559 case '5': case '6': case '7': case '8': case '9':
564 j
= scm_read_token (c
, tok_buf
, port
, 0);
565 if (j
== 1 && (c
== '+' || c
== '-'))
566 /* Shortcut: Detected symbol '+ or '- */
569 p
= scm_i_mem2number (SCM_STRING_CHARS (*tok_buf
), j
, 10);
574 if ((j
== 2) && (scm_getc (port
) == '('))
576 scm_ungetc ('(', port
);
577 c
= SCM_STRING_CHARS (*tok_buf
)[1];
580 scm_input_error (FUNC_NAME
, port
, "unknown # object", SCM_EOL
);
585 if (SCM_EQ_P (SCM_PACK (SCM_KEYWORD_STYLE
), scm_keyword_prefix
))
587 j
= scm_read_token ('-', tok_buf
, port
, 0);
588 p
= scm_mem2symbol (SCM_STRING_CHARS (*tok_buf
), j
);
589 return scm_make_keyword_from_dash_symbol (p
);
593 j
= scm_read_token (c
, tok_buf
, port
, 0);
597 return scm_mem2symbol (SCM_STRING_CHARS (*tok_buf
), j
);
/* Read one token into the buffer that TOK_BUF points to.

   IC is the first character of the token.  It is converted with
   scm_downcase when the case-insensitive option is on, and the same
   conversion is applied to later characters.  PORT supplies the
   remaining characters.  The buffer is grown with scm_grow_tok_buf
   whenever fewer than three free positions remain, and the local
   character pointer is refreshed from the value that call returns.
   Characters that end the token are pushed back with scm_ungetc.

   Callers assign the return value to their length variable and use it
   as the token length.
   NOTE(review): WEIRD is passed as 1 by exactly one caller and as 0
   by all others; its meaning is decided on lines not shown here.  The
   surrounding pragma operators switch optimisation off and on again
   for a compiler that honours the _CRI pragmas.  */
604 _Pragma ("noopt"); /* # pragma _CRI noopt */
608 scm_read_token (int ic
, SCM
*tok_buf
, SCM port
, int weird
)
614 c
= (SCM_CASE_INSENSITIVE_P
? scm_downcase(ic
) : ic
);
615 p
= SCM_STRING_CHARS (*tok_buf
);
/* Make room before storing; growing may move the characters, so the
   pointer is taken from the return value.  */
622 while (j
+ 2 >= SCM_STRING_LENGTH (*tok_buf
))
623 p
= scm_grow_tok_buf (tok_buf
);
630 while (j
+ 2 >= SCM_STRING_LENGTH (*tok_buf
))
631 p
= scm_grow_tok_buf (tok_buf
);
637 #ifdef SCM_ELISP_READ_EXTENSIONS
643 case SCM_WHITE_SPACES
:
644 case SCM_LINE_INCREMENTORS
:
648 scm_ungetc (c
, port
);
676 scm_ungetc (c
, port
);
684 c
= (SCM_CASE_INSENSITIVE_P
? scm_downcase(c
) : c
);
694 _Pragma ("opt"); /* # pragma _CRI opt */
/* Read the elements of a list from PORT up to the terminating
   character and return the list, without recording source positions.

   TOK_BUF and COPY are passed through to scm_lreadr for every element.
   NAME is handed to scm_flush_ws as the function name for its error
   report.  The terminating character is term_char, which is defined
   as a closing parenthesis in one branch of the conditional on
   SCM_ELISP_READ_EXTENSIONS.
   NOTE(review): callers either append SCM_ELISP_CLOSE or pass a
   closing bracket as a fifth argument, so the other branch presumably
   adds a terminator parameter; that line is not shown here.

   Dotted notation is supported.  When an element reads as scm_sym_dot,
   the next datum becomes the tail of the list.  In the first-element
   case the next significant character must then be the terminator,
   otherwise an input error about a missing close paren is raised.  */
698 scm_lreadparen (SCM
*tok_buf
, SCM port
, char *name
, SCM
*copy
699 #ifdef SCM_ELISP_READ_EXTENSIONS
702 #define term_char ')'
705 #define FUNC_NAME "scm_lreadparen"
712 c
= scm_flush_ws (port
, name
);
715 scm_ungetc (c
, port
);
716 if (SCM_EQ_P (scm_sym_dot
, (tmp
= scm_lreadr (tok_buf
, port
, copy
))))
718 ans
= scm_lreadr (tok_buf
, port
, copy
);
720 if (term_char
!= (c
= scm_flush_ws (port
, name
)))
721 scm_input_error (FUNC_NAME
, port
, "missing close paren", SCM_EOL
);
/* Start the result list with the first element; TL tracks the cell
   to which the next element is attached.  */
724 ans
= tl
= scm_cons (tmp
, SCM_EOL
);
725 while (term_char
!= (c
= scm_flush_ws (port
, name
)))
727 scm_ungetc (c
, port
);
728 if (SCM_EQ_P (scm_sym_dot
, (tmp
= scm_lreadr (tok_buf
, port
, copy
))))
730 SCM_SETCDR (tl
, scm_lreadr (tok_buf
, port
, copy
));
733 SCM_SETCDR (tl
, scm_cons (tmp
, SCM_EOL
));
739 #ifndef SCM_ELISP_READ_EXTENSIONS
/* Read the elements of a list from PORT up to a closing parenthesis,
   recording the source position of the list, and return the list.

   TOK_BUF is the shared token buffer and NAME is handed to
   scm_flush_ws as the function name for its error report.  The line
   and column of PORT are captured on entry, with the column moved
   back by one, and are later stored in scm_source_whash through
   scm_make_srcprops.

   When source copying is enabled a second list is built in parallel
   (ANS2 with tail TL2).  For each element that is a pair the value
   left in the location COPY points to by the recursive read is used,
   and other elements are used as they are.

   Dotted notation is supported as in scm_lreadparen: after a dot the
   next datum becomes the tail, and anything other than a closing
   parenthesis after it raises an input error about a missing close
   paren.  */
745 scm_lreadrecparen (SCM
*tok_buf
, SCM port
, char *name
, SCM
*copy
)
746 #define FUNC_NAME "scm_lreadrecparen"
750 register SCM tl
, tl2
= SCM_EOL
;
751 SCM ans
, ans2
= SCM_EOL
;
752 /* Need to capture line and column numbers here. */
753 int line
= SCM_LINUM (port
);
754 int column
= SCM_COL (port
) - 1;
756 c
= scm_flush_ws (port
, name
);
759 scm_ungetc (c
, port
);
760 if (SCM_EQ_P (scm_sym_dot
, (tmp
= scm_lreadr (tok_buf
, port
, copy
))))
762 ans
= scm_lreadr (tok_buf
, port
, copy
);
763 if (')' != (c
= scm_flush_ws (port
, name
)))
764 scm_input_error (FUNC_NAME
, port
, "missing close paren", SCM_EOL
);
767 /* Build the head of the list structure. */
768 ans
= tl
= scm_cons (tmp
, SCM_EOL
);
769 if (SCM_COPY_SOURCE_P
)
770 ans2
= tl2
= scm_cons (SCM_CONSP (tmp
)
774 while (')' != (c
= scm_flush_ws (port
, name
)))
778 scm_ungetc (c
, port
);
779 if (SCM_EQ_P (scm_sym_dot
, (tmp
= scm_lreadr (tok_buf
, port
, copy
))))
781 SCM_SETCDR (tl
, tmp
= scm_lreadr (tok_buf
, port
, copy
));
782 if (SCM_COPY_SOURCE_P
)
783 SCM_SETCDR (tl2
, scm_cons (SCM_CONSP (tmp
)
787 if (')' != (c
= scm_flush_ws (port
, name
)))
788 scm_input_error (FUNC_NAME
, port
, "missing close paren", SCM_EOL
);
/* Ordinary element: append a fresh cell to the result list, and to
   the parallel copy when copying is enabled.  */
792 new_tail
= scm_cons (tmp
, SCM_EOL
);
793 SCM_SETCDR (tl
, new_tail
);
796 if (SCM_COPY_SOURCE_P
)
798 SCM new_tail2
= scm_cons (SCM_CONSP (tmp
) ? *copy
: tmp
, SCM_EOL
);
799 SCM_SETCDR (tl2
, new_tail2
);
804 scm_whash_insert (scm_source_whash
,
806 scm_make_srcprops (line
,
/* Scheme-visible procedure read-hash-extend with two required
   arguments, CHR and PROC.

   CHR must be a character.  PROC must be either false or something
   for which scm_procedure_p answers true; otherwise the assertion
   reports an error for argument 2.

   The association list is walked from the front:
   - If CHR is not present and PROC is not false, a new pair of CHR
     and PROC is added at the front of the list.
   - If CHR is present and PROC is false, the entry is removed, either
     by dropping the first element of the list or by linking the
     previous cell past it.
   - If CHR is present and PROC is not false, the procedure stored in
     the existing entry is replaced.

   The return value is SCM_UNSPECIFIED.  */
820 /* Manipulate the read-hash-procedures alist. This could be written in
821 Scheme, but maybe it will also be used by C code during initialisation. */
822 SCM_DEFINE (scm_read_hash_extend
, "read-hash-extend", 2, 0, 0,
824 "Install the procedure @var{proc} for reading expressions\n"
825 "starting with the character sequence @code{#} and @var{chr}.\n"
826 "@var{proc} will be called with two arguments: the character\n"
827 "@var{chr} and the port to read further data from. The object\n"
828 "returned will be the return value of @code{read}.")
829 #define FUNC_NAME s_scm_read_hash_extend
834 SCM_VALIDATE_CHAR (1, chr
);
835 SCM_ASSERT (SCM_FALSEP (proc
)
836 || SCM_EQ_P (scm_procedure_p (proc
), SCM_BOOL_T
),
837 proc
, SCM_ARG2
, FUNC_NAME
);
839 /* Check if chr is already in the alist. */
840 this = *scm_read_hash_procedures
;
844 if (SCM_NULLP (this))
846 /* not found, so add it to the beginning. */
847 if (!SCM_FALSEP (proc
))
849 *scm_read_hash_procedures
=
850 scm_cons (scm_cons (chr
, proc
), *scm_read_hash_procedures
);
854 if (SCM_EQ_P (chr
, SCM_CAAR (this)))
856 /* already in the alist. */
857 if (SCM_FALSEP (proc
))
/* A false PREV means the entry is the first element of the list.  */
860 if (SCM_FALSEP (prev
))
862 *scm_read_hash_procedures
=
863 SCM_CDR (*scm_read_hash_procedures
);
866 scm_set_cdr_x (prev
, SCM_CDR (this));
871 scm_set_cdr_x (SCM_CAR (this), proc
);
876 this = SCM_CDR (this);
879 return SCM_UNSPECIFIED
;
/* The association list is walked from the front.  For the first entry
   whose character equals C the stored procedure is returned.  Callers
   test the result with SCM_FALSEP to detect that nothing was found.
   NOTE(review): the value returned when the list is exhausted is on a
   line not shown here; confirm that it is the false object.  */
883 /* Recover the read-hash procedure corresponding to char c. */
885 scm_get_hash_procedure (int c
)
887 SCM rest
= *scm_read_hash_procedures
;
891 if (SCM_NULLP (rest
))
894 if (SCM_CHAR (SCM_CAAR (rest
)) == c
)
895 return SCM_CDAR (rest
);
897 rest
= SCM_CDR (rest
);
/* Initialisation statements for the reader.
   NOTE(review): the enclosing function header is not visible in this
   listing; presumably this is the body of the module init routine.

   The Scheme variable read-hash-procedures is defined with the empty
   list as its value, and the address of its value cell is kept in
   scm_read_hash_procedures.  The reader option table is then
   registered through scm_init_opts, and finally the file read.x is
   included.  */
904 scm_read_hash_procedures
=
905 SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL
));
907 scm_init_opts (scm_read_options
, scm_read_opts
, SCM_N_READ_OPTIONS
);
908 #include "libguile/read.x"