1 /* Copyright (C) 1995,1996,1997,1999,2000,2001 Free Software Foundation, Inc.
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2, or (at your option) any later version.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this software; see the file COPYING. If not, write to
15 * the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
16 * Boston, MA 02111-1307 USA
18 * As a special exception, the Free Software Foundation gives permission
19 * for additional uses of the text contained in its release of GUILE.
21 * The exception is that, if you link the GUILE library with other files
22 * to produce an executable, this does not by itself cause the
23 * resulting executable to be covered by the GNU General Public License.
24 * Your use of that executable is in no way restricted on account of
25 * linking the GUILE library code into it.
27 * This exception does not however invalidate any other reasons why
28 * the executable file might be covered by the GNU General Public License.
30 * This exception applies only to the code released by the
31 * Free Software Foundation under the name GUILE. If you copy
32 * code from other Free Software Foundation releases into a copy of
33 * GUILE, as the General Public License permits, the exception does
34 * not apply to the code that you add in this way. To avoid misleading
35 * anyone as to the status of such modified files, you must delete
36 * this exception notice from them.
38 * If you write modifications of your own for GUILE, it is your choice
39 * whether to permit this exception to apply to your modifications.
40 * If you do not wish that, delete this exception notice. */
46 #include "libguile/_scm.h"
47 #include "libguile/chars.h"
48 #include "libguile/eval.h"
49 #include "libguile/unif.h"
50 #include "libguile/keywords.h"
51 #include "libguile/alist.h"
52 #include "libguile/srcprop.h"
53 #include "libguile/hashtab.h"
54 #include "libguile/hash.h"
55 #include "libguile/ports.h"
56 #include "libguile/root.h"
57 #include "libguile/strings.h"
58 #include "libguile/vectors.h"
60 #include "libguile/validate.h"
61 #include "libguile/read.h"
/* The symbol `prefix'.  scm_lreadr compares SCM_KEYWORD_STYLE against
   this symbol before reading a keyword token.  */
65 SCM_SYMBOL (scm_keyword_prefix
, "prefix");
/* Table of reader options: three boolean options, each with initial
   value 0, and one SCM-valued option ("keywords") whose initial value
   is #f.  The table is registered through scm_init_opts during
   initialisation.
   NOTE(review): the SCM_COPY_SOURCE_P / SCM_RECORD_POSITIONS_P /
   SCM_CASE_INSENSITIVE_P / SCM_KEYWORD_STYLE macros used in this file
   presumably index into this table, so the entry order would matter --
   confirm against read.h.  */
67 scm_t_option scm_read_opts
[] = {
68 { SCM_OPTION_BOOLEAN
, "copy", 0,
69 "Copy source code expressions." },
70 { SCM_OPTION_BOOLEAN
, "positions", 0,
71 "Record positions of source code expressions." },
72 { SCM_OPTION_BOOLEAN
, "case-insensitive", 0,
73 "Convert symbols to lower case."},
74 { SCM_OPTION_SCM
, "keywords", SCM_UNPACK (SCM_BOOL_F
),
75 "Style of keyword recognition: #f or 'prefix."}
/* Scheme procedure `read-options-interface' (no required argument, one
   optional argument).  Hands SETTING to scm_options and afterwards
   forces position recording on whenever source copying is enabled.
   NOTE(review): the remaining scm_options arguments and the return
   statement are not visible in this excerpt.  */
78 SCM_DEFINE (scm_read_options
, "read-options-interface", 0, 1, 0,
80 "Option interface for the read options. Instead of using\n"
81 "this procedure directly, use the procedures @code{read-enable},\n"
82 "@code{read-disable}, @code{read-set!} and @var{read-options}.")
83 #define FUNC_NAME s_scm_read_options
85 SCM ans
= scm_options (setting
,
/* Copying source expressions implies recording their positions.  */
89 if (SCM_COPY_SOURCE_P
)
90 SCM_RECORD_POSITIONS_P
= 1;
95 /* An association list mapping extra hash characters to procedures.
   This is a pointer to the alist's storage, not the alist itself: the
   initialisation code points it at the location of the Scheme variable
   `read-hash-procedures', so every use dereferences it.  */
96 static SCM
*scm_read_hash_procedures
;
/* Scheme procedure `read' (no required argument, one optional
   argument PORT).  Validates PORT as an open input port, skips leading
   whitespace, pushes the first significant character back, and lets
   scm_lreadr read one datum using a freshly allocated token buffer.
   NOTE(review): the handling of an unbound PORT and of end of file is
   not visible in this excerpt.  */
98 SCM_DEFINE (scm_read
, "read", 0, 1, 0,
100 "Read an s-expression from the input port @var{port}, or from\n"
101 "the current input port if @var{port} is not specified.\n"
102 "Any whitespace before the next token is discarded.")
103 #define FUNC_NAME s_scm_read
108 if (SCM_UNBNDP (port
))
110 SCM_VALIDATE_OPINPORT (1,port
);
/* A NULL error name is passed here; elsewhere in this file the same
   call form is annotated "EOF is not an error here".  */
112 c
= scm_flush_ws (port
, (char *) NULL
);
/* Put the character back so that scm_lreadr reads it itself.  */
115 scm_ungetc (c
, port
);
/* Initial token buffer of 30 characters; the token readers enlarge it
   on demand with scm_grow_tok_buf.  */
117 tok_buf
= scm_allocate_string (30);
/* scm_lreadr takes an `SCM *copy' as its third parameter, so the
   address of the local COPY is passed.  */
118 return scm_lreadr (&tok_buf
, port
, &copy
);
/* Allocate a string twice as long as *TOK_BUF, copy the old characters
   into it, and return a pointer to the new string's characters.
   NOTE(review): the statement that stores the new string back into
   *TOK_BUF is not visible in this excerpt; callers loop on
   SCM_STRING_LENGTH (*tok_buf) after calling this, so they presumably
   rely on it -- confirm.  */
125 scm_grow_tok_buf (SCM
*tok_buf
)
127 size_t oldlen
= SCM_STRING_LENGTH (*tok_buf
);
128 SCM newstr
= scm_allocate_string (2 * oldlen
);
/* Carry the existing contents over, character by character.  */
131 for (i
= 0; i
!= oldlen
; ++i
)
132 SCM_STRING_CHARS (newstr
) [i
] = SCM_STRING_CHARS (*tok_buf
) [i
];
135 return SCM_STRING_CHARS (newstr
);
/* Read characters from PORT, dispatching on each one; callers use the
   return value as the first character they need to look at.  EOFERR is
   the name handed to scm_misc_error when an end-of-file error is
   raised; some callers pass NULL, annotated elsewhere in this file as
   "EOF is not an error here".
   NOTE(review): the case labels are mostly missing from this excerpt.
   The nested switch that watches for SCM_LINE_INCREMENTORS presumably
   skips a comment to the end of its line -- confirm.  */
141 scm_flush_ws (SCM port
, const char *eoferr
)
145 switch (c
= scm_getc (port
))
/* Mention the file name in the error when the port has one.  */
151 if (!SCM_FALSEP (SCM_FILENAME (port
)))
152 scm_misc_error (eoferr
,
154 scm_list_1 (SCM_FILENAME (port
)));
156 scm_misc_error (eoferr
, "end of file", SCM_EOL
);
161 switch (c
= scm_getc (port
))
167 case SCM_LINE_INCREMENTORS
:
171 case SCM_LINE_INCREMENTORS
:
172 case SCM_SINGLE_SPACES
:
/* Compare the NUL-terminated strings S1 and S2 without regard to case:
   characters are compared after passing each through scm_downcase.
   The final result is non-zero only when both strings are at their
   terminating NUL.
   NOTE(review): the loop and the early return taken on a mismatch are
   not visible in this excerpt.  */
183 scm_casei_streq (char *s1
, char *s2
)
186 if (scm_downcase((int)*s1
) != scm_downcase((int)*s2
))
193 return !(*s1
|| *s2
);
197 /* recsexpr is used when recording expressions
198 * constructed by read:sharp. */
/* Without DEBUG_EXTENSIONS, recsexpr is a macro that simply yields
   OBJ.  Otherwise it is a function that handles non-pairs separately
   and, for a pair not yet present in scm_source_whash, recurses into
   the elements of the list and inserts source properties built from
   LINE into scm_source_whash.  With SCM_COPY_SOURCE_P set, a copy of
   the list is built along the way.
   NOTE(review): the remaining scm_make_srcprops arguments (presumably
   COLUMN, FILENAME and the copy) and the return statements are not
   visible in this excerpt.  */
200 #ifndef DEBUG_EXTENSIONS
201 #define recsexpr(obj, line, column, filename) (obj)
204 recsexpr (SCM obj
, long line
, int column
, SCM filename
)
206 if (!SCM_CONSP(obj
)) {
210 /* If this sexpr is visible in the read:sharp source, we want to
211 keep that information, so only record non-constant cons cells
212 which haven't previously been read by the reader. */
213 if (SCM_FALSEP (scm_whash_lookup (scm_source_whash
, obj
)))
215 if (SCM_COPY_SOURCE_P
)
/* Copying: build a new list whose elements are the recorded versions
   of OBJ's elements.  */
217 copy
= scm_cons (recsexpr (SCM_CAR (obj
), line
, column
, filename
),
219 while ((tmp
= SCM_CDR (tmp
)) && SCM_CONSP (tmp
))
221 SCM_SETCDR (copy
, scm_cons (recsexpr (SCM_CAR (tmp
),
226 copy
= SCM_CDR (copy
);
/* Attach whatever non-pair tail is left to the end of the copy.  */
228 SCM_SETCDR (copy
, tmp
);
/* Not copying: still visit every element, but there is no copy.  */
232 recsexpr (SCM_CAR (obj
), line
, column
, filename
);
233 while ((tmp
= SCM_CDR (tmp
)) && SCM_CONSP (tmp
))
234 recsexpr (SCM_CAR (tmp
), line
, column
, filename
);
235 copy
= SCM_UNDEFINED
;
237 scm_whash_insert (scm_source_whash
,
239 scm_make_srcprops (line
,
250 /* Consume an SCSH-style block comment. Assume that we've already
251 read the initial `#!', and eat characters until we get a
252 newline/exclamation-point/sharp-sign/newline sequence. */
255 skip_scsh_block_comment (SCM port
)
256 #define FUNC_NAME "skip_scsh_block_comment"
258 /* Is this portable? Dear God, spare me from the non-eight-bit
259 characters. But is it tasteful? */
264 int c
= scm_getc (port
);
/* NOTE(review): presumably raised when end of file is reached; the
   guarding test is not visible in this excerpt.  */
267 SCM_MISC_ERROR ("unterminated `#! ... !#' comment", SCM_EOL
);
/* Shift the new character into HISTORY, which packs the last four
   characters read one per byte, the most recent in the low byte.  */
268 history
= ((history
<< 8) | (c
& 0xff)) & 0xffffffff;
270 /* Were the last four characters read "\n!#\n"? */
271 if (history
== (('\n' << 24) | ('!' << 16) | ('#' << 8) | '\n'))
/* Forward declaration: scm_lreadr calls this before its definition
   further down in the file.  */
278 static SCM
scm_get_hash_procedure(int c
);
/* Name that scm_lreadr hands to scm_lreadparen / scm_lreadrecparen
   when reading a list; those functions pass it on to scm_flush_ws.  */
280 static char s_list
[]="list";
283 scm_lreadr (SCM
*tok_buf
,SCM port
,SCM
*copy
)
284 #define FUNC_NAME "scm_lreadr"
291 c
= scm_flush_ws (port
, s_scm_read
);
292 tryagain_no_flush_ws
:
299 return SCM_RECORD_POSITIONS_P
300 ? scm_lreadrecparen (tok_buf
, port
, s_list
, copy
)
301 : scm_lreadparen (tok_buf
, port
, s_list
, copy
);
303 SCM_MISC_ERROR ("unexpected \")\"", SCM_EOL
);
310 p
= scm_sym_quasiquote
;
315 p
= scm_sym_uq_splicing
;
318 scm_ungetc (c
, port
);
323 scm_lreadr (tok_buf
, port
, copy
),
325 if (SCM_RECORD_POSITIONS_P
)
326 scm_whash_insert (scm_source_whash
,
328 scm_make_srcprops (SCM_LINUM (port
),
332 ? (*copy
= scm_cons2 (SCM_CAR (p
),
333 SCM_CAR (SCM_CDR (p
)),
342 /* Check for user-defined hash procedure first, to allow
343 overriding of builtin hash read syntaxes. */
344 SCM sharp
= scm_get_hash_procedure (c
);
345 if (!SCM_FALSEP (sharp
))
347 int line
= SCM_LINUM (port
);
348 int column
= SCM_COL (port
) - 2;
351 got
= scm_call_2 (sharp
, SCM_MAKE_CHAR (c
), port
);
352 if (SCM_EQ_P (got
, SCM_UNSPECIFIED
))
354 if (SCM_RECORD_POSITIONS_P
)
355 return *copy
= recsexpr (got
, line
, column
,
356 SCM_FILENAME (port
));
365 p
= scm_lreadparen (tok_buf
, port
, "vector", copy
);
366 return SCM_NULLP (p
) ? scm_nullvect
: scm_vector (p
);
387 scm_ungetc (c
, port
);
392 /* start of a shell script. Parse as a block comment,
393 terminated by !#, just like SCSH. */
394 skip_scsh_block_comment (port
);
395 /* EOF is not an error here */
396 c
= scm_flush_ws (port
, (char *)NULL
);
397 goto tryagain_no_flush_ws
;
401 j
= scm_read_token (c
, tok_buf
, port
, 0);
402 p
= scm_istr2bve (SCM_STRING_CHARS (*tok_buf
) + 1, (long) (j
- 1));
410 j
= scm_read_token (c
, tok_buf
, port
, 1);
411 return scm_mem2symbol (SCM_STRING_CHARS (*tok_buf
), j
);
415 j
= scm_read_token (c
, tok_buf
, port
, 0);
417 return SCM_MAKE_CHAR (c
);
418 if (c
>= '0' && c
< '8')
420 /* Dirk:FIXME:: This type of character syntax is not R5RS
421 * compliant. Further, it should be verified that the constant
422 * does only consist of octal digits. Finally, it should be
423 * checked whether the resulting fixnum is in the range of
425 p
= scm_i_mem2number (SCM_STRING_CHARS (*tok_buf
), j
, 8);
427 return SCM_MAKE_CHAR (SCM_INUM (p
));
429 for (c
= 0; c
< scm_n_charnames
; c
++)
431 && (scm_casei_streq (scm_charnames
[c
], SCM_STRING_CHARS (*tok_buf
))))
432 return SCM_MAKE_CHAR (scm_charnums
[c
]);
433 SCM_MISC_ERROR ("unknown # object", SCM_EOL
);
435 /* #:SYMBOL is a syntax for keywords supported in all contexts. */
437 j
= scm_read_token ('-', tok_buf
, port
, 0);
438 p
= scm_mem2symbol (SCM_STRING_CHARS (*tok_buf
), j
);
439 return scm_make_keyword_from_dash_symbol (p
);
444 SCM sharp
= scm_get_hash_procedure (c
);
446 if (!SCM_FALSEP (sharp
))
448 int line
= SCM_LINUM (port
);
449 int column
= SCM_COL (port
) - 2;
452 got
= scm_call_2 (sharp
, SCM_MAKE_CHAR (c
), port
);
453 if (SCM_EQ_P (got
, SCM_UNSPECIFIED
))
455 if (SCM_RECORD_POSITIONS_P
)
456 return *copy
= recsexpr (got
, line
, column
,
457 SCM_FILENAME (port
));
463 scm_misc_error (s_scm_read
, "Unknown # object: ~S",
464 scm_list_1 (SCM_MAKE_CHAR (c
)));
469 while ('"' != (c
= scm_getc (port
)))
472 SCM_MISC_ERROR ("end of file in string constant", SCM_EOL
);
474 while (j
+ 2 >= SCM_STRING_LENGTH (*tok_buf
))
475 scm_grow_tok_buf (tok_buf
);
478 switch (c
= scm_getc (port
))
504 SCM_STRING_CHARS (*tok_buf
)[j
] = c
;
509 SCM_STRING_CHARS (*tok_buf
)[j
] = 0;
510 return scm_mem2string (SCM_STRING_CHARS (*tok_buf
), j
);
512 case '0': case '1': case '2': case '3': case '4':
513 case '5': case '6': case '7': case '8': case '9':
518 j
= scm_read_token (c
, tok_buf
, port
, 0);
519 if (j
== 1 && (c
== '+' || c
== '-'))
520 /* Shortcut: Detected symbol '+ or '- */
523 p
= scm_i_mem2number (SCM_STRING_CHARS (*tok_buf
), j
, 10);
528 if ((j
== 2) && (scm_getc (port
) == '('))
530 scm_ungetc ('(', port
);
531 c
= SCM_STRING_CHARS (*tok_buf
)[1];
534 SCM_MISC_ERROR ("unknown # object", SCM_EOL
);
539 if (SCM_EQ_P (SCM_PACK (SCM_KEYWORD_STYLE
), scm_keyword_prefix
))
541 j
= scm_read_token ('-', tok_buf
, port
, 0);
542 p
= scm_mem2symbol (SCM_STRING_CHARS (*tok_buf
), j
);
543 return scm_make_keyword_from_dash_symbol (p
);
547 j
= scm_read_token (c
, tok_buf
, port
, 0);
551 return scm_mem2symbol (SCM_STRING_CHARS (*tok_buf
), j
);
/* Read a token from PORT into *TOK_BUF, enlarging the buffer with
   scm_grow_tok_buf when needed.  IC is the token's first character:
   callers pass either the character they have just read or '-' when
   building the dash-prefixed symbol for a keyword.  Characters go
   through scm_downcase when SCM_CASE_INSENSITIVE_P is set.  Callers
   use the return value as the token length.
   NOTE(review): the role of WEIRD is not visible in this excerpt
   (scm_lreadr passes 1 in one place and 0 everywhere else) -- confirm.
   The two _Pragma lines bracket the function; whatever preprocessor
   guard surrounds them is not visible here.  */
558 _Pragma ("noopt"); /* # pragma _CRI noopt */
562 scm_read_token (int ic
, SCM
*tok_buf
, SCM port
, int weird
)
568 c
= (SCM_CASE_INSENSITIVE_P
? scm_downcase(ic
) : ic
);
569 p
= SCM_STRING_CHARS (*tok_buf
);
/* P is re-fetched after each growth because scm_grow_tok_buf returns
   the character storage of a newly allocated string.  */
576 while (j
+ 2 >= SCM_STRING_LENGTH (*tok_buf
))
577 p
= scm_grow_tok_buf (tok_buf
);
584 while (j
+ 2 >= SCM_STRING_LENGTH (*tok_buf
))
585 p
= scm_grow_tok_buf (tok_buf
);
/* NOTE(review): whitespace presumably ends the token, with the
   delimiter pushed back onto the port -- confirm.  */
593 case SCM_WHITE_SPACES
:
594 case SCM_LINE_INCREMENTORS
:
598 scm_ungetc (c
, port
);
626 scm_ungetc (c
, port
);
634 c
= (SCM_CASE_INSENSITIVE_P
? scm_downcase(c
) : c
);
644 _Pragma ("opt"); /* # pragma _CRI opt */
/* Read data from PORT up to a closing `)' and build a list from them,
   one scm_lreadr call per element.  NAME is handed to scm_flush_ws as
   the error name.  The symbol scm_sym_dot marks a dotted tail: the
   datum after it becomes the final cdr, and in the first dotted branch
   anything other than `)' after it raises "missing close paren".
   NOTE(review): the opening parenthesis is presumably consumed by the
   caller, and the return statements are not visible in this excerpt --
   confirm.  */
648 scm_lreadparen (SCM
*tok_buf
, SCM port
, char *name
, SCM
*copy
)
649 #define FUNC_NAME "scm_lreadparen"
656 c
= scm_flush_ws (port
, name
);
659 scm_ungetc (c
, port
);
/* A dot as the very first datum: the next datum is the whole result.  */
660 if (SCM_EQ_P (scm_sym_dot
, (tmp
= scm_lreadr (tok_buf
, port
, copy
))))
662 ans
= scm_lreadr (tok_buf
, port
, copy
);
664 if (')' != (c
= scm_flush_ws (port
, name
)))
665 SCM_MISC_ERROR ("missing close paren", SCM_EOL
);
/* ANS is the head of the list being built; TL tracks its last pair.  */
668 ans
= tl
= scm_cons (tmp
, SCM_EOL
);
669 while (')' != (c
= scm_flush_ws (port
, name
)))
671 scm_ungetc (c
, port
);
672 if (SCM_EQ_P (scm_sym_dot
, (tmp
= scm_lreadr (tok_buf
, port
, copy
))))
/* Dotted tail: the next datum becomes the cdr of the last pair.  */
674 SCM_SETCDR (tl
, scm_lreadr (tok_buf
, port
, copy
));
/* Ordinary element: append a fresh pair.  */
677 SCM_SETCDR (tl
, scm_cons (tmp
, SCM_EOL
));
/* Position-recording counterpart of scm_lreadparen; scm_lreadr selects
   it when SCM_RECORD_POSITIONS_P is set.  The list is built in ANS
   (last pair TL).  When SCM_COPY_SOURCE_P is set, a parallel list is
   built in ANS2 (last pair TL2); where the visible code shows it, pair
   elements of that list are taken from *COPY and other elements are
   used as they are.  Source properties made from the line captured on
   entry are inserted into scm_source_whash.
   NOTE(review): the remaining scm_make_srcprops arguments and the
   return statements are not visible in this excerpt.  */
686 scm_lreadrecparen (SCM
*tok_buf
, SCM port
, char *name
, SCM
*copy
)
687 #define FUNC_NAME "scm_lreadrecparen"
691 register SCM tl
, tl2
= SCM_EOL
;
692 SCM ans
, ans2
= SCM_EOL
;
693 /* Need to capture line and column numbers here. */
694 int line
= SCM_LINUM (port
);
/* NOTE(review): the -1 presumably points the column back at the
   opening parenthesis that has already been read -- confirm.  */
695 int column
= SCM_COL (port
) - 1;
697 c
= scm_flush_ws (port
, name
);
700 scm_ungetc (c
, port
);
701 if (SCM_EQ_P (scm_sym_dot
, (tmp
= scm_lreadr (tok_buf
, port
, copy
))))
703 ans
= scm_lreadr (tok_buf
, port
, copy
);
704 if (')' != (c
= scm_flush_ws (port
, name
)))
705 SCM_MISC_ERROR ("missing close paren", SCM_EOL
);
708 /* Build the head of the list structure. */
709 ans
= tl
= scm_cons (tmp
, SCM_EOL
);
710 if (SCM_COPY_SOURCE_P
)
711 ans2
= tl2
= scm_cons (SCM_CONSP (tmp
)
715 while (')' != (c
= scm_flush_ws (port
, name
)))
719 scm_ungetc (c
, port
);
720 if (SCM_EQ_P (scm_sym_dot
, (tmp
= scm_lreadr (tok_buf
, port
, copy
))))
/* Dotted tail: the next datum becomes the final cdr and must be
   followed by the closing parenthesis.  */
722 SCM_SETCDR (tl
, tmp
= scm_lreadr (tok_buf
, port
, copy
));
723 if (SCM_COPY_SOURCE_P
)
724 SCM_SETCDR (tl2
, scm_cons (SCM_CONSP (tmp
)
728 if (')' != (c
= scm_flush_ws (port
, name
)))
729 SCM_MISC_ERROR ("missing close paren", SCM_EOL
);
/* Ordinary element: append a fresh pair to the list.  */
733 new_tail
= scm_cons (tmp
, SCM_EOL
);
734 SCM_SETCDR (tl
, new_tail
);
737 if (SCM_COPY_SOURCE_P
)
/* Mirror the append on the copy; a pair element comes from *COPY.  */
739 SCM new_tail2
= scm_cons (SCM_CONSP (tmp
) ? *copy
: tmp
, SCM_EOL
);
740 SCM_SETCDR (tl2
, new_tail2
);
745 scm_whash_insert (scm_source_whash
,
747 scm_make_srcprops (line
,
761 /* Manipulate the read-hash-procedures alist. This could be written in
762 Scheme, but maybe it will also be used by C code during initialisation. */
/* Scheme procedure `read-hash-extend' (two required arguments).  CHR
   must be a character; PROC must be a procedure or #f.  A new CHR is
   added at the front of the alist unless PROC is #f.  For a CHR that
   is already present, #f removes its entry and any other PROC replaces
   the stored procedure.  Returns SCM_UNSPECIFIED.  */
763 SCM_DEFINE (scm_read_hash_extend
, "read-hash-extend", 2, 0, 0,
765 "Install the procedure @var{proc} for reading expressions\n"
766 "starting with the character sequence @code{#} and @var{chr}.\n"
767 "@var{proc} will be called with two arguments: the character\n"
768 "@var{chr} and the port to read further data from. The object\n"
769 "returned will be the return value of @code{read}.")
770 #define FUNC_NAME s_scm_read_hash_extend
775 SCM_VALIDATE_CHAR (1, chr
);
776 SCM_ASSERT (SCM_FALSEP (proc
)
777 || SCM_EQ_P (scm_procedure_p (proc
), SCM_BOOL_T
),
778 proc
, SCM_ARG2
, FUNC_NAME
);
780 /* Check if chr is already in the alist. */
781 this = *scm_read_hash_procedures
;
785 if (SCM_NULLP (this))
787 /* not found, so add it to the beginning. */
788 if (!SCM_FALSEP (proc
))
790 *scm_read_hash_procedures
=
791 scm_cons (scm_cons (chr
, proc
), *scm_read_hash_procedures
);
795 if (SCM_EQ_P (chr
, SCM_CAAR (this)))
797 /* already in the alist. */
798 if (SCM_FALSEP (proc
))
/* Removal.  PREV being #f means the match is the first entry, so the
   head of the alist is dropped.  */
801 if (SCM_FALSEP (prev
))
803 *scm_read_hash_procedures
=
804 SCM_CDR (*scm_read_hash_procedures
);
/* Otherwise splice the matching entry out after PREV.  */
807 scm_set_cdr_x (prev
, SCM_CDR (this));
/* Replacement: overwrite the procedure stored in the entry.  */
812 scm_set_cdr_x (SCM_CAR (this), proc
);
817 this = SCM_CDR (this);
820 return SCM_UNSPECIFIED
;
824 /* Recover the read-hash procedure corresponding to char c. */
/* Walks the alist behind scm_read_hash_procedures and returns the cdr
   of the first entry whose character equals C.
   NOTE(review): the value returned when the list is exhausted is not
   visible in this excerpt; callers test the result with SCM_FALSEP, so
   it is presumably SCM_BOOL_F -- confirm.  */
826 scm_get_hash_procedure (int c
)
828 SCM rest
= *scm_read_hash_procedures
;
832 if (SCM_NULLP (rest
))
835 if (SCM_CHAR (SCM_CAAR (rest
)) == c
)
836 return SCM_CDAR (rest
);
838 rest
= SCM_CDR (rest
);
/* Reader initialisation.  NOTE(review): the header of the enclosing
   function is not visible in this excerpt.
   Defines the Scheme variable `read-hash-procedures' with the empty
   list as its value and keeps a pointer to its location, then
   registers the read options table through scm_init_opts.  */
845 scm_read_hash_procedures
=
846 SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL
));
848 scm_init_opts (scm_read_options
, scm_read_opts
, SCM_N_READ_OPTIONS
);
/* NOTE(review): read.x is presumably the generated snarf output for
   the SCM_DEFINE / SCM_SYMBOL forms in this file -- confirm.  */
849 #ifndef SCM_MAGIC_SNARFER
850 #include "libguile/read.x"