1 /* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006,
2 * 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
34 #include "libguile/_scm.h"
35 #include "libguile/bytevectors.h"
36 #include "libguile/chars.h"
37 #include "libguile/eval.h"
38 #include "libguile/arrays.h"
39 #include "libguile/bitvectors.h"
40 #include "libguile/keywords.h"
41 #include "libguile/alist.h"
42 #include "libguile/srcprop.h"
43 #include "libguile/hashtab.h"
44 #include "libguile/hash.h"
45 #include "libguile/ports.h"
46 #include "libguile/fports.h"
47 #include "libguile/root.h"
48 #include "libguile/strings.h"
49 #include "libguile/strports.h"
50 #include "libguile/vectors.h"
51 #include "libguile/validate.h"
52 #include "libguile/srfi-4.h"
53 #include "libguile/srfi-13.h"
55 #include "libguile/read.h"
56 #include "libguile/private-options.h"
61 SCM_GLOBAL_SYMBOL (scm_sym_dot
, ".");
62 SCM_SYMBOL (scm_keyword_prefix
, "prefix");
63 SCM_SYMBOL (scm_keyword_postfix
, "postfix");
64 SCM_SYMBOL (sym_nil
, "nil");
66 scm_t_option scm_read_opts
[] = {
67 { SCM_OPTION_BOOLEAN
, "copy", 0,
68 "Copy source code expressions." },
69 { SCM_OPTION_BOOLEAN
, "positions", 1,
70 "Record positions of source code expressions." },
71 { SCM_OPTION_BOOLEAN
, "case-insensitive", 0,
72 "Convert symbols to lower case."},
73 { SCM_OPTION_SCM
, "keywords", (scm_t_bits
) SCM_BOOL_F_BITS
,
74 "Style of keyword recognition: #f, 'prefix or 'postfix."},
75 { SCM_OPTION_BOOLEAN
, "r6rs-hex-escapes", 0,
76 "Use R6RS variable-length character and string hex escapes."},
77 { SCM_OPTION_BOOLEAN
, "square-brackets", 1,
78 "Treat `[' and `]' as parentheses, for R6RS compatibility."},
79 { SCM_OPTION_BOOLEAN
, "hungry-eol-escapes", 0,
80 "In strings, consume leading whitespace after an escaped end-of-line."},
84 /* Internal read options structure. This is initialized by 'scm_read'
85 from the global and per-port read options, and a pointer is passed
86 down to all helper functions. */
90 KEYWORD_STYLE_HASH_PREFIX
,
97 enum t_keyword_style keyword_style
;
98 unsigned int copy_source_p
: 1;
99 unsigned int record_positions_p
: 1;
100 unsigned int case_insensitive_p
: 1;
101 unsigned int r6rs_escapes_p
: 1;
102 unsigned int square_brackets_p
: 1;
103 unsigned int hungry_eol_escapes_p
: 1;
106 typedef struct t_read_opts scm_t_read_opts
;
110 Give meaningful error messages for errors
114 FILE:LINE:COL: MESSAGE
115 This happened in ....
117 This is not standard GNU format, but the test-suite likes the real
118 message to be in front.
124 scm_i_input_error (char const *function
,
125 SCM port
, const char *message
, SCM arg
)
127 SCM fn
= (scm_is_string (SCM_FILENAME(port
))
129 : scm_from_locale_string ("#<unknown port>"));
131 SCM string_port
= scm_open_output_string ();
132 SCM string
= SCM_EOL
;
133 scm_simple_format (string_port
,
134 scm_from_locale_string ("~A:~S:~S: ~A"),
136 scm_from_long (SCM_LINUM (port
) + 1),
137 scm_from_int (SCM_COL (port
) + 1),
138 scm_from_locale_string (message
)));
140 string
= scm_get_output_string (string_port
);
141 scm_close_output_port (string_port
);
142 scm_error_scm (scm_from_latin1_symbol ("read-error"),
143 function
? scm_from_locale_string (function
) : SCM_BOOL_F
,
150 SCM_DEFINE (scm_read_options
, "read-options-interface", 0, 1, 0,
152 "Option interface for the read options. Instead of using\n"
153 "this procedure directly, use the procedures @code{read-enable},\n"
154 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
155 #define FUNC_NAME s_scm_read_options
157 SCM ans
= scm_options (setting
,
160 if (SCM_COPY_SOURCE_P
)
161 SCM_RECORD_POSITIONS_P
= 1;
166 /* A fluid referring to an association list mapping extra hash
167 characters to procedures. */
168 static SCM
*scm_i_read_hash_procedures
;
171 scm_i_read_hash_procedures_ref (void)
173 return scm_fluid_ref (*scm_i_read_hash_procedures
);
177 scm_i_read_hash_procedures_set_x (SCM value
)
179 scm_fluid_set_x (*scm_i_read_hash_procedures
, value
);
186 /* Size of the C buffer used to read symbols and numbers. */
187 #define READER_BUFFER_SIZE 128
189 /* Number of 32-bit codepoints in the buffer used to read strings. */
190 #define READER_STRING_BUFFER_SIZE 128
192 /* The maximum size of Scheme character names. */
193 #define READER_CHAR_NAME_MAX_SIZE 50
195 /* The maximum size of reader directive names. */
196 #define READER_DIRECTIVE_NAME_MAX_SIZE 50
199 /* `isblank' is only in C99. */
200 #define CHAR_IS_BLANK_(_chr) \
201 (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n') \
202 || ((_chr) == '\f') || ((_chr) == '\r'))
205 # define CHAR_IS_BLANK(_chr) \
206 ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
208 # define CHAR_IS_BLANK CHAR_IS_BLANK_
212 /* R5RS one-character delimiters (see section 7.1.1, ``Lexical
214 #define CHAR_IS_R5RS_DELIMITER(c) \
216 || (c) == ')' || (c) == '(' || (c) == ';' || (c) == '"')
218 #define CHAR_IS_DELIMITER(c) \
219 (CHAR_IS_R5RS_DELIMITER (c) \
220 || (((c) == ']' || (c) == '[') && opts->square_brackets_p))
222 /* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
224 #define CHAR_IS_EXPONENT_MARKER(_chr) \
225 (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
226 || ((_chr) == 'd') || ((_chr) == 'l'))
228 /* Read an SCSH block comment. */
229 static SCM
scm_read_scsh_block_comment (scm_t_wchar
, SCM
);
230 static SCM
scm_read_r6rs_block_comment (scm_t_wchar
, SCM
);
231 static SCM
scm_read_commented_expression (scm_t_wchar
, SCM
, scm_t_read_opts
*);
232 static SCM
scm_read_shebang (scm_t_wchar
, SCM
, scm_t_read_opts
*);
233 static SCM
scm_get_hash_procedure (int);
235 /* Read from PORT until a delimiter (e.g., a whitespace) is read. Put the
236 result in the pre-allocated buffer BUF. Return zero if the whole token has
237 fewer than BUF_SIZE bytes, non-zero otherwise. READ will be set the number of
238 bytes actually read. */
240 read_token (SCM port
, scm_t_read_opts
*opts
,
241 char *buf
, size_t buf_size
, size_t *read
)
245 while (*read
< buf_size
)
249 chr
= scm_get_byte_or_eof (port
);
253 else if (CHAR_IS_DELIMITER (chr
))
255 scm_unget_byte (chr
, port
);
268 /* Like `read_token', but return either BUFFER, or a GC-allocated buffer
269 if the token doesn't fit in BUFFER_SIZE bytes. */
271 read_complete_token (SCM port
, scm_t_read_opts
*opts
,
272 char *buffer
, size_t buffer_size
, size_t *read
)
275 size_t bytes_read
, overflow_size
= 0;
276 char *overflow_buffer
= NULL
;
280 overflow
= read_token (port
, opts
, buffer
, buffer_size
, &bytes_read
);
283 if (overflow
|| overflow_size
!= 0)
285 if (overflow_size
== 0)
287 overflow_buffer
= scm_gc_malloc_pointerless (bytes_read
, "read");
288 memcpy (overflow_buffer
, buffer
, bytes_read
);
289 overflow_size
= bytes_read
;
294 scm_gc_malloc_pointerless (overflow_size
+ bytes_read
, "read");
296 memcpy (new_buf
, overflow_buffer
, overflow_size
);
297 memcpy (new_buf
+ overflow_size
, buffer
, bytes_read
);
299 overflow_buffer
= new_buf
;
300 overflow_size
+= bytes_read
;
307 *read
= overflow_size
;
311 return (overflow_size
> 0 ? overflow_buffer
: buffer
);
314 /* Skip whitespace from PORT and return the first non-whitespace character
315 read. Raise an error on end-of-file. */
317 flush_ws (SCM port
, scm_t_read_opts
*opts
, const char *eoferr
)
321 switch (c
= scm_getc (port
))
327 scm_i_input_error (eoferr
,
336 switch (c
= scm_getc (port
))
342 case SCM_LINE_INCREMENTORS
:
348 switch (c
= scm_getc (port
))
351 eoferr
= "read_sharp";
354 scm_read_shebang (c
, port
, opts
);
357 scm_read_commented_expression (c
, port
, opts
);
360 if (scm_is_false (scm_get_hash_procedure (c
)))
362 scm_read_r6rs_block_comment (c
, port
);
367 scm_ungetc (c
, port
);
372 case SCM_LINE_INCREMENTORS
:
373 case SCM_SINGLE_SPACES
:
388 static SCM
scm_read_expression (SCM port
, scm_t_read_opts
*opts
);
389 static SCM
scm_read_sharp (int chr
, SCM port
, scm_t_read_opts
*opts
,
390 long line
, int column
);
394 maybe_annotate_source (SCM x
, SCM port
, scm_t_read_opts
*opts
,
395 long line
, int column
)
397 if (opts
->record_positions_p
)
398 scm_i_set_source_properties_x (x
, line
, column
, SCM_FILENAME (port
));
403 scm_read_sexp (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
)
404 #define FUNC_NAME "scm_i_lreadparen"
407 SCM tmp
, tl
, ans
= SCM_EOL
;
408 const int terminating_char
= ((chr
== '[') ? ']' : ')');
410 /* Need to capture line and column numbers here. */
411 long line
= SCM_LINUM (port
);
412 int column
= SCM_COL (port
) - 1;
414 c
= flush_ws (port
, opts
, FUNC_NAME
);
415 if (terminating_char
== c
)
418 scm_ungetc (c
, port
);
419 tmp
= scm_read_expression (port
, opts
);
421 /* Note that it is possible for scm_read_expression to return
422 scm_sym_dot, but not as part of a dotted pair: as in #{.}#. So
423 check that it's a real dot by checking `c'. */
424 if (c
== '.' && scm_is_eq (scm_sym_dot
, tmp
))
426 ans
= scm_read_expression (port
, opts
);
427 if (terminating_char
!= (c
= flush_ws (port
, opts
, FUNC_NAME
)))
428 scm_i_input_error (FUNC_NAME
, port
, "missing close paren",
433 /* Build the head of the list structure. */
434 ans
= tl
= scm_cons (tmp
, SCM_EOL
);
436 while (terminating_char
!= (c
= flush_ws (port
, opts
, FUNC_NAME
)))
440 if (c
== ')' || (c
== ']' && opts
->square_brackets_p
))
441 scm_i_input_error (FUNC_NAME
, port
,
442 "in pair: mismatched close paren: ~A",
443 scm_list_1 (SCM_MAKE_CHAR (c
)));
445 scm_ungetc (c
, port
);
446 tmp
= scm_read_expression (port
, opts
);
448 /* See above note about scm_sym_dot. */
449 if (c
== '.' && scm_is_eq (scm_sym_dot
, tmp
))
451 SCM_SETCDR (tl
, scm_read_expression (port
, opts
));
453 c
= flush_ws (port
, opts
, FUNC_NAME
);
454 if (terminating_char
!= c
)
455 scm_i_input_error (FUNC_NAME
, port
,
456 "in pair: missing close paren", SCM_EOL
);
460 new_tail
= scm_cons (tmp
, SCM_EOL
);
461 SCM_SETCDR (tl
, new_tail
);
466 return maybe_annotate_source (ans
, port
, opts
, line
, column
);
471 /* Read a hexadecimal number NDIGITS in length. Put its value into the variable
472 C. If TERMINATOR is non-null, terminate early if the TERMINATOR character is
474 #define SCM_READ_HEX_ESCAPE(ndigits, terminator) \
480 while (i < ndigits) \
482 a = scm_getc (port); \
486 && (a == (scm_t_wchar) terminator) \
489 if ('0' <= a && a <= '9') \
491 else if ('A' <= a && a <= 'F') \
493 else if ('a' <= a && a <= 'f') \
506 skip_intraline_whitespace (SCM port
)
516 while (c
== '\t' || uc_is_general_category (c
, UC_SPACE_SEPARATOR
));
518 scm_ungetc (c
, port
);
522 scm_read_string (int chr
, SCM port
, scm_t_read_opts
*opts
)
523 #define FUNC_NAME "scm_lreadr"
525 /* For strings smaller than C_STR, this function creates only one Scheme
526 object (the string returned). */
529 size_t c_str_len
= 0;
530 scm_t_wchar c
, c_str
[READER_STRING_BUFFER_SIZE
];
532 /* Need to capture line and column numbers here. */
533 long line
= SCM_LINUM (port
);
534 int column
= SCM_COL (port
) - 1;
536 while ('"' != (c
= scm_getc (port
)))
541 scm_i_input_error (FUNC_NAME
, port
,
542 "end of file in string constant", SCM_EOL
);
545 if (c_str_len
+ 1 >= READER_STRING_BUFFER_SIZE
)
547 str
= scm_cons (scm_from_utf32_stringn (c_str
, c_str_len
), str
);
553 switch (c
= scm_getc (port
))
561 if (opts
->hungry_eol_escapes_p
)
562 skip_intraline_whitespace (port
);
589 if (opts
->r6rs_escapes_p
)
590 SCM_READ_HEX_ESCAPE (10, ';');
592 SCM_READ_HEX_ESCAPE (2, '\0');
595 if (!opts
->r6rs_escapes_p
)
597 SCM_READ_HEX_ESCAPE (4, '\0');
601 if (!opts
->r6rs_escapes_p
)
603 SCM_READ_HEX_ESCAPE (6, '\0');
608 scm_i_input_error (FUNC_NAME
, port
,
609 "illegal character in escape sequence: ~S",
610 scm_list_1 (SCM_MAKE_CHAR (c
)));
614 c_str
[c_str_len
++] = c
;
617 if (scm_is_null (str
))
618 /* Fast path: we got a string that fits in C_STR. */
619 str
= scm_from_utf32_stringn (c_str
, c_str_len
);
623 str
= scm_cons (scm_from_utf32_stringn (c_str
, c_str_len
), str
);
625 str
= scm_string_concatenate_reverse (str
, SCM_UNDEFINED
, SCM_UNDEFINED
);
628 return maybe_annotate_source (str
, port
, opts
, line
, column
);
634 scm_read_number (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
)
636 SCM result
, str
= SCM_EOL
;
637 char local_buffer
[READER_BUFFER_SIZE
], *buffer
;
639 scm_t_port
*pt
= SCM_PTAB_ENTRY (port
);
641 /* Need to capture line and column numbers here. */
642 long line
= SCM_LINUM (port
);
643 int column
= SCM_COL (port
) - 1;
645 scm_ungetc (chr
, port
);
646 buffer
= read_complete_token (port
, opts
, local_buffer
, sizeof local_buffer
,
649 str
= scm_from_stringn (buffer
, bytes_read
, pt
->encoding
, pt
->ilseq_handler
);
651 result
= scm_string_to_number (str
, SCM_UNDEFINED
);
652 if (scm_is_false (result
))
654 /* Return a symbol instead of a number */
655 if (opts
->case_insensitive_p
)
656 str
= scm_string_downcase_x (str
);
657 result
= scm_string_to_symbol (str
);
659 else if (SCM_NIMP (result
))
660 result
= maybe_annotate_source (result
, port
, opts
, line
, column
);
662 SCM_COL (port
) += scm_i_string_length (str
);
667 scm_read_mixed_case_symbol (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
)
670 int ends_with_colon
= 0;
672 int postfix
= (opts
->keyword_style
== KEYWORD_STYLE_POSTFIX
);
673 char local_buffer
[READER_BUFFER_SIZE
], *buffer
;
674 scm_t_port
*pt
= SCM_PTAB_ENTRY (port
);
677 scm_ungetc (chr
, port
);
678 buffer
= read_complete_token (port
, opts
, local_buffer
, sizeof local_buffer
,
681 ends_with_colon
= buffer
[bytes_read
- 1] == ':';
683 if (postfix
&& ends_with_colon
&& (bytes_read
> 1))
685 str
= scm_from_stringn (buffer
, bytes_read
- 1,
686 pt
->encoding
, pt
->ilseq_handler
);
688 if (opts
->case_insensitive_p
)
689 str
= scm_string_downcase_x (str
);
690 result
= scm_symbol_to_keyword (scm_string_to_symbol (str
));
694 str
= scm_from_stringn (buffer
, bytes_read
,
695 pt
->encoding
, pt
->ilseq_handler
);
697 if (opts
->case_insensitive_p
)
698 str
= scm_string_downcase_x (str
);
699 result
= scm_string_to_symbol (str
);
702 SCM_COL (port
) += scm_i_string_length (str
);
707 scm_read_number_and_radix (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
)
708 #define FUNC_NAME "scm_lreadr"
712 char local_buffer
[READER_BUFFER_SIZE
], *buffer
;
740 scm_ungetc (chr
, port
);
741 scm_ungetc ('#', port
);
745 buffer
= read_complete_token (port
, opts
, local_buffer
, sizeof local_buffer
,
748 pt
= SCM_PTAB_ENTRY (port
);
749 str
= scm_from_stringn (buffer
, read
, pt
->encoding
, pt
->ilseq_handler
);
751 result
= scm_string_to_number (str
, scm_from_uint (radix
));
753 SCM_COL (port
) += scm_i_string_length (str
);
755 if (scm_is_true (result
))
758 scm_i_input_error (FUNC_NAME
, port
, "unknown # object", SCM_EOL
);
765 scm_read_quote (int chr
, SCM port
, scm_t_read_opts
*opts
)
768 long line
= SCM_LINUM (port
);
769 int column
= SCM_COL (port
) - 1;
774 p
= scm_sym_quasiquote
;
787 p
= scm_sym_uq_splicing
;
790 scm_ungetc (c
, port
);
797 fprintf (stderr
, "%s: unhandled quote character (%i)\n",
798 "scm_read_quote", chr
);
802 p
= scm_cons2 (p
, scm_read_expression (port
, opts
), SCM_EOL
);
803 return maybe_annotate_source (p
, port
, opts
, line
, column
);
806 SCM_SYMBOL (sym_syntax
, "syntax");
807 SCM_SYMBOL (sym_quasisyntax
, "quasisyntax");
808 SCM_SYMBOL (sym_unsyntax
, "unsyntax");
809 SCM_SYMBOL (sym_unsyntax_splicing
, "unsyntax-splicing");
812 scm_read_syntax (int chr
, SCM port
, scm_t_read_opts
*opts
)
815 long line
= SCM_LINUM (port
);
816 int column
= SCM_COL (port
) - 1;
834 p
= sym_unsyntax_splicing
;
837 scm_ungetc (c
, port
);
844 fprintf (stderr
, "%s: unhandled syntax character (%i)\n",
845 "scm_read_syntax", chr
);
849 p
= scm_cons2 (p
, scm_read_expression (port
, opts
), SCM_EOL
);
850 return maybe_annotate_source (p
, port
, opts
, line
, column
);
854 scm_read_nil (int chr
, SCM port
, scm_t_read_opts
*opts
)
856 SCM id
= scm_read_mixed_case_symbol (chr
, port
, opts
);
858 if (!scm_is_eq (id
, sym_nil
))
859 scm_i_input_error ("scm_read_nil", port
,
860 "unexpected input while reading #nil: ~a",
863 return SCM_ELISP_NIL
;
867 scm_read_semicolon_comment (int chr
, SCM port
)
871 /* We use the get_byte here because there is no need to get the
872 locale correct with comment input. This presumes that newline
873 always represents itself no matter what the encoding is. */
874 for (c
= scm_get_byte_or_eof (port
);
875 (c
!= EOF
) && (c
!= '\n');
876 c
= scm_get_byte_or_eof (port
));
878 return SCM_UNSPECIFIED
;
882 /* Sharp readers, i.e. readers called after a `#' sign has been read. */
885 scm_read_boolean (int chr
, SCM port
)
898 return SCM_UNSPECIFIED
;
902 scm_read_character (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
)
903 #define FUNC_NAME "scm_lreadr"
905 char buffer
[READER_CHAR_NAME_MAX_SIZE
];
907 size_t charname_len
, bytes_read
;
912 overflow
= read_token (port
, opts
, buffer
, READER_CHAR_NAME_MAX_SIZE
,
915 scm_i_input_error (FUNC_NAME
, port
, "character name too long", SCM_EOL
);
919 chr
= scm_getc (port
);
921 scm_i_input_error (FUNC_NAME
, port
, "unexpected end of file "
922 "while reading character", SCM_EOL
);
924 /* CHR must be a token delimiter, like a whitespace. */
925 return (SCM_MAKE_CHAR (chr
));
928 pt
= SCM_PTAB_ENTRY (port
);
930 /* Simple ASCII characters can be processed immediately. Also, simple
931 ISO-8859-1 characters can be processed immediately if the encoding for this
932 port is ISO-8859-1. */
933 if (bytes_read
== 1 && ((unsigned char) buffer
[0] <= 127 || pt
->encoding
== NULL
))
936 return SCM_MAKE_CHAR (buffer
[0]);
939 /* Otherwise, convert the buffer into a proper scheme string for
941 charname
= scm_from_stringn (buffer
, bytes_read
, pt
->encoding
,
943 charname_len
= scm_i_string_length (charname
);
944 SCM_COL (port
) += charname_len
;
945 cp
= scm_i_string_ref (charname
, 0);
946 if (charname_len
== 1)
947 return SCM_MAKE_CHAR (cp
);
949 /* Ignore dotted circles, which may be used to keep combining characters from
950 combining with the backslash in #\charname. */
951 if (cp
== SCM_CODEPOINT_DOTTED_CIRCLE
&& charname_len
== 2)
952 return SCM_MAKE_CHAR (scm_i_string_ref (charname
, 1));
954 if (cp
>= '0' && cp
< '8')
956 /* Dirk:FIXME:: This type of character syntax is not R5RS
957 * compliant. Further, it should be verified that the constant
958 * does only consist of octal digits. */
959 SCM p
= scm_string_to_number (charname
, scm_from_uint (8));
962 scm_t_wchar c
= scm_to_uint32 (p
);
963 if (SCM_IS_UNICODE_CHAR (c
))
964 return SCM_MAKE_CHAR (c
);
966 scm_i_input_error (FUNC_NAME
, port
,
967 "out-of-range octal character escape: ~a",
968 scm_list_1 (charname
));
972 if (cp
== 'x' && (charname_len
> 1))
976 /* Convert from hex, skipping the initial 'x' character in CHARNAME */
977 p
= scm_string_to_number (scm_c_substring (charname
, 1, charname_len
),
981 scm_t_wchar c
= scm_to_uint32 (p
);
982 if (SCM_IS_UNICODE_CHAR (c
))
983 return SCM_MAKE_CHAR (c
);
985 scm_i_input_error (FUNC_NAME
, port
,
986 "out-of-range hex character escape: ~a",
987 scm_list_1 (charname
));
991 /* The names of characters should never have non-Latin1
993 if (scm_i_is_narrow_string (charname
)
994 || scm_i_try_narrow_string (charname
))
995 { SCM ch
= scm_i_charname_to_char (scm_i_string_chars (charname
),
997 if (scm_is_true (ch
))
1001 scm_i_input_error (FUNC_NAME
, port
, "unknown character name ~a",
1002 scm_list_1 (charname
));
1004 return SCM_UNSPECIFIED
;
1009 scm_read_keyword (int chr
, SCM port
, scm_t_read_opts
*opts
)
1013 /* Read the symbol that comprises the keyword. Doing this instead of
1014 invoking a specific symbol reader function allows `scm_read_keyword ()'
1015 to adapt to the delimiters currently valid of symbols.
1017 XXX: This implementation allows sloppy syntaxes like `#: key'. */
1018 symbol
= scm_read_expression (port
, opts
);
1019 if (!scm_is_symbol (symbol
))
1020 scm_i_input_error ("scm_read_keyword", port
,
1021 "keyword prefix `~a' not followed by a symbol: ~s",
1022 scm_list_2 (SCM_MAKE_CHAR (chr
), symbol
));
1024 return (scm_symbol_to_keyword (symbol
));
1028 scm_read_vector (int chr
, SCM port
, scm_t_read_opts
*opts
,
1029 long line
, int column
)
1031 /* Note: We call `scm_read_sexp ()' rather than READER here in order to
1032 guarantee that it's going to do what we want. After all, this is an
1033 implementation detail of `scm_read_vector ()', not a desirable
1035 return maybe_annotate_source (scm_vector (scm_read_sexp (chr
, port
, opts
)),
1036 port
, opts
, line
, column
);
1039 /* Helper used by scm_read_array */
1041 read_decimal_integer (SCM port
, int c
, ssize_t
*resp
)
1050 c
= scm_getc (port
);
1053 while ('0' <= c
&& c
<= '9')
1055 res
= 10*res
+ c
-'0';
1057 c
= scm_getc (port
);
1065 /* Read an array. This function can also read vectors and uniform
1066 vectors. Also, the conflict between '#f' and '#f32' and '#f64' is
1069 C is the first character read after the '#'.
1072 scm_read_array (int c
, SCM port
, scm_t_read_opts
*opts
, long line
, int column
)
1075 scm_t_wchar tag_buf
[8];
1078 SCM tag
, shape
= SCM_BOOL_F
, elements
, array
;
1080 /* XXX - shortcut for ordinary vectors. Shouldn't be necessary but
1081 the array code can not deal with zero-length dimensions yet, and
1082 we want to allow zero-length vectors, of course.
1085 return scm_read_vector (c
, port
, opts
, line
, column
);
1087 /* Disambiguate between '#f' and uniform floating point vectors.
1091 c
= scm_getc (port
);
1092 if (c
!= '3' && c
!= '6')
1095 scm_ungetc (c
, port
);
1101 goto continue_reading_tag
;
1106 c
= read_decimal_integer (port
, c
, &rank
);
1108 scm_i_input_error (NULL
, port
, "array rank must be non-negative",
1113 continue_reading_tag
:
1114 while (c
!= EOF
&& c
!= '(' && c
!= '@' && c
!= ':'
1115 && tag_len
< sizeof tag_buf
/ sizeof tag_buf
[0])
1117 tag_buf
[tag_len
++] = c
;
1118 c
= scm_getc (port
);
1124 tag
= scm_string_to_symbol (scm_from_utf32_stringn (tag_buf
, tag_len
));
1125 if (tag_len
== sizeof tag_buf
/ sizeof tag_buf
[0])
1126 scm_i_input_error (NULL
, port
, "invalid array tag, starting with: ~a",
1131 if (c
== '@' || c
== ':')
1137 ssize_t lbnd
= 0, len
= 0;
1142 c
= scm_getc (port
);
1143 c
= read_decimal_integer (port
, c
, &lbnd
);
1146 s
= scm_from_ssize_t (lbnd
);
1150 c
= scm_getc (port
);
1151 c
= read_decimal_integer (port
, c
, &len
);
1153 scm_i_input_error (NULL
, port
,
1154 "array length must be non-negative",
1157 s
= scm_list_2 (s
, scm_from_ssize_t (lbnd
+len
-1));
1160 shape
= scm_cons (s
, shape
);
1161 } while (c
== '@' || c
== ':');
1163 shape
= scm_reverse_x (shape
, SCM_EOL
);
1166 /* Read nested lists of elements. */
1168 scm_i_input_error (NULL
, port
,
1169 "missing '(' in vector or array literal",
1171 elements
= scm_read_sexp (c
, port
, opts
);
1173 if (scm_is_false (shape
))
1174 shape
= scm_from_ssize_t (rank
);
1175 else if (scm_ilength (shape
) != rank
)
1178 "the number of shape specifications must match the array rank",
1181 /* Handle special print syntax of rank zero arrays; see
1182 scm_i_print_array for a rationale. */
1185 if (!scm_is_pair (elements
))
1186 scm_i_input_error (NULL
, port
,
1187 "too few elements in array literal, need 1",
1189 if (!scm_is_null (SCM_CDR (elements
)))
1190 scm_i_input_error (NULL
, port
,
1191 "too many elements in array literal, want 1",
1193 elements
= SCM_CAR (elements
);
1196 /* Construct array, annotate with source location, and return. */
1197 array
= scm_list_to_typed_array (tag
, shape
, elements
);
1198 return maybe_annotate_source (array
, port
, opts
, line
, column
);
1202 scm_read_srfi4_vector (int chr
, SCM port
, scm_t_read_opts
*opts
,
1203 long line
, int column
)
1205 return scm_read_array (chr
, port
, opts
, line
, column
);
1209 scm_read_bytevector (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
,
1210 long line
, int column
)
1212 chr
= scm_getc (port
);
1216 chr
= scm_getc (port
);
1220 chr
= scm_getc (port
);
1224 return maybe_annotate_source
1225 (scm_u8_list_to_bytevector (scm_read_sexp (chr
, port
, opts
)),
1226 port
, opts
, line
, column
);
1229 scm_i_input_error ("read_bytevector", port
,
1230 "invalid bytevector prefix",
1231 SCM_MAKE_CHAR (chr
));
1232 return SCM_UNSPECIFIED
;
1236 scm_read_guile_bit_vector (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
,
1237 long line
, int column
)
1239 /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
1240 terribly inefficient but who cares? */
1241 SCM s_bits
= SCM_EOL
;
1243 for (chr
= scm_getc (port
);
1244 (chr
!= EOF
) && ((chr
== '0') || (chr
== '1'));
1245 chr
= scm_getc (port
))
1247 s_bits
= scm_cons ((chr
== '0') ? SCM_BOOL_F
: SCM_BOOL_T
, s_bits
);
1251 scm_ungetc (chr
, port
);
1253 return maybe_annotate_source
1254 (scm_bitvector (scm_reverse_x (s_bits
, SCM_EOL
)),
1255 port
, opts
, line
, column
);
1259 scm_read_scsh_block_comment (scm_t_wchar chr
, SCM port
)
1265 int c
= scm_getc (port
);
1268 scm_i_input_error ("skip_block_comment", port
,
1269 "unterminated `#! ... !#' comment", SCM_EOL
);
1273 else if (c
== '#' && bang_seen
)
1279 return SCM_UNSPECIFIED
;
1282 static void set_port_case_insensitive_p (SCM port
, scm_t_read_opts
*opts
,
1286 scm_read_shebang (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
)
1288 char name
[READER_DIRECTIVE_NAME_MAX_SIZE
+ 1];
1292 while (i
<= READER_DIRECTIVE_NAME_MAX_SIZE
)
1294 c
= scm_getc (port
);
1296 scm_i_input_error ("skip_block_comment", port
,
1297 "unterminated `#! ... !#' comment", SCM_EOL
);
1298 else if (('a' <= c
&& c
<= 'z') || ('0' <= c
&& c
<= '9') || c
== '-')
1300 else if (CHAR_IS_DELIMITER (c
))
1302 scm_ungetc (c
, port
);
1304 if (0 == strcmp ("r6rs", name
))
1305 ; /* Silently ignore */
1306 else if (0 == strcmp ("fold-case", name
))
1307 set_port_case_insensitive_p (port
, opts
, 1);
1308 else if (0 == strcmp ("no-fold-case", name
))
1309 set_port_case_insensitive_p (port
, opts
, 0);
1313 return SCM_UNSPECIFIED
;
1317 scm_ungetc (name
[--i
], port
);
1318 return scm_read_scsh_block_comment (chr
, port
);
1322 scm_read_r6rs_block_comment (scm_t_wchar chr
, SCM port
)
1324 /* Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
1325 nested. So care must be taken. */
1326 int nesting_level
= 1;
1328 int a
= scm_getc (port
);
1331 scm_i_input_error ("scm_read_r6rs_block_comment", port
,
1332 "unterminated `#| ... |#' comment", SCM_EOL
);
1334 while (nesting_level
> 0)
1336 int b
= scm_getc (port
);
1339 scm_i_input_error ("scm_read_r6rs_block_comment", port
,
1340 "unterminated `#| ... |#' comment", SCM_EOL
);
1342 if (a
== '|' && b
== '#')
1347 else if (a
== '#' && b
== '|')
1356 return SCM_UNSPECIFIED
;
1360 scm_read_commented_expression (scm_t_wchar chr
, SCM port
,
1361 scm_t_read_opts
*opts
)
1365 c
= flush_ws (port
, opts
, (char *) NULL
);
1367 scm_i_input_error ("read_commented_expression", port
,
1368 "no expression after #; comment", SCM_EOL
);
1369 scm_ungetc (c
, port
);
1370 scm_read_expression (port
, opts
);
1371 return SCM_UNSPECIFIED
;
1375 scm_read_extended_symbol (scm_t_wchar chr
, SCM port
)
1377 /* Guile's extended symbol read syntax looks like this:
1379 #{This is all a symbol name}#
1381 So here, CHR is expected to be `{'. */
1384 SCM buf
= scm_i_make_string (1024, NULL
, 0);
1386 buf
= scm_i_string_start_writing (buf
);
1388 while ((chr
= scm_getc (port
)) != EOF
)
1399 scm_i_string_set_x (buf
, len
++, '}');
1405 else if (chr
== '\\')
1407 /* It used to be that print.c would print extended-read-syntax
1408 symbols with backslashes before "non-standard" chars, but
1409 this routine wouldn't do anything with those escapes.
1410 Bummer. What we've done is to change print.c to output
1411 R6RS hex escapes for those characters, relying on the fact
1412 that the extended read syntax would never put a `\' before
1413 an `x'. For now, we just ignore other instances of
1414 backslash in the string. */
1415 switch ((chr
= scm_getc (port
)))
1423 SCM_READ_HEX_ESCAPE (10, ';');
1424 scm_i_string_set_x (buf
, len
++, c
);
1432 scm_i_string_stop_writing ();
1433 scm_i_input_error ("scm_read_extended_symbol", port
,
1434 "illegal character in escape sequence: ~S",
1435 scm_list_1 (SCM_MAKE_CHAR (c
)));
1439 scm_i_string_set_x (buf
, len
++, chr
);
1444 scm_i_string_set_x (buf
, len
++, chr
);
1446 if (len
>= scm_i_string_length (buf
) - 2)
1450 scm_i_string_stop_writing ();
1451 addy
= scm_i_make_string (1024, NULL
, 0);
1452 buf
= scm_string_append (scm_list_2 (buf
, addy
));
1454 buf
= scm_i_string_start_writing (buf
);
1459 scm_i_string_stop_writing ();
1461 scm_i_input_error ("scm_read_extended_symbol", port
,
1462 "end of file while reading symbol", SCM_EOL
);
1464 return (scm_string_to_symbol (scm_c_substring (buf
, 0, len
)));
1469 /* Top-level token readers, i.e., dispatchers. */
1472 scm_read_sharp_extension (int chr
, SCM port
, scm_t_read_opts
*opts
)
1476 proc
= scm_get_hash_procedure (chr
);
1477 if (scm_is_true (scm_procedure_p (proc
)))
1479 long line
= SCM_LINUM (port
);
1480 int column
= SCM_COL (port
) - 2;
1483 got
= scm_call_2 (proc
, SCM_MAKE_CHAR (chr
), port
);
1485 if (opts
->record_positions_p
&& SCM_NIMP (got
)
1486 && !scm_i_has_source_properties (got
))
1487 scm_i_set_source_properties_x (got
, line
, column
, SCM_FILENAME (port
));
1492 return SCM_UNSPECIFIED
;
1495 /* The reader for the sharp `#' character. It basically dispatches reads
1496 among the above token readers. */
1498 scm_read_sharp (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
,
1499 long line
, int column
)
1500 #define FUNC_NAME "scm_lreadr"
1504 chr
= scm_getc (port
);
1506 result
= scm_read_sharp_extension (chr
, port
, opts
);
1507 if (!scm_is_eq (result
, SCM_UNSPECIFIED
))
1513 return (scm_read_character (chr
, port
, opts
));
1515 return (scm_read_vector (chr
, port
, opts
, line
, column
));
1520 /* This one may return either a boolean or an SRFI-4 vector. */
1521 return (scm_read_srfi4_vector (chr
, port
, opts
, line
, column
));
1523 return (scm_read_bytevector (chr
, port
, opts
, line
, column
));
1525 return (scm_read_guile_bit_vector (chr
, port
, opts
, line
, column
));
1529 return (scm_read_boolean (chr
, port
));
1531 return (scm_read_keyword (chr
, port
, opts
));
1532 case '0': case '1': case '2': case '3': case '4':
1533 case '5': case '6': case '7': case '8': case '9':
1535 #if SCM_ENABLE_DEPRECATED
1536 /* See below for 'i' and 'e'. */
1542 return (scm_read_array (chr
, port
, opts
, line
, column
));
1546 #if SCM_ENABLE_DEPRECATED
1548 /* When next char is '(', it really is an old-style
1550 scm_t_wchar next_c
= scm_getc (port
);
1552 scm_ungetc (next_c
, port
);
1554 return scm_read_array (chr
, port
, opts
, line
, column
);
1568 return (scm_read_number_and_radix (chr
, port
, opts
));
1570 return (scm_read_extended_symbol (chr
, port
));
1572 return (scm_read_shebang (chr
, port
, opts
));
1574 return (scm_read_commented_expression (chr
, port
, opts
));
1578 return (scm_read_syntax (chr
, port
, opts
));
1580 return (scm_read_nil (chr
, port
, opts
));
1582 result
= scm_read_sharp_extension (chr
, port
, opts
);
1583 if (scm_is_eq (result
, SCM_UNSPECIFIED
))
1585 /* To remain compatible with 1.8 and earlier, the following
1586 characters have lower precedence than `read-hash-extend'
1591 return scm_read_r6rs_block_comment (chr
, port
);
1593 scm_i_input_error (FUNC_NAME
, port
, "Unknown # object: ~S",
1594 scm_list_1 (SCM_MAKE_CHAR (chr
)));
1601 return SCM_UNSPECIFIED
;
1606 scm_read_expression (SCM port
, scm_t_read_opts
*opts
)
1607 #define FUNC_NAME "scm_read_expression"
1613 chr
= scm_getc (port
);
1617 case SCM_WHITE_SPACES
:
1618 case SCM_LINE_INCREMENTORS
:
1621 (void) scm_read_semicolon_comment (chr
, port
);
1624 if (!opts
->square_brackets_p
)
1625 return (scm_read_mixed_case_symbol (chr
, port
, opts
));
1626 /* otherwise fall through */
1628 return (scm_read_sexp (chr
, port
, opts
));
1630 return (scm_read_string (chr
, port
, opts
));
1634 return (scm_read_quote (chr
, port
, opts
));
1637 long line
= SCM_LINUM (port
);
1638 int column
= SCM_COL (port
) - 1;
1639 SCM result
= scm_read_sharp (chr
, port
, opts
, line
, column
);
1640 if (scm_is_eq (result
, SCM_UNSPECIFIED
))
1641 /* We read a comment or some such. */
1647 scm_i_input_error (FUNC_NAME
, port
, "unexpected \")\"", SCM_EOL
);
1650 if (opts
->square_brackets_p
)
1651 scm_i_input_error (FUNC_NAME
, port
, "unexpected \"]\"", SCM_EOL
);
1652 /* otherwise fall through */
1656 if (opts
->keyword_style
== KEYWORD_STYLE_PREFIX
)
1657 return scm_symbol_to_keyword (scm_read_expression (port
, opts
));
1662 if (((chr
>= '0') && (chr
<= '9'))
1663 || (strchr ("+-.", chr
)))
1664 return (scm_read_number (chr
, port
, opts
));
1666 return (scm_read_mixed_case_symbol (chr
, port
, opts
));
1674 /* Actual reader. */
1676 static void init_read_options (SCM port
, scm_t_read_opts
*opts
);
1678 SCM_DEFINE (scm_read
, "read", 0, 1, 0,
1680 "Read an s-expression from the input port @var{port}, or from\n"
1681 "the current input port if @var{port} is not specified.\n"
1682 "Any whitespace before the next token is discarded.")
1683 #define FUNC_NAME s_scm_read
1685 scm_t_read_opts opts
;
1688 if (SCM_UNBNDP (port
))
1689 port
= scm_current_input_port ();
1690 SCM_VALIDATE_OPINPORT (1, port
);
1692 init_read_options (port
, &opts
);
1694 c
= flush_ws (port
, &opts
, (char *) NULL
);
1697 scm_ungetc (c
, port
);
1699 return (scm_read_expression (port
, &opts
));
1706 /* Manipulate the read-hash-procedures alist. This could be written in
1707 Scheme, but maybe it will also be used by C code during initialisation. */
1708 SCM_DEFINE (scm_read_hash_extend
, "read-hash-extend", 2, 0, 0,
1709 (SCM chr
, SCM proc
),
1710 "Install the procedure @var{proc} for reading expressions\n"
1711 "starting with the character sequence @code{#} and @var{chr}.\n"
1712 "@var{proc} will be called with two arguments: the character\n"
1713 "@var{chr} and the port to read further data from. The object\n"
1714 "returned will be the return value of @code{read}. \n"
1715 "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1717 #define FUNC_NAME s_scm_read_hash_extend
1722 SCM_VALIDATE_CHAR (1, chr
);
1723 SCM_ASSERT (scm_is_false (proc
)
1724 || scm_is_eq (scm_procedure_p (proc
), SCM_BOOL_T
),
1725 proc
, SCM_ARG2
, FUNC_NAME
);
1727 /* Check if chr is already in the alist. */
1728 this = scm_i_read_hash_procedures_ref ();
1732 if (scm_is_null (this))
1734 /* not found, so add it to the beginning. */
1735 if (scm_is_true (proc
))
1737 SCM
new = scm_cons (scm_cons (chr
, proc
),
1738 scm_i_read_hash_procedures_ref ());
1739 scm_i_read_hash_procedures_set_x (new);
1743 if (scm_is_eq (chr
, SCM_CAAR (this)))
1745 /* already in the alist. */
1746 if (scm_is_false (proc
))
1749 if (scm_is_false (prev
))
1751 SCM rest
= SCM_CDR (scm_i_read_hash_procedures_ref ());
1752 scm_i_read_hash_procedures_set_x (rest
);
1755 scm_set_cdr_x (prev
, SCM_CDR (this));
1760 scm_set_cdr_x (SCM_CAR (this), proc
);
1765 this = SCM_CDR (this);
1768 return SCM_UNSPECIFIED
;
1772 /* Recover the read-hash procedure corresponding to char c. */
1774 scm_get_hash_procedure (int c
)
1776 SCM rest
= scm_i_read_hash_procedures_ref ();
1780 if (scm_is_null (rest
))
1783 if (SCM_CHAR (SCM_CAAR (rest
)) == c
)
1784 return SCM_CDAR (rest
);
1786 rest
= SCM_CDR (rest
);
1790 #define SCM_ENCODING_SEARCH_SIZE (500)
1792 /* Search the first few hundred characters of a file for an Emacs-like coding
1793 declaration. Returns either NULL or a string whose storage has been
1794 allocated with `scm_gc_malloc ()'. */
1796 scm_i_scan_for_encoding (SCM port
)
1799 char header
[SCM_ENCODING_SEARCH_SIZE
+1];
1800 size_t bytes_read
, encoding_length
, i
;
1801 char *encoding
= NULL
;
1803 char *pos
, *encoding_start
;
1806 pt
= SCM_PTAB_ENTRY (port
);
1808 if (pt
->rw_active
== SCM_PORT_WRITE
)
1812 pt
->rw_active
= SCM_PORT_READ
;
1814 if (pt
->read_pos
== pt
->read_end
)
1816 /* We can use the read buffer, and thus avoid a seek. */
1817 if (scm_fill_input (port
) == EOF
)
1820 bytes_read
= pt
->read_end
- pt
->read_pos
;
1821 if (bytes_read
> SCM_ENCODING_SEARCH_SIZE
)
1822 bytes_read
= SCM_ENCODING_SEARCH_SIZE
;
1824 if (bytes_read
<= 1)
1825 /* An unbuffered port -- don't scan. */
1828 memcpy (header
, pt
->read_pos
, bytes_read
);
1829 header
[bytes_read
] = '\0';
1833 /* Try to read some bytes and then seek back. Not all ports
1834 support seeking back; and indeed some file ports (like
1835 /dev/urandom) will succeed on an lseek (fd, 0, SEEK_CUR)---the
1836 check performed by SCM_FPORT_FDES---but fail to seek
1837 backwards. Hence this block comes second. We prefer to use
1838 the read buffer in-place. */
1839 if (SCM_FPORTP (port
) && !SCM_FDES_RANDOM_P (SCM_FPORT_FDES (port
)))
1842 bytes_read
= scm_c_read (port
, header
, SCM_ENCODING_SEARCH_SIZE
);
1843 header
[bytes_read
] = '\0';
1844 scm_seek (port
, scm_from_int (0), scm_from_int (SEEK_SET
));
1848 && header
[0] == '\xef' && header
[1] == '\xbb' && header
[2] == '\xbf')
1851 /* search past "coding[:=]" */
1855 if ((pos
= strstr(pos
, "coding")) == NULL
)
1858 pos
+= strlen("coding");
1859 if (pos
- header
>= SCM_ENCODING_SEARCH_SIZE
||
1860 (*pos
== ':' || *pos
== '='))
1868 while (pos
- header
<= SCM_ENCODING_SEARCH_SIZE
&&
1869 (*pos
== ' ' || *pos
== '\t'))
1872 /* grab the next token */
1873 encoding_start
= pos
;
1875 while (encoding_start
+ i
- header
<= SCM_ENCODING_SEARCH_SIZE
1876 && encoding_start
+ i
- header
< bytes_read
1877 && (isalnum ((int) encoding_start
[i
])
1878 || strchr ("_-.:/,+=()", encoding_start
[i
]) != NULL
))
1881 encoding_length
= i
;
1882 if (encoding_length
== 0)
1885 encoding
= scm_gc_strndup (encoding_start
, encoding_length
, "encoding");
1886 for (i
= 0; i
< encoding_length
; i
++)
1887 encoding
[i
] = toupper ((int) encoding
[i
]);
1889 /* push backwards to make sure we were in a comment */
1891 pos
= encoding_start
;
1892 while (pos
>= header
)
1899 else if (*pos
== '\n' || pos
== header
)
1901 /* This wasn't in a semicolon comment. Check for a
1902 hash-bang comment. */
1903 char *beg
= strstr (header
, "#!");
1904 char *end
= strstr (header
, "!#");
1905 if (beg
< encoding_start
&& encoding_start
+ encoding_length
<= end
)
1916 /* This wasn't in a comment */
1919 if (utf8_bom
&& strcmp(encoding
, "UTF-8"))
1920 scm_misc_error (NULL
,
1921 "the port input declares the encoding ~s but is encoded as UTF-8",
1922 scm_list_1 (scm_from_locale_string (encoding
)));
1927 SCM_DEFINE (scm_file_encoding
, "file-encoding", 1, 0, 0,
1929 "Scans the port for an Emacs-like character coding declaration\n"
1930 "near the top of the contents of a port with random-accessible contents.\n"
1931 "The coding declaration is of the form\n"
1932 "@code{coding: XXXXX} and must appear in a scheme comment.\n"
1934 "Returns a string containing the character encoding of the file\n"
1935 "if a declaration was found, or @code{#f} otherwise.\n")
1936 #define FUNC_NAME s_scm_file_encoding
1941 SCM_VALIDATE_OPINPORT (SCM_ARG1
, port
);
1943 enc
= scm_i_scan_for_encoding (port
);
1948 s_enc
= scm_from_locale_string (enc
);
1957 /* Per-port read options.
1959 We store per-port read options in the 'port-read-options' key of the
1960 port's alist, which is stored in 'scm_i_port_weak_hash'. The value
1961 stored in the alist is a single integer that contains a two-bit field
1962 for each read option.
1964 If a bit field contains READ_OPTION_INHERIT (3), that indicates that
1965 the applicable value should be inherited from the corresponding
1966 global read option. Otherwise, the bit field contains the value of
1967 the read option. For boolean read options that have been set
1968 per-port, the possible values are 0 or 1. If the 'keyword_style'
1969 read option has been set per-port, its possible values are those in
1970 'enum t_keyword_style'. */
1972 /* Key to read options in per-port alists. */
1973 SCM_SYMBOL (sym_port_read_options
, "port-read-options");
1975 /* Offsets of bit fields for each per-port override */
1976 #define READ_OPTION_COPY_SOURCE_P 0
1977 #define READ_OPTION_RECORD_POSITIONS_P 2
1978 #define READ_OPTION_CASE_INSENSITIVE_P 4
1979 #define READ_OPTION_KEYWORD_STYLE 6
1980 #define READ_OPTION_R6RS_ESCAPES_P 8
1981 #define READ_OPTION_SQUARE_BRACKETS_P 10
1982 #define READ_OPTION_HUNGRY_EOL_ESCAPES_P 12
1984 #define READ_OPTIONS_NUM_BITS 14
1986 #define READ_OPTIONS_INHERIT_ALL ((1UL << READ_OPTIONS_NUM_BITS) - 1)
1987 #define READ_OPTIONS_MAX_VALUE READ_OPTIONS_INHERIT_ALL
1989 #define READ_OPTION_MASK 3
1990 #define READ_OPTION_INHERIT 3
1993 set_port_read_option (SCM port
, int option
, int new_value
)
1995 SCM alist
, scm_read_options
;
1996 unsigned int read_options
;
1998 new_value
&= READ_OPTION_MASK
;
1999 scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex
);
2000 alist
= scm_hashq_ref (scm_i_port_weak_hash
, port
, SCM_BOOL_F
);
2001 scm_read_options
= scm_assq_ref (alist
, sym_port_read_options
);
2002 if (scm_is_unsigned_integer (scm_read_options
, 0, READ_OPTIONS_MAX_VALUE
))
2003 read_options
= scm_to_uint (scm_read_options
);
2005 read_options
= READ_OPTIONS_INHERIT_ALL
;
2006 read_options
&= ~(READ_OPTION_MASK
<< option
);
2007 read_options
|= new_value
<< option
;
2008 scm_read_options
= scm_from_uint (read_options
);
2009 alist
= scm_assq_set_x (alist
, sym_port_read_options
, scm_read_options
);
2010 scm_hashq_set_x (scm_i_port_weak_hash
, port
, alist
);
2011 scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex
);
2014 /* Set OPTS and PORT's case-insensitivity according to VALUE. */
2016 set_port_case_insensitive_p (SCM port
, scm_t_read_opts
*opts
, int value
)
2019 opts
->case_insensitive_p
= value
;
2020 set_port_read_option (port
, READ_OPTION_CASE_INSENSITIVE_P
, value
);
2023 /* Initialize OPTS based on PORT's read options and the global read
2026 init_read_options (SCM port
, scm_t_read_opts
*opts
)
2028 SCM alist
, val
, scm_read_options
;
2029 unsigned int read_options
, x
;
2031 scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex
);
2032 alist
= scm_hashq_ref (scm_i_port_weak_hash
, port
, SCM_BOOL_F
);
2033 scm_read_options
= scm_assq_ref (alist
, sym_port_read_options
);
2034 scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex
);
2036 if (scm_is_unsigned_integer (scm_read_options
, 0, READ_OPTIONS_MAX_VALUE
))
2037 read_options
= scm_to_uint (scm_read_options
);
2039 read_options
= READ_OPTIONS_INHERIT_ALL
;
2041 x
= READ_OPTION_MASK
& (read_options
>> READ_OPTION_KEYWORD_STYLE
);
2042 if (x
== READ_OPTION_INHERIT
)
2044 val
= SCM_PACK (SCM_KEYWORD_STYLE
);
2045 if (scm_is_eq (val
, scm_keyword_prefix
))
2046 x
= KEYWORD_STYLE_PREFIX
;
2047 else if (scm_is_eq (val
, scm_keyword_postfix
))
2048 x
= KEYWORD_STYLE_POSTFIX
;
2050 x
= KEYWORD_STYLE_HASH_PREFIX
;
2052 opts
->keyword_style
= x
;
2054 #define RESOLVE_BOOLEAN_OPTION(NAME, name) \
2057 x = READ_OPTION_MASK & (read_options >> READ_OPTION_ ## NAME); \
2058 if (x == READ_OPTION_INHERIT) \
2059 x = !!SCM_ ## NAME; \
2064 RESOLVE_BOOLEAN_OPTION (COPY_SOURCE_P
, copy_source_p
);
2065 RESOLVE_BOOLEAN_OPTION (RECORD_POSITIONS_P
, record_positions_p
);
2066 RESOLVE_BOOLEAN_OPTION (CASE_INSENSITIVE_P
, case_insensitive_p
);
2067 RESOLVE_BOOLEAN_OPTION (R6RS_ESCAPES_P
, r6rs_escapes_p
);
2068 RESOLVE_BOOLEAN_OPTION (SQUARE_BRACKETS_P
, square_brackets_p
);
2069 RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P
, hungry_eol_escapes_p
);
2071 #undef RESOLVE_BOOLEAN_OPTION
2077 SCM read_hash_procs
;
2079 read_hash_procs
= scm_make_fluid_with_default (SCM_EOL
);
2081 scm_i_read_hash_procedures
=
2082 SCM_VARIABLE_LOC (scm_c_define ("%read-hash-procedures", read_hash_procs
));
2084 scm_init_opts (scm_read_options
, scm_read_opts
);
2085 #include "libguile/read.x"