1 /* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006,
2 * 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
34 #include "libguile/_scm.h"
35 #include "libguile/bytevectors.h"
36 #include "libguile/chars.h"
37 #include "libguile/eval.h"
38 #include "libguile/arrays.h"
39 #include "libguile/bitvectors.h"
40 #include "libguile/keywords.h"
41 #include "libguile/alist.h"
42 #include "libguile/srcprop.h"
43 #include "libguile/hashtab.h"
44 #include "libguile/hash.h"
45 #include "libguile/ports.h"
46 #include "libguile/fports.h"
47 #include "libguile/root.h"
48 #include "libguile/strings.h"
49 #include "libguile/strports.h"
50 #include "libguile/vectors.h"
51 #include "libguile/validate.h"
52 #include "libguile/srfi-4.h"
53 #include "libguile/srfi-13.h"
55 #include "libguile/read.h"
56 #include "libguile/private-options.h"
61 SCM_GLOBAL_SYMBOL (scm_sym_dot
, ".");
62 SCM_SYMBOL (scm_keyword_prefix
, "prefix");
63 SCM_SYMBOL (scm_keyword_postfix
, "postfix");
64 SCM_SYMBOL (sym_nil
, "nil");
66 scm_t_option scm_read_opts
[] = {
67 { SCM_OPTION_BOOLEAN
, "copy", 0,
68 "Copy source code expressions." },
69 { SCM_OPTION_BOOLEAN
, "positions", 1,
70 "Record positions of source code expressions." },
71 { SCM_OPTION_BOOLEAN
, "case-insensitive", 0,
72 "Convert symbols to lower case."},
73 { SCM_OPTION_SCM
, "keywords", (scm_t_bits
) SCM_BOOL_F_BITS
,
74 "Style of keyword recognition: #f, 'prefix or 'postfix."},
75 { SCM_OPTION_BOOLEAN
, "r6rs-hex-escapes", 0,
76 "Use R6RS variable-length character and string hex escapes."},
77 { SCM_OPTION_BOOLEAN
, "square-brackets", 1,
78 "Treat `[' and `]' as parentheses, for R6RS compatibility."},
79 { SCM_OPTION_BOOLEAN
, "hungry-eol-escapes", 0,
80 "In strings, consume leading whitespace after an escaped end-of-line."},
84 /* Internal read options structure. This is initialized by 'scm_read'
85 from the global read options, and a pointer is passed down to all
87 enum t_keyword_style
{
88 KEYWORD_STYLE_HASH_PREFIX
,
94 enum t_keyword_style keyword_style
;
95 unsigned int copy_source_p
: 1;
96 unsigned int record_positions_p
: 1;
97 unsigned int case_insensitive_p
: 1;
98 unsigned int r6rs_escapes_p
: 1;
99 unsigned int square_brackets_p
: 1;
100 unsigned int hungry_eol_escapes_p
: 1;
103 typedef struct t_read_opts scm_t_read_opts
;
105 /* Initialize OPTS from the global read options. */
107 init_read_options (scm_t_read_opts
*opts
)
112 val
= SCM_PACK (SCM_KEYWORD_STYLE
);
113 if (scm_is_eq (val
, scm_keyword_prefix
))
114 x
= KEYWORD_STYLE_PREFIX
;
115 else if (scm_is_eq (val
, scm_keyword_postfix
))
116 x
= KEYWORD_STYLE_POSTFIX
;
118 x
= KEYWORD_STYLE_HASH_PREFIX
;
119 opts
->keyword_style
= x
;
121 #define RESOLVE_BOOLEAN_OPTION(NAME, name) \
122 (opts->name = !!SCM_ ## NAME)
124 RESOLVE_BOOLEAN_OPTION (COPY_SOURCE_P
, copy_source_p
);
125 RESOLVE_BOOLEAN_OPTION (RECORD_POSITIONS_P
, record_positions_p
);
126 RESOLVE_BOOLEAN_OPTION (CASE_INSENSITIVE_P
, case_insensitive_p
);
127 RESOLVE_BOOLEAN_OPTION (R6RS_ESCAPES_P
, r6rs_escapes_p
);
128 RESOLVE_BOOLEAN_OPTION (SQUARE_BRACKETS_P
, square_brackets_p
);
129 RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P
, hungry_eol_escapes_p
);
131 #undef RESOLVE_BOOLEAN_OPTION
136 Give meaningful error messages for errors
140 FILE:LINE:COL: MESSAGE
141 This happened in ....
143 This is not standard GNU format, but the test-suite likes the real
144 message to be in front.
150 scm_i_input_error (char const *function
,
151 SCM port
, const char *message
, SCM arg
)
153 SCM fn
= (scm_is_string (SCM_FILENAME(port
))
155 : scm_from_locale_string ("#<unknown port>"));
157 SCM string_port
= scm_open_output_string ();
158 SCM string
= SCM_EOL
;
159 scm_simple_format (string_port
,
160 scm_from_locale_string ("~A:~S:~S: ~A"),
162 scm_from_long (SCM_LINUM (port
) + 1),
163 scm_from_int (SCM_COL (port
) + 1),
164 scm_from_locale_string (message
)));
166 string
= scm_get_output_string (string_port
);
167 scm_close_output_port (string_port
);
168 scm_error_scm (scm_from_latin1_symbol ("read-error"),
169 function
? scm_from_locale_string (function
) : SCM_BOOL_F
,
176 SCM_DEFINE (scm_read_options
, "read-options-interface", 0, 1, 0,
178 "Option interface for the read options. Instead of using\n"
179 "this procedure directly, use the procedures @code{read-enable},\n"
180 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
181 #define FUNC_NAME s_scm_read_options
183 SCM ans
= scm_options (setting
,
186 if (SCM_COPY_SOURCE_P
)
187 SCM_RECORD_POSITIONS_P
= 1;
192 /* A fluid referring to an association list mapping extra hash
193 characters to procedures. */
194 static SCM
*scm_i_read_hash_procedures
;
197 scm_i_read_hash_procedures_ref (void)
199 return scm_fluid_ref (*scm_i_read_hash_procedures
);
203 scm_i_read_hash_procedures_set_x (SCM value
)
205 scm_fluid_set_x (*scm_i_read_hash_procedures
, value
);
212 /* Size of the C buffer used to read symbols and numbers. */
213 #define READER_BUFFER_SIZE 128
215 /* Number of 32-bit codepoints in the buffer used to read strings. */
216 #define READER_STRING_BUFFER_SIZE 128
218 /* The maximum size of Scheme character names. */
219 #define READER_CHAR_NAME_MAX_SIZE 50
221 /* The maximum size of reader directive names. */
222 #define READER_DIRECTIVE_NAME_MAX_SIZE 50
225 /* `isblank' is only in C99. */
226 #define CHAR_IS_BLANK_(_chr) \
227 (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n') \
228 || ((_chr) == '\f') || ((_chr) == '\r'))
231 # define CHAR_IS_BLANK(_chr) \
232 ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
234 # define CHAR_IS_BLANK CHAR_IS_BLANK_
238 /* R5RS one-character delimiters (see section 7.1.1, ``Lexical
240 #define CHAR_IS_R5RS_DELIMITER(c) \
242 || (c) == ')' || (c) == '(' || (c) == ';' || (c) == '"')
244 #define CHAR_IS_DELIMITER(c) \
245 (CHAR_IS_R5RS_DELIMITER (c) \
246 || (((c) == ']' || (c) == '[') && opts->square_brackets_p))
248 /* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
250 #define CHAR_IS_EXPONENT_MARKER(_chr) \
251 (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
252 || ((_chr) == 'd') || ((_chr) == 'l'))
254 /* Read an SCSH block comment. */
255 static SCM
scm_read_scsh_block_comment (scm_t_wchar
, SCM
);
256 static SCM
scm_read_r6rs_block_comment (scm_t_wchar
, SCM
);
257 static SCM
scm_read_commented_expression (scm_t_wchar
, SCM
, scm_t_read_opts
*);
258 static SCM
scm_read_shebang (scm_t_wchar
, SCM
, scm_t_read_opts
*);
259 static SCM
scm_get_hash_procedure (int);
261 /* Read from PORT until a delimiter (e.g., a whitespace) is read. Put the
262 result in the pre-allocated buffer BUF. Return zero if the whole token has
263 fewer than BUF_SIZE bytes, non-zero otherwise. READ will be set the number of
264 bytes actually read. */
266 read_token (SCM port
, scm_t_read_opts
*opts
,
267 char *buf
, size_t buf_size
, size_t *read
)
271 while (*read
< buf_size
)
275 chr
= scm_get_byte_or_eof (port
);
279 else if (CHAR_IS_DELIMITER (chr
))
281 scm_unget_byte (chr
, port
);
294 /* Like `read_token', but return either BUFFER, or a GC-allocated buffer
295 if the token doesn't fit in BUFFER_SIZE bytes. */
297 read_complete_token (SCM port
, scm_t_read_opts
*opts
,
298 char *buffer
, size_t buffer_size
, size_t *read
)
301 size_t bytes_read
, overflow_size
= 0;
302 char *overflow_buffer
= NULL
;
306 overflow
= read_token (port
, opts
, buffer
, buffer_size
, &bytes_read
);
309 if (overflow
|| overflow_size
!= 0)
311 if (overflow_size
== 0)
313 overflow_buffer
= scm_gc_malloc_pointerless (bytes_read
, "read");
314 memcpy (overflow_buffer
, buffer
, bytes_read
);
315 overflow_size
= bytes_read
;
320 scm_gc_malloc_pointerless (overflow_size
+ bytes_read
, "read");
322 memcpy (new_buf
, overflow_buffer
, overflow_size
);
323 memcpy (new_buf
+ overflow_size
, buffer
, bytes_read
);
325 overflow_buffer
= new_buf
;
326 overflow_size
+= bytes_read
;
333 *read
= overflow_size
;
337 return (overflow_size
> 0 ? overflow_buffer
: buffer
);
340 /* Skip whitespace from PORT and return the first non-whitespace character
341 read. Raise an error on end-of-file. */
343 flush_ws (SCM port
, scm_t_read_opts
*opts
, const char *eoferr
)
347 switch (c
= scm_getc (port
))
353 scm_i_input_error (eoferr
,
362 switch (c
= scm_getc (port
))
368 case SCM_LINE_INCREMENTORS
:
374 switch (c
= scm_getc (port
))
377 eoferr
= "read_sharp";
380 scm_read_shebang (c
, port
, opts
);
383 scm_read_commented_expression (c
, port
, opts
);
386 if (scm_is_false (scm_get_hash_procedure (c
)))
388 scm_read_r6rs_block_comment (c
, port
);
393 scm_ungetc (c
, port
);
398 case SCM_LINE_INCREMENTORS
:
399 case SCM_SINGLE_SPACES
:
414 static SCM
scm_read_expression (SCM port
, scm_t_read_opts
*opts
);
415 static SCM
scm_read_sharp (int chr
, SCM port
, scm_t_read_opts
*opts
,
416 long line
, int column
);
420 maybe_annotate_source (SCM x
, SCM port
, scm_t_read_opts
*opts
,
421 long line
, int column
)
423 if (opts
->record_positions_p
)
424 scm_i_set_source_properties_x (x
, line
, column
, SCM_FILENAME (port
));
429 scm_read_sexp (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
)
430 #define FUNC_NAME "scm_i_lreadparen"
433 SCM tmp
, tl
, ans
= SCM_EOL
;
434 const int terminating_char
= ((chr
== '[') ? ']' : ')');
436 /* Need to capture line and column numbers here. */
437 long line
= SCM_LINUM (port
);
438 int column
= SCM_COL (port
) - 1;
440 c
= flush_ws (port
, opts
, FUNC_NAME
);
441 if (terminating_char
== c
)
444 scm_ungetc (c
, port
);
445 tmp
= scm_read_expression (port
, opts
);
447 /* Note that it is possible for scm_read_expression to return
448 scm_sym_dot, but not as part of a dotted pair: as in #{.}#. So
449 check that it's a real dot by checking `c'. */
450 if (c
== '.' && scm_is_eq (scm_sym_dot
, tmp
))
452 ans
= scm_read_expression (port
, opts
);
453 if (terminating_char
!= (c
= flush_ws (port
, opts
, FUNC_NAME
)))
454 scm_i_input_error (FUNC_NAME
, port
, "missing close paren",
459 /* Build the head of the list structure. */
460 ans
= tl
= scm_cons (tmp
, SCM_EOL
);
462 while (terminating_char
!= (c
= flush_ws (port
, opts
, FUNC_NAME
)))
466 if (c
== ')' || (c
== ']' && opts
->square_brackets_p
))
467 scm_i_input_error (FUNC_NAME
, port
,
468 "in pair: mismatched close paren: ~A",
469 scm_list_1 (SCM_MAKE_CHAR (c
)));
471 scm_ungetc (c
, port
);
472 tmp
= scm_read_expression (port
, opts
);
474 /* See above note about scm_sym_dot. */
475 if (c
== '.' && scm_is_eq (scm_sym_dot
, tmp
))
477 SCM_SETCDR (tl
, scm_read_expression (port
, opts
));
479 c
= flush_ws (port
, opts
, FUNC_NAME
);
480 if (terminating_char
!= c
)
481 scm_i_input_error (FUNC_NAME
, port
,
482 "in pair: missing close paren", SCM_EOL
);
486 new_tail
= scm_cons (tmp
, SCM_EOL
);
487 SCM_SETCDR (tl
, new_tail
);
492 return maybe_annotate_source (ans
, port
, opts
, line
, column
);
497 /* Read a hexadecimal number NDIGITS in length. Put its value into the variable
498 C. If TERMINATOR is non-null, terminate early if the TERMINATOR character is
500 #define SCM_READ_HEX_ESCAPE(ndigits, terminator) \
506 while (i < ndigits) \
508 a = scm_getc (port); \
512 && (a == (scm_t_wchar) terminator) \
515 if ('0' <= a && a <= '9') \
517 else if ('A' <= a && a <= 'F') \
519 else if ('a' <= a && a <= 'f') \
532 skip_intraline_whitespace (SCM port
)
542 while (c
== '\t' || uc_is_general_category (c
, UC_SPACE_SEPARATOR
));
544 scm_ungetc (c
, port
);
548 scm_read_string (int chr
, SCM port
, scm_t_read_opts
*opts
)
549 #define FUNC_NAME "scm_lreadr"
551 /* For strings smaller than C_STR, this function creates only one Scheme
552 object (the string returned). */
555 size_t c_str_len
= 0;
556 scm_t_wchar c
, c_str
[READER_STRING_BUFFER_SIZE
];
558 /* Need to capture line and column numbers here. */
559 long line
= SCM_LINUM (port
);
560 int column
= SCM_COL (port
) - 1;
562 while ('"' != (c
= scm_getc (port
)))
567 scm_i_input_error (FUNC_NAME
, port
,
568 "end of file in string constant", SCM_EOL
);
571 if (c_str_len
+ 1 >= READER_STRING_BUFFER_SIZE
)
573 str
= scm_cons (scm_from_utf32_stringn (c_str
, c_str_len
), str
);
579 switch (c
= scm_getc (port
))
587 if (opts
->hungry_eol_escapes_p
)
588 skip_intraline_whitespace (port
);
615 if (opts
->r6rs_escapes_p
)
616 SCM_READ_HEX_ESCAPE (10, ';');
618 SCM_READ_HEX_ESCAPE (2, '\0');
621 if (!opts
->r6rs_escapes_p
)
623 SCM_READ_HEX_ESCAPE (4, '\0');
627 if (!opts
->r6rs_escapes_p
)
629 SCM_READ_HEX_ESCAPE (6, '\0');
634 scm_i_input_error (FUNC_NAME
, port
,
635 "illegal character in escape sequence: ~S",
636 scm_list_1 (SCM_MAKE_CHAR (c
)));
640 c_str
[c_str_len
++] = c
;
643 if (scm_is_null (str
))
644 /* Fast path: we got a string that fits in C_STR. */
645 str
= scm_from_utf32_stringn (c_str
, c_str_len
);
649 str
= scm_cons (scm_from_utf32_stringn (c_str
, c_str_len
), str
);
651 str
= scm_string_concatenate_reverse (str
, SCM_UNDEFINED
, SCM_UNDEFINED
);
654 return maybe_annotate_source (str
, port
, opts
, line
, column
);
660 scm_read_number (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
)
662 SCM result
, str
= SCM_EOL
;
663 char local_buffer
[READER_BUFFER_SIZE
], *buffer
;
665 scm_t_port
*pt
= SCM_PTAB_ENTRY (port
);
667 /* Need to capture line and column numbers here. */
668 long line
= SCM_LINUM (port
);
669 int column
= SCM_COL (port
) - 1;
671 scm_ungetc (chr
, port
);
672 buffer
= read_complete_token (port
, opts
, local_buffer
, sizeof local_buffer
,
675 str
= scm_from_stringn (buffer
, bytes_read
, pt
->encoding
, pt
->ilseq_handler
);
677 result
= scm_string_to_number (str
, SCM_UNDEFINED
);
678 if (scm_is_false (result
))
680 /* Return a symbol instead of a number */
681 if (opts
->case_insensitive_p
)
682 str
= scm_string_downcase_x (str
);
683 result
= scm_string_to_symbol (str
);
685 else if (SCM_NIMP (result
))
686 result
= maybe_annotate_source (result
, port
, opts
, line
, column
);
688 SCM_COL (port
) += scm_i_string_length (str
);
693 scm_read_mixed_case_symbol (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
)
696 int ends_with_colon
= 0;
698 int postfix
= (opts
->keyword_style
== KEYWORD_STYLE_POSTFIX
);
699 char local_buffer
[READER_BUFFER_SIZE
], *buffer
;
700 scm_t_port
*pt
= SCM_PTAB_ENTRY (port
);
703 scm_ungetc (chr
, port
);
704 buffer
= read_complete_token (port
, opts
, local_buffer
, sizeof local_buffer
,
707 ends_with_colon
= buffer
[bytes_read
- 1] == ':';
709 if (postfix
&& ends_with_colon
&& (bytes_read
> 1))
711 str
= scm_from_stringn (buffer
, bytes_read
- 1,
712 pt
->encoding
, pt
->ilseq_handler
);
714 if (opts
->case_insensitive_p
)
715 str
= scm_string_downcase_x (str
);
716 result
= scm_symbol_to_keyword (scm_string_to_symbol (str
));
720 str
= scm_from_stringn (buffer
, bytes_read
,
721 pt
->encoding
, pt
->ilseq_handler
);
723 if (opts
->case_insensitive_p
)
724 str
= scm_string_downcase_x (str
);
725 result
= scm_string_to_symbol (str
);
728 SCM_COL (port
) += scm_i_string_length (str
);
733 scm_read_number_and_radix (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
)
734 #define FUNC_NAME "scm_lreadr"
738 char local_buffer
[READER_BUFFER_SIZE
], *buffer
;
766 scm_ungetc (chr
, port
);
767 scm_ungetc ('#', port
);
771 buffer
= read_complete_token (port
, opts
, local_buffer
, sizeof local_buffer
,
774 pt
= SCM_PTAB_ENTRY (port
);
775 str
= scm_from_stringn (buffer
, read
, pt
->encoding
, pt
->ilseq_handler
);
777 result
= scm_string_to_number (str
, scm_from_uint (radix
));
779 SCM_COL (port
) += scm_i_string_length (str
);
781 if (scm_is_true (result
))
784 scm_i_input_error (FUNC_NAME
, port
, "unknown # object", SCM_EOL
);
791 scm_read_quote (int chr
, SCM port
, scm_t_read_opts
*opts
)
794 long line
= SCM_LINUM (port
);
795 int column
= SCM_COL (port
) - 1;
800 p
= scm_sym_quasiquote
;
813 p
= scm_sym_uq_splicing
;
816 scm_ungetc (c
, port
);
823 fprintf (stderr
, "%s: unhandled quote character (%i)\n",
824 "scm_read_quote", chr
);
828 p
= scm_cons2 (p
, scm_read_expression (port
, opts
), SCM_EOL
);
829 return maybe_annotate_source (p
, port
, opts
, line
, column
);
832 SCM_SYMBOL (sym_syntax
, "syntax");
833 SCM_SYMBOL (sym_quasisyntax
, "quasisyntax");
834 SCM_SYMBOL (sym_unsyntax
, "unsyntax");
835 SCM_SYMBOL (sym_unsyntax_splicing
, "unsyntax-splicing");
838 scm_read_syntax (int chr
, SCM port
, scm_t_read_opts
*opts
)
841 long line
= SCM_LINUM (port
);
842 int column
= SCM_COL (port
) - 1;
860 p
= sym_unsyntax_splicing
;
863 scm_ungetc (c
, port
);
870 fprintf (stderr
, "%s: unhandled syntax character (%i)\n",
871 "scm_read_syntax", chr
);
875 p
= scm_cons2 (p
, scm_read_expression (port
, opts
), SCM_EOL
);
876 return maybe_annotate_source (p
, port
, opts
, line
, column
);
880 scm_read_nil (int chr
, SCM port
, scm_t_read_opts
*opts
)
882 SCM id
= scm_read_mixed_case_symbol (chr
, port
, opts
);
884 if (!scm_is_eq (id
, sym_nil
))
885 scm_i_input_error ("scm_read_nil", port
,
886 "unexpected input while reading #nil: ~a",
889 return SCM_ELISP_NIL
;
893 scm_read_semicolon_comment (int chr
, SCM port
)
897 /* We use the get_byte here because there is no need to get the
898 locale correct with comment input. This presumes that newline
899 always represents itself no matter what the encoding is. */
900 for (c
= scm_get_byte_or_eof (port
);
901 (c
!= EOF
) && (c
!= '\n');
902 c
= scm_get_byte_or_eof (port
));
904 return SCM_UNSPECIFIED
;
908 /* Sharp readers, i.e. readers called after a `#' sign has been read. */
911 scm_read_boolean (int chr
, SCM port
)
924 return SCM_UNSPECIFIED
;
928 scm_read_character (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
)
929 #define FUNC_NAME "scm_lreadr"
931 char buffer
[READER_CHAR_NAME_MAX_SIZE
];
933 size_t charname_len
, bytes_read
;
938 overflow
= read_token (port
, opts
, buffer
, READER_CHAR_NAME_MAX_SIZE
,
941 scm_i_input_error (FUNC_NAME
, port
, "character name too long", SCM_EOL
);
945 chr
= scm_getc (port
);
947 scm_i_input_error (FUNC_NAME
, port
, "unexpected end of file "
948 "while reading character", SCM_EOL
);
950 /* CHR must be a token delimiter, like a whitespace. */
951 return (SCM_MAKE_CHAR (chr
));
954 pt
= SCM_PTAB_ENTRY (port
);
956 /* Simple ASCII characters can be processed immediately. Also, simple
957 ISO-8859-1 characters can be processed immediately if the encoding for this
958 port is ISO-8859-1. */
959 if (bytes_read
== 1 && ((unsigned char) buffer
[0] <= 127 || pt
->encoding
== NULL
))
962 return SCM_MAKE_CHAR (buffer
[0]);
965 /* Otherwise, convert the buffer into a proper scheme string for
967 charname
= scm_from_stringn (buffer
, bytes_read
, pt
->encoding
,
969 charname_len
= scm_i_string_length (charname
);
970 SCM_COL (port
) += charname_len
;
971 cp
= scm_i_string_ref (charname
, 0);
972 if (charname_len
== 1)
973 return SCM_MAKE_CHAR (cp
);
975 /* Ignore dotted circles, which may be used to keep combining characters from
976 combining with the backslash in #\charname. */
977 if (cp
== SCM_CODEPOINT_DOTTED_CIRCLE
&& charname_len
== 2)
978 return SCM_MAKE_CHAR (scm_i_string_ref (charname
, 1));
980 if (cp
>= '0' && cp
< '8')
982 /* Dirk:FIXME:: This type of character syntax is not R5RS
983 * compliant. Further, it should be verified that the constant
984 * does only consist of octal digits. */
985 SCM p
= scm_string_to_number (charname
, scm_from_uint (8));
988 scm_t_wchar c
= scm_to_uint32 (p
);
989 if (SCM_IS_UNICODE_CHAR (c
))
990 return SCM_MAKE_CHAR (c
);
992 scm_i_input_error (FUNC_NAME
, port
,
993 "out-of-range octal character escape: ~a",
994 scm_list_1 (charname
));
998 if (cp
== 'x' && (charname_len
> 1))
1002 /* Convert from hex, skipping the initial 'x' character in CHARNAME */
1003 p
= scm_string_to_number (scm_c_substring (charname
, 1, charname_len
),
1004 scm_from_uint (16));
1005 if (SCM_I_INUMP (p
))
1007 scm_t_wchar c
= scm_to_uint32 (p
);
1008 if (SCM_IS_UNICODE_CHAR (c
))
1009 return SCM_MAKE_CHAR (c
);
1011 scm_i_input_error (FUNC_NAME
, port
,
1012 "out-of-range hex character escape: ~a",
1013 scm_list_1 (charname
));
1017 /* The names of characters should never have non-Latin1
1019 if (scm_i_is_narrow_string (charname
)
1020 || scm_i_try_narrow_string (charname
))
1021 { SCM ch
= scm_i_charname_to_char (scm_i_string_chars (charname
),
1023 if (scm_is_true (ch
))
1027 scm_i_input_error (FUNC_NAME
, port
, "unknown character name ~a",
1028 scm_list_1 (charname
));
1030 return SCM_UNSPECIFIED
;
1035 scm_read_keyword (int chr
, SCM port
, scm_t_read_opts
*opts
)
1039 /* Read the symbol that comprises the keyword. Doing this instead of
1040 invoking a specific symbol reader function allows `scm_read_keyword ()'
1041 to adapt to the delimiters currently valid of symbols.
1043 XXX: This implementation allows sloppy syntaxes like `#: key'. */
1044 symbol
= scm_read_expression (port
, opts
);
1045 if (!scm_is_symbol (symbol
))
1046 scm_i_input_error ("scm_read_keyword", port
,
1047 "keyword prefix `~a' not followed by a symbol: ~s",
1048 scm_list_2 (SCM_MAKE_CHAR (chr
), symbol
));
1050 return (scm_symbol_to_keyword (symbol
));
1054 scm_read_vector (int chr
, SCM port
, scm_t_read_opts
*opts
,
1055 long line
, int column
)
1057 /* Note: We call `scm_read_sexp ()' rather than READER here in order to
1058 guarantee that it's going to do what we want. After all, this is an
1059 implementation detail of `scm_read_vector ()', not a desirable
1061 return maybe_annotate_source (scm_vector (scm_read_sexp (chr
, port
, opts
)),
1062 port
, opts
, line
, column
);
1065 /* Helper used by scm_read_array */
1067 read_decimal_integer (SCM port
, int c
, ssize_t
*resp
)
1076 c
= scm_getc (port
);
1079 while ('0' <= c
&& c
<= '9')
1081 res
= 10*res
+ c
-'0';
1083 c
= scm_getc (port
);
1091 /* Read an array. This function can also read vectors and uniform
1092 vectors. Also, the conflict between '#f' and '#f32' and '#f64' is
1095 C is the first character read after the '#'.
1098 scm_read_array (int c
, SCM port
, scm_t_read_opts
*opts
, long line
, int column
)
1101 scm_t_wchar tag_buf
[8];
1104 SCM tag
, shape
= SCM_BOOL_F
, elements
, array
;
1106 /* XXX - shortcut for ordinary vectors. Shouldn't be necessary but
1107 the array code can not deal with zero-length dimensions yet, and
1108 we want to allow zero-length vectors, of course.
1111 return scm_read_vector (c
, port
, opts
, line
, column
);
1113 /* Disambiguate between '#f' and uniform floating point vectors.
1117 c
= scm_getc (port
);
1118 if (c
!= '3' && c
!= '6')
1121 scm_ungetc (c
, port
);
1127 goto continue_reading_tag
;
1132 c
= read_decimal_integer (port
, c
, &rank
);
1134 scm_i_input_error (NULL
, port
, "array rank must be non-negative",
1139 continue_reading_tag
:
1140 while (c
!= EOF
&& c
!= '(' && c
!= '@' && c
!= ':'
1141 && tag_len
< sizeof tag_buf
/ sizeof tag_buf
[0])
1143 tag_buf
[tag_len
++] = c
;
1144 c
= scm_getc (port
);
1150 tag
= scm_string_to_symbol (scm_from_utf32_stringn (tag_buf
, tag_len
));
1151 if (tag_len
== sizeof tag_buf
/ sizeof tag_buf
[0])
1152 scm_i_input_error (NULL
, port
, "invalid array tag, starting with: ~a",
1157 if (c
== '@' || c
== ':')
1163 ssize_t lbnd
= 0, len
= 0;
1168 c
= scm_getc (port
);
1169 c
= read_decimal_integer (port
, c
, &lbnd
);
1172 s
= scm_from_ssize_t (lbnd
);
1176 c
= scm_getc (port
);
1177 c
= read_decimal_integer (port
, c
, &len
);
1179 scm_i_input_error (NULL
, port
,
1180 "array length must be non-negative",
1183 s
= scm_list_2 (s
, scm_from_ssize_t (lbnd
+len
-1));
1186 shape
= scm_cons (s
, shape
);
1187 } while (c
== '@' || c
== ':');
1189 shape
= scm_reverse_x (shape
, SCM_EOL
);
1192 /* Read nested lists of elements. */
1194 scm_i_input_error (NULL
, port
,
1195 "missing '(' in vector or array literal",
1197 elements
= scm_read_sexp (c
, port
, opts
);
1199 if (scm_is_false (shape
))
1200 shape
= scm_from_ssize_t (rank
);
1201 else if (scm_ilength (shape
) != rank
)
1204 "the number of shape specifications must match the array rank",
1207 /* Handle special print syntax of rank zero arrays; see
1208 scm_i_print_array for a rationale. */
1211 if (!scm_is_pair (elements
))
1212 scm_i_input_error (NULL
, port
,
1213 "too few elements in array literal, need 1",
1215 if (!scm_is_null (SCM_CDR (elements
)))
1216 scm_i_input_error (NULL
, port
,
1217 "too many elements in array literal, want 1",
1219 elements
= SCM_CAR (elements
);
1222 /* Construct array, annotate with source location, and return. */
1223 array
= scm_list_to_typed_array (tag
, shape
, elements
);
1224 return maybe_annotate_source (array
, port
, opts
, line
, column
);
1228 scm_read_srfi4_vector (int chr
, SCM port
, scm_t_read_opts
*opts
,
1229 long line
, int column
)
1231 return scm_read_array (chr
, port
, opts
, line
, column
);
1235 scm_read_bytevector (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
,
1236 long line
, int column
)
1238 chr
= scm_getc (port
);
1242 chr
= scm_getc (port
);
1246 chr
= scm_getc (port
);
1250 return maybe_annotate_source
1251 (scm_u8_list_to_bytevector (scm_read_sexp (chr
, port
, opts
)),
1252 port
, opts
, line
, column
);
1255 scm_i_input_error ("read_bytevector", port
,
1256 "invalid bytevector prefix",
1257 SCM_MAKE_CHAR (chr
));
1258 return SCM_UNSPECIFIED
;
1262 scm_read_guile_bit_vector (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
,
1263 long line
, int column
)
1265 /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
1266 terribly inefficient but who cares? */
1267 SCM s_bits
= SCM_EOL
;
1269 for (chr
= scm_getc (port
);
1270 (chr
!= EOF
) && ((chr
== '0') || (chr
== '1'));
1271 chr
= scm_getc (port
))
1273 s_bits
= scm_cons ((chr
== '0') ? SCM_BOOL_F
: SCM_BOOL_T
, s_bits
);
1277 scm_ungetc (chr
, port
);
1279 return maybe_annotate_source
1280 (scm_bitvector (scm_reverse_x (s_bits
, SCM_EOL
)),
1281 port
, opts
, line
, column
);
1285 scm_read_scsh_block_comment (scm_t_wchar chr
, SCM port
)
1291 int c
= scm_getc (port
);
1294 scm_i_input_error ("skip_block_comment", port
,
1295 "unterminated `#! ... !#' comment", SCM_EOL
);
1299 else if (c
== '#' && bang_seen
)
1305 return SCM_UNSPECIFIED
;
1309 scm_read_shebang (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
)
1311 char name
[READER_DIRECTIVE_NAME_MAX_SIZE
+ 1];
1315 while (i
<= READER_DIRECTIVE_NAME_MAX_SIZE
)
1317 c
= scm_getc (port
);
1319 scm_i_input_error ("skip_block_comment", port
,
1320 "unterminated `#! ... !#' comment", SCM_EOL
);
1321 else if (('a' <= c
&& c
<= 'z') || ('0' <= c
&& c
<= '9') || c
== '-')
1323 else if (CHAR_IS_DELIMITER (c
))
1325 scm_ungetc (c
, port
);
1327 if (0 == strcmp ("r6rs", name
))
1328 ; /* Silently ignore */
1332 return SCM_UNSPECIFIED
;
1336 scm_ungetc (name
[--i
], port
);
1337 return scm_read_scsh_block_comment (chr
, port
);
1341 scm_read_r6rs_block_comment (scm_t_wchar chr
, SCM port
)
1343 /* Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
1344 nested. So care must be taken. */
1345 int nesting_level
= 1;
1347 int a
= scm_getc (port
);
1350 scm_i_input_error ("scm_read_r6rs_block_comment", port
,
1351 "unterminated `#| ... |#' comment", SCM_EOL
);
1353 while (nesting_level
> 0)
1355 int b
= scm_getc (port
);
1358 scm_i_input_error ("scm_read_r6rs_block_comment", port
,
1359 "unterminated `#| ... |#' comment", SCM_EOL
);
1361 if (a
== '|' && b
== '#')
1366 else if (a
== '#' && b
== '|')
1375 return SCM_UNSPECIFIED
;
1379 scm_read_commented_expression (scm_t_wchar chr
, SCM port
,
1380 scm_t_read_opts
*opts
)
1384 c
= flush_ws (port
, opts
, (char *) NULL
);
1386 scm_i_input_error ("read_commented_expression", port
,
1387 "no expression after #; comment", SCM_EOL
);
1388 scm_ungetc (c
, port
);
1389 scm_read_expression (port
, opts
);
1390 return SCM_UNSPECIFIED
;
1394 scm_read_extended_symbol (scm_t_wchar chr
, SCM port
)
1396 /* Guile's extended symbol read syntax looks like this:
1398 #{This is all a symbol name}#
1400 So here, CHR is expected to be `{'. */
1403 SCM buf
= scm_i_make_string (1024, NULL
, 0);
1405 buf
= scm_i_string_start_writing (buf
);
1407 while ((chr
= scm_getc (port
)) != EOF
)
1418 scm_i_string_set_x (buf
, len
++, '}');
1424 else if (chr
== '\\')
1426 /* It used to be that print.c would print extended-read-syntax
1427 symbols with backslashes before "non-standard" chars, but
1428 this routine wouldn't do anything with those escapes.
1429 Bummer. What we've done is to change print.c to output
1430 R6RS hex escapes for those characters, relying on the fact
1431 that the extended read syntax would never put a `\' before
1432 an `x'. For now, we just ignore other instances of
1433 backslash in the string. */
1434 switch ((chr
= scm_getc (port
)))
1442 SCM_READ_HEX_ESCAPE (10, ';');
1443 scm_i_string_set_x (buf
, len
++, c
);
1451 scm_i_string_stop_writing ();
1452 scm_i_input_error ("scm_read_extended_symbol", port
,
1453 "illegal character in escape sequence: ~S",
1454 scm_list_1 (SCM_MAKE_CHAR (c
)));
1458 scm_i_string_set_x (buf
, len
++, chr
);
1463 scm_i_string_set_x (buf
, len
++, chr
);
1465 if (len
>= scm_i_string_length (buf
) - 2)
1469 scm_i_string_stop_writing ();
1470 addy
= scm_i_make_string (1024, NULL
, 0);
1471 buf
= scm_string_append (scm_list_2 (buf
, addy
));
1473 buf
= scm_i_string_start_writing (buf
);
1478 scm_i_string_stop_writing ();
1480 scm_i_input_error ("scm_read_extended_symbol", port
,
1481 "end of file while reading symbol", SCM_EOL
);
1483 return (scm_string_to_symbol (scm_c_substring (buf
, 0, len
)));
1488 /* Top-level token readers, i.e., dispatchers. */
1491 scm_read_sharp_extension (int chr
, SCM port
, scm_t_read_opts
*opts
)
1495 proc
= scm_get_hash_procedure (chr
);
1496 if (scm_is_true (scm_procedure_p (proc
)))
1498 long line
= SCM_LINUM (port
);
1499 int column
= SCM_COL (port
) - 2;
1502 got
= scm_call_2 (proc
, SCM_MAKE_CHAR (chr
), port
);
1504 if (opts
->record_positions_p
&& SCM_NIMP (got
)
1505 && !scm_i_has_source_properties (got
))
1506 scm_i_set_source_properties_x (got
, line
, column
, SCM_FILENAME (port
));
1511 return SCM_UNSPECIFIED
;
1514 /* The reader for the sharp `#' character. It basically dispatches reads
1515 among the above token readers. */
1517 scm_read_sharp (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
,
1518 long line
, int column
)
1519 #define FUNC_NAME "scm_lreadr"
1523 chr
= scm_getc (port
);
1525 result
= scm_read_sharp_extension (chr
, port
, opts
);
1526 if (!scm_is_eq (result
, SCM_UNSPECIFIED
))
1532 return (scm_read_character (chr
, port
, opts
));
1534 return (scm_read_vector (chr
, port
, opts
, line
, column
));
1539 /* This one may return either a boolean or an SRFI-4 vector. */
1540 return (scm_read_srfi4_vector (chr
, port
, opts
, line
, column
));
1542 return (scm_read_bytevector (chr
, port
, opts
, line
, column
));
1544 return (scm_read_guile_bit_vector (chr
, port
, opts
, line
, column
));
1548 return (scm_read_boolean (chr
, port
));
1550 return (scm_read_keyword (chr
, port
, opts
));
1551 case '0': case '1': case '2': case '3': case '4':
1552 case '5': case '6': case '7': case '8': case '9':
1554 #if SCM_ENABLE_DEPRECATED
1555 /* See below for 'i' and 'e'. */
1561 return (scm_read_array (chr
, port
, opts
, line
, column
));
1565 #if SCM_ENABLE_DEPRECATED
1567 /* When next char is '(', it really is an old-style
1569 scm_t_wchar next_c
= scm_getc (port
);
1571 scm_ungetc (next_c
, port
);
1573 return scm_read_array (chr
, port
, opts
, line
, column
);
1587 return (scm_read_number_and_radix (chr
, port
, opts
));
1589 return (scm_read_extended_symbol (chr
, port
));
1591 return (scm_read_shebang (chr
, port
, opts
));
1593 return (scm_read_commented_expression (chr
, port
, opts
));
1597 return (scm_read_syntax (chr
, port
, opts
));
1599 return (scm_read_nil (chr
, port
, opts
));
1601 result
= scm_read_sharp_extension (chr
, port
, opts
);
1602 if (scm_is_eq (result
, SCM_UNSPECIFIED
))
1604 /* To remain compatible with 1.8 and earlier, the following
1605 characters have lower precedence than `read-hash-extend'
1610 return scm_read_r6rs_block_comment (chr
, port
);
1612 scm_i_input_error (FUNC_NAME
, port
, "Unknown # object: ~S",
1613 scm_list_1 (SCM_MAKE_CHAR (chr
)));
1620 return SCM_UNSPECIFIED
;
1625 scm_read_expression (SCM port
, scm_t_read_opts
*opts
)
1626 #define FUNC_NAME "scm_read_expression"
1632 chr
= scm_getc (port
);
1636 case SCM_WHITE_SPACES
:
1637 case SCM_LINE_INCREMENTORS
:
1640 (void) scm_read_semicolon_comment (chr
, port
);
1643 if (!opts
->square_brackets_p
)
1644 return (scm_read_mixed_case_symbol (chr
, port
, opts
));
1645 /* otherwise fall through */
1647 return (scm_read_sexp (chr
, port
, opts
));
1649 return (scm_read_string (chr
, port
, opts
));
1653 return (scm_read_quote (chr
, port
, opts
));
1656 long line
= SCM_LINUM (port
);
1657 int column
= SCM_COL (port
) - 1;
1658 SCM result
= scm_read_sharp (chr
, port
, opts
, line
, column
);
1659 if (scm_is_eq (result
, SCM_UNSPECIFIED
))
1660 /* We read a comment or some such. */
1666 scm_i_input_error (FUNC_NAME
, port
, "unexpected \")\"", SCM_EOL
);
1669 if (opts
->square_brackets_p
)
1670 scm_i_input_error (FUNC_NAME
, port
, "unexpected \"]\"", SCM_EOL
);
1671 /* otherwise fall through */
1675 if (opts
->keyword_style
== KEYWORD_STYLE_PREFIX
)
1676 return scm_symbol_to_keyword (scm_read_expression (port
, opts
));
1681 if (((chr
>= '0') && (chr
<= '9'))
1682 || (strchr ("+-.", chr
)))
1683 return (scm_read_number (chr
, port
, opts
));
1685 return (scm_read_mixed_case_symbol (chr
, port
, opts
));
1693 /* Actual reader. */
1695 SCM_DEFINE (scm_read
, "read", 0, 1, 0,
1697 "Read an s-expression from the input port @var{port}, or from\n"
1698 "the current input port if @var{port} is not specified.\n"
1699 "Any whitespace before the next token is discarded.")
1700 #define FUNC_NAME s_scm_read
1702 scm_t_read_opts opts
;
1705 if (SCM_UNBNDP (port
))
1706 port
= scm_current_input_port ();
1707 SCM_VALIDATE_OPINPORT (1, port
);
1709 init_read_options (&opts
);
1711 c
= flush_ws (port
, &opts
, (char *) NULL
);
1714 scm_ungetc (c
, port
);
1716 return (scm_read_expression (port
, &opts
));
1723 /* Manipulate the read-hash-procedures alist. This could be written in
1724 Scheme, but maybe it will also be used by C code during initialisation. */
1725 SCM_DEFINE (scm_read_hash_extend
, "read-hash-extend", 2, 0, 0,
1726 (SCM chr
, SCM proc
),
1727 "Install the procedure @var{proc} for reading expressions\n"
1728 "starting with the character sequence @code{#} and @var{chr}.\n"
1729 "@var{proc} will be called with two arguments: the character\n"
1730 "@var{chr} and the port to read further data from. The object\n"
1731 "returned will be the return value of @code{read}. \n"
1732 "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1734 #define FUNC_NAME s_scm_read_hash_extend
1739 SCM_VALIDATE_CHAR (1, chr
);
1740 SCM_ASSERT (scm_is_false (proc
)
1741 || scm_is_eq (scm_procedure_p (proc
), SCM_BOOL_T
),
1742 proc
, SCM_ARG2
, FUNC_NAME
);
1744 /* Check if chr is already in the alist. */
1745 this = scm_i_read_hash_procedures_ref ();
1749 if (scm_is_null (this))
1751 /* not found, so add it to the beginning. */
1752 if (scm_is_true (proc
))
1754 SCM
new = scm_cons (scm_cons (chr
, proc
),
1755 scm_i_read_hash_procedures_ref ());
1756 scm_i_read_hash_procedures_set_x (new);
1760 if (scm_is_eq (chr
, SCM_CAAR (this)))
1762 /* already in the alist. */
1763 if (scm_is_false (proc
))
1766 if (scm_is_false (prev
))
1768 SCM rest
= SCM_CDR (scm_i_read_hash_procedures_ref ());
1769 scm_i_read_hash_procedures_set_x (rest
);
1772 scm_set_cdr_x (prev
, SCM_CDR (this));
1777 scm_set_cdr_x (SCM_CAR (this), proc
);
1782 this = SCM_CDR (this);
1785 return SCM_UNSPECIFIED
;
1789 /* Recover the read-hash procedure corresponding to char c. */
1791 scm_get_hash_procedure (int c
)
1793 SCM rest
= scm_i_read_hash_procedures_ref ();
1797 if (scm_is_null (rest
))
1800 if (SCM_CHAR (SCM_CAAR (rest
)) == c
)
1801 return SCM_CDAR (rest
);
1803 rest
= SCM_CDR (rest
);
1807 #define SCM_ENCODING_SEARCH_SIZE (500)
1809 /* Search the first few hundred characters of a file for an Emacs-like coding
1810 declaration. Returns either NULL or a string whose storage has been
1811 allocated with `scm_gc_malloc ()'. */
1813 scm_i_scan_for_encoding (SCM port
)
1816 char header
[SCM_ENCODING_SEARCH_SIZE
+1];
1817 size_t bytes_read
, encoding_length
, i
;
1818 char *encoding
= NULL
;
1820 char *pos
, *encoding_start
;
1823 pt
= SCM_PTAB_ENTRY (port
);
1825 if (pt
->rw_active
== SCM_PORT_WRITE
)
1829 pt
->rw_active
= SCM_PORT_READ
;
1831 if (pt
->read_pos
== pt
->read_end
)
1833 /* We can use the read buffer, and thus avoid a seek. */
1834 if (scm_fill_input (port
) == EOF
)
1837 bytes_read
= pt
->read_end
- pt
->read_pos
;
1838 if (bytes_read
> SCM_ENCODING_SEARCH_SIZE
)
1839 bytes_read
= SCM_ENCODING_SEARCH_SIZE
;
1841 if (bytes_read
<= 1)
1842 /* An unbuffered port -- don't scan. */
1845 memcpy (header
, pt
->read_pos
, bytes_read
);
1846 header
[bytes_read
] = '\0';
1850 /* Try to read some bytes and then seek back. Not all ports
1851 support seeking back; and indeed some file ports (like
1852 /dev/urandom) will succeed on an lseek (fd, 0, SEEK_CUR)---the
1853 check performed by SCM_FPORT_FDES---but fail to seek
1854 backwards. Hence this block comes second. We prefer to use
1855 the read buffer in-place. */
1856 if (SCM_FPORTP (port
) && !SCM_FDES_RANDOM_P (SCM_FPORT_FDES (port
)))
1859 bytes_read
= scm_c_read (port
, header
, SCM_ENCODING_SEARCH_SIZE
);
1860 header
[bytes_read
] = '\0';
1861 scm_seek (port
, scm_from_int (0), scm_from_int (SEEK_SET
));
1865 && header
[0] == '\xef' && header
[1] == '\xbb' && header
[2] == '\xbf')
1868 /* search past "coding[:=]" */
1872 if ((pos
= strstr(pos
, "coding")) == NULL
)
1875 pos
+= strlen("coding");
1876 if (pos
- header
>= SCM_ENCODING_SEARCH_SIZE
||
1877 (*pos
== ':' || *pos
== '='))
1885 while (pos
- header
<= SCM_ENCODING_SEARCH_SIZE
&&
1886 (*pos
== ' ' || *pos
== '\t'))
1889 /* grab the next token */
1890 encoding_start
= pos
;
1892 while (encoding_start
+ i
- header
<= SCM_ENCODING_SEARCH_SIZE
1893 && encoding_start
+ i
- header
< bytes_read
1894 && (isalnum ((int) encoding_start
[i
])
1895 || strchr ("_-.:/,+=()", encoding_start
[i
]) != NULL
))
1898 encoding_length
= i
;
1899 if (encoding_length
== 0)
1902 encoding
= scm_gc_strndup (encoding_start
, encoding_length
, "encoding");
1903 for (i
= 0; i
< encoding_length
; i
++)
1904 encoding
[i
] = toupper ((int) encoding
[i
]);
1906 /* push backwards to make sure we were in a comment */
1908 pos
= encoding_start
;
1909 while (pos
>= header
)
1916 else if (*pos
== '\n' || pos
== header
)
1918 /* This wasn't in a semicolon comment. Check for a
1919 hash-bang comment. */
1920 char *beg
= strstr (header
, "#!");
1921 char *end
= strstr (header
, "!#");
1922 if (beg
< encoding_start
&& encoding_start
+ encoding_length
<= end
)
1933 /* This wasn't in a comment */
1936 if (utf8_bom
&& strcmp(encoding
, "UTF-8"))
1937 scm_misc_error (NULL
,
1938 "the port input declares the encoding ~s but is encoded as UTF-8",
1939 scm_list_1 (scm_from_locale_string (encoding
)));
1944 SCM_DEFINE (scm_file_encoding
, "file-encoding", 1, 0, 0,
1946 "Scans the port for an Emacs-like character coding declaration\n"
1947 "near the top of the contents of a port with random-accessible contents.\n"
1948 "The coding declaration is of the form\n"
1949 "@code{coding: XXXXX} and must appear in a scheme comment.\n"
1951 "Returns a string containing the character encoding of the file\n"
1952 "if a declaration was found, or @code{#f} otherwise.\n")
1953 #define FUNC_NAME s_scm_file_encoding
1958 SCM_VALIDATE_OPINPORT (SCM_ARG1
, port
);
1960 enc
= scm_i_scan_for_encoding (port
);
1965 s_enc
= scm_from_locale_string (enc
);
1976 SCM read_hash_procs
;
1978 read_hash_procs
= scm_make_fluid_with_default (SCM_EOL
);
1980 scm_i_read_hash_procedures
=
1981 SCM_VARIABLE_LOC (scm_c_define ("%read-hash-procedures", read_hash_procs
));
1983 scm_init_opts (scm_read_options
, scm_read_opts
);
1984 #include "libguile/read.x"