1 /* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006,
2 * 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
34 #include "libguile/_scm.h"
35 #include "libguile/bytevectors.h"
36 #include "libguile/chars.h"
37 #include "libguile/eval.h"
38 #include "libguile/arrays.h"
39 #include "libguile/bitvectors.h"
40 #include "libguile/keywords.h"
41 #include "libguile/alist.h"
42 #include "libguile/srcprop.h"
43 #include "libguile/hashtab.h"
44 #include "libguile/hash.h"
45 #include "libguile/ports.h"
46 #include "libguile/fports.h"
47 #include "libguile/root.h"
48 #include "libguile/strings.h"
49 #include "libguile/strports.h"
50 #include "libguile/vectors.h"
51 #include "libguile/validate.h"
52 #include "libguile/srfi-4.h"
53 #include "libguile/srfi-13.h"
55 #include "libguile/read.h"
56 #include "libguile/private-options.h"
61 SCM_GLOBAL_SYMBOL (scm_sym_dot
, ".");
62 SCM_SYMBOL (scm_keyword_prefix
, "prefix");
63 SCM_SYMBOL (scm_keyword_postfix
, "postfix");
64 SCM_SYMBOL (sym_nil
, "nil");
66 scm_t_option scm_read_opts
[] = {
67 { SCM_OPTION_BOOLEAN
, "copy", 0,
68 "Copy source code expressions." },
69 { SCM_OPTION_BOOLEAN
, "positions", 1,
70 "Record positions of source code expressions." },
71 { SCM_OPTION_BOOLEAN
, "case-insensitive", 0,
72 "Convert symbols to lower case."},
73 { SCM_OPTION_SCM
, "keywords", (scm_t_bits
) SCM_BOOL_F_BITS
,
74 "Style of keyword recognition: #f, 'prefix or 'postfix."},
75 { SCM_OPTION_BOOLEAN
, "r6rs-hex-escapes", 0,
76 "Use R6RS variable-length character and string hex escapes."},
77 { SCM_OPTION_BOOLEAN
, "square-brackets", 1,
78 "Treat `[' and `]' as parentheses, for R6RS compatibility."},
79 { SCM_OPTION_BOOLEAN
, "hungry-eol-escapes", 0,
80 "In strings, consume leading whitespace after an escaped end-of-line."},
84 /* Internal read options structure. This is initialized by 'scm_read'
85 from the global read options, and a pointer is passed down to all
87 enum t_keyword_style
{
88 KEYWORD_STYLE_HASH_PREFIX
,
94 enum t_keyword_style keyword_style
;
95 unsigned int copy_source_p
: 1;
96 unsigned int record_positions_p
: 1;
97 unsigned int case_insensitive_p
: 1;
98 unsigned int r6rs_escapes_p
: 1;
99 unsigned int square_brackets_p
: 1;
100 unsigned int hungry_eol_escapes_p
: 1;
103 typedef struct t_read_opts scm_t_read_opts
;
105 /* Initialize OPTS from the global read options. */
107 init_read_options (scm_t_read_opts
*opts
)
112 val
= SCM_PACK (SCM_KEYWORD_STYLE
);
113 if (scm_is_eq (val
, scm_keyword_prefix
))
114 x
= KEYWORD_STYLE_PREFIX
;
115 else if (scm_is_eq (val
, scm_keyword_postfix
))
116 x
= KEYWORD_STYLE_POSTFIX
;
118 x
= KEYWORD_STYLE_HASH_PREFIX
;
119 opts
->keyword_style
= x
;
121 #define RESOLVE_BOOLEAN_OPTION(NAME, name) \
122 (opts->name = !!SCM_ ## NAME)
124 RESOLVE_BOOLEAN_OPTION (COPY_SOURCE_P
, copy_source_p
);
125 RESOLVE_BOOLEAN_OPTION (RECORD_POSITIONS_P
, record_positions_p
);
126 RESOLVE_BOOLEAN_OPTION (CASE_INSENSITIVE_P
, case_insensitive_p
);
127 RESOLVE_BOOLEAN_OPTION (R6RS_ESCAPES_P
, r6rs_escapes_p
);
128 RESOLVE_BOOLEAN_OPTION (SQUARE_BRACKETS_P
, square_brackets_p
);
129 RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P
, hungry_eol_escapes_p
);
131 #undef RESOLVE_BOOLEAN_OPTION
136 Give meaningful error messages for errors
140 FILE:LINE:COL: MESSAGE
141 This happened in ....
143 This is not standard GNU format, but the test-suite likes the real
144 message to be in front.
150 scm_i_input_error (char const *function
,
151 SCM port
, const char *message
, SCM arg
)
153 SCM fn
= (scm_is_string (SCM_FILENAME(port
))
155 : scm_from_locale_string ("#<unknown port>"));
157 SCM string_port
= scm_open_output_string ();
158 SCM string
= SCM_EOL
;
159 scm_simple_format (string_port
,
160 scm_from_locale_string ("~A:~S:~S: ~A"),
162 scm_from_long (SCM_LINUM (port
) + 1),
163 scm_from_int (SCM_COL (port
) + 1),
164 scm_from_locale_string (message
)));
166 string
= scm_get_output_string (string_port
);
167 scm_close_output_port (string_port
);
168 scm_error_scm (scm_from_latin1_symbol ("read-error"),
169 function
? scm_from_locale_string (function
) : SCM_BOOL_F
,
176 SCM_DEFINE (scm_read_options
, "read-options-interface", 0, 1, 0,
178 "Option interface for the read options. Instead of using\n"
179 "this procedure directly, use the procedures @code{read-enable},\n"
180 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
181 #define FUNC_NAME s_scm_read_options
183 SCM ans
= scm_options (setting
,
186 if (SCM_COPY_SOURCE_P
)
187 SCM_RECORD_POSITIONS_P
= 1;
192 /* A fluid referring to an association list mapping extra hash
193 characters to procedures. */
194 static SCM
*scm_i_read_hash_procedures
;
197 scm_i_read_hash_procedures_ref (void)
199 return scm_fluid_ref (*scm_i_read_hash_procedures
);
203 scm_i_read_hash_procedures_set_x (SCM value
)
205 scm_fluid_set_x (*scm_i_read_hash_procedures
, value
);
212 /* Size of the C buffer used to read symbols and numbers. */
213 #define READER_BUFFER_SIZE 128
215 /* Number of 32-bit codepoints in the buffer used to read strings. */
216 #define READER_STRING_BUFFER_SIZE 128
218 /* The maximum size of Scheme character names. */
219 #define READER_CHAR_NAME_MAX_SIZE 50
222 /* `isblank' is only in C99. */
223 #define CHAR_IS_BLANK_(_chr) \
224 (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n') \
225 || ((_chr) == '\f') || ((_chr) == '\r'))
228 # define CHAR_IS_BLANK(_chr) \
229 ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
231 # define CHAR_IS_BLANK CHAR_IS_BLANK_
235 /* R5RS one-character delimiters (see section 7.1.1, ``Lexical
237 #define CHAR_IS_R5RS_DELIMITER(c) \
239 || (c) == ')' || (c) == '(' || (c) == ';' || (c) == '"')
241 #define CHAR_IS_DELIMITER(c) \
242 (CHAR_IS_R5RS_DELIMITER (c) \
243 || (((c) == ']' || (c) == '[') && opts->square_brackets_p))
245 /* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
247 #define CHAR_IS_EXPONENT_MARKER(_chr) \
248 (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
249 || ((_chr) == 'd') || ((_chr) == 'l'))
251 /* Read an SCSH block comment. */
252 static SCM
scm_read_scsh_block_comment (scm_t_wchar
, SCM
);
253 static SCM
scm_read_r6rs_block_comment (scm_t_wchar
, SCM
);
254 static SCM
scm_read_commented_expression (scm_t_wchar
, SCM
, scm_t_read_opts
*);
255 static SCM
scm_read_shebang (scm_t_wchar
, SCM
, scm_t_read_opts
*);
256 static SCM
scm_get_hash_procedure (int);
258 /* Read from PORT until a delimiter (e.g., a whitespace) is read. Put the
259 result in the pre-allocated buffer BUF. Return zero if the whole token has
260 fewer than BUF_SIZE bytes, non-zero otherwise. READ will be set the number of
261 bytes actually read. */
263 read_token (SCM port
, scm_t_read_opts
*opts
,
264 char *buf
, size_t buf_size
, size_t *read
)
268 while (*read
< buf_size
)
272 chr
= scm_get_byte_or_eof (port
);
276 else if (CHAR_IS_DELIMITER (chr
))
278 scm_unget_byte (chr
, port
);
291 /* Like `read_token', but return either BUFFER, or a GC-allocated buffer
292 if the token doesn't fit in BUFFER_SIZE bytes. */
294 read_complete_token (SCM port
, scm_t_read_opts
*opts
,
295 char *buffer
, size_t buffer_size
, size_t *read
)
298 size_t bytes_read
, overflow_size
= 0;
299 char *overflow_buffer
= NULL
;
303 overflow
= read_token (port
, opts
, buffer
, buffer_size
, &bytes_read
);
306 if (overflow
|| overflow_size
!= 0)
308 if (overflow_size
== 0)
310 overflow_buffer
= scm_gc_malloc_pointerless (bytes_read
, "read");
311 memcpy (overflow_buffer
, buffer
, bytes_read
);
312 overflow_size
= bytes_read
;
317 scm_gc_malloc_pointerless (overflow_size
+ bytes_read
, "read");
319 memcpy (new_buf
, overflow_buffer
, overflow_size
);
320 memcpy (new_buf
+ overflow_size
, buffer
, bytes_read
);
322 overflow_buffer
= new_buf
;
323 overflow_size
+= bytes_read
;
330 *read
= overflow_size
;
334 return (overflow_size
> 0 ? overflow_buffer
: buffer
);
337 /* Skip whitespace from PORT and return the first non-whitespace character
338 read. Raise an error on end-of-file. */
340 flush_ws (SCM port
, scm_t_read_opts
*opts
, const char *eoferr
)
344 switch (c
= scm_getc (port
))
350 scm_i_input_error (eoferr
,
359 switch (c
= scm_getc (port
))
365 case SCM_LINE_INCREMENTORS
:
371 switch (c
= scm_getc (port
))
374 eoferr
= "read_sharp";
377 scm_read_shebang (c
, port
, opts
);
380 scm_read_commented_expression (c
, port
, opts
);
383 if (scm_is_false (scm_get_hash_procedure (c
)))
385 scm_read_r6rs_block_comment (c
, port
);
390 scm_ungetc (c
, port
);
395 case SCM_LINE_INCREMENTORS
:
396 case SCM_SINGLE_SPACES
:
411 static SCM
scm_read_expression (SCM port
, scm_t_read_opts
*opts
);
412 static SCM
scm_read_sharp (int chr
, SCM port
, scm_t_read_opts
*opts
,
413 long line
, int column
);
417 maybe_annotate_source (SCM x
, SCM port
, scm_t_read_opts
*opts
,
418 long line
, int column
)
420 if (opts
->record_positions_p
)
421 scm_i_set_source_properties_x (x
, line
, column
, SCM_FILENAME (port
));
426 scm_read_sexp (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
)
427 #define FUNC_NAME "scm_i_lreadparen"
430 SCM tmp
, tl
, ans
= SCM_EOL
;
431 const int terminating_char
= ((chr
== '[') ? ']' : ')');
433 /* Need to capture line and column numbers here. */
434 long line
= SCM_LINUM (port
);
435 int column
= SCM_COL (port
) - 1;
437 c
= flush_ws (port
, opts
, FUNC_NAME
);
438 if (terminating_char
== c
)
441 scm_ungetc (c
, port
);
442 tmp
= scm_read_expression (port
, opts
);
444 /* Note that it is possible for scm_read_expression to return
445 scm_sym_dot, but not as part of a dotted pair: as in #{.}#. So
446 check that it's a real dot by checking `c'. */
447 if (c
== '.' && scm_is_eq (scm_sym_dot
, tmp
))
449 ans
= scm_read_expression (port
, opts
);
450 if (terminating_char
!= (c
= flush_ws (port
, opts
, FUNC_NAME
)))
451 scm_i_input_error (FUNC_NAME
, port
, "missing close paren",
456 /* Build the head of the list structure. */
457 ans
= tl
= scm_cons (tmp
, SCM_EOL
);
459 while (terminating_char
!= (c
= flush_ws (port
, opts
, FUNC_NAME
)))
463 if (c
== ')' || (c
== ']' && opts
->square_brackets_p
))
464 scm_i_input_error (FUNC_NAME
, port
,
465 "in pair: mismatched close paren: ~A",
466 scm_list_1 (SCM_MAKE_CHAR (c
)));
468 scm_ungetc (c
, port
);
469 tmp
= scm_read_expression (port
, opts
);
471 /* See above note about scm_sym_dot. */
472 if (c
== '.' && scm_is_eq (scm_sym_dot
, tmp
))
474 SCM_SETCDR (tl
, scm_read_expression (port
, opts
));
476 c
= flush_ws (port
, opts
, FUNC_NAME
);
477 if (terminating_char
!= c
)
478 scm_i_input_error (FUNC_NAME
, port
,
479 "in pair: missing close paren", SCM_EOL
);
483 new_tail
= scm_cons (tmp
, SCM_EOL
);
484 SCM_SETCDR (tl
, new_tail
);
489 return maybe_annotate_source (ans
, port
, opts
, line
, column
);
494 /* Read a hexadecimal number NDIGITS in length. Put its value into the variable
495 C. If TERMINATOR is non-null, terminate early if the TERMINATOR character is
497 #define SCM_READ_HEX_ESCAPE(ndigits, terminator) \
503 while (i < ndigits) \
505 a = scm_getc (port); \
509 && (a == (scm_t_wchar) terminator) \
512 if ('0' <= a && a <= '9') \
514 else if ('A' <= a && a <= 'F') \
516 else if ('a' <= a && a <= 'f') \
529 skip_intraline_whitespace (SCM port
)
539 while (c
== '\t' || uc_is_general_category (c
, UC_SPACE_SEPARATOR
));
541 scm_ungetc (c
, port
);
545 scm_read_string (int chr
, SCM port
, scm_t_read_opts
*opts
)
546 #define FUNC_NAME "scm_lreadr"
548 /* For strings smaller than C_STR, this function creates only one Scheme
549 object (the string returned). */
552 size_t c_str_len
= 0;
553 scm_t_wchar c
, c_str
[READER_STRING_BUFFER_SIZE
];
555 /* Need to capture line and column numbers here. */
556 long line
= SCM_LINUM (port
);
557 int column
= SCM_COL (port
) - 1;
559 while ('"' != (c
= scm_getc (port
)))
564 scm_i_input_error (FUNC_NAME
, port
,
565 "end of file in string constant", SCM_EOL
);
568 if (c_str_len
+ 1 >= READER_STRING_BUFFER_SIZE
)
570 str
= scm_cons (scm_from_utf32_stringn (c_str
, c_str_len
), str
);
576 switch (c
= scm_getc (port
))
584 if (opts
->hungry_eol_escapes_p
)
585 skip_intraline_whitespace (port
);
612 if (opts
->r6rs_escapes_p
)
613 SCM_READ_HEX_ESCAPE (10, ';');
615 SCM_READ_HEX_ESCAPE (2, '\0');
618 if (!opts
->r6rs_escapes_p
)
620 SCM_READ_HEX_ESCAPE (4, '\0');
624 if (!opts
->r6rs_escapes_p
)
626 SCM_READ_HEX_ESCAPE (6, '\0');
631 scm_i_input_error (FUNC_NAME
, port
,
632 "illegal character in escape sequence: ~S",
633 scm_list_1 (SCM_MAKE_CHAR (c
)));
637 c_str
[c_str_len
++] = c
;
640 if (scm_is_null (str
))
641 /* Fast path: we got a string that fits in C_STR. */
642 str
= scm_from_utf32_stringn (c_str
, c_str_len
);
646 str
= scm_cons (scm_from_utf32_stringn (c_str
, c_str_len
), str
);
648 str
= scm_string_concatenate_reverse (str
, SCM_UNDEFINED
, SCM_UNDEFINED
);
651 return maybe_annotate_source (str
, port
, opts
, line
, column
);
657 scm_read_number (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
)
659 SCM result
, str
= SCM_EOL
;
660 char local_buffer
[READER_BUFFER_SIZE
], *buffer
;
662 scm_t_port
*pt
= SCM_PTAB_ENTRY (port
);
664 /* Need to capture line and column numbers here. */
665 long line
= SCM_LINUM (port
);
666 int column
= SCM_COL (port
) - 1;
668 scm_ungetc (chr
, port
);
669 buffer
= read_complete_token (port
, opts
, local_buffer
, sizeof local_buffer
,
672 str
= scm_from_stringn (buffer
, bytes_read
, pt
->encoding
, pt
->ilseq_handler
);
674 result
= scm_string_to_number (str
, SCM_UNDEFINED
);
675 if (scm_is_false (result
))
677 /* Return a symbol instead of a number */
678 if (opts
->case_insensitive_p
)
679 str
= scm_string_downcase_x (str
);
680 result
= scm_string_to_symbol (str
);
682 else if (SCM_NIMP (result
))
683 result
= maybe_annotate_source (result
, port
, opts
, line
, column
);
685 SCM_COL (port
) += scm_i_string_length (str
);
690 scm_read_mixed_case_symbol (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
)
693 int ends_with_colon
= 0;
695 int postfix
= (opts
->keyword_style
== KEYWORD_STYLE_POSTFIX
);
696 char local_buffer
[READER_BUFFER_SIZE
], *buffer
;
697 scm_t_port
*pt
= SCM_PTAB_ENTRY (port
);
700 scm_ungetc (chr
, port
);
701 buffer
= read_complete_token (port
, opts
, local_buffer
, sizeof local_buffer
,
704 ends_with_colon
= buffer
[bytes_read
- 1] == ':';
706 if (postfix
&& ends_with_colon
&& (bytes_read
> 1))
708 str
= scm_from_stringn (buffer
, bytes_read
- 1,
709 pt
->encoding
, pt
->ilseq_handler
);
711 if (opts
->case_insensitive_p
)
712 str
= scm_string_downcase_x (str
);
713 result
= scm_symbol_to_keyword (scm_string_to_symbol (str
));
717 str
= scm_from_stringn (buffer
, bytes_read
,
718 pt
->encoding
, pt
->ilseq_handler
);
720 if (opts
->case_insensitive_p
)
721 str
= scm_string_downcase_x (str
);
722 result
= scm_string_to_symbol (str
);
725 SCM_COL (port
) += scm_i_string_length (str
);
730 scm_read_number_and_radix (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
)
731 #define FUNC_NAME "scm_lreadr"
735 char local_buffer
[READER_BUFFER_SIZE
], *buffer
;
763 scm_ungetc (chr
, port
);
764 scm_ungetc ('#', port
);
768 buffer
= read_complete_token (port
, opts
, local_buffer
, sizeof local_buffer
,
771 pt
= SCM_PTAB_ENTRY (port
);
772 str
= scm_from_stringn (buffer
, read
, pt
->encoding
, pt
->ilseq_handler
);
774 result
= scm_string_to_number (str
, scm_from_uint (radix
));
776 SCM_COL (port
) += scm_i_string_length (str
);
778 if (scm_is_true (result
))
781 scm_i_input_error (FUNC_NAME
, port
, "unknown # object", SCM_EOL
);
788 scm_read_quote (int chr
, SCM port
, scm_t_read_opts
*opts
)
791 long line
= SCM_LINUM (port
);
792 int column
= SCM_COL (port
) - 1;
797 p
= scm_sym_quasiquote
;
810 p
= scm_sym_uq_splicing
;
813 scm_ungetc (c
, port
);
820 fprintf (stderr
, "%s: unhandled quote character (%i)\n",
821 "scm_read_quote", chr
);
825 p
= scm_cons2 (p
, scm_read_expression (port
, opts
), SCM_EOL
);
826 return maybe_annotate_source (p
, port
, opts
, line
, column
);
829 SCM_SYMBOL (sym_syntax
, "syntax");
830 SCM_SYMBOL (sym_quasisyntax
, "quasisyntax");
831 SCM_SYMBOL (sym_unsyntax
, "unsyntax");
832 SCM_SYMBOL (sym_unsyntax_splicing
, "unsyntax-splicing");
835 scm_read_syntax (int chr
, SCM port
, scm_t_read_opts
*opts
)
838 long line
= SCM_LINUM (port
);
839 int column
= SCM_COL (port
) - 1;
857 p
= sym_unsyntax_splicing
;
860 scm_ungetc (c
, port
);
867 fprintf (stderr
, "%s: unhandled syntax character (%i)\n",
868 "scm_read_syntax", chr
);
872 p
= scm_cons2 (p
, scm_read_expression (port
, opts
), SCM_EOL
);
873 return maybe_annotate_source (p
, port
, opts
, line
, column
);
877 scm_read_nil (int chr
, SCM port
, scm_t_read_opts
*opts
)
879 SCM id
= scm_read_mixed_case_symbol (chr
, port
, opts
);
881 if (!scm_is_eq (id
, sym_nil
))
882 scm_i_input_error ("scm_read_nil", port
,
883 "unexpected input while reading #nil: ~a",
886 return SCM_ELISP_NIL
;
890 scm_read_semicolon_comment (int chr
, SCM port
)
894 /* We use the get_byte here because there is no need to get the
895 locale correct with comment input. This presumes that newline
896 always represents itself no matter what the encoding is. */
897 for (c
= scm_get_byte_or_eof (port
);
898 (c
!= EOF
) && (c
!= '\n');
899 c
= scm_get_byte_or_eof (port
));
901 return SCM_UNSPECIFIED
;
905 /* Sharp readers, i.e. readers called after a `#' sign has been read. */
908 scm_read_boolean (int chr
, SCM port
)
921 return SCM_UNSPECIFIED
;
925 scm_read_character (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
)
926 #define FUNC_NAME "scm_lreadr"
928 char buffer
[READER_CHAR_NAME_MAX_SIZE
];
930 size_t charname_len
, bytes_read
;
935 overflow
= read_token (port
, opts
, buffer
, READER_CHAR_NAME_MAX_SIZE
,
938 scm_i_input_error (FUNC_NAME
, port
, "character name too long", SCM_EOL
);
942 chr
= scm_getc (port
);
944 scm_i_input_error (FUNC_NAME
, port
, "unexpected end of file "
945 "while reading character", SCM_EOL
);
947 /* CHR must be a token delimiter, like a whitespace. */
948 return (SCM_MAKE_CHAR (chr
));
951 pt
= SCM_PTAB_ENTRY (port
);
953 /* Simple ASCII characters can be processed immediately. Also, simple
954 ISO-8859-1 characters can be processed immediately if the encoding for this
955 port is ISO-8859-1. */
956 if (bytes_read
== 1 && ((unsigned char) buffer
[0] <= 127 || pt
->encoding
== NULL
))
959 return SCM_MAKE_CHAR (buffer
[0]);
962 /* Otherwise, convert the buffer into a proper scheme string for
964 charname
= scm_from_stringn (buffer
, bytes_read
, pt
->encoding
,
966 charname_len
= scm_i_string_length (charname
);
967 SCM_COL (port
) += charname_len
;
968 cp
= scm_i_string_ref (charname
, 0);
969 if (charname_len
== 1)
970 return SCM_MAKE_CHAR (cp
);
972 /* Ignore dotted circles, which may be used to keep combining characters from
973 combining with the backslash in #\charname. */
974 if (cp
== SCM_CODEPOINT_DOTTED_CIRCLE
&& charname_len
== 2)
975 return SCM_MAKE_CHAR (scm_i_string_ref (charname
, 1));
977 if (cp
>= '0' && cp
< '8')
979 /* Dirk:FIXME:: This type of character syntax is not R5RS
980 * compliant. Further, it should be verified that the constant
981 * does only consist of octal digits. */
982 SCM p
= scm_string_to_number (charname
, scm_from_uint (8));
985 scm_t_wchar c
= scm_to_uint32 (p
);
986 if (SCM_IS_UNICODE_CHAR (c
))
987 return SCM_MAKE_CHAR (c
);
989 scm_i_input_error (FUNC_NAME
, port
,
990 "out-of-range octal character escape: ~a",
991 scm_list_1 (charname
));
995 if (cp
== 'x' && (charname_len
> 1))
999 /* Convert from hex, skipping the initial 'x' character in CHARNAME */
1000 p
= scm_string_to_number (scm_c_substring (charname
, 1, charname_len
),
1001 scm_from_uint (16));
1002 if (SCM_I_INUMP (p
))
1004 scm_t_wchar c
= scm_to_uint32 (p
);
1005 if (SCM_IS_UNICODE_CHAR (c
))
1006 return SCM_MAKE_CHAR (c
);
1008 scm_i_input_error (FUNC_NAME
, port
,
1009 "out-of-range hex character escape: ~a",
1010 scm_list_1 (charname
));
1014 /* The names of characters should never have non-Latin1
1016 if (scm_i_is_narrow_string (charname
)
1017 || scm_i_try_narrow_string (charname
))
1018 { SCM ch
= scm_i_charname_to_char (scm_i_string_chars (charname
),
1020 if (scm_is_true (ch
))
1024 scm_i_input_error (FUNC_NAME
, port
, "unknown character name ~a",
1025 scm_list_1 (charname
));
1027 return SCM_UNSPECIFIED
;
1032 scm_read_keyword (int chr
, SCM port
, scm_t_read_opts
*opts
)
1036 /* Read the symbol that comprises the keyword. Doing this instead of
1037 invoking a specific symbol reader function allows `scm_read_keyword ()'
1038 to adapt to the delimiters currently valid of symbols.
1040 XXX: This implementation allows sloppy syntaxes like `#: key'. */
1041 symbol
= scm_read_expression (port
, opts
);
1042 if (!scm_is_symbol (symbol
))
1043 scm_i_input_error ("scm_read_keyword", port
,
1044 "keyword prefix `~a' not followed by a symbol: ~s",
1045 scm_list_2 (SCM_MAKE_CHAR (chr
), symbol
));
1047 return (scm_symbol_to_keyword (symbol
));
1051 scm_read_vector (int chr
, SCM port
, scm_t_read_opts
*opts
,
1052 long line
, int column
)
1054 /* Note: We call `scm_read_sexp ()' rather than READER here in order to
1055 guarantee that it's going to do what we want. After all, this is an
1056 implementation detail of `scm_read_vector ()', not a desirable
1058 return maybe_annotate_source (scm_vector (scm_read_sexp (chr
, port
, opts
)),
1059 port
, opts
, line
, column
);
1062 /* Helper used by scm_read_array */
1064 read_decimal_integer (SCM port
, int c
, ssize_t
*resp
)
1073 c
= scm_getc (port
);
1076 while ('0' <= c
&& c
<= '9')
1078 res
= 10*res
+ c
-'0';
1080 c
= scm_getc (port
);
1088 /* Read an array. This function can also read vectors and uniform
1089 vectors. Also, the conflict between '#f' and '#f32' and '#f64' is
1092 C is the first character read after the '#'.
1095 scm_read_array (int c
, SCM port
, scm_t_read_opts
*opts
, long line
, int column
)
1098 scm_t_wchar tag_buf
[8];
1101 SCM tag
, shape
= SCM_BOOL_F
, elements
, array
;
1103 /* XXX - shortcut for ordinary vectors. Shouldn't be necessary but
1104 the array code can not deal with zero-length dimensions yet, and
1105 we want to allow zero-length vectors, of course.
1108 return scm_read_vector (c
, port
, opts
, line
, column
);
1110 /* Disambiguate between '#f' and uniform floating point vectors.
1114 c
= scm_getc (port
);
1115 if (c
!= '3' && c
!= '6')
1118 scm_ungetc (c
, port
);
1124 goto continue_reading_tag
;
1129 c
= read_decimal_integer (port
, c
, &rank
);
1131 scm_i_input_error (NULL
, port
, "array rank must be non-negative",
1136 continue_reading_tag
:
1137 while (c
!= EOF
&& c
!= '(' && c
!= '@' && c
!= ':'
1138 && tag_len
< sizeof tag_buf
/ sizeof tag_buf
[0])
1140 tag_buf
[tag_len
++] = c
;
1141 c
= scm_getc (port
);
1147 tag
= scm_string_to_symbol (scm_from_utf32_stringn (tag_buf
, tag_len
));
1148 if (tag_len
== sizeof tag_buf
/ sizeof tag_buf
[0])
1149 scm_i_input_error (NULL
, port
, "invalid array tag, starting with: ~a",
1154 if (c
== '@' || c
== ':')
1160 ssize_t lbnd
= 0, len
= 0;
1165 c
= scm_getc (port
);
1166 c
= read_decimal_integer (port
, c
, &lbnd
);
1169 s
= scm_from_ssize_t (lbnd
);
1173 c
= scm_getc (port
);
1174 c
= read_decimal_integer (port
, c
, &len
);
1176 scm_i_input_error (NULL
, port
,
1177 "array length must be non-negative",
1180 s
= scm_list_2 (s
, scm_from_ssize_t (lbnd
+len
-1));
1183 shape
= scm_cons (s
, shape
);
1184 } while (c
== '@' || c
== ':');
1186 shape
= scm_reverse_x (shape
, SCM_EOL
);
1189 /* Read nested lists of elements. */
1191 scm_i_input_error (NULL
, port
,
1192 "missing '(' in vector or array literal",
1194 elements
= scm_read_sexp (c
, port
, opts
);
1196 if (scm_is_false (shape
))
1197 shape
= scm_from_ssize_t (rank
);
1198 else if (scm_ilength (shape
) != rank
)
1201 "the number of shape specifications must match the array rank",
1204 /* Handle special print syntax of rank zero arrays; see
1205 scm_i_print_array for a rationale. */
1208 if (!scm_is_pair (elements
))
1209 scm_i_input_error (NULL
, port
,
1210 "too few elements in array literal, need 1",
1212 if (!scm_is_null (SCM_CDR (elements
)))
1213 scm_i_input_error (NULL
, port
,
1214 "too many elements in array literal, want 1",
1216 elements
= SCM_CAR (elements
);
1219 /* Construct array, annotate with source location, and return. */
1220 array
= scm_list_to_typed_array (tag
, shape
, elements
);
1221 return maybe_annotate_source (array
, port
, opts
, line
, column
);
1225 scm_read_srfi4_vector (int chr
, SCM port
, scm_t_read_opts
*opts
,
1226 long line
, int column
)
1228 return scm_read_array (chr
, port
, opts
, line
, column
);
1232 scm_read_bytevector (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
,
1233 long line
, int column
)
1235 chr
= scm_getc (port
);
1239 chr
= scm_getc (port
);
1243 chr
= scm_getc (port
);
1247 return maybe_annotate_source
1248 (scm_u8_list_to_bytevector (scm_read_sexp (chr
, port
, opts
)),
1249 port
, opts
, line
, column
);
1252 scm_i_input_error ("read_bytevector", port
,
1253 "invalid bytevector prefix",
1254 SCM_MAKE_CHAR (chr
));
1255 return SCM_UNSPECIFIED
;
1259 scm_read_guile_bit_vector (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
,
1260 long line
, int column
)
1262 /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
1263 terribly inefficient but who cares? */
1264 SCM s_bits
= SCM_EOL
;
1266 for (chr
= scm_getc (port
);
1267 (chr
!= EOF
) && ((chr
== '0') || (chr
== '1'));
1268 chr
= scm_getc (port
))
1270 s_bits
= scm_cons ((chr
== '0') ? SCM_BOOL_F
: SCM_BOOL_T
, s_bits
);
1274 scm_ungetc (chr
, port
);
1276 return maybe_annotate_source
1277 (scm_bitvector (scm_reverse_x (s_bits
, SCM_EOL
)),
1278 port
, opts
, line
, column
);
1282 scm_read_scsh_block_comment (scm_t_wchar chr
, SCM port
)
1288 int c
= scm_getc (port
);
1291 scm_i_input_error ("skip_block_comment", port
,
1292 "unterminated `#! ... !#' comment", SCM_EOL
);
1296 else if (c
== '#' && bang_seen
)
1302 return SCM_UNSPECIFIED
;
1306 scm_read_shebang (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
)
1309 if ((c
= scm_get_byte_or_eof (port
)) != 'r')
1311 scm_ungetc (c
, port
);
1312 return scm_read_scsh_block_comment (chr
, port
);
1314 if ((c
= scm_get_byte_or_eof (port
)) != '6')
1316 scm_ungetc (c
, port
);
1317 scm_ungetc ('r', port
);
1318 return scm_read_scsh_block_comment (chr
, port
);
1320 if ((c
= scm_get_byte_or_eof (port
)) != 'r')
1322 scm_ungetc (c
, port
);
1323 scm_ungetc ('6', port
);
1324 scm_ungetc ('r', port
);
1325 return scm_read_scsh_block_comment (chr
, port
);
1327 if ((c
= scm_get_byte_or_eof (port
)) != 's')
1329 scm_ungetc (c
, port
);
1330 scm_ungetc ('r', port
);
1331 scm_ungetc ('6', port
);
1332 scm_ungetc ('r', port
);
1333 return scm_read_scsh_block_comment (chr
, port
);
1336 return SCM_UNSPECIFIED
;
1340 scm_read_r6rs_block_comment (scm_t_wchar chr
, SCM port
)
1342 /* Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
1343 nested. So care must be taken. */
1344 int nesting_level
= 1;
1346 int a
= scm_getc (port
);
1349 scm_i_input_error ("scm_read_r6rs_block_comment", port
,
1350 "unterminated `#| ... |#' comment", SCM_EOL
);
1352 while (nesting_level
> 0)
1354 int b
= scm_getc (port
);
1357 scm_i_input_error ("scm_read_r6rs_block_comment", port
,
1358 "unterminated `#| ... |#' comment", SCM_EOL
);
1360 if (a
== '|' && b
== '#')
1365 else if (a
== '#' && b
== '|')
1374 return SCM_UNSPECIFIED
;
1378 scm_read_commented_expression (scm_t_wchar chr
, SCM port
,
1379 scm_t_read_opts
*opts
)
1383 c
= flush_ws (port
, opts
, (char *) NULL
);
1385 scm_i_input_error ("read_commented_expression", port
,
1386 "no expression after #; comment", SCM_EOL
);
1387 scm_ungetc (c
, port
);
1388 scm_read_expression (port
, opts
);
1389 return SCM_UNSPECIFIED
;
1393 scm_read_extended_symbol (scm_t_wchar chr
, SCM port
)
1395 /* Guile's extended symbol read syntax looks like this:
1397 #{This is all a symbol name}#
1399 So here, CHR is expected to be `{'. */
1402 SCM buf
= scm_i_make_string (1024, NULL
, 0);
1404 buf
= scm_i_string_start_writing (buf
);
1406 while ((chr
= scm_getc (port
)) != EOF
)
1417 scm_i_string_set_x (buf
, len
++, '}');
1423 else if (chr
== '\\')
1425 /* It used to be that print.c would print extended-read-syntax
1426 symbols with backslashes before "non-standard" chars, but
1427 this routine wouldn't do anything with those escapes.
1428 Bummer. What we've done is to change print.c to output
1429 R6RS hex escapes for those characters, relying on the fact
1430 that the extended read syntax would never put a `\' before
1431 an `x'. For now, we just ignore other instances of
1432 backslash in the string. */
1433 switch ((chr
= scm_getc (port
)))
1441 SCM_READ_HEX_ESCAPE (10, ';');
1442 scm_i_string_set_x (buf
, len
++, c
);
1450 scm_i_string_stop_writing ();
1451 scm_i_input_error ("scm_read_extended_symbol", port
,
1452 "illegal character in escape sequence: ~S",
1453 scm_list_1 (SCM_MAKE_CHAR (c
)));
1457 scm_i_string_set_x (buf
, len
++, chr
);
1462 scm_i_string_set_x (buf
, len
++, chr
);
1464 if (len
>= scm_i_string_length (buf
) - 2)
1468 scm_i_string_stop_writing ();
1469 addy
= scm_i_make_string (1024, NULL
, 0);
1470 buf
= scm_string_append (scm_list_2 (buf
, addy
));
1472 buf
= scm_i_string_start_writing (buf
);
1477 scm_i_string_stop_writing ();
1479 scm_i_input_error ("scm_read_extended_symbol", port
,
1480 "end of file while reading symbol", SCM_EOL
);
1482 return (scm_string_to_symbol (scm_c_substring (buf
, 0, len
)));
1487 /* Top-level token readers, i.e., dispatchers. */
1490 scm_read_sharp_extension (int chr
, SCM port
, scm_t_read_opts
*opts
)
1494 proc
= scm_get_hash_procedure (chr
);
1495 if (scm_is_true (scm_procedure_p (proc
)))
1497 long line
= SCM_LINUM (port
);
1498 int column
= SCM_COL (port
) - 2;
1501 got
= scm_call_2 (proc
, SCM_MAKE_CHAR (chr
), port
);
1503 if (opts
->record_positions_p
&& SCM_NIMP (got
)
1504 && !scm_i_has_source_properties (got
))
1505 scm_i_set_source_properties_x (got
, line
, column
, SCM_FILENAME (port
));
1510 return SCM_UNSPECIFIED
;
1513 /* The reader for the sharp `#' character. It basically dispatches reads
1514 among the above token readers. */
1516 scm_read_sharp (scm_t_wchar chr
, SCM port
, scm_t_read_opts
*opts
,
1517 long line
, int column
)
1518 #define FUNC_NAME "scm_lreadr"
1522 chr
= scm_getc (port
);
1524 result
= scm_read_sharp_extension (chr
, port
, opts
);
1525 if (!scm_is_eq (result
, SCM_UNSPECIFIED
))
1531 return (scm_read_character (chr
, port
, opts
));
1533 return (scm_read_vector (chr
, port
, opts
, line
, column
));
1538 /* This one may return either a boolean or an SRFI-4 vector. */
1539 return (scm_read_srfi4_vector (chr
, port
, opts
, line
, column
));
1541 return (scm_read_bytevector (chr
, port
, opts
, line
, column
));
1543 return (scm_read_guile_bit_vector (chr
, port
, opts
, line
, column
));
1547 return (scm_read_boolean (chr
, port
));
1549 return (scm_read_keyword (chr
, port
, opts
));
1550 case '0': case '1': case '2': case '3': case '4':
1551 case '5': case '6': case '7': case '8': case '9':
1553 #if SCM_ENABLE_DEPRECATED
1554 /* See below for 'i' and 'e'. */
1560 return (scm_read_array (chr
, port
, opts
, line
, column
));
1564 #if SCM_ENABLE_DEPRECATED
1566 /* When next char is '(', it really is an old-style
1568 scm_t_wchar next_c
= scm_getc (port
);
1570 scm_ungetc (next_c
, port
);
1572 return scm_read_array (chr
, port
, opts
, line
, column
);
1586 return (scm_read_number_and_radix (chr
, port
, opts
));
1588 return (scm_read_extended_symbol (chr
, port
));
1590 return (scm_read_shebang (chr
, port
, opts
));
1592 return (scm_read_commented_expression (chr
, port
, opts
));
1596 return (scm_read_syntax (chr
, port
, opts
));
1598 return (scm_read_nil (chr
, port
, opts
));
1600 result
= scm_read_sharp_extension (chr
, port
, opts
);
1601 if (scm_is_eq (result
, SCM_UNSPECIFIED
))
1603 /* To remain compatible with 1.8 and earlier, the following
1604 characters have lower precedence than `read-hash-extend'
1609 return scm_read_r6rs_block_comment (chr
, port
);
1611 scm_i_input_error (FUNC_NAME
, port
, "Unknown # object: ~S",
1612 scm_list_1 (SCM_MAKE_CHAR (chr
)));
1619 return SCM_UNSPECIFIED
;
1624 scm_read_expression (SCM port
, scm_t_read_opts
*opts
)
1625 #define FUNC_NAME "scm_read_expression"
1631 chr
= scm_getc (port
);
1635 case SCM_WHITE_SPACES
:
1636 case SCM_LINE_INCREMENTORS
:
1639 (void) scm_read_semicolon_comment (chr
, port
);
1642 if (!opts
->square_brackets_p
)
1643 return (scm_read_mixed_case_symbol (chr
, port
, opts
));
1644 /* otherwise fall through */
1646 return (scm_read_sexp (chr
, port
, opts
));
1648 return (scm_read_string (chr
, port
, opts
));
1652 return (scm_read_quote (chr
, port
, opts
));
1655 long line
= SCM_LINUM (port
);
1656 int column
= SCM_COL (port
) - 1;
1657 SCM result
= scm_read_sharp (chr
, port
, opts
, line
, column
);
1658 if (scm_is_eq (result
, SCM_UNSPECIFIED
))
1659 /* We read a comment or some such. */
1665 scm_i_input_error (FUNC_NAME
, port
, "unexpected \")\"", SCM_EOL
);
1668 if (opts
->square_brackets_p
)
1669 scm_i_input_error (FUNC_NAME
, port
, "unexpected \"]\"", SCM_EOL
);
1670 /* otherwise fall through */
1674 if (opts
->keyword_style
== KEYWORD_STYLE_PREFIX
)
1675 return scm_symbol_to_keyword (scm_read_expression (port
, opts
));
1680 if (((chr
>= '0') && (chr
<= '9'))
1681 || (strchr ("+-.", chr
)))
1682 return (scm_read_number (chr
, port
, opts
));
1684 return (scm_read_mixed_case_symbol (chr
, port
, opts
));
1692 /* Actual reader. */
1694 SCM_DEFINE (scm_read
, "read", 0, 1, 0,
1696 "Read an s-expression from the input port @var{port}, or from\n"
1697 "the current input port if @var{port} is not specified.\n"
1698 "Any whitespace before the next token is discarded.")
1699 #define FUNC_NAME s_scm_read
1701 scm_t_read_opts opts
;
1704 if (SCM_UNBNDP (port
))
1705 port
= scm_current_input_port ();
1706 SCM_VALIDATE_OPINPORT (1, port
);
1708 init_read_options (&opts
);
1710 c
= flush_ws (port
, &opts
, (char *) NULL
);
1713 scm_ungetc (c
, port
);
1715 return (scm_read_expression (port
, &opts
));
1722 /* Manipulate the read-hash-procedures alist. This could be written in
1723 Scheme, but maybe it will also be used by C code during initialisation. */
1724 SCM_DEFINE (scm_read_hash_extend
, "read-hash-extend", 2, 0, 0,
1725 (SCM chr
, SCM proc
),
1726 "Install the procedure @var{proc} for reading expressions\n"
1727 "starting with the character sequence @code{#} and @var{chr}.\n"
1728 "@var{proc} will be called with two arguments: the character\n"
1729 "@var{chr} and the port to read further data from. The object\n"
1730 "returned will be the return value of @code{read}. \n"
1731 "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1733 #define FUNC_NAME s_scm_read_hash_extend
1738 SCM_VALIDATE_CHAR (1, chr
);
1739 SCM_ASSERT (scm_is_false (proc
)
1740 || scm_is_eq (scm_procedure_p (proc
), SCM_BOOL_T
),
1741 proc
, SCM_ARG2
, FUNC_NAME
);
1743 /* Check if chr is already in the alist. */
1744 this = scm_i_read_hash_procedures_ref ();
1748 if (scm_is_null (this))
1750 /* not found, so add it to the beginning. */
1751 if (scm_is_true (proc
))
1753 SCM
new = scm_cons (scm_cons (chr
, proc
),
1754 scm_i_read_hash_procedures_ref ());
1755 scm_i_read_hash_procedures_set_x (new);
1759 if (scm_is_eq (chr
, SCM_CAAR (this)))
1761 /* already in the alist. */
1762 if (scm_is_false (proc
))
1765 if (scm_is_false (prev
))
1767 SCM rest
= SCM_CDR (scm_i_read_hash_procedures_ref ());
1768 scm_i_read_hash_procedures_set_x (rest
);
1771 scm_set_cdr_x (prev
, SCM_CDR (this));
1776 scm_set_cdr_x (SCM_CAR (this), proc
);
1781 this = SCM_CDR (this);
1784 return SCM_UNSPECIFIED
;
1788 /* Recover the read-hash procedure corresponding to char c. */
1790 scm_get_hash_procedure (int c
)
1792 SCM rest
= scm_i_read_hash_procedures_ref ();
1796 if (scm_is_null (rest
))
1799 if (SCM_CHAR (SCM_CAAR (rest
)) == c
)
1800 return SCM_CDAR (rest
);
1802 rest
= SCM_CDR (rest
);
1806 #define SCM_ENCODING_SEARCH_SIZE (500)
1808 /* Search the first few hundred characters of a file for an Emacs-like coding
1809 declaration. Returns either NULL or a string whose storage has been
1810 allocated with `scm_gc_malloc ()'. */
1812 scm_i_scan_for_encoding (SCM port
)
1815 char header
[SCM_ENCODING_SEARCH_SIZE
+1];
1816 size_t bytes_read
, encoding_length
, i
;
1817 char *encoding
= NULL
;
1819 char *pos
, *encoding_start
;
1822 pt
= SCM_PTAB_ENTRY (port
);
1824 if (pt
->rw_active
== SCM_PORT_WRITE
)
1828 pt
->rw_active
= SCM_PORT_READ
;
1830 if (pt
->read_pos
== pt
->read_end
)
1832 /* We can use the read buffer, and thus avoid a seek. */
1833 if (scm_fill_input (port
) == EOF
)
1836 bytes_read
= pt
->read_end
- pt
->read_pos
;
1837 if (bytes_read
> SCM_ENCODING_SEARCH_SIZE
)
1838 bytes_read
= SCM_ENCODING_SEARCH_SIZE
;
1840 if (bytes_read
<= 1)
1841 /* An unbuffered port -- don't scan. */
1844 memcpy (header
, pt
->read_pos
, bytes_read
);
1845 header
[bytes_read
] = '\0';
1849 /* Try to read some bytes and then seek back. Not all ports
1850 support seeking back; and indeed some file ports (like
1851 /dev/urandom) will succeed on an lseek (fd, 0, SEEK_CUR)---the
1852 check performed by SCM_FPORT_FDES---but fail to seek
1853 backwards. Hence this block comes second. We prefer to use
1854 the read buffer in-place. */
1855 if (SCM_FPORTP (port
) && !SCM_FDES_RANDOM_P (SCM_FPORT_FDES (port
)))
1858 bytes_read
= scm_c_read (port
, header
, SCM_ENCODING_SEARCH_SIZE
);
1859 header
[bytes_read
] = '\0';
1860 scm_seek (port
, scm_from_int (0), scm_from_int (SEEK_SET
));
1864 && header
[0] == '\xef' && header
[1] == '\xbb' && header
[2] == '\xbf')
1867 /* search past "coding[:=]" */
1871 if ((pos
= strstr(pos
, "coding")) == NULL
)
1874 pos
+= strlen("coding");
1875 if (pos
- header
>= SCM_ENCODING_SEARCH_SIZE
||
1876 (*pos
== ':' || *pos
== '='))
1884 while (pos
- header
<= SCM_ENCODING_SEARCH_SIZE
&&
1885 (*pos
== ' ' || *pos
== '\t'))
1888 /* grab the next token */
1889 encoding_start
= pos
;
1891 while (encoding_start
+ i
- header
<= SCM_ENCODING_SEARCH_SIZE
1892 && encoding_start
+ i
- header
< bytes_read
1893 && (isalnum ((int) encoding_start
[i
])
1894 || strchr ("_-.:/,+=()", encoding_start
[i
]) != NULL
))
1897 encoding_length
= i
;
1898 if (encoding_length
== 0)
1901 encoding
= scm_gc_strndup (encoding_start
, encoding_length
, "encoding");
1902 for (i
= 0; i
< encoding_length
; i
++)
1903 encoding
[i
] = toupper ((int) encoding
[i
]);
1905 /* push backwards to make sure we were in a comment */
1907 pos
= encoding_start
;
1908 while (pos
>= header
)
1915 else if (*pos
== '\n' || pos
== header
)
1917 /* This wasn't in a semicolon comment. Check for a
1918 hash-bang comment. */
1919 char *beg
= strstr (header
, "#!");
1920 char *end
= strstr (header
, "!#");
1921 if (beg
< encoding_start
&& encoding_start
+ encoding_length
<= end
)
1932 /* This wasn't in a comment */
1935 if (utf8_bom
&& strcmp(encoding
, "UTF-8"))
1936 scm_misc_error (NULL
,
1937 "the port input declares the encoding ~s but is encoded as UTF-8",
1938 scm_list_1 (scm_from_locale_string (encoding
)));
1943 SCM_DEFINE (scm_file_encoding
, "file-encoding", 1, 0, 0,
1945 "Scans the port for an Emacs-like character coding declaration\n"
1946 "near the top of the contents of a port with random-accessible contents.\n"
1947 "The coding declaration is of the form\n"
1948 "@code{coding: XXXXX} and must appear in a scheme comment.\n"
1950 "Returns a string containing the character encoding of the file\n"
1951 "if a declaration was found, or @code{#f} otherwise.\n")
1952 #define FUNC_NAME s_scm_file_encoding
1957 SCM_VALIDATE_OPINPORT (SCM_ARG1
, port
);
1959 enc
= scm_i_scan_for_encoding (port
);
1964 s_enc
= scm_from_locale_string (enc
);
1975 SCM read_hash_procs
;
1977 read_hash_procs
= scm_make_fluid_with_default (SCM_EOL
);
1979 scm_i_read_hash_procedures
=
1980 SCM_VARIABLE_LOC (scm_c_define ("%read-hash-procedures", read_hash_procs
));
1982 scm_init_opts (scm_read_options
, scm_read_opts
);
1983 #include "libguile/read.x"