Merge remote-tracking branch 'origin/stable-2.0'
[bpt/guile.git] / libguile / read.c
1 /* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006,
2 * 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
18 */
19
20
21 \f
22
23 #ifdef HAVE_CONFIG_H
24 # include <config.h>
25 #endif
26
27 #include <stdio.h>
28 #include <string.h>
29 #include <unistd.h>
30 #include <unicase.h>
31 #include <unictype.h>
32
33 #include "libguile/_scm.h"
34 #include "libguile/bytevectors.h"
35 #include "libguile/chars.h"
36 #include "libguile/eval.h"
37 #include "libguile/arrays.h"
38 #include "libguile/bitvectors.h"
39 #include "libguile/keywords.h"
40 #include "libguile/alist.h"
41 #include "libguile/srcprop.h"
42 #include "libguile/hashtab.h"
43 #include "libguile/hash.h"
44 #include "libguile/ports.h"
45 #include "libguile/fports.h"
46 #include "libguile/root.h"
47 #include "libguile/strings.h"
48 #include "libguile/strports.h"
49 #include "libguile/vectors.h"
50 #include "libguile/validate.h"
51 #include "libguile/srfi-4.h"
52 #include "libguile/srfi-13.h"
53
54 #include "libguile/read.h"
55 #include "libguile/private-options.h"
56
57
58 \f
59
60 SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
61 SCM_SYMBOL (scm_keyword_prefix, "prefix");
62 SCM_SYMBOL (scm_keyword_postfix, "postfix");
63 SCM_SYMBOL (sym_nil, "nil");
64
65 /* SRFI-105 curly infix expression support */
66 SCM_SYMBOL (sym_nfx, "$nfx$");
67 SCM_SYMBOL (sym_bracket_list, "$bracket-list$");
68 SCM_SYMBOL (sym_bracket_apply, "$bracket-apply$");
69
70 scm_t_option scm_read_opts[] =
71 {
72 { SCM_OPTION_BOOLEAN, "copy", 0,
73 "Copy source code expressions." },
74 { SCM_OPTION_BOOLEAN, "positions", 1,
75 "Record positions of source code expressions." },
76 { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
77 "Convert symbols to lower case."},
78 { SCM_OPTION_SCM, "keywords", (scm_t_bits) SCM_BOOL_F_BITS,
79 "Style of keyword recognition: #f, 'prefix or 'postfix."},
80 { SCM_OPTION_BOOLEAN, "r6rs-hex-escapes", 0,
81 "Use R6RS variable-length character and string hex escapes."},
82 { SCM_OPTION_BOOLEAN, "square-brackets", 1,
83 "Treat `[' and `]' as parentheses, for R6RS compatibility."},
84 { SCM_OPTION_BOOLEAN, "hungry-eol-escapes", 0,
85 "In strings, consume leading whitespace after an escaped end-of-line."},
86 { SCM_OPTION_BOOLEAN, "curly-infix", 0,
87 "Support SRFI-105 curly infix expressions."},
88 { 0, },
89 };
90
91 /* Internal read options structure. This is initialized by 'scm_read'
92 from the global and per-port read options, and a pointer is passed
93 down to all helper functions. */
94
95 enum t_keyword_style
96 {
97 KEYWORD_STYLE_HASH_PREFIX,
98 KEYWORD_STYLE_PREFIX,
99 KEYWORD_STYLE_POSTFIX
100 };
101
102 struct t_read_opts
103 {
104 enum t_keyword_style keyword_style;
105 unsigned int copy_source_p : 1;
106 unsigned int record_positions_p : 1;
107 unsigned int case_insensitive_p : 1;
108 unsigned int r6rs_escapes_p : 1;
109 unsigned int square_brackets_p : 1;
110 unsigned int hungry_eol_escapes_p : 1;
111 unsigned int curly_infix_p : 1;
112 unsigned int neoteric_p : 1;
113 };
114
115 typedef struct t_read_opts scm_t_read_opts;
116
117
118 /*
119 Give meaningful error messages for errors
120
121 We use the format
122
123 FILE:LINE:COL: MESSAGE
124 This happened in ....
125
126 This is not standard GNU format, but the test-suite likes the real
127 message to be in front.
128
129 */
130
131
132 void
133 scm_i_input_error (char const *function,
134 SCM port, const char *message, SCM arg)
135 {
136 SCM fn = (scm_is_string (SCM_FILENAME(port))
137 ? SCM_FILENAME(port)
138 : scm_from_locale_string ("#<unknown port>"));
139
140 SCM string_port = scm_open_output_string ();
141 SCM string = SCM_EOL;
142 scm_simple_format (string_port,
143 scm_from_locale_string ("~A:~S:~S: ~A"),
144 scm_list_4 (fn,
145 scm_from_long (SCM_LINUM (port) + 1),
146 scm_from_int (SCM_COL (port) + 1),
147 scm_from_locale_string (message)));
148
149 string = scm_get_output_string (string_port);
150 scm_close_output_port (string_port);
151 scm_error_scm (scm_from_latin1_symbol ("read-error"),
152 function? scm_from_locale_string (function) : SCM_BOOL_F,
153 string,
154 arg,
155 SCM_BOOL_F);
156 }
157
158
159 SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
160 (SCM setting),
161 "Option interface for the read options. Instead of using\n"
162 "this procedure directly, use the procedures @code{read-enable},\n"
163 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
164 #define FUNC_NAME s_scm_read_options
165 {
166 SCM ans = scm_options (setting,
167 scm_read_opts,
168 FUNC_NAME);
169 if (SCM_COPY_SOURCE_P)
170 SCM_RECORD_POSITIONS_P = 1;
171 return ans;
172 }
173 #undef FUNC_NAME
174
175 /* A fluid referring to an association list mapping extra hash
176 characters to procedures. */
177 static SCM *scm_i_read_hash_procedures;
178
179 static SCM
180 scm_i_read_hash_procedures_ref (void)
181 {
182 return scm_fluid_ref (*scm_i_read_hash_procedures);
183 }
184
185 static void
186 scm_i_read_hash_procedures_set_x (SCM value)
187 {
188 scm_fluid_set_x (*scm_i_read_hash_procedures, value);
189 }
190
191 \f
192 /* Token readers. */
193
194
195 /* Size of the C buffer used to read symbols and numbers. */
196 #define READER_BUFFER_SIZE 128
197
198 /* Number of 32-bit codepoints in the buffer used to read strings. */
199 #define READER_STRING_BUFFER_SIZE 128
200
201 /* The maximum size of Scheme character names. */
202 #define READER_CHAR_NAME_MAX_SIZE 50
203
204 /* The maximum size of reader directive names. */
205 #define READER_DIRECTIVE_NAME_MAX_SIZE 50
206
207
208 /* `isblank' is only in C99. */
209 #define CHAR_IS_BLANK_(_chr) \
210 (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n') \
211 || ((_chr) == '\f') || ((_chr) == '\r'))
212
213 #ifdef MSDOS
214 # define CHAR_IS_BLANK(_chr) \
215 ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
216 #else
217 # define CHAR_IS_BLANK CHAR_IS_BLANK_
218 #endif
219
220
221 /* R5RS one-character delimiters (see section 7.1.1, ``Lexical
222 structure''). */
223 #define CHAR_IS_R5RS_DELIMITER(c) \
224 (CHAR_IS_BLANK (c) \
225 || (c) == ')' || (c) == '(' || (c) == ';' || (c) == '"')
226
227 #define CHAR_IS_DELIMITER(c) \
228 (CHAR_IS_R5RS_DELIMITER (c) \
229 || (((c) == ']' || (c) == '[') && (opts->square_brackets_p \
230 || opts->curly_infix_p)) \
231 || (((c) == '}' || (c) == '{') && opts->curly_infix_p))
232
233 /* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
234 Structure''. */
235 #define CHAR_IS_EXPONENT_MARKER(_chr) \
236 (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
237 || ((_chr) == 'd') || ((_chr) == 'l'))
238
239 /* Read an SCSH block comment. */
240 static SCM scm_read_scsh_block_comment (scm_t_wchar, SCM);
241 static SCM scm_read_r6rs_block_comment (scm_t_wchar, SCM);
242 static SCM scm_read_commented_expression (scm_t_wchar, SCM, scm_t_read_opts *);
243 static SCM scm_read_shebang (scm_t_wchar, SCM, scm_t_read_opts *);
244 static SCM scm_get_hash_procedure (int);
245
246 /* Read from PORT until a delimiter (e.g., a whitespace) is read. Put the
247 result in the pre-allocated buffer BUF. Return zero if the whole token has
248 fewer than BUF_SIZE bytes, non-zero otherwise. READ will be set the number of
249 bytes actually read. */
250 static int
251 read_token (SCM port, scm_t_read_opts *opts,
252 char *buf, size_t buf_size, size_t *read)
253 {
254 *read = 0;
255
256 while (*read < buf_size)
257 {
258 int chr;
259
260 chr = scm_get_byte_or_eof_unlocked (port);
261
262 if (chr == EOF)
263 return 0;
264 else if (CHAR_IS_DELIMITER (chr))
265 {
266 scm_unget_byte_unlocked (chr, port);
267 return 0;
268 }
269 else
270 {
271 *buf = (char) chr;
272 buf++, (*read)++;
273 }
274 }
275
276 return 1;
277 }
278
279 /* Like `read_token', but return either BUFFER, or a GC-allocated buffer
280 if the token doesn't fit in BUFFER_SIZE bytes. */
281 static char *
282 read_complete_token (SCM port, scm_t_read_opts *opts,
283 char *buffer, size_t buffer_size, size_t *read)
284 {
285 int overflow = 0;
286 size_t bytes_read, overflow_size = 0;
287 char *overflow_buffer = NULL;
288
289 do
290 {
291 overflow = read_token (port, opts, buffer, buffer_size, &bytes_read);
292 if (bytes_read == 0)
293 break;
294 if (overflow || overflow_size != 0)
295 {
296 if (overflow_size == 0)
297 {
298 overflow_buffer = scm_gc_malloc_pointerless (bytes_read, "read");
299 memcpy (overflow_buffer, buffer, bytes_read);
300 overflow_size = bytes_read;
301 }
302 else
303 {
304 char *new_buf =
305 scm_gc_malloc_pointerless (overflow_size + bytes_read, "read");
306
307 memcpy (new_buf, overflow_buffer, overflow_size);
308 memcpy (new_buf + overflow_size, buffer, bytes_read);
309
310 overflow_buffer = new_buf;
311 overflow_size += bytes_read;
312 }
313 }
314 }
315 while (overflow);
316
317 if (overflow_size)
318 *read = overflow_size;
319 else
320 *read = bytes_read;
321
322 return (overflow_size > 0 ? overflow_buffer : buffer);
323 }
324
325 /* Skip whitespace from PORT and return the first non-whitespace character
326 read. Raise an error on end-of-file. */
327 static int
328 flush_ws (SCM port, scm_t_read_opts *opts, const char *eoferr)
329 {
330 scm_t_wchar c;
331 while (1)
332 switch (c = scm_getc_unlocked (port))
333 {
334 case EOF:
335 goteof:
336 if (eoferr)
337 {
338 scm_i_input_error (eoferr,
339 port,
340 "end of file",
341 SCM_EOL);
342 }
343 return c;
344
345 case ';':
346 lp:
347 switch (c = scm_getc_unlocked (port))
348 {
349 case EOF:
350 goto goteof;
351 default:
352 goto lp;
353 case SCM_LINE_INCREMENTORS:
354 break;
355 }
356 break;
357
358 case '#':
359 switch (c = scm_getc_unlocked (port))
360 {
361 case EOF:
362 eoferr = "read_sharp";
363 goto goteof;
364 case '!':
365 scm_read_shebang (c, port, opts);
366 break;
367 case ';':
368 scm_read_commented_expression (c, port, opts);
369 break;
370 case '|':
371 if (scm_is_false (scm_get_hash_procedure (c)))
372 {
373 scm_read_r6rs_block_comment (c, port);
374 break;
375 }
376 /* fall through */
377 default:
378 scm_ungetc_unlocked (c, port);
379 return '#';
380 }
381 break;
382
383 case SCM_LINE_INCREMENTORS:
384 case SCM_SINGLE_SPACES:
385 case '\t':
386 break;
387
388 default:
389 return c;
390 }
391
392 return 0;
393 }
394
395
396 \f
397 /* Token readers. */
398
399 static SCM scm_read_expression (SCM port, scm_t_read_opts *opts);
400 static SCM scm_read_sharp (int chr, SCM port, scm_t_read_opts *opts,
401 long line, int column);
402
403
404 static SCM
405 maybe_annotate_source (SCM x, SCM port, scm_t_read_opts *opts,
406 long line, int column)
407 {
408 if (opts->record_positions_p)
409 scm_i_set_source_properties_x (x, line, column, SCM_FILENAME (port));
410 return x;
411 }
412
413 static SCM
414 scm_read_sexp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
415 #define FUNC_NAME "scm_i_lreadparen"
416 {
417 int c;
418 SCM tmp, tl, ans = SCM_EOL;
419 const int curly_list_p = (chr == '{') && opts->curly_infix_p;
420 const int terminating_char = ((chr == '{') ? '}'
421 : ((chr == '[') ? ']'
422 : ')'));
423
424 /* Need to capture line and column numbers here. */
425 long line = SCM_LINUM (port);
426 int column = SCM_COL (port) - 1;
427
428 c = flush_ws (port, opts, FUNC_NAME);
429 if (terminating_char == c)
430 return SCM_EOL;
431
432 scm_ungetc_unlocked (c, port);
433 tmp = scm_read_expression (port, opts);
434
435 /* Note that it is possible for scm_read_expression to return
436 scm_sym_dot, but not as part of a dotted pair: as in #{.}#. So
437 check that it's a real dot by checking `c'. */
438 if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
439 {
440 ans = scm_read_expression (port, opts);
441 if (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
442 scm_i_input_error (FUNC_NAME, port, "missing close paren",
443 SCM_EOL);
444 return ans;
445 }
446
447 /* Build the head of the list structure. */
448 ans = tl = scm_cons (tmp, SCM_EOL);
449
450 while (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
451 {
452 SCM new_tail;
453
454 if (c == ')' || (c == ']' && opts->square_brackets_p)
455 || ((c == '}' || c == ']') && opts->curly_infix_p))
456 scm_i_input_error (FUNC_NAME, port,
457 "in pair: mismatched close paren: ~A",
458 scm_list_1 (SCM_MAKE_CHAR (c)));
459
460 scm_ungetc_unlocked (c, port);
461 tmp = scm_read_expression (port, opts);
462
463 /* See above note about scm_sym_dot. */
464 if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
465 {
466 SCM_SETCDR (tl, scm_read_expression (port, opts));
467
468 c = flush_ws (port, opts, FUNC_NAME);
469 if (terminating_char != c)
470 scm_i_input_error (FUNC_NAME, port,
471 "in pair: missing close paren", SCM_EOL);
472 break;
473 }
474
475 new_tail = scm_cons (tmp, SCM_EOL);
476 SCM_SETCDR (tl, new_tail);
477 tl = new_tail;
478 }
479
480 if (curly_list_p)
481 {
482 /* In addition to finding the length, 'scm_ilength' checks for
483 improper or circular lists, in which case it returns -1. */
484 int len = scm_ilength (ans);
485
486 /* The (len == 0) case is handled above */
487 if (len == 1)
488 /* Return directly to avoid re-annotating the element's source
489 location with the position of the outer brace. Also, it
490 might not be possible to annotate the element. */
491 return scm_car (ans); /* {e} => e */
492 else if (len == 2)
493 ; /* Leave the list unchanged: {e1 e2} => (e1 e2) */
494 else if (len >= 3 && (len & 1))
495 {
496 /* It's a proper list whose length is odd and at least 3. If
497 the elements at odd indices (the infix operator positions)
498 are all 'equal?', then it's a simple curly-infix list.
499 Otherwise it's a mixed curly-infix list. */
500 SCM op = scm_cadr (ans);
501
502 /* Check to see if the elements at odd indices are 'equal?' */
503 for (tl = scm_cdddr (ans); ; tl = scm_cddr (tl))
504 {
505 if (scm_is_null (tl))
506 {
507 /* Convert simple curly-infix list to prefix:
508 {a <op> b <op> ...} => (<op> a b ...) */
509 tl = ans;
510 while (scm_is_pair (scm_cdr (tl)))
511 {
512 tmp = scm_cddr (tl);
513 SCM_SETCDR (tl, tmp);
514 tl = tmp;
515 }
516 ans = scm_cons (op, ans);
517 break;
518 }
519 else if (scm_is_false (scm_equal_p (op, scm_car (tl))))
520 {
521 /* Mixed curly-infix list: {e ...} => ($nfx$ e ...) */
522 ans = scm_cons (sym_nfx, ans);
523 break;
524 }
525 }
526 }
527 else
528 /* Mixed curly-infix (possibly improper) list:
529 {e . tail} => ($nfx$ e . tail) */
530 ans = scm_cons (sym_nfx, ans);
531 }
532
533 return maybe_annotate_source (ans, port, opts, line, column);
534 }
535 #undef FUNC_NAME
536
537
538 /* Read a hexadecimal number NDIGITS in length. Put its value into the variable
539 C. If TERMINATOR is non-null, terminate early if the TERMINATOR character is
540 found. */
541 #define SCM_READ_HEX_ESCAPE(ndigits, terminator) \
542 do \
543 { \
544 scm_t_wchar a; \
545 size_t i = 0; \
546 c = 0; \
547 while (i < ndigits) \
548 { \
549 a = scm_getc_unlocked (port); \
550 if (a == EOF) \
551 goto str_eof; \
552 if (terminator \
553 && (a == (scm_t_wchar) terminator) \
554 && (i > 0)) \
555 break; \
556 if ('0' <= a && a <= '9') \
557 a -= '0'; \
558 else if ('A' <= a && a <= 'F') \
559 a = a - 'A' + 10; \
560 else if ('a' <= a && a <= 'f') \
561 a = a - 'a' + 10; \
562 else \
563 { \
564 c = a; \
565 goto bad_escaped; \
566 } \
567 c = c * 16 + a; \
568 i ++; \
569 } \
570 } while (0)
571
572 static void
573 skip_intraline_whitespace (SCM port)
574 {
575 scm_t_wchar c;
576
577 do
578 {
579 c = scm_getc_unlocked (port);
580 if (c == EOF)
581 return;
582 }
583 while (c == '\t' || uc_is_general_category (c, UC_SPACE_SEPARATOR));
584
585 scm_ungetc_unlocked (c, port);
586 }
587
588 static SCM
589 scm_read_string (int chr, SCM port, scm_t_read_opts *opts)
590 #define FUNC_NAME "scm_lreadr"
591 {
592 /* For strings smaller than C_STR, this function creates only one Scheme
593 object (the string returned). */
594
595 SCM str = SCM_EOL;
596 size_t c_str_len = 0;
597 scm_t_wchar c, c_str[READER_STRING_BUFFER_SIZE];
598
599 /* Need to capture line and column numbers here. */
600 long line = SCM_LINUM (port);
601 int column = SCM_COL (port) - 1;
602
603 while ('"' != (c = scm_getc_unlocked (port)))
604 {
605 if (c == EOF)
606 {
607 str_eof:
608 scm_i_input_error (FUNC_NAME, port,
609 "end of file in string constant", SCM_EOL);
610 }
611
612 if (c_str_len + 1 >= READER_STRING_BUFFER_SIZE)
613 {
614 str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
615 c_str_len = 0;
616 }
617
618 if (c == '\\')
619 {
620 switch (c = scm_getc_unlocked (port))
621 {
622 case EOF:
623 goto str_eof;
624 case '"':
625 case '\\':
626 break;
627 case '\n':
628 if (opts->hungry_eol_escapes_p)
629 skip_intraline_whitespace (port);
630 continue;
631 case '0':
632 c = '\0';
633 break;
634 case 'f':
635 c = '\f';
636 break;
637 case 'n':
638 c = '\n';
639 break;
640 case 'r':
641 c = '\r';
642 break;
643 case 't':
644 c = '\t';
645 break;
646 case 'a':
647 c = '\007';
648 break;
649 case 'v':
650 c = '\v';
651 break;
652 case 'b':
653 c = '\010';
654 break;
655 case 'x':
656 if (opts->r6rs_escapes_p)
657 SCM_READ_HEX_ESCAPE (10, ';');
658 else
659 SCM_READ_HEX_ESCAPE (2, '\0');
660 break;
661 case 'u':
662 if (!opts->r6rs_escapes_p)
663 {
664 SCM_READ_HEX_ESCAPE (4, '\0');
665 break;
666 }
667 case 'U':
668 if (!opts->r6rs_escapes_p)
669 {
670 SCM_READ_HEX_ESCAPE (6, '\0');
671 break;
672 }
673 default:
674 bad_escaped:
675 scm_i_input_error (FUNC_NAME, port,
676 "illegal character in escape sequence: ~S",
677 scm_list_1 (SCM_MAKE_CHAR (c)));
678 }
679 }
680
681 c_str[c_str_len++] = c;
682 }
683
684 if (scm_is_null (str))
685 /* Fast path: we got a string that fits in C_STR. */
686 str = scm_from_utf32_stringn (c_str, c_str_len);
687 else
688 {
689 if (c_str_len > 0)
690 str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
691
692 str = scm_string_concatenate_reverse (str, SCM_UNDEFINED, SCM_UNDEFINED);
693 }
694
695 return maybe_annotate_source (str, port, opts, line, column);
696 }
697 #undef FUNC_NAME
698
699
700 static SCM
701 scm_read_number (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
702 {
703 SCM result, str = SCM_EOL;
704 char local_buffer[READER_BUFFER_SIZE], *buffer;
705 size_t bytes_read;
706
707 /* Need to capture line and column numbers here. */
708 long line = SCM_LINUM (port);
709 int column = SCM_COL (port) - 1;
710
711 scm_ungetc_unlocked (chr, port);
712 buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
713 &bytes_read);
714
715 str = scm_from_port_stringn (buffer, bytes_read, port);
716
717 result = scm_string_to_number (str, SCM_UNDEFINED);
718 if (scm_is_false (result))
719 {
720 /* Return a symbol instead of a number */
721 if (opts->case_insensitive_p)
722 str = scm_string_downcase_x (str);
723 result = scm_string_to_symbol (str);
724 }
725 else if (SCM_NIMP (result))
726 result = maybe_annotate_source (result, port, opts, line, column);
727
728 SCM_COL (port) += scm_i_string_length (str);
729 return result;
730 }
731
732 static SCM
733 scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
734 {
735 SCM result;
736 int ends_with_colon = 0;
737 size_t bytes_read;
738 int postfix = (opts->keyword_style == KEYWORD_STYLE_POSTFIX);
739 char local_buffer[READER_BUFFER_SIZE], *buffer;
740 SCM str;
741
742 scm_ungetc_unlocked (chr, port);
743 buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
744 &bytes_read);
745 if (bytes_read > 0)
746 ends_with_colon = buffer[bytes_read - 1] == ':';
747
748 if (postfix && ends_with_colon && (bytes_read > 1))
749 {
750 str = scm_from_port_stringn (buffer, bytes_read - 1, port);
751
752 if (opts->case_insensitive_p)
753 str = scm_string_downcase_x (str);
754 result = scm_symbol_to_keyword (scm_string_to_symbol (str));
755 }
756 else
757 {
758 str = scm_from_port_stringn (buffer, bytes_read, port);
759
760 if (opts->case_insensitive_p)
761 str = scm_string_downcase_x (str);
762 result = scm_string_to_symbol (str);
763 }
764
765 SCM_COL (port) += scm_i_string_length (str);
766 return result;
767 }
768
769 static SCM
770 scm_read_number_and_radix (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
771 #define FUNC_NAME "scm_lreadr"
772 {
773 SCM result;
774 size_t read;
775 char local_buffer[READER_BUFFER_SIZE], *buffer;
776 unsigned int radix;
777 SCM str;
778
779 switch (chr)
780 {
781 case 'B':
782 case 'b':
783 radix = 2;
784 break;
785
786 case 'o':
787 case 'O':
788 radix = 8;
789 break;
790
791 case 'd':
792 case 'D':
793 radix = 10;
794 break;
795
796 case 'x':
797 case 'X':
798 radix = 16;
799 break;
800
801 default:
802 scm_ungetc_unlocked (chr, port);
803 scm_ungetc_unlocked ('#', port);
804 radix = 10;
805 }
806
807 buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
808 &read);
809
810 str = scm_from_port_stringn (buffer, read, port);
811
812 result = scm_string_to_number (str, scm_from_uint (radix));
813
814 SCM_COL (port) += scm_i_string_length (str);
815
816 if (scm_is_true (result))
817 return result;
818
819 scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
820
821 return SCM_BOOL_F;
822 }
823 #undef FUNC_NAME
824
825 static SCM
826 scm_read_quote (int chr, SCM port, scm_t_read_opts *opts)
827 {
828 SCM p;
829 long line = SCM_LINUM (port);
830 int column = SCM_COL (port) - 1;
831
832 switch (chr)
833 {
834 case '`':
835 p = scm_sym_quasiquote;
836 break;
837
838 case '\'':
839 p = scm_sym_quote;
840 break;
841
842 case ',':
843 {
844 scm_t_wchar c;
845
846 c = scm_getc_unlocked (port);
847 if ('@' == c)
848 p = scm_sym_uq_splicing;
849 else
850 {
851 scm_ungetc_unlocked (c, port);
852 p = scm_sym_unquote;
853 }
854 break;
855 }
856
857 default:
858 fprintf (stderr, "%s: unhandled quote character (%i)\n",
859 "scm_read_quote", chr);
860 abort ();
861 }
862
863 p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL);
864 return maybe_annotate_source (p, port, opts, line, column);
865 }
866
867 SCM_SYMBOL (sym_syntax, "syntax");
868 SCM_SYMBOL (sym_quasisyntax, "quasisyntax");
869 SCM_SYMBOL (sym_unsyntax, "unsyntax");
870 SCM_SYMBOL (sym_unsyntax_splicing, "unsyntax-splicing");
871
872 static SCM
873 scm_read_syntax (int chr, SCM port, scm_t_read_opts *opts)
874 {
875 SCM p;
876 long line = SCM_LINUM (port);
877 int column = SCM_COL (port) - 1;
878
879 switch (chr)
880 {
881 case '`':
882 p = sym_quasisyntax;
883 break;
884
885 case '\'':
886 p = sym_syntax;
887 break;
888
889 case ',':
890 {
891 int c;
892
893 c = scm_getc_unlocked (port);
894 if ('@' == c)
895 p = sym_unsyntax_splicing;
896 else
897 {
898 scm_ungetc_unlocked (c, port);
899 p = sym_unsyntax;
900 }
901 break;
902 }
903
904 default:
905 fprintf (stderr, "%s: unhandled syntax character (%i)\n",
906 "scm_read_syntax", chr);
907 abort ();
908 }
909
910 p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL);
911 return maybe_annotate_source (p, port, opts, line, column);
912 }
913
914 static SCM
915 scm_read_nil (int chr, SCM port, scm_t_read_opts *opts)
916 {
917 SCM id = scm_read_mixed_case_symbol (chr, port, opts);
918
919 if (!scm_is_eq (id, sym_nil))
920 scm_i_input_error ("scm_read_nil", port,
921 "unexpected input while reading #nil: ~a",
922 scm_list_1 (id));
923
924 return SCM_ELISP_NIL;
925 }
926
927 static SCM
928 scm_read_semicolon_comment (int chr, SCM port)
929 {
930 int c;
931
932 /* We use the get_byte here because there is no need to get the
933 locale correct with comment input. This presumes that newline
934 always represents itself no matter what the encoding is. */
935 for (c = scm_get_byte_or_eof_unlocked (port);
936 (c != EOF) && (c != '\n');
937 c = scm_get_byte_or_eof_unlocked (port));
938
939 return SCM_UNSPECIFIED;
940 }
941
942 \f
943 /* Sharp readers, i.e. readers called after a `#' sign has been read. */
944
945 static SCM
946 scm_read_boolean (int chr, SCM port)
947 {
948 switch (chr)
949 {
950 case 't':
951 case 'T':
952 return SCM_BOOL_T;
953
954 case 'f':
955 case 'F':
956 return SCM_BOOL_F;
957 }
958
959 return SCM_UNSPECIFIED;
960 }
961
962 static SCM
963 scm_read_character (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
964 #define FUNC_NAME "scm_lreadr"
965 {
966 char buffer[READER_CHAR_NAME_MAX_SIZE];
967 SCM charname;
968 size_t charname_len, bytes_read;
969 scm_t_wchar cp;
970 int overflow;
971 scm_t_port *pt;
972
973 overflow = read_token (port, opts, buffer, READER_CHAR_NAME_MAX_SIZE,
974 &bytes_read);
975 if (overflow)
976 scm_i_input_error (FUNC_NAME, port, "character name too long", SCM_EOL);
977
978 if (bytes_read == 0)
979 {
980 chr = scm_getc_unlocked (port);
981 if (chr == EOF)
982 scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
983 "while reading character", SCM_EOL);
984
985 /* CHR must be a token delimiter, like a whitespace. */
986 return (SCM_MAKE_CHAR (chr));
987 }
988
989 pt = SCM_PTAB_ENTRY (port);
990
991 /* Simple ASCII characters can be processed immediately. Also, simple
992 ISO-8859-1 characters can be processed immediately if the encoding for this
993 port is ISO-8859-1. */
994 if (bytes_read == 1 &&
995 ((unsigned char) buffer[0] <= 127
996 || pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1))
997 {
998 SCM_COL (port) += 1;
999 return SCM_MAKE_CHAR (buffer[0]);
1000 }
1001
1002 /* Otherwise, convert the buffer into a proper scheme string for
1003 processing. */
1004 charname = scm_from_port_stringn (buffer, bytes_read, port);
1005 charname_len = scm_i_string_length (charname);
1006 SCM_COL (port) += charname_len;
1007 cp = scm_i_string_ref (charname, 0);
1008 if (charname_len == 1)
1009 return SCM_MAKE_CHAR (cp);
1010
1011 /* Ignore dotted circles, which may be used to keep combining characters from
1012 combining with the backslash in #\charname. */
1013 if (cp == SCM_CODEPOINT_DOTTED_CIRCLE && charname_len == 2)
1014 return SCM_MAKE_CHAR (scm_i_string_ref (charname, 1));
1015
1016 if (cp >= '0' && cp < '8')
1017 {
1018 /* Dirk:FIXME:: This type of character syntax is not R5RS
1019 * compliant. Further, it should be verified that the constant
1020 * does only consist of octal digits. */
1021 SCM p = scm_string_to_number (charname, scm_from_uint (8));
1022 if (SCM_I_INUMP (p))
1023 {
1024 scm_t_wchar c = scm_to_uint32 (p);
1025 if (SCM_IS_UNICODE_CHAR (c))
1026 return SCM_MAKE_CHAR (c);
1027 else
1028 scm_i_input_error (FUNC_NAME, port,
1029 "out-of-range octal character escape: ~a",
1030 scm_list_1 (charname));
1031 }
1032 }
1033
1034 if (cp == 'x' && (charname_len > 1))
1035 {
1036 SCM p;
1037
1038 /* Convert from hex, skipping the initial 'x' character in CHARNAME */
1039 p = scm_string_to_number (scm_c_substring (charname, 1, charname_len),
1040 scm_from_uint (16));
1041 if (SCM_I_INUMP (p))
1042 {
1043 scm_t_wchar c = scm_to_uint32 (p);
1044 if (SCM_IS_UNICODE_CHAR (c))
1045 return SCM_MAKE_CHAR (c);
1046 else
1047 scm_i_input_error (FUNC_NAME, port,
1048 "out-of-range hex character escape: ~a",
1049 scm_list_1 (charname));
1050 }
1051 }
1052
1053 /* The names of characters should never have non-Latin1
1054 characters. */
1055 if (scm_i_is_narrow_string (charname)
1056 || scm_i_try_narrow_string (charname))
1057 { SCM ch = scm_i_charname_to_char (scm_i_string_chars (charname),
1058 charname_len);
1059 if (scm_is_true (ch))
1060 return ch;
1061 }
1062
1063 scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
1064 scm_list_1 (charname));
1065
1066 return SCM_UNSPECIFIED;
1067 }
1068 #undef FUNC_NAME
1069
1070 static SCM
1071 scm_read_keyword (int chr, SCM port, scm_t_read_opts *opts)
1072 {
1073 SCM symbol;
1074
1075 /* Read the symbol that comprises the keyword. Doing this instead of
1076 invoking a specific symbol reader function allows `scm_read_keyword ()'
1077 to adapt to the delimiters currently valid of symbols.
1078
1079 XXX: This implementation allows sloppy syntaxes like `#: key'. */
1080 symbol = scm_read_expression (port, opts);
1081 if (!scm_is_symbol (symbol))
1082 scm_i_input_error ("scm_read_keyword", port,
1083 "keyword prefix `~a' not followed by a symbol: ~s",
1084 scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
1085
1086 return (scm_symbol_to_keyword (symbol));
1087 }
1088
1089 static SCM
1090 scm_read_vector (int chr, SCM port, scm_t_read_opts *opts,
1091 long line, int column)
1092 {
1093 /* Note: We call `scm_read_sexp ()' rather than READER here in order to
1094 guarantee that it's going to do what we want. After all, this is an
1095 implementation detail of `scm_read_vector ()', not a desirable
1096 property. */
1097 return maybe_annotate_source (scm_vector (scm_read_sexp (chr, port, opts)),
1098 port, opts, line, column);
1099 }
1100
1101 /* Helper used by scm_read_array */
1102 static int
1103 read_decimal_integer (SCM port, int c, ssize_t *resp)
1104 {
1105 ssize_t sign = 1;
1106 ssize_t res = 0;
1107 int got_it = 0;
1108
1109 if (c == '-')
1110 {
1111 sign = -1;
1112 c = scm_getc_unlocked (port);
1113 }
1114
1115 while ('0' <= c && c <= '9')
1116 {
1117 res = 10*res + c-'0';
1118 got_it = 1;
1119 c = scm_getc_unlocked (port);
1120 }
1121
1122 if (got_it)
1123 *resp = sign * res;
1124 return c;
1125 }
1126
1127 /* Read an array. This function can also read vectors and uniform
1128 vectors. Also, the conflict between '#f' and '#f32' and '#f64' is
1129 handled here.
1130
1131 C is the first character read after the '#'. */
1132 static SCM
1133 scm_read_array (int c, SCM port, scm_t_read_opts *opts, long line, int column)
1134 {
1135 ssize_t rank;
1136 scm_t_wchar tag_buf[8];
1137 int tag_len;
1138
1139 SCM tag, shape = SCM_BOOL_F, elements, array;
1140
1141 /* XXX - shortcut for ordinary vectors. Shouldn't be necessary but
1142 the array code can not deal with zero-length dimensions yet, and
1143 we want to allow zero-length vectors, of course. */
1144 if (c == '(')
1145 return scm_read_vector (c, port, opts, line, column);
1146
1147 /* Disambiguate between '#f' and uniform floating point vectors. */
1148 if (c == 'f')
1149 {
1150 c = scm_getc_unlocked (port);
1151 if (c != '3' && c != '6')
1152 {
1153 if (c != EOF)
1154 scm_ungetc_unlocked (c, port);
1155 return SCM_BOOL_F;
1156 }
1157 rank = 1;
1158 tag_buf[0] = 'f';
1159 tag_len = 1;
1160 goto continue_reading_tag;
1161 }
1162
1163 /* Read rank. */
1164 rank = 1;
1165 c = read_decimal_integer (port, c, &rank);
1166 if (rank < 0)
1167 scm_i_input_error (NULL, port, "array rank must be non-negative",
1168 SCM_EOL);
1169
1170 /* Read tag. */
1171 tag_len = 0;
1172 continue_reading_tag:
1173 while (c != EOF && c != '(' && c != '@' && c != ':'
1174 && tag_len < sizeof tag_buf / sizeof tag_buf[0])
1175 {
1176 tag_buf[tag_len++] = c;
1177 c = scm_getc_unlocked (port);
1178 }
1179 if (tag_len == 0)
1180 tag = SCM_BOOL_T;
1181 else
1182 {
1183 tag = scm_string_to_symbol (scm_from_utf32_stringn (tag_buf, tag_len));
1184 if (tag_len == sizeof tag_buf / sizeof tag_buf[0])
1185 scm_i_input_error (NULL, port, "invalid array tag, starting with: ~a",
1186 scm_list_1 (tag));
1187 }
1188
1189 /* Read shape. */
1190 if (c == '@' || c == ':')
1191 {
1192 shape = SCM_EOL;
1193
1194 do
1195 {
1196 ssize_t lbnd = 0, len = 0;
1197 SCM s;
1198
1199 if (c == '@')
1200 {
1201 c = scm_getc_unlocked (port);
1202 c = read_decimal_integer (port, c, &lbnd);
1203 }
1204
1205 s = scm_from_ssize_t (lbnd);
1206
1207 if (c == ':')
1208 {
1209 c = scm_getc_unlocked (port);
1210 c = read_decimal_integer (port, c, &len);
1211 if (len < 0)
1212 scm_i_input_error (NULL, port,
1213 "array length must be non-negative",
1214 SCM_EOL);
1215
1216 s = scm_list_2 (s, scm_from_ssize_t (lbnd+len-1));
1217 }
1218
1219 shape = scm_cons (s, shape);
1220 } while (c == '@' || c == ':');
1221
1222 shape = scm_reverse_x (shape, SCM_EOL);
1223 }
1224
1225 /* Read nested lists of elements. */
1226 if (c != '(')
1227 scm_i_input_error (NULL, port,
1228 "missing '(' in vector or array literal",
1229 SCM_EOL);
1230 elements = scm_read_sexp (c, port, opts);
1231
1232 if (scm_is_false (shape))
1233 shape = scm_from_ssize_t (rank);
1234 else if (scm_ilength (shape) != rank)
1235 scm_i_input_error
1236 (NULL, port,
1237 "the number of shape specifications must match the array rank",
1238 SCM_EOL);
1239
1240 /* Handle special print syntax of rank zero arrays; see
1241 scm_i_print_array for a rationale. */
1242 if (rank == 0)
1243 {
1244 if (!scm_is_pair (elements))
1245 scm_i_input_error (NULL, port,
1246 "too few elements in array literal, need 1",
1247 SCM_EOL);
1248 if (!scm_is_null (SCM_CDR (elements)))
1249 scm_i_input_error (NULL, port,
1250 "too many elements in array literal, want 1",
1251 SCM_EOL);
1252 elements = SCM_CAR (elements);
1253 }
1254
1255 /* Construct array, annotate with source location, and return. */
1256 array = scm_list_to_typed_array (tag, shape, elements);
1257 return maybe_annotate_source (array, port, opts, line, column);
1258 }
1259
1260 static SCM
1261 scm_read_srfi4_vector (int chr, SCM port, scm_t_read_opts *opts,
1262 long line, int column)
1263 {
1264 return scm_read_array (chr, port, opts, line, column);
1265 }
1266
1267 static SCM
1268 scm_read_bytevector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
1269 long line, int column)
1270 {
1271 chr = scm_getc_unlocked (port);
1272 if (chr != 'u')
1273 goto syntax;
1274
1275 chr = scm_getc_unlocked (port);
1276 if (chr != '8')
1277 goto syntax;
1278
1279 chr = scm_getc_unlocked (port);
1280 if (chr != '(')
1281 goto syntax;
1282
1283 return maybe_annotate_source
1284 (scm_u8_list_to_bytevector (scm_read_sexp (chr, port, opts)),
1285 port, opts, line, column);
1286
1287 syntax:
1288 scm_i_input_error ("read_bytevector", port,
1289 "invalid bytevector prefix",
1290 SCM_MAKE_CHAR (chr));
1291 return SCM_UNSPECIFIED;
1292 }
1293
1294 static SCM
1295 scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
1296 long line, int column)
1297 {
1298 /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
1299 terribly inefficient but who cares? */
1300 SCM s_bits = SCM_EOL;
1301
1302 for (chr = scm_getc_unlocked (port);
1303 (chr != EOF) && ((chr == '0') || (chr == '1'));
1304 chr = scm_getc_unlocked (port))
1305 {
1306 s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
1307 }
1308
1309 if (chr != EOF)
1310 scm_ungetc_unlocked (chr, port);
1311
1312 return maybe_annotate_source
1313 (scm_bitvector (scm_reverse_x (s_bits, SCM_EOL)),
1314 port, opts, line, column);
1315 }
1316
1317 static SCM
1318 scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
1319 {
1320 int bang_seen = 0;
1321
1322 for (;;)
1323 {
1324 int c = scm_getc_unlocked (port);
1325
1326 if (c == EOF)
1327 scm_i_input_error ("skip_block_comment", port,
1328 "unterminated `#! ... !#' comment", SCM_EOL);
1329
1330 if (c == '!')
1331 bang_seen = 1;
1332 else if (c == '#' && bang_seen)
1333 break;
1334 else
1335 bang_seen = 0;
1336 }
1337
1338 return SCM_UNSPECIFIED;
1339 }
1340
1341 static void set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts,
1342 int value);
1343 static void set_port_square_brackets_p (SCM port, scm_t_read_opts *opts,
1344 int value);
1345 static void set_port_curly_infix_p (SCM port, scm_t_read_opts *opts,
1346 int value);
1347
1348 static SCM
1349 scm_read_shebang (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
1350 {
1351 char name[READER_DIRECTIVE_NAME_MAX_SIZE + 1];
1352 int c;
1353 int i = 0;
1354
1355 while (i <= READER_DIRECTIVE_NAME_MAX_SIZE)
1356 {
1357 c = scm_getc_unlocked (port);
1358 if (c == EOF)
1359 scm_i_input_error ("skip_block_comment", port,
1360 "unterminated `#! ... !#' comment", SCM_EOL);
1361 else if (('a' <= c && c <= 'z') || ('0' <= c && c <= '9') || c == '-')
1362 name[i++] = c;
1363 else if (CHAR_IS_DELIMITER (c))
1364 {
1365 scm_ungetc_unlocked (c, port);
1366 name[i] = '\0';
1367 if (0 == strcmp ("r6rs", name))
1368 ; /* Silently ignore */
1369 else if (0 == strcmp ("fold-case", name))
1370 set_port_case_insensitive_p (port, opts, 1);
1371 else if (0 == strcmp ("no-fold-case", name))
1372 set_port_case_insensitive_p (port, opts, 0);
1373 else if (0 == strcmp ("curly-infix", name))
1374 set_port_curly_infix_p (port, opts, 1);
1375 else if (0 == strcmp ("curly-infix-and-bracket-lists", name))
1376 {
1377 set_port_curly_infix_p (port, opts, 1);
1378 set_port_square_brackets_p (port, opts, 0);
1379 }
1380 else
1381 break;
1382
1383 return SCM_UNSPECIFIED;
1384 }
1385 else
1386 {
1387 scm_ungetc_unlocked (c, port);
1388 break;
1389 }
1390 }
1391 while (i > 0)
1392 scm_ungetc_unlocked (name[--i], port);
1393 return scm_read_scsh_block_comment (chr, port);
1394 }
1395
1396 static SCM
1397 scm_read_r6rs_block_comment (scm_t_wchar chr, SCM port)
1398 {
1399 /* Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
1400 nested. So care must be taken. */
1401 int nesting_level = 1;
1402
1403 int a = scm_getc_unlocked (port);
1404
1405 if (a == EOF)
1406 scm_i_input_error ("scm_read_r6rs_block_comment", port,
1407 "unterminated `#| ... |#' comment", SCM_EOL);
1408
1409 while (nesting_level > 0)
1410 {
1411 int b = scm_getc_unlocked (port);
1412
1413 if (b == EOF)
1414 scm_i_input_error ("scm_read_r6rs_block_comment", port,
1415 "unterminated `#| ... |#' comment", SCM_EOL);
1416
1417 if (a == '|' && b == '#')
1418 {
1419 nesting_level--;
1420 b = EOF;
1421 }
1422 else if (a == '#' && b == '|')
1423 {
1424 nesting_level++;
1425 b = EOF;
1426 }
1427
1428 a = b;
1429 }
1430
1431 return SCM_UNSPECIFIED;
1432 }
1433
1434 static SCM
1435 scm_read_commented_expression (scm_t_wchar chr, SCM port,
1436 scm_t_read_opts *opts)
1437 {
1438 scm_t_wchar c;
1439
1440 c = flush_ws (port, opts, (char *) NULL);
1441 if (EOF == c)
1442 scm_i_input_error ("read_commented_expression", port,
1443 "no expression after #; comment", SCM_EOL);
1444 scm_ungetc_unlocked (c, port);
1445 scm_read_expression (port, opts);
1446 return SCM_UNSPECIFIED;
1447 }
1448
1449 static SCM
1450 scm_read_extended_symbol (scm_t_wchar chr, SCM port)
1451 {
1452 /* Guile's extended symbol read syntax looks like this:
1453
1454 #{This is all a symbol name}#
1455
1456 So here, CHR is expected to be `{'. */
1457 int saw_brace = 0;
1458 size_t len = 0;
1459 SCM buf = scm_i_make_string (1024, NULL, 0);
1460
1461 buf = scm_i_string_start_writing (buf);
1462
1463 while ((chr = scm_getc_unlocked (port)) != EOF)
1464 {
1465 if (saw_brace)
1466 {
1467 if (chr == '#')
1468 {
1469 break;
1470 }
1471 else
1472 {
1473 saw_brace = 0;
1474 scm_i_string_set_x (buf, len++, '}');
1475 }
1476 }
1477
1478 if (chr == '}')
1479 saw_brace = 1;
1480 else if (chr == '\\')
1481 {
1482 /* It used to be that print.c would print extended-read-syntax
1483 symbols with backslashes before "non-standard" chars, but
1484 this routine wouldn't do anything with those escapes.
1485 Bummer. What we've done is to change print.c to output
1486 R6RS hex escapes for those characters, relying on the fact
1487 that the extended read syntax would never put a `\' before
1488 an `x'. For now, we just ignore other instances of
1489 backslash in the string. */
1490 switch ((chr = scm_getc_unlocked (port)))
1491 {
1492 case EOF:
1493 goto done;
1494 case 'x':
1495 {
1496 scm_t_wchar c;
1497
1498 SCM_READ_HEX_ESCAPE (10, ';');
1499 scm_i_string_set_x (buf, len++, c);
1500 break;
1501
1502 str_eof:
1503 chr = EOF;
1504 goto done;
1505
1506 bad_escaped:
1507 scm_i_string_stop_writing ();
1508 scm_i_input_error ("scm_read_extended_symbol", port,
1509 "illegal character in escape sequence: ~S",
1510 scm_list_1 (SCM_MAKE_CHAR (c)));
1511 break;
1512 }
1513 default:
1514 scm_i_string_set_x (buf, len++, chr);
1515 break;
1516 }
1517 }
1518 else
1519 scm_i_string_set_x (buf, len++, chr);
1520
1521 if (len >= scm_i_string_length (buf) - 2)
1522 {
1523 SCM addy;
1524
1525 scm_i_string_stop_writing ();
1526 addy = scm_i_make_string (1024, NULL, 0);
1527 buf = scm_string_append (scm_list_2 (buf, addy));
1528 len = 0;
1529 buf = scm_i_string_start_writing (buf);
1530 }
1531 }
1532
1533 done:
1534 scm_i_string_stop_writing ();
1535 if (chr == EOF)
1536 scm_i_input_error ("scm_read_extended_symbol", port,
1537 "end of file while reading symbol", SCM_EOL);
1538
1539 return (scm_string_to_symbol (scm_c_substring (buf, 0, len)));
1540 }
1541
1542
1543 \f
1544 /* Top-level token readers, i.e., dispatchers. */
1545
1546 static SCM
1547 scm_read_sharp_extension (int chr, SCM port, scm_t_read_opts *opts)
1548 {
1549 SCM proc;
1550
1551 proc = scm_get_hash_procedure (chr);
1552 if (scm_is_true (scm_procedure_p (proc)))
1553 {
1554 long line = SCM_LINUM (port);
1555 int column = SCM_COL (port) - 2;
1556 SCM got;
1557
1558 got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
1559
1560 if (opts->record_positions_p && SCM_NIMP (got)
1561 && !scm_i_has_source_properties (got))
1562 scm_i_set_source_properties_x (got, line, column, SCM_FILENAME (port));
1563
1564 return got;
1565 }
1566
1567 return SCM_UNSPECIFIED;
1568 }
1569
1570 /* The reader for the sharp `#' character. It basically dispatches reads
1571 among the above token readers. */
1572 static SCM
1573 scm_read_sharp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
1574 long line, int column)
1575 #define FUNC_NAME "scm_lreadr"
1576 {
1577 SCM result;
1578
1579 chr = scm_getc_unlocked (port);
1580
1581 result = scm_read_sharp_extension (chr, port, opts);
1582 if (!scm_is_eq (result, SCM_UNSPECIFIED))
1583 return result;
1584
1585 switch (chr)
1586 {
1587 case '\\':
1588 return (scm_read_character (chr, port, opts));
1589 case '(':
1590 return (scm_read_vector (chr, port, opts, line, column));
1591 case 's':
1592 case 'u':
1593 case 'f':
1594 case 'c':
1595 /* This one may return either a boolean or an SRFI-4 vector. */
1596 return (scm_read_srfi4_vector (chr, port, opts, line, column));
1597 case 'v':
1598 return (scm_read_bytevector (chr, port, opts, line, column));
1599 case '*':
1600 return (scm_read_guile_bit_vector (chr, port, opts, line, column));
1601 case 't':
1602 case 'T':
1603 case 'F':
1604 return (scm_read_boolean (chr, port));
1605 case ':':
1606 return (scm_read_keyword (chr, port, opts));
1607 case '0': case '1': case '2': case '3': case '4':
1608 case '5': case '6': case '7': case '8': case '9':
1609 case '@':
1610 return (scm_read_array (chr, port, opts, line, column));
1611
1612 case 'i':
1613 case 'e':
1614 case 'b':
1615 case 'B':
1616 case 'o':
1617 case 'O':
1618 case 'd':
1619 case 'D':
1620 case 'x':
1621 case 'X':
1622 case 'I':
1623 case 'E':
1624 return (scm_read_number_and_radix (chr, port, opts));
1625 case '{':
1626 return (scm_read_extended_symbol (chr, port));
1627 case '!':
1628 return (scm_read_shebang (chr, port, opts));
1629 case ';':
1630 return (scm_read_commented_expression (chr, port, opts));
1631 case '`':
1632 case '\'':
1633 case ',':
1634 return (scm_read_syntax (chr, port, opts));
1635 case 'n':
1636 return (scm_read_nil (chr, port, opts));
1637 default:
1638 result = scm_read_sharp_extension (chr, port, opts);
1639 if (scm_is_eq (result, SCM_UNSPECIFIED))
1640 {
1641 /* To remain compatible with 1.8 and earlier, the following
1642 characters have lower precedence than `read-hash-extend'
1643 characters. */
1644 switch (chr)
1645 {
1646 case '|':
1647 return scm_read_r6rs_block_comment (chr, port);
1648 default:
1649 scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
1650 scm_list_1 (SCM_MAKE_CHAR (chr)));
1651 }
1652 }
1653 else
1654 return result;
1655 }
1656
1657 return SCM_UNSPECIFIED;
1658 }
1659 #undef FUNC_NAME
1660
1661 static SCM
1662 read_inner_expression (SCM port, scm_t_read_opts *opts)
1663 #define FUNC_NAME "read_inner_expression"
1664 {
1665 while (1)
1666 {
1667 scm_t_wchar chr;
1668
1669 chr = scm_getc_unlocked (port);
1670
1671 switch (chr)
1672 {
1673 case SCM_WHITE_SPACES:
1674 case SCM_LINE_INCREMENTORS:
1675 break;
1676 case ';':
1677 (void) scm_read_semicolon_comment (chr, port);
1678 break;
1679 case '{':
1680 if (opts->curly_infix_p)
1681 {
1682 if (opts->neoteric_p)
1683 return scm_read_sexp (chr, port, opts);
1684 else
1685 {
1686 SCM expr;
1687
1688 /* Enable neoteric expressions within curly braces */
1689 opts->neoteric_p = 1;
1690 expr = scm_read_sexp (chr, port, opts);
1691 opts->neoteric_p = 0;
1692 return expr;
1693 }
1694 }
1695 else
1696 return scm_read_mixed_case_symbol (chr, port, opts);
1697 case '[':
1698 if (opts->square_brackets_p)
1699 return scm_read_sexp (chr, port, opts);
1700 else if (opts->curly_infix_p)
1701 {
1702 /* The syntax of neoteric expressions requires that '[' be
1703 a delimiter when curly-infix is enabled, so it cannot
1704 be part of an unescaped symbol. We might as well do
1705 something useful with it, so we adopt Kawa's convention:
1706 [...] => ($bracket-list$ ...) */
1707 long line = SCM_LINUM (port);
1708 int column = SCM_COL (port) - 1;
1709 return maybe_annotate_source
1710 (scm_cons (sym_bracket_list, scm_read_sexp (chr, port, opts)),
1711 port, opts, line, column);
1712 }
1713 else
1714 return scm_read_mixed_case_symbol (chr, port, opts);
1715 case '(':
1716 return (scm_read_sexp (chr, port, opts));
1717 case '"':
1718 return (scm_read_string (chr, port, opts));
1719 case '\'':
1720 case '`':
1721 case ',':
1722 return (scm_read_quote (chr, port, opts));
1723 case '#':
1724 {
1725 long line = SCM_LINUM (port);
1726 int column = SCM_COL (port) - 1;
1727 SCM result = scm_read_sharp (chr, port, opts, line, column);
1728 if (scm_is_eq (result, SCM_UNSPECIFIED))
1729 /* We read a comment or some such. */
1730 break;
1731 else
1732 return result;
1733 }
1734 case ')':
1735 scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
1736 break;
1737 case '}':
1738 if (opts->curly_infix_p)
1739 scm_i_input_error (FUNC_NAME, port, "unexpected \"}\"", SCM_EOL);
1740 else
1741 return scm_read_mixed_case_symbol (chr, port, opts);
1742 case ']':
1743 if (opts->square_brackets_p)
1744 scm_i_input_error (FUNC_NAME, port, "unexpected \"]\"", SCM_EOL);
1745 /* otherwise fall through */
1746 case EOF:
1747 return SCM_EOF_VAL;
1748 case ':':
1749 if (opts->keyword_style == KEYWORD_STYLE_PREFIX)
1750 return scm_symbol_to_keyword (scm_read_expression (port, opts));
1751 /* Fall through. */
1752
1753 default:
1754 {
1755 if (((chr >= '0') && (chr <= '9'))
1756 || (strchr ("+-.", chr)))
1757 return (scm_read_number (chr, port, opts));
1758 else
1759 return (scm_read_mixed_case_symbol (chr, port, opts));
1760 }
1761 }
1762 }
1763 }
1764 #undef FUNC_NAME
1765
1766 static SCM
1767 scm_read_expression (SCM port, scm_t_read_opts *opts)
1768 #define FUNC_NAME "scm_read_expression"
1769 {
1770 if (!opts->neoteric_p)
1771 return read_inner_expression (port, opts);
1772 else
1773 {
1774 long line = 0;
1775 int column = 0;
1776 SCM expr;
1777
1778 if (opts->record_positions_p)
1779 {
1780 /* We need to get the position of the first non-whitespace
1781 character in order to correctly annotate neoteric
1782 expressions. For example, for the expression 'f(x)', the
1783 first call to 'read_inner_expression' reads the 'f' (which
1784 cannot be annotated), and then we later read the '(x)' and
1785 use it to construct the new list (f x). */
1786 int c = flush_ws (port, opts, (char *) NULL);
1787 if (c == EOF)
1788 return SCM_EOF_VAL;
1789 scm_ungetc_unlocked (c, port);
1790 line = SCM_LINUM (port);
1791 column = SCM_COL (port);
1792 }
1793
1794 expr = read_inner_expression (port, opts);
1795
1796 /* 'expr' is the first component of the neoteric expression. Now
1797 we loop, and as long as the next character is '(', '[', or '{',
1798 (without any intervening whitespace), we use it to construct a
1799 new expression. For example, f{n - 1}(x) => ((f (- n 1)) x). */
1800 for (;;)
1801 {
1802 int chr = scm_getc_unlocked (port);
1803
1804 if (chr == '(')
1805 /* e(...) => (e ...) */
1806 expr = scm_cons (expr, scm_read_sexp (chr, port, opts));
1807 else if (chr == '[')
1808 /* e[...] => ($bracket-apply$ e ...) */
1809 expr = scm_cons (sym_bracket_apply,
1810 scm_cons (expr,
1811 scm_read_sexp (chr, port, opts)));
1812 else if (chr == '{')
1813 {
1814 SCM arg = scm_read_sexp (chr, port, opts);
1815
1816 if (scm_is_null (arg))
1817 expr = scm_list_1 (expr); /* e{} => (e) */
1818 else
1819 expr = scm_list_2 (expr, arg); /* e{...} => (e {...}) */
1820 }
1821 else
1822 {
1823 if (chr != EOF)
1824 scm_ungetc_unlocked (chr, port);
1825 break;
1826 }
1827 maybe_annotate_source (expr, port, opts, line, column);
1828 }
1829 return expr;
1830 }
1831 }
1832 #undef FUNC_NAME
1833
1834 \f
1835 /* Actual reader. */
1836
1837 static void init_read_options (SCM port, scm_t_read_opts *opts);
1838
1839 SCM_DEFINE (scm_read, "read", 0, 1, 0,
1840 (SCM port),
1841 "Read an s-expression from the input port @var{port}, or from\n"
1842 "the current input port if @var{port} is not specified.\n"
1843 "Any whitespace before the next token is discarded.")
1844 #define FUNC_NAME s_scm_read
1845 {
1846 scm_t_read_opts opts;
1847 int c;
1848
1849 if (SCM_UNBNDP (port))
1850 port = scm_current_input_port ();
1851 SCM_VALIDATE_OPINPORT (1, port);
1852
1853 init_read_options (port, &opts);
1854
1855 c = flush_ws (port, &opts, (char *) NULL);
1856 if (EOF == c)
1857 return SCM_EOF_VAL;
1858 scm_ungetc_unlocked (c, port);
1859
1860 return (scm_read_expression (port, &opts));
1861 }
1862 #undef FUNC_NAME
1863
1864
1865 \f
1866
1867 /* Manipulate the read-hash-procedures alist. This could be written in
1868 Scheme, but maybe it will also be used by C code during initialisation. */
1869 SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
1870 (SCM chr, SCM proc),
1871 "Install the procedure @var{proc} for reading expressions\n"
1872 "starting with the character sequence @code{#} and @var{chr}.\n"
1873 "@var{proc} will be called with two arguments: the character\n"
1874 "@var{chr} and the port to read further data from. The object\n"
1875 "returned will be the return value of @code{read}. \n"
1876 "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1877 )
1878 #define FUNC_NAME s_scm_read_hash_extend
1879 {
1880 SCM this;
1881 SCM prev;
1882
1883 SCM_VALIDATE_CHAR (1, chr);
1884 SCM_ASSERT (scm_is_false (proc)
1885 || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
1886 proc, SCM_ARG2, FUNC_NAME);
1887
1888 /* Check if chr is already in the alist. */
1889 this = scm_i_read_hash_procedures_ref ();
1890 prev = SCM_BOOL_F;
1891 while (1)
1892 {
1893 if (scm_is_null (this))
1894 {
1895 /* not found, so add it to the beginning. */
1896 if (scm_is_true (proc))
1897 {
1898 SCM new = scm_cons (scm_cons (chr, proc),
1899 scm_i_read_hash_procedures_ref ());
1900 scm_i_read_hash_procedures_set_x (new);
1901 }
1902 break;
1903 }
1904 if (scm_is_eq (chr, SCM_CAAR (this)))
1905 {
1906 /* already in the alist. */
1907 if (scm_is_false (proc))
1908 {
1909 /* remove it. */
1910 if (scm_is_false (prev))
1911 {
1912 SCM rest = SCM_CDR (scm_i_read_hash_procedures_ref ());
1913 scm_i_read_hash_procedures_set_x (rest);
1914 }
1915 else
1916 scm_set_cdr_x (prev, SCM_CDR (this));
1917 }
1918 else
1919 {
1920 /* replace it. */
1921 scm_set_cdr_x (SCM_CAR (this), proc);
1922 }
1923 break;
1924 }
1925 prev = this;
1926 this = SCM_CDR (this);
1927 }
1928
1929 return SCM_UNSPECIFIED;
1930 }
1931 #undef FUNC_NAME
1932
1933 /* Recover the read-hash procedure corresponding to char c. */
1934 static SCM
1935 scm_get_hash_procedure (int c)
1936 {
1937 SCM rest = scm_i_read_hash_procedures_ref ();
1938
1939 while (1)
1940 {
1941 if (scm_is_null (rest))
1942 return SCM_BOOL_F;
1943
1944 if (SCM_CHAR (SCM_CAAR (rest)) == c)
1945 return SCM_CDAR (rest);
1946
1947 rest = SCM_CDR (rest);
1948 }
1949 }
1950
1951 #define SCM_ENCODING_SEARCH_SIZE (500)
1952
1953 static int
1954 is_encoding_char (char c)
1955 {
1956 if (c >= 'a' && c <= 'z') return 1;
1957 if (c >= 'A' && c <= 'Z') return 1;
1958 if (c >= '0' && c <= '9') return 1;
1959 return strchr ("_-.:/,+=()", c) != NULL;
1960 }
1961
1962 /* Search the first few hundred characters of a file for an Emacs-like coding
1963 declaration. Returns either NULL or a string whose storage has been
1964 allocated with `scm_gc_malloc ()'. */
1965 char *
1966 scm_i_scan_for_encoding (SCM port)
1967 {
1968 scm_t_port *pt;
1969 char header[SCM_ENCODING_SEARCH_SIZE+1];
1970 size_t bytes_read, encoding_length, i;
1971 char *encoding = NULL;
1972 int utf8_bom = 0;
1973 char *pos, *encoding_start;
1974 int in_comment;
1975
1976 pt = SCM_PTAB_ENTRY (port);
1977
1978 if (pt->rw_active == SCM_PORT_WRITE)
1979 scm_flush_unlocked (port);
1980
1981 if (pt->rw_random)
1982 pt->rw_active = SCM_PORT_READ;
1983
1984 if (pt->read_pos == pt->read_end)
1985 {
1986 /* We can use the read buffer, and thus avoid a seek. */
1987 if (scm_fill_input_unlocked (port) == EOF)
1988 return NULL;
1989
1990 bytes_read = pt->read_end - pt->read_pos;
1991 if (bytes_read > SCM_ENCODING_SEARCH_SIZE)
1992 bytes_read = SCM_ENCODING_SEARCH_SIZE;
1993
1994 if (bytes_read <= 1)
1995 /* An unbuffered port -- don't scan. */
1996 return NULL;
1997
1998 memcpy (header, pt->read_pos, bytes_read);
1999 header[bytes_read] = '\0';
2000 }
2001 else
2002 {
2003 /* Try to read some bytes and then seek back. Not all ports
2004 support seeking back; and indeed some file ports (like
2005 /dev/urandom) will succeed on an lseek (fd, 0, SEEK_CUR)---the
2006 check performed by SCM_FPORT_FDES---but fail to seek
2007 backwards. Hence this block comes second. We prefer to use
2008 the read buffer in-place. */
2009 if (SCM_FPORTP (port) && !SCM_FDES_RANDOM_P (SCM_FPORT_FDES (port)))
2010 return NULL;
2011
2012 bytes_read = scm_c_read_unlocked (port, header, SCM_ENCODING_SEARCH_SIZE);
2013 header[bytes_read] = '\0';
2014 scm_seek (port, scm_from_int (0), scm_from_int (SEEK_SET));
2015 }
2016
2017 if (bytes_read > 3
2018 && header[0] == '\xef' && header[1] == '\xbb' && header[2] == '\xbf')
2019 utf8_bom = 1;
2020
2021 /* search past "coding[:=]" */
2022 pos = header;
2023 while (1)
2024 {
2025 if ((pos = strstr(pos, "coding")) == NULL)
2026 return NULL;
2027
2028 pos += strlen("coding");
2029 if (pos - header >= SCM_ENCODING_SEARCH_SIZE ||
2030 (*pos == ':' || *pos == '='))
2031 {
2032 pos ++;
2033 break;
2034 }
2035 }
2036
2037 /* skip spaces */
2038 while (pos - header <= SCM_ENCODING_SEARCH_SIZE &&
2039 (*pos == ' ' || *pos == '\t'))
2040 pos ++;
2041
2042 /* grab the next token */
2043 encoding_start = pos;
2044 i = 0;
2045 while (encoding_start + i - header <= SCM_ENCODING_SEARCH_SIZE
2046 && encoding_start + i - header < bytes_read
2047 && is_encoding_char (encoding_start[i]))
2048 i++;
2049
2050 encoding_length = i;
2051 if (encoding_length == 0)
2052 return NULL;
2053
2054 encoding = scm_gc_strndup (encoding_start, encoding_length, "encoding");
2055
2056 /* push backwards to make sure we were in a comment */
2057 in_comment = 0;
2058 pos = encoding_start;
2059 while (pos >= header)
2060 {
2061 if (*pos == ';')
2062 {
2063 in_comment = 1;
2064 break;
2065 }
2066 else if (*pos == '\n' || pos == header)
2067 {
2068 /* This wasn't in a semicolon comment. Check for a
2069 hash-bang comment. */
2070 char *beg = strstr (header, "#!");
2071 char *end = strstr (header, "!#");
2072 if (beg < encoding_start && encoding_start + encoding_length <= end)
2073 in_comment = 1;
2074 break;
2075 }
2076 else
2077 {
2078 pos --;
2079 continue;
2080 }
2081 }
2082 if (!in_comment)
2083 /* This wasn't in a comment */
2084 return NULL;
2085
2086 if (utf8_bom && strcasecmp (encoding, "UTF-8"))
2087 scm_misc_error (NULL,
2088 "the port input declares the encoding ~s but is encoded as UTF-8",
2089 scm_list_1 (scm_from_locale_string (encoding)));
2090
2091 return encoding;
2092 }
2093
2094 SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0,
2095 (SCM port),
2096 "Scans the port for an Emacs-like character coding declaration\n"
2097 "near the top of the contents of a port with random-accessible contents.\n"
2098 "The coding declaration is of the form\n"
2099 "@code{coding: XXXXX} and must appear in a scheme comment.\n"
2100 "\n"
2101 "Returns a string containing the character encoding of the file\n"
2102 "if a declaration was found, or @code{#f} otherwise.\n")
2103 #define FUNC_NAME s_scm_file_encoding
2104 {
2105 char *enc;
2106 SCM s_enc;
2107
2108 SCM_VALIDATE_OPINPORT (SCM_ARG1, port);
2109
2110 enc = scm_i_scan_for_encoding (port);
2111 if (enc == NULL)
2112 return SCM_BOOL_F;
2113 else
2114 {
2115 s_enc = scm_from_locale_string (enc);
2116 return s_enc;
2117 }
2118
2119 return SCM_BOOL_F;
2120 }
2121 #undef FUNC_NAME
2122
2123 \f
2124 /* Per-port read options.
2125
2126 We store per-port read options in the 'port-read-options' key of the
2127 port's alist. The value stored in the alist is a single integer that
2128 contains a two-bit field for each read option.
2129
2130 If a bit field contains READ_OPTION_INHERIT (3), that indicates that
2131 the applicable value should be inherited from the corresponding
2132 global read option. Otherwise, the bit field contains the value of
2133 the read option. For boolean read options that have been set
2134 per-port, the possible values are 0 or 1. If the 'keyword_style'
2135 read option has been set per-port, its possible values are those in
2136 'enum t_keyword_style'. */
2137
2138 /* Key to read options in per-port alists. */
2139 SCM_SYMBOL (sym_port_read_options, "port-read-options");
2140
2141 /* Offsets of bit fields for each per-port override */
2142 #define READ_OPTION_COPY_SOURCE_P 0
2143 #define READ_OPTION_RECORD_POSITIONS_P 2
2144 #define READ_OPTION_CASE_INSENSITIVE_P 4
2145 #define READ_OPTION_KEYWORD_STYLE 6
2146 #define READ_OPTION_R6RS_ESCAPES_P 8
2147 #define READ_OPTION_SQUARE_BRACKETS_P 10
2148 #define READ_OPTION_HUNGRY_EOL_ESCAPES_P 12
2149 #define READ_OPTION_CURLY_INFIX_P 14
2150
2151 /* The total width in bits of the per-port overrides */
2152 #define READ_OPTIONS_NUM_BITS 16
2153
2154 #define READ_OPTIONS_INHERIT_ALL ((1UL << READ_OPTIONS_NUM_BITS) - 1)
2155 #define READ_OPTIONS_MAX_VALUE READ_OPTIONS_INHERIT_ALL
2156
2157 #define READ_OPTION_MASK 3
2158 #define READ_OPTION_INHERIT 3
2159
2160 static void
2161 set_port_read_option (SCM port, int option, int new_value)
2162 {
2163 SCM scm_read_options;
2164 unsigned int read_options;
2165
2166 new_value &= READ_OPTION_MASK;
2167 scm_read_options = scm_assq_ref (SCM_PTAB_ENTRY(port)->alist,
2168 sym_port_read_options);
2169 if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE))
2170 read_options = scm_to_uint (scm_read_options);
2171 else
2172 read_options = READ_OPTIONS_INHERIT_ALL;
2173 read_options &= ~(READ_OPTION_MASK << option);
2174 read_options |= new_value << option;
2175 scm_read_options = scm_from_uint (read_options);
2176 SCM_PTAB_ENTRY(port)->alist = scm_assq_set_x (SCM_PTAB_ENTRY(port)->alist,
2177 sym_port_read_options,
2178 scm_read_options);
2179 }
2180
2181 /* Set OPTS and PORT's case-insensitivity according to VALUE. */
2182 static void
2183 set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts, int value)
2184 {
2185 value = !!value;
2186 opts->case_insensitive_p = value;
2187 set_port_read_option (port, READ_OPTION_CASE_INSENSITIVE_P, value);
2188 }
2189
2190 /* Set OPTS and PORT's square_brackets_p option according to VALUE. */
2191 static void
2192 set_port_square_brackets_p (SCM port, scm_t_read_opts *opts, int value)
2193 {
2194 value = !!value;
2195 opts->square_brackets_p = value;
2196 set_port_read_option (port, READ_OPTION_SQUARE_BRACKETS_P, value);
2197 }
2198
2199 /* Set OPTS and PORT's curly_infix_p option according to VALUE. */
2200 static void
2201 set_port_curly_infix_p (SCM port, scm_t_read_opts *opts, int value)
2202 {
2203 value = !!value;
2204 opts->curly_infix_p = value;
2205 set_port_read_option (port, READ_OPTION_CURLY_INFIX_P, value);
2206 }
2207
2208 /* Initialize OPTS based on PORT's read options and the global read
2209 options. */
2210 static void
2211 init_read_options (SCM port, scm_t_read_opts *opts)
2212 {
2213 SCM val, scm_read_options;
2214 unsigned int read_options, x;
2215
2216 scm_read_options = scm_assq_ref (SCM_PTAB_ENTRY(port)->alist,
2217 sym_port_read_options);
2218
2219 if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE))
2220 read_options = scm_to_uint (scm_read_options);
2221 else
2222 read_options = READ_OPTIONS_INHERIT_ALL;
2223
2224 x = READ_OPTION_MASK & (read_options >> READ_OPTION_KEYWORD_STYLE);
2225 if (x == READ_OPTION_INHERIT)
2226 {
2227 val = SCM_PACK (SCM_KEYWORD_STYLE);
2228 if (scm_is_eq (val, scm_keyword_prefix))
2229 x = KEYWORD_STYLE_PREFIX;
2230 else if (scm_is_eq (val, scm_keyword_postfix))
2231 x = KEYWORD_STYLE_POSTFIX;
2232 else
2233 x = KEYWORD_STYLE_HASH_PREFIX;
2234 }
2235 opts->keyword_style = x;
2236
2237 #define RESOLVE_BOOLEAN_OPTION(NAME, name) \
2238 do \
2239 { \
2240 x = READ_OPTION_MASK & (read_options >> READ_OPTION_ ## NAME); \
2241 if (x == READ_OPTION_INHERIT) \
2242 x = !!SCM_ ## NAME; \
2243 opts->name = x; \
2244 } \
2245 while (0)
2246
2247 RESOLVE_BOOLEAN_OPTION (COPY_SOURCE_P, copy_source_p);
2248 RESOLVE_BOOLEAN_OPTION (RECORD_POSITIONS_P, record_positions_p);
2249 RESOLVE_BOOLEAN_OPTION (CASE_INSENSITIVE_P, case_insensitive_p);
2250 RESOLVE_BOOLEAN_OPTION (R6RS_ESCAPES_P, r6rs_escapes_p);
2251 RESOLVE_BOOLEAN_OPTION (SQUARE_BRACKETS_P, square_brackets_p);
2252 RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P, hungry_eol_escapes_p);
2253 RESOLVE_BOOLEAN_OPTION (CURLY_INFIX_P, curly_infix_p);
2254
2255 #undef RESOLVE_BOOLEAN_OPTION
2256
2257 opts->neoteric_p = 0;
2258 }
2259
2260 void
2261 scm_init_read ()
2262 {
2263 SCM read_hash_procs;
2264
2265 read_hash_procs = scm_make_fluid_with_default (SCM_EOL);
2266
2267 scm_i_read_hash_procedures =
2268 SCM_VARIABLE_LOC (scm_c_define ("%read-hash-procedures", read_hash_procs));
2269
2270 scm_init_opts (scm_read_options, scm_read_opts);
2271 #include "libguile/read.x"
2272 }
2273
2274 /*
2275 Local Variables:
2276 c-file-style: "gnu"
2277 End:
2278 */