Merge remote-tracking branch 'origin/stable-2.0'
[bpt/guile.git] / libguile / read.c
1 /* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006,
2 * 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
18 */
19
20
21 \f
22
23 #ifdef HAVE_CONFIG_H
24 # include <config.h>
25 #endif
26
27 #include <stdio.h>
28 #include <string.h>
29 #include <unistd.h>
30 #include <unicase.h>
31 #include <unictype.h>
32 #include <c-strcase.h>
33
34 #include "libguile/_scm.h"
35 #include "libguile/bytevectors.h"
36 #include "libguile/chars.h"
37 #include "libguile/eval.h"
38 #include "libguile/arrays.h"
39 #include "libguile/bitvectors.h"
40 #include "libguile/keywords.h"
41 #include "libguile/alist.h"
42 #include "libguile/srcprop.h"
43 #include "libguile/hashtab.h"
44 #include "libguile/hash.h"
45 #include "libguile/ports.h"
46 #include "libguile/ports-internal.h"
47 #include "libguile/fports.h"
48 #include "libguile/root.h"
49 #include "libguile/strings.h"
50 #include "libguile/strports.h"
51 #include "libguile/vectors.h"
52 #include "libguile/validate.h"
53 #include "libguile/srfi-4.h"
54 #include "libguile/srfi-13.h"
55
56 #include "libguile/read.h"
57 #include "libguile/private-options.h"
58
59
60 \f
61
62 SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
63 SCM_SYMBOL (scm_keyword_prefix, "prefix");
64 SCM_SYMBOL (scm_keyword_postfix, "postfix");
65 SCM_SYMBOL (sym_nil, "nil");
66
67 /* SRFI-105 curly infix expression support */
68 SCM_SYMBOL (sym_nfx, "$nfx$");
69 SCM_SYMBOL (sym_bracket_list, "$bracket-list$");
70 SCM_SYMBOL (sym_bracket_apply, "$bracket-apply$");
71
72 scm_t_option scm_read_opts[] =
73 {
74 { SCM_OPTION_BOOLEAN, "copy", 0,
75 "Copy source code expressions." },
76 { SCM_OPTION_BOOLEAN, "positions", 1,
77 "Record positions of source code expressions." },
78 { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
79 "Convert symbols to lower case."},
80 { SCM_OPTION_SCM, "keywords", (scm_t_bits) SCM_BOOL_F_BITS,
81 "Style of keyword recognition: #f, 'prefix or 'postfix."},
82 { SCM_OPTION_BOOLEAN, "r6rs-hex-escapes", 0,
83 "Use R6RS variable-length character and string hex escapes."},
84 { SCM_OPTION_BOOLEAN, "square-brackets", 1,
85 "Treat `[' and `]' as parentheses, for R6RS compatibility."},
86 { SCM_OPTION_BOOLEAN, "hungry-eol-escapes", 0,
87 "In strings, consume leading whitespace after an escaped end-of-line."},
88 { SCM_OPTION_BOOLEAN, "curly-infix", 0,
89 "Support SRFI-105 curly infix expressions."},
90 { 0, },
91 };
92
93 /* Internal read options structure. This is initialized by 'scm_read'
94 from the global and per-port read options, and a pointer is passed
95 down to all helper functions. */
96
97 enum t_keyword_style
98 {
99 KEYWORD_STYLE_HASH_PREFIX,
100 KEYWORD_STYLE_PREFIX,
101 KEYWORD_STYLE_POSTFIX
102 };
103
104 struct t_read_opts
105 {
106 enum t_keyword_style keyword_style;
107 unsigned int copy_source_p : 1;
108 unsigned int record_positions_p : 1;
109 unsigned int case_insensitive_p : 1;
110 unsigned int r6rs_escapes_p : 1;
111 unsigned int square_brackets_p : 1;
112 unsigned int hungry_eol_escapes_p : 1;
113 unsigned int curly_infix_p : 1;
114 unsigned int neoteric_p : 1;
115 };
116
117 typedef struct t_read_opts scm_t_read_opts;
118
119
120 /*
121 Give meaningful error messages for errors
122
123 We use the format
124
125 FILE:LINE:COL: MESSAGE
126 This happened in ....
127
128 This is not standard GNU format, but the test-suite likes the real
129 message to be in front.
130
131 */
132
133
134 void
135 scm_i_input_error (char const *function,
136 SCM port, const char *message, SCM arg)
137 {
138 SCM fn = (scm_is_string (SCM_FILENAME(port))
139 ? SCM_FILENAME(port)
140 : scm_from_locale_string ("#<unknown port>"));
141
142 SCM string_port = scm_open_output_string ();
143 SCM string = SCM_EOL;
144 scm_simple_format (string_port,
145 scm_from_locale_string ("~A:~S:~S: ~A"),
146 scm_list_4 (fn,
147 scm_from_long (SCM_LINUM (port) + 1),
148 scm_from_int (SCM_COL (port) + 1),
149 scm_from_locale_string (message)));
150
151 string = scm_get_output_string (string_port);
152 scm_close_output_port (string_port);
153 scm_error_scm (scm_from_latin1_symbol ("read-error"),
154 function? scm_from_locale_string (function) : SCM_BOOL_F,
155 string,
156 arg,
157 SCM_BOOL_F);
158 }
159
160
161 SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
162 (SCM setting),
163 "Option interface for the read options. Instead of using\n"
164 "this procedure directly, use the procedures @code{read-enable},\n"
165 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
166 #define FUNC_NAME s_scm_read_options
167 {
168 SCM ans = scm_options (setting,
169 scm_read_opts,
170 FUNC_NAME);
171 if (SCM_COPY_SOURCE_P)
172 SCM_RECORD_POSITIONS_P = 1;
173 return ans;
174 }
175 #undef FUNC_NAME
176
177 /* A fluid referring to an association list mapping extra hash
178 characters to procedures. */
179 static SCM *scm_i_read_hash_procedures;
180
181 static SCM
182 scm_i_read_hash_procedures_ref (void)
183 {
184 return scm_fluid_ref (*scm_i_read_hash_procedures);
185 }
186
187 static void
188 scm_i_read_hash_procedures_set_x (SCM value)
189 {
190 scm_fluid_set_x (*scm_i_read_hash_procedures, value);
191 }
192
193 \f
194 /* Token readers. */
195
196
197 /* Size of the C buffer used to read symbols and numbers. */
198 #define READER_BUFFER_SIZE 128
199
200 /* Number of 32-bit codepoints in the buffer used to read strings. */
201 #define READER_STRING_BUFFER_SIZE 128
202
203 /* The maximum size of Scheme character names. */
204 #define READER_CHAR_NAME_MAX_SIZE 50
205
206 /* The maximum size of reader directive names. */
207 #define READER_DIRECTIVE_NAME_MAX_SIZE 50
208
209
210 /* `isblank' is only in C99. */
211 #define CHAR_IS_BLANK_(_chr) \
212 (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n') \
213 || ((_chr) == '\f') || ((_chr) == '\r'))
214
215 #ifdef MSDOS
216 # define CHAR_IS_BLANK(_chr) \
217 ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
218 #else
219 # define CHAR_IS_BLANK CHAR_IS_BLANK_
220 #endif
221
222
223 /* R5RS one-character delimiters (see section 7.1.1, ``Lexical
224 structure''). */
225 #define CHAR_IS_R5RS_DELIMITER(c) \
226 (CHAR_IS_BLANK (c) \
227 || (c) == ')' || (c) == '(' || (c) == ';' || (c) == '"')
228
229 #define CHAR_IS_DELIMITER(c) \
230 (CHAR_IS_R5RS_DELIMITER (c) \
231 || (((c) == ']' || (c) == '[') && (opts->square_brackets_p \
232 || opts->curly_infix_p)) \
233 || (((c) == '}' || (c) == '{') && opts->curly_infix_p))
234
235 /* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
236 Structure''. */
237 #define CHAR_IS_EXPONENT_MARKER(_chr) \
238 (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
239 || ((_chr) == 'd') || ((_chr) == 'l'))
240
241 /* Read an SCSH block comment. */
242 static SCM scm_read_scsh_block_comment (scm_t_wchar, SCM);
243 static SCM scm_read_r6rs_block_comment (scm_t_wchar, SCM);
244 static SCM scm_read_commented_expression (scm_t_wchar, SCM, scm_t_read_opts *);
245 static SCM scm_read_shebang (scm_t_wchar, SCM, scm_t_read_opts *);
246 static SCM scm_get_hash_procedure (int);
247
248 /* Read from PORT until a delimiter (e.g., a whitespace) is read. Put the
249 result in the pre-allocated buffer BUF. Return zero if the whole token has
250 fewer than BUF_SIZE bytes, non-zero otherwise. READ will be set the number of
251 bytes actually read. */
252 static int
253 read_token (SCM port, scm_t_read_opts *opts,
254 char *buf, size_t buf_size, size_t *read)
255 {
256 *read = 0;
257
258 while (*read < buf_size)
259 {
260 int chr;
261
262 chr = scm_get_byte_or_eof_unlocked (port);
263
264 if (chr == EOF)
265 return 0;
266 else if (CHAR_IS_DELIMITER (chr))
267 {
268 scm_unget_byte_unlocked (chr, port);
269 return 0;
270 }
271 else
272 {
273 *buf = (char) chr;
274 buf++, (*read)++;
275 }
276 }
277
278 return 1;
279 }
280
281 /* Like `read_token', but return either BUFFER, or a GC-allocated buffer
282 if the token doesn't fit in BUFFER_SIZE bytes. */
283 static char *
284 read_complete_token (SCM port, scm_t_read_opts *opts,
285 char *buffer, size_t buffer_size, size_t *read)
286 {
287 int overflow = 0;
288 size_t bytes_read, overflow_size = 0;
289 char *overflow_buffer = NULL;
290
291 do
292 {
293 overflow = read_token (port, opts, buffer, buffer_size, &bytes_read);
294 if (bytes_read == 0)
295 break;
296 if (overflow || overflow_size != 0)
297 {
298 if (overflow_size == 0)
299 {
300 overflow_buffer = scm_gc_malloc_pointerless (bytes_read, "read");
301 memcpy (overflow_buffer, buffer, bytes_read);
302 overflow_size = bytes_read;
303 }
304 else
305 {
306 char *new_buf =
307 scm_gc_malloc_pointerless (overflow_size + bytes_read, "read");
308
309 memcpy (new_buf, overflow_buffer, overflow_size);
310 memcpy (new_buf + overflow_size, buffer, bytes_read);
311
312 overflow_buffer = new_buf;
313 overflow_size += bytes_read;
314 }
315 }
316 }
317 while (overflow);
318
319 if (overflow_size)
320 *read = overflow_size;
321 else
322 *read = bytes_read;
323
324 return (overflow_size > 0 ? overflow_buffer : buffer);
325 }
326
327 /* Skip whitespace from PORT and return the first non-whitespace character
328 read. Raise an error on end-of-file. */
329 static int
330 flush_ws (SCM port, scm_t_read_opts *opts, const char *eoferr)
331 {
332 scm_t_wchar c;
333 while (1)
334 switch (c = scm_getc_unlocked (port))
335 {
336 case EOF:
337 goteof:
338 if (eoferr)
339 {
340 scm_i_input_error (eoferr,
341 port,
342 "end of file",
343 SCM_EOL);
344 }
345 return c;
346
347 case ';':
348 lp:
349 switch (c = scm_getc_unlocked (port))
350 {
351 case EOF:
352 goto goteof;
353 default:
354 goto lp;
355 case SCM_LINE_INCREMENTORS:
356 break;
357 }
358 break;
359
360 case '#':
361 switch (c = scm_getc_unlocked (port))
362 {
363 case EOF:
364 eoferr = "read_sharp";
365 goto goteof;
366 case '!':
367 scm_read_shebang (c, port, opts);
368 break;
369 case ';':
370 scm_read_commented_expression (c, port, opts);
371 break;
372 case '|':
373 if (scm_is_false (scm_get_hash_procedure (c)))
374 {
375 scm_read_r6rs_block_comment (c, port);
376 break;
377 }
378 /* fall through */
379 default:
380 scm_ungetc_unlocked (c, port);
381 return '#';
382 }
383 break;
384
385 case SCM_LINE_INCREMENTORS:
386 case SCM_SINGLE_SPACES:
387 case '\t':
388 break;
389
390 default:
391 return c;
392 }
393
394 return 0;
395 }
396
397
398 \f
399 /* Token readers. */
400
401 static SCM scm_read_expression (SCM port, scm_t_read_opts *opts);
402 static SCM scm_read_sharp (int chr, SCM port, scm_t_read_opts *opts,
403 long line, int column);
404
405
406 static SCM
407 maybe_annotate_source (SCM x, SCM port, scm_t_read_opts *opts,
408 long line, int column)
409 {
410 if (opts->record_positions_p)
411 scm_i_set_source_properties_x (x, line, column, SCM_FILENAME (port));
412 return x;
413 }
414
415 static SCM
416 scm_read_sexp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
417 #define FUNC_NAME "scm_i_lreadparen"
418 {
419 int c;
420 SCM tmp, tl, ans = SCM_EOL;
421 const int curly_list_p = (chr == '{') && opts->curly_infix_p;
422 const int terminating_char = ((chr == '{') ? '}'
423 : ((chr == '[') ? ']'
424 : ')'));
425
426 /* Need to capture line and column numbers here. */
427 long line = SCM_LINUM (port);
428 int column = SCM_COL (port) - 1;
429
430 c = flush_ws (port, opts, FUNC_NAME);
431 if (terminating_char == c)
432 return SCM_EOL;
433
434 scm_ungetc_unlocked (c, port);
435 tmp = scm_read_expression (port, opts);
436
437 /* Note that it is possible for scm_read_expression to return
438 scm_sym_dot, but not as part of a dotted pair: as in #{.}#. So
439 check that it's a real dot by checking `c'. */
440 if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
441 {
442 ans = scm_read_expression (port, opts);
443 if (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
444 scm_i_input_error (FUNC_NAME, port, "missing close paren",
445 SCM_EOL);
446 return ans;
447 }
448
449 /* Build the head of the list structure. */
450 ans = tl = scm_cons (tmp, SCM_EOL);
451
452 while (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
453 {
454 SCM new_tail;
455
456 if (c == ')' || (c == ']' && opts->square_brackets_p)
457 || ((c == '}' || c == ']') && opts->curly_infix_p))
458 scm_i_input_error (FUNC_NAME, port,
459 "in pair: mismatched close paren: ~A",
460 scm_list_1 (SCM_MAKE_CHAR (c)));
461
462 scm_ungetc_unlocked (c, port);
463 tmp = scm_read_expression (port, opts);
464
465 /* See above note about scm_sym_dot. */
466 if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
467 {
468 SCM_SETCDR (tl, scm_read_expression (port, opts));
469
470 c = flush_ws (port, opts, FUNC_NAME);
471 if (terminating_char != c)
472 scm_i_input_error (FUNC_NAME, port,
473 "in pair: missing close paren", SCM_EOL);
474 break;
475 }
476
477 new_tail = scm_cons (tmp, SCM_EOL);
478 SCM_SETCDR (tl, new_tail);
479 tl = new_tail;
480 }
481
482 if (curly_list_p)
483 {
484 /* In addition to finding the length, 'scm_ilength' checks for
485 improper or circular lists, in which case it returns -1. */
486 int len = scm_ilength (ans);
487
488 /* The (len == 0) case is handled above */
489 if (len == 1)
490 /* Return directly to avoid re-annotating the element's source
491 location with the position of the outer brace. Also, it
492 might not be possible to annotate the element. */
493 return scm_car (ans); /* {e} => e */
494 else if (len == 2)
495 ; /* Leave the list unchanged: {e1 e2} => (e1 e2) */
496 else if (len >= 3 && (len & 1))
497 {
498 /* It's a proper list whose length is odd and at least 3. If
499 the elements at odd indices (the infix operator positions)
500 are all 'equal?', then it's a simple curly-infix list.
501 Otherwise it's a mixed curly-infix list. */
502 SCM op = scm_cadr (ans);
503
504 /* Check to see if the elements at odd indices are 'equal?' */
505 for (tl = scm_cdddr (ans); ; tl = scm_cddr (tl))
506 {
507 if (scm_is_null (tl))
508 {
509 /* Convert simple curly-infix list to prefix:
510 {a <op> b <op> ...} => (<op> a b ...) */
511 tl = ans;
512 while (scm_is_pair (scm_cdr (tl)))
513 {
514 tmp = scm_cddr (tl);
515 SCM_SETCDR (tl, tmp);
516 tl = tmp;
517 }
518 ans = scm_cons (op, ans);
519 break;
520 }
521 else if (scm_is_false (scm_equal_p (op, scm_car (tl))))
522 {
523 /* Mixed curly-infix list: {e ...} => ($nfx$ e ...) */
524 ans = scm_cons (sym_nfx, ans);
525 break;
526 }
527 }
528 }
529 else
530 /* Mixed curly-infix (possibly improper) list:
531 {e . tail} => ($nfx$ e . tail) */
532 ans = scm_cons (sym_nfx, ans);
533 }
534
535 return maybe_annotate_source (ans, port, opts, line, column);
536 }
537 #undef FUNC_NAME
538
539
540 /* Read a hexadecimal number NDIGITS in length. Put its value into the variable
541 C. If TERMINATOR is non-null, terminate early if the TERMINATOR character is
542 found. */
543 #define SCM_READ_HEX_ESCAPE(ndigits, terminator) \
544 do \
545 { \
546 scm_t_wchar a; \
547 size_t i = 0; \
548 c = 0; \
549 while (i < ndigits) \
550 { \
551 a = scm_getc_unlocked (port); \
552 if (a == EOF) \
553 goto str_eof; \
554 if (terminator \
555 && (a == (scm_t_wchar) terminator) \
556 && (i > 0)) \
557 break; \
558 if ('0' <= a && a <= '9') \
559 a -= '0'; \
560 else if ('A' <= a && a <= 'F') \
561 a = a - 'A' + 10; \
562 else if ('a' <= a && a <= 'f') \
563 a = a - 'a' + 10; \
564 else \
565 { \
566 c = a; \
567 goto bad_escaped; \
568 } \
569 c = c * 16 + a; \
570 i ++; \
571 } \
572 } while (0)
573
574 static void
575 skip_intraline_whitespace (SCM port)
576 {
577 scm_t_wchar c;
578
579 do
580 {
581 c = scm_getc_unlocked (port);
582 if (c == EOF)
583 return;
584 }
585 while (c == '\t' || uc_is_general_category (c, UC_SPACE_SEPARATOR));
586
587 scm_ungetc_unlocked (c, port);
588 }
589
590 static SCM
591 scm_read_string (int chr, SCM port, scm_t_read_opts *opts)
592 #define FUNC_NAME "scm_lreadr"
593 {
594 /* For strings smaller than C_STR, this function creates only one Scheme
595 object (the string returned). */
596
597 SCM str = SCM_EOL;
598 size_t c_str_len = 0;
599 scm_t_wchar c, c_str[READER_STRING_BUFFER_SIZE];
600
601 /* Need to capture line and column numbers here. */
602 long line = SCM_LINUM (port);
603 int column = SCM_COL (port) - 1;
604
605 while ('"' != (c = scm_getc_unlocked (port)))
606 {
607 if (c == EOF)
608 {
609 str_eof:
610 scm_i_input_error (FUNC_NAME, port,
611 "end of file in string constant", SCM_EOL);
612 }
613
614 if (c_str_len + 1 >= READER_STRING_BUFFER_SIZE)
615 {
616 str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
617 c_str_len = 0;
618 }
619
620 if (c == '\\')
621 {
622 switch (c = scm_getc_unlocked (port))
623 {
624 case EOF:
625 goto str_eof;
626 case '"':
627 case '\\':
628 break;
629 case '\n':
630 if (opts->hungry_eol_escapes_p)
631 skip_intraline_whitespace (port);
632 continue;
633 case '0':
634 c = '\0';
635 break;
636 case 'f':
637 c = '\f';
638 break;
639 case 'n':
640 c = '\n';
641 break;
642 case 'r':
643 c = '\r';
644 break;
645 case 't':
646 c = '\t';
647 break;
648 case 'a':
649 c = '\007';
650 break;
651 case 'v':
652 c = '\v';
653 break;
654 case 'b':
655 c = '\010';
656 break;
657 case 'x':
658 if (opts->r6rs_escapes_p)
659 SCM_READ_HEX_ESCAPE (10, ';');
660 else
661 SCM_READ_HEX_ESCAPE (2, '\0');
662 break;
663 case 'u':
664 if (!opts->r6rs_escapes_p)
665 {
666 SCM_READ_HEX_ESCAPE (4, '\0');
667 break;
668 }
669 case 'U':
670 if (!opts->r6rs_escapes_p)
671 {
672 SCM_READ_HEX_ESCAPE (6, '\0');
673 break;
674 }
675 default:
676 bad_escaped:
677 scm_i_input_error (FUNC_NAME, port,
678 "illegal character in escape sequence: ~S",
679 scm_list_1 (SCM_MAKE_CHAR (c)));
680 }
681 }
682
683 c_str[c_str_len++] = c;
684 }
685
686 if (scm_is_null (str))
687 /* Fast path: we got a string that fits in C_STR. */
688 str = scm_from_utf32_stringn (c_str, c_str_len);
689 else
690 {
691 if (c_str_len > 0)
692 str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
693
694 str = scm_string_concatenate_reverse (str, SCM_UNDEFINED, SCM_UNDEFINED);
695 }
696
697 return maybe_annotate_source (str, port, opts, line, column);
698 }
699 #undef FUNC_NAME
700
701
702 static SCM
703 scm_read_number (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
704 {
705 SCM result, str = SCM_EOL;
706 char local_buffer[READER_BUFFER_SIZE], *buffer;
707 size_t bytes_read;
708
709 /* Need to capture line and column numbers here. */
710 long line = SCM_LINUM (port);
711 int column = SCM_COL (port) - 1;
712
713 scm_ungetc_unlocked (chr, port);
714 buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
715 &bytes_read);
716
717 str = scm_from_port_stringn (buffer, bytes_read, port);
718
719 result = scm_string_to_number (str, SCM_UNDEFINED);
720 if (scm_is_false (result))
721 {
722 /* Return a symbol instead of a number */
723 if (opts->case_insensitive_p)
724 str = scm_string_downcase_x (str);
725 result = scm_string_to_symbol (str);
726 }
727 else if (SCM_NIMP (result))
728 result = maybe_annotate_source (result, port, opts, line, column);
729
730 SCM_COL (port) += scm_i_string_length (str);
731 return result;
732 }
733
734 static SCM
735 scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
736 {
737 SCM result;
738 int ends_with_colon = 0;
739 size_t bytes_read;
740 int postfix = (opts->keyword_style == KEYWORD_STYLE_POSTFIX);
741 char local_buffer[READER_BUFFER_SIZE], *buffer;
742 SCM str;
743
744 scm_ungetc_unlocked (chr, port);
745 buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
746 &bytes_read);
747 if (bytes_read > 0)
748 ends_with_colon = buffer[bytes_read - 1] == ':';
749
750 if (postfix && ends_with_colon && (bytes_read > 1))
751 {
752 str = scm_from_port_stringn (buffer, bytes_read - 1, port);
753
754 if (opts->case_insensitive_p)
755 str = scm_string_downcase_x (str);
756 result = scm_symbol_to_keyword (scm_string_to_symbol (str));
757 }
758 else
759 {
760 str = scm_from_port_stringn (buffer, bytes_read, port);
761
762 if (opts->case_insensitive_p)
763 str = scm_string_downcase_x (str);
764 result = scm_string_to_symbol (str);
765 }
766
767 SCM_COL (port) += scm_i_string_length (str);
768 return result;
769 }
770
771 static SCM
772 scm_read_number_and_radix (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
773 #define FUNC_NAME "scm_lreadr"
774 {
775 SCM result;
776 size_t read;
777 char local_buffer[READER_BUFFER_SIZE], *buffer;
778 unsigned int radix;
779 SCM str;
780
781 switch (chr)
782 {
783 case 'B':
784 case 'b':
785 radix = 2;
786 break;
787
788 case 'o':
789 case 'O':
790 radix = 8;
791 break;
792
793 case 'd':
794 case 'D':
795 radix = 10;
796 break;
797
798 case 'x':
799 case 'X':
800 radix = 16;
801 break;
802
803 default:
804 scm_ungetc_unlocked (chr, port);
805 scm_ungetc_unlocked ('#', port);
806 radix = 10;
807 }
808
809 buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
810 &read);
811
812 str = scm_from_port_stringn (buffer, read, port);
813
814 result = scm_string_to_number (str, scm_from_uint (radix));
815
816 SCM_COL (port) += scm_i_string_length (str);
817
818 if (scm_is_true (result))
819 return result;
820
821 scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
822
823 return SCM_BOOL_F;
824 }
825 #undef FUNC_NAME
826
827 static SCM
828 scm_read_quote (int chr, SCM port, scm_t_read_opts *opts)
829 {
830 SCM p;
831 long line = SCM_LINUM (port);
832 int column = SCM_COL (port) - 1;
833
834 switch (chr)
835 {
836 case '`':
837 p = scm_sym_quasiquote;
838 break;
839
840 case '\'':
841 p = scm_sym_quote;
842 break;
843
844 case ',':
845 {
846 scm_t_wchar c;
847
848 c = scm_getc_unlocked (port);
849 if ('@' == c)
850 p = scm_sym_uq_splicing;
851 else
852 {
853 scm_ungetc_unlocked (c, port);
854 p = scm_sym_unquote;
855 }
856 break;
857 }
858
859 default:
860 fprintf (stderr, "%s: unhandled quote character (%i)\n",
861 "scm_read_quote", chr);
862 abort ();
863 }
864
865 p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL);
866 return maybe_annotate_source (p, port, opts, line, column);
867 }
868
869 SCM_SYMBOL (sym_syntax, "syntax");
870 SCM_SYMBOL (sym_quasisyntax, "quasisyntax");
871 SCM_SYMBOL (sym_unsyntax, "unsyntax");
872 SCM_SYMBOL (sym_unsyntax_splicing, "unsyntax-splicing");
873
874 static SCM
875 scm_read_syntax (int chr, SCM port, scm_t_read_opts *opts)
876 {
877 SCM p;
878 long line = SCM_LINUM (port);
879 int column = SCM_COL (port) - 1;
880
881 switch (chr)
882 {
883 case '`':
884 p = sym_quasisyntax;
885 break;
886
887 case '\'':
888 p = sym_syntax;
889 break;
890
891 case ',':
892 {
893 int c;
894
895 c = scm_getc_unlocked (port);
896 if ('@' == c)
897 p = sym_unsyntax_splicing;
898 else
899 {
900 scm_ungetc_unlocked (c, port);
901 p = sym_unsyntax;
902 }
903 break;
904 }
905
906 default:
907 fprintf (stderr, "%s: unhandled syntax character (%i)\n",
908 "scm_read_syntax", chr);
909 abort ();
910 }
911
912 p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL);
913 return maybe_annotate_source (p, port, opts, line, column);
914 }
915
916 static SCM
917 scm_read_nil (int chr, SCM port, scm_t_read_opts *opts)
918 {
919 SCM id = scm_read_mixed_case_symbol (chr, port, opts);
920
921 if (!scm_is_eq (id, sym_nil))
922 scm_i_input_error ("scm_read_nil", port,
923 "unexpected input while reading #nil: ~a",
924 scm_list_1 (id));
925
926 return SCM_ELISP_NIL;
927 }
928
929 static SCM
930 scm_read_semicolon_comment (int chr, SCM port)
931 {
932 int c;
933
934 /* We use the get_byte here because there is no need to get the
935 locale correct with comment input. This presumes that newline
936 always represents itself no matter what the encoding is. */
937 for (c = scm_get_byte_or_eof_unlocked (port);
938 (c != EOF) && (c != '\n');
939 c = scm_get_byte_or_eof_unlocked (port));
940
941 return SCM_UNSPECIFIED;
942 }
943
944 \f
945 /* Sharp readers, i.e. readers called after a `#' sign has been read. */
946
947 static SCM
948 scm_read_boolean (int chr, SCM port)
949 {
950 switch (chr)
951 {
952 case 't':
953 case 'T':
954 return SCM_BOOL_T;
955
956 case 'f':
957 case 'F':
958 return SCM_BOOL_F;
959 }
960
961 return SCM_UNSPECIFIED;
962 }
963
964 static SCM
965 scm_read_character (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
966 #define FUNC_NAME "scm_lreadr"
967 {
968 char buffer[READER_CHAR_NAME_MAX_SIZE];
969 SCM charname;
970 size_t charname_len, bytes_read;
971 scm_t_wchar cp;
972 int overflow;
973 scm_t_port_internal *pti;
974
975 overflow = read_token (port, opts, buffer, READER_CHAR_NAME_MAX_SIZE,
976 &bytes_read);
977 if (overflow)
978 scm_i_input_error (FUNC_NAME, port, "character name too long", SCM_EOL);
979
980 if (bytes_read == 0)
981 {
982 chr = scm_getc_unlocked (port);
983 if (chr == EOF)
984 scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
985 "while reading character", SCM_EOL);
986
987 /* CHR must be a token delimiter, like a whitespace. */
988 return (SCM_MAKE_CHAR (chr));
989 }
990
991 pti = SCM_PORT_GET_INTERNAL (port);
992
993 /* Simple ASCII characters can be processed immediately. Also, simple
994 ISO-8859-1 characters can be processed immediately if the encoding for this
995 port is ISO-8859-1. */
996 if (bytes_read == 1 &&
997 ((unsigned char) buffer[0] <= 127
998 || pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1))
999 {
1000 SCM_COL (port) += 1;
1001 return SCM_MAKE_CHAR (buffer[0]);
1002 }
1003
1004 /* Otherwise, convert the buffer into a proper scheme string for
1005 processing. */
1006 charname = scm_from_port_stringn (buffer, bytes_read, port);
1007 charname_len = scm_i_string_length (charname);
1008 SCM_COL (port) += charname_len;
1009 cp = scm_i_string_ref (charname, 0);
1010 if (charname_len == 1)
1011 return SCM_MAKE_CHAR (cp);
1012
1013 /* Ignore dotted circles, which may be used to keep combining characters from
1014 combining with the backslash in #\charname. */
1015 if (cp == SCM_CODEPOINT_DOTTED_CIRCLE && charname_len == 2)
1016 return SCM_MAKE_CHAR (scm_i_string_ref (charname, 1));
1017
1018 if (cp >= '0' && cp < '8')
1019 {
1020 /* Dirk:FIXME:: This type of character syntax is not R5RS
1021 * compliant. Further, it should be verified that the constant
1022 * does only consist of octal digits. */
1023 SCM p = scm_string_to_number (charname, scm_from_uint (8));
1024 if (SCM_I_INUMP (p))
1025 {
1026 scm_t_wchar c = scm_to_uint32 (p);
1027 if (SCM_IS_UNICODE_CHAR (c))
1028 return SCM_MAKE_CHAR (c);
1029 else
1030 scm_i_input_error (FUNC_NAME, port,
1031 "out-of-range octal character escape: ~a",
1032 scm_list_1 (charname));
1033 }
1034 }
1035
1036 if (cp == 'x' && (charname_len > 1))
1037 {
1038 SCM p;
1039
1040 /* Convert from hex, skipping the initial 'x' character in CHARNAME */
1041 p = scm_string_to_number (scm_c_substring (charname, 1, charname_len),
1042 scm_from_uint (16));
1043 if (SCM_I_INUMP (p))
1044 {
1045 scm_t_wchar c = scm_to_uint32 (p);
1046 if (SCM_IS_UNICODE_CHAR (c))
1047 return SCM_MAKE_CHAR (c);
1048 else
1049 scm_i_input_error (FUNC_NAME, port,
1050 "out-of-range hex character escape: ~a",
1051 scm_list_1 (charname));
1052 }
1053 }
1054
1055 /* The names of characters should never have non-Latin1
1056 characters. */
1057 if (scm_i_is_narrow_string (charname)
1058 || scm_i_try_narrow_string (charname))
1059 { SCM ch = scm_i_charname_to_char (scm_i_string_chars (charname),
1060 charname_len);
1061 if (scm_is_true (ch))
1062 return ch;
1063 }
1064
1065 scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
1066 scm_list_1 (charname));
1067
1068 return SCM_UNSPECIFIED;
1069 }
1070 #undef FUNC_NAME
1071
1072 static SCM
1073 scm_read_keyword (int chr, SCM port, scm_t_read_opts *opts)
1074 {
1075 SCM symbol;
1076
1077 /* Read the symbol that comprises the keyword. Doing this instead of
1078 invoking a specific symbol reader function allows `scm_read_keyword ()'
1079 to adapt to the delimiters currently valid of symbols.
1080
1081 XXX: This implementation allows sloppy syntaxes like `#: key'. */
1082 symbol = scm_read_expression (port, opts);
1083 if (!scm_is_symbol (symbol))
1084 scm_i_input_error ("scm_read_keyword", port,
1085 "keyword prefix `~a' not followed by a symbol: ~s",
1086 scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
1087
1088 return (scm_symbol_to_keyword (symbol));
1089 }
1090
1091 static SCM
1092 scm_read_vector (int chr, SCM port, scm_t_read_opts *opts,
1093 long line, int column)
1094 {
1095 /* Note: We call `scm_read_sexp ()' rather than READER here in order to
1096 guarantee that it's going to do what we want. After all, this is an
1097 implementation detail of `scm_read_vector ()', not a desirable
1098 property. */
1099 return maybe_annotate_source (scm_vector (scm_read_sexp (chr, port, opts)),
1100 port, opts, line, column);
1101 }
1102
1103 /* Helper used by scm_read_array */
1104 static int
1105 read_decimal_integer (SCM port, int c, ssize_t *resp)
1106 {
1107 ssize_t sign = 1;
1108 ssize_t res = 0;
1109 int got_it = 0;
1110
1111 if (c == '-')
1112 {
1113 sign = -1;
1114 c = scm_getc_unlocked (port);
1115 }
1116
1117 while ('0' <= c && c <= '9')
1118 {
1119 res = 10*res + c-'0';
1120 got_it = 1;
1121 c = scm_getc_unlocked (port);
1122 }
1123
1124 if (got_it)
1125 *resp = sign * res;
1126 return c;
1127 }
1128
1129 /* Read an array. This function can also read vectors and uniform
1130 vectors. Also, the conflict between '#f' and '#f32' and '#f64' is
1131 handled here.
1132
1133 C is the first character read after the '#'. */
1134 static SCM
1135 scm_read_array (int c, SCM port, scm_t_read_opts *opts, long line, int column)
1136 {
1137 ssize_t rank;
1138 scm_t_wchar tag_buf[8];
1139 int tag_len;
1140
1141 SCM tag, shape = SCM_BOOL_F, elements, array;
1142
1143 /* XXX - shortcut for ordinary vectors. Shouldn't be necessary but
1144 the array code can not deal with zero-length dimensions yet, and
1145 we want to allow zero-length vectors, of course. */
1146 if (c == '(')
1147 return scm_read_vector (c, port, opts, line, column);
1148
1149 /* Disambiguate between '#f' and uniform floating point vectors. */
1150 if (c == 'f')
1151 {
1152 c = scm_getc_unlocked (port);
1153 if (c != '3' && c != '6')
1154 {
1155 if (c != EOF)
1156 scm_ungetc_unlocked (c, port);
1157 return SCM_BOOL_F;
1158 }
1159 rank = 1;
1160 tag_buf[0] = 'f';
1161 tag_len = 1;
1162 goto continue_reading_tag;
1163 }
1164
1165 /* Read rank. */
1166 rank = 1;
1167 c = read_decimal_integer (port, c, &rank);
1168 if (rank < 0)
1169 scm_i_input_error (NULL, port, "array rank must be non-negative",
1170 SCM_EOL);
1171
1172 /* Read tag. */
1173 tag_len = 0;
1174 continue_reading_tag:
1175 while (c != EOF && c != '(' && c != '@' && c != ':'
1176 && tag_len < sizeof tag_buf / sizeof tag_buf[0])
1177 {
1178 tag_buf[tag_len++] = c;
1179 c = scm_getc_unlocked (port);
1180 }
1181 if (tag_len == 0)
1182 tag = SCM_BOOL_T;
1183 else
1184 {
1185 tag = scm_string_to_symbol (scm_from_utf32_stringn (tag_buf, tag_len));
1186 if (tag_len == sizeof tag_buf / sizeof tag_buf[0])
1187 scm_i_input_error (NULL, port, "invalid array tag, starting with: ~a",
1188 scm_list_1 (tag));
1189 }
1190
1191 /* Read shape. */
1192 if (c == '@' || c == ':')
1193 {
1194 shape = SCM_EOL;
1195
1196 do
1197 {
1198 ssize_t lbnd = 0, len = 0;
1199 SCM s;
1200
1201 if (c == '@')
1202 {
1203 c = scm_getc_unlocked (port);
1204 c = read_decimal_integer (port, c, &lbnd);
1205 }
1206
1207 s = scm_from_ssize_t (lbnd);
1208
1209 if (c == ':')
1210 {
1211 c = scm_getc_unlocked (port);
1212 c = read_decimal_integer (port, c, &len);
1213 if (len < 0)
1214 scm_i_input_error (NULL, port,
1215 "array length must be non-negative",
1216 SCM_EOL);
1217
1218 s = scm_list_2 (s, scm_from_ssize_t (lbnd+len-1));
1219 }
1220
1221 shape = scm_cons (s, shape);
1222 } while (c == '@' || c == ':');
1223
1224 shape = scm_reverse_x (shape, SCM_EOL);
1225 }
1226
1227 /* Read nested lists of elements. */
1228 if (c != '(')
1229 scm_i_input_error (NULL, port,
1230 "missing '(' in vector or array literal",
1231 SCM_EOL);
1232 elements = scm_read_sexp (c, port, opts);
1233
1234 if (scm_is_false (shape))
1235 shape = scm_from_ssize_t (rank);
1236 else if (scm_ilength (shape) != rank)
1237 scm_i_input_error
1238 (NULL, port,
1239 "the number of shape specifications must match the array rank",
1240 SCM_EOL);
1241
1242 /* Handle special print syntax of rank zero arrays; see
1243 scm_i_print_array for a rationale. */
1244 if (rank == 0)
1245 {
1246 if (!scm_is_pair (elements))
1247 scm_i_input_error (NULL, port,
1248 "too few elements in array literal, need 1",
1249 SCM_EOL);
1250 if (!scm_is_null (SCM_CDR (elements)))
1251 scm_i_input_error (NULL, port,
1252 "too many elements in array literal, want 1",
1253 SCM_EOL);
1254 elements = SCM_CAR (elements);
1255 }
1256
1257 /* Construct array, annotate with source location, and return. */
1258 array = scm_list_to_typed_array (tag, shape, elements);
1259 return maybe_annotate_source (array, port, opts, line, column);
1260 }
1261
1262 static SCM
1263 scm_read_srfi4_vector (int chr, SCM port, scm_t_read_opts *opts,
1264 long line, int column)
1265 {
1266 return scm_read_array (chr, port, opts, line, column);
1267 }
1268
1269 static SCM
1270 scm_read_bytevector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
1271 long line, int column)
1272 {
1273 chr = scm_getc_unlocked (port);
1274 if (chr != 'u')
1275 goto syntax;
1276
1277 chr = scm_getc_unlocked (port);
1278 if (chr != '8')
1279 goto syntax;
1280
1281 chr = scm_getc_unlocked (port);
1282 if (chr != '(')
1283 goto syntax;
1284
1285 return maybe_annotate_source
1286 (scm_u8_list_to_bytevector (scm_read_sexp (chr, port, opts)),
1287 port, opts, line, column);
1288
1289 syntax:
1290 scm_i_input_error ("read_bytevector", port,
1291 "invalid bytevector prefix",
1292 SCM_MAKE_CHAR (chr));
1293 return SCM_UNSPECIFIED;
1294 }
1295
1296 static SCM
1297 scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
1298 long line, int column)
1299 {
1300 /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
1301 terribly inefficient but who cares? */
1302 SCM s_bits = SCM_EOL;
1303
1304 for (chr = scm_getc_unlocked (port);
1305 (chr != EOF) && ((chr == '0') || (chr == '1'));
1306 chr = scm_getc_unlocked (port))
1307 {
1308 s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
1309 }
1310
1311 if (chr != EOF)
1312 scm_ungetc_unlocked (chr, port);
1313
1314 return maybe_annotate_source
1315 (scm_bitvector (scm_reverse_x (s_bits, SCM_EOL)),
1316 port, opts, line, column);
1317 }
1318
1319 static SCM
1320 scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
1321 {
1322 int bang_seen = 0;
1323
1324 for (;;)
1325 {
1326 int c = scm_getc_unlocked (port);
1327
1328 if (c == EOF)
1329 scm_i_input_error ("skip_block_comment", port,
1330 "unterminated `#! ... !#' comment", SCM_EOL);
1331
1332 if (c == '!')
1333 bang_seen = 1;
1334 else if (c == '#' && bang_seen)
1335 break;
1336 else
1337 bang_seen = 0;
1338 }
1339
1340 return SCM_UNSPECIFIED;
1341 }
1342
1343 static void set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts,
1344 int value);
1345 static void set_port_square_brackets_p (SCM port, scm_t_read_opts *opts,
1346 int value);
1347 static void set_port_curly_infix_p (SCM port, scm_t_read_opts *opts,
1348 int value);
1349
1350 static SCM
1351 scm_read_shebang (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
1352 {
1353 char name[READER_DIRECTIVE_NAME_MAX_SIZE + 1];
1354 int c;
1355 int i = 0;
1356
1357 while (i <= READER_DIRECTIVE_NAME_MAX_SIZE)
1358 {
1359 c = scm_getc_unlocked (port);
1360 if (c == EOF)
1361 scm_i_input_error ("skip_block_comment", port,
1362 "unterminated `#! ... !#' comment", SCM_EOL);
1363 else if (('a' <= c && c <= 'z') || ('0' <= c && c <= '9') || c == '-')
1364 name[i++] = c;
1365 else if (CHAR_IS_DELIMITER (c))
1366 {
1367 scm_ungetc_unlocked (c, port);
1368 name[i] = '\0';
1369 if (0 == strcmp ("r6rs", name))
1370 ; /* Silently ignore */
1371 else if (0 == strcmp ("fold-case", name))
1372 set_port_case_insensitive_p (port, opts, 1);
1373 else if (0 == strcmp ("no-fold-case", name))
1374 set_port_case_insensitive_p (port, opts, 0);
1375 else if (0 == strcmp ("curly-infix", name))
1376 set_port_curly_infix_p (port, opts, 1);
1377 else if (0 == strcmp ("curly-infix-and-bracket-lists", name))
1378 {
1379 set_port_curly_infix_p (port, opts, 1);
1380 set_port_square_brackets_p (port, opts, 0);
1381 }
1382 else
1383 break;
1384
1385 return SCM_UNSPECIFIED;
1386 }
1387 else
1388 {
1389 scm_ungetc_unlocked (c, port);
1390 break;
1391 }
1392 }
1393 while (i > 0)
1394 scm_ungetc_unlocked (name[--i], port);
1395 return scm_read_scsh_block_comment (chr, port);
1396 }
1397
1398 static SCM
1399 scm_read_r6rs_block_comment (scm_t_wchar chr, SCM port)
1400 {
1401 /* Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
1402 nested. So care must be taken. */
1403 int nesting_level = 1;
1404
1405 int a = scm_getc_unlocked (port);
1406
1407 if (a == EOF)
1408 scm_i_input_error ("scm_read_r6rs_block_comment", port,
1409 "unterminated `#| ... |#' comment", SCM_EOL);
1410
1411 while (nesting_level > 0)
1412 {
1413 int b = scm_getc_unlocked (port);
1414
1415 if (b == EOF)
1416 scm_i_input_error ("scm_read_r6rs_block_comment", port,
1417 "unterminated `#| ... |#' comment", SCM_EOL);
1418
1419 if (a == '|' && b == '#')
1420 {
1421 nesting_level--;
1422 b = EOF;
1423 }
1424 else if (a == '#' && b == '|')
1425 {
1426 nesting_level++;
1427 b = EOF;
1428 }
1429
1430 a = b;
1431 }
1432
1433 return SCM_UNSPECIFIED;
1434 }
1435
1436 static SCM
1437 scm_read_commented_expression (scm_t_wchar chr, SCM port,
1438 scm_t_read_opts *opts)
1439 {
1440 scm_t_wchar c;
1441
1442 c = flush_ws (port, opts, (char *) NULL);
1443 if (EOF == c)
1444 scm_i_input_error ("read_commented_expression", port,
1445 "no expression after #; comment", SCM_EOL);
1446 scm_ungetc_unlocked (c, port);
1447 scm_read_expression (port, opts);
1448 return SCM_UNSPECIFIED;
1449 }
1450
1451 static SCM
1452 scm_read_extended_symbol (scm_t_wchar chr, SCM port)
1453 {
1454 /* Guile's extended symbol read syntax looks like this:
1455
1456 #{This is all a symbol name}#
1457
1458 So here, CHR is expected to be `{'. */
1459 int saw_brace = 0;
1460 size_t len = 0;
1461 SCM buf = scm_i_make_string (1024, NULL, 0);
1462
1463 buf = scm_i_string_start_writing (buf);
1464
1465 while ((chr = scm_getc_unlocked (port)) != EOF)
1466 {
1467 if (saw_brace)
1468 {
1469 if (chr == '#')
1470 {
1471 break;
1472 }
1473 else
1474 {
1475 saw_brace = 0;
1476 scm_i_string_set_x (buf, len++, '}');
1477 }
1478 }
1479
1480 if (chr == '}')
1481 saw_brace = 1;
1482 else if (chr == '\\')
1483 {
1484 /* It used to be that print.c would print extended-read-syntax
1485 symbols with backslashes before "non-standard" chars, but
1486 this routine wouldn't do anything with those escapes.
1487 Bummer. What we've done is to change print.c to output
1488 R6RS hex escapes for those characters, relying on the fact
1489 that the extended read syntax would never put a `\' before
1490 an `x'. For now, we just ignore other instances of
1491 backslash in the string. */
1492 switch ((chr = scm_getc_unlocked (port)))
1493 {
1494 case EOF:
1495 goto done;
1496 case 'x':
1497 {
1498 scm_t_wchar c;
1499
1500 SCM_READ_HEX_ESCAPE (10, ';');
1501 scm_i_string_set_x (buf, len++, c);
1502 break;
1503
1504 str_eof:
1505 chr = EOF;
1506 goto done;
1507
1508 bad_escaped:
1509 scm_i_string_stop_writing ();
1510 scm_i_input_error ("scm_read_extended_symbol", port,
1511 "illegal character in escape sequence: ~S",
1512 scm_list_1 (SCM_MAKE_CHAR (c)));
1513 break;
1514 }
1515 default:
1516 scm_i_string_set_x (buf, len++, chr);
1517 break;
1518 }
1519 }
1520 else
1521 scm_i_string_set_x (buf, len++, chr);
1522
1523 if (len >= scm_i_string_length (buf) - 2)
1524 {
1525 SCM addy;
1526
1527 scm_i_string_stop_writing ();
1528 addy = scm_i_make_string (1024, NULL, 0);
1529 buf = scm_string_append (scm_list_2 (buf, addy));
1530 len = 0;
1531 buf = scm_i_string_start_writing (buf);
1532 }
1533 }
1534
1535 done:
1536 scm_i_string_stop_writing ();
1537 if (chr == EOF)
1538 scm_i_input_error ("scm_read_extended_symbol", port,
1539 "end of file while reading symbol", SCM_EOL);
1540
1541 return (scm_string_to_symbol (scm_c_substring (buf, 0, len)));
1542 }
1543
1544
1545 \f
1546 /* Top-level token readers, i.e., dispatchers. */
1547
1548 static SCM
1549 scm_read_sharp_extension (int chr, SCM port, scm_t_read_opts *opts)
1550 {
1551 SCM proc;
1552
1553 proc = scm_get_hash_procedure (chr);
1554 if (scm_is_true (scm_procedure_p (proc)))
1555 {
1556 long line = SCM_LINUM (port);
1557 int column = SCM_COL (port) - 2;
1558 SCM got;
1559
1560 got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
1561
1562 if (opts->record_positions_p && SCM_NIMP (got)
1563 && !scm_i_has_source_properties (got))
1564 scm_i_set_source_properties_x (got, line, column, SCM_FILENAME (port));
1565
1566 return got;
1567 }
1568
1569 return SCM_UNSPECIFIED;
1570 }
1571
1572 /* The reader for the sharp `#' character. It basically dispatches reads
1573 among the above token readers. */
1574 static SCM
1575 scm_read_sharp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
1576 long line, int column)
1577 #define FUNC_NAME "scm_lreadr"
1578 {
1579 SCM result;
1580
1581 chr = scm_getc_unlocked (port);
1582
1583 result = scm_read_sharp_extension (chr, port, opts);
1584 if (!scm_is_eq (result, SCM_UNSPECIFIED))
1585 return result;
1586
1587 switch (chr)
1588 {
1589 case '\\':
1590 return (scm_read_character (chr, port, opts));
1591 case '(':
1592 return (scm_read_vector (chr, port, opts, line, column));
1593 case 's':
1594 case 'u':
1595 case 'f':
1596 case 'c':
1597 /* This one may return either a boolean or an SRFI-4 vector. */
1598 return (scm_read_srfi4_vector (chr, port, opts, line, column));
1599 case 'v':
1600 return (scm_read_bytevector (chr, port, opts, line, column));
1601 case '*':
1602 return (scm_read_guile_bit_vector (chr, port, opts, line, column));
1603 case 't':
1604 case 'T':
1605 case 'F':
1606 return (scm_read_boolean (chr, port));
1607 case ':':
1608 return (scm_read_keyword (chr, port, opts));
1609 case '0': case '1': case '2': case '3': case '4':
1610 case '5': case '6': case '7': case '8': case '9':
1611 case '@':
1612 return (scm_read_array (chr, port, opts, line, column));
1613
1614 case 'i':
1615 case 'e':
1616 case 'b':
1617 case 'B':
1618 case 'o':
1619 case 'O':
1620 case 'd':
1621 case 'D':
1622 case 'x':
1623 case 'X':
1624 case 'I':
1625 case 'E':
1626 return (scm_read_number_and_radix (chr, port, opts));
1627 case '{':
1628 return (scm_read_extended_symbol (chr, port));
1629 case '!':
1630 return (scm_read_shebang (chr, port, opts));
1631 case ';':
1632 return (scm_read_commented_expression (chr, port, opts));
1633 case '`':
1634 case '\'':
1635 case ',':
1636 return (scm_read_syntax (chr, port, opts));
1637 case 'n':
1638 return (scm_read_nil (chr, port, opts));
1639 default:
1640 result = scm_read_sharp_extension (chr, port, opts);
1641 if (scm_is_eq (result, SCM_UNSPECIFIED))
1642 {
1643 /* To remain compatible with 1.8 and earlier, the following
1644 characters have lower precedence than `read-hash-extend'
1645 characters. */
1646 switch (chr)
1647 {
1648 case '|':
1649 return scm_read_r6rs_block_comment (chr, port);
1650 default:
1651 scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
1652 scm_list_1 (SCM_MAKE_CHAR (chr)));
1653 }
1654 }
1655 else
1656 return result;
1657 }
1658
1659 return SCM_UNSPECIFIED;
1660 }
1661 #undef FUNC_NAME
1662
1663 static SCM
1664 read_inner_expression (SCM port, scm_t_read_opts *opts)
1665 #define FUNC_NAME "read_inner_expression"
1666 {
1667 while (1)
1668 {
1669 scm_t_wchar chr;
1670
1671 chr = scm_getc_unlocked (port);
1672
1673 switch (chr)
1674 {
1675 case SCM_WHITE_SPACES:
1676 case SCM_LINE_INCREMENTORS:
1677 break;
1678 case ';':
1679 (void) scm_read_semicolon_comment (chr, port);
1680 break;
1681 case '{':
1682 if (opts->curly_infix_p)
1683 {
1684 if (opts->neoteric_p)
1685 return scm_read_sexp (chr, port, opts);
1686 else
1687 {
1688 SCM expr;
1689
1690 /* Enable neoteric expressions within curly braces */
1691 opts->neoteric_p = 1;
1692 expr = scm_read_sexp (chr, port, opts);
1693 opts->neoteric_p = 0;
1694 return expr;
1695 }
1696 }
1697 else
1698 return scm_read_mixed_case_symbol (chr, port, opts);
1699 case '[':
1700 if (opts->square_brackets_p)
1701 return scm_read_sexp (chr, port, opts);
1702 else if (opts->curly_infix_p)
1703 {
1704 /* The syntax of neoteric expressions requires that '[' be
1705 a delimiter when curly-infix is enabled, so it cannot
1706 be part of an unescaped symbol. We might as well do
1707 something useful with it, so we adopt Kawa's convention:
1708 [...] => ($bracket-list$ ...) */
1709 long line = SCM_LINUM (port);
1710 int column = SCM_COL (port) - 1;
1711 return maybe_annotate_source
1712 (scm_cons (sym_bracket_list, scm_read_sexp (chr, port, opts)),
1713 port, opts, line, column);
1714 }
1715 else
1716 return scm_read_mixed_case_symbol (chr, port, opts);
1717 case '(':
1718 return (scm_read_sexp (chr, port, opts));
1719 case '"':
1720 return (scm_read_string (chr, port, opts));
1721 case '\'':
1722 case '`':
1723 case ',':
1724 return (scm_read_quote (chr, port, opts));
1725 case '#':
1726 {
1727 long line = SCM_LINUM (port);
1728 int column = SCM_COL (port) - 1;
1729 SCM result = scm_read_sharp (chr, port, opts, line, column);
1730 if (scm_is_eq (result, SCM_UNSPECIFIED))
1731 /* We read a comment or some such. */
1732 break;
1733 else
1734 return result;
1735 }
1736 case ')':
1737 scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
1738 break;
1739 case '}':
1740 if (opts->curly_infix_p)
1741 scm_i_input_error (FUNC_NAME, port, "unexpected \"}\"", SCM_EOL);
1742 else
1743 return scm_read_mixed_case_symbol (chr, port, opts);
1744 case ']':
1745 if (opts->square_brackets_p)
1746 scm_i_input_error (FUNC_NAME, port, "unexpected \"]\"", SCM_EOL);
1747 /* otherwise fall through */
1748 case EOF:
1749 return SCM_EOF_VAL;
1750 case ':':
1751 if (opts->keyword_style == KEYWORD_STYLE_PREFIX)
1752 return scm_symbol_to_keyword (scm_read_expression (port, opts));
1753 /* Fall through. */
1754
1755 default:
1756 {
1757 if (((chr >= '0') && (chr <= '9'))
1758 || (strchr ("+-.", chr)))
1759 return (scm_read_number (chr, port, opts));
1760 else
1761 return (scm_read_mixed_case_symbol (chr, port, opts));
1762 }
1763 }
1764 }
1765 }
1766 #undef FUNC_NAME
1767
1768 static SCM
1769 scm_read_expression (SCM port, scm_t_read_opts *opts)
1770 #define FUNC_NAME "scm_read_expression"
1771 {
1772 if (!opts->neoteric_p)
1773 return read_inner_expression (port, opts);
1774 else
1775 {
1776 long line = 0;
1777 int column = 0;
1778 SCM expr;
1779
1780 if (opts->record_positions_p)
1781 {
1782 /* We need to get the position of the first non-whitespace
1783 character in order to correctly annotate neoteric
1784 expressions. For example, for the expression 'f(x)', the
1785 first call to 'read_inner_expression' reads the 'f' (which
1786 cannot be annotated), and then we later read the '(x)' and
1787 use it to construct the new list (f x). */
1788 int c = flush_ws (port, opts, (char *) NULL);
1789 if (c == EOF)
1790 return SCM_EOF_VAL;
1791 scm_ungetc_unlocked (c, port);
1792 line = SCM_LINUM (port);
1793 column = SCM_COL (port);
1794 }
1795
1796 expr = read_inner_expression (port, opts);
1797
1798 /* 'expr' is the first component of the neoteric expression. Now
1799 we loop, and as long as the next character is '(', '[', or '{',
1800 (without any intervening whitespace), we use it to construct a
1801 new expression. For example, f{n - 1}(x) => ((f (- n 1)) x). */
1802 for (;;)
1803 {
1804 int chr = scm_getc_unlocked (port);
1805
1806 if (chr == '(')
1807 /* e(...) => (e ...) */
1808 expr = scm_cons (expr, scm_read_sexp (chr, port, opts));
1809 else if (chr == '[')
1810 /* e[...] => ($bracket-apply$ e ...) */
1811 expr = scm_cons (sym_bracket_apply,
1812 scm_cons (expr,
1813 scm_read_sexp (chr, port, opts)));
1814 else if (chr == '{')
1815 {
1816 SCM arg = scm_read_sexp (chr, port, opts);
1817
1818 if (scm_is_null (arg))
1819 expr = scm_list_1 (expr); /* e{} => (e) */
1820 else
1821 expr = scm_list_2 (expr, arg); /* e{...} => (e {...}) */
1822 }
1823 else
1824 {
1825 if (chr != EOF)
1826 scm_ungetc_unlocked (chr, port);
1827 break;
1828 }
1829 maybe_annotate_source (expr, port, opts, line, column);
1830 }
1831 return expr;
1832 }
1833 }
1834 #undef FUNC_NAME
1835
1836 \f
1837 /* Actual reader. */
1838
1839 static void init_read_options (SCM port, scm_t_read_opts *opts);
1840
1841 SCM_DEFINE (scm_read, "read", 0, 1, 0,
1842 (SCM port),
1843 "Read an s-expression from the input port @var{port}, or from\n"
1844 "the current input port if @var{port} is not specified.\n"
1845 "Any whitespace before the next token is discarded.")
1846 #define FUNC_NAME s_scm_read
1847 {
1848 scm_t_read_opts opts;
1849 int c;
1850
1851 if (SCM_UNBNDP (port))
1852 port = scm_current_input_port ();
1853 SCM_VALIDATE_OPINPORT (1, port);
1854
1855 init_read_options (port, &opts);
1856
1857 c = flush_ws (port, &opts, (char *) NULL);
1858 if (EOF == c)
1859 return SCM_EOF_VAL;
1860 scm_ungetc_unlocked (c, port);
1861
1862 return (scm_read_expression (port, &opts));
1863 }
1864 #undef FUNC_NAME
1865
1866
1867 \f
1868
1869 /* Manipulate the read-hash-procedures alist. This could be written in
1870 Scheme, but maybe it will also be used by C code during initialisation. */
1871 SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
1872 (SCM chr, SCM proc),
1873 "Install the procedure @var{proc} for reading expressions\n"
1874 "starting with the character sequence @code{#} and @var{chr}.\n"
1875 "@var{proc} will be called with two arguments: the character\n"
1876 "@var{chr} and the port to read further data from. The object\n"
1877 "returned will be the return value of @code{read}. \n"
1878 "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1879 )
1880 #define FUNC_NAME s_scm_read_hash_extend
1881 {
1882 SCM this;
1883 SCM prev;
1884
1885 SCM_VALIDATE_CHAR (1, chr);
1886 SCM_ASSERT (scm_is_false (proc)
1887 || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
1888 proc, SCM_ARG2, FUNC_NAME);
1889
1890 /* Check if chr is already in the alist. */
1891 this = scm_i_read_hash_procedures_ref ();
1892 prev = SCM_BOOL_F;
1893 while (1)
1894 {
1895 if (scm_is_null (this))
1896 {
1897 /* not found, so add it to the beginning. */
1898 if (scm_is_true (proc))
1899 {
1900 SCM new = scm_cons (scm_cons (chr, proc),
1901 scm_i_read_hash_procedures_ref ());
1902 scm_i_read_hash_procedures_set_x (new);
1903 }
1904 break;
1905 }
1906 if (scm_is_eq (chr, SCM_CAAR (this)))
1907 {
1908 /* already in the alist. */
1909 if (scm_is_false (proc))
1910 {
1911 /* remove it. */
1912 if (scm_is_false (prev))
1913 {
1914 SCM rest = SCM_CDR (scm_i_read_hash_procedures_ref ());
1915 scm_i_read_hash_procedures_set_x (rest);
1916 }
1917 else
1918 scm_set_cdr_x (prev, SCM_CDR (this));
1919 }
1920 else
1921 {
1922 /* replace it. */
1923 scm_set_cdr_x (SCM_CAR (this), proc);
1924 }
1925 break;
1926 }
1927 prev = this;
1928 this = SCM_CDR (this);
1929 }
1930
1931 return SCM_UNSPECIFIED;
1932 }
1933 #undef FUNC_NAME
1934
1935 /* Recover the read-hash procedure corresponding to char c. */
1936 static SCM
1937 scm_get_hash_procedure (int c)
1938 {
1939 SCM rest = scm_i_read_hash_procedures_ref ();
1940
1941 while (1)
1942 {
1943 if (scm_is_null (rest))
1944 return SCM_BOOL_F;
1945
1946 if (SCM_CHAR (SCM_CAAR (rest)) == c)
1947 return SCM_CDAR (rest);
1948
1949 rest = SCM_CDR (rest);
1950 }
1951 }
1952
1953 #define SCM_ENCODING_SEARCH_SIZE (500)
1954
1955 static int
1956 is_encoding_char (char c)
1957 {
1958 if (c >= 'a' && c <= 'z') return 1;
1959 if (c >= 'A' && c <= 'Z') return 1;
1960 if (c >= '0' && c <= '9') return 1;
1961 return strchr ("_-.:/,+=()", c) != NULL;
1962 }
1963
1964 /* Search the first few hundred characters of a file for an Emacs-like coding
1965 declaration. Returns either NULL or a string whose storage has been
1966 allocated with `scm_gc_malloc ()'. */
1967 char *
1968 scm_i_scan_for_encoding (SCM port)
1969 {
1970 scm_t_port *pt;
1971 char header[SCM_ENCODING_SEARCH_SIZE+1];
1972 size_t bytes_read, encoding_length, i;
1973 char *encoding = NULL;
1974 char *pos, *encoding_start;
1975 int in_comment;
1976
1977 pt = SCM_PTAB_ENTRY (port);
1978
1979 if (pt->rw_active == SCM_PORT_WRITE)
1980 scm_flush_unlocked (port);
1981
1982 if (pt->rw_random)
1983 pt->rw_active = SCM_PORT_READ;
1984
1985 if (pt->read_pos == pt->read_end)
1986 {
1987 /* We can use the read buffer, and thus avoid a seek. */
1988 if (scm_fill_input_unlocked (port) == EOF)
1989 return NULL;
1990
1991 bytes_read = pt->read_end - pt->read_pos;
1992 if (bytes_read > SCM_ENCODING_SEARCH_SIZE)
1993 bytes_read = SCM_ENCODING_SEARCH_SIZE;
1994
1995 if (bytes_read <= 1)
1996 /* An unbuffered port -- don't scan. */
1997 return NULL;
1998
1999 memcpy (header, pt->read_pos, bytes_read);
2000 header[bytes_read] = '\0';
2001 }
2002 else
2003 {
2004 /* Try to read some bytes and then seek back. Not all ports
2005 support seeking back; and indeed some file ports (like
2006 /dev/urandom) will succeed on an lseek (fd, 0, SEEK_CUR)---the
2007 check performed by SCM_FPORT_FDES---but fail to seek
2008 backwards. Hence this block comes second. We prefer to use
2009 the read buffer in-place. */
2010 if (SCM_FPORTP (port) && !SCM_FDES_RANDOM_P (SCM_FPORT_FDES (port)))
2011 return NULL;
2012
2013 bytes_read = scm_c_read_unlocked (port, header, SCM_ENCODING_SEARCH_SIZE);
2014 header[bytes_read] = '\0';
2015 scm_seek (port, scm_from_int (0), scm_from_int (SEEK_SET));
2016 }
2017
2018 /* search past "coding[:=]" */
2019 pos = header;
2020 while (1)
2021 {
2022 if ((pos = strstr(pos, "coding")) == NULL)
2023 return NULL;
2024
2025 pos += strlen("coding");
2026 if (pos - header >= SCM_ENCODING_SEARCH_SIZE ||
2027 (*pos == ':' || *pos == '='))
2028 {
2029 pos ++;
2030 break;
2031 }
2032 }
2033
2034 /* skip spaces */
2035 while (pos - header <= SCM_ENCODING_SEARCH_SIZE &&
2036 (*pos == ' ' || *pos == '\t'))
2037 pos ++;
2038
2039 /* grab the next token */
2040 encoding_start = pos;
2041 i = 0;
2042 while (encoding_start + i - header <= SCM_ENCODING_SEARCH_SIZE
2043 && encoding_start + i - header < bytes_read
2044 && is_encoding_char (encoding_start[i]))
2045 i++;
2046
2047 encoding_length = i;
2048 if (encoding_length == 0)
2049 return NULL;
2050
2051 encoding = scm_gc_strndup (encoding_start, encoding_length, "encoding");
2052
2053 /* push backwards to make sure we were in a comment */
2054 in_comment = 0;
2055 pos = encoding_start;
2056 while (pos >= header)
2057 {
2058 if (*pos == ';')
2059 {
2060 in_comment = 1;
2061 break;
2062 }
2063 else if (*pos == '\n' || pos == header)
2064 {
2065 /* This wasn't in a semicolon comment. Check for a
2066 hash-bang comment. */
2067 char *beg = strstr (header, "#!");
2068 char *end = strstr (header, "!#");
2069 if (beg < encoding_start && encoding_start + encoding_length <= end)
2070 in_comment = 1;
2071 break;
2072 }
2073 else
2074 {
2075 pos --;
2076 continue;
2077 }
2078 }
2079 if (!in_comment)
2080 /* This wasn't in a comment */
2081 return NULL;
2082
2083 return encoding;
2084 }
2085
2086 SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0,
2087 (SCM port),
2088 "Scans the port for an Emacs-like character coding declaration\n"
2089 "near the top of the contents of a port with random-accessible contents.\n"
2090 "The coding declaration is of the form\n"
2091 "@code{coding: XXXXX} and must appear in a scheme comment.\n"
2092 "\n"
2093 "Returns a string containing the character encoding of the file\n"
2094 "if a declaration was found, or @code{#f} otherwise.\n")
2095 #define FUNC_NAME s_scm_file_encoding
2096 {
2097 char *enc;
2098 SCM s_enc;
2099
2100 SCM_VALIDATE_OPINPORT (SCM_ARG1, port);
2101
2102 enc = scm_i_scan_for_encoding (port);
2103 if (enc == NULL)
2104 return SCM_BOOL_F;
2105 else
2106 {
2107 s_enc = scm_string_upcase (scm_from_locale_string (enc));
2108 return s_enc;
2109 }
2110
2111 return SCM_BOOL_F;
2112 }
2113 #undef FUNC_NAME
2114
2115 \f
2116 /* Per-port read options.
2117
2118 We store per-port read options in the 'port-read-options' port
2119 property, which is stored in the internal port structure. The value
2120 stored is a single integer that contains a two-bit field for each
2121 read option.
2122
2123 If a bit field contains READ_OPTION_INHERIT (3), that indicates that
2124 the applicable value should be inherited from the corresponding
2125 global read option. Otherwise, the bit field contains the value of
2126 the read option. For boolean read options that have been set
2127 per-port, the possible values are 0 or 1. If the 'keyword_style'
2128 read option has been set per-port, its possible values are those in
2129 'enum t_keyword_style'. */
2130
2131 /* Key to read options in port properties. */
2132 SCM_SYMBOL (sym_port_read_options, "port-read-options");
2133
2134 /* Offsets of bit fields for each per-port override */
2135 #define READ_OPTION_COPY_SOURCE_P 0
2136 #define READ_OPTION_RECORD_POSITIONS_P 2
2137 #define READ_OPTION_CASE_INSENSITIVE_P 4
2138 #define READ_OPTION_KEYWORD_STYLE 6
2139 #define READ_OPTION_R6RS_ESCAPES_P 8
2140 #define READ_OPTION_SQUARE_BRACKETS_P 10
2141 #define READ_OPTION_HUNGRY_EOL_ESCAPES_P 12
2142 #define READ_OPTION_CURLY_INFIX_P 14
2143
2144 /* The total width in bits of the per-port overrides */
2145 #define READ_OPTIONS_NUM_BITS 16
2146
2147 #define READ_OPTIONS_INHERIT_ALL ((1UL << READ_OPTIONS_NUM_BITS) - 1)
2148 #define READ_OPTIONS_MAX_VALUE READ_OPTIONS_INHERIT_ALL
2149
2150 #define READ_OPTION_MASK 3
2151 #define READ_OPTION_INHERIT 3
2152
2153 static void
2154 set_port_read_option (SCM port, int option, int new_value)
2155 {
2156 SCM scm_read_options;
2157 unsigned int read_options;
2158
2159 new_value &= READ_OPTION_MASK;
2160 scm_read_options = scm_i_port_property (port, sym_port_read_options);
2161 if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE))
2162 read_options = scm_to_uint (scm_read_options);
2163 else
2164 read_options = READ_OPTIONS_INHERIT_ALL;
2165 read_options &= ~(READ_OPTION_MASK << option);
2166 read_options |= new_value << option;
2167 scm_read_options = scm_from_uint (read_options);
2168 scm_i_set_port_property_x (port, sym_port_read_options, scm_read_options);
2169 }
2170
2171 /* Set OPTS and PORT's case-insensitivity according to VALUE. */
2172 static void
2173 set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts, int value)
2174 {
2175 value = !!value;
2176 opts->case_insensitive_p = value;
2177 set_port_read_option (port, READ_OPTION_CASE_INSENSITIVE_P, value);
2178 }
2179
2180 /* Set OPTS and PORT's square_brackets_p option according to VALUE. */
2181 static void
2182 set_port_square_brackets_p (SCM port, scm_t_read_opts *opts, int value)
2183 {
2184 value = !!value;
2185 opts->square_brackets_p = value;
2186 set_port_read_option (port, READ_OPTION_SQUARE_BRACKETS_P, value);
2187 }
2188
2189 /* Set OPTS and PORT's curly_infix_p option according to VALUE. */
2190 static void
2191 set_port_curly_infix_p (SCM port, scm_t_read_opts *opts, int value)
2192 {
2193 value = !!value;
2194 opts->curly_infix_p = value;
2195 set_port_read_option (port, READ_OPTION_CURLY_INFIX_P, value);
2196 }
2197
2198 /* Initialize OPTS based on PORT's read options and the global read
2199 options. */
2200 static void
2201 init_read_options (SCM port, scm_t_read_opts *opts)
2202 {
2203 SCM val, scm_read_options;
2204 unsigned int read_options, x;
2205
2206 scm_read_options = scm_i_port_property (port, sym_port_read_options);
2207
2208 if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE))
2209 read_options = scm_to_uint (scm_read_options);
2210 else
2211 read_options = READ_OPTIONS_INHERIT_ALL;
2212
2213 x = READ_OPTION_MASK & (read_options >> READ_OPTION_KEYWORD_STYLE);
2214 if (x == READ_OPTION_INHERIT)
2215 {
2216 val = SCM_PACK (SCM_KEYWORD_STYLE);
2217 if (scm_is_eq (val, scm_keyword_prefix))
2218 x = KEYWORD_STYLE_PREFIX;
2219 else if (scm_is_eq (val, scm_keyword_postfix))
2220 x = KEYWORD_STYLE_POSTFIX;
2221 else
2222 x = KEYWORD_STYLE_HASH_PREFIX;
2223 }
2224 opts->keyword_style = x;
2225
2226 #define RESOLVE_BOOLEAN_OPTION(NAME, name) \
2227 do \
2228 { \
2229 x = READ_OPTION_MASK & (read_options >> READ_OPTION_ ## NAME); \
2230 if (x == READ_OPTION_INHERIT) \
2231 x = !!SCM_ ## NAME; \
2232 opts->name = x; \
2233 } \
2234 while (0)
2235
2236 RESOLVE_BOOLEAN_OPTION (COPY_SOURCE_P, copy_source_p);
2237 RESOLVE_BOOLEAN_OPTION (RECORD_POSITIONS_P, record_positions_p);
2238 RESOLVE_BOOLEAN_OPTION (CASE_INSENSITIVE_P, case_insensitive_p);
2239 RESOLVE_BOOLEAN_OPTION (R6RS_ESCAPES_P, r6rs_escapes_p);
2240 RESOLVE_BOOLEAN_OPTION (SQUARE_BRACKETS_P, square_brackets_p);
2241 RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P, hungry_eol_escapes_p);
2242 RESOLVE_BOOLEAN_OPTION (CURLY_INFIX_P, curly_infix_p);
2243
2244 #undef RESOLVE_BOOLEAN_OPTION
2245
2246 opts->neoteric_p = 0;
2247 }
2248
2249 void
2250 scm_init_read ()
2251 {
2252 SCM read_hash_procs;
2253
2254 read_hash_procs = scm_make_fluid_with_default (SCM_EOL);
2255
2256 scm_i_read_hash_procedures =
2257 SCM_VARIABLE_LOC (scm_c_define ("%read-hash-procedures", read_hash_procs));
2258
2259 scm_init_opts (scm_read_options, scm_read_opts);
2260 #include "libguile/read.x"
2261 }
2262
2263 /*
2264 Local Variables:
2265 c-file-style: "gnu"
2266 End:
2267 */