833ec23421c0a3be0e6f885dac57952f9c2493d4
[bpt/guile.git] / libguile / read.c
1 /* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006,
2 * 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
18 */
19
20
21 \f
22
23 #ifdef HAVE_CONFIG_H
24 # include <config.h>
25 #endif
26
27 #include <stdio.h>
28 #include <ctype.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <unicase.h>
32 #include <unictype.h>
33
34 #include "libguile/_scm.h"
35 #include "libguile/bytevectors.h"
36 #include "libguile/chars.h"
37 #include "libguile/eval.h"
38 #include "libguile/arrays.h"
39 #include "libguile/bitvectors.h"
40 #include "libguile/keywords.h"
41 #include "libguile/alist.h"
42 #include "libguile/srcprop.h"
43 #include "libguile/hashtab.h"
44 #include "libguile/hash.h"
45 #include "libguile/ports.h"
46 #include "libguile/fports.h"
47 #include "libguile/root.h"
48 #include "libguile/strings.h"
49 #include "libguile/strports.h"
50 #include "libguile/vectors.h"
51 #include "libguile/validate.h"
52 #include "libguile/srfi-4.h"
53 #include "libguile/srfi-13.h"
54
55 #include "libguile/read.h"
56 #include "libguile/private-options.h"
57
58
59 \f
60
61 SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
62 SCM_SYMBOL (scm_keyword_prefix, "prefix");
63 SCM_SYMBOL (scm_keyword_postfix, "postfix");
64 SCM_SYMBOL (sym_nil, "nil");
65
66 /* SRFI-105 curly infix expression support */
67 SCM_SYMBOL (sym_nfx, "$nfx$");
68 SCM_SYMBOL (sym_bracket_list, "$bracket-list$");
69 SCM_SYMBOL (sym_bracket_apply, "$bracket-apply$");
70
71 scm_t_option scm_read_opts[] =
72 {
73 { SCM_OPTION_BOOLEAN, "copy", 0,
74 "Copy source code expressions." },
75 { SCM_OPTION_BOOLEAN, "positions", 1,
76 "Record positions of source code expressions." },
77 { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
78 "Convert symbols to lower case."},
79 { SCM_OPTION_SCM, "keywords", (scm_t_bits) SCM_BOOL_F_BITS,
80 "Style of keyword recognition: #f, 'prefix or 'postfix."},
81 { SCM_OPTION_BOOLEAN, "r6rs-hex-escapes", 0,
82 "Use R6RS variable-length character and string hex escapes."},
83 { SCM_OPTION_BOOLEAN, "square-brackets", 1,
84 "Treat `[' and `]' as parentheses, for R6RS compatibility."},
85 { SCM_OPTION_BOOLEAN, "hungry-eol-escapes", 0,
86 "In strings, consume leading whitespace after an escaped end-of-line."},
87 { SCM_OPTION_BOOLEAN, "curly-infix", 0,
88 "Support SRFI-105 curly infix expressions."},
89 { 0, },
90 };
91
92 /* Internal read options structure. This is initialized by 'scm_read'
93 from the global and per-port read options, and a pointer is passed
94 down to all helper functions. */
95
96 enum t_keyword_style
97 {
98 KEYWORD_STYLE_HASH_PREFIX,
99 KEYWORD_STYLE_PREFIX,
100 KEYWORD_STYLE_POSTFIX
101 };
102
103 struct t_read_opts
104 {
105 enum t_keyword_style keyword_style;
106 unsigned int copy_source_p : 1;
107 unsigned int record_positions_p : 1;
108 unsigned int case_insensitive_p : 1;
109 unsigned int r6rs_escapes_p : 1;
110 unsigned int square_brackets_p : 1;
111 unsigned int hungry_eol_escapes_p : 1;
112 unsigned int curly_infix_p : 1;
113 unsigned int neoteric_p : 1;
114 };
115
116 typedef struct t_read_opts scm_t_read_opts;
117
118
119 /*
120 Give meaningful error messages for errors
121
122 We use the format
123
124 FILE:LINE:COL: MESSAGE
125 This happened in ....
126
127 This is not standard GNU format, but the test-suite likes the real
128 message to be in front.
129
130 */
131
132
133 void
134 scm_i_input_error (char const *function,
135 SCM port, const char *message, SCM arg)
136 {
137 SCM fn = (scm_is_string (SCM_FILENAME(port))
138 ? SCM_FILENAME(port)
139 : scm_from_locale_string ("#<unknown port>"));
140
141 SCM string_port = scm_open_output_string ();
142 SCM string = SCM_EOL;
143 scm_simple_format (string_port,
144 scm_from_locale_string ("~A:~S:~S: ~A"),
145 scm_list_4 (fn,
146 scm_from_long (SCM_LINUM (port) + 1),
147 scm_from_int (SCM_COL (port) + 1),
148 scm_from_locale_string (message)));
149
150 string = scm_get_output_string (string_port);
151 scm_close_output_port (string_port);
152 scm_error_scm (scm_from_latin1_symbol ("read-error"),
153 function? scm_from_locale_string (function) : SCM_BOOL_F,
154 string,
155 arg,
156 SCM_BOOL_F);
157 }
158
159
160 SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
161 (SCM setting),
162 "Option interface for the read options. Instead of using\n"
163 "this procedure directly, use the procedures @code{read-enable},\n"
164 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
165 #define FUNC_NAME s_scm_read_options
166 {
167 SCM ans = scm_options (setting,
168 scm_read_opts,
169 FUNC_NAME);
170 if (SCM_COPY_SOURCE_P)
171 SCM_RECORD_POSITIONS_P = 1;
172 return ans;
173 }
174 #undef FUNC_NAME
175
176 /* A fluid referring to an association list mapping extra hash
177 characters to procedures. */
178 static SCM *scm_i_read_hash_procedures;
179
180 static SCM
181 scm_i_read_hash_procedures_ref (void)
182 {
183 return scm_fluid_ref (*scm_i_read_hash_procedures);
184 }
185
186 static void
187 scm_i_read_hash_procedures_set_x (SCM value)
188 {
189 scm_fluid_set_x (*scm_i_read_hash_procedures, value);
190 }
191
192 \f
193 /* Token readers. */
194
195
196 /* Size of the C buffer used to read symbols and numbers. */
197 #define READER_BUFFER_SIZE 128
198
199 /* Number of 32-bit codepoints in the buffer used to read strings. */
200 #define READER_STRING_BUFFER_SIZE 128
201
202 /* The maximum size of Scheme character names. */
203 #define READER_CHAR_NAME_MAX_SIZE 50
204
205 /* The maximum size of reader directive names. */
206 #define READER_DIRECTIVE_NAME_MAX_SIZE 50
207
208
209 /* `isblank' is only in C99. */
210 #define CHAR_IS_BLANK_(_chr) \
211 (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n') \
212 || ((_chr) == '\f') || ((_chr) == '\r'))
213
214 #ifdef MSDOS
215 # define CHAR_IS_BLANK(_chr) \
216 ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
217 #else
218 # define CHAR_IS_BLANK CHAR_IS_BLANK_
219 #endif
220
221
222 /* R5RS one-character delimiters (see section 7.1.1, ``Lexical
223 structure''). */
224 #define CHAR_IS_R5RS_DELIMITER(c) \
225 (CHAR_IS_BLANK (c) \
226 || (c) == ')' || (c) == '(' || (c) == ';' || (c) == '"')
227
228 #define CHAR_IS_DELIMITER(c) \
229 (CHAR_IS_R5RS_DELIMITER (c) \
230 || (((c) == ']' || (c) == '[') && (opts->square_brackets_p \
231 || opts->curly_infix_p)) \
232 || (((c) == '}' || (c) == '{') && opts->curly_infix_p))
233
234 /* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
235 Structure''. */
236 #define CHAR_IS_EXPONENT_MARKER(_chr) \
237 (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
238 || ((_chr) == 'd') || ((_chr) == 'l'))
239
240 /* Read an SCSH block comment. */
241 static SCM scm_read_scsh_block_comment (scm_t_wchar, SCM);
242 static SCM scm_read_r6rs_block_comment (scm_t_wchar, SCM);
243 static SCM scm_read_commented_expression (scm_t_wchar, SCM, scm_t_read_opts *);
244 static SCM scm_read_shebang (scm_t_wchar, SCM, scm_t_read_opts *);
245 static SCM scm_get_hash_procedure (int);
246
247 /* Read from PORT until a delimiter (e.g., a whitespace) is read. Put the
248 result in the pre-allocated buffer BUF. Return zero if the whole token has
249 fewer than BUF_SIZE bytes, non-zero otherwise. READ will be set the number of
250 bytes actually read. */
251 static int
252 read_token (SCM port, scm_t_read_opts *opts,
253 char *buf, size_t buf_size, size_t *read)
254 {
255 *read = 0;
256
257 while (*read < buf_size)
258 {
259 int chr;
260
261 chr = scm_get_byte_or_eof_unlocked (port);
262
263 if (chr == EOF)
264 return 0;
265 else if (CHAR_IS_DELIMITER (chr))
266 {
267 scm_unget_byte_unlocked (chr, port);
268 return 0;
269 }
270 else
271 {
272 *buf = (char) chr;
273 buf++, (*read)++;
274 }
275 }
276
277 return 1;
278 }
279
280 /* Like `read_token', but return either BUFFER, or a GC-allocated buffer
281 if the token doesn't fit in BUFFER_SIZE bytes. */
282 static char *
283 read_complete_token (SCM port, scm_t_read_opts *opts,
284 char *buffer, size_t buffer_size, size_t *read)
285 {
286 int overflow = 0;
287 size_t bytes_read, overflow_size = 0;
288 char *overflow_buffer = NULL;
289
290 do
291 {
292 overflow = read_token (port, opts, buffer, buffer_size, &bytes_read);
293 if (bytes_read == 0)
294 break;
295 if (overflow || overflow_size != 0)
296 {
297 if (overflow_size == 0)
298 {
299 overflow_buffer = scm_gc_malloc_pointerless (bytes_read, "read");
300 memcpy (overflow_buffer, buffer, bytes_read);
301 overflow_size = bytes_read;
302 }
303 else
304 {
305 char *new_buf =
306 scm_gc_malloc_pointerless (overflow_size + bytes_read, "read");
307
308 memcpy (new_buf, overflow_buffer, overflow_size);
309 memcpy (new_buf + overflow_size, buffer, bytes_read);
310
311 overflow_buffer = new_buf;
312 overflow_size += bytes_read;
313 }
314 }
315 }
316 while (overflow);
317
318 if (overflow_size)
319 *read = overflow_size;
320 else
321 *read = bytes_read;
322
323 return (overflow_size > 0 ? overflow_buffer : buffer);
324 }
325
326 /* Skip whitespace from PORT and return the first non-whitespace character
327 read. Raise an error on end-of-file. */
328 static int
329 flush_ws (SCM port, scm_t_read_opts *opts, const char *eoferr)
330 {
331 scm_t_wchar c;
332 while (1)
333 switch (c = scm_getc_unlocked (port))
334 {
335 case EOF:
336 goteof:
337 if (eoferr)
338 {
339 scm_i_input_error (eoferr,
340 port,
341 "end of file",
342 SCM_EOL);
343 }
344 return c;
345
346 case ';':
347 lp:
348 switch (c = scm_getc_unlocked (port))
349 {
350 case EOF:
351 goto goteof;
352 default:
353 goto lp;
354 case SCM_LINE_INCREMENTORS:
355 break;
356 }
357 break;
358
359 case '#':
360 switch (c = scm_getc_unlocked (port))
361 {
362 case EOF:
363 eoferr = "read_sharp";
364 goto goteof;
365 case '!':
366 scm_read_shebang (c, port, opts);
367 break;
368 case ';':
369 scm_read_commented_expression (c, port, opts);
370 break;
371 case '|':
372 if (scm_is_false (scm_get_hash_procedure (c)))
373 {
374 scm_read_r6rs_block_comment (c, port);
375 break;
376 }
377 /* fall through */
378 default:
379 scm_ungetc_unlocked (c, port);
380 return '#';
381 }
382 break;
383
384 case SCM_LINE_INCREMENTORS:
385 case SCM_SINGLE_SPACES:
386 case '\t':
387 break;
388
389 default:
390 return c;
391 }
392
393 return 0;
394 }
395
396
397 \f
398 /* Token readers. */
399
400 static SCM scm_read_expression (SCM port, scm_t_read_opts *opts);
401 static SCM scm_read_sharp (int chr, SCM port, scm_t_read_opts *opts,
402 long line, int column);
403
404
405 static SCM
406 maybe_annotate_source (SCM x, SCM port, scm_t_read_opts *opts,
407 long line, int column)
408 {
409 if (opts->record_positions_p)
410 scm_i_set_source_properties_x (x, line, column, SCM_FILENAME (port));
411 return x;
412 }
413
414 static SCM
415 scm_read_sexp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
416 #define FUNC_NAME "scm_i_lreadparen"
417 {
418 int c;
419 SCM tmp, tl, ans = SCM_EOL;
420 const int curly_list_p = (chr == '{') && opts->curly_infix_p;
421 const int terminating_char = ((chr == '{') ? '}'
422 : ((chr == '[') ? ']'
423 : ')'));
424
425 /* Need to capture line and column numbers here. */
426 long line = SCM_LINUM (port);
427 int column = SCM_COL (port) - 1;
428
429 c = flush_ws (port, opts, FUNC_NAME);
430 if (terminating_char == c)
431 return SCM_EOL;
432
433 scm_ungetc_unlocked (c, port);
434 tmp = scm_read_expression (port, opts);
435
436 /* Note that it is possible for scm_read_expression to return
437 scm_sym_dot, but not as part of a dotted pair: as in #{.}#. So
438 check that it's a real dot by checking `c'. */
439 if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
440 {
441 ans = scm_read_expression (port, opts);
442 if (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
443 scm_i_input_error (FUNC_NAME, port, "missing close paren",
444 SCM_EOL);
445 return ans;
446 }
447
448 /* Build the head of the list structure. */
449 ans = tl = scm_cons (tmp, SCM_EOL);
450
451 while (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
452 {
453 SCM new_tail;
454
455 if (c == ')' || (c == ']' && opts->square_brackets_p)
456 || ((c == '}' || c == ']') && opts->curly_infix_p))
457 scm_i_input_error (FUNC_NAME, port,
458 "in pair: mismatched close paren: ~A",
459 scm_list_1 (SCM_MAKE_CHAR (c)));
460
461 scm_ungetc_unlocked (c, port);
462 tmp = scm_read_expression (port, opts);
463
464 /* See above note about scm_sym_dot. */
465 if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
466 {
467 SCM_SETCDR (tl, scm_read_expression (port, opts));
468
469 c = flush_ws (port, opts, FUNC_NAME);
470 if (terminating_char != c)
471 scm_i_input_error (FUNC_NAME, port,
472 "in pair: missing close paren", SCM_EOL);
473 break;
474 }
475
476 new_tail = scm_cons (tmp, SCM_EOL);
477 SCM_SETCDR (tl, new_tail);
478 tl = new_tail;
479 }
480
481 if (curly_list_p)
482 {
483 /* In addition to finding the length, 'scm_ilength' checks for
484 improper or circular lists, in which case it returns -1. */
485 int len = scm_ilength (ans);
486
487 /* The (len == 0) case is handled above */
488 if (len == 1)
489 /* Return directly to avoid re-annotating the element's source
490 location with the position of the outer brace. Also, it
491 might not be possible to annotate the element. */
492 return scm_car (ans); /* {e} => e */
493 else if (len == 2)
494 ; /* Leave the list unchanged: {e1 e2} => (e1 e2) */
495 else if (len >= 3 && (len & 1))
496 {
497 /* It's a proper list whose length is odd and at least 3. If
498 the elements at odd indices (the infix operator positions)
499 are all 'equal?', then it's a simple curly-infix list.
500 Otherwise it's a mixed curly-infix list. */
501 SCM op = scm_cadr (ans);
502
503 /* Check to see if the elements at odd indices are 'equal?' */
504 for (tl = scm_cdddr (ans); ; tl = scm_cddr (tl))
505 {
506 if (scm_is_null (tl))
507 {
508 /* Convert simple curly-infix list to prefix:
509 {a <op> b <op> ...} => (<op> a b ...) */
510 tl = ans;
511 while (scm_is_pair (scm_cdr (tl)))
512 {
513 tmp = scm_cddr (tl);
514 SCM_SETCDR (tl, tmp);
515 tl = tmp;
516 }
517 ans = scm_cons (op, ans);
518 break;
519 }
520 else if (scm_is_false (scm_equal_p (op, scm_car (tl))))
521 {
522 /* Mixed curly-infix list: {e ...} => ($nfx$ e ...) */
523 ans = scm_cons (sym_nfx, ans);
524 break;
525 }
526 }
527 }
528 else
529 /* Mixed curly-infix (possibly improper) list:
530 {e . tail} => ($nfx$ e . tail) */
531 ans = scm_cons (sym_nfx, ans);
532 }
533
534 return maybe_annotate_source (ans, port, opts, line, column);
535 }
536 #undef FUNC_NAME
537
538
539 /* Read a hexadecimal number NDIGITS in length. Put its value into the variable
540 C. If TERMINATOR is non-null, terminate early if the TERMINATOR character is
541 found. */
542 #define SCM_READ_HEX_ESCAPE(ndigits, terminator) \
543 do \
544 { \
545 scm_t_wchar a; \
546 size_t i = 0; \
547 c = 0; \
548 while (i < ndigits) \
549 { \
550 a = scm_getc_unlocked (port); \
551 if (a == EOF) \
552 goto str_eof; \
553 if (terminator \
554 && (a == (scm_t_wchar) terminator) \
555 && (i > 0)) \
556 break; \
557 if ('0' <= a && a <= '9') \
558 a -= '0'; \
559 else if ('A' <= a && a <= 'F') \
560 a = a - 'A' + 10; \
561 else if ('a' <= a && a <= 'f') \
562 a = a - 'a' + 10; \
563 else \
564 { \
565 c = a; \
566 goto bad_escaped; \
567 } \
568 c = c * 16 + a; \
569 i ++; \
570 } \
571 } while (0)
572
573 static void
574 skip_intraline_whitespace (SCM port)
575 {
576 scm_t_wchar c;
577
578 do
579 {
580 c = scm_getc_unlocked (port);
581 if (c == EOF)
582 return;
583 }
584 while (c == '\t' || uc_is_general_category (c, UC_SPACE_SEPARATOR));
585
586 scm_ungetc_unlocked (c, port);
587 }
588
589 static SCM
590 scm_read_string (int chr, SCM port, scm_t_read_opts *opts)
591 #define FUNC_NAME "scm_lreadr"
592 {
593 /* For strings smaller than C_STR, this function creates only one Scheme
594 object (the string returned). */
595
596 SCM str = SCM_EOL;
597 size_t c_str_len = 0;
598 scm_t_wchar c, c_str[READER_STRING_BUFFER_SIZE];
599
600 /* Need to capture line and column numbers here. */
601 long line = SCM_LINUM (port);
602 int column = SCM_COL (port) - 1;
603
604 while ('"' != (c = scm_getc_unlocked (port)))
605 {
606 if (c == EOF)
607 {
608 str_eof:
609 scm_i_input_error (FUNC_NAME, port,
610 "end of file in string constant", SCM_EOL);
611 }
612
613 if (c_str_len + 1 >= READER_STRING_BUFFER_SIZE)
614 {
615 str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
616 c_str_len = 0;
617 }
618
619 if (c == '\\')
620 {
621 switch (c = scm_getc_unlocked (port))
622 {
623 case EOF:
624 goto str_eof;
625 case '"':
626 case '\\':
627 break;
628 case '\n':
629 if (opts->hungry_eol_escapes_p)
630 skip_intraline_whitespace (port);
631 continue;
632 case '0':
633 c = '\0';
634 break;
635 case 'f':
636 c = '\f';
637 break;
638 case 'n':
639 c = '\n';
640 break;
641 case 'r':
642 c = '\r';
643 break;
644 case 't':
645 c = '\t';
646 break;
647 case 'a':
648 c = '\007';
649 break;
650 case 'v':
651 c = '\v';
652 break;
653 case 'b':
654 c = '\010';
655 break;
656 case 'x':
657 if (opts->r6rs_escapes_p)
658 SCM_READ_HEX_ESCAPE (10, ';');
659 else
660 SCM_READ_HEX_ESCAPE (2, '\0');
661 break;
662 case 'u':
663 if (!opts->r6rs_escapes_p)
664 {
665 SCM_READ_HEX_ESCAPE (4, '\0');
666 break;
667 }
668 case 'U':
669 if (!opts->r6rs_escapes_p)
670 {
671 SCM_READ_HEX_ESCAPE (6, '\0');
672 break;
673 }
674 default:
675 bad_escaped:
676 scm_i_input_error (FUNC_NAME, port,
677 "illegal character in escape sequence: ~S",
678 scm_list_1 (SCM_MAKE_CHAR (c)));
679 }
680 }
681
682 c_str[c_str_len++] = c;
683 }
684
685 if (scm_is_null (str))
686 /* Fast path: we got a string that fits in C_STR. */
687 str = scm_from_utf32_stringn (c_str, c_str_len);
688 else
689 {
690 if (c_str_len > 0)
691 str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
692
693 str = scm_string_concatenate_reverse (str, SCM_UNDEFINED, SCM_UNDEFINED);
694 }
695
696 return maybe_annotate_source (str, port, opts, line, column);
697 }
698 #undef FUNC_NAME
699
700
701 static SCM
702 scm_read_number (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
703 {
704 SCM result, str = SCM_EOL;
705 char local_buffer[READER_BUFFER_SIZE], *buffer;
706 size_t bytes_read;
707
708 /* Need to capture line and column numbers here. */
709 long line = SCM_LINUM (port);
710 int column = SCM_COL (port) - 1;
711
712 scm_ungetc_unlocked (chr, port);
713 buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
714 &bytes_read);
715
716 str = scm_from_port_stringn (buffer, bytes_read, port);
717
718 result = scm_string_to_number (str, SCM_UNDEFINED);
719 if (scm_is_false (result))
720 {
721 /* Return a symbol instead of a number */
722 if (opts->case_insensitive_p)
723 str = scm_string_downcase_x (str);
724 result = scm_string_to_symbol (str);
725 }
726 else if (SCM_NIMP (result))
727 result = maybe_annotate_source (result, port, opts, line, column);
728
729 SCM_COL (port) += scm_i_string_length (str);
730 return result;
731 }
732
733 static SCM
734 scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
735 {
736 SCM result;
737 int ends_with_colon = 0;
738 size_t bytes_read;
739 int postfix = (opts->keyword_style == KEYWORD_STYLE_POSTFIX);
740 char local_buffer[READER_BUFFER_SIZE], *buffer;
741 SCM str;
742
743 scm_ungetc_unlocked (chr, port);
744 buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
745 &bytes_read);
746 if (bytes_read > 0)
747 ends_with_colon = buffer[bytes_read - 1] == ':';
748
749 if (postfix && ends_with_colon && (bytes_read > 1))
750 {
751 str = scm_from_port_stringn (buffer, bytes_read - 1, port);
752
753 if (opts->case_insensitive_p)
754 str = scm_string_downcase_x (str);
755 result = scm_symbol_to_keyword (scm_string_to_symbol (str));
756 }
757 else
758 {
759 str = scm_from_port_stringn (buffer, bytes_read, port);
760
761 if (opts->case_insensitive_p)
762 str = scm_string_downcase_x (str);
763 result = scm_string_to_symbol (str);
764 }
765
766 SCM_COL (port) += scm_i_string_length (str);
767 return result;
768 }
769
770 static SCM
771 scm_read_number_and_radix (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
772 #define FUNC_NAME "scm_lreadr"
773 {
774 SCM result;
775 size_t read;
776 char local_buffer[READER_BUFFER_SIZE], *buffer;
777 unsigned int radix;
778 SCM str;
779
780 switch (chr)
781 {
782 case 'B':
783 case 'b':
784 radix = 2;
785 break;
786
787 case 'o':
788 case 'O':
789 radix = 8;
790 break;
791
792 case 'd':
793 case 'D':
794 radix = 10;
795 break;
796
797 case 'x':
798 case 'X':
799 radix = 16;
800 break;
801
802 default:
803 scm_ungetc_unlocked (chr, port);
804 scm_ungetc_unlocked ('#', port);
805 radix = 10;
806 }
807
808 buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
809 &read);
810
811 str = scm_from_port_stringn (buffer, read, port);
812
813 result = scm_string_to_number (str, scm_from_uint (radix));
814
815 SCM_COL (port) += scm_i_string_length (str);
816
817 if (scm_is_true (result))
818 return result;
819
820 scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
821
822 return SCM_BOOL_F;
823 }
824 #undef FUNC_NAME
825
826 static SCM
827 scm_read_quote (int chr, SCM port, scm_t_read_opts *opts)
828 {
829 SCM p;
830 long line = SCM_LINUM (port);
831 int column = SCM_COL (port) - 1;
832
833 switch (chr)
834 {
835 case '`':
836 p = scm_sym_quasiquote;
837 break;
838
839 case '\'':
840 p = scm_sym_quote;
841 break;
842
843 case ',':
844 {
845 scm_t_wchar c;
846
847 c = scm_getc_unlocked (port);
848 if ('@' == c)
849 p = scm_sym_uq_splicing;
850 else
851 {
852 scm_ungetc_unlocked (c, port);
853 p = scm_sym_unquote;
854 }
855 break;
856 }
857
858 default:
859 fprintf (stderr, "%s: unhandled quote character (%i)\n",
860 "scm_read_quote", chr);
861 abort ();
862 }
863
864 p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL);
865 return maybe_annotate_source (p, port, opts, line, column);
866 }
867
868 SCM_SYMBOL (sym_syntax, "syntax");
869 SCM_SYMBOL (sym_quasisyntax, "quasisyntax");
870 SCM_SYMBOL (sym_unsyntax, "unsyntax");
871 SCM_SYMBOL (sym_unsyntax_splicing, "unsyntax-splicing");
872
873 static SCM
874 scm_read_syntax (int chr, SCM port, scm_t_read_opts *opts)
875 {
876 SCM p;
877 long line = SCM_LINUM (port);
878 int column = SCM_COL (port) - 1;
879
880 switch (chr)
881 {
882 case '`':
883 p = sym_quasisyntax;
884 break;
885
886 case '\'':
887 p = sym_syntax;
888 break;
889
890 case ',':
891 {
892 int c;
893
894 c = scm_getc_unlocked (port);
895 if ('@' == c)
896 p = sym_unsyntax_splicing;
897 else
898 {
899 scm_ungetc_unlocked (c, port);
900 p = sym_unsyntax;
901 }
902 break;
903 }
904
905 default:
906 fprintf (stderr, "%s: unhandled syntax character (%i)\n",
907 "scm_read_syntax", chr);
908 abort ();
909 }
910
911 p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL);
912 return maybe_annotate_source (p, port, opts, line, column);
913 }
914
915 static SCM
916 scm_read_nil (int chr, SCM port, scm_t_read_opts *opts)
917 {
918 SCM id = scm_read_mixed_case_symbol (chr, port, opts);
919
920 if (!scm_is_eq (id, sym_nil))
921 scm_i_input_error ("scm_read_nil", port,
922 "unexpected input while reading #nil: ~a",
923 scm_list_1 (id));
924
925 return SCM_ELISP_NIL;
926 }
927
928 static SCM
929 scm_read_semicolon_comment (int chr, SCM port)
930 {
931 int c;
932
933 /* We use the get_byte here because there is no need to get the
934 locale correct with comment input. This presumes that newline
935 always represents itself no matter what the encoding is. */
936 for (c = scm_get_byte_or_eof_unlocked (port);
937 (c != EOF) && (c != '\n');
938 c = scm_get_byte_or_eof_unlocked (port));
939
940 return SCM_UNSPECIFIED;
941 }
942
943 \f
944 /* Sharp readers, i.e. readers called after a `#' sign has been read. */
945
946 static SCM
947 scm_read_boolean (int chr, SCM port)
948 {
949 switch (chr)
950 {
951 case 't':
952 case 'T':
953 return SCM_BOOL_T;
954
955 case 'f':
956 case 'F':
957 return SCM_BOOL_F;
958 }
959
960 return SCM_UNSPECIFIED;
961 }
962
963 static SCM
964 scm_read_character (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
965 #define FUNC_NAME "scm_lreadr"
966 {
967 char buffer[READER_CHAR_NAME_MAX_SIZE];
968 SCM charname;
969 size_t charname_len, bytes_read;
970 scm_t_wchar cp;
971 int overflow;
972 scm_t_port *pt;
973
974 overflow = read_token (port, opts, buffer, READER_CHAR_NAME_MAX_SIZE,
975 &bytes_read);
976 if (overflow)
977 scm_i_input_error (FUNC_NAME, port, "character name too long", SCM_EOL);
978
979 if (bytes_read == 0)
980 {
981 chr = scm_getc_unlocked (port);
982 if (chr == EOF)
983 scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
984 "while reading character", SCM_EOL);
985
986 /* CHR must be a token delimiter, like a whitespace. */
987 return (SCM_MAKE_CHAR (chr));
988 }
989
990 pt = SCM_PTAB_ENTRY (port);
991
992 /* Simple ASCII characters can be processed immediately. Also, simple
993 ISO-8859-1 characters can be processed immediately if the encoding for this
994 port is ISO-8859-1. */
995 if (bytes_read == 1 && ((unsigned char) buffer[0] <= 127 || pt->encoding == NULL))
996 {
997 SCM_COL (port) += 1;
998 return SCM_MAKE_CHAR (buffer[0]);
999 }
1000
1001 /* Otherwise, convert the buffer into a proper scheme string for
1002 processing. */
1003 charname = scm_from_port_stringn (buffer, bytes_read, port);
1004 charname_len = scm_i_string_length (charname);
1005 SCM_COL (port) += charname_len;
1006 cp = scm_i_string_ref (charname, 0);
1007 if (charname_len == 1)
1008 return SCM_MAKE_CHAR (cp);
1009
1010 /* Ignore dotted circles, which may be used to keep combining characters from
1011 combining with the backslash in #\charname. */
1012 if (cp == SCM_CODEPOINT_DOTTED_CIRCLE && charname_len == 2)
1013 return SCM_MAKE_CHAR (scm_i_string_ref (charname, 1));
1014
1015 if (cp >= '0' && cp < '8')
1016 {
1017 /* Dirk:FIXME:: This type of character syntax is not R5RS
1018 * compliant. Further, it should be verified that the constant
1019 * does only consist of octal digits. */
1020 SCM p = scm_string_to_number (charname, scm_from_uint (8));
1021 if (SCM_I_INUMP (p))
1022 {
1023 scm_t_wchar c = scm_to_uint32 (p);
1024 if (SCM_IS_UNICODE_CHAR (c))
1025 return SCM_MAKE_CHAR (c);
1026 else
1027 scm_i_input_error (FUNC_NAME, port,
1028 "out-of-range octal character escape: ~a",
1029 scm_list_1 (charname));
1030 }
1031 }
1032
1033 if (cp == 'x' && (charname_len > 1))
1034 {
1035 SCM p;
1036
1037 /* Convert from hex, skipping the initial 'x' character in CHARNAME */
1038 p = scm_string_to_number (scm_c_substring (charname, 1, charname_len),
1039 scm_from_uint (16));
1040 if (SCM_I_INUMP (p))
1041 {
1042 scm_t_wchar c = scm_to_uint32 (p);
1043 if (SCM_IS_UNICODE_CHAR (c))
1044 return SCM_MAKE_CHAR (c);
1045 else
1046 scm_i_input_error (FUNC_NAME, port,
1047 "out-of-range hex character escape: ~a",
1048 scm_list_1 (charname));
1049 }
1050 }
1051
1052 /* The names of characters should never have non-Latin1
1053 characters. */
1054 if (scm_i_is_narrow_string (charname)
1055 || scm_i_try_narrow_string (charname))
1056 { SCM ch = scm_i_charname_to_char (scm_i_string_chars (charname),
1057 charname_len);
1058 if (scm_is_true (ch))
1059 return ch;
1060 }
1061
1062 scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
1063 scm_list_1 (charname));
1064
1065 return SCM_UNSPECIFIED;
1066 }
1067 #undef FUNC_NAME
1068
1069 static SCM
1070 scm_read_keyword (int chr, SCM port, scm_t_read_opts *opts)
1071 {
1072 SCM symbol;
1073
1074 /* Read the symbol that comprises the keyword. Doing this instead of
1075 invoking a specific symbol reader function allows `scm_read_keyword ()'
1076 to adapt to the delimiters currently valid of symbols.
1077
1078 XXX: This implementation allows sloppy syntaxes like `#: key'. */
1079 symbol = scm_read_expression (port, opts);
1080 if (!scm_is_symbol (symbol))
1081 scm_i_input_error ("scm_read_keyword", port,
1082 "keyword prefix `~a' not followed by a symbol: ~s",
1083 scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
1084
1085 return (scm_symbol_to_keyword (symbol));
1086 }
1087
1088 static SCM
1089 scm_read_vector (int chr, SCM port, scm_t_read_opts *opts,
1090 long line, int column)
1091 {
1092 /* Note: We call `scm_read_sexp ()' rather than READER here in order to
1093 guarantee that it's going to do what we want. After all, this is an
1094 implementation detail of `scm_read_vector ()', not a desirable
1095 property. */
1096 return maybe_annotate_source (scm_vector (scm_read_sexp (chr, port, opts)),
1097 port, opts, line, column);
1098 }
1099
1100 /* Helper used by scm_read_array */
1101 static int
1102 read_decimal_integer (SCM port, int c, ssize_t *resp)
1103 {
1104 ssize_t sign = 1;
1105 ssize_t res = 0;
1106 int got_it = 0;
1107
1108 if (c == '-')
1109 {
1110 sign = -1;
1111 c = scm_getc_unlocked (port);
1112 }
1113
1114 while ('0' <= c && c <= '9')
1115 {
1116 res = 10*res + c-'0';
1117 got_it = 1;
1118 c = scm_getc_unlocked (port);
1119 }
1120
1121 if (got_it)
1122 *resp = sign * res;
1123 return c;
1124 }
1125
1126 /* Read an array. This function can also read vectors and uniform
1127 vectors. Also, the conflict between '#f' and '#f32' and '#f64' is
1128 handled here.
1129
1130 C is the first character read after the '#'. */
1131 static SCM
1132 scm_read_array (int c, SCM port, scm_t_read_opts *opts, long line, int column)
1133 {
1134 ssize_t rank;
1135 scm_t_wchar tag_buf[8];
1136 int tag_len;
1137
1138 SCM tag, shape = SCM_BOOL_F, elements, array;
1139
1140 /* XXX - shortcut for ordinary vectors. Shouldn't be necessary but
1141 the array code can not deal with zero-length dimensions yet, and
1142 we want to allow zero-length vectors, of course. */
1143 if (c == '(')
1144 return scm_read_vector (c, port, opts, line, column);
1145
1146 /* Disambiguate between '#f' and uniform floating point vectors. */
1147 if (c == 'f')
1148 {
1149 c = scm_getc_unlocked (port);
1150 if (c != '3' && c != '6')
1151 {
1152 if (c != EOF)
1153 scm_ungetc_unlocked (c, port);
1154 return SCM_BOOL_F;
1155 }
1156 rank = 1;
1157 tag_buf[0] = 'f';
1158 tag_len = 1;
1159 goto continue_reading_tag;
1160 }
1161
1162 /* Read rank. */
1163 rank = 1;
1164 c = read_decimal_integer (port, c, &rank);
1165 if (rank < 0)
1166 scm_i_input_error (NULL, port, "array rank must be non-negative",
1167 SCM_EOL);
1168
1169 /* Read tag. */
1170 tag_len = 0;
1171 continue_reading_tag:
1172 while (c != EOF && c != '(' && c != '@' && c != ':'
1173 && tag_len < sizeof tag_buf / sizeof tag_buf[0])
1174 {
1175 tag_buf[tag_len++] = c;
1176 c = scm_getc_unlocked (port);
1177 }
1178 if (tag_len == 0)
1179 tag = SCM_BOOL_T;
1180 else
1181 {
1182 tag = scm_string_to_symbol (scm_from_utf32_stringn (tag_buf, tag_len));
1183 if (tag_len == sizeof tag_buf / sizeof tag_buf[0])
1184 scm_i_input_error (NULL, port, "invalid array tag, starting with: ~a",
1185 scm_list_1 (tag));
1186 }
1187
1188 /* Read shape. */
1189 if (c == '@' || c == ':')
1190 {
1191 shape = SCM_EOL;
1192
1193 do
1194 {
1195 ssize_t lbnd = 0, len = 0;
1196 SCM s;
1197
1198 if (c == '@')
1199 {
1200 c = scm_getc_unlocked (port);
1201 c = read_decimal_integer (port, c, &lbnd);
1202 }
1203
1204 s = scm_from_ssize_t (lbnd);
1205
1206 if (c == ':')
1207 {
1208 c = scm_getc_unlocked (port);
1209 c = read_decimal_integer (port, c, &len);
1210 if (len < 0)
1211 scm_i_input_error (NULL, port,
1212 "array length must be non-negative",
1213 SCM_EOL);
1214
1215 s = scm_list_2 (s, scm_from_ssize_t (lbnd+len-1));
1216 }
1217
1218 shape = scm_cons (s, shape);
1219 } while (c == '@' || c == ':');
1220
1221 shape = scm_reverse_x (shape, SCM_EOL);
1222 }
1223
1224 /* Read nested lists of elements. */
1225 if (c != '(')
1226 scm_i_input_error (NULL, port,
1227 "missing '(' in vector or array literal",
1228 SCM_EOL);
1229 elements = scm_read_sexp (c, port, opts);
1230
1231 if (scm_is_false (shape))
1232 shape = scm_from_ssize_t (rank);
1233 else if (scm_ilength (shape) != rank)
1234 scm_i_input_error
1235 (NULL, port,
1236 "the number of shape specifications must match the array rank",
1237 SCM_EOL);
1238
1239 /* Handle special print syntax of rank zero arrays; see
1240 scm_i_print_array for a rationale. */
1241 if (rank == 0)
1242 {
1243 if (!scm_is_pair (elements))
1244 scm_i_input_error (NULL, port,
1245 "too few elements in array literal, need 1",
1246 SCM_EOL);
1247 if (!scm_is_null (SCM_CDR (elements)))
1248 scm_i_input_error (NULL, port,
1249 "too many elements in array literal, want 1",
1250 SCM_EOL);
1251 elements = SCM_CAR (elements);
1252 }
1253
1254 /* Construct array, annotate with source location, and return. */
1255 array = scm_list_to_typed_array (tag, shape, elements);
1256 return maybe_annotate_source (array, port, opts, line, column);
1257 }
1258
1259 static SCM
1260 scm_read_srfi4_vector (int chr, SCM port, scm_t_read_opts *opts,
1261 long line, int column)
1262 {
1263 return scm_read_array (chr, port, opts, line, column);
1264 }
1265
1266 static SCM
1267 scm_read_bytevector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
1268 long line, int column)
1269 {
1270 chr = scm_getc_unlocked (port);
1271 if (chr != 'u')
1272 goto syntax;
1273
1274 chr = scm_getc_unlocked (port);
1275 if (chr != '8')
1276 goto syntax;
1277
1278 chr = scm_getc_unlocked (port);
1279 if (chr != '(')
1280 goto syntax;
1281
1282 return maybe_annotate_source
1283 (scm_u8_list_to_bytevector (scm_read_sexp (chr, port, opts)),
1284 port, opts, line, column);
1285
1286 syntax:
1287 scm_i_input_error ("read_bytevector", port,
1288 "invalid bytevector prefix",
1289 SCM_MAKE_CHAR (chr));
1290 return SCM_UNSPECIFIED;
1291 }
1292
1293 static SCM
1294 scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
1295 long line, int column)
1296 {
1297 /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
1298 terribly inefficient but who cares? */
1299 SCM s_bits = SCM_EOL;
1300
1301 for (chr = scm_getc_unlocked (port);
1302 (chr != EOF) && ((chr == '0') || (chr == '1'));
1303 chr = scm_getc_unlocked (port))
1304 {
1305 s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
1306 }
1307
1308 if (chr != EOF)
1309 scm_ungetc_unlocked (chr, port);
1310
1311 return maybe_annotate_source
1312 (scm_bitvector (scm_reverse_x (s_bits, SCM_EOL)),
1313 port, opts, line, column);
1314 }
1315
1316 static SCM
1317 scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
1318 {
1319 int bang_seen = 0;
1320
1321 for (;;)
1322 {
1323 int c = scm_getc_unlocked (port);
1324
1325 if (c == EOF)
1326 scm_i_input_error ("skip_block_comment", port,
1327 "unterminated `#! ... !#' comment", SCM_EOL);
1328
1329 if (c == '!')
1330 bang_seen = 1;
1331 else if (c == '#' && bang_seen)
1332 break;
1333 else
1334 bang_seen = 0;
1335 }
1336
1337 return SCM_UNSPECIFIED;
1338 }
1339
1340 static void set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts,
1341 int value);
1342 static void set_port_square_brackets_p (SCM port, scm_t_read_opts *opts,
1343 int value);
1344 static void set_port_curly_infix_p (SCM port, scm_t_read_opts *opts,
1345 int value);
1346
1347 static SCM
1348 scm_read_shebang (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
1349 {
1350 char name[READER_DIRECTIVE_NAME_MAX_SIZE + 1];
1351 int c;
1352 int i = 0;
1353
1354 while (i <= READER_DIRECTIVE_NAME_MAX_SIZE)
1355 {
1356 c = scm_getc_unlocked (port);
1357 if (c == EOF)
1358 scm_i_input_error ("skip_block_comment", port,
1359 "unterminated `#! ... !#' comment", SCM_EOL);
1360 else if (('a' <= c && c <= 'z') || ('0' <= c && c <= '9') || c == '-')
1361 name[i++] = c;
1362 else if (CHAR_IS_DELIMITER (c))
1363 {
1364 scm_ungetc_unlocked (c, port);
1365 name[i] = '\0';
1366 if (0 == strcmp ("r6rs", name))
1367 ; /* Silently ignore */
1368 else if (0 == strcmp ("fold-case", name))
1369 set_port_case_insensitive_p (port, opts, 1);
1370 else if (0 == strcmp ("no-fold-case", name))
1371 set_port_case_insensitive_p (port, opts, 0);
1372 else if (0 == strcmp ("curly-infix", name))
1373 set_port_curly_infix_p (port, opts, 1);
1374 else if (0 == strcmp ("curly-infix-and-bracket-lists", name))
1375 {
1376 set_port_curly_infix_p (port, opts, 1);
1377 set_port_square_brackets_p (port, opts, 0);
1378 }
1379 else
1380 break;
1381
1382 return SCM_UNSPECIFIED;
1383 }
1384 else
1385 {
1386 scm_ungetc_unlocked (c, port);
1387 break;
1388 }
1389 }
1390 while (i > 0)
1391 scm_ungetc_unlocked (name[--i], port);
1392 return scm_read_scsh_block_comment (chr, port);
1393 }
1394
1395 static SCM
1396 scm_read_r6rs_block_comment (scm_t_wchar chr, SCM port)
1397 {
1398 /* Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
1399 nested. So care must be taken. */
1400 int nesting_level = 1;
1401
1402 int a = scm_getc_unlocked (port);
1403
1404 if (a == EOF)
1405 scm_i_input_error ("scm_read_r6rs_block_comment", port,
1406 "unterminated `#| ... |#' comment", SCM_EOL);
1407
1408 while (nesting_level > 0)
1409 {
1410 int b = scm_getc_unlocked (port);
1411
1412 if (b == EOF)
1413 scm_i_input_error ("scm_read_r6rs_block_comment", port,
1414 "unterminated `#| ... |#' comment", SCM_EOL);
1415
1416 if (a == '|' && b == '#')
1417 {
1418 nesting_level--;
1419 b = EOF;
1420 }
1421 else if (a == '#' && b == '|')
1422 {
1423 nesting_level++;
1424 b = EOF;
1425 }
1426
1427 a = b;
1428 }
1429
1430 return SCM_UNSPECIFIED;
1431 }
1432
1433 static SCM
1434 scm_read_commented_expression (scm_t_wchar chr, SCM port,
1435 scm_t_read_opts *opts)
1436 {
1437 scm_t_wchar c;
1438
1439 c = flush_ws (port, opts, (char *) NULL);
1440 if (EOF == c)
1441 scm_i_input_error ("read_commented_expression", port,
1442 "no expression after #; comment", SCM_EOL);
1443 scm_ungetc_unlocked (c, port);
1444 scm_read_expression (port, opts);
1445 return SCM_UNSPECIFIED;
1446 }
1447
1448 static SCM
1449 scm_read_extended_symbol (scm_t_wchar chr, SCM port)
1450 {
1451 /* Guile's extended symbol read syntax looks like this:
1452
1453 #{This is all a symbol name}#
1454
1455 So here, CHR is expected to be `{'. */
1456 int saw_brace = 0;
1457 size_t len = 0;
1458 SCM buf = scm_i_make_string (1024, NULL, 0);
1459
1460 buf = scm_i_string_start_writing (buf);
1461
1462 while ((chr = scm_getc_unlocked (port)) != EOF)
1463 {
1464 if (saw_brace)
1465 {
1466 if (chr == '#')
1467 {
1468 break;
1469 }
1470 else
1471 {
1472 saw_brace = 0;
1473 scm_i_string_set_x (buf, len++, '}');
1474 }
1475 }
1476
1477 if (chr == '}')
1478 saw_brace = 1;
1479 else if (chr == '\\')
1480 {
1481 /* It used to be that print.c would print extended-read-syntax
1482 symbols with backslashes before "non-standard" chars, but
1483 this routine wouldn't do anything with those escapes.
1484 Bummer. What we've done is to change print.c to output
1485 R6RS hex escapes for those characters, relying on the fact
1486 that the extended read syntax would never put a `\' before
1487 an `x'. For now, we just ignore other instances of
1488 backslash in the string. */
1489 switch ((chr = scm_getc_unlocked (port)))
1490 {
1491 case EOF:
1492 goto done;
1493 case 'x':
1494 {
1495 scm_t_wchar c;
1496
1497 SCM_READ_HEX_ESCAPE (10, ';');
1498 scm_i_string_set_x (buf, len++, c);
1499 break;
1500
1501 str_eof:
1502 chr = EOF;
1503 goto done;
1504
1505 bad_escaped:
1506 scm_i_string_stop_writing ();
1507 scm_i_input_error ("scm_read_extended_symbol", port,
1508 "illegal character in escape sequence: ~S",
1509 scm_list_1 (SCM_MAKE_CHAR (c)));
1510 break;
1511 }
1512 default:
1513 scm_i_string_set_x (buf, len++, chr);
1514 break;
1515 }
1516 }
1517 else
1518 scm_i_string_set_x (buf, len++, chr);
1519
1520 if (len >= scm_i_string_length (buf) - 2)
1521 {
1522 SCM addy;
1523
1524 scm_i_string_stop_writing ();
1525 addy = scm_i_make_string (1024, NULL, 0);
1526 buf = scm_string_append (scm_list_2 (buf, addy));
1527 len = 0;
1528 buf = scm_i_string_start_writing (buf);
1529 }
1530 }
1531
1532 done:
1533 scm_i_string_stop_writing ();
1534 if (chr == EOF)
1535 scm_i_input_error ("scm_read_extended_symbol", port,
1536 "end of file while reading symbol", SCM_EOL);
1537
1538 return (scm_string_to_symbol (scm_c_substring (buf, 0, len)));
1539 }
1540
1541
1542 \f
1543 /* Top-level token readers, i.e., dispatchers. */
1544
1545 static SCM
1546 scm_read_sharp_extension (int chr, SCM port, scm_t_read_opts *opts)
1547 {
1548 SCM proc;
1549
1550 proc = scm_get_hash_procedure (chr);
1551 if (scm_is_true (scm_procedure_p (proc)))
1552 {
1553 long line = SCM_LINUM (port);
1554 int column = SCM_COL (port) - 2;
1555 SCM got;
1556
1557 got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
1558
1559 if (opts->record_positions_p && SCM_NIMP (got)
1560 && !scm_i_has_source_properties (got))
1561 scm_i_set_source_properties_x (got, line, column, SCM_FILENAME (port));
1562
1563 return got;
1564 }
1565
1566 return SCM_UNSPECIFIED;
1567 }
1568
1569 /* The reader for the sharp `#' character. It basically dispatches reads
1570 among the above token readers. */
1571 static SCM
1572 scm_read_sharp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
1573 long line, int column)
1574 #define FUNC_NAME "scm_lreadr"
1575 {
1576 SCM result;
1577
1578 chr = scm_getc_unlocked (port);
1579
1580 result = scm_read_sharp_extension (chr, port, opts);
1581 if (!scm_is_eq (result, SCM_UNSPECIFIED))
1582 return result;
1583
1584 switch (chr)
1585 {
1586 case '\\':
1587 return (scm_read_character (chr, port, opts));
1588 case '(':
1589 return (scm_read_vector (chr, port, opts, line, column));
1590 case 's':
1591 case 'u':
1592 case 'f':
1593 case 'c':
1594 /* This one may return either a boolean or an SRFI-4 vector. */
1595 return (scm_read_srfi4_vector (chr, port, opts, line, column));
1596 case 'v':
1597 return (scm_read_bytevector (chr, port, opts, line, column));
1598 case '*':
1599 return (scm_read_guile_bit_vector (chr, port, opts, line, column));
1600 case 't':
1601 case 'T':
1602 case 'F':
1603 return (scm_read_boolean (chr, port));
1604 case ':':
1605 return (scm_read_keyword (chr, port, opts));
1606 case '0': case '1': case '2': case '3': case '4':
1607 case '5': case '6': case '7': case '8': case '9':
1608 case '@':
1609 return (scm_read_array (chr, port, opts, line, column));
1610
1611 case 'i':
1612 case 'e':
1613 case 'b':
1614 case 'B':
1615 case 'o':
1616 case 'O':
1617 case 'd':
1618 case 'D':
1619 case 'x':
1620 case 'X':
1621 case 'I':
1622 case 'E':
1623 return (scm_read_number_and_radix (chr, port, opts));
1624 case '{':
1625 return (scm_read_extended_symbol (chr, port));
1626 case '!':
1627 return (scm_read_shebang (chr, port, opts));
1628 case ';':
1629 return (scm_read_commented_expression (chr, port, opts));
1630 case '`':
1631 case '\'':
1632 case ',':
1633 return (scm_read_syntax (chr, port, opts));
1634 case 'n':
1635 return (scm_read_nil (chr, port, opts));
1636 default:
1637 result = scm_read_sharp_extension (chr, port, opts);
1638 if (scm_is_eq (result, SCM_UNSPECIFIED))
1639 {
1640 /* To remain compatible with 1.8 and earlier, the following
1641 characters have lower precedence than `read-hash-extend'
1642 characters. */
1643 switch (chr)
1644 {
1645 case '|':
1646 return scm_read_r6rs_block_comment (chr, port);
1647 default:
1648 scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
1649 scm_list_1 (SCM_MAKE_CHAR (chr)));
1650 }
1651 }
1652 else
1653 return result;
1654 }
1655
1656 return SCM_UNSPECIFIED;
1657 }
1658 #undef FUNC_NAME
1659
1660 static SCM
1661 read_inner_expression (SCM port, scm_t_read_opts *opts)
1662 #define FUNC_NAME "read_inner_expression"
1663 {
1664 while (1)
1665 {
1666 scm_t_wchar chr;
1667
1668 chr = scm_getc_unlocked (port);
1669
1670 switch (chr)
1671 {
1672 case SCM_WHITE_SPACES:
1673 case SCM_LINE_INCREMENTORS:
1674 break;
1675 case ';':
1676 (void) scm_read_semicolon_comment (chr, port);
1677 break;
1678 case '{':
1679 if (opts->curly_infix_p)
1680 {
1681 if (opts->neoteric_p)
1682 return scm_read_sexp (chr, port, opts);
1683 else
1684 {
1685 SCM expr;
1686
1687 /* Enable neoteric expressions within curly braces */
1688 opts->neoteric_p = 1;
1689 expr = scm_read_sexp (chr, port, opts);
1690 opts->neoteric_p = 0;
1691 return expr;
1692 }
1693 }
1694 else
1695 return scm_read_mixed_case_symbol (chr, port, opts);
1696 case '[':
1697 if (opts->square_brackets_p)
1698 return scm_read_sexp (chr, port, opts);
1699 else if (opts->curly_infix_p)
1700 {
1701 /* The syntax of neoteric expressions requires that '[' be
1702 a delimiter when curly-infix is enabled, so it cannot
1703 be part of an unescaped symbol. We might as well do
1704 something useful with it, so we adopt Kawa's convention:
1705 [...] => ($bracket-list$ ...) */
1706 long line = SCM_LINUM (port);
1707 int column = SCM_COL (port) - 1;
1708 return maybe_annotate_source
1709 (scm_cons (sym_bracket_list, scm_read_sexp (chr, port, opts)),
1710 port, opts, line, column);
1711 }
1712 else
1713 return scm_read_mixed_case_symbol (chr, port, opts);
1714 case '(':
1715 return (scm_read_sexp (chr, port, opts));
1716 case '"':
1717 return (scm_read_string (chr, port, opts));
1718 case '\'':
1719 case '`':
1720 case ',':
1721 return (scm_read_quote (chr, port, opts));
1722 case '#':
1723 {
1724 long line = SCM_LINUM (port);
1725 int column = SCM_COL (port) - 1;
1726 SCM result = scm_read_sharp (chr, port, opts, line, column);
1727 if (scm_is_eq (result, SCM_UNSPECIFIED))
1728 /* We read a comment or some such. */
1729 break;
1730 else
1731 return result;
1732 }
1733 case ')':
1734 scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
1735 break;
1736 case '}':
1737 if (opts->curly_infix_p)
1738 scm_i_input_error (FUNC_NAME, port, "unexpected \"}\"", SCM_EOL);
1739 else
1740 return scm_read_mixed_case_symbol (chr, port, opts);
1741 case ']':
1742 if (opts->square_brackets_p)
1743 scm_i_input_error (FUNC_NAME, port, "unexpected \"]\"", SCM_EOL);
1744 /* otherwise fall through */
1745 case EOF:
1746 return SCM_EOF_VAL;
1747 case ':':
1748 if (opts->keyword_style == KEYWORD_STYLE_PREFIX)
1749 return scm_symbol_to_keyword (scm_read_expression (port, opts));
1750 /* Fall through. */
1751
1752 default:
1753 {
1754 if (((chr >= '0') && (chr <= '9'))
1755 || (strchr ("+-.", chr)))
1756 return (scm_read_number (chr, port, opts));
1757 else
1758 return (scm_read_mixed_case_symbol (chr, port, opts));
1759 }
1760 }
1761 }
1762 }
1763 #undef FUNC_NAME
1764
1765 static SCM
1766 scm_read_expression (SCM port, scm_t_read_opts *opts)
1767 #define FUNC_NAME "scm_read_expression"
1768 {
1769 if (!opts->neoteric_p)
1770 return read_inner_expression (port, opts);
1771 else
1772 {
1773 long line = 0;
1774 int column = 0;
1775 SCM expr;
1776
1777 if (opts->record_positions_p)
1778 {
1779 /* We need to get the position of the first non-whitespace
1780 character in order to correctly annotate neoteric
1781 expressions. For example, for the expression 'f(x)', the
1782 first call to 'read_inner_expression' reads the 'f' (which
1783 cannot be annotated), and then we later read the '(x)' and
1784 use it to construct the new list (f x). */
1785 int c = flush_ws (port, opts, (char *) NULL);
1786 if (c == EOF)
1787 return SCM_EOF_VAL;
1788 scm_ungetc_unlocked (c, port);
1789 line = SCM_LINUM (port);
1790 column = SCM_COL (port);
1791 }
1792
1793 expr = read_inner_expression (port, opts);
1794
1795 /* 'expr' is the first component of the neoteric expression. Now
1796 we loop, and as long as the next character is '(', '[', or '{',
1797 (without any intervening whitespace), we use it to construct a
1798 new expression. For example, f{n - 1}(x) => ((f (- n 1)) x). */
1799 for (;;)
1800 {
1801 int chr = scm_getc_unlocked (port);
1802
1803 if (chr == '(')
1804 /* e(...) => (e ...) */
1805 expr = scm_cons (expr, scm_read_sexp (chr, port, opts));
1806 else if (chr == '[')
1807 /* e[...] => ($bracket-apply$ e ...) */
1808 expr = scm_cons (sym_bracket_apply,
1809 scm_cons (expr,
1810 scm_read_sexp (chr, port, opts)));
1811 else if (chr == '{')
1812 {
1813 SCM arg = scm_read_sexp (chr, port, opts);
1814
1815 if (scm_is_null (arg))
1816 expr = scm_list_1 (expr); /* e{} => (e) */
1817 else
1818 expr = scm_list_2 (expr, arg); /* e{...} => (e {...}) */
1819 }
1820 else
1821 {
1822 if (chr != EOF)
1823 scm_ungetc_unlocked (chr, port);
1824 break;
1825 }
1826 maybe_annotate_source (expr, port, opts, line, column);
1827 }
1828 return expr;
1829 }
1830 }
1831 #undef FUNC_NAME
1832
1833 \f
1834 /* Actual reader. */
1835
1836 static void init_read_options (SCM port, scm_t_read_opts *opts);
1837
1838 SCM_DEFINE (scm_read, "read", 0, 1, 0,
1839 (SCM port),
1840 "Read an s-expression from the input port @var{port}, or from\n"
1841 "the current input port if @var{port} is not specified.\n"
1842 "Any whitespace before the next token is discarded.")
1843 #define FUNC_NAME s_scm_read
1844 {
1845 scm_t_read_opts opts;
1846 int c;
1847
1848 if (SCM_UNBNDP (port))
1849 port = scm_current_input_port ();
1850 SCM_VALIDATE_OPINPORT (1, port);
1851
1852 init_read_options (port, &opts);
1853
1854 c = flush_ws (port, &opts, (char *) NULL);
1855 if (EOF == c)
1856 return SCM_EOF_VAL;
1857 scm_ungetc_unlocked (c, port);
1858
1859 return (scm_read_expression (port, &opts));
1860 }
1861 #undef FUNC_NAME
1862
1863
1864 \f
1865
1866 /* Manipulate the read-hash-procedures alist. This could be written in
1867 Scheme, but maybe it will also be used by C code during initialisation. */
1868 SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
1869 (SCM chr, SCM proc),
1870 "Install the procedure @var{proc} for reading expressions\n"
1871 "starting with the character sequence @code{#} and @var{chr}.\n"
1872 "@var{proc} will be called with two arguments: the character\n"
1873 "@var{chr} and the port to read further data from. The object\n"
1874 "returned will be the return value of @code{read}. \n"
1875 "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1876 )
1877 #define FUNC_NAME s_scm_read_hash_extend
1878 {
1879 SCM this;
1880 SCM prev;
1881
1882 SCM_VALIDATE_CHAR (1, chr);
1883 SCM_ASSERT (scm_is_false (proc)
1884 || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
1885 proc, SCM_ARG2, FUNC_NAME);
1886
1887 /* Check if chr is already in the alist. */
1888 this = scm_i_read_hash_procedures_ref ();
1889 prev = SCM_BOOL_F;
1890 while (1)
1891 {
1892 if (scm_is_null (this))
1893 {
1894 /* not found, so add it to the beginning. */
1895 if (scm_is_true (proc))
1896 {
1897 SCM new = scm_cons (scm_cons (chr, proc),
1898 scm_i_read_hash_procedures_ref ());
1899 scm_i_read_hash_procedures_set_x (new);
1900 }
1901 break;
1902 }
1903 if (scm_is_eq (chr, SCM_CAAR (this)))
1904 {
1905 /* already in the alist. */
1906 if (scm_is_false (proc))
1907 {
1908 /* remove it. */
1909 if (scm_is_false (prev))
1910 {
1911 SCM rest = SCM_CDR (scm_i_read_hash_procedures_ref ());
1912 scm_i_read_hash_procedures_set_x (rest);
1913 }
1914 else
1915 scm_set_cdr_x (prev, SCM_CDR (this));
1916 }
1917 else
1918 {
1919 /* replace it. */
1920 scm_set_cdr_x (SCM_CAR (this), proc);
1921 }
1922 break;
1923 }
1924 prev = this;
1925 this = SCM_CDR (this);
1926 }
1927
1928 return SCM_UNSPECIFIED;
1929 }
1930 #undef FUNC_NAME
1931
1932 /* Recover the read-hash procedure corresponding to char c. */
1933 static SCM
1934 scm_get_hash_procedure (int c)
1935 {
1936 SCM rest = scm_i_read_hash_procedures_ref ();
1937
1938 while (1)
1939 {
1940 if (scm_is_null (rest))
1941 return SCM_BOOL_F;
1942
1943 if (SCM_CHAR (SCM_CAAR (rest)) == c)
1944 return SCM_CDAR (rest);
1945
1946 rest = SCM_CDR (rest);
1947 }
1948 }
1949
1950 #define SCM_ENCODING_SEARCH_SIZE (500)
1951
1952 /* Search the first few hundred characters of a file for an Emacs-like coding
1953 declaration. Returns either NULL or a string whose storage has been
1954 allocated with `scm_gc_malloc ()'. */
1955 char *
1956 scm_i_scan_for_encoding (SCM port)
1957 {
1958 scm_t_port *pt;
1959 char header[SCM_ENCODING_SEARCH_SIZE+1];
1960 size_t bytes_read, encoding_length, i;
1961 char *encoding = NULL;
1962 int utf8_bom = 0;
1963 char *pos, *encoding_start;
1964 int in_comment;
1965
1966 pt = SCM_PTAB_ENTRY (port);
1967
1968 if (pt->rw_active == SCM_PORT_WRITE)
1969 scm_flush_unlocked (port);
1970
1971 if (pt->rw_random)
1972 pt->rw_active = SCM_PORT_READ;
1973
1974 if (pt->read_pos == pt->read_end)
1975 {
1976 /* We can use the read buffer, and thus avoid a seek. */
1977 if (scm_fill_input_unlocked (port) == EOF)
1978 return NULL;
1979
1980 bytes_read = pt->read_end - pt->read_pos;
1981 if (bytes_read > SCM_ENCODING_SEARCH_SIZE)
1982 bytes_read = SCM_ENCODING_SEARCH_SIZE;
1983
1984 if (bytes_read <= 1)
1985 /* An unbuffered port -- don't scan. */
1986 return NULL;
1987
1988 memcpy (header, pt->read_pos, bytes_read);
1989 header[bytes_read] = '\0';
1990 }
1991 else
1992 {
1993 /* Try to read some bytes and then seek back. Not all ports
1994 support seeking back; and indeed some file ports (like
1995 /dev/urandom) will succeed on an lseek (fd, 0, SEEK_CUR)---the
1996 check performed by SCM_FPORT_FDES---but fail to seek
1997 backwards. Hence this block comes second. We prefer to use
1998 the read buffer in-place. */
1999 if (SCM_FPORTP (port) && !SCM_FDES_RANDOM_P (SCM_FPORT_FDES (port)))
2000 return NULL;
2001
2002 bytes_read = scm_c_read_unlocked (port, header, SCM_ENCODING_SEARCH_SIZE);
2003 header[bytes_read] = '\0';
2004 scm_seek (port, scm_from_int (0), scm_from_int (SEEK_SET));
2005 }
2006
2007 if (bytes_read > 3
2008 && header[0] == '\xef' && header[1] == '\xbb' && header[2] == '\xbf')
2009 utf8_bom = 1;
2010
2011 /* search past "coding[:=]" */
2012 pos = header;
2013 while (1)
2014 {
2015 if ((pos = strstr(pos, "coding")) == NULL)
2016 return NULL;
2017
2018 pos += strlen("coding");
2019 if (pos - header >= SCM_ENCODING_SEARCH_SIZE ||
2020 (*pos == ':' || *pos == '='))
2021 {
2022 pos ++;
2023 break;
2024 }
2025 }
2026
2027 /* skip spaces */
2028 while (pos - header <= SCM_ENCODING_SEARCH_SIZE &&
2029 (*pos == ' ' || *pos == '\t'))
2030 pos ++;
2031
2032 /* grab the next token */
2033 encoding_start = pos;
2034 i = 0;
2035 while (encoding_start + i - header <= SCM_ENCODING_SEARCH_SIZE
2036 && encoding_start + i - header < bytes_read
2037 && (isalnum ((int) encoding_start[i])
2038 || strchr ("_-.:/,+=()", encoding_start[i]) != NULL))
2039 i++;
2040
2041 encoding_length = i;
2042 if (encoding_length == 0)
2043 return NULL;
2044
2045 encoding = scm_gc_strndup (encoding_start, encoding_length, "encoding");
2046 for (i = 0; i < encoding_length; i++)
2047 encoding[i] = toupper ((int) encoding[i]);
2048
2049 /* push backwards to make sure we were in a comment */
2050 in_comment = 0;
2051 pos = encoding_start;
2052 while (pos >= header)
2053 {
2054 if (*pos == ';')
2055 {
2056 in_comment = 1;
2057 break;
2058 }
2059 else if (*pos == '\n' || pos == header)
2060 {
2061 /* This wasn't in a semicolon comment. Check for a
2062 hash-bang comment. */
2063 char *beg = strstr (header, "#!");
2064 char *end = strstr (header, "!#");
2065 if (beg < encoding_start && encoding_start + encoding_length <= end)
2066 in_comment = 1;
2067 break;
2068 }
2069 else
2070 {
2071 pos --;
2072 continue;
2073 }
2074 }
2075 if (!in_comment)
2076 /* This wasn't in a comment */
2077 return NULL;
2078
2079 if (utf8_bom && strcmp(encoding, "UTF-8"))
2080 scm_misc_error (NULL,
2081 "the port input declares the encoding ~s but is encoded as UTF-8",
2082 scm_list_1 (scm_from_locale_string (encoding)));
2083
2084 return encoding;
2085 }
2086
2087 SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0,
2088 (SCM port),
2089 "Scans the port for an Emacs-like character coding declaration\n"
2090 "near the top of the contents of a port with random-accessible contents.\n"
2091 "The coding declaration is of the form\n"
2092 "@code{coding: XXXXX} and must appear in a scheme comment.\n"
2093 "\n"
2094 "Returns a string containing the character encoding of the file\n"
2095 "if a declaration was found, or @code{#f} otherwise.\n")
2096 #define FUNC_NAME s_scm_file_encoding
2097 {
2098 char *enc;
2099 SCM s_enc;
2100
2101 SCM_VALIDATE_OPINPORT (SCM_ARG1, port);
2102
2103 enc = scm_i_scan_for_encoding (port);
2104 if (enc == NULL)
2105 return SCM_BOOL_F;
2106 else
2107 {
2108 s_enc = scm_from_locale_string (enc);
2109 return s_enc;
2110 }
2111
2112 return SCM_BOOL_F;
2113 }
2114 #undef FUNC_NAME
2115
2116 \f
2117 /* Per-port read options.
2118
2119 We store per-port read options in the 'port-read-options' key of the
2120 port's alist. The value stored in the alist is a single integer that
2121 contains a two-bit field for each read option.
2122
2123 If a bit field contains READ_OPTION_INHERIT (3), that indicates that
2124 the applicable value should be inherited from the corresponding
2125 global read option. Otherwise, the bit field contains the value of
2126 the read option. For boolean read options that have been set
2127 per-port, the possible values are 0 or 1. If the 'keyword_style'
2128 read option has been set per-port, its possible values are those in
2129 'enum t_keyword_style'. */
2130
2131 /* Key to read options in per-port alists. */
2132 SCM_SYMBOL (sym_port_read_options, "port-read-options");
2133
2134 /* Offsets of bit fields for each per-port override */
2135 #define READ_OPTION_COPY_SOURCE_P 0
2136 #define READ_OPTION_RECORD_POSITIONS_P 2
2137 #define READ_OPTION_CASE_INSENSITIVE_P 4
2138 #define READ_OPTION_KEYWORD_STYLE 6
2139 #define READ_OPTION_R6RS_ESCAPES_P 8
2140 #define READ_OPTION_SQUARE_BRACKETS_P 10
2141 #define READ_OPTION_HUNGRY_EOL_ESCAPES_P 12
2142 #define READ_OPTION_CURLY_INFIX_P 14
2143
2144 /* The total width in bits of the per-port overrides */
2145 #define READ_OPTIONS_NUM_BITS 16
2146
2147 #define READ_OPTIONS_INHERIT_ALL ((1UL << READ_OPTIONS_NUM_BITS) - 1)
2148 #define READ_OPTIONS_MAX_VALUE READ_OPTIONS_INHERIT_ALL
2149
2150 #define READ_OPTION_MASK 3
2151 #define READ_OPTION_INHERIT 3
2152
2153 static void
2154 set_port_read_option (SCM port, int option, int new_value)
2155 {
2156 SCM scm_read_options;
2157 unsigned int read_options;
2158
2159 new_value &= READ_OPTION_MASK;
2160 scm_read_options = scm_assq_ref (SCM_PTAB_ENTRY(port)->alist,
2161 sym_port_read_options);
2162 if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE))
2163 read_options = scm_to_uint (scm_read_options);
2164 else
2165 read_options = READ_OPTIONS_INHERIT_ALL;
2166 read_options &= ~(READ_OPTION_MASK << option);
2167 read_options |= new_value << option;
2168 scm_read_options = scm_from_uint (read_options);
2169 SCM_PTAB_ENTRY(port)->alist = scm_assq_set_x (SCM_PTAB_ENTRY(port)->alist,
2170 sym_port_read_options,
2171 scm_read_options);
2172 }
2173
2174 /* Set OPTS and PORT's case-insensitivity according to VALUE. */
2175 static void
2176 set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts, int value)
2177 {
2178 value = !!value;
2179 opts->case_insensitive_p = value;
2180 set_port_read_option (port, READ_OPTION_CASE_INSENSITIVE_P, value);
2181 }
2182
2183 /* Set OPTS and PORT's square_brackets_p option according to VALUE. */
2184 static void
2185 set_port_square_brackets_p (SCM port, scm_t_read_opts *opts, int value)
2186 {
2187 value = !!value;
2188 opts->square_brackets_p = value;
2189 set_port_read_option (port, READ_OPTION_SQUARE_BRACKETS_P, value);
2190 }
2191
2192 /* Set OPTS and PORT's curly_infix_p option according to VALUE. */
2193 static void
2194 set_port_curly_infix_p (SCM port, scm_t_read_opts *opts, int value)
2195 {
2196 value = !!value;
2197 opts->curly_infix_p = value;
2198 set_port_read_option (port, READ_OPTION_CURLY_INFIX_P, value);
2199 }
2200
2201 /* Initialize OPTS based on PORT's read options and the global read
2202 options. */
2203 static void
2204 init_read_options (SCM port, scm_t_read_opts *opts)
2205 {
2206 SCM val, scm_read_options;
2207 unsigned int read_options, x;
2208
2209 scm_read_options = scm_assq_ref (SCM_PTAB_ENTRY(port)->alist,
2210 sym_port_read_options);
2211
2212 if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE))
2213 read_options = scm_to_uint (scm_read_options);
2214 else
2215 read_options = READ_OPTIONS_INHERIT_ALL;
2216
2217 x = READ_OPTION_MASK & (read_options >> READ_OPTION_KEYWORD_STYLE);
2218 if (x == READ_OPTION_INHERIT)
2219 {
2220 val = SCM_PACK (SCM_KEYWORD_STYLE);
2221 if (scm_is_eq (val, scm_keyword_prefix))
2222 x = KEYWORD_STYLE_PREFIX;
2223 else if (scm_is_eq (val, scm_keyword_postfix))
2224 x = KEYWORD_STYLE_POSTFIX;
2225 else
2226 x = KEYWORD_STYLE_HASH_PREFIX;
2227 }
2228 opts->keyword_style = x;
2229
2230 #define RESOLVE_BOOLEAN_OPTION(NAME, name) \
2231 do \
2232 { \
2233 x = READ_OPTION_MASK & (read_options >> READ_OPTION_ ## NAME); \
2234 if (x == READ_OPTION_INHERIT) \
2235 x = !!SCM_ ## NAME; \
2236 opts->name = x; \
2237 } \
2238 while (0)
2239
2240 RESOLVE_BOOLEAN_OPTION (COPY_SOURCE_P, copy_source_p);
2241 RESOLVE_BOOLEAN_OPTION (RECORD_POSITIONS_P, record_positions_p);
2242 RESOLVE_BOOLEAN_OPTION (CASE_INSENSITIVE_P, case_insensitive_p);
2243 RESOLVE_BOOLEAN_OPTION (R6RS_ESCAPES_P, r6rs_escapes_p);
2244 RESOLVE_BOOLEAN_OPTION (SQUARE_BRACKETS_P, square_brackets_p);
2245 RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P, hungry_eol_escapes_p);
2246 RESOLVE_BOOLEAN_OPTION (CURLY_INFIX_P, curly_infix_p);
2247
2248 #undef RESOLVE_BOOLEAN_OPTION
2249
2250 opts->neoteric_p = 0;
2251 }
2252
2253 void
2254 scm_init_read ()
2255 {
2256 SCM read_hash_procs;
2257
2258 read_hash_procs = scm_make_fluid_with_default (SCM_EOL);
2259
2260 scm_i_read_hash_procedures =
2261 SCM_VARIABLE_LOC (scm_c_define ("%read-hash-procedures", read_hash_procs));
2262
2263 scm_init_opts (scm_read_options, scm_read_opts);
2264 #include "libguile/read.x"
2265 }
2266
2267 /*
2268 Local Variables:
2269 c-file-style: "gnu"
2270 End:
2271 */