Use 'c_strcasecmp' instead of 'strcasecmp'.
[bpt/guile.git] / libguile / read.c
1 /* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006,
2 * 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
18 */
19
20
21 \f
22
23 #ifdef HAVE_CONFIG_H
24 # include <config.h>
25 #endif
26
27 #include <stdio.h>
28 #include <ctype.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <unicase.h>
32 #include <unictype.h>
33 #include <c-strcase.h>
34
35 #include "libguile/_scm.h"
36 #include "libguile/bytevectors.h"
37 #include "libguile/chars.h"
38 #include "libguile/eval.h"
39 #include "libguile/arrays.h"
40 #include "libguile/bitvectors.h"
41 #include "libguile/keywords.h"
42 #include "libguile/alist.h"
43 #include "libguile/srcprop.h"
44 #include "libguile/hashtab.h"
45 #include "libguile/hash.h"
46 #include "libguile/ports.h"
47 #include "libguile/fports.h"
48 #include "libguile/root.h"
49 #include "libguile/strings.h"
50 #include "libguile/strports.h"
51 #include "libguile/vectors.h"
52 #include "libguile/validate.h"
53 #include "libguile/srfi-4.h"
54 #include "libguile/srfi-13.h"
55
56 #include "libguile/read.h"
57 #include "libguile/private-options.h"
58
59
60 \f
61
62 SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
63 SCM_SYMBOL (scm_keyword_prefix, "prefix");
64 SCM_SYMBOL (scm_keyword_postfix, "postfix");
65 SCM_SYMBOL (sym_nil, "nil");
66
67 /* SRFI-105 curly infix expression support */
68 SCM_SYMBOL (sym_nfx, "$nfx$");
69 SCM_SYMBOL (sym_bracket_list, "$bracket-list$");
70 SCM_SYMBOL (sym_bracket_apply, "$bracket-apply$");
71
72 scm_t_option scm_read_opts[] =
73 {
74 { SCM_OPTION_BOOLEAN, "copy", 0,
75 "Copy source code expressions." },
76 { SCM_OPTION_BOOLEAN, "positions", 1,
77 "Record positions of source code expressions." },
78 { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
79 "Convert symbols to lower case."},
80 { SCM_OPTION_SCM, "keywords", (scm_t_bits) SCM_BOOL_F_BITS,
81 "Style of keyword recognition: #f, 'prefix or 'postfix."},
82 { SCM_OPTION_BOOLEAN, "r6rs-hex-escapes", 0,
83 "Use R6RS variable-length character and string hex escapes."},
84 { SCM_OPTION_BOOLEAN, "square-brackets", 1,
85 "Treat `[' and `]' as parentheses, for R6RS compatibility."},
86 { SCM_OPTION_BOOLEAN, "hungry-eol-escapes", 0,
87 "In strings, consume leading whitespace after an escaped end-of-line."},
88 { SCM_OPTION_BOOLEAN, "curly-infix", 0,
89 "Support SRFI-105 curly infix expressions."},
90 { 0, },
91 };
92
93 /* Internal read options structure. This is initialized by 'scm_read'
94 from the global and per-port read options, and a pointer is passed
95 down to all helper functions. */
96
97 enum t_keyword_style
98 {
99 KEYWORD_STYLE_HASH_PREFIX,
100 KEYWORD_STYLE_PREFIX,
101 KEYWORD_STYLE_POSTFIX
102 };
103
104 struct t_read_opts
105 {
106 enum t_keyword_style keyword_style;
107 unsigned int copy_source_p : 1;
108 unsigned int record_positions_p : 1;
109 unsigned int case_insensitive_p : 1;
110 unsigned int r6rs_escapes_p : 1;
111 unsigned int square_brackets_p : 1;
112 unsigned int hungry_eol_escapes_p : 1;
113 unsigned int curly_infix_p : 1;
114 unsigned int neoteric_p : 1;
115 };
116
117 typedef struct t_read_opts scm_t_read_opts;
118
119
120 /*
121 Give meaningful error messages for errors
122
123 We use the format
124
125 FILE:LINE:COL: MESSAGE
126 This happened in ....
127
128 This is not standard GNU format, but the test-suite likes the real
129 message to be in front.
130
131 */
132
133
134 void
135 scm_i_input_error (char const *function,
136 SCM port, const char *message, SCM arg)
137 {
138 SCM fn = (scm_is_string (SCM_FILENAME(port))
139 ? SCM_FILENAME(port)
140 : scm_from_locale_string ("#<unknown port>"));
141
142 SCM string_port = scm_open_output_string ();
143 SCM string = SCM_EOL;
144 scm_simple_format (string_port,
145 scm_from_locale_string ("~A:~S:~S: ~A"),
146 scm_list_4 (fn,
147 scm_from_long (SCM_LINUM (port) + 1),
148 scm_from_int (SCM_COL (port) + 1),
149 scm_from_locale_string (message)));
150
151 string = scm_get_output_string (string_port);
152 scm_close_output_port (string_port);
153 scm_error_scm (scm_from_latin1_symbol ("read-error"),
154 function? scm_from_locale_string (function) : SCM_BOOL_F,
155 string,
156 arg,
157 SCM_BOOL_F);
158 }
159
160
161 SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
162 (SCM setting),
163 "Option interface for the read options. Instead of using\n"
164 "this procedure directly, use the procedures @code{read-enable},\n"
165 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
166 #define FUNC_NAME s_scm_read_options
167 {
168 SCM ans = scm_options (setting,
169 scm_read_opts,
170 FUNC_NAME);
171 if (SCM_COPY_SOURCE_P)
172 SCM_RECORD_POSITIONS_P = 1;
173 return ans;
174 }
175 #undef FUNC_NAME
176
177 /* A fluid referring to an association list mapping extra hash
178 characters to procedures. */
179 static SCM *scm_i_read_hash_procedures;
180
181 static SCM
182 scm_i_read_hash_procedures_ref (void)
183 {
184 return scm_fluid_ref (*scm_i_read_hash_procedures);
185 }
186
187 static void
188 scm_i_read_hash_procedures_set_x (SCM value)
189 {
190 scm_fluid_set_x (*scm_i_read_hash_procedures, value);
191 }
192
193 \f
194 /* Token readers. */
195
196
197 /* Size of the C buffer used to read symbols and numbers. */
198 #define READER_BUFFER_SIZE 128
199
200 /* Number of 32-bit codepoints in the buffer used to read strings. */
201 #define READER_STRING_BUFFER_SIZE 128
202
203 /* The maximum size of Scheme character names. */
204 #define READER_CHAR_NAME_MAX_SIZE 50
205
206 /* The maximum size of reader directive names. */
207 #define READER_DIRECTIVE_NAME_MAX_SIZE 50
208
209
210 /* `isblank' is only in C99. */
211 #define CHAR_IS_BLANK_(_chr) \
212 (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n') \
213 || ((_chr) == '\f') || ((_chr) == '\r'))
214
215 #ifdef MSDOS
216 # define CHAR_IS_BLANK(_chr) \
217 ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
218 #else
219 # define CHAR_IS_BLANK CHAR_IS_BLANK_
220 #endif
221
222
223 /* R5RS one-character delimiters (see section 7.1.1, ``Lexical
224 structure''). */
225 #define CHAR_IS_R5RS_DELIMITER(c) \
226 (CHAR_IS_BLANK (c) \
227 || (c) == ')' || (c) == '(' || (c) == ';' || (c) == '"')
228
229 #define CHAR_IS_DELIMITER(c) \
230 (CHAR_IS_R5RS_DELIMITER (c) \
231 || (((c) == ']' || (c) == '[') && (opts->square_brackets_p \
232 || opts->curly_infix_p)) \
233 || (((c) == '}' || (c) == '{') && opts->curly_infix_p))
234
235 /* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
236 Structure''. */
237 #define CHAR_IS_EXPONENT_MARKER(_chr) \
238 (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
239 || ((_chr) == 'd') || ((_chr) == 'l'))
240
241 /* Read an SCSH block comment. */
242 static SCM scm_read_scsh_block_comment (scm_t_wchar, SCM);
243 static SCM scm_read_r6rs_block_comment (scm_t_wchar, SCM);
244 static SCM scm_read_commented_expression (scm_t_wchar, SCM, scm_t_read_opts *);
245 static SCM scm_read_shebang (scm_t_wchar, SCM, scm_t_read_opts *);
246 static SCM scm_get_hash_procedure (int);
247
248 /* Read from PORT until a delimiter (e.g., a whitespace) is read. Put the
249 result in the pre-allocated buffer BUF. Return zero if the whole token has
250 fewer than BUF_SIZE bytes, non-zero otherwise. READ will be set the number of
251 bytes actually read. */
252 static int
253 read_token (SCM port, scm_t_read_opts *opts,
254 char *buf, size_t buf_size, size_t *read)
255 {
256 *read = 0;
257
258 while (*read < buf_size)
259 {
260 int chr;
261
262 chr = scm_get_byte_or_eof (port);
263
264 if (chr == EOF)
265 return 0;
266 else if (CHAR_IS_DELIMITER (chr))
267 {
268 scm_unget_byte (chr, port);
269 return 0;
270 }
271 else
272 {
273 *buf = (char) chr;
274 buf++, (*read)++;
275 }
276 }
277
278 return 1;
279 }
280
281 /* Like `read_token', but return either BUFFER, or a GC-allocated buffer
282 if the token doesn't fit in BUFFER_SIZE bytes. */
283 static char *
284 read_complete_token (SCM port, scm_t_read_opts *opts,
285 char *buffer, size_t buffer_size, size_t *read)
286 {
287 int overflow = 0;
288 size_t bytes_read, overflow_size = 0;
289 char *overflow_buffer = NULL;
290
291 do
292 {
293 overflow = read_token (port, opts, buffer, buffer_size, &bytes_read);
294 if (bytes_read == 0)
295 break;
296 if (overflow || overflow_size != 0)
297 {
298 if (overflow_size == 0)
299 {
300 overflow_buffer = scm_gc_malloc_pointerless (bytes_read, "read");
301 memcpy (overflow_buffer, buffer, bytes_read);
302 overflow_size = bytes_read;
303 }
304 else
305 {
306 char *new_buf =
307 scm_gc_malloc_pointerless (overflow_size + bytes_read, "read");
308
309 memcpy (new_buf, overflow_buffer, overflow_size);
310 memcpy (new_buf + overflow_size, buffer, bytes_read);
311
312 overflow_buffer = new_buf;
313 overflow_size += bytes_read;
314 }
315 }
316 }
317 while (overflow);
318
319 if (overflow_size)
320 *read = overflow_size;
321 else
322 *read = bytes_read;
323
324 return (overflow_size > 0 ? overflow_buffer : buffer);
325 }
326
327 /* Skip whitespace from PORT and return the first non-whitespace character
328 read. Raise an error on end-of-file. */
329 static int
330 flush_ws (SCM port, scm_t_read_opts *opts, const char *eoferr)
331 {
332 scm_t_wchar c;
333 while (1)
334 switch (c = scm_getc (port))
335 {
336 case EOF:
337 goteof:
338 if (eoferr)
339 {
340 scm_i_input_error (eoferr,
341 port,
342 "end of file",
343 SCM_EOL);
344 }
345 return c;
346
347 case ';':
348 lp:
349 switch (c = scm_getc (port))
350 {
351 case EOF:
352 goto goteof;
353 default:
354 goto lp;
355 case SCM_LINE_INCREMENTORS:
356 break;
357 }
358 break;
359
360 case '#':
361 switch (c = scm_getc (port))
362 {
363 case EOF:
364 eoferr = "read_sharp";
365 goto goteof;
366 case '!':
367 scm_read_shebang (c, port, opts);
368 break;
369 case ';':
370 scm_read_commented_expression (c, port, opts);
371 break;
372 case '|':
373 if (scm_is_false (scm_get_hash_procedure (c)))
374 {
375 scm_read_r6rs_block_comment (c, port);
376 break;
377 }
378 /* fall through */
379 default:
380 scm_ungetc (c, port);
381 return '#';
382 }
383 break;
384
385 case SCM_LINE_INCREMENTORS:
386 case SCM_SINGLE_SPACES:
387 case '\t':
388 break;
389
390 default:
391 return c;
392 }
393
394 return 0;
395 }
396
397
398 \f
399 /* Token readers. */
400
401 static SCM scm_read_expression (SCM port, scm_t_read_opts *opts);
402 static SCM scm_read_sharp (int chr, SCM port, scm_t_read_opts *opts,
403 long line, int column);
404
405
406 static SCM
407 maybe_annotate_source (SCM x, SCM port, scm_t_read_opts *opts,
408 long line, int column)
409 {
410 if (opts->record_positions_p)
411 scm_i_set_source_properties_x (x, line, column, SCM_FILENAME (port));
412 return x;
413 }
414
415 static SCM
416 scm_read_sexp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
417 #define FUNC_NAME "scm_i_lreadparen"
418 {
419 int c;
420 SCM tmp, tl, ans = SCM_EOL;
421 const int curly_list_p = (chr == '{') && opts->curly_infix_p;
422 const int terminating_char = ((chr == '{') ? '}'
423 : ((chr == '[') ? ']'
424 : ')'));
425
426 /* Need to capture line and column numbers here. */
427 long line = SCM_LINUM (port);
428 int column = SCM_COL (port) - 1;
429
430 c = flush_ws (port, opts, FUNC_NAME);
431 if (terminating_char == c)
432 return SCM_EOL;
433
434 scm_ungetc (c, port);
435 tmp = scm_read_expression (port, opts);
436
437 /* Note that it is possible for scm_read_expression to return
438 scm_sym_dot, but not as part of a dotted pair: as in #{.}#. So
439 check that it's a real dot by checking `c'. */
440 if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
441 {
442 ans = scm_read_expression (port, opts);
443 if (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
444 scm_i_input_error (FUNC_NAME, port, "missing close paren",
445 SCM_EOL);
446 return ans;
447 }
448
449 /* Build the head of the list structure. */
450 ans = tl = scm_cons (tmp, SCM_EOL);
451
452 while (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
453 {
454 SCM new_tail;
455
456 if (c == ')' || (c == ']' && opts->square_brackets_p)
457 || ((c == '}' || c == ']') && opts->curly_infix_p))
458 scm_i_input_error (FUNC_NAME, port,
459 "in pair: mismatched close paren: ~A",
460 scm_list_1 (SCM_MAKE_CHAR (c)));
461
462 scm_ungetc (c, port);
463 tmp = scm_read_expression (port, opts);
464
465 /* See above note about scm_sym_dot. */
466 if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
467 {
468 SCM_SETCDR (tl, scm_read_expression (port, opts));
469
470 c = flush_ws (port, opts, FUNC_NAME);
471 if (terminating_char != c)
472 scm_i_input_error (FUNC_NAME, port,
473 "in pair: missing close paren", SCM_EOL);
474 break;
475 }
476
477 new_tail = scm_cons (tmp, SCM_EOL);
478 SCM_SETCDR (tl, new_tail);
479 tl = new_tail;
480 }
481
482 if (curly_list_p)
483 {
484 /* In addition to finding the length, 'scm_ilength' checks for
485 improper or circular lists, in which case it returns -1. */
486 int len = scm_ilength (ans);
487
488 /* The (len == 0) case is handled above */
489 if (len == 1)
490 /* Return directly to avoid re-annotating the element's source
491 location with the position of the outer brace. Also, it
492 might not be possible to annotate the element. */
493 return scm_car (ans); /* {e} => e */
494 else if (len == 2)
495 ; /* Leave the list unchanged: {e1 e2} => (e1 e2) */
496 else if (len >= 3 && (len & 1))
497 {
498 /* It's a proper list whose length is odd and at least 3. If
499 the elements at odd indices (the infix operator positions)
500 are all 'equal?', then it's a simple curly-infix list.
501 Otherwise it's a mixed curly-infix list. */
502 SCM op = scm_cadr (ans);
503
504 /* Check to see if the elements at odd indices are 'equal?' */
505 for (tl = scm_cdddr (ans); ; tl = scm_cddr (tl))
506 {
507 if (scm_is_null (tl))
508 {
509 /* Convert simple curly-infix list to prefix:
510 {a <op> b <op> ...} => (<op> a b ...) */
511 tl = ans;
512 while (scm_is_pair (scm_cdr (tl)))
513 {
514 tmp = scm_cddr (tl);
515 SCM_SETCDR (tl, tmp);
516 tl = tmp;
517 }
518 ans = scm_cons (op, ans);
519 break;
520 }
521 else if (scm_is_false (scm_equal_p (op, scm_car (tl))))
522 {
523 /* Mixed curly-infix list: {e ...} => ($nfx$ e ...) */
524 ans = scm_cons (sym_nfx, ans);
525 break;
526 }
527 }
528 }
529 else
530 /* Mixed curly-infix (possibly improper) list:
531 {e . tail} => ($nfx$ e . tail) */
532 ans = scm_cons (sym_nfx, ans);
533 }
534
535 return maybe_annotate_source (ans, port, opts, line, column);
536 }
537 #undef FUNC_NAME
538
539
540 /* Read a hexadecimal number NDIGITS in length. Put its value into the variable
541 C. If TERMINATOR is non-null, terminate early if the TERMINATOR character is
542 found. */
543 #define SCM_READ_HEX_ESCAPE(ndigits, terminator) \
544 do \
545 { \
546 scm_t_wchar a; \
547 size_t i = 0; \
548 c = 0; \
549 while (i < ndigits) \
550 { \
551 a = scm_getc (port); \
552 if (a == EOF) \
553 goto str_eof; \
554 if (terminator \
555 && (a == (scm_t_wchar) terminator) \
556 && (i > 0)) \
557 break; \
558 if ('0' <= a && a <= '9') \
559 a -= '0'; \
560 else if ('A' <= a && a <= 'F') \
561 a = a - 'A' + 10; \
562 else if ('a' <= a && a <= 'f') \
563 a = a - 'a' + 10; \
564 else \
565 { \
566 c = a; \
567 goto bad_escaped; \
568 } \
569 c = c * 16 + a; \
570 i ++; \
571 } \
572 } while (0)
573
574 static void
575 skip_intraline_whitespace (SCM port)
576 {
577 scm_t_wchar c;
578
579 do
580 {
581 c = scm_getc (port);
582 if (c == EOF)
583 return;
584 }
585 while (c == '\t' || uc_is_general_category (c, UC_SPACE_SEPARATOR));
586
587 scm_ungetc (c, port);
588 }
589
590 static SCM
591 scm_read_string (int chr, SCM port, scm_t_read_opts *opts)
592 #define FUNC_NAME "scm_lreadr"
593 {
594 /* For strings smaller than C_STR, this function creates only one Scheme
595 object (the string returned). */
596
597 SCM str = SCM_EOL;
598 size_t c_str_len = 0;
599 scm_t_wchar c, c_str[READER_STRING_BUFFER_SIZE];
600
601 /* Need to capture line and column numbers here. */
602 long line = SCM_LINUM (port);
603 int column = SCM_COL (port) - 1;
604
605 while ('"' != (c = scm_getc (port)))
606 {
607 if (c == EOF)
608 {
609 str_eof:
610 scm_i_input_error (FUNC_NAME, port,
611 "end of file in string constant", SCM_EOL);
612 }
613
614 if (c_str_len + 1 >= READER_STRING_BUFFER_SIZE)
615 {
616 str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
617 c_str_len = 0;
618 }
619
620 if (c == '\\')
621 {
622 switch (c = scm_getc (port))
623 {
624 case EOF:
625 goto str_eof;
626 case '"':
627 case '\\':
628 break;
629 case '\n':
630 if (opts->hungry_eol_escapes_p)
631 skip_intraline_whitespace (port);
632 continue;
633 case '0':
634 c = '\0';
635 break;
636 case 'f':
637 c = '\f';
638 break;
639 case 'n':
640 c = '\n';
641 break;
642 case 'r':
643 c = '\r';
644 break;
645 case 't':
646 c = '\t';
647 break;
648 case 'a':
649 c = '\007';
650 break;
651 case 'v':
652 c = '\v';
653 break;
654 case 'b':
655 c = '\010';
656 break;
657 case 'x':
658 if (opts->r6rs_escapes_p)
659 SCM_READ_HEX_ESCAPE (10, ';');
660 else
661 SCM_READ_HEX_ESCAPE (2, '\0');
662 break;
663 case 'u':
664 if (!opts->r6rs_escapes_p)
665 {
666 SCM_READ_HEX_ESCAPE (4, '\0');
667 break;
668 }
669 case 'U':
670 if (!opts->r6rs_escapes_p)
671 {
672 SCM_READ_HEX_ESCAPE (6, '\0');
673 break;
674 }
675 default:
676 bad_escaped:
677 scm_i_input_error (FUNC_NAME, port,
678 "illegal character in escape sequence: ~S",
679 scm_list_1 (SCM_MAKE_CHAR (c)));
680 }
681 }
682
683 c_str[c_str_len++] = c;
684 }
685
686 if (scm_is_null (str))
687 /* Fast path: we got a string that fits in C_STR. */
688 str = scm_from_utf32_stringn (c_str, c_str_len);
689 else
690 {
691 if (c_str_len > 0)
692 str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
693
694 str = scm_string_concatenate_reverse (str, SCM_UNDEFINED, SCM_UNDEFINED);
695 }
696
697 return maybe_annotate_source (str, port, opts, line, column);
698 }
699 #undef FUNC_NAME
700
701
702 static SCM
703 scm_read_number (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
704 {
705 SCM result, str = SCM_EOL;
706 char local_buffer[READER_BUFFER_SIZE], *buffer;
707 size_t bytes_read;
708 scm_t_port *pt = SCM_PTAB_ENTRY (port);
709
710 /* Need to capture line and column numbers here. */
711 long line = SCM_LINUM (port);
712 int column = SCM_COL (port) - 1;
713
714 scm_ungetc (chr, port);
715 buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
716 &bytes_read);
717
718 str = scm_from_stringn (buffer, bytes_read, pt->encoding, pt->ilseq_handler);
719
720 result = scm_string_to_number (str, SCM_UNDEFINED);
721 if (scm_is_false (result))
722 {
723 /* Return a symbol instead of a number */
724 if (opts->case_insensitive_p)
725 str = scm_string_downcase_x (str);
726 result = scm_string_to_symbol (str);
727 }
728 else if (SCM_NIMP (result))
729 result = maybe_annotate_source (result, port, opts, line, column);
730
731 SCM_COL (port) += scm_i_string_length (str);
732 return result;
733 }
734
735 static SCM
736 scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
737 {
738 SCM result;
739 int ends_with_colon = 0;
740 size_t bytes_read;
741 int postfix = (opts->keyword_style == KEYWORD_STYLE_POSTFIX);
742 char local_buffer[READER_BUFFER_SIZE], *buffer;
743 scm_t_port *pt = SCM_PTAB_ENTRY (port);
744 SCM str;
745
746 scm_ungetc (chr, port);
747 buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
748 &bytes_read);
749 if (bytes_read > 0)
750 ends_with_colon = buffer[bytes_read - 1] == ':';
751
752 if (postfix && ends_with_colon && (bytes_read > 1))
753 {
754 str = scm_from_stringn (buffer, bytes_read - 1,
755 pt->encoding, pt->ilseq_handler);
756
757 if (opts->case_insensitive_p)
758 str = scm_string_downcase_x (str);
759 result = scm_symbol_to_keyword (scm_string_to_symbol (str));
760 }
761 else
762 {
763 str = scm_from_stringn (buffer, bytes_read,
764 pt->encoding, pt->ilseq_handler);
765
766 if (opts->case_insensitive_p)
767 str = scm_string_downcase_x (str);
768 result = scm_string_to_symbol (str);
769 }
770
771 SCM_COL (port) += scm_i_string_length (str);
772 return result;
773 }
774
775 static SCM
776 scm_read_number_and_radix (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
777 #define FUNC_NAME "scm_lreadr"
778 {
779 SCM result;
780 size_t read;
781 char local_buffer[READER_BUFFER_SIZE], *buffer;
782 unsigned int radix;
783 SCM str;
784 scm_t_port *pt;
785
786 switch (chr)
787 {
788 case 'B':
789 case 'b':
790 radix = 2;
791 break;
792
793 case 'o':
794 case 'O':
795 radix = 8;
796 break;
797
798 case 'd':
799 case 'D':
800 radix = 10;
801 break;
802
803 case 'x':
804 case 'X':
805 radix = 16;
806 break;
807
808 default:
809 scm_ungetc (chr, port);
810 scm_ungetc ('#', port);
811 radix = 10;
812 }
813
814 buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
815 &read);
816
817 pt = SCM_PTAB_ENTRY (port);
818 str = scm_from_stringn (buffer, read, pt->encoding, pt->ilseq_handler);
819
820 result = scm_string_to_number (str, scm_from_uint (radix));
821
822 SCM_COL (port) += scm_i_string_length (str);
823
824 if (scm_is_true (result))
825 return result;
826
827 scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
828
829 return SCM_BOOL_F;
830 }
831 #undef FUNC_NAME
832
833 static SCM
834 scm_read_quote (int chr, SCM port, scm_t_read_opts *opts)
835 {
836 SCM p;
837 long line = SCM_LINUM (port);
838 int column = SCM_COL (port) - 1;
839
840 switch (chr)
841 {
842 case '`':
843 p = scm_sym_quasiquote;
844 break;
845
846 case '\'':
847 p = scm_sym_quote;
848 break;
849
850 case ',':
851 {
852 scm_t_wchar c;
853
854 c = scm_getc (port);
855 if ('@' == c)
856 p = scm_sym_uq_splicing;
857 else
858 {
859 scm_ungetc (c, port);
860 p = scm_sym_unquote;
861 }
862 break;
863 }
864
865 default:
866 fprintf (stderr, "%s: unhandled quote character (%i)\n",
867 "scm_read_quote", chr);
868 abort ();
869 }
870
871 p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL);
872 return maybe_annotate_source (p, port, opts, line, column);
873 }
874
875 SCM_SYMBOL (sym_syntax, "syntax");
876 SCM_SYMBOL (sym_quasisyntax, "quasisyntax");
877 SCM_SYMBOL (sym_unsyntax, "unsyntax");
878 SCM_SYMBOL (sym_unsyntax_splicing, "unsyntax-splicing");
879
880 static SCM
881 scm_read_syntax (int chr, SCM port, scm_t_read_opts *opts)
882 {
883 SCM p;
884 long line = SCM_LINUM (port);
885 int column = SCM_COL (port) - 1;
886
887 switch (chr)
888 {
889 case '`':
890 p = sym_quasisyntax;
891 break;
892
893 case '\'':
894 p = sym_syntax;
895 break;
896
897 case ',':
898 {
899 int c;
900
901 c = scm_getc (port);
902 if ('@' == c)
903 p = sym_unsyntax_splicing;
904 else
905 {
906 scm_ungetc (c, port);
907 p = sym_unsyntax;
908 }
909 break;
910 }
911
912 default:
913 fprintf (stderr, "%s: unhandled syntax character (%i)\n",
914 "scm_read_syntax", chr);
915 abort ();
916 }
917
918 p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL);
919 return maybe_annotate_source (p, port, opts, line, column);
920 }
921
922 static SCM
923 scm_read_nil (int chr, SCM port, scm_t_read_opts *opts)
924 {
925 SCM id = scm_read_mixed_case_symbol (chr, port, opts);
926
927 if (!scm_is_eq (id, sym_nil))
928 scm_i_input_error ("scm_read_nil", port,
929 "unexpected input while reading #nil: ~a",
930 scm_list_1 (id));
931
932 return SCM_ELISP_NIL;
933 }
934
935 static SCM
936 scm_read_semicolon_comment (int chr, SCM port)
937 {
938 int c;
939
940 /* We use the get_byte here because there is no need to get the
941 locale correct with comment input. This presumes that newline
942 always represents itself no matter what the encoding is. */
943 for (c = scm_get_byte_or_eof (port);
944 (c != EOF) && (c != '\n');
945 c = scm_get_byte_or_eof (port));
946
947 return SCM_UNSPECIFIED;
948 }
949
950 \f
951 /* Sharp readers, i.e. readers called after a `#' sign has been read. */
952
953 static SCM
954 scm_read_boolean (int chr, SCM port)
955 {
956 switch (chr)
957 {
958 case 't':
959 case 'T':
960 return SCM_BOOL_T;
961
962 case 'f':
963 case 'F':
964 return SCM_BOOL_F;
965 }
966
967 return SCM_UNSPECIFIED;
968 }
969
970 static SCM
971 scm_read_character (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
972 #define FUNC_NAME "scm_lreadr"
973 {
974 char buffer[READER_CHAR_NAME_MAX_SIZE];
975 SCM charname;
976 size_t charname_len, bytes_read;
977 scm_t_wchar cp;
978 int overflow;
979 scm_t_port *pt;
980
981 overflow = read_token (port, opts, buffer, READER_CHAR_NAME_MAX_SIZE,
982 &bytes_read);
983 if (overflow)
984 scm_i_input_error (FUNC_NAME, port, "character name too long", SCM_EOL);
985
986 if (bytes_read == 0)
987 {
988 chr = scm_getc (port);
989 if (chr == EOF)
990 scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
991 "while reading character", SCM_EOL);
992
993 /* CHR must be a token delimiter, like a whitespace. */
994 return (SCM_MAKE_CHAR (chr));
995 }
996
997 pt = SCM_PTAB_ENTRY (port);
998
999 /* Simple ASCII characters can be processed immediately. Also, simple
1000 ISO-8859-1 characters can be processed immediately if the encoding for this
1001 port is ISO-8859-1. */
1002 if (bytes_read == 1 && ((unsigned char) buffer[0] <= 127 || pt->encoding == NULL))
1003 {
1004 SCM_COL (port) += 1;
1005 return SCM_MAKE_CHAR (buffer[0]);
1006 }
1007
1008 /* Otherwise, convert the buffer into a proper scheme string for
1009 processing. */
1010 charname = scm_from_stringn (buffer, bytes_read, pt->encoding,
1011 pt->ilseq_handler);
1012 charname_len = scm_i_string_length (charname);
1013 SCM_COL (port) += charname_len;
1014 cp = scm_i_string_ref (charname, 0);
1015 if (charname_len == 1)
1016 return SCM_MAKE_CHAR (cp);
1017
1018 /* Ignore dotted circles, which may be used to keep combining characters from
1019 combining with the backslash in #\charname. */
1020 if (cp == SCM_CODEPOINT_DOTTED_CIRCLE && charname_len == 2)
1021 return SCM_MAKE_CHAR (scm_i_string_ref (charname, 1));
1022
1023 if (cp >= '0' && cp < '8')
1024 {
1025 /* Dirk:FIXME:: This type of character syntax is not R5RS
1026 * compliant. Further, it should be verified that the constant
1027 * does only consist of octal digits. */
1028 SCM p = scm_string_to_number (charname, scm_from_uint (8));
1029 if (SCM_I_INUMP (p))
1030 {
1031 scm_t_wchar c = scm_to_uint32 (p);
1032 if (SCM_IS_UNICODE_CHAR (c))
1033 return SCM_MAKE_CHAR (c);
1034 else
1035 scm_i_input_error (FUNC_NAME, port,
1036 "out-of-range octal character escape: ~a",
1037 scm_list_1 (charname));
1038 }
1039 }
1040
1041 if (cp == 'x' && (charname_len > 1))
1042 {
1043 SCM p;
1044
1045 /* Convert from hex, skipping the initial 'x' character in CHARNAME */
1046 p = scm_string_to_number (scm_c_substring (charname, 1, charname_len),
1047 scm_from_uint (16));
1048 if (SCM_I_INUMP (p))
1049 {
1050 scm_t_wchar c = scm_to_uint32 (p);
1051 if (SCM_IS_UNICODE_CHAR (c))
1052 return SCM_MAKE_CHAR (c);
1053 else
1054 scm_i_input_error (FUNC_NAME, port,
1055 "out-of-range hex character escape: ~a",
1056 scm_list_1 (charname));
1057 }
1058 }
1059
1060 /* The names of characters should never have non-Latin1
1061 characters. */
1062 if (scm_i_is_narrow_string (charname)
1063 || scm_i_try_narrow_string (charname))
1064 { SCM ch = scm_i_charname_to_char (scm_i_string_chars (charname),
1065 charname_len);
1066 if (scm_is_true (ch))
1067 return ch;
1068 }
1069
1070 scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
1071 scm_list_1 (charname));
1072
1073 return SCM_UNSPECIFIED;
1074 }
1075 #undef FUNC_NAME
1076
1077 static SCM
1078 scm_read_keyword (int chr, SCM port, scm_t_read_opts *opts)
1079 {
1080 SCM symbol;
1081
1082 /* Read the symbol that comprises the keyword. Doing this instead of
1083 invoking a specific symbol reader function allows `scm_read_keyword ()'
1084 to adapt to the delimiters currently valid of symbols.
1085
1086 XXX: This implementation allows sloppy syntaxes like `#: key'. */
1087 symbol = scm_read_expression (port, opts);
1088 if (!scm_is_symbol (symbol))
1089 scm_i_input_error ("scm_read_keyword", port,
1090 "keyword prefix `~a' not followed by a symbol: ~s",
1091 scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
1092
1093 return (scm_symbol_to_keyword (symbol));
1094 }
1095
1096 static SCM
1097 scm_read_vector (int chr, SCM port, scm_t_read_opts *opts,
1098 long line, int column)
1099 {
1100 /* Note: We call `scm_read_sexp ()' rather than READER here in order to
1101 guarantee that it's going to do what we want. After all, this is an
1102 implementation detail of `scm_read_vector ()', not a desirable
1103 property. */
1104 return maybe_annotate_source (scm_vector (scm_read_sexp (chr, port, opts)),
1105 port, opts, line, column);
1106 }
1107
1108 /* Helper used by scm_read_array */
1109 static int
1110 read_decimal_integer (SCM port, int c, ssize_t *resp)
1111 {
1112 ssize_t sign = 1;
1113 ssize_t res = 0;
1114 int got_it = 0;
1115
1116 if (c == '-')
1117 {
1118 sign = -1;
1119 c = scm_getc (port);
1120 }
1121
1122 while ('0' <= c && c <= '9')
1123 {
1124 res = 10*res + c-'0';
1125 got_it = 1;
1126 c = scm_getc (port);
1127 }
1128
1129 if (got_it)
1130 *resp = sign * res;
1131 return c;
1132 }
1133
1134 /* Read an array. This function can also read vectors and uniform
1135 vectors. Also, the conflict between '#f' and '#f32' and '#f64' is
1136 handled here.
1137
1138 C is the first character read after the '#'. */
1139 static SCM
1140 scm_read_array (int c, SCM port, scm_t_read_opts *opts, long line, int column)
1141 {
1142 ssize_t rank;
1143 scm_t_wchar tag_buf[8];
1144 int tag_len;
1145
1146 SCM tag, shape = SCM_BOOL_F, elements, array;
1147
1148 /* XXX - shortcut for ordinary vectors. Shouldn't be necessary but
1149 the array code can not deal with zero-length dimensions yet, and
1150 we want to allow zero-length vectors, of course. */
1151 if (c == '(')
1152 return scm_read_vector (c, port, opts, line, column);
1153
1154 /* Disambiguate between '#f' and uniform floating point vectors. */
1155 if (c == 'f')
1156 {
1157 c = scm_getc (port);
1158 if (c != '3' && c != '6')
1159 {
1160 if (c != EOF)
1161 scm_ungetc (c, port);
1162 return SCM_BOOL_F;
1163 }
1164 rank = 1;
1165 tag_buf[0] = 'f';
1166 tag_len = 1;
1167 goto continue_reading_tag;
1168 }
1169
1170 /* Read rank. */
1171 rank = 1;
1172 c = read_decimal_integer (port, c, &rank);
1173 if (rank < 0)
1174 scm_i_input_error (NULL, port, "array rank must be non-negative",
1175 SCM_EOL);
1176
1177 /* Read tag. */
1178 tag_len = 0;
1179 continue_reading_tag:
1180 while (c != EOF && c != '(' && c != '@' && c != ':'
1181 && tag_len < sizeof tag_buf / sizeof tag_buf[0])
1182 {
1183 tag_buf[tag_len++] = c;
1184 c = scm_getc (port);
1185 }
1186 if (tag_len == 0)
1187 tag = SCM_BOOL_T;
1188 else
1189 {
1190 tag = scm_string_to_symbol (scm_from_utf32_stringn (tag_buf, tag_len));
1191 if (tag_len == sizeof tag_buf / sizeof tag_buf[0])
1192 scm_i_input_error (NULL, port, "invalid array tag, starting with: ~a",
1193 scm_list_1 (tag));
1194 }
1195
1196 /* Read shape. */
1197 if (c == '@' || c == ':')
1198 {
1199 shape = SCM_EOL;
1200
1201 do
1202 {
1203 ssize_t lbnd = 0, len = 0;
1204 SCM s;
1205
1206 if (c == '@')
1207 {
1208 c = scm_getc (port);
1209 c = read_decimal_integer (port, c, &lbnd);
1210 }
1211
1212 s = scm_from_ssize_t (lbnd);
1213
1214 if (c == ':')
1215 {
1216 c = scm_getc (port);
1217 c = read_decimal_integer (port, c, &len);
1218 if (len < 0)
1219 scm_i_input_error (NULL, port,
1220 "array length must be non-negative",
1221 SCM_EOL);
1222
1223 s = scm_list_2 (s, scm_from_ssize_t (lbnd+len-1));
1224 }
1225
1226 shape = scm_cons (s, shape);
1227 } while (c == '@' || c == ':');
1228
1229 shape = scm_reverse_x (shape, SCM_EOL);
1230 }
1231
1232 /* Read nested lists of elements. */
1233 if (c != '(')
1234 scm_i_input_error (NULL, port,
1235 "missing '(' in vector or array literal",
1236 SCM_EOL);
1237 elements = scm_read_sexp (c, port, opts);
1238
1239 if (scm_is_false (shape))
1240 shape = scm_from_ssize_t (rank);
1241 else if (scm_ilength (shape) != rank)
1242 scm_i_input_error
1243 (NULL, port,
1244 "the number of shape specifications must match the array rank",
1245 SCM_EOL);
1246
1247 /* Handle special print syntax of rank zero arrays; see
1248 scm_i_print_array for a rationale. */
1249 if (rank == 0)
1250 {
1251 if (!scm_is_pair (elements))
1252 scm_i_input_error (NULL, port,
1253 "too few elements in array literal, need 1",
1254 SCM_EOL);
1255 if (!scm_is_null (SCM_CDR (elements)))
1256 scm_i_input_error (NULL, port,
1257 "too many elements in array literal, want 1",
1258 SCM_EOL);
1259 elements = SCM_CAR (elements);
1260 }
1261
1262 /* Construct array, annotate with source location, and return. */
1263 array = scm_list_to_typed_array (tag, shape, elements);
1264 return maybe_annotate_source (array, port, opts, line, column);
1265 }
1266
1267 static SCM
1268 scm_read_srfi4_vector (int chr, SCM port, scm_t_read_opts *opts,
1269 long line, int column)
1270 {
1271 return scm_read_array (chr, port, opts, line, column);
1272 }
1273
1274 static SCM
1275 scm_read_bytevector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
1276 long line, int column)
1277 {
1278 chr = scm_getc (port);
1279 if (chr != 'u')
1280 goto syntax;
1281
1282 chr = scm_getc (port);
1283 if (chr != '8')
1284 goto syntax;
1285
1286 chr = scm_getc (port);
1287 if (chr != '(')
1288 goto syntax;
1289
1290 return maybe_annotate_source
1291 (scm_u8_list_to_bytevector (scm_read_sexp (chr, port, opts)),
1292 port, opts, line, column);
1293
1294 syntax:
1295 scm_i_input_error ("read_bytevector", port,
1296 "invalid bytevector prefix",
1297 SCM_MAKE_CHAR (chr));
1298 return SCM_UNSPECIFIED;
1299 }
1300
1301 static SCM
1302 scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
1303 long line, int column)
1304 {
1305 /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
1306 terribly inefficient but who cares? */
1307 SCM s_bits = SCM_EOL;
1308
1309 for (chr = scm_getc (port);
1310 (chr != EOF) && ((chr == '0') || (chr == '1'));
1311 chr = scm_getc (port))
1312 {
1313 s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
1314 }
1315
1316 if (chr != EOF)
1317 scm_ungetc (chr, port);
1318
1319 return maybe_annotate_source
1320 (scm_bitvector (scm_reverse_x (s_bits, SCM_EOL)),
1321 port, opts, line, column);
1322 }
1323
1324 static SCM
1325 scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
1326 {
1327 int bang_seen = 0;
1328
1329 for (;;)
1330 {
1331 int c = scm_getc (port);
1332
1333 if (c == EOF)
1334 scm_i_input_error ("skip_block_comment", port,
1335 "unterminated `#! ... !#' comment", SCM_EOL);
1336
1337 if (c == '!')
1338 bang_seen = 1;
1339 else if (c == '#' && bang_seen)
1340 break;
1341 else
1342 bang_seen = 0;
1343 }
1344
1345 return SCM_UNSPECIFIED;
1346 }
1347
1348 static void set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts,
1349 int value);
1350 static void set_port_square_brackets_p (SCM port, scm_t_read_opts *opts,
1351 int value);
1352 static void set_port_curly_infix_p (SCM port, scm_t_read_opts *opts,
1353 int value);
1354
1355 static SCM
1356 scm_read_shebang (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
1357 {
1358 char name[READER_DIRECTIVE_NAME_MAX_SIZE + 1];
1359 int c;
1360 int i = 0;
1361
1362 while (i <= READER_DIRECTIVE_NAME_MAX_SIZE)
1363 {
1364 c = scm_getc (port);
1365 if (c == EOF)
1366 scm_i_input_error ("skip_block_comment", port,
1367 "unterminated `#! ... !#' comment", SCM_EOL);
1368 else if (('a' <= c && c <= 'z') || ('0' <= c && c <= '9') || c == '-')
1369 name[i++] = c;
1370 else if (CHAR_IS_DELIMITER (c))
1371 {
1372 scm_ungetc (c, port);
1373 name[i] = '\0';
1374 if (0 == strcmp ("r6rs", name))
1375 ; /* Silently ignore */
1376 else if (0 == strcmp ("fold-case", name))
1377 set_port_case_insensitive_p (port, opts, 1);
1378 else if (0 == strcmp ("no-fold-case", name))
1379 set_port_case_insensitive_p (port, opts, 0);
1380 else if (0 == strcmp ("curly-infix", name))
1381 set_port_curly_infix_p (port, opts, 1);
1382 else if (0 == strcmp ("curly-infix-and-bracket-lists", name))
1383 {
1384 set_port_curly_infix_p (port, opts, 1);
1385 set_port_square_brackets_p (port, opts, 0);
1386 }
1387 else
1388 break;
1389
1390 return SCM_UNSPECIFIED;
1391 }
1392 else
1393 {
1394 scm_ungetc (c, port);
1395 break;
1396 }
1397 }
1398 while (i > 0)
1399 scm_ungetc (name[--i], port);
1400 return scm_read_scsh_block_comment (chr, port);
1401 }
1402
1403 static SCM
1404 scm_read_r6rs_block_comment (scm_t_wchar chr, SCM port)
1405 {
1406 /* Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
1407 nested. So care must be taken. */
1408 int nesting_level = 1;
1409
1410 int a = scm_getc (port);
1411
1412 if (a == EOF)
1413 scm_i_input_error ("scm_read_r6rs_block_comment", port,
1414 "unterminated `#| ... |#' comment", SCM_EOL);
1415
1416 while (nesting_level > 0)
1417 {
1418 int b = scm_getc (port);
1419
1420 if (b == EOF)
1421 scm_i_input_error ("scm_read_r6rs_block_comment", port,
1422 "unterminated `#| ... |#' comment", SCM_EOL);
1423
1424 if (a == '|' && b == '#')
1425 {
1426 nesting_level--;
1427 b = EOF;
1428 }
1429 else if (a == '#' && b == '|')
1430 {
1431 nesting_level++;
1432 b = EOF;
1433 }
1434
1435 a = b;
1436 }
1437
1438 return SCM_UNSPECIFIED;
1439 }
1440
1441 static SCM
1442 scm_read_commented_expression (scm_t_wchar chr, SCM port,
1443 scm_t_read_opts *opts)
1444 {
1445 scm_t_wchar c;
1446
1447 c = flush_ws (port, opts, (char *) NULL);
1448 if (EOF == c)
1449 scm_i_input_error ("read_commented_expression", port,
1450 "no expression after #; comment", SCM_EOL);
1451 scm_ungetc (c, port);
1452 scm_read_expression (port, opts);
1453 return SCM_UNSPECIFIED;
1454 }
1455
1456 static SCM
1457 scm_read_extended_symbol (scm_t_wchar chr, SCM port)
1458 {
1459 /* Guile's extended symbol read syntax looks like this:
1460
1461 #{This is all a symbol name}#
1462
1463 So here, CHR is expected to be `{'. */
1464 int saw_brace = 0;
1465 size_t len = 0;
1466 SCM buf = scm_i_make_string (1024, NULL, 0);
1467
1468 buf = scm_i_string_start_writing (buf);
1469
1470 while ((chr = scm_getc (port)) != EOF)
1471 {
1472 if (saw_brace)
1473 {
1474 if (chr == '#')
1475 {
1476 break;
1477 }
1478 else
1479 {
1480 saw_brace = 0;
1481 scm_i_string_set_x (buf, len++, '}');
1482 }
1483 }
1484
1485 if (chr == '}')
1486 saw_brace = 1;
1487 else if (chr == '\\')
1488 {
1489 /* It used to be that print.c would print extended-read-syntax
1490 symbols with backslashes before "non-standard" chars, but
1491 this routine wouldn't do anything with those escapes.
1492 Bummer. What we've done is to change print.c to output
1493 R6RS hex escapes for those characters, relying on the fact
1494 that the extended read syntax would never put a `\' before
1495 an `x'. For now, we just ignore other instances of
1496 backslash in the string. */
1497 switch ((chr = scm_getc (port)))
1498 {
1499 case EOF:
1500 goto done;
1501 case 'x':
1502 {
1503 scm_t_wchar c;
1504
1505 SCM_READ_HEX_ESCAPE (10, ';');
1506 scm_i_string_set_x (buf, len++, c);
1507 break;
1508
1509 str_eof:
1510 chr = EOF;
1511 goto done;
1512
1513 bad_escaped:
1514 scm_i_string_stop_writing ();
1515 scm_i_input_error ("scm_read_extended_symbol", port,
1516 "illegal character in escape sequence: ~S",
1517 scm_list_1 (SCM_MAKE_CHAR (c)));
1518 break;
1519 }
1520 default:
1521 scm_i_string_set_x (buf, len++, chr);
1522 break;
1523 }
1524 }
1525 else
1526 scm_i_string_set_x (buf, len++, chr);
1527
1528 if (len >= scm_i_string_length (buf) - 2)
1529 {
1530 SCM addy;
1531
1532 scm_i_string_stop_writing ();
1533 addy = scm_i_make_string (1024, NULL, 0);
1534 buf = scm_string_append (scm_list_2 (buf, addy));
1535 len = 0;
1536 buf = scm_i_string_start_writing (buf);
1537 }
1538 }
1539
1540 done:
1541 scm_i_string_stop_writing ();
1542 if (chr == EOF)
1543 scm_i_input_error ("scm_read_extended_symbol", port,
1544 "end of file while reading symbol", SCM_EOL);
1545
1546 return (scm_string_to_symbol (scm_c_substring (buf, 0, len)));
1547 }
1548
1549
1550 \f
1551 /* Top-level token readers, i.e., dispatchers. */
1552
1553 static SCM
1554 scm_read_sharp_extension (int chr, SCM port, scm_t_read_opts *opts)
1555 {
1556 SCM proc;
1557
1558 proc = scm_get_hash_procedure (chr);
1559 if (scm_is_true (scm_procedure_p (proc)))
1560 {
1561 long line = SCM_LINUM (port);
1562 int column = SCM_COL (port) - 2;
1563 SCM got;
1564
1565 got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
1566
1567 if (opts->record_positions_p && SCM_NIMP (got)
1568 && !scm_i_has_source_properties (got))
1569 scm_i_set_source_properties_x (got, line, column, SCM_FILENAME (port));
1570
1571 return got;
1572 }
1573
1574 return SCM_UNSPECIFIED;
1575 }
1576
1577 /* The reader for the sharp `#' character. It basically dispatches reads
1578 among the above token readers. */
1579 static SCM
1580 scm_read_sharp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
1581 long line, int column)
1582 #define FUNC_NAME "scm_lreadr"
1583 {
1584 SCM result;
1585
1586 chr = scm_getc (port);
1587
1588 result = scm_read_sharp_extension (chr, port, opts);
1589 if (!scm_is_eq (result, SCM_UNSPECIFIED))
1590 return result;
1591
1592 switch (chr)
1593 {
1594 case '\\':
1595 return (scm_read_character (chr, port, opts));
1596 case '(':
1597 return (scm_read_vector (chr, port, opts, line, column));
1598 case 's':
1599 case 'u':
1600 case 'f':
1601 case 'c':
1602 /* This one may return either a boolean or an SRFI-4 vector. */
1603 return (scm_read_srfi4_vector (chr, port, opts, line, column));
1604 case 'v':
1605 return (scm_read_bytevector (chr, port, opts, line, column));
1606 case '*':
1607 return (scm_read_guile_bit_vector (chr, port, opts, line, column));
1608 case 't':
1609 case 'T':
1610 case 'F':
1611 return (scm_read_boolean (chr, port));
1612 case ':':
1613 return (scm_read_keyword (chr, port, opts));
1614 case '0': case '1': case '2': case '3': case '4':
1615 case '5': case '6': case '7': case '8': case '9':
1616 case '@':
1617 #if SCM_ENABLE_DEPRECATED
1618 /* See below for 'i' and 'e'. */
1619 case 'a':
1620 case 'y':
1621 case 'h':
1622 case 'l':
1623 #endif
1624 return (scm_read_array (chr, port, opts, line, column));
1625
1626 case 'i':
1627 case 'e':
1628 #if SCM_ENABLE_DEPRECATED
1629 {
1630 /* When next char is '(', it really is an old-style
1631 uniform array. */
1632 scm_t_wchar next_c = scm_getc (port);
1633 if (next_c != EOF)
1634 scm_ungetc (next_c, port);
1635 if (next_c == '(')
1636 return scm_read_array (chr, port, opts, line, column);
1637 /* Fall through. */
1638 }
1639 #endif
1640 case 'b':
1641 case 'B':
1642 case 'o':
1643 case 'O':
1644 case 'd':
1645 case 'D':
1646 case 'x':
1647 case 'X':
1648 case 'I':
1649 case 'E':
1650 return (scm_read_number_and_radix (chr, port, opts));
1651 case '{':
1652 return (scm_read_extended_symbol (chr, port));
1653 case '!':
1654 return (scm_read_shebang (chr, port, opts));
1655 case ';':
1656 return (scm_read_commented_expression (chr, port, opts));
1657 case '`':
1658 case '\'':
1659 case ',':
1660 return (scm_read_syntax (chr, port, opts));
1661 case 'n':
1662 return (scm_read_nil (chr, port, opts));
1663 default:
1664 result = scm_read_sharp_extension (chr, port, opts);
1665 if (scm_is_eq (result, SCM_UNSPECIFIED))
1666 {
1667 /* To remain compatible with 1.8 and earlier, the following
1668 characters have lower precedence than `read-hash-extend'
1669 characters. */
1670 switch (chr)
1671 {
1672 case '|':
1673 return scm_read_r6rs_block_comment (chr, port);
1674 default:
1675 scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
1676 scm_list_1 (SCM_MAKE_CHAR (chr)));
1677 }
1678 }
1679 else
1680 return result;
1681 }
1682
1683 return SCM_UNSPECIFIED;
1684 }
1685 #undef FUNC_NAME
1686
1687 static SCM
1688 read_inner_expression (SCM port, scm_t_read_opts *opts)
1689 #define FUNC_NAME "read_inner_expression"
1690 {
1691 while (1)
1692 {
1693 scm_t_wchar chr;
1694
1695 chr = scm_getc (port);
1696
1697 switch (chr)
1698 {
1699 case SCM_WHITE_SPACES:
1700 case SCM_LINE_INCREMENTORS:
1701 break;
1702 case ';':
1703 (void) scm_read_semicolon_comment (chr, port);
1704 break;
1705 case '{':
1706 if (opts->curly_infix_p)
1707 {
1708 if (opts->neoteric_p)
1709 return scm_read_sexp (chr, port, opts);
1710 else
1711 {
1712 SCM expr;
1713
1714 /* Enable neoteric expressions within curly braces */
1715 opts->neoteric_p = 1;
1716 expr = scm_read_sexp (chr, port, opts);
1717 opts->neoteric_p = 0;
1718 return expr;
1719 }
1720 }
1721 else
1722 return scm_read_mixed_case_symbol (chr, port, opts);
1723 case '[':
1724 if (opts->square_brackets_p)
1725 return scm_read_sexp (chr, port, opts);
1726 else if (opts->curly_infix_p)
1727 {
1728 /* The syntax of neoteric expressions requires that '[' be
1729 a delimiter when curly-infix is enabled, so it cannot
1730 be part of an unescaped symbol. We might as well do
1731 something useful with it, so we adopt Kawa's convention:
1732 [...] => ($bracket-list$ ...) */
1733 long line = SCM_LINUM (port);
1734 int column = SCM_COL (port) - 1;
1735 return maybe_annotate_source
1736 (scm_cons (sym_bracket_list, scm_read_sexp (chr, port, opts)),
1737 port, opts, line, column);
1738 }
1739 else
1740 return scm_read_mixed_case_symbol (chr, port, opts);
1741 case '(':
1742 return (scm_read_sexp (chr, port, opts));
1743 case '"':
1744 return (scm_read_string (chr, port, opts));
1745 case '\'':
1746 case '`':
1747 case ',':
1748 return (scm_read_quote (chr, port, opts));
1749 case '#':
1750 {
1751 long line = SCM_LINUM (port);
1752 int column = SCM_COL (port) - 1;
1753 SCM result = scm_read_sharp (chr, port, opts, line, column);
1754 if (scm_is_eq (result, SCM_UNSPECIFIED))
1755 /* We read a comment or some such. */
1756 break;
1757 else
1758 return result;
1759 }
1760 case ')':
1761 scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
1762 break;
1763 case '}':
1764 if (opts->curly_infix_p)
1765 scm_i_input_error (FUNC_NAME, port, "unexpected \"}\"", SCM_EOL);
1766 else
1767 return scm_read_mixed_case_symbol (chr, port, opts);
1768 case ']':
1769 if (opts->square_brackets_p)
1770 scm_i_input_error (FUNC_NAME, port, "unexpected \"]\"", SCM_EOL);
1771 /* otherwise fall through */
1772 case EOF:
1773 return SCM_EOF_VAL;
1774 case ':':
1775 if (opts->keyword_style == KEYWORD_STYLE_PREFIX)
1776 return scm_symbol_to_keyword (scm_read_expression (port, opts));
1777 /* Fall through. */
1778
1779 default:
1780 {
1781 if (((chr >= '0') && (chr <= '9'))
1782 || (strchr ("+-.", chr)))
1783 return (scm_read_number (chr, port, opts));
1784 else
1785 return (scm_read_mixed_case_symbol (chr, port, opts));
1786 }
1787 }
1788 }
1789 }
1790 #undef FUNC_NAME
1791
1792 static SCM
1793 scm_read_expression (SCM port, scm_t_read_opts *opts)
1794 #define FUNC_NAME "scm_read_expression"
1795 {
1796 if (!opts->neoteric_p)
1797 return read_inner_expression (port, opts);
1798 else
1799 {
1800 long line = 0;
1801 int column = 0;
1802 SCM expr;
1803
1804 if (opts->record_positions_p)
1805 {
1806 /* We need to get the position of the first non-whitespace
1807 character in order to correctly annotate neoteric
1808 expressions. For example, for the expression 'f(x)', the
1809 first call to 'read_inner_expression' reads the 'f' (which
1810 cannot be annotated), and then we later read the '(x)' and
1811 use it to construct the new list (f x). */
1812 int c = flush_ws (port, opts, (char *) NULL);
1813 if (c == EOF)
1814 return SCM_EOF_VAL;
1815 scm_ungetc (c, port);
1816 line = SCM_LINUM (port);
1817 column = SCM_COL (port);
1818 }
1819
1820 expr = read_inner_expression (port, opts);
1821
1822 /* 'expr' is the first component of the neoteric expression. Now
1823 we loop, and as long as the next character is '(', '[', or '{',
1824 (without any intervening whitespace), we use it to construct a
1825 new expression. For example, f{n - 1}(x) => ((f (- n 1)) x). */
1826 for (;;)
1827 {
1828 int chr = scm_getc (port);
1829
1830 if (chr == '(')
1831 /* e(...) => (e ...) */
1832 expr = scm_cons (expr, scm_read_sexp (chr, port, opts));
1833 else if (chr == '[')
1834 /* e[...] => ($bracket-apply$ e ...) */
1835 expr = scm_cons (sym_bracket_apply,
1836 scm_cons (expr,
1837 scm_read_sexp (chr, port, opts)));
1838 else if (chr == '{')
1839 {
1840 SCM arg = scm_read_sexp (chr, port, opts);
1841
1842 if (scm_is_null (arg))
1843 expr = scm_list_1 (expr); /* e{} => (e) */
1844 else
1845 expr = scm_list_2 (expr, arg); /* e{...} => (e {...}) */
1846 }
1847 else
1848 {
1849 if (chr != EOF)
1850 scm_ungetc (chr, port);
1851 break;
1852 }
1853 maybe_annotate_source (expr, port, opts, line, column);
1854 }
1855 return expr;
1856 }
1857 }
1858 #undef FUNC_NAME
1859
1860 \f
1861 /* Actual reader. */
1862
1863 static void init_read_options (SCM port, scm_t_read_opts *opts);
1864
1865 SCM_DEFINE (scm_read, "read", 0, 1, 0,
1866 (SCM port),
1867 "Read an s-expression from the input port @var{port}, or from\n"
1868 "the current input port if @var{port} is not specified.\n"
1869 "Any whitespace before the next token is discarded.")
1870 #define FUNC_NAME s_scm_read
1871 {
1872 scm_t_read_opts opts;
1873 int c;
1874
1875 if (SCM_UNBNDP (port))
1876 port = scm_current_input_port ();
1877 SCM_VALIDATE_OPINPORT (1, port);
1878
1879 init_read_options (port, &opts);
1880
1881 c = flush_ws (port, &opts, (char *) NULL);
1882 if (EOF == c)
1883 return SCM_EOF_VAL;
1884 scm_ungetc (c, port);
1885
1886 return (scm_read_expression (port, &opts));
1887 }
1888 #undef FUNC_NAME
1889
1890
1891 \f
1892
1893 /* Manipulate the read-hash-procedures alist. This could be written in
1894 Scheme, but maybe it will also be used by C code during initialisation. */
1895 SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
1896 (SCM chr, SCM proc),
1897 "Install the procedure @var{proc} for reading expressions\n"
1898 "starting with the character sequence @code{#} and @var{chr}.\n"
1899 "@var{proc} will be called with two arguments: the character\n"
1900 "@var{chr} and the port to read further data from. The object\n"
1901 "returned will be the return value of @code{read}. \n"
1902 "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1903 )
1904 #define FUNC_NAME s_scm_read_hash_extend
1905 {
1906 SCM this;
1907 SCM prev;
1908
1909 SCM_VALIDATE_CHAR (1, chr);
1910 SCM_ASSERT (scm_is_false (proc)
1911 || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
1912 proc, SCM_ARG2, FUNC_NAME);
1913
1914 /* Check if chr is already in the alist. */
1915 this = scm_i_read_hash_procedures_ref ();
1916 prev = SCM_BOOL_F;
1917 while (1)
1918 {
1919 if (scm_is_null (this))
1920 {
1921 /* not found, so add it to the beginning. */
1922 if (scm_is_true (proc))
1923 {
1924 SCM new = scm_cons (scm_cons (chr, proc),
1925 scm_i_read_hash_procedures_ref ());
1926 scm_i_read_hash_procedures_set_x (new);
1927 }
1928 break;
1929 }
1930 if (scm_is_eq (chr, SCM_CAAR (this)))
1931 {
1932 /* already in the alist. */
1933 if (scm_is_false (proc))
1934 {
1935 /* remove it. */
1936 if (scm_is_false (prev))
1937 {
1938 SCM rest = SCM_CDR (scm_i_read_hash_procedures_ref ());
1939 scm_i_read_hash_procedures_set_x (rest);
1940 }
1941 else
1942 scm_set_cdr_x (prev, SCM_CDR (this));
1943 }
1944 else
1945 {
1946 /* replace it. */
1947 scm_set_cdr_x (SCM_CAR (this), proc);
1948 }
1949 break;
1950 }
1951 prev = this;
1952 this = SCM_CDR (this);
1953 }
1954
1955 return SCM_UNSPECIFIED;
1956 }
1957 #undef FUNC_NAME
1958
1959 /* Recover the read-hash procedure corresponding to char c. */
1960 static SCM
1961 scm_get_hash_procedure (int c)
1962 {
1963 SCM rest = scm_i_read_hash_procedures_ref ();
1964
1965 while (1)
1966 {
1967 if (scm_is_null (rest))
1968 return SCM_BOOL_F;
1969
1970 if (SCM_CHAR (SCM_CAAR (rest)) == c)
1971 return SCM_CDAR (rest);
1972
1973 rest = SCM_CDR (rest);
1974 }
1975 }
1976
1977 #define SCM_ENCODING_SEARCH_SIZE (500)
1978
1979 /* Search the first few hundred characters of a file for an Emacs-like coding
1980 declaration. Returns either NULL or a string whose storage has been
1981 allocated with `scm_gc_malloc ()'. */
1982 char *
1983 scm_i_scan_for_encoding (SCM port)
1984 {
1985 scm_t_port *pt;
1986 char header[SCM_ENCODING_SEARCH_SIZE+1];
1987 size_t bytes_read, encoding_length, i;
1988 char *encoding = NULL;
1989 int utf8_bom = 0;
1990 char *pos, *encoding_start;
1991 int in_comment;
1992
1993 pt = SCM_PTAB_ENTRY (port);
1994
1995 if (pt->rw_active == SCM_PORT_WRITE)
1996 scm_flush (port);
1997
1998 if (pt->rw_random)
1999 pt->rw_active = SCM_PORT_READ;
2000
2001 if (pt->read_pos == pt->read_end)
2002 {
2003 /* We can use the read buffer, and thus avoid a seek. */
2004 if (scm_fill_input (port) == EOF)
2005 return NULL;
2006
2007 bytes_read = pt->read_end - pt->read_pos;
2008 if (bytes_read > SCM_ENCODING_SEARCH_SIZE)
2009 bytes_read = SCM_ENCODING_SEARCH_SIZE;
2010
2011 if (bytes_read <= 1)
2012 /* An unbuffered port -- don't scan. */
2013 return NULL;
2014
2015 memcpy (header, pt->read_pos, bytes_read);
2016 header[bytes_read] = '\0';
2017 }
2018 else
2019 {
2020 /* Try to read some bytes and then seek back. Not all ports
2021 support seeking back; and indeed some file ports (like
2022 /dev/urandom) will succeed on an lseek (fd, 0, SEEK_CUR)---the
2023 check performed by SCM_FPORT_FDES---but fail to seek
2024 backwards. Hence this block comes second. We prefer to use
2025 the read buffer in-place. */
2026 if (SCM_FPORTP (port) && !SCM_FDES_RANDOM_P (SCM_FPORT_FDES (port)))
2027 return NULL;
2028
2029 bytes_read = scm_c_read (port, header, SCM_ENCODING_SEARCH_SIZE);
2030 header[bytes_read] = '\0';
2031 scm_seek (port, scm_from_int (0), scm_from_int (SEEK_SET));
2032 }
2033
2034 if (bytes_read > 3
2035 && header[0] == '\xef' && header[1] == '\xbb' && header[2] == '\xbf')
2036 utf8_bom = 1;
2037
2038 /* search past "coding[:=]" */
2039 pos = header;
2040 while (1)
2041 {
2042 if ((pos = strstr(pos, "coding")) == NULL)
2043 return NULL;
2044
2045 pos += strlen("coding");
2046 if (pos - header >= SCM_ENCODING_SEARCH_SIZE ||
2047 (*pos == ':' || *pos == '='))
2048 {
2049 pos ++;
2050 break;
2051 }
2052 }
2053
2054 /* skip spaces */
2055 while (pos - header <= SCM_ENCODING_SEARCH_SIZE &&
2056 (*pos == ' ' || *pos == '\t'))
2057 pos ++;
2058
2059 /* grab the next token */
2060 encoding_start = pos;
2061 i = 0;
2062 while (encoding_start + i - header <= SCM_ENCODING_SEARCH_SIZE
2063 && encoding_start + i - header < bytes_read
2064 && (isalnum ((int) encoding_start[i])
2065 || strchr ("_-.:/,+=()", encoding_start[i]) != NULL))
2066 i++;
2067
2068 encoding_length = i;
2069 if (encoding_length == 0)
2070 return NULL;
2071
2072 encoding = scm_gc_strndup (encoding_start, encoding_length, "encoding");
2073 for (i = 0; i < encoding_length; i++)
2074 encoding[i] = toupper ((int) encoding[i]);
2075
2076 /* push backwards to make sure we were in a comment */
2077 in_comment = 0;
2078 pos = encoding_start;
2079 while (pos >= header)
2080 {
2081 if (*pos == ';')
2082 {
2083 in_comment = 1;
2084 break;
2085 }
2086 else if (*pos == '\n' || pos == header)
2087 {
2088 /* This wasn't in a semicolon comment. Check for a
2089 hash-bang comment. */
2090 char *beg = strstr (header, "#!");
2091 char *end = strstr (header, "!#");
2092 if (beg < encoding_start && encoding_start + encoding_length <= end)
2093 in_comment = 1;
2094 break;
2095 }
2096 else
2097 {
2098 pos --;
2099 continue;
2100 }
2101 }
2102 if (!in_comment)
2103 /* This wasn't in a comment */
2104 return NULL;
2105
2106 if (utf8_bom && c_strcasecmp(encoding, "UTF-8"))
2107 scm_misc_error (NULL,
2108 "the port input declares the encoding ~s but is encoded as UTF-8",
2109 scm_list_1 (scm_from_locale_string (encoding)));
2110
2111 return encoding;
2112 }
2113
2114 SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0,
2115 (SCM port),
2116 "Scans the port for an Emacs-like character coding declaration\n"
2117 "near the top of the contents of a port with random-accessible contents.\n"
2118 "The coding declaration is of the form\n"
2119 "@code{coding: XXXXX} and must appear in a scheme comment.\n"
2120 "\n"
2121 "Returns a string containing the character encoding of the file\n"
2122 "if a declaration was found, or @code{#f} otherwise.\n")
2123 #define FUNC_NAME s_scm_file_encoding
2124 {
2125 char *enc;
2126 SCM s_enc;
2127
2128 SCM_VALIDATE_OPINPORT (SCM_ARG1, port);
2129
2130 enc = scm_i_scan_for_encoding (port);
2131 if (enc == NULL)
2132 return SCM_BOOL_F;
2133 else
2134 {
2135 s_enc = scm_from_locale_string (enc);
2136 return s_enc;
2137 }
2138
2139 return SCM_BOOL_F;
2140 }
2141 #undef FUNC_NAME
2142
2143 \f
2144 /* Per-port read options.
2145
2146 We store per-port read options in the 'port-read-options' key of the
2147 port's alist, which is stored in the internal port structure. The
2148 value stored in the alist is a single integer that contains a two-bit
2149 field for each read option.
2150
2151 If a bit field contains READ_OPTION_INHERIT (3), that indicates that
2152 the applicable value should be inherited from the corresponding
2153 global read option. Otherwise, the bit field contains the value of
2154 the read option. For boolean read options that have been set
2155 per-port, the possible values are 0 or 1. If the 'keyword_style'
2156 read option has been set per-port, its possible values are those in
2157 'enum t_keyword_style'. */
2158
2159 /* Key to read options in per-port alists. */
2160 SCM_SYMBOL (sym_port_read_options, "port-read-options");
2161
2162 /* Offsets of bit fields for each per-port override */
2163 #define READ_OPTION_COPY_SOURCE_P 0
2164 #define READ_OPTION_RECORD_POSITIONS_P 2
2165 #define READ_OPTION_CASE_INSENSITIVE_P 4
2166 #define READ_OPTION_KEYWORD_STYLE 6
2167 #define READ_OPTION_R6RS_ESCAPES_P 8
2168 #define READ_OPTION_SQUARE_BRACKETS_P 10
2169 #define READ_OPTION_HUNGRY_EOL_ESCAPES_P 12
2170 #define READ_OPTION_CURLY_INFIX_P 14
2171
2172 /* The total width in bits of the per-port overrides */
2173 #define READ_OPTIONS_NUM_BITS 16
2174
2175 #define READ_OPTIONS_INHERIT_ALL ((1UL << READ_OPTIONS_NUM_BITS) - 1)
2176 #define READ_OPTIONS_MAX_VALUE READ_OPTIONS_INHERIT_ALL
2177
2178 #define READ_OPTION_MASK 3
2179 #define READ_OPTION_INHERIT 3
2180
2181 static void
2182 set_port_read_option (SCM port, int option, int new_value)
2183 {
2184 SCM alist, scm_read_options;
2185 unsigned int read_options;
2186
2187 new_value &= READ_OPTION_MASK;
2188 alist = scm_i_port_alist (port);
2189 scm_read_options = scm_assq_ref (alist, sym_port_read_options);
2190 if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE))
2191 read_options = scm_to_uint (scm_read_options);
2192 else
2193 read_options = READ_OPTIONS_INHERIT_ALL;
2194 read_options &= ~(READ_OPTION_MASK << option);
2195 read_options |= new_value << option;
2196 scm_read_options = scm_from_uint (read_options);
2197 alist = scm_assq_set_x (alist, sym_port_read_options, scm_read_options);
2198 scm_i_set_port_alist_x (port, alist);
2199 }
2200
2201 /* Set OPTS and PORT's case-insensitivity according to VALUE. */
2202 static void
2203 set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts, int value)
2204 {
2205 value = !!value;
2206 opts->case_insensitive_p = value;
2207 set_port_read_option (port, READ_OPTION_CASE_INSENSITIVE_P, value);
2208 }
2209
2210 /* Set OPTS and PORT's square_brackets_p option according to VALUE. */
2211 static void
2212 set_port_square_brackets_p (SCM port, scm_t_read_opts *opts, int value)
2213 {
2214 value = !!value;
2215 opts->square_brackets_p = value;
2216 set_port_read_option (port, READ_OPTION_SQUARE_BRACKETS_P, value);
2217 }
2218
2219 /* Set OPTS and PORT's curly_infix_p option according to VALUE. */
2220 static void
2221 set_port_curly_infix_p (SCM port, scm_t_read_opts *opts, int value)
2222 {
2223 value = !!value;
2224 opts->curly_infix_p = value;
2225 set_port_read_option (port, READ_OPTION_CURLY_INFIX_P, value);
2226 }
2227
2228 /* Initialize OPTS based on PORT's read options and the global read
2229 options. */
2230 static void
2231 init_read_options (SCM port, scm_t_read_opts *opts)
2232 {
2233 SCM alist, val, scm_read_options;
2234 unsigned int read_options, x;
2235
2236 alist = scm_i_port_alist (port);
2237 scm_read_options = scm_assq_ref (alist, sym_port_read_options);
2238
2239 if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE))
2240 read_options = scm_to_uint (scm_read_options);
2241 else
2242 read_options = READ_OPTIONS_INHERIT_ALL;
2243
2244 x = READ_OPTION_MASK & (read_options >> READ_OPTION_KEYWORD_STYLE);
2245 if (x == READ_OPTION_INHERIT)
2246 {
2247 val = SCM_PACK (SCM_KEYWORD_STYLE);
2248 if (scm_is_eq (val, scm_keyword_prefix))
2249 x = KEYWORD_STYLE_PREFIX;
2250 else if (scm_is_eq (val, scm_keyword_postfix))
2251 x = KEYWORD_STYLE_POSTFIX;
2252 else
2253 x = KEYWORD_STYLE_HASH_PREFIX;
2254 }
2255 opts->keyword_style = x;
2256
2257 #define RESOLVE_BOOLEAN_OPTION(NAME, name) \
2258 do \
2259 { \
2260 x = READ_OPTION_MASK & (read_options >> READ_OPTION_ ## NAME); \
2261 if (x == READ_OPTION_INHERIT) \
2262 x = !!SCM_ ## NAME; \
2263 opts->name = x; \
2264 } \
2265 while (0)
2266
2267 RESOLVE_BOOLEAN_OPTION (COPY_SOURCE_P, copy_source_p);
2268 RESOLVE_BOOLEAN_OPTION (RECORD_POSITIONS_P, record_positions_p);
2269 RESOLVE_BOOLEAN_OPTION (CASE_INSENSITIVE_P, case_insensitive_p);
2270 RESOLVE_BOOLEAN_OPTION (R6RS_ESCAPES_P, r6rs_escapes_p);
2271 RESOLVE_BOOLEAN_OPTION (SQUARE_BRACKETS_P, square_brackets_p);
2272 RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P, hungry_eol_escapes_p);
2273 RESOLVE_BOOLEAN_OPTION (CURLY_INFIX_P, curly_infix_p);
2274
2275 #undef RESOLVE_BOOLEAN_OPTION
2276
2277 opts->neoteric_p = 0;
2278 }
2279
2280 void
2281 scm_init_read ()
2282 {
2283 SCM read_hash_procs;
2284
2285 read_hash_procs = scm_make_fluid_with_default (SCM_EOL);
2286
2287 scm_i_read_hash_procedures =
2288 SCM_VARIABLE_LOC (scm_c_define ("%read-hash-procedures", read_hash_procs));
2289
2290 scm_init_opts (scm_read_options, scm_read_opts);
2291 #include "libguile/read.x"
2292 }
2293
2294 /*
2295 Local Variables:
2296 c-file-style: "gnu"
2297 End:
2298 */