read: Accept "\|" in string literals.
[bpt/guile.git] / libguile / read.c
1 /* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006,
2 * 2007, 2008, 2009, 2010, 2011, 2012, 2014 Free Software Foundation, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
18 */
19
20
21 \f
22
23 #ifdef HAVE_CONFIG_H
24 # include <config.h>
25 #endif
26
27 #include <stdio.h>
28 #include <ctype.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <unicase.h>
32 #include <unictype.h>
33 #include <c-strcase.h>
34
35 #include "libguile/_scm.h"
36 #include "libguile/bytevectors.h"
37 #include "libguile/chars.h"
38 #include "libguile/eval.h"
39 #include "libguile/arrays.h"
40 #include "libguile/bitvectors.h"
41 #include "libguile/keywords.h"
42 #include "libguile/alist.h"
43 #include "libguile/srcprop.h"
44 #include "libguile/hashtab.h"
45 #include "libguile/hash.h"
46 #include "libguile/ports.h"
47 #include "libguile/fports.h"
48 #include "libguile/root.h"
49 #include "libguile/strings.h"
50 #include "libguile/strports.h"
51 #include "libguile/vectors.h"
52 #include "libguile/validate.h"
53 #include "libguile/srfi-4.h"
54 #include "libguile/srfi-13.h"
55
56 #include "libguile/read.h"
57 #include "libguile/private-options.h"
58
59
60 \f
61
62 SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
63 SCM_SYMBOL (scm_keyword_prefix, "prefix");
64 SCM_SYMBOL (scm_keyword_postfix, "postfix");
65 SCM_SYMBOL (sym_nil, "nil");
66
67 /* SRFI-105 curly infix expression support */
68 SCM_SYMBOL (sym_nfx, "$nfx$");
69 SCM_SYMBOL (sym_bracket_list, "$bracket-list$");
70 SCM_SYMBOL (sym_bracket_apply, "$bracket-apply$");
71
72 scm_t_option scm_read_opts[] =
73 {
74 { SCM_OPTION_BOOLEAN, "copy", 0,
75 "Copy source code expressions." },
76 { SCM_OPTION_BOOLEAN, "positions", 1,
77 "Record positions of source code expressions." },
78 { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
79 "Convert symbols to lower case."},
80 { SCM_OPTION_SCM, "keywords", (scm_t_bits) SCM_BOOL_F_BITS,
81 "Style of keyword recognition: #f, 'prefix or 'postfix."},
82 { SCM_OPTION_BOOLEAN, "r6rs-hex-escapes", 0,
83 "Use R6RS variable-length character and string hex escapes."},
84 { SCM_OPTION_BOOLEAN, "square-brackets", 1,
85 "Treat `[' and `]' as parentheses, for R6RS compatibility."},
86 { SCM_OPTION_BOOLEAN, "hungry-eol-escapes", 0,
87 "In strings, consume leading whitespace after an escaped end-of-line."},
88 { SCM_OPTION_BOOLEAN, "curly-infix", 0,
89 "Support SRFI-105 curly infix expressions."},
90 { 0, },
91 };
92
93 /* Internal read options structure. This is initialized by 'scm_read'
94 from the global and per-port read options, and a pointer is passed
95 down to all helper functions. */
96
97 enum t_keyword_style
98 {
99 KEYWORD_STYLE_HASH_PREFIX,
100 KEYWORD_STYLE_PREFIX,
101 KEYWORD_STYLE_POSTFIX
102 };
103
104 struct t_read_opts
105 {
106 enum t_keyword_style keyword_style;
107 unsigned int copy_source_p : 1;
108 unsigned int record_positions_p : 1;
109 unsigned int case_insensitive_p : 1;
110 unsigned int r6rs_escapes_p : 1;
111 unsigned int square_brackets_p : 1;
112 unsigned int hungry_eol_escapes_p : 1;
113 unsigned int curly_infix_p : 1;
114 unsigned int neoteric_p : 1;
115 };
116
117 typedef struct t_read_opts scm_t_read_opts;
118
119
120 /*
121 Give meaningful error messages for errors
122
123 We use the format
124
125 FILE:LINE:COL: MESSAGE
126 This happened in ....
127
128 This is not standard GNU format, but the test-suite likes the real
129 message to be in front.
130
131 */
132
133
134 void
135 scm_i_input_error (char const *function,
136 SCM port, const char *message, SCM arg)
137 {
138 SCM fn = (scm_is_string (SCM_FILENAME(port))
139 ? SCM_FILENAME(port)
140 : scm_from_locale_string ("#<unknown port>"));
141
142 SCM string_port = scm_open_output_string ();
143 SCM string = SCM_EOL;
144 scm_simple_format (string_port,
145 scm_from_locale_string ("~A:~S:~S: ~A"),
146 scm_list_4 (fn,
147 scm_from_long (SCM_LINUM (port) + 1),
148 scm_from_int (SCM_COL (port) + 1),
149 scm_from_locale_string (message)));
150
151 string = scm_get_output_string (string_port);
152 scm_close_output_port (string_port);
153 scm_error_scm (scm_from_latin1_symbol ("read-error"),
154 function? scm_from_locale_string (function) : SCM_BOOL_F,
155 string,
156 arg,
157 SCM_BOOL_F);
158 }
159
160
161 SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
162 (SCM setting),
163 "Option interface for the read options. Instead of using\n"
164 "this procedure directly, use the procedures @code{read-enable},\n"
165 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
166 #define FUNC_NAME s_scm_read_options
167 {
168 SCM ans = scm_options (setting,
169 scm_read_opts,
170 FUNC_NAME);
171 if (SCM_COPY_SOURCE_P)
172 SCM_RECORD_POSITIONS_P = 1;
173 return ans;
174 }
175 #undef FUNC_NAME
176
177 /* A fluid referring to an association list mapping extra hash
178 characters to procedures. */
179 static SCM *scm_i_read_hash_procedures;
180
181 static SCM
182 scm_i_read_hash_procedures_ref (void)
183 {
184 return scm_fluid_ref (*scm_i_read_hash_procedures);
185 }
186
187 static void
188 scm_i_read_hash_procedures_set_x (SCM value)
189 {
190 scm_fluid_set_x (*scm_i_read_hash_procedures, value);
191 }
192
193 \f
194 /* Token readers. */
195
196
197 /* Size of the C buffer used to read symbols and numbers. */
198 #define READER_BUFFER_SIZE 128
199
200 /* Number of 32-bit codepoints in the buffer used to read strings. */
201 #define READER_STRING_BUFFER_SIZE 128
202
203 /* The maximum size of Scheme character names. */
204 #define READER_CHAR_NAME_MAX_SIZE 50
205
206 /* The maximum size of reader directive names. */
207 #define READER_DIRECTIVE_NAME_MAX_SIZE 50
208
209
210 /* `isblank' is only in C99. */
211 #define CHAR_IS_BLANK_(_chr) \
212 (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n') \
213 || ((_chr) == '\f') || ((_chr) == '\r'))
214
215 #ifdef MSDOS
216 # define CHAR_IS_BLANK(_chr) \
217 ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
218 #else
219 # define CHAR_IS_BLANK CHAR_IS_BLANK_
220 #endif
221
222
223 /* R5RS one-character delimiters (see section 7.1.1, ``Lexical
224 structure''). */
225 #define CHAR_IS_R5RS_DELIMITER(c) \
226 (CHAR_IS_BLANK (c) \
227 || (c) == ')' || (c) == '(' || (c) == ';' || (c) == '"')
228
229 #define CHAR_IS_DELIMITER(c) \
230 (CHAR_IS_R5RS_DELIMITER (c) \
231 || (((c) == ']' || (c) == '[') && (opts->square_brackets_p \
232 || opts->curly_infix_p)) \
233 || (((c) == '}' || (c) == '{') && opts->curly_infix_p))
234
235 /* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
236 Structure''. */
237 #define CHAR_IS_EXPONENT_MARKER(_chr) \
238 (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
239 || ((_chr) == 'd') || ((_chr) == 'l'))
240
241 /* Read an SCSH block comment. */
242 static SCM scm_read_scsh_block_comment (scm_t_wchar, SCM);
243 static SCM scm_read_r6rs_block_comment (scm_t_wchar, SCM);
244 static SCM scm_read_commented_expression (scm_t_wchar, SCM, scm_t_read_opts *);
245 static SCM scm_read_shebang (scm_t_wchar, SCM, scm_t_read_opts *);
246 static SCM scm_get_hash_procedure (int);
247
248 /* Read from PORT until a delimiter (e.g., a whitespace) is read. Put the
249 result in the pre-allocated buffer BUF. Return zero if the whole token has
250 fewer than BUF_SIZE bytes, non-zero otherwise. READ will be set the number of
251 bytes actually read. */
252 static int
253 read_token (SCM port, scm_t_read_opts *opts,
254 char *buf, size_t buf_size, size_t *read)
255 {
256 *read = 0;
257
258 while (*read < buf_size)
259 {
260 int chr;
261
262 chr = scm_get_byte_or_eof (port);
263
264 if (chr == EOF)
265 return 0;
266 else if (CHAR_IS_DELIMITER (chr))
267 {
268 scm_unget_byte (chr, port);
269 return 0;
270 }
271 else
272 {
273 *buf = (char) chr;
274 buf++, (*read)++;
275 }
276 }
277
278 return 1;
279 }
280
281 /* Like `read_token', but return either BUFFER, or a GC-allocated buffer
282 if the token doesn't fit in BUFFER_SIZE bytes. */
283 static char *
284 read_complete_token (SCM port, scm_t_read_opts *opts,
285 char *buffer, size_t buffer_size, size_t *read)
286 {
287 int overflow = 0;
288 size_t bytes_read, overflow_size = 0;
289 char *overflow_buffer = NULL;
290
291 do
292 {
293 overflow = read_token (port, opts, buffer, buffer_size, &bytes_read);
294 if (bytes_read == 0)
295 break;
296 if (overflow || overflow_size != 0)
297 {
298 if (overflow_size == 0)
299 {
300 overflow_buffer = scm_gc_malloc_pointerless (bytes_read, "read");
301 memcpy (overflow_buffer, buffer, bytes_read);
302 overflow_size = bytes_read;
303 }
304 else
305 {
306 char *new_buf =
307 scm_gc_malloc_pointerless (overflow_size + bytes_read, "read");
308
309 memcpy (new_buf, overflow_buffer, overflow_size);
310 memcpy (new_buf + overflow_size, buffer, bytes_read);
311
312 overflow_buffer = new_buf;
313 overflow_size += bytes_read;
314 }
315 }
316 }
317 while (overflow);
318
319 if (overflow_size)
320 *read = overflow_size;
321 else
322 *read = bytes_read;
323
324 return (overflow_size > 0 ? overflow_buffer : buffer);
325 }
326
327 /* Skip whitespace from PORT and return the first non-whitespace character
328 read. Raise an error on end-of-file. */
329 static int
330 flush_ws (SCM port, scm_t_read_opts *opts, const char *eoferr)
331 {
332 scm_t_wchar c;
333 while (1)
334 switch (c = scm_getc (port))
335 {
336 case EOF:
337 goteof:
338 if (eoferr)
339 {
340 scm_i_input_error (eoferr,
341 port,
342 "end of file",
343 SCM_EOL);
344 }
345 return c;
346
347 case ';':
348 lp:
349 switch (c = scm_getc (port))
350 {
351 case EOF:
352 goto goteof;
353 default:
354 goto lp;
355 case SCM_LINE_INCREMENTORS:
356 break;
357 }
358 break;
359
360 case '#':
361 switch (c = scm_getc (port))
362 {
363 case EOF:
364 eoferr = "read_sharp";
365 goto goteof;
366 case '!':
367 scm_read_shebang (c, port, opts);
368 break;
369 case ';':
370 scm_read_commented_expression (c, port, opts);
371 break;
372 case '|':
373 if (scm_is_false (scm_get_hash_procedure (c)))
374 {
375 scm_read_r6rs_block_comment (c, port);
376 break;
377 }
378 /* fall through */
379 default:
380 scm_ungetc (c, port);
381 return '#';
382 }
383 break;
384
385 case SCM_LINE_INCREMENTORS:
386 case SCM_SINGLE_SPACES:
387 case '\t':
388 break;
389
390 default:
391 return c;
392 }
393
394 return 0;
395 }
396
397
398 \f
399 /* Token readers. */
400
401 static SCM scm_read_expression (SCM port, scm_t_read_opts *opts);
402 static SCM scm_read_sharp (int chr, SCM port, scm_t_read_opts *opts,
403 long line, int column);
404
405
406 static SCM
407 maybe_annotate_source (SCM x, SCM port, scm_t_read_opts *opts,
408 long line, int column)
409 {
410 if (opts->record_positions_p)
411 scm_i_set_source_properties_x (x, line, column, SCM_FILENAME (port));
412 return x;
413 }
414
415 static SCM
416 scm_read_sexp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
417 #define FUNC_NAME "scm_i_lreadparen"
418 {
419 int c;
420 SCM tmp, tl, ans = SCM_EOL;
421 const int curly_list_p = (chr == '{') && opts->curly_infix_p;
422 const int terminating_char = ((chr == '{') ? '}'
423 : ((chr == '[') ? ']'
424 : ')'));
425
426 /* Need to capture line and column numbers here. */
427 long line = SCM_LINUM (port);
428 int column = SCM_COL (port) - 1;
429
430 c = flush_ws (port, opts, FUNC_NAME);
431 if (terminating_char == c)
432 return SCM_EOL;
433
434 scm_ungetc (c, port);
435 tmp = scm_read_expression (port, opts);
436
437 /* Note that it is possible for scm_read_expression to return
438 scm_sym_dot, but not as part of a dotted pair: as in #{.}#. So
439 check that it's a real dot by checking `c'. */
440 if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
441 {
442 ans = scm_read_expression (port, opts);
443 if (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
444 scm_i_input_error (FUNC_NAME, port, "missing close paren",
445 SCM_EOL);
446 return ans;
447 }
448
449 /* Build the head of the list structure. */
450 ans = tl = scm_cons (tmp, SCM_EOL);
451
452 while (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
453 {
454 SCM new_tail;
455
456 if (c == ')' || (c == ']' && opts->square_brackets_p)
457 || ((c == '}' || c == ']') && opts->curly_infix_p))
458 scm_i_input_error (FUNC_NAME, port,
459 "in pair: mismatched close paren: ~A",
460 scm_list_1 (SCM_MAKE_CHAR (c)));
461
462 scm_ungetc (c, port);
463 tmp = scm_read_expression (port, opts);
464
465 /* See above note about scm_sym_dot. */
466 if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
467 {
468 SCM_SETCDR (tl, scm_read_expression (port, opts));
469
470 c = flush_ws (port, opts, FUNC_NAME);
471 if (terminating_char != c)
472 scm_i_input_error (FUNC_NAME, port,
473 "in pair: missing close paren", SCM_EOL);
474 break;
475 }
476
477 new_tail = scm_cons (tmp, SCM_EOL);
478 SCM_SETCDR (tl, new_tail);
479 tl = new_tail;
480 }
481
482 if (curly_list_p)
483 {
484 /* In addition to finding the length, 'scm_ilength' checks for
485 improper or circular lists, in which case it returns -1. */
486 int len = scm_ilength (ans);
487
488 /* The (len == 0) case is handled above */
489 if (len == 1)
490 /* Return directly to avoid re-annotating the element's source
491 location with the position of the outer brace. Also, it
492 might not be possible to annotate the element. */
493 return scm_car (ans); /* {e} => e */
494 else if (len == 2)
495 ; /* Leave the list unchanged: {e1 e2} => (e1 e2) */
496 else if (len >= 3 && (len & 1))
497 {
498 /* It's a proper list whose length is odd and at least 3. If
499 the elements at odd indices (the infix operator positions)
500 are all 'equal?', then it's a simple curly-infix list.
501 Otherwise it's a mixed curly-infix list. */
502 SCM op = scm_cadr (ans);
503
504 /* Check to see if the elements at odd indices are 'equal?' */
505 for (tl = scm_cdddr (ans); ; tl = scm_cddr (tl))
506 {
507 if (scm_is_null (tl))
508 {
509 /* Convert simple curly-infix list to prefix:
510 {a <op> b <op> ...} => (<op> a b ...) */
511 tl = ans;
512 while (scm_is_pair (scm_cdr (tl)))
513 {
514 tmp = scm_cddr (tl);
515 SCM_SETCDR (tl, tmp);
516 tl = tmp;
517 }
518 ans = scm_cons (op, ans);
519 break;
520 }
521 else if (scm_is_false (scm_equal_p (op, scm_car (tl))))
522 {
523 /* Mixed curly-infix list: {e ...} => ($nfx$ e ...) */
524 ans = scm_cons (sym_nfx, ans);
525 break;
526 }
527 }
528 }
529 else
530 /* Mixed curly-infix (possibly improper) list:
531 {e . tail} => ($nfx$ e . tail) */
532 ans = scm_cons (sym_nfx, ans);
533 }
534
535 return maybe_annotate_source (ans, port, opts, line, column);
536 }
537 #undef FUNC_NAME
538
539
540 /* Read a hexadecimal number NDIGITS in length. Put its value into the variable
541 C. If TERMINATOR is non-null, terminate early if the TERMINATOR character is
542 found. */
543 #define SCM_READ_HEX_ESCAPE(ndigits, terminator) \
544 do \
545 { \
546 scm_t_wchar a; \
547 size_t i = 0; \
548 c = 0; \
549 while (i < ndigits) \
550 { \
551 a = scm_getc (port); \
552 if (a == EOF) \
553 goto str_eof; \
554 if (terminator \
555 && (a == (scm_t_wchar) terminator) \
556 && (i > 0)) \
557 break; \
558 if ('0' <= a && a <= '9') \
559 a -= '0'; \
560 else if ('A' <= a && a <= 'F') \
561 a = a - 'A' + 10; \
562 else if ('a' <= a && a <= 'f') \
563 a = a - 'a' + 10; \
564 else \
565 { \
566 c = a; \
567 goto bad_escaped; \
568 } \
569 c = c * 16 + a; \
570 i ++; \
571 } \
572 } while (0)
573
574 static void
575 skip_intraline_whitespace (SCM port)
576 {
577 scm_t_wchar c;
578
579 do
580 {
581 c = scm_getc (port);
582 if (c == EOF)
583 return;
584 }
585 while (c == '\t' || uc_is_general_category (c, UC_SPACE_SEPARATOR));
586
587 scm_ungetc (c, port);
588 }
589
590 static SCM
591 scm_read_string (int chr, SCM port, scm_t_read_opts *opts)
592 #define FUNC_NAME "scm_lreadr"
593 {
594 /* For strings smaller than C_STR, this function creates only one Scheme
595 object (the string returned). */
596
597 SCM str = SCM_EOL;
598 size_t c_str_len = 0;
599 scm_t_wchar c, c_str[READER_STRING_BUFFER_SIZE];
600
601 /* Need to capture line and column numbers here. */
602 long line = SCM_LINUM (port);
603 int column = SCM_COL (port) - 1;
604
605 while ('"' != (c = scm_getc (port)))
606 {
607 if (c == EOF)
608 {
609 str_eof:
610 scm_i_input_error (FUNC_NAME, port,
611 "end of file in string constant", SCM_EOL);
612 }
613
614 if (c_str_len + 1 >= READER_STRING_BUFFER_SIZE)
615 {
616 str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
617 c_str_len = 0;
618 }
619
620 if (c == '\\')
621 {
622 switch (c = scm_getc (port))
623 {
624 case EOF:
625 goto str_eof;
626 case '"':
627 case '|':
628 case '\\':
629 break;
630 case '\n':
631 if (opts->hungry_eol_escapes_p)
632 skip_intraline_whitespace (port);
633 continue;
634 case '0':
635 c = '\0';
636 break;
637 case 'f':
638 c = '\f';
639 break;
640 case 'n':
641 c = '\n';
642 break;
643 case 'r':
644 c = '\r';
645 break;
646 case 't':
647 c = '\t';
648 break;
649 case 'a':
650 c = '\007';
651 break;
652 case 'v':
653 c = '\v';
654 break;
655 case 'b':
656 c = '\010';
657 break;
658 case 'x':
659 if (opts->r6rs_escapes_p)
660 SCM_READ_HEX_ESCAPE (10, ';');
661 else
662 SCM_READ_HEX_ESCAPE (2, '\0');
663 break;
664 case 'u':
665 if (!opts->r6rs_escapes_p)
666 {
667 SCM_READ_HEX_ESCAPE (4, '\0');
668 break;
669 }
670 case 'U':
671 if (!opts->r6rs_escapes_p)
672 {
673 SCM_READ_HEX_ESCAPE (6, '\0');
674 break;
675 }
676 default:
677 bad_escaped:
678 scm_i_input_error (FUNC_NAME, port,
679 "illegal character in escape sequence: ~S",
680 scm_list_1 (SCM_MAKE_CHAR (c)));
681 }
682 }
683
684 c_str[c_str_len++] = c;
685 }
686
687 if (scm_is_null (str))
688 /* Fast path: we got a string that fits in C_STR. */
689 str = scm_from_utf32_stringn (c_str, c_str_len);
690 else
691 {
692 if (c_str_len > 0)
693 str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
694
695 str = scm_string_concatenate_reverse (str, SCM_UNDEFINED, SCM_UNDEFINED);
696 }
697
698 return maybe_annotate_source (str, port, opts, line, column);
699 }
700 #undef FUNC_NAME
701
702
703 static SCM
704 scm_read_number (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
705 {
706 SCM result, str = SCM_EOL;
707 char local_buffer[READER_BUFFER_SIZE], *buffer;
708 size_t bytes_read;
709 scm_t_port *pt = SCM_PTAB_ENTRY (port);
710
711 /* Need to capture line and column numbers here. */
712 long line = SCM_LINUM (port);
713 int column = SCM_COL (port) - 1;
714
715 scm_ungetc (chr, port);
716 buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
717 &bytes_read);
718
719 str = scm_from_stringn (buffer, bytes_read, pt->encoding, pt->ilseq_handler);
720
721 result = scm_string_to_number (str, SCM_UNDEFINED);
722 if (scm_is_false (result))
723 {
724 /* Return a symbol instead of a number */
725 if (opts->case_insensitive_p)
726 str = scm_string_downcase_x (str);
727 result = scm_string_to_symbol (str);
728 }
729 else if (SCM_NIMP (result))
730 result = maybe_annotate_source (result, port, opts, line, column);
731
732 SCM_COL (port) += scm_i_string_length (str);
733 return result;
734 }
735
736 static SCM
737 scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
738 {
739 SCM result;
740 int ends_with_colon = 0;
741 size_t bytes_read;
742 int postfix = (opts->keyword_style == KEYWORD_STYLE_POSTFIX);
743 char local_buffer[READER_BUFFER_SIZE], *buffer;
744 scm_t_port *pt = SCM_PTAB_ENTRY (port);
745 SCM str;
746
747 scm_ungetc (chr, port);
748 buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
749 &bytes_read);
750 if (bytes_read > 0)
751 ends_with_colon = buffer[bytes_read - 1] == ':';
752
753 if (postfix && ends_with_colon && (bytes_read > 1))
754 {
755 str = scm_from_stringn (buffer, bytes_read - 1,
756 pt->encoding, pt->ilseq_handler);
757
758 if (opts->case_insensitive_p)
759 str = scm_string_downcase_x (str);
760 result = scm_symbol_to_keyword (scm_string_to_symbol (str));
761 }
762 else
763 {
764 str = scm_from_stringn (buffer, bytes_read,
765 pt->encoding, pt->ilseq_handler);
766
767 if (opts->case_insensitive_p)
768 str = scm_string_downcase_x (str);
769 result = scm_string_to_symbol (str);
770 }
771
772 SCM_COL (port) += scm_i_string_length (str);
773 return result;
774 }
775
776 static SCM
777 scm_read_number_and_radix (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
778 #define FUNC_NAME "scm_lreadr"
779 {
780 SCM result;
781 size_t read;
782 char local_buffer[READER_BUFFER_SIZE], *buffer;
783 unsigned int radix;
784 SCM str;
785 scm_t_port *pt;
786
787 switch (chr)
788 {
789 case 'B':
790 case 'b':
791 radix = 2;
792 break;
793
794 case 'o':
795 case 'O':
796 radix = 8;
797 break;
798
799 case 'd':
800 case 'D':
801 radix = 10;
802 break;
803
804 case 'x':
805 case 'X':
806 radix = 16;
807 break;
808
809 default:
810 scm_ungetc (chr, port);
811 scm_ungetc ('#', port);
812 radix = 10;
813 }
814
815 buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
816 &read);
817
818 pt = SCM_PTAB_ENTRY (port);
819 str = scm_from_stringn (buffer, read, pt->encoding, pt->ilseq_handler);
820
821 result = scm_string_to_number (str, scm_from_uint (radix));
822
823 SCM_COL (port) += scm_i_string_length (str);
824
825 if (scm_is_true (result))
826 return result;
827
828 scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
829
830 return SCM_BOOL_F;
831 }
832 #undef FUNC_NAME
833
834 static SCM
835 scm_read_quote (int chr, SCM port, scm_t_read_opts *opts)
836 {
837 SCM p;
838 long line = SCM_LINUM (port);
839 int column = SCM_COL (port) - 1;
840
841 switch (chr)
842 {
843 case '`':
844 p = scm_sym_quasiquote;
845 break;
846
847 case '\'':
848 p = scm_sym_quote;
849 break;
850
851 case ',':
852 {
853 scm_t_wchar c;
854
855 c = scm_getc (port);
856 if ('@' == c)
857 p = scm_sym_uq_splicing;
858 else
859 {
860 scm_ungetc (c, port);
861 p = scm_sym_unquote;
862 }
863 break;
864 }
865
866 default:
867 fprintf (stderr, "%s: unhandled quote character (%i)\n",
868 "scm_read_quote", chr);
869 abort ();
870 }
871
872 p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL);
873 return maybe_annotate_source (p, port, opts, line, column);
874 }
875
876 SCM_SYMBOL (sym_syntax, "syntax");
877 SCM_SYMBOL (sym_quasisyntax, "quasisyntax");
878 SCM_SYMBOL (sym_unsyntax, "unsyntax");
879 SCM_SYMBOL (sym_unsyntax_splicing, "unsyntax-splicing");
880
881 static SCM
882 scm_read_syntax (int chr, SCM port, scm_t_read_opts *opts)
883 {
884 SCM p;
885 long line = SCM_LINUM (port);
886 int column = SCM_COL (port) - 1;
887
888 switch (chr)
889 {
890 case '`':
891 p = sym_quasisyntax;
892 break;
893
894 case '\'':
895 p = sym_syntax;
896 break;
897
898 case ',':
899 {
900 int c;
901
902 c = scm_getc (port);
903 if ('@' == c)
904 p = sym_unsyntax_splicing;
905 else
906 {
907 scm_ungetc (c, port);
908 p = sym_unsyntax;
909 }
910 break;
911 }
912
913 default:
914 fprintf (stderr, "%s: unhandled syntax character (%i)\n",
915 "scm_read_syntax", chr);
916 abort ();
917 }
918
919 p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL);
920 return maybe_annotate_source (p, port, opts, line, column);
921 }
922
923 static SCM
924 scm_read_nil (int chr, SCM port, scm_t_read_opts *opts)
925 {
926 SCM id = scm_read_mixed_case_symbol (chr, port, opts);
927
928 if (!scm_is_eq (id, sym_nil))
929 scm_i_input_error ("scm_read_nil", port,
930 "unexpected input while reading #nil: ~a",
931 scm_list_1 (id));
932
933 return SCM_ELISP_NIL;
934 }
935
936 static SCM
937 scm_read_semicolon_comment (int chr, SCM port)
938 {
939 int c;
940
941 /* We use the get_byte here because there is no need to get the
942 locale correct with comment input. This presumes that newline
943 always represents itself no matter what the encoding is. */
944 for (c = scm_get_byte_or_eof (port);
945 (c != EOF) && (c != '\n');
946 c = scm_get_byte_or_eof (port));
947
948 return SCM_UNSPECIFIED;
949 }
950
951 /* If the EXPECTED_CHARS are the next ones available from PORT, then
952 consume them and return 1. Otherwise leave the port position where
953 it was and return 0. EXPECTED_CHARS should be all lowercase, and
954 will be matched case-insensitively against the characters read from
955 PORT. */
956 static int
957 try_read_ci_chars (SCM port, const char *expected_chars)
958 {
959 int num_chars_wanted = strlen (expected_chars);
960 int num_chars_read = 0;
961 char *chars_read = alloca (num_chars_wanted);
962 int c;
963
964 while (num_chars_read < num_chars_wanted)
965 {
966 c = scm_getc (port);
967 if (c == EOF)
968 break;
969 else if (tolower (c) != expected_chars[num_chars_read])
970 {
971 scm_ungetc (c, port);
972 break;
973 }
974 else
975 chars_read[num_chars_read++] = c;
976 }
977
978 if (num_chars_read == num_chars_wanted)
979 return 1;
980 else
981 {
982 while (num_chars_read > 0)
983 scm_ungetc (chars_read[--num_chars_read], port);
984 return 0;
985 }
986 }
987
988 \f
989 /* Sharp readers, i.e. readers called after a `#' sign has been read. */
990
991 static SCM
992 scm_read_boolean (int chr, SCM port)
993 {
994 switch (chr)
995 {
996 case 't':
997 case 'T':
998 try_read_ci_chars (port, "rue");
999 return SCM_BOOL_T;
1000
1001 case 'f':
1002 case 'F':
1003 try_read_ci_chars (port, "alse");
1004 return SCM_BOOL_F;
1005 }
1006
1007 return SCM_UNSPECIFIED;
1008 }
1009
1010 static SCM
1011 scm_read_character (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
1012 #define FUNC_NAME "scm_lreadr"
1013 {
1014 char buffer[READER_CHAR_NAME_MAX_SIZE];
1015 SCM charname;
1016 size_t charname_len, bytes_read;
1017 scm_t_wchar cp;
1018 int overflow;
1019 scm_t_port *pt;
1020
1021 overflow = read_token (port, opts, buffer, READER_CHAR_NAME_MAX_SIZE,
1022 &bytes_read);
1023 if (overflow)
1024 scm_i_input_error (FUNC_NAME, port, "character name too long", SCM_EOL);
1025
1026 if (bytes_read == 0)
1027 {
1028 chr = scm_getc (port);
1029 if (chr == EOF)
1030 scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
1031 "while reading character", SCM_EOL);
1032
1033 /* CHR must be a token delimiter, like a whitespace. */
1034 return (SCM_MAKE_CHAR (chr));
1035 }
1036
1037 pt = SCM_PTAB_ENTRY (port);
1038
1039 /* Simple ASCII characters can be processed immediately. Also, simple
1040 ISO-8859-1 characters can be processed immediately if the encoding for this
1041 port is ISO-8859-1. */
1042 if (bytes_read == 1 && ((unsigned char) buffer[0] <= 127 || pt->encoding == NULL))
1043 {
1044 SCM_COL (port) += 1;
1045 return SCM_MAKE_CHAR (buffer[0]);
1046 }
1047
1048 /* Otherwise, convert the buffer into a proper scheme string for
1049 processing. */
1050 charname = scm_from_stringn (buffer, bytes_read, pt->encoding,
1051 pt->ilseq_handler);
1052 charname_len = scm_i_string_length (charname);
1053 SCM_COL (port) += charname_len;
1054 cp = scm_i_string_ref (charname, 0);
1055 if (charname_len == 1)
1056 return SCM_MAKE_CHAR (cp);
1057
1058 /* Ignore dotted circles, which may be used to keep combining characters from
1059 combining with the backslash in #\charname. */
1060 if (cp == SCM_CODEPOINT_DOTTED_CIRCLE && charname_len == 2)
1061 return SCM_MAKE_CHAR (scm_i_string_ref (charname, 1));
1062
1063 if (cp >= '0' && cp < '8')
1064 {
1065 /* Dirk:FIXME:: This type of character syntax is not R5RS
1066 * compliant. Further, it should be verified that the constant
1067 * does only consist of octal digits. */
1068 SCM p = scm_string_to_number (charname, scm_from_uint (8));
1069 if (SCM_I_INUMP (p))
1070 {
1071 scm_t_wchar c = scm_to_uint32 (p);
1072 if (SCM_IS_UNICODE_CHAR (c))
1073 return SCM_MAKE_CHAR (c);
1074 else
1075 scm_i_input_error (FUNC_NAME, port,
1076 "out-of-range octal character escape: ~a",
1077 scm_list_1 (charname));
1078 }
1079 }
1080
1081 if (cp == 'x' && (charname_len > 1))
1082 {
1083 SCM p;
1084
1085 /* Convert from hex, skipping the initial 'x' character in CHARNAME */
1086 p = scm_string_to_number (scm_c_substring (charname, 1, charname_len),
1087 scm_from_uint (16));
1088 if (SCM_I_INUMP (p))
1089 {
1090 scm_t_wchar c = scm_to_uint32 (p);
1091 if (SCM_IS_UNICODE_CHAR (c))
1092 return SCM_MAKE_CHAR (c);
1093 else
1094 scm_i_input_error (FUNC_NAME, port,
1095 "out-of-range hex character escape: ~a",
1096 scm_list_1 (charname));
1097 }
1098 }
1099
1100 /* The names of characters should never have non-Latin1
1101 characters. */
1102 if (scm_i_is_narrow_string (charname)
1103 || scm_i_try_narrow_string (charname))
1104 { SCM ch = scm_i_charname_to_char (scm_i_string_chars (charname),
1105 charname_len);
1106 if (scm_is_true (ch))
1107 return ch;
1108 }
1109
1110 scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
1111 scm_list_1 (charname));
1112
1113 return SCM_UNSPECIFIED;
1114 }
1115 #undef FUNC_NAME
1116
1117 static SCM
1118 scm_read_keyword (int chr, SCM port, scm_t_read_opts *opts)
1119 {
1120 SCM symbol;
1121
1122 /* Read the symbol that comprises the keyword. Doing this instead of
1123 invoking a specific symbol reader function allows `scm_read_keyword ()'
1124 to adapt to the delimiters currently valid of symbols.
1125
1126 XXX: This implementation allows sloppy syntaxes like `#: key'. */
1127 symbol = scm_read_expression (port, opts);
1128 if (!scm_is_symbol (symbol))
1129 scm_i_input_error ("scm_read_keyword", port,
1130 "keyword prefix `~a' not followed by a symbol: ~s",
1131 scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
1132
1133 return (scm_symbol_to_keyword (symbol));
1134 }
1135
1136 static SCM
1137 scm_read_vector (int chr, SCM port, scm_t_read_opts *opts,
1138 long line, int column)
1139 {
1140 /* Note: We call `scm_read_sexp ()' rather than READER here in order to
1141 guarantee that it's going to do what we want. After all, this is an
1142 implementation detail of `scm_read_vector ()', not a desirable
1143 property. */
1144 return maybe_annotate_source (scm_vector (scm_read_sexp (chr, port, opts)),
1145 port, opts, line, column);
1146 }
1147
1148 /* Helper used by scm_read_array */
1149 static int
1150 read_decimal_integer (SCM port, int c, ssize_t *resp)
1151 {
1152 ssize_t sign = 1;
1153 ssize_t res = 0;
1154 int got_it = 0;
1155
1156 if (c == '-')
1157 {
1158 sign = -1;
1159 c = scm_getc (port);
1160 }
1161
1162 while ('0' <= c && c <= '9')
1163 {
1164 if (((SSIZE_MAX - (c-'0')) / 10) <= res)
1165 scm_i_input_error ("read_decimal_integer", port,
1166 "number too large", SCM_EOL);
1167 res = 10*res + c-'0';
1168 got_it = 1;
1169 c = scm_getc (port);
1170 }
1171
1172 if (got_it)
1173 *resp = sign * res;
1174 return c;
1175 }
1176
1177 /* Read an array. This function can also read vectors and uniform
1178 vectors. Also, the conflict between '#f' and '#f32' and '#f64' is
1179 handled here.
1180
1181 C is the first character read after the '#'. */
1182 static SCM
1183 scm_read_array (int c, SCM port, scm_t_read_opts *opts, long line, int column)
1184 {
1185 ssize_t rank;
1186 scm_t_wchar tag_buf[8];
1187 int tag_len;
1188
1189 SCM tag, shape = SCM_BOOL_F, elements, array;
1190
1191 /* XXX - shortcut for ordinary vectors. Shouldn't be necessary but
1192 the array code can not deal with zero-length dimensions yet, and
1193 we want to allow zero-length vectors, of course. */
1194 if (c == '(')
1195 return scm_read_vector (c, port, opts, line, column);
1196
1197 /* Disambiguate between '#f' and uniform floating point vectors. */
1198 if (c == 'f')
1199 {
1200 c = scm_getc (port);
1201 if (c != '3' && c != '6')
1202 {
1203 if (c == 'a' && try_read_ci_chars (port, "lse"))
1204 return SCM_BOOL_F;
1205 else if (c != EOF)
1206 scm_ungetc (c, port);
1207 return SCM_BOOL_F;
1208 }
1209 rank = 1;
1210 tag_buf[0] = 'f';
1211 tag_len = 1;
1212 goto continue_reading_tag;
1213 }
1214
1215 /* Read rank. */
1216 rank = 1;
1217 c = read_decimal_integer (port, c, &rank);
1218 if (rank < 0)
1219 scm_i_input_error (NULL, port, "array rank must be non-negative",
1220 SCM_EOL);
1221
1222 /* Read tag. */
1223 tag_len = 0;
1224 continue_reading_tag:
1225 while (c != EOF && c != '(' && c != '@' && c != ':'
1226 && tag_len < sizeof tag_buf / sizeof tag_buf[0])
1227 {
1228 tag_buf[tag_len++] = c;
1229 c = scm_getc (port);
1230 }
1231 if (tag_len == 0)
1232 tag = SCM_BOOL_T;
1233 else
1234 {
1235 tag = scm_string_to_symbol (scm_from_utf32_stringn (tag_buf, tag_len));
1236 if (tag_len == sizeof tag_buf / sizeof tag_buf[0])
1237 scm_i_input_error (NULL, port, "invalid array tag, starting with: ~a",
1238 scm_list_1 (tag));
1239 }
1240
1241 /* Read shape. */
1242 if (c == '@' || c == ':')
1243 {
1244 shape = SCM_EOL;
1245
1246 do
1247 {
1248 ssize_t lbnd = 0, len = 0;
1249 SCM s;
1250
1251 if (c == '@')
1252 {
1253 c = scm_getc (port);
1254 c = read_decimal_integer (port, c, &lbnd);
1255 }
1256
1257 s = scm_from_ssize_t (lbnd);
1258
1259 if (c == ':')
1260 {
1261 c = scm_getc (port);
1262 c = read_decimal_integer (port, c, &len);
1263 if (len < 0)
1264 scm_i_input_error (NULL, port,
1265 "array length must be non-negative",
1266 SCM_EOL);
1267
1268 s = scm_list_2 (s, scm_from_ssize_t (lbnd+len-1));
1269 }
1270
1271 shape = scm_cons (s, shape);
1272 } while (c == '@' || c == ':');
1273
1274 shape = scm_reverse_x (shape, SCM_EOL);
1275 }
1276
1277 /* Read nested lists of elements. */
1278 if (c != '(')
1279 scm_i_input_error (NULL, port,
1280 "missing '(' in vector or array literal",
1281 SCM_EOL);
1282 elements = scm_read_sexp (c, port, opts);
1283
1284 if (scm_is_false (shape))
1285 shape = scm_from_ssize_t (rank);
1286 else if (scm_ilength (shape) != rank)
1287 scm_i_input_error
1288 (NULL, port,
1289 "the number of shape specifications must match the array rank",
1290 SCM_EOL);
1291
1292 /* Handle special print syntax of rank zero arrays; see
1293 scm_i_print_array for a rationale. */
1294 if (rank == 0)
1295 {
1296 if (!scm_is_pair (elements))
1297 scm_i_input_error (NULL, port,
1298 "too few elements in array literal, need 1",
1299 SCM_EOL);
1300 if (!scm_is_null (SCM_CDR (elements)))
1301 scm_i_input_error (NULL, port,
1302 "too many elements in array literal, want 1",
1303 SCM_EOL);
1304 elements = SCM_CAR (elements);
1305 }
1306
1307 /* Construct array, annotate with source location, and return. */
1308 array = scm_list_to_typed_array (tag, shape, elements);
1309 return maybe_annotate_source (array, port, opts, line, column);
1310 }
1311
1312 static SCM
1313 scm_read_srfi4_vector (int chr, SCM port, scm_t_read_opts *opts,
1314 long line, int column)
1315 {
1316 return scm_read_array (chr, port, opts, line, column);
1317 }
1318
1319 static SCM
1320 scm_read_bytevector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
1321 long line, int column)
1322 {
1323 chr = scm_getc (port);
1324 if (chr != 'u')
1325 goto syntax;
1326
1327 chr = scm_getc (port);
1328 if (chr != '8')
1329 goto syntax;
1330
1331 chr = scm_getc (port);
1332 if (chr != '(')
1333 goto syntax;
1334
1335 return maybe_annotate_source
1336 (scm_u8_list_to_bytevector (scm_read_sexp (chr, port, opts)),
1337 port, opts, line, column);
1338
1339 syntax:
1340 scm_i_input_error ("read_bytevector", port,
1341 "invalid bytevector prefix",
1342 SCM_MAKE_CHAR (chr));
1343 return SCM_UNSPECIFIED;
1344 }
1345
1346 static SCM
1347 scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
1348 long line, int column)
1349 {
1350 /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
1351 terribly inefficient but who cares? */
1352 SCM s_bits = SCM_EOL;
1353
1354 for (chr = scm_getc (port);
1355 (chr != EOF) && ((chr == '0') || (chr == '1'));
1356 chr = scm_getc (port))
1357 {
1358 s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
1359 }
1360
1361 if (chr != EOF)
1362 scm_ungetc (chr, port);
1363
1364 return maybe_annotate_source
1365 (scm_bitvector (scm_reverse_x (s_bits, SCM_EOL)),
1366 port, opts, line, column);
1367 }
1368
1369 static SCM
1370 scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
1371 {
1372 int bang_seen = 0;
1373
1374 for (;;)
1375 {
1376 int c = scm_getc (port);
1377
1378 if (c == EOF)
1379 scm_i_input_error ("skip_block_comment", port,
1380 "unterminated `#! ... !#' comment", SCM_EOL);
1381
1382 if (c == '!')
1383 bang_seen = 1;
1384 else if (c == '#' && bang_seen)
1385 break;
1386 else
1387 bang_seen = 0;
1388 }
1389
1390 return SCM_UNSPECIFIED;
1391 }
1392
1393 static void set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts,
1394 int value);
1395 static void set_port_square_brackets_p (SCM port, scm_t_read_opts *opts,
1396 int value);
1397 static void set_port_curly_infix_p (SCM port, scm_t_read_opts *opts,
1398 int value);
1399
1400 static SCM
1401 scm_read_shebang (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
1402 {
1403 char name[READER_DIRECTIVE_NAME_MAX_SIZE + 1];
1404 int c;
1405 int i = 0;
1406
1407 while (i <= READER_DIRECTIVE_NAME_MAX_SIZE)
1408 {
1409 c = scm_getc (port);
1410 if (c == EOF)
1411 scm_i_input_error ("skip_block_comment", port,
1412 "unterminated `#! ... !#' comment", SCM_EOL);
1413 else if (('a' <= c && c <= 'z') || ('0' <= c && c <= '9') || c == '-')
1414 name[i++] = c;
1415 else if (CHAR_IS_DELIMITER (c))
1416 {
1417 scm_ungetc (c, port);
1418 name[i] = '\0';
1419 if (0 == strcmp ("r6rs", name))
1420 ; /* Silently ignore */
1421 else if (0 == strcmp ("fold-case", name))
1422 set_port_case_insensitive_p (port, opts, 1);
1423 else if (0 == strcmp ("no-fold-case", name))
1424 set_port_case_insensitive_p (port, opts, 0);
1425 else if (0 == strcmp ("curly-infix", name))
1426 set_port_curly_infix_p (port, opts, 1);
1427 else if (0 == strcmp ("curly-infix-and-bracket-lists", name))
1428 {
1429 set_port_curly_infix_p (port, opts, 1);
1430 set_port_square_brackets_p (port, opts, 0);
1431 }
1432 else
1433 break;
1434
1435 return SCM_UNSPECIFIED;
1436 }
1437 else
1438 {
1439 scm_ungetc (c, port);
1440 break;
1441 }
1442 }
1443 while (i > 0)
1444 scm_ungetc (name[--i], port);
1445 return scm_read_scsh_block_comment (chr, port);
1446 }
1447
1448 static SCM
1449 scm_read_r6rs_block_comment (scm_t_wchar chr, SCM port)
1450 {
1451 /* Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
1452 nested. So care must be taken. */
1453 int nesting_level = 1;
1454
1455 int a = scm_getc (port);
1456
1457 if (a == EOF)
1458 scm_i_input_error ("scm_read_r6rs_block_comment", port,
1459 "unterminated `#| ... |#' comment", SCM_EOL);
1460
1461 while (nesting_level > 0)
1462 {
1463 int b = scm_getc (port);
1464
1465 if (b == EOF)
1466 scm_i_input_error ("scm_read_r6rs_block_comment", port,
1467 "unterminated `#| ... |#' comment", SCM_EOL);
1468
1469 if (a == '|' && b == '#')
1470 {
1471 nesting_level--;
1472 b = EOF;
1473 }
1474 else if (a == '#' && b == '|')
1475 {
1476 nesting_level++;
1477 b = EOF;
1478 }
1479
1480 a = b;
1481 }
1482
1483 return SCM_UNSPECIFIED;
1484 }
1485
1486 static SCM
1487 scm_read_commented_expression (scm_t_wchar chr, SCM port,
1488 scm_t_read_opts *opts)
1489 {
1490 scm_t_wchar c;
1491
1492 c = flush_ws (port, opts, (char *) NULL);
1493 if (EOF == c)
1494 scm_i_input_error ("read_commented_expression", port,
1495 "no expression after #; comment", SCM_EOL);
1496 scm_ungetc (c, port);
1497 scm_read_expression (port, opts);
1498 return SCM_UNSPECIFIED;
1499 }
1500
1501 static SCM
1502 scm_read_extended_symbol (scm_t_wchar chr, SCM port)
1503 {
1504 /* Guile's extended symbol read syntax looks like this:
1505
1506 #{This is all a symbol name}#
1507
1508 So here, CHR is expected to be `{'. */
1509 int saw_brace = 0;
1510 size_t len = 0;
1511 SCM buf = scm_i_make_string (1024, NULL, 0);
1512
1513 buf = scm_i_string_start_writing (buf);
1514
1515 while ((chr = scm_getc (port)) != EOF)
1516 {
1517 if (saw_brace)
1518 {
1519 if (chr == '#')
1520 {
1521 break;
1522 }
1523 else
1524 {
1525 saw_brace = 0;
1526 scm_i_string_set_x (buf, len++, '}');
1527 }
1528 }
1529
1530 if (chr == '}')
1531 saw_brace = 1;
1532 else if (chr == '\\')
1533 {
1534 /* It used to be that print.c would print extended-read-syntax
1535 symbols with backslashes before "non-standard" chars, but
1536 this routine wouldn't do anything with those escapes.
1537 Bummer. What we've done is to change print.c to output
1538 R6RS hex escapes for those characters, relying on the fact
1539 that the extended read syntax would never put a `\' before
1540 an `x'. For now, we just ignore other instances of
1541 backslash in the string. */
1542 switch ((chr = scm_getc (port)))
1543 {
1544 case EOF:
1545 goto done;
1546 case 'x':
1547 {
1548 scm_t_wchar c;
1549
1550 SCM_READ_HEX_ESCAPE (10, ';');
1551 scm_i_string_set_x (buf, len++, c);
1552 break;
1553
1554 str_eof:
1555 chr = EOF;
1556 goto done;
1557
1558 bad_escaped:
1559 scm_i_string_stop_writing ();
1560 scm_i_input_error ("scm_read_extended_symbol", port,
1561 "illegal character in escape sequence: ~S",
1562 scm_list_1 (SCM_MAKE_CHAR (c)));
1563 break;
1564 }
1565 default:
1566 scm_i_string_set_x (buf, len++, chr);
1567 break;
1568 }
1569 }
1570 else
1571 scm_i_string_set_x (buf, len++, chr);
1572
1573 if (len >= scm_i_string_length (buf) - 2)
1574 {
1575 SCM addy;
1576
1577 scm_i_string_stop_writing ();
1578 addy = scm_i_make_string (1024, NULL, 0);
1579 buf = scm_string_append (scm_list_2 (buf, addy));
1580 len = 0;
1581 buf = scm_i_string_start_writing (buf);
1582 }
1583 }
1584
1585 done:
1586 scm_i_string_stop_writing ();
1587 if (chr == EOF)
1588 scm_i_input_error ("scm_read_extended_symbol", port,
1589 "end of file while reading symbol", SCM_EOL);
1590
1591 return (scm_string_to_symbol (scm_c_substring (buf, 0, len)));
1592 }
1593
1594
1595 \f
1596 /* Top-level token readers, i.e., dispatchers. */
1597
1598 static SCM
1599 scm_read_sharp_extension (int chr, SCM port, scm_t_read_opts *opts)
1600 {
1601 SCM proc;
1602
1603 proc = scm_get_hash_procedure (chr);
1604 if (scm_is_true (scm_procedure_p (proc)))
1605 {
1606 long line = SCM_LINUM (port);
1607 int column = SCM_COL (port) - 2;
1608 SCM got;
1609
1610 got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
1611
1612 if (opts->record_positions_p && SCM_NIMP (got)
1613 && !scm_i_has_source_properties (got))
1614 scm_i_set_source_properties_x (got, line, column, SCM_FILENAME (port));
1615
1616 return got;
1617 }
1618
1619 return SCM_UNSPECIFIED;
1620 }
1621
1622 /* The reader for the sharp `#' character. It basically dispatches reads
1623 among the above token readers. */
1624 static SCM
1625 scm_read_sharp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
1626 long line, int column)
1627 #define FUNC_NAME "scm_lreadr"
1628 {
1629 SCM result;
1630
1631 chr = scm_getc (port);
1632
1633 result = scm_read_sharp_extension (chr, port, opts);
1634 if (!scm_is_eq (result, SCM_UNSPECIFIED))
1635 return result;
1636
1637 switch (chr)
1638 {
1639 case '\\':
1640 return (scm_read_character (chr, port, opts));
1641 case '(':
1642 return (scm_read_vector (chr, port, opts, line, column));
1643 case 's':
1644 case 'u':
1645 case 'f':
1646 case 'c':
1647 /* This one may return either a boolean or an SRFI-4 vector. */
1648 return (scm_read_srfi4_vector (chr, port, opts, line, column));
1649 case 'v':
1650 return (scm_read_bytevector (chr, port, opts, line, column));
1651 case '*':
1652 return (scm_read_guile_bit_vector (chr, port, opts, line, column));
1653 case 't':
1654 case 'T':
1655 case 'F':
1656 return (scm_read_boolean (chr, port));
1657 case ':':
1658 return (scm_read_keyword (chr, port, opts));
1659 case '0': case '1': case '2': case '3': case '4':
1660 case '5': case '6': case '7': case '8': case '9':
1661 case '@':
1662 #if SCM_ENABLE_DEPRECATED
1663 /* See below for 'i' and 'e'. */
1664 case 'a':
1665 case 'y':
1666 case 'h':
1667 case 'l':
1668 #endif
1669 return (scm_read_array (chr, port, opts, line, column));
1670
1671 case 'i':
1672 case 'e':
1673 #if SCM_ENABLE_DEPRECATED
1674 {
1675 /* When next char is '(', it really is an old-style
1676 uniform array. */
1677 scm_t_wchar next_c = scm_getc (port);
1678 if (next_c != EOF)
1679 scm_ungetc (next_c, port);
1680 if (next_c == '(')
1681 return scm_read_array (chr, port, opts, line, column);
1682 /* Fall through. */
1683 }
1684 #endif
1685 case 'b':
1686 case 'B':
1687 case 'o':
1688 case 'O':
1689 case 'd':
1690 case 'D':
1691 case 'x':
1692 case 'X':
1693 case 'I':
1694 case 'E':
1695 return (scm_read_number_and_radix (chr, port, opts));
1696 case '{':
1697 return (scm_read_extended_symbol (chr, port));
1698 case '!':
1699 return (scm_read_shebang (chr, port, opts));
1700 case ';':
1701 return (scm_read_commented_expression (chr, port, opts));
1702 case '`':
1703 case '\'':
1704 case ',':
1705 return (scm_read_syntax (chr, port, opts));
1706 case 'n':
1707 return (scm_read_nil (chr, port, opts));
1708 default:
1709 result = scm_read_sharp_extension (chr, port, opts);
1710 if (scm_is_eq (result, SCM_UNSPECIFIED))
1711 {
1712 /* To remain compatible with 1.8 and earlier, the following
1713 characters have lower precedence than `read-hash-extend'
1714 characters. */
1715 switch (chr)
1716 {
1717 case '|':
1718 return scm_read_r6rs_block_comment (chr, port);
1719 default:
1720 scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
1721 scm_list_1 (SCM_MAKE_CHAR (chr)));
1722 }
1723 }
1724 else
1725 return result;
1726 }
1727
1728 return SCM_UNSPECIFIED;
1729 }
1730 #undef FUNC_NAME
1731
1732 static SCM
1733 read_inner_expression (SCM port, scm_t_read_opts *opts)
1734 #define FUNC_NAME "read_inner_expression"
1735 {
1736 while (1)
1737 {
1738 scm_t_wchar chr;
1739
1740 chr = scm_getc (port);
1741
1742 switch (chr)
1743 {
1744 case SCM_WHITE_SPACES:
1745 case SCM_LINE_INCREMENTORS:
1746 break;
1747 case ';':
1748 (void) scm_read_semicolon_comment (chr, port);
1749 break;
1750 case '{':
1751 if (opts->curly_infix_p)
1752 {
1753 if (opts->neoteric_p)
1754 return scm_read_sexp (chr, port, opts);
1755 else
1756 {
1757 SCM expr;
1758
1759 /* Enable neoteric expressions within curly braces */
1760 opts->neoteric_p = 1;
1761 expr = scm_read_sexp (chr, port, opts);
1762 opts->neoteric_p = 0;
1763 return expr;
1764 }
1765 }
1766 else
1767 return scm_read_mixed_case_symbol (chr, port, opts);
1768 case '[':
1769 if (opts->square_brackets_p)
1770 return scm_read_sexp (chr, port, opts);
1771 else if (opts->curly_infix_p)
1772 {
1773 /* The syntax of neoteric expressions requires that '[' be
1774 a delimiter when curly-infix is enabled, so it cannot
1775 be part of an unescaped symbol. We might as well do
1776 something useful with it, so we adopt Kawa's convention:
1777 [...] => ($bracket-list$ ...) */
1778 long line = SCM_LINUM (port);
1779 int column = SCM_COL (port) - 1;
1780 return maybe_annotate_source
1781 (scm_cons (sym_bracket_list, scm_read_sexp (chr, port, opts)),
1782 port, opts, line, column);
1783 }
1784 else
1785 return scm_read_mixed_case_symbol (chr, port, opts);
1786 case '(':
1787 return (scm_read_sexp (chr, port, opts));
1788 case '"':
1789 return (scm_read_string (chr, port, opts));
1790 case '\'':
1791 case '`':
1792 case ',':
1793 return (scm_read_quote (chr, port, opts));
1794 case '#':
1795 {
1796 long line = SCM_LINUM (port);
1797 int column = SCM_COL (port) - 1;
1798 SCM result = scm_read_sharp (chr, port, opts, line, column);
1799 if (scm_is_eq (result, SCM_UNSPECIFIED))
1800 /* We read a comment or some such. */
1801 break;
1802 else
1803 return result;
1804 }
1805 case ')':
1806 scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
1807 break;
1808 case '}':
1809 if (opts->curly_infix_p)
1810 scm_i_input_error (FUNC_NAME, port, "unexpected \"}\"", SCM_EOL);
1811 else
1812 return scm_read_mixed_case_symbol (chr, port, opts);
1813 case ']':
1814 if (opts->square_brackets_p)
1815 scm_i_input_error (FUNC_NAME, port, "unexpected \"]\"", SCM_EOL);
1816 /* otherwise fall through */
1817 case EOF:
1818 return SCM_EOF_VAL;
1819 case ':':
1820 if (opts->keyword_style == KEYWORD_STYLE_PREFIX)
1821 return scm_symbol_to_keyword (scm_read_expression (port, opts));
1822 /* Fall through. */
1823
1824 default:
1825 {
1826 if (((chr >= '0') && (chr <= '9'))
1827 || (strchr ("+-.", chr)))
1828 return (scm_read_number (chr, port, opts));
1829 else
1830 return (scm_read_mixed_case_symbol (chr, port, opts));
1831 }
1832 }
1833 }
1834 }
1835 #undef FUNC_NAME
1836
1837 static SCM
1838 scm_read_expression (SCM port, scm_t_read_opts *opts)
1839 #define FUNC_NAME "scm_read_expression"
1840 {
1841 if (!opts->neoteric_p)
1842 return read_inner_expression (port, opts);
1843 else
1844 {
1845 long line = 0;
1846 int column = 0;
1847 SCM expr;
1848
1849 if (opts->record_positions_p)
1850 {
1851 /* We need to get the position of the first non-whitespace
1852 character in order to correctly annotate neoteric
1853 expressions. For example, for the expression 'f(x)', the
1854 first call to 'read_inner_expression' reads the 'f' (which
1855 cannot be annotated), and then we later read the '(x)' and
1856 use it to construct the new list (f x). */
1857 int c = flush_ws (port, opts, (char *) NULL);
1858 if (c == EOF)
1859 return SCM_EOF_VAL;
1860 scm_ungetc (c, port);
1861 line = SCM_LINUM (port);
1862 column = SCM_COL (port);
1863 }
1864
1865 expr = read_inner_expression (port, opts);
1866
1867 /* 'expr' is the first component of the neoteric expression. Now
1868 we loop, and as long as the next character is '(', '[', or '{',
1869 (without any intervening whitespace), we use it to construct a
1870 new expression. For example, f{n - 1}(x) => ((f (- n 1)) x). */
1871 for (;;)
1872 {
1873 int chr = scm_getc (port);
1874
1875 if (chr == '(')
1876 /* e(...) => (e ...) */
1877 expr = scm_cons (expr, scm_read_sexp (chr, port, opts));
1878 else if (chr == '[')
1879 /* e[...] => ($bracket-apply$ e ...) */
1880 expr = scm_cons (sym_bracket_apply,
1881 scm_cons (expr,
1882 scm_read_sexp (chr, port, opts)));
1883 else if (chr == '{')
1884 {
1885 SCM arg = scm_read_sexp (chr, port, opts);
1886
1887 if (scm_is_null (arg))
1888 expr = scm_list_1 (expr); /* e{} => (e) */
1889 else
1890 expr = scm_list_2 (expr, arg); /* e{...} => (e {...}) */
1891 }
1892 else
1893 {
1894 if (chr != EOF)
1895 scm_ungetc (chr, port);
1896 break;
1897 }
1898 maybe_annotate_source (expr, port, opts, line, column);
1899 }
1900 return expr;
1901 }
1902 }
1903 #undef FUNC_NAME
1904
1905 \f
1906 /* Actual reader. */
1907
1908 static void init_read_options (SCM port, scm_t_read_opts *opts);
1909
1910 SCM_DEFINE (scm_read, "read", 0, 1, 0,
1911 (SCM port),
1912 "Read an s-expression from the input port @var{port}, or from\n"
1913 "the current input port if @var{port} is not specified.\n"
1914 "Any whitespace before the next token is discarded.")
1915 #define FUNC_NAME s_scm_read
1916 {
1917 scm_t_read_opts opts;
1918 int c;
1919
1920 if (SCM_UNBNDP (port))
1921 port = scm_current_input_port ();
1922 SCM_VALIDATE_OPINPORT (1, port);
1923
1924 init_read_options (port, &opts);
1925
1926 c = flush_ws (port, &opts, (char *) NULL);
1927 if (EOF == c)
1928 return SCM_EOF_VAL;
1929 scm_ungetc (c, port);
1930
1931 return (scm_read_expression (port, &opts));
1932 }
1933 #undef FUNC_NAME
1934
1935
1936 \f
1937
1938 /* Manipulate the read-hash-procedures alist. This could be written in
1939 Scheme, but maybe it will also be used by C code during initialisation. */
1940 SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
1941 (SCM chr, SCM proc),
1942 "Install the procedure @var{proc} for reading expressions\n"
1943 "starting with the character sequence @code{#} and @var{chr}.\n"
1944 "@var{proc} will be called with two arguments: the character\n"
1945 "@var{chr} and the port to read further data from. The object\n"
1946 "returned will be the return value of @code{read}. \n"
1947 "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1948 )
1949 #define FUNC_NAME s_scm_read_hash_extend
1950 {
1951 SCM this;
1952 SCM prev;
1953
1954 SCM_VALIDATE_CHAR (1, chr);
1955 SCM_ASSERT (scm_is_false (proc)
1956 || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
1957 proc, SCM_ARG2, FUNC_NAME);
1958
1959 /* Check if chr is already in the alist. */
1960 this = scm_i_read_hash_procedures_ref ();
1961 prev = SCM_BOOL_F;
1962 while (1)
1963 {
1964 if (scm_is_null (this))
1965 {
1966 /* not found, so add it to the beginning. */
1967 if (scm_is_true (proc))
1968 {
1969 SCM new = scm_cons (scm_cons (chr, proc),
1970 scm_i_read_hash_procedures_ref ());
1971 scm_i_read_hash_procedures_set_x (new);
1972 }
1973 break;
1974 }
1975 if (scm_is_eq (chr, SCM_CAAR (this)))
1976 {
1977 /* already in the alist. */
1978 if (scm_is_false (proc))
1979 {
1980 /* remove it. */
1981 if (scm_is_false (prev))
1982 {
1983 SCM rest = SCM_CDR (scm_i_read_hash_procedures_ref ());
1984 scm_i_read_hash_procedures_set_x (rest);
1985 }
1986 else
1987 scm_set_cdr_x (prev, SCM_CDR (this));
1988 }
1989 else
1990 {
1991 /* replace it. */
1992 scm_set_cdr_x (SCM_CAR (this), proc);
1993 }
1994 break;
1995 }
1996 prev = this;
1997 this = SCM_CDR (this);
1998 }
1999
2000 return SCM_UNSPECIFIED;
2001 }
2002 #undef FUNC_NAME
2003
2004 /* Recover the read-hash procedure corresponding to char c. */
2005 static SCM
2006 scm_get_hash_procedure (int c)
2007 {
2008 SCM rest = scm_i_read_hash_procedures_ref ();
2009
2010 while (1)
2011 {
2012 if (scm_is_null (rest))
2013 return SCM_BOOL_F;
2014
2015 if (SCM_CHAR (SCM_CAAR (rest)) == c)
2016 return SCM_CDAR (rest);
2017
2018 rest = SCM_CDR (rest);
2019 }
2020 }
2021
2022 #define SCM_ENCODING_SEARCH_SIZE (500)
2023
2024 /* Search the first few hundred characters of a file for an Emacs-like coding
2025 declaration. Returns either NULL or a string whose storage has been
2026 allocated with `scm_gc_malloc ()'. */
2027 char *
2028 scm_i_scan_for_encoding (SCM port)
2029 {
2030 scm_t_port *pt;
2031 char header[SCM_ENCODING_SEARCH_SIZE+1];
2032 size_t bytes_read, encoding_length, i;
2033 char *encoding = NULL;
2034 char *pos, *encoding_start;
2035 int in_comment;
2036
2037 pt = SCM_PTAB_ENTRY (port);
2038
2039 if (pt->rw_active == SCM_PORT_WRITE)
2040 scm_flush (port);
2041
2042 if (pt->rw_random)
2043 pt->rw_active = SCM_PORT_READ;
2044
2045 if (pt->read_pos == pt->read_end)
2046 {
2047 /* We can use the read buffer, and thus avoid a seek. */
2048 if (scm_fill_input (port) == EOF)
2049 return NULL;
2050
2051 bytes_read = pt->read_end - pt->read_pos;
2052 if (bytes_read > SCM_ENCODING_SEARCH_SIZE)
2053 bytes_read = SCM_ENCODING_SEARCH_SIZE;
2054
2055 if (bytes_read <= 1)
2056 /* An unbuffered port -- don't scan. */
2057 return NULL;
2058
2059 memcpy (header, pt->read_pos, bytes_read);
2060 header[bytes_read] = '\0';
2061 }
2062 else
2063 {
2064 /* Try to read some bytes and then seek back. Not all ports
2065 support seeking back; and indeed some file ports (like
2066 /dev/urandom) will succeed on an lseek (fd, 0, SEEK_CUR)---the
2067 check performed by SCM_FPORT_FDES---but fail to seek
2068 backwards. Hence this block comes second. We prefer to use
2069 the read buffer in-place. */
2070 if (SCM_FPORTP (port) && !SCM_FDES_RANDOM_P (SCM_FPORT_FDES (port)))
2071 return NULL;
2072
2073 bytes_read = scm_c_read (port, header, SCM_ENCODING_SEARCH_SIZE);
2074 header[bytes_read] = '\0';
2075 scm_seek (port, scm_from_int (0), scm_from_int (SEEK_SET));
2076 }
2077
2078 /* search past "coding[:=]" */
2079 pos = header;
2080 while (1)
2081 {
2082 if ((pos = strstr(pos, "coding")) == NULL)
2083 return NULL;
2084
2085 pos += strlen("coding");
2086 if (pos - header >= SCM_ENCODING_SEARCH_SIZE ||
2087 (*pos == ':' || *pos == '='))
2088 {
2089 pos ++;
2090 break;
2091 }
2092 }
2093
2094 /* skip spaces */
2095 while (pos - header <= SCM_ENCODING_SEARCH_SIZE &&
2096 (*pos == ' ' || *pos == '\t'))
2097 pos ++;
2098
2099 /* grab the next token */
2100 encoding_start = pos;
2101 i = 0;
2102 while (encoding_start + i - header <= SCM_ENCODING_SEARCH_SIZE
2103 && encoding_start + i - header < bytes_read
2104 && (isalnum ((int) encoding_start[i])
2105 || strchr ("_-.:/,+=()", encoding_start[i]) != NULL))
2106 i++;
2107
2108 encoding_length = i;
2109 if (encoding_length == 0)
2110 return NULL;
2111
2112 encoding = scm_gc_strndup (encoding_start, encoding_length, "encoding");
2113 for (i = 0; i < encoding_length; i++)
2114 encoding[i] = toupper ((int) encoding[i]);
2115
2116 /* push backwards to make sure we were in a comment */
2117 in_comment = 0;
2118 pos = encoding_start;
2119 while (pos >= header)
2120 {
2121 if (*pos == ';')
2122 {
2123 in_comment = 1;
2124 break;
2125 }
2126 else if (*pos == '\n' || pos == header)
2127 {
2128 /* This wasn't in a semicolon comment. Check for a
2129 hash-bang comment. */
2130 char *beg = strstr (header, "#!");
2131 char *end = strstr (header, "!#");
2132 if (beg < encoding_start && encoding_start + encoding_length <= end)
2133 in_comment = 1;
2134 break;
2135 }
2136 else
2137 {
2138 pos --;
2139 continue;
2140 }
2141 }
2142 if (!in_comment)
2143 /* This wasn't in a comment */
2144 return NULL;
2145
2146 return encoding;
2147 }
2148
2149 SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0,
2150 (SCM port),
2151 "Scans the port for an Emacs-like character coding declaration\n"
2152 "near the top of the contents of a port with random-accessible contents.\n"
2153 "The coding declaration is of the form\n"
2154 "@code{coding: XXXXX} and must appear in a scheme comment.\n"
2155 "\n"
2156 "Returns a string containing the character encoding of the file\n"
2157 "if a declaration was found, or @code{#f} otherwise.\n")
2158 #define FUNC_NAME s_scm_file_encoding
2159 {
2160 char *enc;
2161 SCM s_enc;
2162
2163 SCM_VALIDATE_OPINPORT (SCM_ARG1, port);
2164
2165 enc = scm_i_scan_for_encoding (port);
2166 if (enc == NULL)
2167 return SCM_BOOL_F;
2168 else
2169 {
2170 s_enc = scm_from_locale_string (enc);
2171 return s_enc;
2172 }
2173
2174 return SCM_BOOL_F;
2175 }
2176 #undef FUNC_NAME
2177
2178 \f
2179 /* Per-port read options.
2180
2181 We store per-port read options in the 'port-read-options' port
2182 property, which is stored in the internal port structure. The value
2183 stored is a single integer that contains a two-bit field for each
2184 read option.
2185
2186 If a bit field contains READ_OPTION_INHERIT (3), that indicates that
2187 the applicable value should be inherited from the corresponding
2188 global read option. Otherwise, the bit field contains the value of
2189 the read option. For boolean read options that have been set
2190 per-port, the possible values are 0 or 1. If the 'keyword_style'
2191 read option has been set per-port, its possible values are those in
2192 'enum t_keyword_style'. */
2193
2194 /* Key to read options in port properties. */
2195 SCM_SYMBOL (sym_port_read_options, "port-read-options");
2196
2197 /* Offsets of bit fields for each per-port override */
2198 #define READ_OPTION_COPY_SOURCE_P 0
2199 #define READ_OPTION_RECORD_POSITIONS_P 2
2200 #define READ_OPTION_CASE_INSENSITIVE_P 4
2201 #define READ_OPTION_KEYWORD_STYLE 6
2202 #define READ_OPTION_R6RS_ESCAPES_P 8
2203 #define READ_OPTION_SQUARE_BRACKETS_P 10
2204 #define READ_OPTION_HUNGRY_EOL_ESCAPES_P 12
2205 #define READ_OPTION_CURLY_INFIX_P 14
2206
2207 /* The total width in bits of the per-port overrides */
2208 #define READ_OPTIONS_NUM_BITS 16
2209
2210 #define READ_OPTIONS_INHERIT_ALL ((1UL << READ_OPTIONS_NUM_BITS) - 1)
2211 #define READ_OPTIONS_MAX_VALUE READ_OPTIONS_INHERIT_ALL
2212
2213 #define READ_OPTION_MASK 3
2214 #define READ_OPTION_INHERIT 3
2215
2216 static void
2217 set_port_read_option (SCM port, int option, int new_value)
2218 {
2219 SCM scm_read_options;
2220 unsigned int read_options;
2221
2222 new_value &= READ_OPTION_MASK;
2223 scm_read_options = scm_i_port_property (port, sym_port_read_options);
2224 if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE))
2225 read_options = scm_to_uint (scm_read_options);
2226 else
2227 read_options = READ_OPTIONS_INHERIT_ALL;
2228 read_options &= ~(READ_OPTION_MASK << option);
2229 read_options |= new_value << option;
2230 scm_read_options = scm_from_uint (read_options);
2231 scm_i_set_port_property_x (port, sym_port_read_options, scm_read_options);
2232 }
2233
2234 /* Set OPTS and PORT's case-insensitivity according to VALUE. */
2235 static void
2236 set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts, int value)
2237 {
2238 value = !!value;
2239 opts->case_insensitive_p = value;
2240 set_port_read_option (port, READ_OPTION_CASE_INSENSITIVE_P, value);
2241 }
2242
2243 /* Set OPTS and PORT's square_brackets_p option according to VALUE. */
2244 static void
2245 set_port_square_brackets_p (SCM port, scm_t_read_opts *opts, int value)
2246 {
2247 value = !!value;
2248 opts->square_brackets_p = value;
2249 set_port_read_option (port, READ_OPTION_SQUARE_BRACKETS_P, value);
2250 }
2251
2252 /* Set OPTS and PORT's curly_infix_p option according to VALUE. */
2253 static void
2254 set_port_curly_infix_p (SCM port, scm_t_read_opts *opts, int value)
2255 {
2256 value = !!value;
2257 opts->curly_infix_p = value;
2258 set_port_read_option (port, READ_OPTION_CURLY_INFIX_P, value);
2259 }
2260
2261 /* Initialize OPTS based on PORT's read options and the global read
2262 options. */
2263 static void
2264 init_read_options (SCM port, scm_t_read_opts *opts)
2265 {
2266 SCM val, scm_read_options;
2267 unsigned int read_options, x;
2268
2269 scm_read_options = scm_i_port_property (port, sym_port_read_options);
2270
2271 if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE))
2272 read_options = scm_to_uint (scm_read_options);
2273 else
2274 read_options = READ_OPTIONS_INHERIT_ALL;
2275
2276 x = READ_OPTION_MASK & (read_options >> READ_OPTION_KEYWORD_STYLE);
2277 if (x == READ_OPTION_INHERIT)
2278 {
2279 val = SCM_PACK (SCM_KEYWORD_STYLE);
2280 if (scm_is_eq (val, scm_keyword_prefix))
2281 x = KEYWORD_STYLE_PREFIX;
2282 else if (scm_is_eq (val, scm_keyword_postfix))
2283 x = KEYWORD_STYLE_POSTFIX;
2284 else
2285 x = KEYWORD_STYLE_HASH_PREFIX;
2286 }
2287 opts->keyword_style = x;
2288
2289 #define RESOLVE_BOOLEAN_OPTION(NAME, name) \
2290 do \
2291 { \
2292 x = READ_OPTION_MASK & (read_options >> READ_OPTION_ ## NAME); \
2293 if (x == READ_OPTION_INHERIT) \
2294 x = !!SCM_ ## NAME; \
2295 opts->name = x; \
2296 } \
2297 while (0)
2298
2299 RESOLVE_BOOLEAN_OPTION (COPY_SOURCE_P, copy_source_p);
2300 RESOLVE_BOOLEAN_OPTION (RECORD_POSITIONS_P, record_positions_p);
2301 RESOLVE_BOOLEAN_OPTION (CASE_INSENSITIVE_P, case_insensitive_p);
2302 RESOLVE_BOOLEAN_OPTION (R6RS_ESCAPES_P, r6rs_escapes_p);
2303 RESOLVE_BOOLEAN_OPTION (SQUARE_BRACKETS_P, square_brackets_p);
2304 RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P, hungry_eol_escapes_p);
2305 RESOLVE_BOOLEAN_OPTION (CURLY_INFIX_P, curly_infix_p);
2306
2307 #undef RESOLVE_BOOLEAN_OPTION
2308
2309 opts->neoteric_p = 0;
2310 }
2311
2312 void
2313 scm_init_read ()
2314 {
2315 SCM read_hash_procs;
2316
2317 read_hash_procs = scm_make_fluid_with_default (SCM_EOL);
2318
2319 scm_i_read_hash_procedures =
2320 SCM_VARIABLE_LOC (scm_c_define ("%read-hash-procedures", read_hash_procs));
2321
2322 scm_init_opts (scm_read_options, scm_read_opts);
2323 #include "libguile/read.x"
2324 }
2325
2326 /*
2327 Local Variables:
2328 c-file-style: "gnu"
2329 End:
2330 */