Implement SRFI-105 curly infix expressions.
[bpt/guile.git] / libguile / read.c
1 /* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006,
2 * 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
18 */
19
20
21 \f
22
23 #ifdef HAVE_CONFIG_H
24 # include <config.h>
25 #endif
26
27 #include <stdio.h>
28 #include <ctype.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <unicase.h>
32 #include <unictype.h>
33
34 #include "libguile/_scm.h"
35 #include "libguile/bytevectors.h"
36 #include "libguile/chars.h"
37 #include "libguile/eval.h"
38 #include "libguile/arrays.h"
39 #include "libguile/bitvectors.h"
40 #include "libguile/keywords.h"
41 #include "libguile/alist.h"
42 #include "libguile/srcprop.h"
43 #include "libguile/hashtab.h"
44 #include "libguile/hash.h"
45 #include "libguile/ports.h"
46 #include "libguile/fports.h"
47 #include "libguile/root.h"
48 #include "libguile/strings.h"
49 #include "libguile/strports.h"
50 #include "libguile/vectors.h"
51 #include "libguile/validate.h"
52 #include "libguile/srfi-4.h"
53 #include "libguile/srfi-13.h"
54
55 #include "libguile/read.h"
56 #include "libguile/private-options.h"
57
58
59 \f
60
61 SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
62 SCM_SYMBOL (scm_keyword_prefix, "prefix");
63 SCM_SYMBOL (scm_keyword_postfix, "postfix");
64 SCM_SYMBOL (sym_nil, "nil");
65
66 /* SRFI-105 curly infix expression support */
67 SCM_SYMBOL (sym_nfx, "$nfx$");
68 SCM_SYMBOL (sym_bracket_list, "$bracket-list$");
69 SCM_SYMBOL (sym_bracket_apply, "$bracket-apply$");
70
71 scm_t_option scm_read_opts[] =
72 {
73 { SCM_OPTION_BOOLEAN, "copy", 0,
74 "Copy source code expressions." },
75 { SCM_OPTION_BOOLEAN, "positions", 1,
76 "Record positions of source code expressions." },
77 { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
78 "Convert symbols to lower case."},
79 { SCM_OPTION_SCM, "keywords", (scm_t_bits) SCM_BOOL_F_BITS,
80 "Style of keyword recognition: #f, 'prefix or 'postfix."},
81 { SCM_OPTION_BOOLEAN, "r6rs-hex-escapes", 0,
82 "Use R6RS variable-length character and string hex escapes."},
83 { SCM_OPTION_BOOLEAN, "square-brackets", 1,
84 "Treat `[' and `]' as parentheses, for R6RS compatibility."},
85 { SCM_OPTION_BOOLEAN, "hungry-eol-escapes", 0,
86 "In strings, consume leading whitespace after an escaped end-of-line."},
87 { SCM_OPTION_BOOLEAN, "curly-infix", 0,
88 "Support SRFI-105 curly infix expressions."},
89 { 0, },
90 };
91
92 /* Internal read options structure. This is initialized by 'scm_read'
93 from the global and per-port read options, and a pointer is passed
94 down to all helper functions. */
95
96 enum t_keyword_style
97 {
98 KEYWORD_STYLE_HASH_PREFIX,
99 KEYWORD_STYLE_PREFIX,
100 KEYWORD_STYLE_POSTFIX
101 };
102
103 struct t_read_opts
104 {
105 enum t_keyword_style keyword_style;
106 unsigned int copy_source_p : 1;
107 unsigned int record_positions_p : 1;
108 unsigned int case_insensitive_p : 1;
109 unsigned int r6rs_escapes_p : 1;
110 unsigned int square_brackets_p : 1;
111 unsigned int hungry_eol_escapes_p : 1;
112 unsigned int curly_infix_p : 1;
113 unsigned int neoteric_p : 1;
114 };
115
116 typedef struct t_read_opts scm_t_read_opts;
117
118
119 /*
120 Give meaningful error messages for errors
121
122 We use the format
123
124 FILE:LINE:COL: MESSAGE
125 This happened in ....
126
127 This is not standard GNU format, but the test-suite likes the real
128 message to be in front.
129
130 */
131
132
133 void
134 scm_i_input_error (char const *function,
135 SCM port, const char *message, SCM arg)
136 {
137 SCM fn = (scm_is_string (SCM_FILENAME(port))
138 ? SCM_FILENAME(port)
139 : scm_from_locale_string ("#<unknown port>"));
140
141 SCM string_port = scm_open_output_string ();
142 SCM string = SCM_EOL;
143 scm_simple_format (string_port,
144 scm_from_locale_string ("~A:~S:~S: ~A"),
145 scm_list_4 (fn,
146 scm_from_long (SCM_LINUM (port) + 1),
147 scm_from_int (SCM_COL (port) + 1),
148 scm_from_locale_string (message)));
149
150 string = scm_get_output_string (string_port);
151 scm_close_output_port (string_port);
152 scm_error_scm (scm_from_latin1_symbol ("read-error"),
153 function? scm_from_locale_string (function) : SCM_BOOL_F,
154 string,
155 arg,
156 SCM_BOOL_F);
157 }
158
159
160 SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
161 (SCM setting),
162 "Option interface for the read options. Instead of using\n"
163 "this procedure directly, use the procedures @code{read-enable},\n"
164 "@code{read-disable}, @code{read-set!} and @code{read-options}.")
165 #define FUNC_NAME s_scm_read_options
166 {
167 SCM ans = scm_options (setting,
168 scm_read_opts,
169 FUNC_NAME);
170 if (SCM_COPY_SOURCE_P)
171 SCM_RECORD_POSITIONS_P = 1;
172 return ans;
173 }
174 #undef FUNC_NAME
175
176 /* A fluid referring to an association list mapping extra hash
177 characters to procedures. */
178 static SCM *scm_i_read_hash_procedures;
179
180 static SCM
181 scm_i_read_hash_procedures_ref (void)
182 {
183 return scm_fluid_ref (*scm_i_read_hash_procedures);
184 }
185
186 static void
187 scm_i_read_hash_procedures_set_x (SCM value)
188 {
189 scm_fluid_set_x (*scm_i_read_hash_procedures, value);
190 }
191
192 \f
193 /* Token readers. */
194
195
196 /* Size of the C buffer used to read symbols and numbers. */
197 #define READER_BUFFER_SIZE 128
198
199 /* Number of 32-bit codepoints in the buffer used to read strings. */
200 #define READER_STRING_BUFFER_SIZE 128
201
202 /* The maximum size of Scheme character names. */
203 #define READER_CHAR_NAME_MAX_SIZE 50
204
205 /* The maximum size of reader directive names. */
206 #define READER_DIRECTIVE_NAME_MAX_SIZE 50
207
208
209 /* `isblank' is only in C99. */
210 #define CHAR_IS_BLANK_(_chr) \
211 (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n') \
212 || ((_chr) == '\f') || ((_chr) == '\r'))
213
214 #ifdef MSDOS
215 # define CHAR_IS_BLANK(_chr) \
216 ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
217 #else
218 # define CHAR_IS_BLANK CHAR_IS_BLANK_
219 #endif
220
221
222 /* R5RS one-character delimiters (see section 7.1.1, ``Lexical
223 structure''). */
224 #define CHAR_IS_R5RS_DELIMITER(c) \
225 (CHAR_IS_BLANK (c) \
226 || (c) == ')' || (c) == '(' || (c) == ';' || (c) == '"')
227
228 #define CHAR_IS_DELIMITER(c) \
229 (CHAR_IS_R5RS_DELIMITER (c) \
230 || (((c) == ']' || (c) == '[') && (opts->square_brackets_p \
231 || opts->curly_infix_p)) \
232 || (((c) == '}' || (c) == '{') && opts->curly_infix_p))
233
234 /* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
235 Structure''. */
236 #define CHAR_IS_EXPONENT_MARKER(_chr) \
237 (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
238 || ((_chr) == 'd') || ((_chr) == 'l'))
239
240 /* Read an SCSH block comment. */
241 static SCM scm_read_scsh_block_comment (scm_t_wchar, SCM);
242 static SCM scm_read_r6rs_block_comment (scm_t_wchar, SCM);
243 static SCM scm_read_commented_expression (scm_t_wchar, SCM, scm_t_read_opts *);
244 static SCM scm_read_shebang (scm_t_wchar, SCM, scm_t_read_opts *);
245 static SCM scm_get_hash_procedure (int);
246
247 /* Read from PORT until a delimiter (e.g., a whitespace) is read. Put the
248 result in the pre-allocated buffer BUF. Return zero if the whole token has
249 fewer than BUF_SIZE bytes, non-zero otherwise. READ will be set the number of
250 bytes actually read. */
251 static int
252 read_token (SCM port, scm_t_read_opts *opts,
253 char *buf, size_t buf_size, size_t *read)
254 {
255 *read = 0;
256
257 while (*read < buf_size)
258 {
259 int chr;
260
261 chr = scm_get_byte_or_eof (port);
262
263 if (chr == EOF)
264 return 0;
265 else if (CHAR_IS_DELIMITER (chr))
266 {
267 scm_unget_byte (chr, port);
268 return 0;
269 }
270 else
271 {
272 *buf = (char) chr;
273 buf++, (*read)++;
274 }
275 }
276
277 return 1;
278 }
279
280 /* Like `read_token', but return either BUFFER, or a GC-allocated buffer
281 if the token doesn't fit in BUFFER_SIZE bytes. */
282 static char *
283 read_complete_token (SCM port, scm_t_read_opts *opts,
284 char *buffer, size_t buffer_size, size_t *read)
285 {
286 int overflow = 0;
287 size_t bytes_read, overflow_size = 0;
288 char *overflow_buffer = NULL;
289
290 do
291 {
292 overflow = read_token (port, opts, buffer, buffer_size, &bytes_read);
293 if (bytes_read == 0)
294 break;
295 if (overflow || overflow_size != 0)
296 {
297 if (overflow_size == 0)
298 {
299 overflow_buffer = scm_gc_malloc_pointerless (bytes_read, "read");
300 memcpy (overflow_buffer, buffer, bytes_read);
301 overflow_size = bytes_read;
302 }
303 else
304 {
305 char *new_buf =
306 scm_gc_malloc_pointerless (overflow_size + bytes_read, "read");
307
308 memcpy (new_buf, overflow_buffer, overflow_size);
309 memcpy (new_buf + overflow_size, buffer, bytes_read);
310
311 overflow_buffer = new_buf;
312 overflow_size += bytes_read;
313 }
314 }
315 }
316 while (overflow);
317
318 if (overflow_size)
319 *read = overflow_size;
320 else
321 *read = bytes_read;
322
323 return (overflow_size > 0 ? overflow_buffer : buffer);
324 }
325
326 /* Skip whitespace from PORT and return the first non-whitespace character
327 read. Raise an error on end-of-file. */
328 static int
329 flush_ws (SCM port, scm_t_read_opts *opts, const char *eoferr)
330 {
331 scm_t_wchar c;
332 while (1)
333 switch (c = scm_getc (port))
334 {
335 case EOF:
336 goteof:
337 if (eoferr)
338 {
339 scm_i_input_error (eoferr,
340 port,
341 "end of file",
342 SCM_EOL);
343 }
344 return c;
345
346 case ';':
347 lp:
348 switch (c = scm_getc (port))
349 {
350 case EOF:
351 goto goteof;
352 default:
353 goto lp;
354 case SCM_LINE_INCREMENTORS:
355 break;
356 }
357 break;
358
359 case '#':
360 switch (c = scm_getc (port))
361 {
362 case EOF:
363 eoferr = "read_sharp";
364 goto goteof;
365 case '!':
366 scm_read_shebang (c, port, opts);
367 break;
368 case ';':
369 scm_read_commented_expression (c, port, opts);
370 break;
371 case '|':
372 if (scm_is_false (scm_get_hash_procedure (c)))
373 {
374 scm_read_r6rs_block_comment (c, port);
375 break;
376 }
377 /* fall through */
378 default:
379 scm_ungetc (c, port);
380 return '#';
381 }
382 break;
383
384 case SCM_LINE_INCREMENTORS:
385 case SCM_SINGLE_SPACES:
386 case '\t':
387 break;
388
389 default:
390 return c;
391 }
392
393 return 0;
394 }
395
396
397 \f
398 /* Token readers. */
399
400 static SCM scm_read_expression (SCM port, scm_t_read_opts *opts);
401 static SCM scm_read_sharp (int chr, SCM port, scm_t_read_opts *opts,
402 long line, int column);
403
404
405 static SCM
406 maybe_annotate_source (SCM x, SCM port, scm_t_read_opts *opts,
407 long line, int column)
408 {
409 if (opts->record_positions_p)
410 scm_i_set_source_properties_x (x, line, column, SCM_FILENAME (port));
411 return x;
412 }
413
414 static SCM
415 scm_read_sexp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
416 #define FUNC_NAME "scm_i_lreadparen"
417 {
418 int c;
419 SCM tmp, tl, ans = SCM_EOL;
420 const int curly_list_p = (chr == '{') && opts->curly_infix_p;
421 const int terminating_char = ((chr == '{') ? '}'
422 : ((chr == '[') ? ']'
423 : ')'));
424
425 /* Need to capture line and column numbers here. */
426 long line = SCM_LINUM (port);
427 int column = SCM_COL (port) - 1;
428
429 c = flush_ws (port, opts, FUNC_NAME);
430 if (terminating_char == c)
431 return SCM_EOL;
432
433 scm_ungetc (c, port);
434 tmp = scm_read_expression (port, opts);
435
436 /* Note that it is possible for scm_read_expression to return
437 scm_sym_dot, but not as part of a dotted pair: as in #{.}#. So
438 check that it's a real dot by checking `c'. */
439 if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
440 {
441 ans = scm_read_expression (port, opts);
442 if (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
443 scm_i_input_error (FUNC_NAME, port, "missing close paren",
444 SCM_EOL);
445 return ans;
446 }
447
448 /* Build the head of the list structure. */
449 ans = tl = scm_cons (tmp, SCM_EOL);
450
451 while (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
452 {
453 SCM new_tail;
454
455 if (c == ')' || (c == ']' && opts->square_brackets_p)
456 || ((c == '}' || c == ']') && opts->curly_infix_p))
457 scm_i_input_error (FUNC_NAME, port,
458 "in pair: mismatched close paren: ~A",
459 scm_list_1 (SCM_MAKE_CHAR (c)));
460
461 scm_ungetc (c, port);
462 tmp = scm_read_expression (port, opts);
463
464 /* See above note about scm_sym_dot. */
465 if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
466 {
467 SCM_SETCDR (tl, scm_read_expression (port, opts));
468
469 c = flush_ws (port, opts, FUNC_NAME);
470 if (terminating_char != c)
471 scm_i_input_error (FUNC_NAME, port,
472 "in pair: missing close paren", SCM_EOL);
473 break;
474 }
475
476 new_tail = scm_cons (tmp, SCM_EOL);
477 SCM_SETCDR (tl, new_tail);
478 tl = new_tail;
479 }
480
481 if (curly_list_p)
482 {
483 /* In addition to finding the length, 'scm_ilength' checks for
484 improper or circular lists, in which case it returns -1. */
485 int len = scm_ilength (ans);
486
487 /* The (len == 0) case is handled above */
488 if (len == 1)
489 /* Return directly to avoid re-annotating the element's source
490 location with the position of the outer brace. Also, it
491 might not be possible to annotate the element. */
492 return scm_car (ans); /* {e} => e */
493 else if (len == 2)
494 ; /* Leave the list unchanged: {e1 e2} => (e1 e2) */
495 else if (len >= 3 && (len & 1))
496 {
497 /* It's a proper list whose length is odd and at least 3. If
498 the elements at odd indices (the infix operator positions)
499 are all 'equal?', then it's a simple curly-infix list.
500 Otherwise it's a mixed curly-infix list. */
501 SCM op = scm_cadr (ans);
502
503 /* Check to see if the elements at odd indices are 'equal?' */
504 for (tl = scm_cdddr (ans); ; tl = scm_cddr (tl))
505 {
506 if (scm_is_null (tl))
507 {
508 /* Convert simple curly-infix list to prefix:
509 {a <op> b <op> ...} => (<op> a b ...) */
510 tl = ans;
511 while (scm_is_pair (scm_cdr (tl)))
512 {
513 tmp = scm_cddr (tl);
514 SCM_SETCDR (tl, tmp);
515 tl = tmp;
516 }
517 ans = scm_cons (op, ans);
518 break;
519 }
520 else if (scm_is_false (scm_equal_p (op, scm_car (tl))))
521 {
522 /* Mixed curly-infix list: {e ...} => ($nfx$ e ...) */
523 ans = scm_cons (sym_nfx, ans);
524 break;
525 }
526 }
527 }
528 else
529 /* Mixed curly-infix (possibly improper) list:
530 {e . tail} => ($nfx$ e . tail) */
531 ans = scm_cons (sym_nfx, ans);
532 }
533
534 return maybe_annotate_source (ans, port, opts, line, column);
535 }
536 #undef FUNC_NAME
537
538
539 /* Read a hexadecimal number NDIGITS in length. Put its value into the variable
540 C. If TERMINATOR is non-null, terminate early if the TERMINATOR character is
541 found. */
542 #define SCM_READ_HEX_ESCAPE(ndigits, terminator) \
543 do \
544 { \
545 scm_t_wchar a; \
546 size_t i = 0; \
547 c = 0; \
548 while (i < ndigits) \
549 { \
550 a = scm_getc (port); \
551 if (a == EOF) \
552 goto str_eof; \
553 if (terminator \
554 && (a == (scm_t_wchar) terminator) \
555 && (i > 0)) \
556 break; \
557 if ('0' <= a && a <= '9') \
558 a -= '0'; \
559 else if ('A' <= a && a <= 'F') \
560 a = a - 'A' + 10; \
561 else if ('a' <= a && a <= 'f') \
562 a = a - 'a' + 10; \
563 else \
564 { \
565 c = a; \
566 goto bad_escaped; \
567 } \
568 c = c * 16 + a; \
569 i ++; \
570 } \
571 } while (0)
572
573 static void
574 skip_intraline_whitespace (SCM port)
575 {
576 scm_t_wchar c;
577
578 do
579 {
580 c = scm_getc (port);
581 if (c == EOF)
582 return;
583 }
584 while (c == '\t' || uc_is_general_category (c, UC_SPACE_SEPARATOR));
585
586 scm_ungetc (c, port);
587 }
588
589 static SCM
590 scm_read_string (int chr, SCM port, scm_t_read_opts *opts)
591 #define FUNC_NAME "scm_lreadr"
592 {
593 /* For strings smaller than C_STR, this function creates only one Scheme
594 object (the string returned). */
595
596 SCM str = SCM_EOL;
597 size_t c_str_len = 0;
598 scm_t_wchar c, c_str[READER_STRING_BUFFER_SIZE];
599
600 /* Need to capture line and column numbers here. */
601 long line = SCM_LINUM (port);
602 int column = SCM_COL (port) - 1;
603
604 while ('"' != (c = scm_getc (port)))
605 {
606 if (c == EOF)
607 {
608 str_eof:
609 scm_i_input_error (FUNC_NAME, port,
610 "end of file in string constant", SCM_EOL);
611 }
612
613 if (c_str_len + 1 >= READER_STRING_BUFFER_SIZE)
614 {
615 str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
616 c_str_len = 0;
617 }
618
619 if (c == '\\')
620 {
621 switch (c = scm_getc (port))
622 {
623 case EOF:
624 goto str_eof;
625 case '"':
626 case '\\':
627 break;
628 case '\n':
629 if (opts->hungry_eol_escapes_p)
630 skip_intraline_whitespace (port);
631 continue;
632 case '0':
633 c = '\0';
634 break;
635 case 'f':
636 c = '\f';
637 break;
638 case 'n':
639 c = '\n';
640 break;
641 case 'r':
642 c = '\r';
643 break;
644 case 't':
645 c = '\t';
646 break;
647 case 'a':
648 c = '\007';
649 break;
650 case 'v':
651 c = '\v';
652 break;
653 case 'b':
654 c = '\010';
655 break;
656 case 'x':
657 if (opts->r6rs_escapes_p)
658 SCM_READ_HEX_ESCAPE (10, ';');
659 else
660 SCM_READ_HEX_ESCAPE (2, '\0');
661 break;
662 case 'u':
663 if (!opts->r6rs_escapes_p)
664 {
665 SCM_READ_HEX_ESCAPE (4, '\0');
666 break;
667 }
668 case 'U':
669 if (!opts->r6rs_escapes_p)
670 {
671 SCM_READ_HEX_ESCAPE (6, '\0');
672 break;
673 }
674 default:
675 bad_escaped:
676 scm_i_input_error (FUNC_NAME, port,
677 "illegal character in escape sequence: ~S",
678 scm_list_1 (SCM_MAKE_CHAR (c)));
679 }
680 }
681
682 c_str[c_str_len++] = c;
683 }
684
685 if (scm_is_null (str))
686 /* Fast path: we got a string that fits in C_STR. */
687 str = scm_from_utf32_stringn (c_str, c_str_len);
688 else
689 {
690 if (c_str_len > 0)
691 str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
692
693 str = scm_string_concatenate_reverse (str, SCM_UNDEFINED, SCM_UNDEFINED);
694 }
695
696 return maybe_annotate_source (str, port, opts, line, column);
697 }
698 #undef FUNC_NAME
699
700
701 static SCM
702 scm_read_number (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
703 {
704 SCM result, str = SCM_EOL;
705 char local_buffer[READER_BUFFER_SIZE], *buffer;
706 size_t bytes_read;
707 scm_t_port *pt = SCM_PTAB_ENTRY (port);
708
709 /* Need to capture line and column numbers here. */
710 long line = SCM_LINUM (port);
711 int column = SCM_COL (port) - 1;
712
713 scm_ungetc (chr, port);
714 buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
715 &bytes_read);
716
717 str = scm_from_stringn (buffer, bytes_read, pt->encoding, pt->ilseq_handler);
718
719 result = scm_string_to_number (str, SCM_UNDEFINED);
720 if (scm_is_false (result))
721 {
722 /* Return a symbol instead of a number */
723 if (opts->case_insensitive_p)
724 str = scm_string_downcase_x (str);
725 result = scm_string_to_symbol (str);
726 }
727 else if (SCM_NIMP (result))
728 result = maybe_annotate_source (result, port, opts, line, column);
729
730 SCM_COL (port) += scm_i_string_length (str);
731 return result;
732 }
733
734 static SCM
735 scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
736 {
737 SCM result;
738 int ends_with_colon = 0;
739 size_t bytes_read;
740 int postfix = (opts->keyword_style == KEYWORD_STYLE_POSTFIX);
741 char local_buffer[READER_BUFFER_SIZE], *buffer;
742 scm_t_port *pt = SCM_PTAB_ENTRY (port);
743 SCM str;
744
745 scm_ungetc (chr, port);
746 buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
747 &bytes_read);
748 if (bytes_read > 0)
749 ends_with_colon = buffer[bytes_read - 1] == ':';
750
751 if (postfix && ends_with_colon && (bytes_read > 1))
752 {
753 str = scm_from_stringn (buffer, bytes_read - 1,
754 pt->encoding, pt->ilseq_handler);
755
756 if (opts->case_insensitive_p)
757 str = scm_string_downcase_x (str);
758 result = scm_symbol_to_keyword (scm_string_to_symbol (str));
759 }
760 else
761 {
762 str = scm_from_stringn (buffer, bytes_read,
763 pt->encoding, pt->ilseq_handler);
764
765 if (opts->case_insensitive_p)
766 str = scm_string_downcase_x (str);
767 result = scm_string_to_symbol (str);
768 }
769
770 SCM_COL (port) += scm_i_string_length (str);
771 return result;
772 }
773
774 static SCM
775 scm_read_number_and_radix (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
776 #define FUNC_NAME "scm_lreadr"
777 {
778 SCM result;
779 size_t read;
780 char local_buffer[READER_BUFFER_SIZE], *buffer;
781 unsigned int radix;
782 SCM str;
783 scm_t_port *pt;
784
785 switch (chr)
786 {
787 case 'B':
788 case 'b':
789 radix = 2;
790 break;
791
792 case 'o':
793 case 'O':
794 radix = 8;
795 break;
796
797 case 'd':
798 case 'D':
799 radix = 10;
800 break;
801
802 case 'x':
803 case 'X':
804 radix = 16;
805 break;
806
807 default:
808 scm_ungetc (chr, port);
809 scm_ungetc ('#', port);
810 radix = 10;
811 }
812
813 buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
814 &read);
815
816 pt = SCM_PTAB_ENTRY (port);
817 str = scm_from_stringn (buffer, read, pt->encoding, pt->ilseq_handler);
818
819 result = scm_string_to_number (str, scm_from_uint (radix));
820
821 SCM_COL (port) += scm_i_string_length (str);
822
823 if (scm_is_true (result))
824 return result;
825
826 scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
827
828 return SCM_BOOL_F;
829 }
830 #undef FUNC_NAME
831
832 static SCM
833 scm_read_quote (int chr, SCM port, scm_t_read_opts *opts)
834 {
835 SCM p;
836 long line = SCM_LINUM (port);
837 int column = SCM_COL (port) - 1;
838
839 switch (chr)
840 {
841 case '`':
842 p = scm_sym_quasiquote;
843 break;
844
845 case '\'':
846 p = scm_sym_quote;
847 break;
848
849 case ',':
850 {
851 scm_t_wchar c;
852
853 c = scm_getc (port);
854 if ('@' == c)
855 p = scm_sym_uq_splicing;
856 else
857 {
858 scm_ungetc (c, port);
859 p = scm_sym_unquote;
860 }
861 break;
862 }
863
864 default:
865 fprintf (stderr, "%s: unhandled quote character (%i)\n",
866 "scm_read_quote", chr);
867 abort ();
868 }
869
870 p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL);
871 return maybe_annotate_source (p, port, opts, line, column);
872 }
873
874 SCM_SYMBOL (sym_syntax, "syntax");
875 SCM_SYMBOL (sym_quasisyntax, "quasisyntax");
876 SCM_SYMBOL (sym_unsyntax, "unsyntax");
877 SCM_SYMBOL (sym_unsyntax_splicing, "unsyntax-splicing");
878
879 static SCM
880 scm_read_syntax (int chr, SCM port, scm_t_read_opts *opts)
881 {
882 SCM p;
883 long line = SCM_LINUM (port);
884 int column = SCM_COL (port) - 1;
885
886 switch (chr)
887 {
888 case '`':
889 p = sym_quasisyntax;
890 break;
891
892 case '\'':
893 p = sym_syntax;
894 break;
895
896 case ',':
897 {
898 int c;
899
900 c = scm_getc (port);
901 if ('@' == c)
902 p = sym_unsyntax_splicing;
903 else
904 {
905 scm_ungetc (c, port);
906 p = sym_unsyntax;
907 }
908 break;
909 }
910
911 default:
912 fprintf (stderr, "%s: unhandled syntax character (%i)\n",
913 "scm_read_syntax", chr);
914 abort ();
915 }
916
917 p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL);
918 return maybe_annotate_source (p, port, opts, line, column);
919 }
920
921 static SCM
922 scm_read_nil (int chr, SCM port, scm_t_read_opts *opts)
923 {
924 SCM id = scm_read_mixed_case_symbol (chr, port, opts);
925
926 if (!scm_is_eq (id, sym_nil))
927 scm_i_input_error ("scm_read_nil", port,
928 "unexpected input while reading #nil: ~a",
929 scm_list_1 (id));
930
931 return SCM_ELISP_NIL;
932 }
933
934 static SCM
935 scm_read_semicolon_comment (int chr, SCM port)
936 {
937 int c;
938
939 /* We use the get_byte here because there is no need to get the
940 locale correct with comment input. This presumes that newline
941 always represents itself no matter what the encoding is. */
942 for (c = scm_get_byte_or_eof (port);
943 (c != EOF) && (c != '\n');
944 c = scm_get_byte_or_eof (port));
945
946 return SCM_UNSPECIFIED;
947 }
948
949 \f
950 /* Sharp readers, i.e. readers called after a `#' sign has been read. */
951
952 static SCM
953 scm_read_boolean (int chr, SCM port)
954 {
955 switch (chr)
956 {
957 case 't':
958 case 'T':
959 return SCM_BOOL_T;
960
961 case 'f':
962 case 'F':
963 return SCM_BOOL_F;
964 }
965
966 return SCM_UNSPECIFIED;
967 }
968
969 static SCM
970 scm_read_character (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
971 #define FUNC_NAME "scm_lreadr"
972 {
973 char buffer[READER_CHAR_NAME_MAX_SIZE];
974 SCM charname;
975 size_t charname_len, bytes_read;
976 scm_t_wchar cp;
977 int overflow;
978 scm_t_port *pt;
979
980 overflow = read_token (port, opts, buffer, READER_CHAR_NAME_MAX_SIZE,
981 &bytes_read);
982 if (overflow)
983 scm_i_input_error (FUNC_NAME, port, "character name too long", SCM_EOL);
984
985 if (bytes_read == 0)
986 {
987 chr = scm_getc (port);
988 if (chr == EOF)
989 scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
990 "while reading character", SCM_EOL);
991
992 /* CHR must be a token delimiter, like a whitespace. */
993 return (SCM_MAKE_CHAR (chr));
994 }
995
996 pt = SCM_PTAB_ENTRY (port);
997
998 /* Simple ASCII characters can be processed immediately. Also, simple
999 ISO-8859-1 characters can be processed immediately if the encoding for this
1000 port is ISO-8859-1. */
1001 if (bytes_read == 1 && ((unsigned char) buffer[0] <= 127 || pt->encoding == NULL))
1002 {
1003 SCM_COL (port) += 1;
1004 return SCM_MAKE_CHAR (buffer[0]);
1005 }
1006
1007 /* Otherwise, convert the buffer into a proper scheme string for
1008 processing. */
1009 charname = scm_from_stringn (buffer, bytes_read, pt->encoding,
1010 pt->ilseq_handler);
1011 charname_len = scm_i_string_length (charname);
1012 SCM_COL (port) += charname_len;
1013 cp = scm_i_string_ref (charname, 0);
1014 if (charname_len == 1)
1015 return SCM_MAKE_CHAR (cp);
1016
1017 /* Ignore dotted circles, which may be used to keep combining characters from
1018 combining with the backslash in #\charname. */
1019 if (cp == SCM_CODEPOINT_DOTTED_CIRCLE && charname_len == 2)
1020 return SCM_MAKE_CHAR (scm_i_string_ref (charname, 1));
1021
1022 if (cp >= '0' && cp < '8')
1023 {
1024 /* Dirk:FIXME:: This type of character syntax is not R5RS
1025 * compliant. Further, it should be verified that the constant
1026 * does only consist of octal digits. */
1027 SCM p = scm_string_to_number (charname, scm_from_uint (8));
1028 if (SCM_I_INUMP (p))
1029 {
1030 scm_t_wchar c = scm_to_uint32 (p);
1031 if (SCM_IS_UNICODE_CHAR (c))
1032 return SCM_MAKE_CHAR (c);
1033 else
1034 scm_i_input_error (FUNC_NAME, port,
1035 "out-of-range octal character escape: ~a",
1036 scm_list_1 (charname));
1037 }
1038 }
1039
1040 if (cp == 'x' && (charname_len > 1))
1041 {
1042 SCM p;
1043
1044 /* Convert from hex, skipping the initial 'x' character in CHARNAME */
1045 p = scm_string_to_number (scm_c_substring (charname, 1, charname_len),
1046 scm_from_uint (16));
1047 if (SCM_I_INUMP (p))
1048 {
1049 scm_t_wchar c = scm_to_uint32 (p);
1050 if (SCM_IS_UNICODE_CHAR (c))
1051 return SCM_MAKE_CHAR (c);
1052 else
1053 scm_i_input_error (FUNC_NAME, port,
1054 "out-of-range hex character escape: ~a",
1055 scm_list_1 (charname));
1056 }
1057 }
1058
1059 /* The names of characters should never have non-Latin1
1060 characters. */
1061 if (scm_i_is_narrow_string (charname)
1062 || scm_i_try_narrow_string (charname))
1063 { SCM ch = scm_i_charname_to_char (scm_i_string_chars (charname),
1064 charname_len);
1065 if (scm_is_true (ch))
1066 return ch;
1067 }
1068
1069 scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
1070 scm_list_1 (charname));
1071
1072 return SCM_UNSPECIFIED;
1073 }
1074 #undef FUNC_NAME
1075
1076 static SCM
1077 scm_read_keyword (int chr, SCM port, scm_t_read_opts *opts)
1078 {
1079 SCM symbol;
1080
1081 /* Read the symbol that comprises the keyword. Doing this instead of
1082 invoking a specific symbol reader function allows `scm_read_keyword ()'
1083 to adapt to the delimiters currently valid of symbols.
1084
1085 XXX: This implementation allows sloppy syntaxes like `#: key'. */
1086 symbol = scm_read_expression (port, opts);
1087 if (!scm_is_symbol (symbol))
1088 scm_i_input_error ("scm_read_keyword", port,
1089 "keyword prefix `~a' not followed by a symbol: ~s",
1090 scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
1091
1092 return (scm_symbol_to_keyword (symbol));
1093 }
1094
1095 static SCM
1096 scm_read_vector (int chr, SCM port, scm_t_read_opts *opts,
1097 long line, int column)
1098 {
1099 /* Note: We call `scm_read_sexp ()' rather than READER here in order to
1100 guarantee that it's going to do what we want. After all, this is an
1101 implementation detail of `scm_read_vector ()', not a desirable
1102 property. */
1103 return maybe_annotate_source (scm_vector (scm_read_sexp (chr, port, opts)),
1104 port, opts, line, column);
1105 }
1106
1107 /* Helper used by scm_read_array */
1108 static int
1109 read_decimal_integer (SCM port, int c, ssize_t *resp)
1110 {
1111 ssize_t sign = 1;
1112 ssize_t res = 0;
1113 int got_it = 0;
1114
1115 if (c == '-')
1116 {
1117 sign = -1;
1118 c = scm_getc (port);
1119 }
1120
1121 while ('0' <= c && c <= '9')
1122 {
1123 res = 10*res + c-'0';
1124 got_it = 1;
1125 c = scm_getc (port);
1126 }
1127
1128 if (got_it)
1129 *resp = sign * res;
1130 return c;
1131 }
1132
1133 /* Read an array. This function can also read vectors and uniform
1134 vectors. Also, the conflict between '#f' and '#f32' and '#f64' is
1135 handled here.
1136
1137 C is the first character read after the '#'.
1138 */
1139 static SCM
1140 scm_read_array (int c, SCM port, scm_t_read_opts *opts, long line, int column)
1141 {
1142 ssize_t rank;
1143 scm_t_wchar tag_buf[8];
1144 int tag_len;
1145
1146 SCM tag, shape = SCM_BOOL_F, elements, array;
1147
1148 /* XXX - shortcut for ordinary vectors. Shouldn't be necessary but
1149 the array code can not deal with zero-length dimensions yet, and
1150 we want to allow zero-length vectors, of course.
1151 */
1152 if (c == '(')
1153 return scm_read_vector (c, port, opts, line, column);
1154
1155 /* Disambiguate between '#f' and uniform floating point vectors.
1156 */
1157 if (c == 'f')
1158 {
1159 c = scm_getc (port);
1160 if (c != '3' && c != '6')
1161 {
1162 if (c != EOF)
1163 scm_ungetc (c, port);
1164 return SCM_BOOL_F;
1165 }
1166 rank = 1;
1167 tag_buf[0] = 'f';
1168 tag_len = 1;
1169 goto continue_reading_tag;
1170 }
1171
1172 /* Read rank. */
1173 rank = 1;
1174 c = read_decimal_integer (port, c, &rank);
1175 if (rank < 0)
1176 scm_i_input_error (NULL, port, "array rank must be non-negative",
1177 SCM_EOL);
1178
1179 /* Read tag. */
1180 tag_len = 0;
1181 continue_reading_tag:
1182 while (c != EOF && c != '(' && c != '@' && c != ':'
1183 && tag_len < sizeof tag_buf / sizeof tag_buf[0])
1184 {
1185 tag_buf[tag_len++] = c;
1186 c = scm_getc (port);
1187 }
1188 if (tag_len == 0)
1189 tag = SCM_BOOL_T;
1190 else
1191 {
1192 tag = scm_string_to_symbol (scm_from_utf32_stringn (tag_buf, tag_len));
1193 if (tag_len == sizeof tag_buf / sizeof tag_buf[0])
1194 scm_i_input_error (NULL, port, "invalid array tag, starting with: ~a",
1195 scm_list_1 (tag));
1196 }
1197
1198 /* Read shape. */
1199 if (c == '@' || c == ':')
1200 {
1201 shape = SCM_EOL;
1202
1203 do
1204 {
1205 ssize_t lbnd = 0, len = 0;
1206 SCM s;
1207
1208 if (c == '@')
1209 {
1210 c = scm_getc (port);
1211 c = read_decimal_integer (port, c, &lbnd);
1212 }
1213
1214 s = scm_from_ssize_t (lbnd);
1215
1216 if (c == ':')
1217 {
1218 c = scm_getc (port);
1219 c = read_decimal_integer (port, c, &len);
1220 if (len < 0)
1221 scm_i_input_error (NULL, port,
1222 "array length must be non-negative",
1223 SCM_EOL);
1224
1225 s = scm_list_2 (s, scm_from_ssize_t (lbnd+len-1));
1226 }
1227
1228 shape = scm_cons (s, shape);
1229 } while (c == '@' || c == ':');
1230
1231 shape = scm_reverse_x (shape, SCM_EOL);
1232 }
1233
1234 /* Read nested lists of elements. */
1235 if (c != '(')
1236 scm_i_input_error (NULL, port,
1237 "missing '(' in vector or array literal",
1238 SCM_EOL);
1239 elements = scm_read_sexp (c, port, opts);
1240
1241 if (scm_is_false (shape))
1242 shape = scm_from_ssize_t (rank);
1243 else if (scm_ilength (shape) != rank)
1244 scm_i_input_error
1245 (NULL, port,
1246 "the number of shape specifications must match the array rank",
1247 SCM_EOL);
1248
1249 /* Handle special print syntax of rank zero arrays; see
1250 scm_i_print_array for a rationale. */
1251 if (rank == 0)
1252 {
1253 if (!scm_is_pair (elements))
1254 scm_i_input_error (NULL, port,
1255 "too few elements in array literal, need 1",
1256 SCM_EOL);
1257 if (!scm_is_null (SCM_CDR (elements)))
1258 scm_i_input_error (NULL, port,
1259 "too many elements in array literal, want 1",
1260 SCM_EOL);
1261 elements = SCM_CAR (elements);
1262 }
1263
1264 /* Construct array, annotate with source location, and return. */
1265 array = scm_list_to_typed_array (tag, shape, elements);
1266 return maybe_annotate_source (array, port, opts, line, column);
1267 }
1268
1269 static SCM
1270 scm_read_srfi4_vector (int chr, SCM port, scm_t_read_opts *opts,
1271 long line, int column)
1272 {
1273 return scm_read_array (chr, port, opts, line, column);
1274 }
1275
1276 static SCM
1277 scm_read_bytevector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
1278 long line, int column)
1279 {
1280 chr = scm_getc (port);
1281 if (chr != 'u')
1282 goto syntax;
1283
1284 chr = scm_getc (port);
1285 if (chr != '8')
1286 goto syntax;
1287
1288 chr = scm_getc (port);
1289 if (chr != '(')
1290 goto syntax;
1291
1292 return maybe_annotate_source
1293 (scm_u8_list_to_bytevector (scm_read_sexp (chr, port, opts)),
1294 port, opts, line, column);
1295
1296 syntax:
1297 scm_i_input_error ("read_bytevector", port,
1298 "invalid bytevector prefix",
1299 SCM_MAKE_CHAR (chr));
1300 return SCM_UNSPECIFIED;
1301 }
1302
1303 static SCM
1304 scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
1305 long line, int column)
1306 {
1307 /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
1308 terribly inefficient but who cares? */
1309 SCM s_bits = SCM_EOL;
1310
1311 for (chr = scm_getc (port);
1312 (chr != EOF) && ((chr == '0') || (chr == '1'));
1313 chr = scm_getc (port))
1314 {
1315 s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
1316 }
1317
1318 if (chr != EOF)
1319 scm_ungetc (chr, port);
1320
1321 return maybe_annotate_source
1322 (scm_bitvector (scm_reverse_x (s_bits, SCM_EOL)),
1323 port, opts, line, column);
1324 }
1325
1326 static SCM
1327 scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
1328 {
1329 int bang_seen = 0;
1330
1331 for (;;)
1332 {
1333 int c = scm_getc (port);
1334
1335 if (c == EOF)
1336 scm_i_input_error ("skip_block_comment", port,
1337 "unterminated `#! ... !#' comment", SCM_EOL);
1338
1339 if (c == '!')
1340 bang_seen = 1;
1341 else if (c == '#' && bang_seen)
1342 break;
1343 else
1344 bang_seen = 0;
1345 }
1346
1347 return SCM_UNSPECIFIED;
1348 }
1349
1350 static void set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts,
1351 int value);
1352 static void set_port_square_brackets_p (SCM port, scm_t_read_opts *opts,
1353 int value);
1354 static void set_port_curly_infix_p (SCM port, scm_t_read_opts *opts,
1355 int value);
1356
1357 static SCM
1358 scm_read_shebang (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
1359 {
1360 char name[READER_DIRECTIVE_NAME_MAX_SIZE + 1];
1361 int c;
1362 int i = 0;
1363
1364 while (i <= READER_DIRECTIVE_NAME_MAX_SIZE)
1365 {
1366 c = scm_getc (port);
1367 if (c == EOF)
1368 scm_i_input_error ("skip_block_comment", port,
1369 "unterminated `#! ... !#' comment", SCM_EOL);
1370 else if (('a' <= c && c <= 'z') || ('0' <= c && c <= '9') || c == '-')
1371 name[i++] = c;
1372 else if (CHAR_IS_DELIMITER (c))
1373 {
1374 scm_ungetc (c, port);
1375 name[i] = '\0';
1376 if (0 == strcmp ("r6rs", name))
1377 ; /* Silently ignore */
1378 else if (0 == strcmp ("fold-case", name))
1379 set_port_case_insensitive_p (port, opts, 1);
1380 else if (0 == strcmp ("no-fold-case", name))
1381 set_port_case_insensitive_p (port, opts, 0);
1382 else if (0 == strcmp ("curly-infix", name))
1383 set_port_curly_infix_p (port, opts, 1);
1384 else if (0 == strcmp ("curly-infix-and-bracket-lists", name))
1385 {
1386 set_port_curly_infix_p (port, opts, 1);
1387 set_port_square_brackets_p (port, opts, 0);
1388 }
1389 else
1390 break;
1391
1392 return SCM_UNSPECIFIED;
1393 }
1394 }
1395 while (i > 0)
1396 scm_ungetc (name[--i], port);
1397 return scm_read_scsh_block_comment (chr, port);
1398 }
1399
1400 static SCM
1401 scm_read_r6rs_block_comment (scm_t_wchar chr, SCM port)
1402 {
1403 /* Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
1404 nested. So care must be taken. */
1405 int nesting_level = 1;
1406
1407 int a = scm_getc (port);
1408
1409 if (a == EOF)
1410 scm_i_input_error ("scm_read_r6rs_block_comment", port,
1411 "unterminated `#| ... |#' comment", SCM_EOL);
1412
1413 while (nesting_level > 0)
1414 {
1415 int b = scm_getc (port);
1416
1417 if (b == EOF)
1418 scm_i_input_error ("scm_read_r6rs_block_comment", port,
1419 "unterminated `#| ... |#' comment", SCM_EOL);
1420
1421 if (a == '|' && b == '#')
1422 {
1423 nesting_level--;
1424 b = EOF;
1425 }
1426 else if (a == '#' && b == '|')
1427 {
1428 nesting_level++;
1429 b = EOF;
1430 }
1431
1432 a = b;
1433 }
1434
1435 return SCM_UNSPECIFIED;
1436 }
1437
1438 static SCM
1439 scm_read_commented_expression (scm_t_wchar chr, SCM port,
1440 scm_t_read_opts *opts)
1441 {
1442 scm_t_wchar c;
1443
1444 c = flush_ws (port, opts, (char *) NULL);
1445 if (EOF == c)
1446 scm_i_input_error ("read_commented_expression", port,
1447 "no expression after #; comment", SCM_EOL);
1448 scm_ungetc (c, port);
1449 scm_read_expression (port, opts);
1450 return SCM_UNSPECIFIED;
1451 }
1452
1453 static SCM
1454 scm_read_extended_symbol (scm_t_wchar chr, SCM port)
1455 {
1456 /* Guile's extended symbol read syntax looks like this:
1457
1458 #{This is all a symbol name}#
1459
1460 So here, CHR is expected to be `{'. */
1461 int saw_brace = 0;
1462 size_t len = 0;
1463 SCM buf = scm_i_make_string (1024, NULL, 0);
1464
1465 buf = scm_i_string_start_writing (buf);
1466
1467 while ((chr = scm_getc (port)) != EOF)
1468 {
1469 if (saw_brace)
1470 {
1471 if (chr == '#')
1472 {
1473 break;
1474 }
1475 else
1476 {
1477 saw_brace = 0;
1478 scm_i_string_set_x (buf, len++, '}');
1479 }
1480 }
1481
1482 if (chr == '}')
1483 saw_brace = 1;
1484 else if (chr == '\\')
1485 {
1486 /* It used to be that print.c would print extended-read-syntax
1487 symbols with backslashes before "non-standard" chars, but
1488 this routine wouldn't do anything with those escapes.
1489 Bummer. What we've done is to change print.c to output
1490 R6RS hex escapes for those characters, relying on the fact
1491 that the extended read syntax would never put a `\' before
1492 an `x'. For now, we just ignore other instances of
1493 backslash in the string. */
1494 switch ((chr = scm_getc (port)))
1495 {
1496 case EOF:
1497 goto done;
1498 case 'x':
1499 {
1500 scm_t_wchar c;
1501
1502 SCM_READ_HEX_ESCAPE (10, ';');
1503 scm_i_string_set_x (buf, len++, c);
1504 break;
1505
1506 str_eof:
1507 chr = EOF;
1508 goto done;
1509
1510 bad_escaped:
1511 scm_i_string_stop_writing ();
1512 scm_i_input_error ("scm_read_extended_symbol", port,
1513 "illegal character in escape sequence: ~S",
1514 scm_list_1 (SCM_MAKE_CHAR (c)));
1515 break;
1516 }
1517 default:
1518 scm_i_string_set_x (buf, len++, chr);
1519 break;
1520 }
1521 }
1522 else
1523 scm_i_string_set_x (buf, len++, chr);
1524
1525 if (len >= scm_i_string_length (buf) - 2)
1526 {
1527 SCM addy;
1528
1529 scm_i_string_stop_writing ();
1530 addy = scm_i_make_string (1024, NULL, 0);
1531 buf = scm_string_append (scm_list_2 (buf, addy));
1532 len = 0;
1533 buf = scm_i_string_start_writing (buf);
1534 }
1535 }
1536
1537 done:
1538 scm_i_string_stop_writing ();
1539 if (chr == EOF)
1540 scm_i_input_error ("scm_read_extended_symbol", port,
1541 "end of file while reading symbol", SCM_EOL);
1542
1543 return (scm_string_to_symbol (scm_c_substring (buf, 0, len)));
1544 }
1545
1546
1547 \f
1548 /* Top-level token readers, i.e., dispatchers. */
1549
1550 static SCM
1551 scm_read_sharp_extension (int chr, SCM port, scm_t_read_opts *opts)
1552 {
1553 SCM proc;
1554
1555 proc = scm_get_hash_procedure (chr);
1556 if (scm_is_true (scm_procedure_p (proc)))
1557 {
1558 long line = SCM_LINUM (port);
1559 int column = SCM_COL (port) - 2;
1560 SCM got;
1561
1562 got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
1563
1564 if (opts->record_positions_p && SCM_NIMP (got)
1565 && !scm_i_has_source_properties (got))
1566 scm_i_set_source_properties_x (got, line, column, SCM_FILENAME (port));
1567
1568 return got;
1569 }
1570
1571 return SCM_UNSPECIFIED;
1572 }
1573
1574 /* The reader for the sharp `#' character. It basically dispatches reads
1575 among the above token readers. */
1576 static SCM
1577 scm_read_sharp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
1578 long line, int column)
1579 #define FUNC_NAME "scm_lreadr"
1580 {
1581 SCM result;
1582
1583 chr = scm_getc (port);
1584
1585 result = scm_read_sharp_extension (chr, port, opts);
1586 if (!scm_is_eq (result, SCM_UNSPECIFIED))
1587 return result;
1588
1589 switch (chr)
1590 {
1591 case '\\':
1592 return (scm_read_character (chr, port, opts));
1593 case '(':
1594 return (scm_read_vector (chr, port, opts, line, column));
1595 case 's':
1596 case 'u':
1597 case 'f':
1598 case 'c':
1599 /* This one may return either a boolean or an SRFI-4 vector. */
1600 return (scm_read_srfi4_vector (chr, port, opts, line, column));
1601 case 'v':
1602 return (scm_read_bytevector (chr, port, opts, line, column));
1603 case '*':
1604 return (scm_read_guile_bit_vector (chr, port, opts, line, column));
1605 case 't':
1606 case 'T':
1607 case 'F':
1608 return (scm_read_boolean (chr, port));
1609 case ':':
1610 return (scm_read_keyword (chr, port, opts));
1611 case '0': case '1': case '2': case '3': case '4':
1612 case '5': case '6': case '7': case '8': case '9':
1613 case '@':
1614 #if SCM_ENABLE_DEPRECATED
1615 /* See below for 'i' and 'e'. */
1616 case 'a':
1617 case 'y':
1618 case 'h':
1619 case 'l':
1620 #endif
1621 return (scm_read_array (chr, port, opts, line, column));
1622
1623 case 'i':
1624 case 'e':
1625 #if SCM_ENABLE_DEPRECATED
1626 {
1627 /* When next char is '(', it really is an old-style
1628 uniform array. */
1629 scm_t_wchar next_c = scm_getc (port);
1630 if (next_c != EOF)
1631 scm_ungetc (next_c, port);
1632 if (next_c == '(')
1633 return scm_read_array (chr, port, opts, line, column);
1634 /* Fall through. */
1635 }
1636 #endif
1637 case 'b':
1638 case 'B':
1639 case 'o':
1640 case 'O':
1641 case 'd':
1642 case 'D':
1643 case 'x':
1644 case 'X':
1645 case 'I':
1646 case 'E':
1647 return (scm_read_number_and_radix (chr, port, opts));
1648 case '{':
1649 return (scm_read_extended_symbol (chr, port));
1650 case '!':
1651 return (scm_read_shebang (chr, port, opts));
1652 case ';':
1653 return (scm_read_commented_expression (chr, port, opts));
1654 case '`':
1655 case '\'':
1656 case ',':
1657 return (scm_read_syntax (chr, port, opts));
1658 case 'n':
1659 return (scm_read_nil (chr, port, opts));
1660 default:
1661 result = scm_read_sharp_extension (chr, port, opts);
1662 if (scm_is_eq (result, SCM_UNSPECIFIED))
1663 {
1664 /* To remain compatible with 1.8 and earlier, the following
1665 characters have lower precedence than `read-hash-extend'
1666 characters. */
1667 switch (chr)
1668 {
1669 case '|':
1670 return scm_read_r6rs_block_comment (chr, port);
1671 default:
1672 scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
1673 scm_list_1 (SCM_MAKE_CHAR (chr)));
1674 }
1675 }
1676 else
1677 return result;
1678 }
1679
1680 return SCM_UNSPECIFIED;
1681 }
1682 #undef FUNC_NAME
1683
1684 static SCM
1685 read_inner_expression (SCM port, scm_t_read_opts *opts)
1686 #define FUNC_NAME "read_inner_expression"
1687 {
1688 while (1)
1689 {
1690 scm_t_wchar chr;
1691
1692 chr = scm_getc (port);
1693
1694 switch (chr)
1695 {
1696 case SCM_WHITE_SPACES:
1697 case SCM_LINE_INCREMENTORS:
1698 break;
1699 case ';':
1700 (void) scm_read_semicolon_comment (chr, port);
1701 break;
1702 case '{':
1703 if (opts->curly_infix_p)
1704 {
1705 if (opts->neoteric_p)
1706 return scm_read_sexp (chr, port, opts);
1707 else
1708 {
1709 SCM expr;
1710
1711 /* Enable neoteric expressions within curly braces */
1712 opts->neoteric_p = 1;
1713 expr = scm_read_sexp (chr, port, opts);
1714 opts->neoteric_p = 0;
1715 return expr;
1716 }
1717 }
1718 else
1719 return scm_read_mixed_case_symbol (chr, port, opts);
1720 case '[':
1721 if (opts->square_brackets_p)
1722 return scm_read_sexp (chr, port, opts);
1723 else if (opts->curly_infix_p)
1724 {
1725 /* The syntax of neoteric expressions requires that '[' be
1726 a delimiter when curly-infix is enabled, so it cannot
1727 be part of an unescaped symbol. We might as well do
1728 something useful with it, so we adopt Kawa's convention:
1729 [...] => ($bracket-list$ ...) */
1730 long line = SCM_LINUM (port);
1731 int column = SCM_COL (port) - 1;
1732 return maybe_annotate_source
1733 (scm_cons (sym_bracket_list, scm_read_sexp (chr, port, opts)),
1734 port, opts, line, column);
1735 }
1736 else
1737 return scm_read_mixed_case_symbol (chr, port, opts);
1738 case '(':
1739 return (scm_read_sexp (chr, port, opts));
1740 case '"':
1741 return (scm_read_string (chr, port, opts));
1742 case '\'':
1743 case '`':
1744 case ',':
1745 return (scm_read_quote (chr, port, opts));
1746 case '#':
1747 {
1748 long line = SCM_LINUM (port);
1749 int column = SCM_COL (port) - 1;
1750 SCM result = scm_read_sharp (chr, port, opts, line, column);
1751 if (scm_is_eq (result, SCM_UNSPECIFIED))
1752 /* We read a comment or some such. */
1753 break;
1754 else
1755 return result;
1756 }
1757 case ')':
1758 scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
1759 break;
1760 case '}':
1761 if (opts->curly_infix_p)
1762 scm_i_input_error (FUNC_NAME, port, "unexpected \"}\"", SCM_EOL);
1763 else
1764 return scm_read_mixed_case_symbol (chr, port, opts);
1765 case ']':
1766 if (opts->square_brackets_p)
1767 scm_i_input_error (FUNC_NAME, port, "unexpected \"]\"", SCM_EOL);
1768 /* otherwise fall through */
1769 case EOF:
1770 return SCM_EOF_VAL;
1771 case ':':
1772 if (opts->keyword_style == KEYWORD_STYLE_PREFIX)
1773 return scm_symbol_to_keyword (scm_read_expression (port, opts));
1774 /* Fall through. */
1775
1776 default:
1777 {
1778 if (((chr >= '0') && (chr <= '9'))
1779 || (strchr ("+-.", chr)))
1780 return (scm_read_number (chr, port, opts));
1781 else
1782 return (scm_read_mixed_case_symbol (chr, port, opts));
1783 }
1784 }
1785 }
1786 }
1787 #undef FUNC_NAME
1788
1789 static SCM
1790 scm_read_expression (SCM port, scm_t_read_opts *opts)
1791 #define FUNC_NAME "scm_read_expression"
1792 {
1793 if (!opts->neoteric_p)
1794 return read_inner_expression (port, opts);
1795 else
1796 {
1797 long line = 0;
1798 int column = 0;
1799 SCM expr;
1800
1801 if (opts->record_positions_p)
1802 {
1803 /* We need to get the position of the first non-whitespace
1804 character in order to correctly annotate neoteric
1805 expressions. For example, for the expression 'f(x)', the
1806 first call to 'read_inner_expression' reads the 'f' (which
1807 cannot be annotated), and then we later read the '(x)' and
1808 use it to construct the new list (f x). */
1809 int c = flush_ws (port, opts, (char *) NULL);
1810 if (c == EOF)
1811 return SCM_EOF_VAL;
1812 scm_ungetc (c, port);
1813 line = SCM_LINUM (port);
1814 column = SCM_COL (port);
1815 }
1816
1817 expr = read_inner_expression (port, opts);
1818
1819 /* 'expr' is the first component of the neoteric expression. Now
1820 we loop, and as long as the next character is '(', '[', or '{',
1821 (without any intervening whitespace), we use it to construct a
1822 new expression. For example, f{n - 1}(x) => ((f (- n 1)) x). */
1823 for (;;)
1824 {
1825 int chr = scm_getc (port);
1826
1827 if (chr == '(')
1828 /* e(...) => (e ...) */
1829 expr = scm_cons (expr, scm_read_sexp (chr, port, opts));
1830 else if (chr == '[')
1831 /* e[...] => ($bracket-apply$ e ...) */
1832 expr = scm_cons (sym_bracket_apply,
1833 scm_cons (expr,
1834 scm_read_sexp (chr, port, opts)));
1835 else if (chr == '{')
1836 {
1837 SCM arg = scm_read_sexp (chr, port, opts);
1838
1839 if (scm_is_null (arg))
1840 expr = scm_list_1 (expr); /* e{} => (e) */
1841 else
1842 expr = scm_list_2 (expr, arg); /* e{...} => (e {...}) */
1843 }
1844 else
1845 {
1846 if (chr != EOF)
1847 scm_ungetc (chr, port);
1848 break;
1849 }
1850 maybe_annotate_source (expr, port, opts, line, column);
1851 }
1852 return expr;
1853 }
1854 }
1855 #undef FUNC_NAME
1856
1857 \f
1858 /* Actual reader. */
1859
1860 static void init_read_options (SCM port, scm_t_read_opts *opts);
1861
1862 SCM_DEFINE (scm_read, "read", 0, 1, 0,
1863 (SCM port),
1864 "Read an s-expression from the input port @var{port}, or from\n"
1865 "the current input port if @var{port} is not specified.\n"
1866 "Any whitespace before the next token is discarded.")
1867 #define FUNC_NAME s_scm_read
1868 {
1869 scm_t_read_opts opts;
1870 int c;
1871
1872 if (SCM_UNBNDP (port))
1873 port = scm_current_input_port ();
1874 SCM_VALIDATE_OPINPORT (1, port);
1875
1876 init_read_options (port, &opts);
1877
1878 c = flush_ws (port, &opts, (char *) NULL);
1879 if (EOF == c)
1880 return SCM_EOF_VAL;
1881 scm_ungetc (c, port);
1882
1883 return (scm_read_expression (port, &opts));
1884 }
1885 #undef FUNC_NAME
1886
1887
1888 \f
1889
1890 /* Manipulate the read-hash-procedures alist. This could be written in
1891 Scheme, but maybe it will also be used by C code during initialisation. */
1892 SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
1893 (SCM chr, SCM proc),
1894 "Install the procedure @var{proc} for reading expressions\n"
1895 "starting with the character sequence @code{#} and @var{chr}.\n"
1896 "@var{proc} will be called with two arguments: the character\n"
1897 "@var{chr} and the port to read further data from. The object\n"
1898 "returned will be the return value of @code{read}. \n"
1899 "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
1900 )
1901 #define FUNC_NAME s_scm_read_hash_extend
1902 {
1903 SCM this;
1904 SCM prev;
1905
1906 SCM_VALIDATE_CHAR (1, chr);
1907 SCM_ASSERT (scm_is_false (proc)
1908 || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
1909 proc, SCM_ARG2, FUNC_NAME);
1910
1911 /* Check if chr is already in the alist. */
1912 this = scm_i_read_hash_procedures_ref ();
1913 prev = SCM_BOOL_F;
1914 while (1)
1915 {
1916 if (scm_is_null (this))
1917 {
1918 /* not found, so add it to the beginning. */
1919 if (scm_is_true (proc))
1920 {
1921 SCM new = scm_cons (scm_cons (chr, proc),
1922 scm_i_read_hash_procedures_ref ());
1923 scm_i_read_hash_procedures_set_x (new);
1924 }
1925 break;
1926 }
1927 if (scm_is_eq (chr, SCM_CAAR (this)))
1928 {
1929 /* already in the alist. */
1930 if (scm_is_false (proc))
1931 {
1932 /* remove it. */
1933 if (scm_is_false (prev))
1934 {
1935 SCM rest = SCM_CDR (scm_i_read_hash_procedures_ref ());
1936 scm_i_read_hash_procedures_set_x (rest);
1937 }
1938 else
1939 scm_set_cdr_x (prev, SCM_CDR (this));
1940 }
1941 else
1942 {
1943 /* replace it. */
1944 scm_set_cdr_x (SCM_CAR (this), proc);
1945 }
1946 break;
1947 }
1948 prev = this;
1949 this = SCM_CDR (this);
1950 }
1951
1952 return SCM_UNSPECIFIED;
1953 }
1954 #undef FUNC_NAME
1955
1956 /* Recover the read-hash procedure corresponding to char c. */
1957 static SCM
1958 scm_get_hash_procedure (int c)
1959 {
1960 SCM rest = scm_i_read_hash_procedures_ref ();
1961
1962 while (1)
1963 {
1964 if (scm_is_null (rest))
1965 return SCM_BOOL_F;
1966
1967 if (SCM_CHAR (SCM_CAAR (rest)) == c)
1968 return SCM_CDAR (rest);
1969
1970 rest = SCM_CDR (rest);
1971 }
1972 }
1973
1974 #define SCM_ENCODING_SEARCH_SIZE (500)
1975
1976 /* Search the first few hundred characters of a file for an Emacs-like coding
1977 declaration. Returns either NULL or a string whose storage has been
1978 allocated with `scm_gc_malloc ()'. */
1979 char *
1980 scm_i_scan_for_encoding (SCM port)
1981 {
1982 scm_t_port *pt;
1983 char header[SCM_ENCODING_SEARCH_SIZE+1];
1984 size_t bytes_read, encoding_length, i;
1985 char *encoding = NULL;
1986 int utf8_bom = 0;
1987 char *pos, *encoding_start;
1988 int in_comment;
1989
1990 pt = SCM_PTAB_ENTRY (port);
1991
1992 if (pt->rw_active == SCM_PORT_WRITE)
1993 scm_flush (port);
1994
1995 if (pt->rw_random)
1996 pt->rw_active = SCM_PORT_READ;
1997
1998 if (pt->read_pos == pt->read_end)
1999 {
2000 /* We can use the read buffer, and thus avoid a seek. */
2001 if (scm_fill_input (port) == EOF)
2002 return NULL;
2003
2004 bytes_read = pt->read_end - pt->read_pos;
2005 if (bytes_read > SCM_ENCODING_SEARCH_SIZE)
2006 bytes_read = SCM_ENCODING_SEARCH_SIZE;
2007
2008 if (bytes_read <= 1)
2009 /* An unbuffered port -- don't scan. */
2010 return NULL;
2011
2012 memcpy (header, pt->read_pos, bytes_read);
2013 header[bytes_read] = '\0';
2014 }
2015 else
2016 {
2017 /* Try to read some bytes and then seek back. Not all ports
2018 support seeking back; and indeed some file ports (like
2019 /dev/urandom) will succeed on an lseek (fd, 0, SEEK_CUR)---the
2020 check performed by SCM_FPORT_FDES---but fail to seek
2021 backwards. Hence this block comes second. We prefer to use
2022 the read buffer in-place. */
2023 if (SCM_FPORTP (port) && !SCM_FDES_RANDOM_P (SCM_FPORT_FDES (port)))
2024 return NULL;
2025
2026 bytes_read = scm_c_read (port, header, SCM_ENCODING_SEARCH_SIZE);
2027 header[bytes_read] = '\0';
2028 scm_seek (port, scm_from_int (0), scm_from_int (SEEK_SET));
2029 }
2030
2031 if (bytes_read > 3
2032 && header[0] == '\xef' && header[1] == '\xbb' && header[2] == '\xbf')
2033 utf8_bom = 1;
2034
2035 /* search past "coding[:=]" */
2036 pos = header;
2037 while (1)
2038 {
2039 if ((pos = strstr(pos, "coding")) == NULL)
2040 return NULL;
2041
2042 pos += strlen("coding");
2043 if (pos - header >= SCM_ENCODING_SEARCH_SIZE ||
2044 (*pos == ':' || *pos == '='))
2045 {
2046 pos ++;
2047 break;
2048 }
2049 }
2050
2051 /* skip spaces */
2052 while (pos - header <= SCM_ENCODING_SEARCH_SIZE &&
2053 (*pos == ' ' || *pos == '\t'))
2054 pos ++;
2055
2056 /* grab the next token */
2057 encoding_start = pos;
2058 i = 0;
2059 while (encoding_start + i - header <= SCM_ENCODING_SEARCH_SIZE
2060 && encoding_start + i - header < bytes_read
2061 && (isalnum ((int) encoding_start[i])
2062 || strchr ("_-.:/,+=()", encoding_start[i]) != NULL))
2063 i++;
2064
2065 encoding_length = i;
2066 if (encoding_length == 0)
2067 return NULL;
2068
2069 encoding = scm_gc_strndup (encoding_start, encoding_length, "encoding");
2070 for (i = 0; i < encoding_length; i++)
2071 encoding[i] = toupper ((int) encoding[i]);
2072
2073 /* push backwards to make sure we were in a comment */
2074 in_comment = 0;
2075 pos = encoding_start;
2076 while (pos >= header)
2077 {
2078 if (*pos == ';')
2079 {
2080 in_comment = 1;
2081 break;
2082 }
2083 else if (*pos == '\n' || pos == header)
2084 {
2085 /* This wasn't in a semicolon comment. Check for a
2086 hash-bang comment. */
2087 char *beg = strstr (header, "#!");
2088 char *end = strstr (header, "!#");
2089 if (beg < encoding_start && encoding_start + encoding_length <= end)
2090 in_comment = 1;
2091 break;
2092 }
2093 else
2094 {
2095 pos --;
2096 continue;
2097 }
2098 }
2099 if (!in_comment)
2100 /* This wasn't in a comment */
2101 return NULL;
2102
2103 if (utf8_bom && strcmp(encoding, "UTF-8"))
2104 scm_misc_error (NULL,
2105 "the port input declares the encoding ~s but is encoded as UTF-8",
2106 scm_list_1 (scm_from_locale_string (encoding)));
2107
2108 return encoding;
2109 }
2110
2111 SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0,
2112 (SCM port),
2113 "Scans the port for an Emacs-like character coding declaration\n"
2114 "near the top of the contents of a port with random-accessible contents.\n"
2115 "The coding declaration is of the form\n"
2116 "@code{coding: XXXXX} and must appear in a scheme comment.\n"
2117 "\n"
2118 "Returns a string containing the character encoding of the file\n"
2119 "if a declaration was found, or @code{#f} otherwise.\n")
2120 #define FUNC_NAME s_scm_file_encoding
2121 {
2122 char *enc;
2123 SCM s_enc;
2124
2125 SCM_VALIDATE_OPINPORT (SCM_ARG1, port);
2126
2127 enc = scm_i_scan_for_encoding (port);
2128 if (enc == NULL)
2129 return SCM_BOOL_F;
2130 else
2131 {
2132 s_enc = scm_from_locale_string (enc);
2133 return s_enc;
2134 }
2135
2136 return SCM_BOOL_F;
2137 }
2138 #undef FUNC_NAME
2139
2140 \f
2141 /* Per-port read options.
2142
2143 We store per-port read options in the 'port-read-options' key of the
2144 port's alist, which is stored in 'scm_i_port_weak_hash'. The value
2145 stored in the alist is a single integer that contains a two-bit field
2146 for each read option.
2147
2148 If a bit field contains READ_OPTION_INHERIT (3), that indicates that
2149 the applicable value should be inherited from the corresponding
2150 global read option. Otherwise, the bit field contains the value of
2151 the read option. For boolean read options that have been set
2152 per-port, the possible values are 0 or 1. If the 'keyword_style'
2153 read option has been set per-port, its possible values are those in
2154 'enum t_keyword_style'. */
2155
2156 /* Key to read options in per-port alists. */
2157 SCM_SYMBOL (sym_port_read_options, "port-read-options");
2158
2159 /* Offsets of bit fields for each per-port override */
2160 #define READ_OPTION_COPY_SOURCE_P 0
2161 #define READ_OPTION_RECORD_POSITIONS_P 2
2162 #define READ_OPTION_CASE_INSENSITIVE_P 4
2163 #define READ_OPTION_KEYWORD_STYLE 6
2164 #define READ_OPTION_R6RS_ESCAPES_P 8
2165 #define READ_OPTION_SQUARE_BRACKETS_P 10
2166 #define READ_OPTION_HUNGRY_EOL_ESCAPES_P 12
2167 #define READ_OPTION_CURLY_INFIX_P 14
2168
2169 /* The total width in bits of the per-port overrides */
2170 #define READ_OPTIONS_NUM_BITS 16
2171
2172 #define READ_OPTIONS_INHERIT_ALL ((1UL << READ_OPTIONS_NUM_BITS) - 1)
2173 #define READ_OPTIONS_MAX_VALUE READ_OPTIONS_INHERIT_ALL
2174
2175 #define READ_OPTION_MASK 3
2176 #define READ_OPTION_INHERIT 3
2177
2178 static void
2179 set_port_read_option (SCM port, int option, int new_value)
2180 {
2181 SCM alist, scm_read_options;
2182 unsigned int read_options;
2183
2184 new_value &= READ_OPTION_MASK;
2185 scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex);
2186 alist = scm_hashq_ref (scm_i_port_weak_hash, port, SCM_BOOL_F);
2187 scm_read_options = scm_assq_ref (alist, sym_port_read_options);
2188 if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE))
2189 read_options = scm_to_uint (scm_read_options);
2190 else
2191 read_options = READ_OPTIONS_INHERIT_ALL;
2192 read_options &= ~(READ_OPTION_MASK << option);
2193 read_options |= new_value << option;
2194 scm_read_options = scm_from_uint (read_options);
2195 alist = scm_assq_set_x (alist, sym_port_read_options, scm_read_options);
2196 scm_hashq_set_x (scm_i_port_weak_hash, port, alist);
2197 scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
2198 }
2199
2200 /* Set OPTS and PORT's case-insensitivity according to VALUE. */
2201 static void
2202 set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts, int value)
2203 {
2204 value = !!value;
2205 opts->case_insensitive_p = value;
2206 set_port_read_option (port, READ_OPTION_CASE_INSENSITIVE_P, value);
2207 }
2208
2209 /* Set OPTS and PORT's square_brackets_p option according to VALUE. */
2210 static void
2211 set_port_square_brackets_p (SCM port, scm_t_read_opts *opts, int value)
2212 {
2213 value = !!value;
2214 opts->square_brackets_p = value;
2215 set_port_read_option (port, READ_OPTION_SQUARE_BRACKETS_P, value);
2216 }
2217
2218 /* Set OPTS and PORT's curly_infix_p option according to VALUE. */
2219 static void
2220 set_port_curly_infix_p (SCM port, scm_t_read_opts *opts, int value)
2221 {
2222 value = !!value;
2223 opts->curly_infix_p = value;
2224 set_port_read_option (port, READ_OPTION_CURLY_INFIX_P, value);
2225 }
2226
2227 /* Initialize OPTS based on PORT's read options and the global read
2228 options. */
2229 static void
2230 init_read_options (SCM port, scm_t_read_opts *opts)
2231 {
2232 SCM alist, val, scm_read_options;
2233 unsigned int read_options, x;
2234
2235 scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex);
2236 alist = scm_hashq_ref (scm_i_port_weak_hash, port, SCM_BOOL_F);
2237 scm_read_options = scm_assq_ref (alist, sym_port_read_options);
2238 scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
2239
2240 if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE))
2241 read_options = scm_to_uint (scm_read_options);
2242 else
2243 read_options = READ_OPTIONS_INHERIT_ALL;
2244
2245 x = READ_OPTION_MASK & (read_options >> READ_OPTION_KEYWORD_STYLE);
2246 if (x == READ_OPTION_INHERIT)
2247 {
2248 val = SCM_PACK (SCM_KEYWORD_STYLE);
2249 if (scm_is_eq (val, scm_keyword_prefix))
2250 x = KEYWORD_STYLE_PREFIX;
2251 else if (scm_is_eq (val, scm_keyword_postfix))
2252 x = KEYWORD_STYLE_POSTFIX;
2253 else
2254 x = KEYWORD_STYLE_HASH_PREFIX;
2255 }
2256 opts->keyword_style = x;
2257
2258 #define RESOLVE_BOOLEAN_OPTION(NAME, name) \
2259 do \
2260 { \
2261 x = READ_OPTION_MASK & (read_options >> READ_OPTION_ ## NAME); \
2262 if (x == READ_OPTION_INHERIT) \
2263 x = !!SCM_ ## NAME; \
2264 opts->name = x; \
2265 } \
2266 while (0)
2267
2268 RESOLVE_BOOLEAN_OPTION (COPY_SOURCE_P, copy_source_p);
2269 RESOLVE_BOOLEAN_OPTION (RECORD_POSITIONS_P, record_positions_p);
2270 RESOLVE_BOOLEAN_OPTION (CASE_INSENSITIVE_P, case_insensitive_p);
2271 RESOLVE_BOOLEAN_OPTION (R6RS_ESCAPES_P, r6rs_escapes_p);
2272 RESOLVE_BOOLEAN_OPTION (SQUARE_BRACKETS_P, square_brackets_p);
2273 RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P, hungry_eol_escapes_p);
2274 RESOLVE_BOOLEAN_OPTION (CURLY_INFIX_P, curly_infix_p);
2275
2276 #undef RESOLVE_BOOLEAN_OPTION
2277
2278 opts->neoteric_p = 0;
2279 }
2280
2281 void
2282 scm_init_read ()
2283 {
2284 SCM read_hash_procs;
2285
2286 read_hash_procs = scm_make_fluid_with_default (SCM_EOL);
2287
2288 scm_i_read_hash_procedures =
2289 SCM_VARIABLE_LOC (scm_c_define ("%read-hash-procedures", read_hash_procs));
2290
2291 scm_init_opts (scm_read_options, scm_read_opts);
2292 #include "libguile/read.x"
2293 }
2294
2295 /*
2296 Local Variables:
2297 c-file-style: "gnu"
2298 End:
2299 */