Removed empty file genio.h and references to it.
[bpt/guile.git] / libguile / read.c
1 /* Copyright (C) 1995,1996,1997, 1999 Free Software Foundation, Inc.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2, or (at your option)
6 * any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this software; see the file COPYING. If not, write to
15 * the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
16 * Boston, MA 02111-1307 USA
17 *
18 * As a special exception, the Free Software Foundation gives permission
19 * for additional uses of the text contained in its release of GUILE.
20 *
21 * The exception is that, if you link the GUILE library with other files
22 * to produce an executable, this does not by itself cause the
23 * resulting executable to be covered by the GNU General Public License.
24 * Your use of that executable is in no way restricted on account of
25 * linking the GUILE library code into it.
26 *
27 * This exception does not however invalidate any other reasons why
28 * the executable file might be covered by the GNU General Public License.
29 *
30 * This exception applies only to the code released by the
31 * Free Software Foundation under the name GUILE. If you copy
32 * code from other Free Software Foundation releases into a copy of
33 * GUILE, as the General Public License permits, the exception does
34 * not apply to the code that you add in this way. To avoid misleading
35 * anyone as to the status of such modified files, you must delete
36 * this exception notice from them.
37 *
38 * If you write modifications of your own for GUILE, it is your choice
39 * whether to permit this exception to apply to your modifications.
40 * If you do not wish that, delete this exception notice. */
41
42 /* Software engineering face-lift by Greg J. Badros, 11-Dec-1999,
43 gjb@cs.washington.edu, http://www.cs.washington.edu/homes/gjb */
44
45 \f
46
47 #include <stdio.h>
48 #include "_scm.h"
49 #include "chars.h"
50 #include "eval.h"
51 #include "unif.h"
52 #include "keywords.h"
53 #include "alist.h"
54 #include "srcprop.h"
55 #include "hashtab.h"
56 #include "hash.h"
57
58 #include "validate.h"
59 #include "read.h"
60
61 \f
62
63 SCM_SYMBOL (scm_keyword_prefix, "prefix");
64
65 scm_option scm_read_opts[] = {
66 { SCM_OPTION_BOOLEAN, "copy", 0,
67 "Copy source code expressions." },
68 { SCM_OPTION_BOOLEAN, "positions", 0,
69 "Record positions of source code expressions." },
70 { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
71 "Convert symbols to lower case."},
72 { SCM_OPTION_SCM, "keywords", SCM_UNPACK (SCM_BOOL_F),
73 "Style of keyword recognition: #f or 'prefix"}
74 };
75
76 SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
77 (SCM setting),
78 "")
79 #define FUNC_NAME s_scm_read_options
80 {
81 SCM ans = scm_options (setting,
82 scm_read_opts,
83 SCM_N_READ_OPTIONS,
84 FUNC_NAME);
85 if (SCM_COPY_SOURCE_P)
86 SCM_RECORD_POSITIONS_P = 1;
87 return ans;
88 }
89 #undef FUNC_NAME
90
91 /* An association list mapping extra hash characters to procedures. */
92 static SCM *scm_read_hash_procedures;
93
94 SCM_DEFINE (scm_read, "read", 0, 1, 0,
95 (SCM port),
96 "")
97 #define FUNC_NAME s_scm_read
98 {
99 int c;
100 SCM tok_buf, copy;
101
102 if (SCM_UNBNDP (port))
103 port = scm_cur_inp;
104 SCM_VALIDATE_OPINPORT (1,port);
105
106 c = scm_flush_ws (port, (char *) NULL);
107 if (EOF == c)
108 return SCM_EOF_VAL;
109 scm_ungetc (c, port);
110
111 tok_buf = scm_makstr (30L, 0);
112 return scm_lreadr (&tok_buf, port, &copy);
113 }
114 #undef FUNC_NAME
115
116
117
118 char *
119 scm_grow_tok_buf (SCM *tok_buf)
120 {
121 scm_vector_set_length_x (*tok_buf, SCM_MAKINUM (2 * SCM_LENGTH (*tok_buf)));
122 return SCM_CHARS (*tok_buf);
123 }
124
125
126
127 int
128 scm_flush_ws (SCM port, const char *eoferr)
129 {
130 register int c;
131 while (1)
132 switch (c = scm_getc (port))
133 {
134 case EOF:
135 goteof:
136 if (eoferr)
137 scm_wta (SCM_UNDEFINED, "end of file in ", eoferr);
138 return c;
139 case ';':
140 lp:
141 switch (c = scm_getc (port))
142 {
143 case EOF:
144 goto goteof;
145 default:
146 goto lp;
147 case SCM_LINE_INCREMENTORS:
148 break;
149 }
150 break;
151 case SCM_LINE_INCREMENTORS:
152 case SCM_SINGLE_SPACES:
153 case '\t':
154 break;
155 default:
156 return c;
157 }
158 }
159
160
161
162 int
163 scm_casei_streq (char *s1, char *s2)
164 {
165 while (*s1 && *s2)
166 if (scm_downcase((int)*s1) != scm_downcase((int)*s2))
167 return 0;
168 else
169 {
170 ++s1;
171 ++s2;
172 }
173 return !(*s1 || *s2);
174 }
175
176
177 /* recsexpr is used when recording expressions
178 * constructed by read:sharp.
179 */
180 #ifndef DEBUG_EXTENSIONS
181 #define recsexpr(obj, line, column, filename) (obj)
182 #else
183 static SCM
184 recsexpr (SCM obj,int line,int column,SCM filename)
185 {
186 if (SCM_IMP (obj) || SCM_NCONSP(obj))
187 return obj;
188 {
189 SCM tmp = obj, copy;
190 /* If this sexpr is visible in the read:sharp source, we want to
191 keep that information, so only record non-constant cons cells
192 which haven't previously been read by the reader. */
193 if (SCM_FALSEP (scm_whash_lookup (scm_source_whash, obj)))
194 {
195 if (SCM_COPY_SOURCE_P)
196 {
197 copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename),
198 SCM_UNDEFINED);
199 while ((tmp = SCM_CDR (tmp)) && SCM_CONSP (tmp))
200 {
201 SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp),
202 line,
203 column,
204 filename),
205 SCM_UNDEFINED));
206 copy = SCM_CDR (copy);
207 }
208 SCM_SETCDR (copy, tmp);
209 }
210 else
211 {
212 recsexpr (SCM_CAR (obj), line, column, filename);
213 while ((tmp = SCM_CDR (tmp)) && SCM_CONSP (tmp))
214 recsexpr (SCM_CAR (tmp), line, column, filename);
215 copy = SCM_UNDEFINED;
216 }
217 scm_whash_insert (scm_source_whash,
218 obj,
219 scm_make_srcprops (line,
220 column,
221 filename,
222 copy,
223 SCM_EOL));
224 }
225 return obj;
226 }
227 }
228 #endif
229
230 /* Consume an SCSH-style block comment. Assume that we've already
231 read the initial `#!', and eat characters until we get a
232 newline/exclamation-point/sharp-sign/newline sequence. */
233
234 static void
235 skip_scsh_block_comment (SCM port)
236 {
237 /* Is this portable? Dear God, spare me from the non-eight-bit
238 characters. But is it tasteful? */
239 long history = 0;
240
241 for (;;)
242 {
243 int c = scm_getc (port);
244
245 if (c == EOF)
246 scm_wta (SCM_UNDEFINED,
247 "unterminated `#! ... !#' comment", "read");
248 history = ((history << 8) | (c & 0xff)) & 0xffffffff;
249
250 /* Were the last four characters read "\n!#\n"? */
251 if (history == (('\n' << 24) | ('!' << 16) | ('#' << 8) | '\n'))
252 return;
253 }
254 }
255
256 static SCM scm_get_hash_procedure(int c);
257
258 static char s_list[]="list";
259
260 SCM
261 scm_lreadr (SCM *tok_buf,SCM port,SCM *copy)
262 {
263 int c;
264 scm_sizet j;
265 SCM p;
266
267 tryagain:
268 c = scm_flush_ws (port, s_scm_read);
269 tryagain_no_flush_ws:
270 switch (c)
271 {
272 case EOF:
273 return SCM_EOF_VAL;
274
275 case '(':
276 return SCM_RECORD_POSITIONS_P
277 ? scm_lreadrecparen (tok_buf, port, s_list, copy)
278 : scm_lreadparen (tok_buf, port, s_list, copy);
279 case ')':
280 scm_wta (SCM_UNDEFINED, "unexpected \")\"", "read");
281 goto tryagain;
282
283 case '\'':
284 p = scm_sym_quote;
285 goto recquote;
286 case '`':
287 p = scm_sym_quasiquote;
288 goto recquote;
289 case ',':
290 c = scm_getc (port);
291 if ('@' == c)
292 p = scm_sym_uq_splicing;
293 else
294 {
295 scm_ungetc (c, port);
296 p = scm_sym_unquote;
297 }
298 recquote:
299 p = scm_cons2 (p,
300 scm_lreadr (tok_buf, port, copy),
301 SCM_EOL);
302 if (SCM_RECORD_POSITIONS_P)
303 scm_whash_insert (scm_source_whash,
304 p,
305 scm_make_srcprops (SCM_LINUM (port),
306 SCM_COL (port) - 1,
307 SCM_FILENAME (port),
308 SCM_COPY_SOURCE_P
309 ? (*copy = scm_cons2 (SCM_CAR (p),
310 SCM_CAR (SCM_CDR (p)),
311 SCM_EOL))
312 : SCM_UNDEFINED,
313 SCM_EOL));
314 return p;
315 case '#':
316 c = scm_getc (port);
317 switch (c)
318 {
319 case '(':
320 p = scm_lreadparen (tok_buf, port, "vector", copy);
321 return SCM_NULLP (p) ? scm_nullvect : scm_vector (p);
322
323 case 't':
324 case 'T':
325 return SCM_BOOL_T;
326 case 'f':
327 case 'F':
328 return SCM_BOOL_F;
329
330 case 'b':
331 case 'B':
332 case 'o':
333 case 'O':
334 case 'd':
335 case 'D':
336 case 'x':
337 case 'X':
338 case 'i':
339 case 'I':
340 case 'e':
341 case 'E':
342 scm_ungetc (c, port);
343 c = '#';
344 goto num;
345
346 case '!':
347 /* start of a shell script. Parse as a block comment,
348 terminated by !#, just like SCSH. */
349 skip_scsh_block_comment (port);
350 /* EOF is not an error here */
351 c = scm_flush_ws (port, (char *)NULL);
352 goto tryagain_no_flush_ws;
353
354 #ifdef HAVE_ARRAYS
355 case '*':
356 j = scm_read_token (c, tok_buf, port, 0);
357 p = scm_istr2bve (SCM_CHARS (*tok_buf) + 1, (long) (j - 1));
358 if (SCM_NFALSEP (p))
359 return p;
360 else
361 goto unkshrp;
362 #endif
363
364 case '{':
365 j = scm_read_token (c, tok_buf, port, 1);
366 p = scm_intern (SCM_CHARS (*tok_buf), j);
367 return SCM_CAR (p);
368
369 case '\\':
370 c = scm_getc (port);
371 j = scm_read_token (c, tok_buf, port, 0);
372 if (j == 1)
373 return SCM_MAKE_CHAR (c);
374 if (c >= '0' && c < '8')
375 {
376 p = scm_istr2int (SCM_CHARS (*tok_buf), (long) j, 8);
377 if (SCM_NFALSEP (p))
378 return SCM_MAKE_CHAR (SCM_INUM (p));
379 }
380 for (c = 0; c < scm_n_charnames; c++)
381 if (scm_charnames[c]
382 && (scm_casei_streq (scm_charnames[c], SCM_CHARS (*tok_buf))))
383 return SCM_MAKE_CHAR (scm_charnums[c]);
384 scm_wta (SCM_UNDEFINED, "unknown # object: #\\", SCM_CHARS (*tok_buf));
385
386 /* #:SYMBOL is a syntax for keywords supported in all contexts. */
387 case ':':
388 j = scm_read_token ('-', tok_buf, port, 0);
389 p = scm_intern (SCM_CHARS (*tok_buf), j);
390 return scm_make_keyword_from_dash_symbol (SCM_CAR (p));
391
392 default:
393 callshrp:
394 {
395 SCM sharp = scm_get_hash_procedure (c);
396
397 if (SCM_NIMP (sharp))
398 {
399 int line = SCM_LINUM (port);
400 int column = SCM_COL (port) - 2;
401 SCM got;
402
403 got = scm_apply (sharp,
404 SCM_MAKE_CHAR (c),
405 scm_acons (port, SCM_EOL, SCM_EOL));
406 if (SCM_UNSPECIFIED == got)
407 goto unkshrp;
408 if (SCM_RECORD_POSITIONS_P)
409 return *copy = recsexpr (got, line, column,
410 SCM_FILENAME (port));
411 else
412 return got;
413 }
414 }
415 unkshrp:
416 scm_misc_error (s_scm_read, "Unknown # object: ~S",
417 scm_listify (SCM_MAKE_CHAR (c), SCM_UNDEFINED));
418 }
419
420 case '"':
421 j = 0;
422 while ('"' != (c = scm_getc (port)))
423 {
424 SCM_ASSERT (EOF != c, SCM_UNDEFINED, "end of file in ", "string");
425
426 while (j + 2 >= SCM_LENGTH (*tok_buf))
427 scm_grow_tok_buf (tok_buf);
428
429 if (c == '\\')
430 switch (c = scm_getc (port))
431 {
432 case '\n':
433 continue;
434 case '0':
435 c = '\0';
436 break;
437 case 'f':
438 c = '\f';
439 break;
440 case 'n':
441 c = '\n';
442 break;
443 case 'r':
444 c = '\r';
445 break;
446 case 't':
447 c = '\t';
448 break;
449 case 'a':
450 c = '\007';
451 break;
452 case 'v':
453 c = '\v';
454 break;
455 }
456 SCM_CHARS (*tok_buf)[j] = c;
457 ++j;
458 }
459 if (j == 0)
460 return scm_nullstr;
461 SCM_CHARS (*tok_buf)[j] = 0;
462 {
463 SCM str;
464 str = scm_makfromstr (SCM_CHARS (*tok_buf), j, 0);
465 return str;
466 }
467
468 case'0':case '1':case '2':case '3':case '4':
469 case '5':case '6':case '7':case '8':case '9':
470 case '.':
471 case '-':
472 case '+':
473 num:
474 j = scm_read_token (c, tok_buf, port, 0);
475 p = scm_istring2number (SCM_CHARS (*tok_buf), (long) j, 10L);
476 if (SCM_NFALSEP (p))
477 return p;
478 if (c == '#')
479 {
480 if ((j == 2) && (scm_getc (port) == '('))
481 {
482 scm_ungetc ('(', port);
483 c = SCM_CHARS (*tok_buf)[1];
484 goto callshrp;
485 }
486 scm_wta (SCM_UNDEFINED, "unknown # object", SCM_CHARS (*tok_buf));
487 }
488 goto tok;
489
490 case ':':
491 if (SCM_PACK (SCM_KEYWORD_STYLE) == scm_keyword_prefix)
492 {
493 j = scm_read_token ('-', tok_buf, port, 0);
494 p = scm_intern (SCM_CHARS (*tok_buf), j);
495 return scm_make_keyword_from_dash_symbol (SCM_CAR (p));
496 }
497 /* fallthrough */
498 default:
499 j = scm_read_token (c, tok_buf, port, 0);
500 /* fallthrough */
501
502 tok:
503 p = scm_intern (SCM_CHARS (*tok_buf), j);
504 return SCM_CAR (p);
505 }
506 }
507
508 #ifdef _UNICOS
509 _Pragma ("noopt"); /* # pragma _CRI noopt */
510 #endif
511
512 scm_sizet
513 scm_read_token (int ic, SCM *tok_buf, SCM port, int weird)
514 {
515 register scm_sizet j;
516 register int c;
517 register char *p;
518
519 c = (SCM_CASE_INSENSITIVE_P ? scm_downcase(ic) : ic);
520 p = SCM_CHARS (*tok_buf);
521
522 if (weird)
523 j = 0;
524 else
525 {
526 j = 0;
527 while (j + 2 >= SCM_LENGTH (*tok_buf))
528 p = scm_grow_tok_buf (tok_buf);
529 p[j] = c;
530 ++j;
531 }
532
533 while (1)
534 {
535 while (j + 2 >= SCM_LENGTH (*tok_buf))
536 p = scm_grow_tok_buf (tok_buf);
537 c = scm_getc (port);
538 switch (c)
539 {
540 case '(':
541 case ')':
542 case '"':
543 case ';':
544 case SCM_WHITE_SPACES:
545 case SCM_LINE_INCREMENTORS:
546 if (weird)
547 goto default_case;
548
549 scm_ungetc (c, port);
550 case EOF:
551 eof_case:
552 p[j] = 0;
553 return j;
554 case '\\':
555 if (!weird)
556 goto default_case;
557 else
558 {
559 c = scm_getc (port);
560 if (c == EOF)
561 goto eof_case;
562 else
563 goto default_case;
564 }
565 case '}':
566 if (!weird)
567 goto default_case;
568
569 c = scm_getc (port);
570 if (c == '#')
571 {
572 p[j] = 0;
573 return j;
574 }
575 else
576 {
577 scm_ungetc (c, port);
578 c = '}';
579 goto default_case;
580 }
581
582 default:
583 default_case:
584 {
585 c = (SCM_CASE_INSENSITIVE_P ? scm_downcase(c) : c);
586 p[j] = c;
587 ++j;
588 }
589
590 }
591 }
592 }
593
594 #ifdef _UNICOS
595 _Pragma ("opt"); /* # pragma _CRI opt */
596 #endif
597
598 SCM
599 scm_lreadparen (SCM *tok_buf, SCM port, char *name, SCM *copy)
600 {
601 SCM tmp;
602 SCM tl;
603 SCM ans;
604 int c;
605
606 c = scm_flush_ws (port, name);
607 if (')' == c)
608 return SCM_EOL;
609 scm_ungetc (c, port);
610 if (scm_sym_dot == (tmp = scm_lreadr (tok_buf, port, copy)))
611 {
612 ans = scm_lreadr (tok_buf, port, copy);
613 closeit:
614 if (')' != (c = scm_flush_ws (port, name)))
615 scm_wta (SCM_UNDEFINED, "missing close paren", "");
616 return ans;
617 }
618 ans = tl = scm_cons (tmp, SCM_EOL);
619 while (')' != (c = scm_flush_ws (port, name)))
620 {
621 scm_ungetc (c, port);
622 if (scm_sym_dot == (tmp = scm_lreadr (tok_buf, port, copy)))
623 {
624 SCM_SETCDR (tl, scm_lreadr (tok_buf, port, copy));
625 goto closeit;
626 }
627 SCM_SETCDR (tl, scm_cons (tmp, SCM_EOL));
628 tl = SCM_CDR (tl);
629 }
630 return ans;
631 }
632
633
634 SCM
635 scm_lreadrecparen (SCM *tok_buf, SCM port, char *name, SCM *copy)
636 {
637 register int c;
638 register SCM tmp;
639 register SCM tl, tl2 = SCM_EOL;
640 SCM ans, ans2 = SCM_EOL;
641 /* Need to capture line and column numbers here. */
642 int line = SCM_LINUM (port);
643 int column = SCM_COL (port) - 1;
644
645 c = scm_flush_ws (port, name);
646 if (')' == c)
647 return SCM_EOL;
648 scm_ungetc (c, port);
649 if (scm_sym_dot == (tmp = scm_lreadr (tok_buf, port, copy)))
650 {
651 ans = scm_lreadr (tok_buf, port, copy);
652 if (')' != (c = scm_flush_ws (port, name)))
653 scm_wta (SCM_UNDEFINED, "missing close paren", "");
654 return ans;
655 }
656 /* Build the head of the list structure. */
657 ans = tl = scm_cons (tmp, SCM_EOL);
658 if (SCM_COPY_SOURCE_P)
659 ans2 = tl2 = scm_cons (SCM_CONSP (tmp)
660 ? *copy
661 : tmp,
662 SCM_EOL);
663 while (')' != (c = scm_flush_ws (port, name)))
664 {
665 scm_ungetc (c, port);
666 if (scm_sym_dot == (tmp = scm_lreadr (tok_buf, port, copy)))
667 {
668 SCM_SETCDR (tl, tmp = scm_lreadr (tok_buf, port, copy));
669 if (SCM_COPY_SOURCE_P)
670 SCM_SETCDR (tl2, scm_cons (SCM_CONSP (tmp)
671 ? *copy
672 : tmp,
673 SCM_EOL));
674 if (')' != (c = scm_flush_ws (port, name)))
675 scm_wta (SCM_UNDEFINED, "missing close paren", "");
676 goto exit;
677 }
678 tl = SCM_SETCDR (tl, scm_cons (tmp, SCM_EOL));
679 if (SCM_COPY_SOURCE_P)
680 tl2 = SCM_SETCDR (tl2, scm_cons (SCM_CONSP (tmp)
681 ? *copy
682 : tmp,
683 SCM_EOL));
684 }
685 exit:
686 scm_whash_insert (scm_source_whash,
687 ans,
688 scm_make_srcprops (line,
689 column,
690 SCM_FILENAME (port),
691 SCM_COPY_SOURCE_P
692 ? *copy = ans2
693 : SCM_UNDEFINED,
694 SCM_EOL));
695 return ans;
696 }
697
698
699 \f
700
701 /* Manipulate the read-hash-procedures alist. This could be written in
702 Scheme, but maybe it will also be used by C code during initialisation. */
703 SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
704 (SCM chr, SCM proc),
705 "")
706 #define FUNC_NAME s_scm_read_hash_extend
707 {
708 SCM this;
709 SCM prev;
710
711 SCM_VALIDATE_CHAR (1,chr);
712 SCM_ASSERT (SCM_FALSEP (proc) || SCM_NIMP(proc), proc, SCM_ARG2,
713 FUNC_NAME);
714
715 /* Check if chr is already in the alist. */
716 this = *scm_read_hash_procedures;
717 prev = SCM_BOOL_F;
718 while (1)
719 {
720 if (SCM_NULLP (this))
721 {
722 /* not found, so add it to the beginning. */
723 if (SCM_NFALSEP (proc))
724 {
725 *scm_read_hash_procedures =
726 scm_cons (scm_cons (chr, proc), *scm_read_hash_procedures);
727 }
728 break;
729 }
730 if (chr == SCM_CAAR (this))
731 {
732 /* already in the alist. */
733 if (SCM_FALSEP (proc))
734 {
735 /* remove it. */
736 if (prev == SCM_BOOL_F)
737 {
738 *scm_read_hash_procedures =
739 SCM_CDR (*scm_read_hash_procedures);
740 }
741 else
742 scm_set_cdr_x (prev, SCM_CDR (this));
743 }
744 else
745 {
746 /* replace it. */
747 scm_set_cdr_x (SCM_CAR (this), proc);
748 }
749 break;
750 }
751 prev = this;
752 this = SCM_CDR (this);
753 }
754
755 return SCM_UNSPECIFIED;
756 }
757 #undef FUNC_NAME
758
759 /* Recover the read-hash procedure corresponding to char c. */
760 static SCM
761 scm_get_hash_procedure (int c)
762 {
763 SCM rest = *scm_read_hash_procedures;
764
765 while (1)
766 {
767 if (SCM_NULLP (rest))
768 return SCM_BOOL_F;
769
770 if (SCM_CHAR (SCM_CAAR (rest)) == c)
771 return SCM_CDAR (rest);
772
773 rest = SCM_CDR (rest);
774 }
775 }
776
777 void
778 scm_init_read ()
779 {
780 scm_read_hash_procedures =
781 SCM_CDRLOC (scm_sysintern ("read-hash-procedures", SCM_EOL));
782
783 scm_init_opts (scm_read_options, scm_read_opts, SCM_N_READ_OPTIONS);
784 #include "read.x"
785 }