*.[ch]: make a distinction between SCM as a generic
[bpt/guile.git] / libguile / read.c
1 /* Copyright (C) 1995,1996,1997, 1999 Free Software Foundation, Inc.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2, or (at your option)
6 * any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this software; see the file COPYING. If not, write to
15 * the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
16 * Boston, MA 02111-1307 USA
17 *
18 * As a special exception, the Free Software Foundation gives permission
19 * for additional uses of the text contained in its release of GUILE.
20 *
21 * The exception is that, if you link the GUILE library with other files
22 * to produce an executable, this does not by itself cause the
23 * resulting executable to be covered by the GNU General Public License.
24 * Your use of that executable is in no way restricted on account of
25 * linking the GUILE library code into it.
26 *
27 * This exception does not however invalidate any other reasons why
28 * the executable file might be covered by the GNU General Public License.
29 *
30 * This exception applies only to the code released by the
31 * Free Software Foundation under the name GUILE. If you copy
32 * code from other Free Software Foundation releases into a copy of
33 * GUILE, as the General Public License permits, the exception does
34 * not apply to the code that you add in this way. To avoid misleading
35 * anyone as to the status of such modified files, you must delete
36 * this exception notice from them.
37 *
38 * If you write modifications of your own for GUILE, it is your choice
39 * whether to permit this exception to apply to your modifications.
40 * If you do not wish that, delete this exception notice. */
41
42 /* Software engineering face-lift by Greg J. Badros, 11-Dec-1999,
43 gjb@cs.washington.edu, http://www.cs.washington.edu/homes/gjb */
44
45 \f
46
47 #include <stdio.h>
48 #include "_scm.h"
49 #include "chars.h"
50 #include "genio.h"
51 #include "eval.h"
52 #include "unif.h"
53 #include "keywords.h"
54 #include "alist.h"
55 #include "srcprop.h"
56 #include "hashtab.h"
57 #include "hash.h"
58
59 #include "validate.h"
60 #include "read.h"
61
62 \f
63
64 SCM_SYMBOL (scm_keyword_prefix, "prefix");
65
66 scm_option scm_read_opts[] = {
67 { SCM_OPTION_BOOLEAN, "copy", 0,
68 "Copy source code expressions." },
69 { SCM_OPTION_BOOLEAN, "positions", 0,
70 "Record positions of source code expressions." },
71 { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
72 "Convert symbols to lower case."},
73 { SCM_OPTION_SCM, "keywords", SCM_ASWORD (SCM_BOOL_F),
74 "Style of keyword recognition: #f or 'prefix"}
75 };
76
77 SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
78 (SCM setting),
79 "")
80 #define FUNC_NAME s_scm_read_options
81 {
82 SCM ans = scm_options (setting,
83 scm_read_opts,
84 SCM_N_READ_OPTIONS,
85 FUNC_NAME);
86 if (SCM_COPY_SOURCE_P)
87 SCM_RECORD_POSITIONS_P = 1;
88 return ans;
89 }
90 #undef FUNC_NAME
91
92 /* An association list mapping extra hash characters to procedures. */
93 static SCM *scm_read_hash_procedures;
94
95 SCM_DEFINE (scm_read, "read", 0, 1, 0,
96 (SCM port),
97 "")
98 #define FUNC_NAME s_scm_read
99 {
100 int c;
101 SCM tok_buf, copy;
102
103 if (SCM_UNBNDP (port))
104 port = scm_cur_inp;
105 SCM_VALIDATE_OPINPORT (1,port);
106
107 c = scm_flush_ws (port, (char *) NULL);
108 if (EOF == c)
109 return SCM_EOF_VAL;
110 scm_ungetc (c, port);
111
112 tok_buf = scm_makstr (30L, 0);
113 return scm_lreadr (&tok_buf, port, &copy);
114 }
115 #undef FUNC_NAME
116
117
118
119 char *
120 scm_grow_tok_buf (SCM *tok_buf)
121 {
122 scm_vector_set_length_x (*tok_buf, SCM_MAKINUM (2 * SCM_LENGTH (*tok_buf)));
123 return SCM_CHARS (*tok_buf);
124 }
125
126
127
128 int
129 scm_flush_ws (SCM port, const char *eoferr)
130 {
131 register int c;
132 while (1)
133 switch (c = scm_getc (port))
134 {
135 case EOF:
136 goteof:
137 if (eoferr)
138 scm_wta (SCM_UNDEFINED, "end of file in ", eoferr);
139 return c;
140 case ';':
141 lp:
142 switch (c = scm_getc (port))
143 {
144 case EOF:
145 goto goteof;
146 default:
147 goto lp;
148 case SCM_LINE_INCREMENTORS:
149 break;
150 }
151 break;
152 case SCM_LINE_INCREMENTORS:
153 case SCM_SINGLE_SPACES:
154 case '\t':
155 break;
156 default:
157 return c;
158 }
159 }
160
161
162
163 int
164 scm_casei_streq (char *s1, char *s2)
165 {
166 while (*s1 && *s2)
167 if (scm_downcase((int)*s1) != scm_downcase((int)*s2))
168 return 0;
169 else
170 {
171 ++s1;
172 ++s2;
173 }
174 return !(*s1 || *s2);
175 }
176
177
178 /* recsexpr is used when recording expressions
179 * constructed by read:sharp.
180 */
181 #ifndef DEBUG_EXTENSIONS
182 #define recsexpr(obj, line, column, filename) (obj)
183 #else
184 static SCM
185 recsexpr (SCM obj,int line,int column,SCM filename)
186 {
187 if (SCM_IMP (obj) || SCM_NCONSP(obj))
188 return obj;
189 {
190 SCM tmp = obj, copy;
191 /* If this sexpr is visible in the read:sharp source, we want to
192 keep that information, so only record non-constant cons cells
193 which haven't previously been read by the reader. */
194 if (SCM_FALSEP (scm_whash_lookup (scm_source_whash, obj)))
195 {
196 if (SCM_COPY_SOURCE_P)
197 {
198 copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename),
199 SCM_UNDEFINED);
200 while ((tmp = SCM_CDR (tmp)) && SCM_CONSP (tmp))
201 {
202 SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp),
203 line,
204 column,
205 filename),
206 SCM_UNDEFINED));
207 copy = SCM_CDR (copy);
208 }
209 SCM_SETCDR (copy, tmp);
210 }
211 else
212 {
213 recsexpr (SCM_CAR (obj), line, column, filename);
214 while ((tmp = SCM_CDR (tmp)) && SCM_CONSP (tmp))
215 recsexpr (SCM_CAR (tmp), line, column, filename);
216 copy = SCM_UNDEFINED;
217 }
218 scm_whash_insert (scm_source_whash,
219 obj,
220 scm_make_srcprops (line,
221 column,
222 filename,
223 copy,
224 SCM_EOL));
225 }
226 return obj;
227 }
228 }
229 #endif
230
231 /* Consume an SCSH-style block comment. Assume that we've already
232 read the initial `#!', and eat characters until we get a
233 newline/exclamation-point/sharp-sign/newline sequence. */
234
235 static void
236 skip_scsh_block_comment (SCM port)
237 {
238 /* Is this portable? Dear God, spare me from the non-eight-bit
239 characters. But is it tasteful? */
240 long history = 0;
241
242 for (;;)
243 {
244 int c = scm_getc (port);
245
246 if (c == EOF)
247 scm_wta (SCM_UNDEFINED,
248 "unterminated `#! ... !#' comment", "read");
249 history = ((history << 8) | (c & 0xff)) & 0xffffffff;
250
251 /* Were the last four characters read "\n!#\n"? */
252 if (history == (('\n' << 24) | ('!' << 16) | ('#' << 8) | '\n'))
253 return;
254 }
255 }
256
257 static SCM scm_get_hash_procedure(int c);
258
259 static char s_list[]="list";
260
261 SCM
262 scm_lreadr (SCM *tok_buf,SCM port,SCM *copy)
263 {
264 int c;
265 scm_sizet j;
266 SCM p;
267
268 tryagain:
269 c = scm_flush_ws (port, s_scm_read);
270 tryagain_no_flush_ws:
271 switch (c)
272 {
273 case EOF:
274 return SCM_EOF_VAL;
275
276 case '(':
277 return SCM_RECORD_POSITIONS_P
278 ? scm_lreadrecparen (tok_buf, port, s_list, copy)
279 : scm_lreadparen (tok_buf, port, s_list, copy);
280 case ')':
281 scm_wta (SCM_UNDEFINED, "unexpected \")\"", "read");
282 goto tryagain;
283
284 case '\'':
285 p = scm_sym_quote;
286 goto recquote;
287 case '`':
288 p = scm_sym_quasiquote;
289 goto recquote;
290 case ',':
291 c = scm_getc (port);
292 if ('@' == c)
293 p = scm_sym_uq_splicing;
294 else
295 {
296 scm_ungetc (c, port);
297 p = scm_sym_unquote;
298 }
299 recquote:
300 p = scm_cons2 (p,
301 scm_lreadr (tok_buf, port, copy),
302 SCM_EOL);
303 if (SCM_RECORD_POSITIONS_P)
304 scm_whash_insert (scm_source_whash,
305 p,
306 scm_make_srcprops (SCM_LINUM (port),
307 SCM_COL (port) - 1,
308 SCM_FILENAME (port),
309 SCM_COPY_SOURCE_P
310 ? (*copy = scm_cons2 (SCM_CAR (p),
311 SCM_CAR (SCM_CDR (p)),
312 SCM_EOL))
313 : SCM_UNDEFINED,
314 SCM_EOL));
315 return p;
316 case '#':
317 c = scm_getc (port);
318 switch (c)
319 {
320 case '(':
321 p = scm_lreadparen (tok_buf, port, "vector", copy);
322 return SCM_NULLP (p) ? scm_nullvect : scm_vector (p);
323
324 case 't':
325 case 'T':
326 return SCM_BOOL_T;
327 case 'f':
328 case 'F':
329 return SCM_BOOL_F;
330
331 case 'b':
332 case 'B':
333 case 'o':
334 case 'O':
335 case 'd':
336 case 'D':
337 case 'x':
338 case 'X':
339 case 'i':
340 case 'I':
341 case 'e':
342 case 'E':
343 scm_ungetc (c, port);
344 c = '#';
345 goto num;
346
347 case '!':
348 /* start of a shell script. Parse as a block comment,
349 terminated by !#, just like SCSH. */
350 skip_scsh_block_comment (port);
351 /* EOF is not an error here */
352 c = scm_flush_ws (port, (char *)NULL);
353 goto tryagain_no_flush_ws;
354
355 #ifdef HAVE_ARRAYS
356 case '*':
357 j = scm_read_token (c, tok_buf, port, 0);
358 p = scm_istr2bve (SCM_CHARS (*tok_buf) + 1, (long) (j - 1));
359 if (SCM_NFALSEP (p))
360 return p;
361 else
362 goto unkshrp;
363 #endif
364
365 case '{':
366 j = scm_read_token (c, tok_buf, port, 1);
367 p = scm_intern (SCM_CHARS (*tok_buf), j);
368 return SCM_CAR (p);
369
370 case '\\':
371 c = scm_getc (port);
372 j = scm_read_token (c, tok_buf, port, 0);
373 if (j == 1)
374 return SCM_MAKE_CHAR (c);
375 if (c >= '0' && c < '8')
376 {
377 p = scm_istr2int (SCM_CHARS (*tok_buf), (long) j, 8);
378 if (SCM_NFALSEP (p))
379 return SCM_MAKE_CHAR (SCM_INUM (p));
380 }
381 for (c = 0; c < scm_n_charnames; c++)
382 if (scm_charnames[c]
383 && (scm_casei_streq (scm_charnames[c], SCM_CHARS (*tok_buf))))
384 return SCM_MAKE_CHAR (scm_charnums[c]);
385 scm_wta (SCM_UNDEFINED, "unknown # object: #\\", SCM_CHARS (*tok_buf));
386
387 /* #:SYMBOL is a syntax for keywords supported in all contexts. */
388 case ':':
389 j = scm_read_token ('-', tok_buf, port, 0);
390 p = scm_intern (SCM_CHARS (*tok_buf), j);
391 return scm_make_keyword_from_dash_symbol (SCM_CAR (p));
392
393 default:
394 callshrp:
395 {
396 SCM sharp = scm_get_hash_procedure (c);
397
398 if (SCM_NIMP (sharp))
399 {
400 int line = SCM_LINUM (port);
401 int column = SCM_COL (port) - 2;
402 SCM got;
403
404 got = scm_apply (sharp,
405 SCM_MAKE_CHAR (c),
406 scm_acons (port, SCM_EOL, SCM_EOL));
407 if (SCM_UNSPECIFIED == got)
408 goto unkshrp;
409 if (SCM_RECORD_POSITIONS_P)
410 return *copy = recsexpr (got, line, column,
411 SCM_FILENAME (port));
412 else
413 return got;
414 }
415 }
416 unkshrp:
417 scm_misc_error (s_scm_read, "Unknown # object: ~S",
418 scm_listify (SCM_MAKE_CHAR (c), SCM_UNDEFINED));
419 }
420
421 case '"':
422 j = 0;
423 while ('"' != (c = scm_getc (port)))
424 {
425 SCM_ASSERT (EOF != c, SCM_UNDEFINED, "end of file in ", "string");
426
427 while (j + 2 >= SCM_LENGTH (*tok_buf))
428 scm_grow_tok_buf (tok_buf);
429
430 if (c == '\\')
431 switch (c = scm_getc (port))
432 {
433 case '\n':
434 continue;
435 case '0':
436 c = '\0';
437 break;
438 case 'f':
439 c = '\f';
440 break;
441 case 'n':
442 c = '\n';
443 break;
444 case 'r':
445 c = '\r';
446 break;
447 case 't':
448 c = '\t';
449 break;
450 case 'a':
451 c = '\007';
452 break;
453 case 'v':
454 c = '\v';
455 break;
456 }
457 SCM_CHARS (*tok_buf)[j] = c;
458 ++j;
459 }
460 if (j == 0)
461 return scm_nullstr;
462 SCM_CHARS (*tok_buf)[j] = 0;
463 {
464 SCM str;
465 str = scm_makfromstr (SCM_CHARS (*tok_buf), j, 0);
466 return str;
467 }
468
469 case'0':case '1':case '2':case '3':case '4':
470 case '5':case '6':case '7':case '8':case '9':
471 case '.':
472 case '-':
473 case '+':
474 num:
475 j = scm_read_token (c, tok_buf, port, 0);
476 p = scm_istring2number (SCM_CHARS (*tok_buf), (long) j, 10L);
477 if (SCM_NFALSEP (p))
478 return p;
479 if (c == '#')
480 {
481 if ((j == 2) && (scm_getc (port) == '('))
482 {
483 scm_ungetc ('(', port);
484 c = SCM_CHARS (*tok_buf)[1];
485 goto callshrp;
486 }
487 scm_wta (SCM_UNDEFINED, "unknown # object", SCM_CHARS (*tok_buf));
488 }
489 goto tok;
490
491 case ':':
492 if (SCM_ASSCM (SCM_KEYWORD_STYLE) == scm_keyword_prefix)
493 {
494 j = scm_read_token ('-', tok_buf, port, 0);
495 p = scm_intern (SCM_CHARS (*tok_buf), j);
496 return scm_make_keyword_from_dash_symbol (SCM_CAR (p));
497 }
498 /* fallthrough */
499 default:
500 j = scm_read_token (c, tok_buf, port, 0);
501 /* fallthrough */
502
503 tok:
504 p = scm_intern (SCM_CHARS (*tok_buf), j);
505 return SCM_CAR (p);
506 }
507 }
508
509 #ifdef _UNICOS
510 _Pragma ("noopt"); /* # pragma _CRI noopt */
511 #endif
512
513 scm_sizet
514 scm_read_token (int ic, SCM *tok_buf, SCM port, int weird)
515 {
516 register scm_sizet j;
517 register int c;
518 register char *p;
519
520 c = (SCM_CASE_INSENSITIVE_P ? scm_downcase(ic) : ic);
521 p = SCM_CHARS (*tok_buf);
522
523 if (weird)
524 j = 0;
525 else
526 {
527 j = 0;
528 while (j + 2 >= SCM_LENGTH (*tok_buf))
529 p = scm_grow_tok_buf (tok_buf);
530 p[j] = c;
531 ++j;
532 }
533
534 while (1)
535 {
536 while (j + 2 >= SCM_LENGTH (*tok_buf))
537 p = scm_grow_tok_buf (tok_buf);
538 c = scm_getc (port);
539 switch (c)
540 {
541 case '(':
542 case ')':
543 case '"':
544 case ';':
545 case SCM_WHITE_SPACES:
546 case SCM_LINE_INCREMENTORS:
547 if (weird)
548 goto default_case;
549
550 scm_ungetc (c, port);
551 case EOF:
552 eof_case:
553 p[j] = 0;
554 return j;
555 case '\\':
556 if (!weird)
557 goto default_case;
558 else
559 {
560 c = scm_getc (port);
561 if (c == EOF)
562 goto eof_case;
563 else
564 goto default_case;
565 }
566 case '}':
567 if (!weird)
568 goto default_case;
569
570 c = scm_getc (port);
571 if (c == '#')
572 {
573 p[j] = 0;
574 return j;
575 }
576 else
577 {
578 scm_ungetc (c, port);
579 c = '}';
580 goto default_case;
581 }
582
583 default:
584 default_case:
585 {
586 c = (SCM_CASE_INSENSITIVE_P ? scm_downcase(c) : c);
587 p[j] = c;
588 ++j;
589 }
590
591 }
592 }
593 }
594
595 #ifdef _UNICOS
596 _Pragma ("opt"); /* # pragma _CRI opt */
597 #endif
598
599 SCM
600 scm_lreadparen (SCM *tok_buf, SCM port, char *name, SCM *copy)
601 {
602 SCM tmp;
603 SCM tl;
604 SCM ans;
605 int c;
606
607 c = scm_flush_ws (port, name);
608 if (')' == c)
609 return SCM_EOL;
610 scm_ungetc (c, port);
611 if (scm_sym_dot == (tmp = scm_lreadr (tok_buf, port, copy)))
612 {
613 ans = scm_lreadr (tok_buf, port, copy);
614 closeit:
615 if (')' != (c = scm_flush_ws (port, name)))
616 scm_wta (SCM_UNDEFINED, "missing close paren", "");
617 return ans;
618 }
619 ans = tl = scm_cons (tmp, SCM_EOL);
620 while (')' != (c = scm_flush_ws (port, name)))
621 {
622 scm_ungetc (c, port);
623 if (scm_sym_dot == (tmp = scm_lreadr (tok_buf, port, copy)))
624 {
625 SCM_SETCDR (tl, scm_lreadr (tok_buf, port, copy));
626 goto closeit;
627 }
628 SCM_SETCDR (tl, scm_cons (tmp, SCM_EOL));
629 tl = SCM_CDR (tl);
630 }
631 return ans;
632 }
633
634
635 SCM
636 scm_lreadrecparen (SCM *tok_buf, SCM port, char *name, SCM *copy)
637 {
638 register int c;
639 register SCM tmp;
640 register SCM tl, tl2 = SCM_EOL;
641 SCM ans, ans2 = SCM_EOL;
642 /* Need to capture line and column numbers here. */
643 int line = SCM_LINUM (port);
644 int column = SCM_COL (port) - 1;
645
646 c = scm_flush_ws (port, name);
647 if (')' == c)
648 return SCM_EOL;
649 scm_ungetc (c, port);
650 if (scm_sym_dot == (tmp = scm_lreadr (tok_buf, port, copy)))
651 {
652 ans = scm_lreadr (tok_buf, port, copy);
653 if (')' != (c = scm_flush_ws (port, name)))
654 scm_wta (SCM_UNDEFINED, "missing close paren", "");
655 return ans;
656 }
657 /* Build the head of the list structure. */
658 ans = tl = scm_cons (tmp, SCM_EOL);
659 if (SCM_COPY_SOURCE_P)
660 ans2 = tl2 = scm_cons (SCM_CONSP (tmp)
661 ? *copy
662 : tmp,
663 SCM_EOL);
664 while (')' != (c = scm_flush_ws (port, name)))
665 {
666 scm_ungetc (c, port);
667 if (scm_sym_dot == (tmp = scm_lreadr (tok_buf, port, copy)))
668 {
669 SCM_SETCDR (tl, tmp = scm_lreadr (tok_buf, port, copy));
670 if (SCM_COPY_SOURCE_P)
671 SCM_SETCDR (tl2, scm_cons (SCM_CONSP (tmp)
672 ? *copy
673 : tmp,
674 SCM_EOL));
675 if (')' != (c = scm_flush_ws (port, name)))
676 scm_wta (SCM_UNDEFINED, "missing close paren", "");
677 goto exit;
678 }
679 tl = SCM_SETCDR (tl, scm_cons (tmp, SCM_EOL));
680 if (SCM_COPY_SOURCE_P)
681 tl2 = SCM_SETCDR (tl2, scm_cons (SCM_CONSP (tmp)
682 ? *copy
683 : tmp,
684 SCM_EOL));
685 }
686 exit:
687 scm_whash_insert (scm_source_whash,
688 ans,
689 scm_make_srcprops (line,
690 column,
691 SCM_FILENAME (port),
692 SCM_COPY_SOURCE_P
693 ? *copy = ans2
694 : SCM_UNDEFINED,
695 SCM_EOL));
696 return ans;
697 }
698
699
700 \f
701
702 /* Manipulate the read-hash-procedures alist. This could be written in
703 Scheme, but maybe it will also be used by C code during initialisation. */
704 SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
705 (SCM chr, SCM proc),
706 "")
707 #define FUNC_NAME s_scm_read_hash_extend
708 {
709 SCM this;
710 SCM prev;
711
712 SCM_VALIDATE_CHAR (1,chr);
713 SCM_ASSERT (SCM_FALSEP (proc) || SCM_NIMP(proc), proc, SCM_ARG2,
714 FUNC_NAME);
715
716 /* Check if chr is already in the alist. */
717 this = *scm_read_hash_procedures;
718 prev = SCM_BOOL_F;
719 while (1)
720 {
721 if (SCM_NULLP (this))
722 {
723 /* not found, so add it to the beginning. */
724 if (SCM_NFALSEP (proc))
725 {
726 *scm_read_hash_procedures =
727 scm_cons (scm_cons (chr, proc), *scm_read_hash_procedures);
728 }
729 break;
730 }
731 if (chr == SCM_CAAR (this))
732 {
733 /* already in the alist. */
734 if (SCM_FALSEP (proc))
735 {
736 /* remove it. */
737 if (prev == SCM_BOOL_F)
738 {
739 *scm_read_hash_procedures =
740 SCM_CDR (*scm_read_hash_procedures);
741 }
742 else
743 scm_set_cdr_x (prev, SCM_CDR (this));
744 }
745 else
746 {
747 /* replace it. */
748 scm_set_cdr_x (SCM_CAR (this), proc);
749 }
750 break;
751 }
752 prev = this;
753 this = SCM_CDR (this);
754 }
755
756 return SCM_UNSPECIFIED;
757 }
758 #undef FUNC_NAME
759
760 /* Recover the read-hash procedure corresponding to char c. */
761 static SCM
762 scm_get_hash_procedure (int c)
763 {
764 SCM rest = *scm_read_hash_procedures;
765
766 while (1)
767 {
768 if (SCM_NULLP (rest))
769 return SCM_BOOL_F;
770
771 if (SCM_CHAR (SCM_CAAR (rest)) == c)
772 return SCM_CDAR (rest);
773
774 rest = SCM_CDR (rest);
775 }
776 }
777
778 void
779 scm_init_read ()
780 {
781 scm_read_hash_procedures =
782 SCM_CDRLOC (scm_sysintern ("read-hash-procedures", SCM_EOL));
783
784 scm_init_opts (scm_read_options, scm_read_opts, SCM_N_READ_OPTIONS);
785 #include "read.x"
786 }