* alist.c, arbiters.c, continuations.c, debug.c, debug.h, eval.c,
[bpt/guile.git] / libguile / read.c
1 /* Copyright (C) 1995,1996 Free Software Foundation, Inc.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2, or (at your option)
6 * any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this software; see the file COPYING. If not, write to
15 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
16 *
17 * As a special exception, the Free Software Foundation gives permission
18 * for additional uses of the text contained in its release of GUILE.
19 *
20 * The exception is that, if you link the GUILE library with other files
21 * to produce an executable, this does not by itself cause the
22 * resulting executable to be covered by the GNU General Public License.
23 * Your use of that executable is in no way restricted on account of
24 * linking the GUILE library code into it.
25 *
26 * This exception does not however invalidate any other reasons why
27 * the executable file might be covered by the GNU General Public License.
28 *
29 * This exception applies only to the code released by the
30 * Free Software Foundation under the name GUILE. If you copy
31 * code from other Free Software Foundation releases into a copy of
32 * GUILE, as the General Public License permits, the exception does
33 * not apply to the code that you add in this way. To avoid misleading
34 * anyone as to the status of such modified files, you must delete
35 * this exception notice from them.
36 *
37 * If you write modifications of your own for GUILE, it is your choice
38 * whether to permit this exception to apply to your modifications.
39 * If you do not wish that, delete this exception notice.
40 */
41 \f
42
43 #include "extchrs.h"
44 #include <stdio.h>
45 #include "_scm.h"
46 #include "chars.h"
47 #include "genio.h"
48 #include "eval.h"
49 #include "unif.h"
50 #include "mbstrings.h"
51 #include "kw.h"
52 #include "alist.h"
53 #include "srcprop.h"
54 #include "hashtab.h"
55 #include "hash.h"
56
57 #include "read.h"
58
59 \f
60
61 #define default_case_i 0
62
63 \f
64
65 scm_option scm_read_opts[] = {
66 { SCM_OPTION_BOOLEAN, "copy", 0,
67 "Copy source code expressions." },
68 { SCM_OPTION_BOOLEAN, "positions", 1,
69 "Record positions of source code expressions." }
70 };
71
72 SCM_PROC (s_read_options, "read-options-interface", 0, 1, 0, scm_read_options);
73
74 SCM
75 scm_read_options (setting)
76 SCM setting;
77 {
78 SCM ans = scm_options (setting,
79 scm_read_opts,
80 SCM_N_READ_OPTIONS,
81 s_read_options);
82 if (SCM_COPY_SOURCE_P)
83 SCM_RECORD_POSITIONS_P = 1;
84 return ans;
85 }
86
87 SCM_PROC (s_read, "read", 0, 3, 0, scm_read);
88
89 SCM
90 scm_read (port, case_insensitive_p, sharp)
91 SCM port;
92 SCM case_insensitive_p;
93 SCM sharp;
94 {
95 int c;
96 SCM tok_buf, copy;
97 int case_i;
98
99 if (SCM_UNBNDP (port))
100 port = scm_cur_inp;
101 else
102 SCM_ASSERT (SCM_NIMP (port) && SCM_OPINPORTP (port),
103 port,
104 SCM_ARG1,
105 s_read);
106
107 case_i = (SCM_UNBNDP (case_insensitive_p)
108 ? default_case_i
109 : (case_insensitive_p == SCM_BOOL_F));
110
111 if (SCM_UNBNDP (sharp))
112 sharp = SCM_BOOL_F;
113
114 c = scm_flush_ws (port, (char *) NULL);
115 if (EOF == c)
116 return SCM_EOF_VAL;
117 scm_gen_ungetc (c, port);
118
119 tok_buf = scm_makstr (30L, 0);
120 return scm_lreadr (&tok_buf, port, case_i, sharp, &copy);
121 }
122
123
124
125 char *
126 scm_grow_tok_buf (tok_buf)
127 SCM * tok_buf;
128 {
129 scm_vector_set_length_x (*tok_buf, SCM_MAKINUM (2 * SCM_LENGTH (*tok_buf)));
130 return SCM_CHARS (*tok_buf);
131 }
132
133
134
135 int
136 scm_flush_ws (port, eoferr)
137 SCM port;
138 char *eoferr;
139 {
140 register int c;
141 while (1)
142 switch (c = scm_gen_getc (port))
143 {
144 case EOF:
145 goteof:
146 if (eoferr)
147 scm_wta (SCM_UNDEFINED, "end of file in ", eoferr);
148 return c;
149 case ';':
150 lp:
151 switch (c = scm_gen_getc (port))
152 {
153 case EOF:
154 goto goteof;
155 default:
156 goto lp;
157 case SCM_LINE_INCREMENTORS:
158 break;
159 }
160 break;
161 case SCM_LINE_INCREMENTORS:
162 case SCM_SINGLE_SPACES:
163 case '\t':
164 break;
165 default:
166 return c;
167 }
168 }
169
170
171
172 int
173 scm_casei_streq (s1, s2)
174 char * s1;
175 char * s2;
176 {
177 while (*s1 && *s2)
178 if (scm_downcase((int)*s1) != scm_downcase((int)*s2))
179 return 0;
180 else
181 {
182 ++s1;
183 ++s2;
184 }
185 return !(*s1 || *s2);
186 }
187
188
189 /* recsexpr is used when recording expressions
190 * constructed by read:sharp.
191 */
192
193 static SCM recsexpr SCM_P ((SCM obj, int line, int column, SCM filename));
194
195 static SCM
196 recsexpr (obj, line, column, filename)
197 SCM obj;
198 int line;
199 int column;
200 SCM filename;
201 {
202 if (SCM_IMP (obj) || SCM_NCONSP(obj))
203 return obj;
204 {
205 SCM tmp = obj, copy;
206 /* If this sexpr is visible in the read:sharp source, we want to
207 keep that information, so only record non-constant cons cells
208 which haven't previously been read by the reader. */
209 if (SCM_FALSEP (scm_whash_lookup (scm_source_whash, obj)))
210 {
211 if (SCM_COPY_SOURCE_P)
212 {
213 copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename),
214 SCM_UNDEFINED);
215 while (SCM_NIMP (tmp = SCM_CDR (tmp)) && SCM_CONSP (tmp))
216 {
217 SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp),
218 line,
219 column,
220 filename),
221 SCM_UNDEFINED));
222 copy = SCM_CDR (copy);
223 }
224 SCM_SETCDR (copy, tmp);
225 }
226 else
227 {
228 recsexpr (SCM_CAR (obj), line, column, filename);
229 while (SCM_NIMP (tmp = SCM_CDR (tmp)) && SCM_CONSP (tmp))
230 recsexpr (SCM_CAR (tmp), line, column, filename);
231 copy = SCM_UNDEFINED;
232 }
233 scm_whash_insert (scm_source_whash,
234 obj,
235 scm_make_srcprops (line,
236 column,
237 filename,
238 copy,
239 SCM_EOL));
240 }
241 return obj;
242 }
243 }
244
245 static char s_list[]="list";
246
247 SCM
248 scm_lreadr (tok_buf, port, case_i, sharp, copy)
249 SCM *tok_buf;
250 SCM port;
251 int case_i;
252 SCM sharp;
253 SCM *copy;
254 {
255 int c;
256 scm_sizet j;
257 SCM p;
258
259 tryagain:
260 c = scm_flush_ws (port, s_read);
261 switch (c)
262 {
263 case EOF:
264 return SCM_EOF_VAL;
265
266 case '(':
267 return SCM_RECORD_POSITIONS_P
268 ? scm_lreadrecparen (tok_buf, port, s_list, case_i, sharp, copy)
269 : scm_lreadparen (tok_buf, port, s_list, case_i, sharp, copy);
270 case ')':
271 scm_wta (SCM_UNDEFINED, "unexpected \")\"", "read");
272 goto tryagain;
273
274 case '\'':
275 p = scm_i_quote;
276 goto recquote;
277 case '`':
278 p = scm_i_quasiquote;
279 goto recquote;
280 case ',':
281 c = scm_gen_getc (port);
282 if ('@' == c)
283 p = scm_i_uq_splicing;
284 else
285 {
286 scm_gen_ungetc (c, port);
287 p = scm_i_unquote;
288 }
289 recquote:
290 p = scm_cons2 (p,
291 scm_lreadr (tok_buf, port, case_i, sharp, copy),
292 SCM_EOL);
293 if (SCM_RECORD_POSITIONS_P)
294 scm_whash_insert (scm_source_whash,
295 p,
296 scm_make_srcprops (SCM_LINUM (port),
297 SCM_COL (port) - 1,
298 SCM_FILENAME (port),
299 SCM_COPY_SOURCE_P
300 ? (*copy = scm_cons2 (SCM_CAR (p),
301 SCM_CAR (SCM_CDR (p)),
302 SCM_EOL))
303 : SCM_UNDEFINED,
304 SCM_EOL));
305 return p;
306 case '#':
307 c = scm_gen_getc (port);
308 switch (c)
309 {
310 case '(':
311 p = scm_lreadparen (tok_buf, port, "vector", case_i, sharp, copy);
312 return SCM_NULLP (p) ? scm_nullvect : scm_vector (p);
313
314 case 't':
315 case 'T':
316 return SCM_BOOL_T;
317 case 'f':
318 case 'F':
319 return SCM_BOOL_F;
320
321 case 'b':
322 case 'B':
323 case 'o':
324 case 'O':
325 case 'd':
326 case 'D':
327 case 'x':
328 case 'X':
329 case 'i':
330 case 'I':
331 case 'e':
332 case 'E':
333 scm_gen_ungetc (c, port);
334 c = '#';
335 goto num;
336
337 case '*':
338 j = scm_read_token (c, tok_buf, port, case_i, 0);
339 p = scm_istr2bve (SCM_CHARS (*tok_buf) + 1, (long) (j - 1));
340 if (SCM_NFALSEP (p))
341 return p;
342 else
343 goto unkshrp;
344
345 case '{':
346 j = scm_read_token (c, tok_buf, port, case_i, 1);
347 p = scm_intern (SCM_CHARS (*tok_buf), j);
348 if (SCM_PORT_REPRESENTATION (port) != scm_regular_port)
349 scm_set_symbol_multi_byte_x (SCM_CAR (p), SCM_BOOL_T);
350 return SCM_CAR (p);
351
352 case '\\':
353 c = scm_gen_getc (port);
354 j = scm_read_token (c, tok_buf, port, case_i, 0);
355 if (j == 1)
356 return SCM_MAKICHR (c);
357 if (c >= '0' && c < '8')
358 {
359 p = scm_istr2int (SCM_CHARS (*tok_buf), (long) j, 8);
360 if (SCM_NFALSEP (p))
361 return SCM_MAKICHR (SCM_INUM (p));
362 }
363 for (c = 0; c < scm_n_charnames; c++)
364 if (scm_charnames[c]
365 && (scm_casei_streq (scm_charnames[c], SCM_CHARS (*tok_buf))))
366 return SCM_MAKICHR (scm_charnums[c]);
367 scm_wta (SCM_UNDEFINED, "unknown # object: #\\", SCM_CHARS (*tok_buf));
368
369
370 default:
371 callshrp:
372 if (SCM_NIMP (sharp))
373 {
374 int line = SCM_LINUM (port);
375 int column = SCM_COL (port) - 2;
376 SCM got;
377 got = scm_apply (sharp,
378 SCM_MAKICHR (c),
379 scm_acons (port, SCM_EOL, SCM_EOL));
380 if (SCM_UNSPECIFIED == got)
381 goto unkshrp;
382 if (SCM_RECORD_POSITIONS_P)
383 return *copy = recsexpr (got, line, column,
384 SCM_FILENAME (port));
385 else
386 return got;
387 }
388 unkshrp:scm_wta ((SCM) SCM_MAKICHR (c), "unknown # object", "");
389 }
390
391 case '"':
392 j = 0;
393 while ('"' != (c = scm_gen_getc (port)))
394 {
395 SCM_ASSERT (EOF != c, SCM_UNDEFINED, "end of file in ", "string");
396
397 while (j + sizeof(xwchar_t) + XMB_CUR_MAX >= SCM_LENGTH (*tok_buf))
398 scm_grow_tok_buf (tok_buf);
399
400 if (c == '\\')
401 switch (c = scm_gen_getc (port))
402 {
403 case '\n':
404 continue;
405 case '0':
406 c = '\0';
407 break;
408 case 'f':
409 c = '\f';
410 break;
411 case 'n':
412 c = '\n';
413 break;
414 case 'r':
415 c = '\r';
416 break;
417 case 't':
418 c = '\t';
419 break;
420 case 'a':
421 c = '\007';
422 break;
423 case 'v':
424 c = '\v';
425 break;
426 }
427 if (SCM_PORT_REPRESENTATION(port) == scm_regular_port)
428 {
429 SCM_CHARS (*tok_buf)[j] = c;
430 ++j;
431 }
432 else
433 {
434 int len;
435 len = xwctomb (SCM_CHARS (*tok_buf) + j, c);
436 if (len == 0)
437 len = 1;
438 SCM_ASSERT (len > 0, SCM_MAKINUM (c), "bogus char", "read");
439 j += len;
440 }
441 }
442 if (j == 0)
443 return scm_nullstr;
444 SCM_CHARS (*tok_buf)[j] = 0;
445 {
446 SCM str;
447 str = scm_makfromstr (SCM_CHARS (*tok_buf), j, 0);
448 if (SCM_PORT_REPRESENTATION(port) != scm_regular_port)
449 {
450 SCM_SETLENGTH (str, SCM_LENGTH (str), scm_tc7_mb_string);
451 }
452 return str;
453 }
454
455 case'0':case '1':case '2':case '3':case '4':
456 case '5':case '6':case '7':case '8':case '9':
457 case '.':
458 case '-':
459 case '+':
460 num:
461 j = scm_read_token (c, tok_buf, port, case_i, 0);
462 p = scm_istring2number (SCM_CHARS (*tok_buf), (long) j, 10L);
463 if (SCM_NFALSEP (p))
464 return p;
465 if (c == '#')
466 {
467 if ((j == 2) && (scm_gen_getc (port) == '('))
468 {
469 scm_gen_ungetc ('(', port);
470 c = SCM_CHARS (*tok_buf)[1];
471 goto callshrp;
472 }
473 scm_wta (SCM_UNDEFINED, "unknown # object", SCM_CHARS (*tok_buf));
474 }
475 goto tok;
476
477 case ':':
478 j = scm_read_token ('-', tok_buf, port, case_i, 0);
479 p = scm_intern (SCM_CHARS (*tok_buf), j);
480 if (SCM_PORT_REPRESENTATION (port) != scm_regular_port)
481 scm_set_symbol_multi_byte_x (SCM_CAR (p), SCM_BOOL_T);
482 return scm_make_keyword_from_dash_symbol (SCM_CAR (p));
483
484 default:
485 j = scm_read_token (c, tok_buf, port, case_i, 0);
486 /* fallthrough */
487
488 tok:
489 p = scm_intern (SCM_CHARS (*tok_buf), j);
490 if (SCM_PORT_REPRESENTATION (port) != scm_regular_port)
491 scm_set_symbol_multi_byte_x (SCM_CAR (p), SCM_BOOL_T);
492 return SCM_CAR (p);
493 }
494 }
495
496 #ifdef _UNICOS
497 _Pragma ("noopt"); /* # pragma _CRI noopt */
498 #endif
499
500 scm_sizet
501 scm_read_token (ic, tok_buf, port, case_i, weird)
502 int ic;
503 SCM *tok_buf;
504 SCM port;
505 int case_i;
506 int weird;
507 {
508 register scm_sizet j;
509 register int c;
510 register char *p;
511
512 c = ic;
513 p = SCM_CHARS (*tok_buf);
514
515 if (weird)
516 j = 0;
517 else
518 {
519 j = 0;
520 while (j + sizeof(xwchar_t) + XMB_CUR_MAX >= SCM_LENGTH (*tok_buf))
521 p = scm_grow_tok_buf (tok_buf);
522 if (SCM_PORT_REPRESENTATION(port) == scm_regular_port)
523 {
524 p[j] = c;
525 ++j;
526 }
527 else
528 {
529 int len;
530 len = xwctomb (p + j, c);
531 if (len == 0)
532 len = 1;
533 SCM_ASSERT (len > 0, SCM_MAKINUM (c), "bogus char", "read");
534 j += len;
535 }
536 }
537
538 while (1)
539 {
540 while (j + sizeof(xwchar_t) + XMB_CUR_MAX >= SCM_LENGTH (*tok_buf))
541 p = scm_grow_tok_buf (tok_buf);
542 c = scm_gen_getc (port);
543 switch (c)
544 {
545 case '(':
546 case ')':
547 case '"':
548 case ';':
549 case SCM_WHITE_SPACES:
550 case SCM_LINE_INCREMENTORS:
551 if (weird)
552 goto default_case;
553
554 scm_gen_ungetc (c, port);
555 case EOF:
556 eof_case:
557 p[j] = 0;
558 return j;
559 case '\\':
560 if (!weird)
561 goto default_case;
562 else
563 {
564 c = scm_gen_getc (port);
565 if (c == EOF)
566 goto eof_case;
567 else
568 goto default_case;
569 }
570 case '}':
571 if (!weird)
572 goto default_case;
573
574 c = scm_gen_getc (port);
575 if (c == '#')
576 {
577 p[j] = 0;
578 return j;
579 }
580 else
581 {
582 scm_gen_ungetc (c, port);
583 c = '}';
584 goto default_case;
585 }
586
587 default:
588 default_case:
589 {
590 c = (case_i ? scm_downcase(c) : c);
591 if (SCM_PORT_REPRESENTATION(port) == scm_regular_port)
592 {
593 p[j] = c;
594 ++j;
595 }
596 else
597 {
598 int len;
599 len = xwctomb (p + j, c);
600 if (len == 0)
601 len = 1;
602 SCM_ASSERT (len > 0, SCM_MAKINUM (c), "bogus char", "read");
603 j += len;
604 }
605 }
606
607 }
608 }
609 }
610
611 #ifdef _UNICOS
612 _Pragma ("opt"); /* # pragma _CRI opt */
613 #endif
614
615 SCM
616 scm_lreadparen (tok_buf, port, name, case_i, sharp, copy)
617 SCM *tok_buf;
618 SCM port;
619 char *name;
620 int case_i;
621 SCM sharp;
622 SCM *copy;
623 {
624 SCM tmp;
625 SCM tl;
626 SCM ans;
627 int c;
628
629 c = scm_flush_ws (port, name);
630 if (')' == c)
631 return SCM_EOL;
632 scm_gen_ungetc (c, port);
633 if (scm_i_dot == (tmp = scm_lreadr (tok_buf, port, case_i, sharp, copy)))
634 {
635 ans = scm_lreadr (tok_buf, port, case_i, sharp, copy);
636 closeit:
637 if (')' != (c = scm_flush_ws (port, name)))
638 scm_wta (SCM_UNDEFINED, "missing close paren", "");
639 return ans;
640 }
641 ans = tl = scm_cons (tmp, SCM_EOL);
642 while (')' != (c = scm_flush_ws (port, name)))
643 {
644 scm_gen_ungetc (c, port);
645 if (scm_i_dot == (tmp = scm_lreadr (tok_buf, port, case_i, sharp, copy)))
646 {
647 SCM_SETCDR (tl, scm_lreadr (tok_buf, port, case_i, sharp, copy));
648 goto closeit;
649 }
650 SCM_SETCDR (tl, scm_cons (tmp, SCM_EOL));
651 tl = SCM_CDR (tl);
652 }
653 return ans;
654 }
655
656
657 SCM
658 scm_lreadrecparen (tok_buf, port, name, case_i, sharp, copy)
659 SCM *tok_buf;
660 SCM port;
661 char *name;
662 int case_i;
663 SCM sharp;
664 SCM *copy;
665 {
666 register int c;
667 register SCM tmp;
668 register SCM tl, tl2;
669 SCM ans, ans2;
670 /* Need to capture line and column numbers here. */
671 int line = SCM_LINUM (port);
672 int column = SCM_COL (port) - 1;
673
674 c = scm_flush_ws (port, name);
675 if (')' == c)
676 return SCM_EOL;
677 scm_gen_ungetc (c, port);
678 if (scm_i_dot == (tmp = scm_lreadr (tok_buf, port, case_i, sharp, copy)))
679 {
680 ans = scm_lreadr (tok_buf, port, case_i, sharp, copy);
681 if (')' != (c = scm_flush_ws (port, name)))
682 scm_wta (SCM_UNDEFINED, "missing close paren", "");
683 return ans;
684 }
685 /* Build the head of the list structure. */
686 ans = tl = scm_cons (tmp, SCM_EOL);
687 if (SCM_COPY_SOURCE_P)
688 ans2 = tl2 = scm_cons (SCM_NIMP (tmp) && SCM_CONSP (tmp)
689 ? *copy
690 : tmp,
691 SCM_EOL);
692 while (')' != (c = scm_flush_ws (port, name)))
693 {
694 scm_gen_ungetc (c, port);
695 if (scm_i_dot == (tmp = scm_lreadr (tok_buf, port, case_i, sharp, copy)))
696 {
697 SCM_SETCDR (tl, tmp = scm_lreadr (tok_buf, port, case_i, sharp, copy));
698 if (SCM_COPY_SOURCE_P)
699 SCM_SETCDR (tl2, scm_cons (SCM_NIMP (tmp) && SCM_CONSP (tmp)
700 ? *copy
701 : tmp,
702 SCM_EOL));
703 if (')' != (c = scm_flush_ws (port, name)))
704 scm_wta (SCM_UNDEFINED, "missing close paren", "");
705 goto exit;
706 }
707 tl = SCM_SETCDR (tl, scm_cons (tmp, SCM_EOL));
708 if (SCM_COPY_SOURCE_P)
709 tl2 = SCM_SETCDR (tl2, scm_cons (SCM_NIMP (tmp) && SCM_CONSP (tmp)
710 ? *copy
711 : tmp,
712 SCM_EOL));
713 }
714 exit:
715 scm_whash_insert (scm_source_whash,
716 ans,
717 scm_make_srcprops (line,
718 column,
719 SCM_FILENAME (port),
720 SCM_COPY_SOURCE_P
721 ? *copy = ans2
722 : SCM_UNDEFINED,
723 SCM_EOL));
724 return ans;
725 }
726
727
728 \f
729
730
731
732 void
733 scm_init_read ()
734 {
735 scm_init_opts (scm_read_options, scm_read_opts, SCM_N_READ_OPTIONS);
736 #include "read.x"
737 }