* read.c: Added code for recording of positions of source code
[bpt/guile.git] / libguile / read.c
1 /* Copyright (C) 1995,1996 Free Software Foundation, Inc.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2, or (at your option)
6 * any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this software; see the file COPYING. If not, write to
15 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
16 *
17 * As a special exception, the Free Software Foundation gives permission
18 * for additional uses of the text contained in its release of GUILE.
19 *
20 * The exception is that, if you link the GUILE library with other files
21 * to produce an executable, this does not by itself cause the
22 * resulting executable to be covered by the GNU General Public License.
23 * Your use of that executable is in no way restricted on account of
24 * linking the GUILE library code into it.
25 *
26 * This exception does not however invalidate any other reasons why
27 * the executable file might be covered by the GNU General Public License.
28 *
29 * This exception applies only to the code released by the
30 * Free Software Foundation under the name GUILE. If you copy
31 * code from other Free Software Foundation releases into a copy of
32 * GUILE, as the General Public License permits, the exception does
33 * not apply to the code that you add in this way. To avoid misleading
34 * anyone as to the status of such modified files, you must delete
35 * this exception notice from them.
36 *
37 * If you write modifications of your own for GUILE, it is your choice
38 * whether to permit this exception to apply to your modifications.
39 * If you do not wish that, delete this exception notice.
40 */
41 \f
42
43 #include "extchrs.h"
44 #include <stdio.h>
45 #include "_scm.h"
46 #include "chars.h"
47 #include "genio.h"
48 #include "eval.h"
49 #include "unif.h"
50 #include "mbstrings.h"
51 #include "kw.h"
52 #include "alist.h"
53 #include "srcprop.h"
54 #include "hashtab.h"
55 #include "hash.h"
56
57 #include "read.h"
58
59 \f
60
61 #define default_case_i 0
62
63 \f
64
65 scm_option scm_read_opts[] = {
66 { SCM_OPTION_BOOLEAN, "copy", 0,
67 "Copy source code expressions." },
68 { SCM_OPTION_BOOLEAN, "positions", 0,
69 "Record positions of source code expressions." }
70 };
71
72 SCM_PROC (s_read_options, "read-options-interface", 0, 1, 0, scm_read_options);
73 #ifdef __STDC__
74 SCM
75 scm_read_options (SCM setting)
76 #else
77 SCM
78 scm_read_options (setting)
79 SCM setting;
80 #endif
81 {
82 SCM ans = scm_options (setting,
83 scm_read_opts,
84 SCM_N_READ_OPTIONS,
85 s_read_options);
86 if (SCM_COPY_SOURCE_P)
87 SCM_RECORD_POSITIONS_P = 1;
88 return ans;
89 }
90
91 SCM_PROC (s_read, "read", 0, 3, 0, scm_read);
92 #ifdef __STDC__
93 SCM
94 scm_read (SCM port, SCM case_insensitive_p, SCM sharp)
95 #else
96 SCM
97 scm_read (port, case_insensitive_p, sharp)
98 SCM port;
99 SCM case_insensitive_p;
100 SCM sharp;
101 #endif
102 {
103 int c;
104 SCM tok_buf, copy;
105 int case_i;
106
107 if (SCM_UNBNDP (port))
108 port = scm_cur_inp;
109 else
110 SCM_ASSERT (SCM_NIMP (port) && SCM_OPINPORTP (port),
111 port,
112 SCM_ARG1,
113 s_read);
114
115 case_i = (SCM_UNBNDP (case_insensitive_p)
116 ? default_case_i
117 : (case_insensitive_p == SCM_BOOL_F));
118
119 if (SCM_UNBNDP (sharp))
120 sharp = SCM_BOOL_F;
121
122 c = scm_flush_ws (port, (char *) NULL);
123 if (EOF == c)
124 return SCM_EOF_VAL;
125 scm_gen_ungetc (c, port);
126
127 tok_buf = scm_makstr (30L, 0);
128 return scm_lreadr (&tok_buf, port, case_i, sharp, &copy);
129 }
130
131
132 #ifdef __STDC__
133 char *
134 scm_grow_tok_buf (SCM * tok_buf)
135 #else
136 char *
137 scm_grow_tok_buf (tok_buf)
138 SCM * tok_buf;
139 #endif
140 {
141 scm_vector_set_length_x (*tok_buf, SCM_MAKINUM (2 * SCM_LENGTH (*tok_buf)));
142 return SCM_CHARS (*tok_buf);
143 }
144
145
146 #ifdef __STDC__
147 int
148 scm_flush_ws (SCM port, char *eoferr)
149 #else
150 int
151 scm_flush_ws (port, eoferr)
152 SCM port;
153 char *eoferr;
154 #endif
155 {
156 register int c;
157 while (1)
158 switch (c = scm_gen_getc (port))
159 {
160 case EOF:
161 goteof:
162 if (eoferr)
163 scm_wta (SCM_UNDEFINED, "end of file in ", eoferr);
164 return c;
165 case ';':
166 lp:
167 switch (c = scm_gen_getc (port))
168 {
169 case EOF:
170 goto goteof;
171 default:
172 goto lp;
173 case SCM_LINE_INCREMENTORS:
174 break;
175 }
176 break;
177 case SCM_LINE_INCREMENTORS:
178 case SCM_SINGLE_SPACES:
179 case '\t':
180 break;
181 default:
182 return c;
183 }
184 }
185
186
187 #ifdef __STDC__
188 int
189 scm_casei_streq (char * s1, char * s2)
190 #else
191 int
192 scm_casei_streq (s1, s2)
193 char * s1;
194 char * s2;
195 #endif
196 {
197 while (*s1 && *s2)
198 if (scm_downcase((int)*s1) != scm_downcase((int)*s2))
199 return 0;
200 else
201 {
202 ++s1;
203 ++s2;
204 }
205 return !(*s1 || *s2);
206 }
207
208
209 /* recsexpr is used when recording expressions
210 * constructed by read:sharp.
211 */
212 #ifdef __STDC__
213 static SCM
214 recsexpr (SCM obj, int line, int column, SCM filename)
215 #else
216 static SCM
217 recsexpr (obj, line, column, filename)
218 SCM obj;
219 int line;
220 int column;
221 SCM filename;
222 #endif
223 {
224 if (SCM_IMP (obj) || SCM_NCONSP(obj))
225 return obj;
226 {
227 SCM tmp = obj, copy;
228 /* If this sexpr is visible in the read:sharp source, we want to
229 keep that information, so only record non-constant cons cells
230 which haven't previously been read by the reader. */
231 if (SCM_FALSEP (scm_whash_lookup (scm_source_whash, obj)))
232 {
233 if (SCM_COPY_SOURCE_P)
234 {
235 copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename),
236 SCM_UNDEFINED);
237 while (SCM_NIMP (tmp = SCM_CDR (tmp)) && SCM_CONSP (tmp))
238 copy = (SCM_CDR (copy) = scm_cons (recsexpr (SCM_CAR (tmp),
239 line,
240 column,
241 filename),
242 SCM_UNDEFINED));
243 SCM_CDR (copy) = tmp;
244 }
245 else
246 {
247 recsexpr (SCM_CAR (obj), line, column, filename);
248 while (SCM_NIMP (tmp = SCM_CDR (tmp)) && SCM_CONSP (tmp))
249 recsexpr (SCM_CAR (tmp), line, column, filename);
250 copy = SCM_UNDEFINED;
251 }
252 scm_whash_insert (scm_source_whash,
253 obj,
254 scm_make_srcprops (line,
255 column,
256 filename,
257 copy,
258 SCM_EOL));
259 }
260 return obj;
261 }
262 }
263
264 static char s_list[]="list";
265 #ifdef __STDC__
266 SCM
267 scm_lreadr (SCM *tok_buf, SCM port, int case_i, SCM sharp, SCM *copy)
268 #else
269 SCM
270 scm_lreadr (tok_buf, port, case_i, sharp, copy)
271 SCM *tok_buf;
272 SCM port;
273 int case_i;
274 SCM sharp;
275 SCM *copy;
276 #endif
277 {
278 int c;
279 scm_sizet j;
280 SCM p;
281
282 tryagain:
283 c = scm_flush_ws (port, s_read);
284 switch (c)
285 {
286 case EOF:
287 return SCM_EOF_VAL;
288
289 case '(':
290 return SCM_RECORD_POSITIONS_P
291 ? scm_lreadrecparen (tok_buf, port, s_list, case_i, sharp, copy)
292 : scm_lreadparen (tok_buf, port, s_list, case_i, sharp, copy);
293 case ')':
294 scm_wta (SCM_UNDEFINED, "unexpected \")\"", "read");
295 goto tryagain;
296
297 case '\'':
298 p = scm_i_quote;
299 goto recquote;
300 case '`':
301 p = scm_i_quasiquote;
302 goto recquote;
303 case ',':
304 c = scm_gen_getc (port);
305 if ('@' == c)
306 p = scm_i_uq_splicing;
307 else
308 {
309 scm_gen_ungetc (c, port);
310 p = scm_i_unquote;
311 }
312 recquote:
313 p = scm_cons2 (p,
314 scm_lreadr (tok_buf, port, case_i, sharp, copy),
315 SCM_EOL);
316 if (SCM_RECORD_POSITIONS_P)
317 scm_whash_insert (scm_source_whash,
318 p,
319 scm_make_srcprops (SCM_LINUM (port),
320 SCM_COL (port) - 1,
321 SCM_FILENAME (port),
322 SCM_COPY_SOURCE_P
323 ? (*copy = scm_cons2 (SCM_CAR (p),
324 SCM_CAR (SCM_CDR (p)),
325 SCM_EOL))
326 : SCM_UNDEFINED,
327 SCM_EOL));
328 return p;
329 case '#':
330 c = scm_gen_getc (port);
331 switch (c)
332 {
333 case '(':
334 p = scm_lreadparen (tok_buf, port, "vector", case_i, sharp, copy);
335 return SCM_NULLP (p) ? scm_nullvect : scm_vector (p);
336
337 case 't':
338 case 'T':
339 return SCM_BOOL_T;
340 case 'f':
341 case 'F':
342 return SCM_BOOL_F;
343
344 case 'b':
345 case 'B':
346 case 'o':
347 case 'O':
348 case 'd':
349 case 'D':
350 case 'x':
351 case 'X':
352 case 'i':
353 case 'I':
354 case 'e':
355 case 'E':
356 scm_gen_ungetc (c, port);
357 c = '#';
358 goto num;
359
360 case '*':
361 j = scm_read_token (c, tok_buf, port, case_i, 0);
362 p = scm_istr2bve (SCM_CHARS (*tok_buf) + 1, (long) (j - 1));
363 if (SCM_NFALSEP (p))
364 return p;
365 else
366 goto unkshrp;
367
368 case '{':
369 j = scm_read_token (c, tok_buf, port, case_i, 1);
370 p = scm_intern (SCM_CHARS (*tok_buf), j);
371 if (SCM_PORT_REPRESENTATION (port) != scm_regular_port)
372 scm_set_symbol_multi_byte_x (SCM_CAR (p), SCM_BOOL_T);
373 return SCM_CAR (p);
374
375 case '\\':
376 c = scm_gen_getc (port);
377 j = scm_read_token (c, tok_buf, port, case_i, 0);
378 if (j == 1)
379 return SCM_MAKICHR (c);
380 if (c >= '0' && c < '8')
381 {
382 p = scm_istr2int (SCM_CHARS (*tok_buf), (long) j, 8);
383 if (SCM_NFALSEP (p))
384 return SCM_MAKICHR (SCM_INUM (p));
385 }
386 for (c = 0; c < scm_n_charnames; c++)
387 if (scm_charnames[c]
388 && (scm_casei_streq (scm_charnames[c], SCM_CHARS (*tok_buf))))
389 return SCM_MAKICHR (scm_charnums[c]);
390 scm_wta (SCM_UNDEFINED, "unknown # object: #\\", SCM_CHARS (*tok_buf));
391
392
393 default:
394 callshrp:
395 if (SCM_NIMP (sharp))
396 {
397 int line = SCM_LINUM (port);
398 int column = SCM_COL (port) - 2;
399 SCM got;
400 got = scm_apply (sharp,
401 SCM_MAKICHR (c),
402 scm_acons (port, SCM_EOL, SCM_EOL));
403 if (SCM_UNSPECIFIED == got)
404 goto unkshrp;
405 if (SCM_RECORD_POSITIONS_P)
406 return *copy = recsexpr (got, line, column,
407 SCM_FILENAME (port));
408 else
409 return got;
410 }
411 unkshrp:scm_wta ((SCM) SCM_MAKICHR (c), "unknown # object", "");
412 }
413
414 case '"':
415 j = 0;
416 while ('"' != (c = scm_gen_getc (port)))
417 {
418 SCM_ASSERT (EOF != c, SCM_UNDEFINED, "end of file in ", "string");
419
420 while (j + sizeof(xwchar_t) + XMB_CUR_MAX >= SCM_LENGTH (*tok_buf))
421 scm_grow_tok_buf (tok_buf);
422
423 if (c == '\\')
424 switch (c = scm_gen_getc (port))
425 {
426 case '\n':
427 continue;
428 case '0':
429 c = '\0';
430 break;
431 case 'f':
432 c = '\f';
433 break;
434 case 'n':
435 c = '\n';
436 break;
437 case 'r':
438 c = '\r';
439 break;
440 case 't':
441 c = '\t';
442 break;
443 case 'a':
444 c = '\007';
445 break;
446 case 'v':
447 c = '\v';
448 break;
449 }
450 if (SCM_PORT_REPRESENTATION(port) == scm_regular_port)
451 {
452 SCM_CHARS (*tok_buf)[j] = c;
453 ++j;
454 }
455 else
456 {
457 int len;
458 len = xwctomb (SCM_CHARS (*tok_buf) + j, c);
459 if (len == 0)
460 len = 1;
461 SCM_ASSERT (len > 0, SCM_MAKINUM (c), "bogus char", "read");
462 j += len;
463 }
464 }
465 if (j == 0)
466 return scm_nullstr;
467 SCM_CHARS (*tok_buf)[j] = 0;
468 {
469 SCM str;
470 str = scm_makfromstr (SCM_CHARS (*tok_buf), j, 0);
471 if (SCM_PORT_REPRESENTATION(port) != scm_regular_port)
472 {
473 SCM_SETLENGTH (str, SCM_LENGTH (str), scm_tc7_mb_string);
474 }
475 return str;
476 }
477
478 case'0':case '1':case '2':case '3':case '4':
479 case '5':case '6':case '7':case '8':case '9':
480 case '.':
481 case '-':
482 case '+':
483 num:
484 j = scm_read_token (c, tok_buf, port, case_i, 0);
485 p = scm_istring2number (SCM_CHARS (*tok_buf), (long) j, 10L);
486 if (SCM_NFALSEP (p))
487 return p;
488 if (c == '#')
489 {
490 if ((j == 2) && (scm_gen_getc (port) == '('))
491 {
492 scm_gen_ungetc ('(', port);
493 c = SCM_CHARS (*tok_buf)[1];
494 goto callshrp;
495 }
496 scm_wta (SCM_UNDEFINED, "unknown # object", SCM_CHARS (*tok_buf));
497 }
498 goto tok;
499
500 case ':':
501 j = scm_read_token ('-', tok_buf, port, case_i, 0);
502 p = scm_intern (SCM_CHARS (*tok_buf), j);
503 if (SCM_PORT_REPRESENTATION (port) != scm_regular_port)
504 scm_set_symbol_multi_byte_x (SCM_CAR (p), SCM_BOOL_T);
505 return scm_make_keyword_from_dash_symbol (SCM_CAR (p));
506
507 default:
508 j = scm_read_token (c, tok_buf, port, case_i, 0);
509 /* fallthrough */
510
511 tok:
512 p = scm_intern (SCM_CHARS (*tok_buf), j);
513 if (SCM_PORT_REPRESENTATION (port) != scm_regular_port)
514 scm_set_symbol_multi_byte_x (SCM_CAR (p), SCM_BOOL_T);
515 return SCM_CAR (p);
516 }
517 }
518
519 #ifdef _UNICOS
520 _Pragma ("noopt"); /* # pragma _CRI noopt */
521 #endif
522 #ifdef __STDC__
523 scm_sizet
524 scm_read_token (int ic, SCM * tok_buf, SCM port, int case_i, int weird)
525 #else
526 scm_sizet
527 scm_read_token (ic, * tok_buf, port, case_i, weird)
528 int ic;
529 SCM *tok_buf;
530 SCM port;
531 int case_i;
532 int weird;
533 #endif
534 {
535 register scm_sizet j;
536 register int c;
537 register char *p;
538
539 c = ic;
540 p = SCM_CHARS (*tok_buf);
541
542 if (weird)
543 j = 0;
544 else
545 {
546 j = 0;
547 while (j + sizeof(xwchar_t) + XMB_CUR_MAX >= SCM_LENGTH (*tok_buf))
548 p = scm_grow_tok_buf (tok_buf);
549 if (SCM_PORT_REPRESENTATION(port) == scm_regular_port)
550 {
551 p[j] = c;
552 ++j;
553 }
554 else
555 {
556 int len;
557 len = xwctomb (p + j, c);
558 if (len == 0)
559 len = 1;
560 SCM_ASSERT (len > 0, SCM_MAKINUM (c), "bogus char", "read");
561 j += len;
562 }
563 }
564
565 while (1)
566 {
567 while (j + sizeof(xwchar_t) + XMB_CUR_MAX >= SCM_LENGTH (*tok_buf))
568 p = scm_grow_tok_buf (tok_buf);
569 c = scm_gen_getc (port);
570 switch (c)
571 {
572 case '(':
573 case ')':
574 case '"':
575 case ';':
576 case SCM_WHITE_SPACES:
577 case SCM_LINE_INCREMENTORS:
578 if (weird)
579 goto default_case;
580
581 scm_gen_ungetc (c, port);
582 case EOF:
583 eof_case:
584 p[j] = 0;
585 return j;
586 case '\\':
587 if (!weird)
588 goto default_case;
589 else
590 {
591 c = scm_gen_getc (port);
592 if (c == EOF)
593 goto eof_case;
594 else
595 goto default_case;
596 }
597 case '}':
598 if (!weird)
599 goto default_case;
600
601 c = scm_gen_getc (port);
602 if (c == '#')
603 {
604 p[j] = 0;
605 return j;
606 }
607 else
608 {
609 scm_gen_ungetc (c, port);
610 c = '}';
611 goto default_case;
612 }
613
614 default:
615 default_case:
616 {
617 c = (case_i ? scm_downcase(c) : c);
618 if (SCM_PORT_REPRESENTATION(port) == scm_regular_port)
619 {
620 p[j] = c;
621 ++j;
622 }
623 else
624 {
625 int len;
626 len = xwctomb (p + j, c);
627 if (len == 0)
628 len = 1;
629 SCM_ASSERT (len > 0, SCM_MAKINUM (c), "bogus char", "read");
630 j += len;
631 }
632 }
633
634 }
635 }
636 }
637 #ifdef _UNICOS
638 _Pragma ("opt"); /* # pragma _CRI opt */
639 #endif
640
641 #ifdef __STDC__
642 SCM
643 scm_lreadparen (SCM * tok_buf, SCM port, char *name, int case_i, SCM sharp, SCM *copy)
644 #else
645 SCM
646 scm_lreadparen (tok_buf, port, name, case_i, sharp, SCM *copy)
647 SCM *tok_buf;
648 SCM port;
649 char *name;
650 int case_i;
651 SCM sharp;
652 #endif
653 {
654 SCM tmp;
655 SCM tl;
656 SCM ans;
657 int c;
658
659 c = scm_flush_ws (port, name);
660 if (')' == c)
661 return SCM_EOL;
662 scm_gen_ungetc (c, port);
663 if (scm_i_dot == (tmp = scm_lreadr (tok_buf, port, case_i, sharp, copy)))
664 {
665 ans = scm_lreadr (tok_buf, port, case_i, sharp, copy);
666 closeit:
667 if (')' != (c = scm_flush_ws (port, name)))
668 scm_wta (SCM_UNDEFINED, "missing close paren", "");
669 return ans;
670 }
671 ans = tl = scm_cons (tmp, SCM_EOL);
672 while (')' != (c = scm_flush_ws (port, name)))
673 {
674 scm_gen_ungetc (c, port);
675 if (scm_i_dot == (tmp = scm_lreadr (tok_buf, port, case_i, sharp, copy)))
676 {
677 SCM_CDR (tl) = scm_lreadr (tok_buf, port, case_i, sharp, copy);
678 goto closeit;
679 }
680 tl = (SCM_CDR (tl) = scm_cons (tmp, SCM_EOL));
681 }
682 return ans;
683 }
684
685 #ifdef __STDC__
686 SCM
687 scm_lreadrecparen (SCM * tok_buf, SCM port, char *name, int case_i, SCM sharp, SCM *copy)
688 #else
689 SCM
690 scm_lreadrecparen (tok_buf, port, name, case_i, sharp, copy)
691 SCM *tok_buf;
692 SCM port;
693 char *name;
694 int case_i;
695 SCM sharp;
696 SCM *copy;
697 #endif
698 {
699 register int c;
700 register SCM tmp;
701 register SCM tl, tl2;
702 SCM ans, ans2;
703 /* Need to capture line and column numbers here. */
704 int line = SCM_LINUM (port);
705 int column = SCM_COL (port) - 1;
706
707 c = scm_flush_ws (port, name);
708 if (')' == c)
709 return SCM_EOL;
710 scm_gen_ungetc (c, port);
711 if (scm_i_dot == (tmp = scm_lreadr (tok_buf, port, case_i, sharp, copy)))
712 {
713 ans = scm_lreadr (tok_buf, port, case_i, sharp, copy);
714 if (')' != (c = scm_flush_ws (port, name)))
715 scm_wta (SCM_UNDEFINED, "missing close paren", "");
716 return ans;
717 }
718 /* Build the head of the list structure. */
719 ans = tl = scm_cons (tmp, SCM_EOL);
720 if (SCM_COPY_SOURCE_P)
721 ans2 = tl2 = scm_cons (SCM_NIMP (tmp) && SCM_CONSP (tmp)
722 ? *copy
723 : tmp,
724 SCM_EOL);
725 while (')' != (c = scm_flush_ws (port, name)))
726 {
727 scm_gen_ungetc (c, port);
728 if (scm_i_dot == (tmp = scm_lreadr (tok_buf, port, case_i, sharp, copy)))
729 {
730 SCM_SETCDR (tl, tmp = scm_lreadr (tok_buf, port, case_i, sharp, copy));
731 if (SCM_COPY_SOURCE_P)
732 SCM_SETCDR (tl2, scm_cons (SCM_NIMP (tmp) && SCM_CONSP (tmp)
733 ? *copy
734 : tmp,
735 SCM_EOL));
736 if (')' != (c = scm_flush_ws (port, name)))
737 scm_wta (SCM_UNDEFINED, "missing close paren", "");
738 goto exit;
739 }
740 tl = SCM_SETCDR (tl, scm_cons (tmp, SCM_EOL));
741 if (SCM_COPY_SOURCE_P)
742 tl2 = SCM_SETCDR (tl2, scm_cons (SCM_NIMP (tmp) && SCM_CONSP (tmp)
743 ? *copy
744 : tmp,
745 SCM_EOL));
746 }
747 exit:
748 scm_whash_insert (scm_source_whash,
749 ans,
750 scm_make_srcprops (line,
751 column,
752 SCM_FILENAME (port),
753 SCM_COPY_SOURCE_P
754 ? *copy = ans2
755 : SCM_UNDEFINED,
756 SCM_EOL));
757 return ans;
758 }
759
760
761 \f
762
763
764 #ifdef __STDC__
765 void
766 scm_init_read (void)
767 #else
768 void
769 scm_init_read ()
770 #endif
771 {
772 #ifdef READER_EXTENSIONS
773 scm_init_opts (scm_read_options, scm_read_opts, SCM_N_READ_OPTIONS);
774 #endif
775 #include "read.x"
776 }