C files should #include only the header files they need, not
[bpt/guile.git] / libguile / read.c
1 /* Copyright (C) 1995,1996 Free Software Foundation, Inc.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2, or (at your option)
6 * any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this software; see the file COPYING. If not, write to
15 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
16 *
17 * As a special exception, the Free Software Foundation gives permission
18 * for additional uses of the text contained in its release of GUILE.
19 *
20 * The exception is that, if you link the GUILE library with other files
21 * to produce an executable, this does not by itself cause the
22 * resulting executable to be covered by the GNU General Public License.
23 * Your use of that executable is in no way restricted on account of
24 * linking the GUILE library code into it.
25 *
26 * This exception does not however invalidate any other reasons why
27 * the executable file might be covered by the GNU General Public License.
28 *
29 * This exception applies only to the code released by the
30 * Free Software Foundation under the name GUILE. If you copy
31 * code from other Free Software Foundation releases into a copy of
32 * GUILE, as the General Public License permits, the exception does
33 * not apply to the code that you add in this way. To avoid misleading
34 * anyone as to the status of such modified files, you must delete
35 * this exception notice from them.
36 *
37 * If you write modifications of your own for GUILE, it is your choice
38 * whether to permit this exception to apply to your modifications.
39 * If you do not wish that, delete this exception notice.
40 */
41 \f
42
43 #include "extchrs.h"
44 #include <stdio.h>
45 #include "_scm.h"
46 #include "chars.h"
47 #include "genio.h"
48 #include "eval.h"
49 #include "unif.h"
50 #include "mbstrings.h"
51 #include "kw.h"
52 #include "alist.h"
53
54 #include "read.h"
55
56 \f
57
58 #define default_case_i 0
59
60 \f
61
62 #ifdef READER_EXTENSIONS
63 scm_option scm_read_opts[] = {
64 { SCM_OPTION_BOOLEAN, "copy", 0,
65 "Copy source code expressions." },
66 { SCM_OPTION_BOOLEAN, "positions", 0,
67 "Record positions of source code expressions." }
68 };
69
70 SCM_PROC (s_read_options, "read-options-interface", 0, 1, 0, scm_read_options);
71 #ifdef __STDC__
72 SCM
73 scm_read_options (SCM setting)
74 #else
75 SCM
76 scm_read_options (setting)
77 SCM setting;
78 #endif
79 {
80 SCM ans = scm_options (setting,
81 scm_read_opts,
82 SCM_N_READ_OPTIONS,
83 s_read_options);
84 if (SCM_COPY_SOURCE_P)
85 SCM_RECORD_POSITIONS_P = 1;
86 return ans;
87 }
88 #endif
89
90 SCM_PROC (s_read, "read", 0, 3, 0, scm_read);
91 #ifdef __STDC__
92 SCM
93 scm_read (SCM port, SCM case_insensitive_p, SCM sharp)
94 #else
95 SCM
96 scm_read (port, case_insensitive_p, sharp)
97 SCM port;
98 SCM case_insensitive_p;
99 SCM sharp;
100 #endif
101 {
102 int c;
103 SCM tok_buf;
104 int case_i;
105
106 if (SCM_UNBNDP (port))
107 port = scm_cur_inp;
108 else
109 SCM_ASSERT (SCM_NIMP (port) && SCM_OPINPORTP (port), port, SCM_ARG1, s_read);
110
111 case_i = (SCM_UNBNDP (case_insensitive_p)
112 ? default_case_i
113 : (case_insensitive_p == SCM_BOOL_F));
114
115 if (SCM_UNBNDP (sharp))
116 sharp = SCM_BOOL_F;
117
118 c = scm_flush_ws (port, (char *) NULL);
119 if (EOF == c)
120 return SCM_EOF_VAL;
121 scm_gen_ungetc (c, port);
122
123 tok_buf = scm_makstr (30L, 0);
124 return scm_lreadr (&tok_buf, port, case_i, sharp);
125 }
126
127
128 #ifdef __STDC__
129 char *
130 scm_grow_tok_buf (SCM * tok_buf)
131 #else
132 char *
133 scm_grow_tok_buf (tok_buf)
134 SCM * tok_buf;
135 #endif
136 {
137 SCM t2;
138 scm_sizet len;
139
140 len = SCM_LENGTH (*tok_buf);
141 len += (len / 2 ? len / 2 : 1);
142 t2 = scm_makstr (len, 0);
143 {
144 char * a;
145 char * b;
146 int l;
147 for (a = SCM_CHARS (*tok_buf), b = SCM_CHARS (t2), l = SCM_LENGTH (*tok_buf);
148 l;
149 --l, ++a, ++b)
150 *b = *a;
151 }
152 *tok_buf = t2;
153 return SCM_CHARS (*tok_buf);
154 }
155
156
157 #ifdef __STDC__
158 int
159 scm_flush_ws (SCM port, char *eoferr)
160 #else
161 int
162 scm_flush_ws (port, eoferr)
163 SCM port;
164 char *eoferr;
165 #endif
166 {
167 register int c;
168 while (1)
169 switch (c = scm_gen_getc (port))
170 {
171 case EOF:
172 goteof:
173 if (eoferr)
174 scm_wta (SCM_UNDEFINED, "end of file in ", eoferr);
175 return c;
176 case ';':
177 lp:
178 switch (c = scm_gen_getc (port))
179 {
180 case EOF:
181 goto goteof;
182 default:
183 goto lp;
184 case SCM_LINE_INCREMENTORS:
185 break;
186 }
187 break;
188 case SCM_LINE_INCREMENTORS:
189 break;
190 case SCM_SINGLE_SPACES:
191 SCM_INCCOL (port);
192 break;
193 case '\t':
194 SCM_TABCOL (port);
195 break;
196 default:
197 return c;
198 }
199 }
200
201
202 #ifdef __STDC__
203 int
204 scm_casei_streq (char * s1, char * s2)
205 #else
206 int
207 scm_casei_streq (s1, s2)
208 char * s1;
209 char * s2;
210 #endif
211 {
212 while (*s1 && *s2)
213 if (scm_downcase((int)*s1) != scm_downcase((int)*s2))
214 return 0;
215 else
216 {
217 ++s1;
218 ++s2;
219 }
220 return !(*s1 || *s2);
221 }
222
223
224 #ifdef __STDC__
225 SCM
226 scm_lreadr (SCM * tok_buf, SCM port, int case_i, SCM sharp)
227 #else
228 SCM
229 scm_lreadr (tok_buf, port, case_i, sharp)
230 SCM * tok_buf;
231 SCM port;
232 int case_i;
233 SCM sharp;
234 #endif
235 {
236 int c;
237 scm_sizet j;
238 SCM p;
239
240 tryagain:
241 c = scm_flush_ws (port, s_read);
242 switch (c)
243 {
244 case EOF:
245 return SCM_EOF_VAL;
246
247 case '(':
248 return scm_lreadparen (tok_buf, port, "list", case_i, sharp);
249
250 case ')':
251 scm_wta (SCM_UNDEFINED, "unexpected \")\"", "read");
252 goto tryagain;
253
254 case '\'':
255 return scm_cons2 (scm_i_quote, scm_lreadr (tok_buf, port, case_i, sharp), SCM_EOL);
256
257 case '`':
258 return scm_cons2 (scm_i_quasiquote, scm_lreadr (tok_buf, port, case_i, sharp), SCM_EOL);
259
260 case ',':
261 c = scm_gen_getc (port);
262 if ('@' == c)
263 p = scm_i_uq_splicing;
264 else
265 {
266 scm_gen_ungetc (c, port);
267 p = scm_i_unquote;
268 }
269 return scm_cons2 (p, scm_lreadr (tok_buf, port, case_i, sharp), SCM_EOL);
270
271 case '#':
272 c = scm_gen_getc (port);
273 switch (c)
274 {
275 case '(':
276 p = scm_lreadparen (tok_buf, port, "vector", case_i, sharp);
277 return SCM_NULLP (p) ? scm_nullvect : scm_vector (p);
278
279 case 't':
280 case 'T':
281 return SCM_BOOL_T;
282 case 'f':
283 case 'F':
284 return SCM_BOOL_F;
285
286 case 'b':
287 case 'B':
288 case 'o':
289 case 'O':
290 case 'd':
291 case 'D':
292 case 'x':
293 case 'X':
294 case 'i':
295 case 'I':
296 case 'e':
297 case 'E':
298 scm_gen_ungetc (c, port);
299 c = '#';
300 goto num;
301
302 case '*':
303 j = scm_read_token (c, tok_buf, port, case_i, 0);
304 p = scm_istr2bve (SCM_CHARS (*tok_buf) + 1, (long) (j - 1));
305 if (SCM_NFALSEP (p))
306 return p;
307 else
308 goto unkshrp;
309
310 case '{':
311 j = scm_read_token (c, tok_buf, port, case_i, 1);
312 p = scm_intern (SCM_CHARS (*tok_buf), j);
313 if (SCM_PORT_REPRESENTATION (port) != scm_regular_port)
314 scm_set_symbol_multi_byte_x (SCM_CAR (p), SCM_BOOL_T);
315 return SCM_CAR (p);
316
317 case '\\':
318 c = scm_gen_getc (port);
319 j = scm_read_token (c, tok_buf, port, case_i, 0);
320 if (j == 1)
321 return SCM_MAKICHR (c);
322 if (c >= '0' && c < '8')
323 {
324 p = scm_istr2int (SCM_CHARS (*tok_buf), (long) j, 8);
325 if (SCM_NFALSEP (p))
326 return SCM_MAKICHR (SCM_INUM (p));
327 }
328 for (c = 0; c < scm_n_charnames; c++)
329 if (scm_charnames[c]
330 && (scm_casei_streq (scm_charnames[c], SCM_CHARS (*tok_buf))))
331 return SCM_MAKICHR (scm_charnums[c]);
332 scm_wta (SCM_UNDEFINED, "unknown # object: #\\", SCM_CHARS (*tok_buf));
333
334
335 default:
336 callshrp:
337 if (SCM_NIMP (sharp))
338 {
339 SCM got;
340 got = scm_apply (sharp, SCM_MAKICHR (c), scm_acons (port, SCM_EOL, SCM_EOL));
341 if (SCM_UNSPECIFIED == got)
342 goto unkshrp;
343 return got;
344 }
345 unkshrp:scm_wta ((SCM) SCM_MAKICHR (c), "unknown # object", "");
346 }
347
348 case '"':
349 j = 0;
350 while ('"' != (c = scm_gen_getc (port)))
351 {
352 SCM_ASSERT (EOF != c, SCM_UNDEFINED, "end of file in ", "string");
353
354 while (j + sizeof(xwchar_t) + XMB_CUR_MAX >= SCM_LENGTH (*tok_buf))
355 scm_grow_tok_buf (tok_buf);
356
357 if (c == '\\')
358 switch (c = scm_gen_getc (port))
359 {
360 case '\n':
361 continue;
362 case '0':
363 c = '\0';
364 break;
365 case 'f':
366 c = '\f';
367 break;
368 case 'n':
369 c = '\n';
370 break;
371 case 'r':
372 c = '\r';
373 break;
374 case 't':
375 c = '\t';
376 break;
377 case 'a':
378 c = '\007';
379 break;
380 case 'v':
381 c = '\v';
382 break;
383 }
384 if (SCM_PORT_REPRESENTATION(port) == scm_regular_port)
385 {
386 SCM_CHARS (*tok_buf)[j] = c;
387 ++j;
388 }
389 else
390 {
391 int len;
392 len = xwctomb (SCM_CHARS (*tok_buf) + j, c);
393 if (len == 0)
394 len = 1;
395 SCM_ASSERT (len > 0, SCM_MAKINUM (c), "bogus char", "read");
396 j += len;
397 }
398 }
399 if (j == 0)
400 return scm_nullstr;
401 SCM_CHARS (*tok_buf)[j] = 0;
402 {
403 SCM str;
404 str = scm_makfromstr (SCM_CHARS (*tok_buf), j, 0);
405 if (SCM_PORT_REPRESENTATION(port) != scm_regular_port)
406 {
407 SCM_SETLENGTH (str, SCM_LENGTH (str), scm_tc7_mb_string);
408 }
409 return str;
410 }
411
412 case'0':case '1':case '2':case '3':case '4':
413 case '5':case '6':case '7':case '8':case '9':
414 case '.':
415 case '-':
416 case '+':
417 num:
418 j = scm_read_token (c, tok_buf, port, case_i, 0);
419 p = scm_istring2number (SCM_CHARS (*tok_buf), (long) j, 10L);
420 if (SCM_NFALSEP (p))
421 return p;
422 if (c == '#')
423 {
424 if ((j == 2) && (scm_gen_getc (port) == '('))
425 {
426 scm_gen_ungetc ('(', port);
427 c = SCM_CHARS (*tok_buf)[1];
428 goto callshrp;
429 }
430 scm_wta (SCM_UNDEFINED, "unknown # object", SCM_CHARS (*tok_buf));
431 }
432 goto tok;
433
434 case ':':
435 j = scm_read_token ('-', tok_buf, port, case_i, 0);
436 p = scm_intern (SCM_CHARS (*tok_buf), j);
437 if (SCM_PORT_REPRESENTATION (port) != scm_regular_port)
438 scm_set_symbol_multi_byte_x (SCM_CAR (p), SCM_BOOL_T);
439 return scm_make_keyword_from_dash_symbol (SCM_CAR (p));
440
441 default:
442 j = scm_read_token (c, tok_buf, port, case_i, 0);
443 /* fallthrough */
444
445 tok:
446 p = scm_intern (SCM_CHARS (*tok_buf), j);
447 if (SCM_PORT_REPRESENTATION (port) != scm_regular_port)
448 scm_set_symbol_multi_byte_x (SCM_CAR (p), SCM_BOOL_T);
449 return SCM_CAR (p);
450 }
451 }
452
453 #ifdef _UNICOS
454 _Pragma ("noopt"); /* # pragma _CRI noopt */
455 #endif
456 #ifdef __STDC__
457 scm_sizet
458 scm_read_token (int ic, SCM * tok_buf, SCM port, int case_i, int weird)
459 #else
460 scm_sizet
461 scm_read_token (ic, * tok_buf, port, case_i, weird)
462 int ic;
463 SCM *tok_buf;
464 SCM port;
465 int case_i;
466 int weird;
467 #endif
468 {
469 register scm_sizet j;
470 register int c;
471 register char *p;
472
473 c = ic;
474 p = SCM_CHARS (*tok_buf);
475
476 if (weird)
477 j = 0;
478 else
479 {
480 j = 0;
481 while (j + sizeof(xwchar_t) + XMB_CUR_MAX >= SCM_LENGTH (*tok_buf))
482 p = scm_grow_tok_buf (tok_buf);
483 if (SCM_PORT_REPRESENTATION(port) == scm_regular_port)
484 {
485 p[j] = c;
486 ++j;
487 }
488 else
489 {
490 int len;
491 len = xwctomb (p + j, c);
492 if (len == 0)
493 len = 1;
494 SCM_ASSERT (len > 0, SCM_MAKINUM (c), "bogus char", "read");
495 j += len;
496 }
497 }
498
499 while (1)
500 {
501 while (j + sizeof(xwchar_t) + XMB_CUR_MAX >= SCM_LENGTH (*tok_buf))
502 p = scm_grow_tok_buf (tok_buf);
503 c = scm_gen_getc (port);
504 switch (c)
505 {
506 case '(':
507 case ')':
508 case '"':
509 case ';':
510 case SCM_WHITE_SPACES:
511 case SCM_LINE_INCREMENTORS:
512 if (weird)
513 goto default_case;
514
515 scm_gen_ungetc (c, port);
516 case EOF:
517 eof_case:
518 p[j] = 0;
519 return j;
520 case '\\':
521 if (!weird)
522 goto default_case;
523 else
524 {
525 c = scm_gen_getc (port);
526 if (c == EOF)
527 goto eof_case;
528 else
529 goto default_case;
530 }
531 case '}':
532 if (!weird)
533 goto default_case;
534
535 c = scm_gen_getc (port);
536 if (c == '#')
537 {
538 p[j] = 0;
539 return j;
540 }
541 else
542 {
543 scm_gen_ungetc (c, port);
544 c = '}';
545 goto default_case;
546 }
547
548 default:
549 default_case:
550 {
551 c = (case_i ? scm_downcase(c) : c);
552 if (SCM_PORT_REPRESENTATION(port) == scm_regular_port)
553 {
554 p[j] = c;
555 ++j;
556 }
557 else
558 {
559 int len;
560 len = xwctomb (p + j, c);
561 if (len == 0)
562 len = 1;
563 SCM_ASSERT (len > 0, SCM_MAKINUM (c), "bogus char", "read");
564 j += len;
565 }
566 }
567
568 }
569 }
570 }
571 #ifdef _UNICOS
572 _Pragma ("opt"); /* # pragma _CRI opt */
573 #endif
574
575 #ifdef __STDC__
576 SCM
577 scm_lreadparen (SCM * tok_buf, SCM port, char *name, int case_i, SCM sharp)
578 #else
579 SCM
580 scm_lreadparen (tok_buf, port, name, case_i, sharp)
581 SCM *tok_buf;
582 SCM port;
583 char *name;
584 int case_i;
585 SCM sharp;
586 #endif
587 {
588 SCM tmp;
589 SCM tl;
590 SCM ans;
591 int c;
592
593 c = scm_flush_ws (port, name);
594 if (')' == c)
595 return SCM_EOL;
596 scm_gen_ungetc (c, port);
597 if (scm_i_dot == (tmp = scm_lreadr (tok_buf, port, case_i, sharp)))
598 {
599 ans = scm_lreadr (tok_buf, port, case_i, sharp);
600 closeit:
601 if (')' != (c = scm_flush_ws (port, name)))
602 scm_wta (SCM_UNDEFINED, "missing close paren", "");
603 return ans;
604 }
605 ans = tl = scm_cons (tmp, SCM_EOL);
606 while (')' != (c = scm_flush_ws (port, name)))
607 {
608 scm_gen_ungetc (c, port);
609 if (scm_i_dot == (tmp = scm_lreadr (tok_buf, port, case_i, sharp)))
610 {
611 SCM_CDR (tl) = scm_lreadr (tok_buf, port, case_i, sharp);
612 goto closeit;
613 }
614 tl = (SCM_CDR (tl) = scm_cons (tmp, SCM_EOL));
615 }
616 return ans;
617 }
618
619
620 \f
621
622
623 #ifdef __STDC__
624 void
625 scm_init_read (void)
626 #else
627 void
628 scm_init_read ()
629 #endif
630 {
631 #ifdef READER_EXTENSIONS
632 scm_init_opts (scm_read_options, scm_read_opts, SCM_N_READ_OPTIONS);
633 #endif
634 #include "read.x"
635 }
636