maintainer changed: was lord, now jimb; first import
[bpt/guile.git] / libguile / read.c
1 /* Copyright (C) 1995,1996 Free Software Foundation, Inc.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2, or (at your option)
6 * any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this software; see the file COPYING. If not, write to
15 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
16 *
17 * As a special exception, the Free Software Foundation gives permission
18 * for additional uses of the text contained in its release of GUILE.
19 *
20 * The exception is that, if you link the GUILE library with other files
21 * to produce an executable, this does not by itself cause the
22 * resulting executable to be covered by the GNU General Public License.
23 * Your use of that executable is in no way restricted on account of
24 * linking the GUILE library code into it.
25 *
26 * This exception does not however invalidate any other reasons why
27 * the executable file might be covered by the GNU General Public License.
28 *
29 * This exception applies only to the code released by the
30 * Free Software Foundation under the name GUILE. If you copy
31 * code from other Free Software Foundation releases into a copy of
32 * GUILE, as the General Public License permits, the exception does
33 * not apply to the code that you add in this way. To avoid misleading
34 * anyone as to the status of such modified files, you must delete
35 * this exception notice from them.
36 *
37 * If you write modifications of your own for GUILE, it is your choice
38 * whether to permit this exception to apply to your modifications.
39 * If you do not wish that, delete this exception notice.
40 */
41 \f
42
43 #include "extchrs.h"
44 #include <stdio.h>
45 #include "_scm.h"
46 #include "read.h"
47
48 \f
49
50 #define default_case_i 0
51
52 \f
53
54 SCM_PROC (s_read, "read", 0, 3, 0, scm_read);
55 #ifdef __STDC__
56 SCM
57 scm_read (SCM port, SCM case_insensative_p, SCM sharp)
58 #else
59 SCM
60 scm_read (port, case_insensative_p, sharp)
61 SCM port;
62 SCM case_insensative_p;
63 SCM sharp;
64 #endif
65 {
66 int c;
67 SCM tok_buf;
68 int case_i;
69
70 if (SCM_UNBNDP (port))
71 port = scm_cur_inp;
72 else
73 SCM_ASSERT (SCM_NIMP (port) && SCM_OPINPORTP (port), port, SCM_ARG1, s_read);
74
75 case_i = (SCM_UNBNDP (case_insensative_p)
76 ? default_case_i
77 : (case_insensative_p == SCM_BOOL_F));
78
79 if (SCM_UNBNDP (sharp))
80 sharp = SCM_BOOL_F;
81
82 c = scm_flush_ws (port, (char *) NULL);
83 if (EOF == c)
84 return SCM_EOF_VAL;
85 scm_gen_ungetc (c, port);
86
87 tok_buf = scm_makstr (30L, 0);
88 return scm_lreadr (&tok_buf, port, case_i, sharp);
89 }
90
91
92 #ifdef __STDC__
93 char *
94 scm_grow_tok_buf (SCM * tok_buf)
95 #else
96 char *
97 scm_grow_tok_buf (tok_buf)
98 SCM * tok_buf;
99 #endif
100 {
101 SCM t2;
102 scm_sizet len;
103
104 len = SCM_LENGTH (*tok_buf);
105 len += (len / 2 ? len / 2 : 1);
106 t2 = scm_makstr (len, 0);
107 {
108 char * a;
109 char * b;
110 int l;
111 for (a = SCM_CHARS (*tok_buf), b = SCM_CHARS (t2), l = SCM_LENGTH (*tok_buf);
112 l;
113 --l, ++a, ++b)
114 *b = *a;
115 }
116 *tok_buf = t2;
117 return SCM_CHARS (*tok_buf);
118 }
119
120
121 #ifdef __STDC__
122 int
123 scm_flush_ws (SCM port, char *eoferr)
124 #else
125 int
126 scm_flush_ws (port, eoferr)
127 SCM port;
128 char *eoferr;
129 #endif
130 {
131 register int c;
132 while (1)
133 switch (c = scm_gen_getc (port))
134 {
135 case EOF:
136 goteof:
137 if (eoferr)
138 scm_wta (SCM_UNDEFINED, "end of file in ", eoferr);
139 return c;
140 case ';':
141 lp:
142 switch (c = scm_gen_getc (port))
143 {
144 case EOF:
145 goto goteof;
146 default:
147 goto lp;
148 case SCM_LINE_INCREMENTORS:
149 break;
150 }
151 break;
152 case SCM_LINE_INCREMENTORS:
153 break;
154 case SCM_SINGLE_SPACES:
155 SCM_INCCOL (port);
156 break;
157 case '\t':
158 SCM_TABCOL (port);
159 break;
160 default:
161 return c;
162 }
163 }
164
165
166 #ifdef __STDC__
167 int
168 scm_casei_streq (char * s1, char * s2)
169 #else
170 int
171 scm_casei_streq (s1, s2)
172 char * s1;
173 char * s2;
174 #endif
175 {
176 while (*s1 && *s2)
177 if (scm_downcase((int)*s1) != scm_downcase((int)*s2))
178 return 0;
179 else
180 {
181 ++s1;
182 ++s2;
183 }
184 return !(*s1 || *s2);
185 }
186
187
188 #ifdef __STDC__
189 SCM
190 scm_lreadr (SCM * tok_buf, SCM port, int case_i, SCM sharp)
191 #else
192 SCM
193 scm_lreadr (tok_buf, port, case_i, sharp)
194 SCM * tok_buf;
195 SCM port;
196 int case_i;
197 SCM sharp;
198 #endif
199 {
200 int c;
201 scm_sizet j;
202 SCM p;
203
204 tryagain:
205 c = scm_flush_ws (port, s_read);
206 switch (c)
207 {
208 case EOF:
209 return SCM_EOF_VAL;
210
211 case '(':
212 return scm_lreadparen (tok_buf, port, "list", case_i, sharp);
213
214 case ')':
215 scm_wta (SCM_UNDEFINED, "unexpected \")\"", "read");
216 goto tryagain;
217
218 case '\'':
219 return scm_cons2 (scm_i_quote, scm_lreadr (tok_buf, port, case_i, sharp), SCM_EOL);
220
221 case '`':
222 return scm_cons2 (scm_i_quasiquote, scm_lreadr (tok_buf, port, case_i, sharp), SCM_EOL);
223
224 case ',':
225 c = scm_gen_getc (port);
226 if ('@' == c)
227 p = scm_i_uq_splicing;
228 else
229 {
230 scm_gen_ungetc (c, port);
231 p = scm_i_unquote;
232 }
233 return scm_cons2 (p, scm_lreadr (tok_buf, port, case_i, sharp), SCM_EOL);
234
235 case '#':
236 c = scm_gen_getc (port);
237 switch (c)
238 {
239 case '(':
240 p = scm_lreadparen (tok_buf, port, "vector", case_i, sharp);
241 return SCM_NULLP (p) ? scm_nullvect : scm_vector (p);
242
243 case 't':
244 case 'T':
245 return SCM_BOOL_T;
246 case 'f':
247 case 'F':
248 return SCM_BOOL_F;
249
250 case 'b':
251 case 'B':
252 case 'o':
253 case 'O':
254 case 'd':
255 case 'D':
256 case 'x':
257 case 'X':
258 case 'i':
259 case 'I':
260 case 'e':
261 case 'E':
262 scm_gen_ungetc (c, port);
263 c = '#';
264 goto num;
265
266 case '*':
267 j = scm_read_token (c, tok_buf, port, case_i, 0);
268 p = scm_istr2bve (SCM_CHARS (*tok_buf) + 1, (long) (j - 1));
269 if (SCM_NFALSEP (p))
270 return p;
271 else
272 goto unkshrp;
273
274 case '{':
275 j = scm_read_token (c, tok_buf, port, case_i, 1);
276 p = scm_intern (SCM_CHARS (*tok_buf), j);
277 if (SCM_PORT_REPRESENTATION (port) != scm_regular_port)
278 scm_set_symbol_multi_byte_x (SCM_CAR (p), SCM_BOOL_T);
279 return SCM_CAR (p);
280
281 case '\\':
282 c = scm_gen_getc (port);
283 j = scm_read_token (c, tok_buf, port, case_i, 0);
284 if (j == 1)
285 return SCM_MAKICHR (c);
286 if (c >= '0' && c < '8')
287 {
288 p = scm_istr2int (SCM_CHARS (*tok_buf), (long) j, 8);
289 if (SCM_NFALSEP (p))
290 return SCM_MAKICHR (SCM_INUM (p));
291 }
292 for (c = 0; c < scm_n_charnames; c++)
293 if (scm_charnames[c]
294 && (scm_casei_streq (scm_charnames[c], SCM_CHARS (*tok_buf))))
295 return SCM_MAKICHR (scm_charnums[c]);
296 scm_wta (SCM_UNDEFINED, "unknown # object: #\\", SCM_CHARS (*tok_buf));
297
298
299 default:
300 callshrp:
301 if (SCM_NIMP (sharp))
302 {
303 SCM got;
304 got = scm_apply (sharp, SCM_MAKICHR (c), scm_acons (port, SCM_EOL, SCM_EOL));
305 if (SCM_UNSPECIFIED == got)
306 goto unkshrp;
307 return got;
308 }
309 unkshrp:scm_wta ((SCM) SCM_MAKICHR (c), "unknown # object", "");
310 }
311
312 case '"':
313 j = 0;
314 while ('"' != (c = scm_gen_getc (port)))
315 {
316 SCM_ASSERT (EOF != c, SCM_UNDEFINED, "end of file in ", "string");
317
318 while (j + sizeof(xwchar_t) + XMB_CUR_MAX >= SCM_LENGTH (*tok_buf))
319 scm_grow_tok_buf (tok_buf);
320
321 if (c == '\\')
322 switch (c = scm_gen_getc (port))
323 {
324 case '\n':
325 continue;
326 case '0':
327 c = '\0';
328 break;
329 case 'f':
330 c = '\f';
331 break;
332 case 'n':
333 c = '\n';
334 break;
335 case 'r':
336 c = '\r';
337 break;
338 case 't':
339 c = '\t';
340 break;
341 case 'a':
342 c = '\007';
343 break;
344 case 'v':
345 c = '\v';
346 break;
347 }
348 if (SCM_PORT_REPRESENTATION(port) == scm_regular_port)
349 {
350 SCM_CHARS (*tok_buf)[j] = c;
351 ++j;
352 }
353 else
354 {
355 int len;
356 len = xwctomb (SCM_CHARS (*tok_buf) + j, c);
357 if (len == 0)
358 len = 1;
359 SCM_ASSERT (len > 0, SCM_MAKINUM (c), "bogus char", "read");
360 j += len;
361 }
362 }
363 if (j == 0)
364 return scm_nullstr;
365 SCM_CHARS (*tok_buf)[j] = 0;
366 {
367 SCM str;
368 str = scm_makfromstr (SCM_CHARS (*tok_buf), j, 0);
369 if (SCM_PORT_REPRESENTATION(port) != scm_regular_port)
370 {
371 SCM_SETLENGTH (str, SCM_LENGTH (str), scm_tc7_mb_string);
372 }
373 return str;
374 }
375
376 case'0':case '1':case '2':case '3':case '4':
377 case '5':case '6':case '7':case '8':case '9':
378 case '.':
379 case '-':
380 case '+':
381 num:
382 j = scm_read_token (c, tok_buf, port, case_i, 0);
383 p = scm_istring2number (SCM_CHARS (*tok_buf), (long) j, 10L);
384 if (SCM_NFALSEP (p))
385 return p;
386 if (c == '#')
387 {
388 if ((j == 2) && (scm_gen_getc (port) == '('))
389 {
390 scm_gen_ungetc ('(', port);
391 c = SCM_CHARS (*tok_buf)[1];
392 goto callshrp;
393 }
394 scm_wta (SCM_UNDEFINED, "unknown # object", SCM_CHARS (*tok_buf));
395 }
396 goto tok;
397
398 case ':':
399 j = scm_read_token ('-', tok_buf, port, case_i, 0);
400 p = scm_intern (SCM_CHARS (*tok_buf), j);
401 if (SCM_PORT_REPRESENTATION (port) != scm_regular_port)
402 scm_set_symbol_multi_byte_x (SCM_CAR (p), SCM_BOOL_T);
403 return scm_make_keyword_from_dash_symbol (SCM_CAR (p));
404
405 default:
406 j = scm_read_token (c, tok_buf, port, case_i, 0);
407 /* fallthrough */
408
409 tok:
410 p = scm_intern (SCM_CHARS (*tok_buf), j);
411 if (SCM_PORT_REPRESENTATION (port) != scm_regular_port)
412 scm_set_symbol_multi_byte_x (SCM_CAR (p), SCM_BOOL_T);
413 return SCM_CAR (p);
414 }
415 }
416
417 #ifdef _UNICOS
418 _Pragma ("noopt"); /* # pragma _CRI noopt */
419 #endif
420 #ifdef __STDC__
421 scm_sizet
422 scm_read_token (int ic, SCM * tok_buf, SCM port, int case_i, int weird)
423 #else
424 scm_sizet
425 scm_read_token (ic, * tok_buf, port, case_i, weird)
426 int ic;
427 SCM *tok_buf;
428 SCM port;
429 int case_i;
430 int weird;
431 #endif
432 {
433 register scm_sizet j;
434 register int c;
435 register char *p;
436
437 c = ic;
438 p = SCM_CHARS (*tok_buf);
439
440 if (weird)
441 j = 0;
442 else
443 {
444 j = 0;
445 while (j + sizeof(xwchar_t) + XMB_CUR_MAX >= SCM_LENGTH (*tok_buf))
446 p = scm_grow_tok_buf (tok_buf);
447 if (SCM_PORT_REPRESENTATION(port) == scm_regular_port)
448 {
449 p[j] = c;
450 ++j;
451 }
452 else
453 {
454 int len;
455 len = xwctomb (p + j, c);
456 if (len == 0)
457 len = 1;
458 SCM_ASSERT (len > 0, SCM_MAKINUM (c), "bogus char", "read");
459 j += len;
460 }
461 }
462
463 while (1)
464 {
465 while (j + sizeof(xwchar_t) + XMB_CUR_MAX >= SCM_LENGTH (*tok_buf))
466 p = scm_grow_tok_buf (tok_buf);
467 c = scm_gen_getc (port);
468 switch (c)
469 {
470 case '(':
471 case ')':
472 case '"':
473 case ';':
474 case SCM_WHITE_SPACES:
475 case SCM_LINE_INCREMENTORS:
476 if (weird)
477 goto default_case;
478
479 scm_gen_ungetc (c, port);
480 case EOF:
481 eof_case:
482 p[j] = 0;
483 return j;
484 case '\\':
485 if (!weird)
486 goto default_case;
487 else
488 {
489 c = scm_gen_getc (port);
490 if (c == EOF)
491 goto eof_case;
492 else
493 goto default_case;
494 }
495 case '}':
496 if (!weird)
497 goto default_case;
498
499 c = scm_gen_getc (port);
500 if (c == '#')
501 {
502 p[j] = 0;
503 return j;
504 }
505 else
506 {
507 scm_gen_ungetc (c, port);
508 c = '}';
509 goto default_case;
510 }
511
512 default:
513 default_case:
514 {
515 c = (case_i ? scm_downcase(c) : c);
516 if (SCM_PORT_REPRESENTATION(port) == scm_regular_port)
517 {
518 p[j] = c;
519 ++j;
520 }
521 else
522 {
523 int len;
524 len = xwctomb (p + j, c);
525 if (len == 0)
526 len = 1;
527 SCM_ASSERT (len > 0, SCM_MAKINUM (c), "bogus char", "read");
528 j += len;
529 }
530 }
531
532 }
533 }
534 }
535 #ifdef _UNICOS
536 _Pragma ("opt"); /* # pragma _CRI opt */
537 #endif
538
539 #ifdef __STDC__
540 SCM
541 scm_lreadparen (SCM * tok_buf, SCM port, char *name, int case_i, SCM sharp)
542 #else
543 SCM
544 scm_lreadparen (tok_buf, port, name, case_i, sharp)
545 SCM *tok_buf;
546 SCM port;
547 char *name;
548 int case_i;
549 SCM sharp;
550 #endif
551 {
552 SCM tmp;
553 SCM tl;
554 SCM ans;
555 int c;
556
557 c = scm_flush_ws (port, name);
558 if (')' == c)
559 return SCM_EOL;
560 scm_gen_ungetc (c, port);
561 if (scm_i_dot == (tmp = scm_lreadr (tok_buf, port, case_i, sharp)))
562 {
563 ans = scm_lreadr (tok_buf, port, case_i, sharp);
564 closeit:
565 if (')' != (c = scm_flush_ws (port, name)))
566 scm_wta (SCM_UNDEFINED, "missing close paren", "");
567 return ans;
568 }
569 ans = tl = scm_cons (tmp, SCM_EOL);
570 while (')' != (c = scm_flush_ws (port, name)))
571 {
572 scm_gen_ungetc (c, port);
573 if (scm_i_dot == (tmp = scm_lreadr (tok_buf, port, case_i, sharp)))
574 {
575 SCM_CDR (tl) = scm_lreadr (tok_buf, port, case_i, sharp);
576 goto closeit;
577 }
578 tl = (SCM_CDR (tl) = scm_cons (tmp, SCM_EOL));
579 }
580 return ans;
581 }
582
583
584 \f
585
586
587 #ifdef __STDC__
588 void
589 scm_init_read (void)
590 #else
591 void
592 scm_init_read ()
593 #endif
594 {
595 #include "read.x"
596 }
597