Commit | Line | Data |
---|---|---|
0f2d19dd JB |
1 | /* Copyright (C) 1995,1996 Free Software Foundation, Inc. |
2 | * | |
3 | * This program is free software; you can redistribute it and/or modify | |
4 | * it under the terms of the GNU General Public License as published by | |
5 | * the Free Software Foundation; either version 2, or (at your option) | |
6 | * any later version. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, | |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
11 | * GNU General Public License for more details. | |
12 | * | |
13 | * You should have received a copy of the GNU General Public License | |
14 | * along with this software; see the file COPYING. If not, write to | |
15 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | |
16 | * | |
17 | * As a special exception, the Free Software Foundation gives permission | |
18 | * for additional uses of the text contained in its release of GUILE. | |
19 | * | |
20 | * The exception is that, if you link the GUILE library with other files | |
21 | * to produce an executable, this does not by itself cause the | |
22 | * resulting executable to be covered by the GNU General Public License. | |
23 | * Your use of that executable is in no way restricted on account of | |
24 | * linking the GUILE library code into it. | |
25 | * | |
26 | * This exception does not however invalidate any other reasons why | |
27 | * the executable file might be covered by the GNU General Public License. | |
28 | * | |
29 | * This exception applies only to the code released by the | |
30 | * Free Software Foundation under the name GUILE. If you copy | |
31 | * code from other Free Software Foundation releases into a copy of | |
32 | * GUILE, as the General Public License permits, the exception does | |
33 | * not apply to the code that you add in this way. To avoid misleading | |
34 | * anyone as to the status of such modified files, you must delete | |
35 | * this exception notice from them. | |
36 | * | |
37 | * If you write modifications of your own for GUILE, it is your choice | |
38 | * whether to permit this exception to apply to your modifications. | |
39 | * If you do not wish that, delete this exception notice. | |
40 | */ | |
41 | \f | |
42 | ||
43 | #include "extchrs.h" | |
44 | #include <stdio.h> | |
45 | #include "_scm.h" | |
46 | #include "read.h" | |
47 | ||
48 | \f | |
49 | ||
50 | #define default_case_i 0 | |
51 | ||
52 | \f | |
53 | ||
a16f6fe7 MD |
54 | #ifdef READER_EXTENSIONS |
55 | scm_option scm_read_opts[] = { | |
b7ff98dd MD |
56 | { SCM_OPTION_BOOLEAN, "copy", 0, |
57 | "Copy source code expressions." }, | |
58 | { SCM_OPTION_BOOLEAN, "positions", 0, | |
59 | "Record positions of source code expressions." } | |
a16f6fe7 MD |
60 | }; |
61 | ||
b7ff98dd | 62 | SCM_PROC (s_read_options, "read-options-interface", 0, 1, 0, scm_read_options); |
a16f6fe7 MD |
63 | #ifdef __STDC__ |
64 | SCM | |
b7ff98dd | 65 | scm_read_options (SCM setting) |
a16f6fe7 MD |
66 | #else |
67 | SCM | |
b7ff98dd MD |
68 | scm_read_options (setting) |
69 | SCM setting; | |
a16f6fe7 MD |
70 | #endif |
71 | { | |
b7ff98dd MD |
72 | SCM ans = scm_options (setting, |
73 | scm_read_opts, | |
74 | SCM_N_READ_OPTIONS, | |
75 | s_read_options); | |
76 | if (SCM_COPY_SOURCE_P) | |
77 | SCM_RECORD_POSITIONS_P = 1; | |
a16f6fe7 MD |
78 | return ans; |
79 | } | |
80 | #endif | |
81 | ||
0f2d19dd JB |
82 | SCM_PROC (s_read, "read", 0, 3, 0, scm_read); |
83 | #ifdef __STDC__ | |
84 | SCM | |
95b88819 | 85 | scm_read (SCM port, SCM case_insensitive_p, SCM sharp) |
0f2d19dd JB |
86 | #else |
87 | SCM | |
95b88819 | 88 | scm_read (port, case_insensitive_p, sharp) |
0f2d19dd | 89 | SCM port; |
95b88819 | 90 | SCM case_insensitive_p; |
0f2d19dd JB |
91 | SCM sharp; |
92 | #endif | |
93 | { | |
94 | int c; | |
95 | SCM tok_buf; | |
96 | int case_i; | |
97 | ||
98 | if (SCM_UNBNDP (port)) | |
99 | port = scm_cur_inp; | |
100 | else | |
101 | SCM_ASSERT (SCM_NIMP (port) && SCM_OPINPORTP (port), port, SCM_ARG1, s_read); | |
102 | ||
95b88819 | 103 | case_i = (SCM_UNBNDP (case_insensitive_p) |
0f2d19dd | 104 | ? default_case_i |
95b88819 | 105 | : (case_insensitive_p == SCM_BOOL_F)); |
0f2d19dd JB |
106 | |
107 | if (SCM_UNBNDP (sharp)) | |
108 | sharp = SCM_BOOL_F; | |
109 | ||
110 | c = scm_flush_ws (port, (char *) NULL); | |
111 | if (EOF == c) | |
112 | return SCM_EOF_VAL; | |
113 | scm_gen_ungetc (c, port); | |
114 | ||
115 | tok_buf = scm_makstr (30L, 0); | |
116 | return scm_lreadr (&tok_buf, port, case_i, sharp); | |
117 | } | |
118 | ||
119 | ||
120 | #ifdef __STDC__ | |
121 | char * | |
122 | scm_grow_tok_buf (SCM * tok_buf) | |
123 | #else | |
124 | char * | |
125 | scm_grow_tok_buf (tok_buf) | |
126 | SCM * tok_buf; | |
127 | #endif | |
128 | { | |
129 | SCM t2; | |
130 | scm_sizet len; | |
131 | ||
132 | len = SCM_LENGTH (*tok_buf); | |
133 | len += (len / 2 ? len / 2 : 1); | |
134 | t2 = scm_makstr (len, 0); | |
135 | { | |
136 | char * a; | |
137 | char * b; | |
138 | int l; | |
139 | for (a = SCM_CHARS (*tok_buf), b = SCM_CHARS (t2), l = SCM_LENGTH (*tok_buf); | |
140 | l; | |
141 | --l, ++a, ++b) | |
142 | *b = *a; | |
143 | } | |
144 | *tok_buf = t2; | |
145 | return SCM_CHARS (*tok_buf); | |
146 | } | |
147 | ||
148 | ||
149 | #ifdef __STDC__ | |
150 | int | |
151 | scm_flush_ws (SCM port, char *eoferr) | |
152 | #else | |
153 | int | |
154 | scm_flush_ws (port, eoferr) | |
155 | SCM port; | |
156 | char *eoferr; | |
157 | #endif | |
158 | { | |
159 | register int c; | |
160 | while (1) | |
161 | switch (c = scm_gen_getc (port)) | |
162 | { | |
163 | case EOF: | |
164 | goteof: | |
165 | if (eoferr) | |
166 | scm_wta (SCM_UNDEFINED, "end of file in ", eoferr); | |
167 | return c; | |
168 | case ';': | |
169 | lp: | |
170 | switch (c = scm_gen_getc (port)) | |
171 | { | |
172 | case EOF: | |
173 | goto goteof; | |
174 | default: | |
175 | goto lp; | |
176 | case SCM_LINE_INCREMENTORS: | |
177 | break; | |
178 | } | |
179 | break; | |
180 | case SCM_LINE_INCREMENTORS: | |
181 | break; | |
182 | case SCM_SINGLE_SPACES: | |
183 | SCM_INCCOL (port); | |
184 | break; | |
185 | case '\t': | |
186 | SCM_TABCOL (port); | |
187 | break; | |
188 | default: | |
189 | return c; | |
190 | } | |
191 | } | |
192 | ||
193 | ||
194 | #ifdef __STDC__ | |
195 | int | |
196 | scm_casei_streq (char * s1, char * s2) | |
197 | #else | |
198 | int | |
199 | scm_casei_streq (s1, s2) | |
200 | char * s1; | |
201 | char * s2; | |
202 | #endif | |
203 | { | |
204 | while (*s1 && *s2) | |
205 | if (scm_downcase((int)*s1) != scm_downcase((int)*s2)) | |
206 | return 0; | |
207 | else | |
208 | { | |
209 | ++s1; | |
210 | ++s2; | |
211 | } | |
212 | return !(*s1 || *s2); | |
213 | } | |
214 | ||
215 | ||
216 | #ifdef __STDC__ | |
217 | SCM | |
218 | scm_lreadr (SCM * tok_buf, SCM port, int case_i, SCM sharp) | |
219 | #else | |
220 | SCM | |
221 | scm_lreadr (tok_buf, port, case_i, sharp) | |
222 | SCM * tok_buf; | |
223 | SCM port; | |
224 | int case_i; | |
225 | SCM sharp; | |
226 | #endif | |
227 | { | |
228 | int c; | |
229 | scm_sizet j; | |
230 | SCM p; | |
231 | ||
232 | tryagain: | |
233 | c = scm_flush_ws (port, s_read); | |
234 | switch (c) | |
235 | { | |
236 | case EOF: | |
237 | return SCM_EOF_VAL; | |
238 | ||
239 | case '(': | |
240 | return scm_lreadparen (tok_buf, port, "list", case_i, sharp); | |
241 | ||
242 | case ')': | |
243 | scm_wta (SCM_UNDEFINED, "unexpected \")\"", "read"); | |
244 | goto tryagain; | |
245 | ||
246 | case '\'': | |
247 | return scm_cons2 (scm_i_quote, scm_lreadr (tok_buf, port, case_i, sharp), SCM_EOL); | |
248 | ||
249 | case '`': | |
250 | return scm_cons2 (scm_i_quasiquote, scm_lreadr (tok_buf, port, case_i, sharp), SCM_EOL); | |
251 | ||
252 | case ',': | |
253 | c = scm_gen_getc (port); | |
254 | if ('@' == c) | |
255 | p = scm_i_uq_splicing; | |
256 | else | |
257 | { | |
258 | scm_gen_ungetc (c, port); | |
259 | p = scm_i_unquote; | |
260 | } | |
261 | return scm_cons2 (p, scm_lreadr (tok_buf, port, case_i, sharp), SCM_EOL); | |
262 | ||
263 | case '#': | |
264 | c = scm_gen_getc (port); | |
265 | switch (c) | |
266 | { | |
267 | case '(': | |
268 | p = scm_lreadparen (tok_buf, port, "vector", case_i, sharp); | |
269 | return SCM_NULLP (p) ? scm_nullvect : scm_vector (p); | |
270 | ||
271 | case 't': | |
272 | case 'T': | |
273 | return SCM_BOOL_T; | |
274 | case 'f': | |
275 | case 'F': | |
276 | return SCM_BOOL_F; | |
277 | ||
278 | case 'b': | |
279 | case 'B': | |
280 | case 'o': | |
281 | case 'O': | |
282 | case 'd': | |
283 | case 'D': | |
284 | case 'x': | |
285 | case 'X': | |
286 | case 'i': | |
287 | case 'I': | |
288 | case 'e': | |
289 | case 'E': | |
290 | scm_gen_ungetc (c, port); | |
291 | c = '#'; | |
292 | goto num; | |
293 | ||
294 | case '*': | |
295 | j = scm_read_token (c, tok_buf, port, case_i, 0); | |
296 | p = scm_istr2bve (SCM_CHARS (*tok_buf) + 1, (long) (j - 1)); | |
297 | if (SCM_NFALSEP (p)) | |
298 | return p; | |
299 | else | |
300 | goto unkshrp; | |
301 | ||
302 | case '{': | |
303 | j = scm_read_token (c, tok_buf, port, case_i, 1); | |
304 | p = scm_intern (SCM_CHARS (*tok_buf), j); | |
305 | if (SCM_PORT_REPRESENTATION (port) != scm_regular_port) | |
306 | scm_set_symbol_multi_byte_x (SCM_CAR (p), SCM_BOOL_T); | |
307 | return SCM_CAR (p); | |
308 | ||
309 | case '\\': | |
310 | c = scm_gen_getc (port); | |
311 | j = scm_read_token (c, tok_buf, port, case_i, 0); | |
312 | if (j == 1) | |
313 | return SCM_MAKICHR (c); | |
314 | if (c >= '0' && c < '8') | |
315 | { | |
316 | p = scm_istr2int (SCM_CHARS (*tok_buf), (long) j, 8); | |
317 | if (SCM_NFALSEP (p)) | |
318 | return SCM_MAKICHR (SCM_INUM (p)); | |
319 | } | |
320 | for (c = 0; c < scm_n_charnames; c++) | |
321 | if (scm_charnames[c] | |
322 | && (scm_casei_streq (scm_charnames[c], SCM_CHARS (*tok_buf)))) | |
323 | return SCM_MAKICHR (scm_charnums[c]); | |
324 | scm_wta (SCM_UNDEFINED, "unknown # object: #\\", SCM_CHARS (*tok_buf)); | |
325 | ||
326 | ||
327 | default: | |
328 | callshrp: | |
329 | if (SCM_NIMP (sharp)) | |
330 | { | |
331 | SCM got; | |
332 | got = scm_apply (sharp, SCM_MAKICHR (c), scm_acons (port, SCM_EOL, SCM_EOL)); | |
333 | if (SCM_UNSPECIFIED == got) | |
334 | goto unkshrp; | |
335 | return got; | |
336 | } | |
337 | unkshrp:scm_wta ((SCM) SCM_MAKICHR (c), "unknown # object", ""); | |
338 | } | |
339 | ||
340 | case '"': | |
341 | j = 0; | |
342 | while ('"' != (c = scm_gen_getc (port))) | |
343 | { | |
344 | SCM_ASSERT (EOF != c, SCM_UNDEFINED, "end of file in ", "string"); | |
345 | ||
346 | while (j + sizeof(xwchar_t) + XMB_CUR_MAX >= SCM_LENGTH (*tok_buf)) | |
347 | scm_grow_tok_buf (tok_buf); | |
348 | ||
349 | if (c == '\\') | |
350 | switch (c = scm_gen_getc (port)) | |
351 | { | |
352 | case '\n': | |
353 | continue; | |
354 | case '0': | |
355 | c = '\0'; | |
356 | break; | |
357 | case 'f': | |
358 | c = '\f'; | |
359 | break; | |
360 | case 'n': | |
361 | c = '\n'; | |
362 | break; | |
363 | case 'r': | |
364 | c = '\r'; | |
365 | break; | |
366 | case 't': | |
367 | c = '\t'; | |
368 | break; | |
369 | case 'a': | |
370 | c = '\007'; | |
371 | break; | |
372 | case 'v': | |
373 | c = '\v'; | |
374 | break; | |
375 | } | |
376 | if (SCM_PORT_REPRESENTATION(port) == scm_regular_port) | |
377 | { | |
378 | SCM_CHARS (*tok_buf)[j] = c; | |
379 | ++j; | |
380 | } | |
381 | else | |
382 | { | |
383 | int len; | |
384 | len = xwctomb (SCM_CHARS (*tok_buf) + j, c); | |
385 | if (len == 0) | |
386 | len = 1; | |
387 | SCM_ASSERT (len > 0, SCM_MAKINUM (c), "bogus char", "read"); | |
388 | j += len; | |
389 | } | |
390 | } | |
391 | if (j == 0) | |
392 | return scm_nullstr; | |
393 | SCM_CHARS (*tok_buf)[j] = 0; | |
394 | { | |
395 | SCM str; | |
396 | str = scm_makfromstr (SCM_CHARS (*tok_buf), j, 0); | |
397 | if (SCM_PORT_REPRESENTATION(port) != scm_regular_port) | |
398 | { | |
399 | SCM_SETLENGTH (str, SCM_LENGTH (str), scm_tc7_mb_string); | |
400 | } | |
401 | return str; | |
402 | } | |
403 | ||
404 | case'0':case '1':case '2':case '3':case '4': | |
405 | case '5':case '6':case '7':case '8':case '9': | |
406 | case '.': | |
407 | case '-': | |
408 | case '+': | |
409 | num: | |
410 | j = scm_read_token (c, tok_buf, port, case_i, 0); | |
411 | p = scm_istring2number (SCM_CHARS (*tok_buf), (long) j, 10L); | |
412 | if (SCM_NFALSEP (p)) | |
413 | return p; | |
414 | if (c == '#') | |
415 | { | |
416 | if ((j == 2) && (scm_gen_getc (port) == '(')) | |
417 | { | |
418 | scm_gen_ungetc ('(', port); | |
419 | c = SCM_CHARS (*tok_buf)[1]; | |
420 | goto callshrp; | |
421 | } | |
422 | scm_wta (SCM_UNDEFINED, "unknown # object", SCM_CHARS (*tok_buf)); | |
423 | } | |
424 | goto tok; | |
425 | ||
426 | case ':': | |
427 | j = scm_read_token ('-', tok_buf, port, case_i, 0); | |
428 | p = scm_intern (SCM_CHARS (*tok_buf), j); | |
429 | if (SCM_PORT_REPRESENTATION (port) != scm_regular_port) | |
430 | scm_set_symbol_multi_byte_x (SCM_CAR (p), SCM_BOOL_T); | |
431 | return scm_make_keyword_from_dash_symbol (SCM_CAR (p)); | |
432 | ||
433 | default: | |
434 | j = scm_read_token (c, tok_buf, port, case_i, 0); | |
435 | /* fallthrough */ | |
436 | ||
437 | tok: | |
438 | p = scm_intern (SCM_CHARS (*tok_buf), j); | |
439 | if (SCM_PORT_REPRESENTATION (port) != scm_regular_port) | |
440 | scm_set_symbol_multi_byte_x (SCM_CAR (p), SCM_BOOL_T); | |
441 | return SCM_CAR (p); | |
442 | } | |
443 | } | |
444 | ||
445 | #ifdef _UNICOS | |
446 | _Pragma ("noopt"); /* # pragma _CRI noopt */ | |
447 | #endif | |
448 | #ifdef __STDC__ | |
449 | scm_sizet | |
450 | scm_read_token (int ic, SCM * tok_buf, SCM port, int case_i, int weird) | |
451 | #else | |
452 | scm_sizet | |
453 | scm_read_token (ic, * tok_buf, port, case_i, weird) | |
454 | int ic; | |
455 | SCM *tok_buf; | |
456 | SCM port; | |
457 | int case_i; | |
458 | int weird; | |
459 | #endif | |
460 | { | |
461 | register scm_sizet j; | |
462 | register int c; | |
463 | register char *p; | |
464 | ||
465 | c = ic; | |
466 | p = SCM_CHARS (*tok_buf); | |
467 | ||
468 | if (weird) | |
469 | j = 0; | |
470 | else | |
471 | { | |
472 | j = 0; | |
473 | while (j + sizeof(xwchar_t) + XMB_CUR_MAX >= SCM_LENGTH (*tok_buf)) | |
474 | p = scm_grow_tok_buf (tok_buf); | |
475 | if (SCM_PORT_REPRESENTATION(port) == scm_regular_port) | |
476 | { | |
477 | p[j] = c; | |
478 | ++j; | |
479 | } | |
480 | else | |
481 | { | |
482 | int len; | |
483 | len = xwctomb (p + j, c); | |
484 | if (len == 0) | |
485 | len = 1; | |
486 | SCM_ASSERT (len > 0, SCM_MAKINUM (c), "bogus char", "read"); | |
487 | j += len; | |
488 | } | |
489 | } | |
490 | ||
491 | while (1) | |
492 | { | |
493 | while (j + sizeof(xwchar_t) + XMB_CUR_MAX >= SCM_LENGTH (*tok_buf)) | |
494 | p = scm_grow_tok_buf (tok_buf); | |
495 | c = scm_gen_getc (port); | |
496 | switch (c) | |
497 | { | |
498 | case '(': | |
499 | case ')': | |
500 | case '"': | |
501 | case ';': | |
502 | case SCM_WHITE_SPACES: | |
503 | case SCM_LINE_INCREMENTORS: | |
504 | if (weird) | |
505 | goto default_case; | |
506 | ||
507 | scm_gen_ungetc (c, port); | |
508 | case EOF: | |
509 | eof_case: | |
510 | p[j] = 0; | |
511 | return j; | |
512 | case '\\': | |
513 | if (!weird) | |
514 | goto default_case; | |
515 | else | |
516 | { | |
517 | c = scm_gen_getc (port); | |
518 | if (c == EOF) | |
519 | goto eof_case; | |
520 | else | |
521 | goto default_case; | |
522 | } | |
523 | case '}': | |
524 | if (!weird) | |
525 | goto default_case; | |
526 | ||
527 | c = scm_gen_getc (port); | |
528 | if (c == '#') | |
529 | { | |
530 | p[j] = 0; | |
531 | return j; | |
532 | } | |
533 | else | |
534 | { | |
535 | scm_gen_ungetc (c, port); | |
536 | c = '}'; | |
537 | goto default_case; | |
538 | } | |
539 | ||
540 | default: | |
541 | default_case: | |
542 | { | |
543 | c = (case_i ? scm_downcase(c) : c); | |
544 | if (SCM_PORT_REPRESENTATION(port) == scm_regular_port) | |
545 | { | |
546 | p[j] = c; | |
547 | ++j; | |
548 | } | |
549 | else | |
550 | { | |
551 | int len; | |
552 | len = xwctomb (p + j, c); | |
553 | if (len == 0) | |
554 | len = 1; | |
555 | SCM_ASSERT (len > 0, SCM_MAKINUM (c), "bogus char", "read"); | |
556 | j += len; | |
557 | } | |
558 | } | |
559 | ||
560 | } | |
561 | } | |
562 | } | |
563 | #ifdef _UNICOS | |
564 | _Pragma ("opt"); /* # pragma _CRI opt */ | |
565 | #endif | |
566 | ||
567 | #ifdef __STDC__ | |
568 | SCM | |
569 | scm_lreadparen (SCM * tok_buf, SCM port, char *name, int case_i, SCM sharp) | |
570 | #else | |
571 | SCM | |
572 | scm_lreadparen (tok_buf, port, name, case_i, sharp) | |
573 | SCM *tok_buf; | |
574 | SCM port; | |
575 | char *name; | |
576 | int case_i; | |
577 | SCM sharp; | |
578 | #endif | |
579 | { | |
580 | SCM tmp; | |
581 | SCM tl; | |
582 | SCM ans; | |
583 | int c; | |
584 | ||
585 | c = scm_flush_ws (port, name); | |
586 | if (')' == c) | |
587 | return SCM_EOL; | |
588 | scm_gen_ungetc (c, port); | |
589 | if (scm_i_dot == (tmp = scm_lreadr (tok_buf, port, case_i, sharp))) | |
590 | { | |
591 | ans = scm_lreadr (tok_buf, port, case_i, sharp); | |
592 | closeit: | |
593 | if (')' != (c = scm_flush_ws (port, name))) | |
594 | scm_wta (SCM_UNDEFINED, "missing close paren", ""); | |
595 | return ans; | |
596 | } | |
597 | ans = tl = scm_cons (tmp, SCM_EOL); | |
598 | while (')' != (c = scm_flush_ws (port, name))) | |
599 | { | |
600 | scm_gen_ungetc (c, port); | |
601 | if (scm_i_dot == (tmp = scm_lreadr (tok_buf, port, case_i, sharp))) | |
602 | { | |
603 | SCM_CDR (tl) = scm_lreadr (tok_buf, port, case_i, sharp); | |
604 | goto closeit; | |
605 | } | |
606 | tl = (SCM_CDR (tl) = scm_cons (tmp, SCM_EOL)); | |
607 | } | |
608 | return ans; | |
609 | } | |
610 | ||
611 | ||
612 | \f | |
613 | ||
614 | ||
615 | #ifdef __STDC__ | |
616 | void | |
617 | scm_init_read (void) | |
618 | #else | |
619 | void | |
620 | scm_init_read () | |
621 | #endif | |
622 | { | |
a16f6fe7 | 623 | #ifdef READER_EXTENSIONS |
b7ff98dd | 624 | scm_init_opts (scm_read_options, scm_read_opts, SCM_N_READ_OPTIONS); |
a16f6fe7 | 625 | #endif |
0f2d19dd JB |
626 | #include "read.x" |
627 | } | |
628 |