Commit | Line | Data |
---|---|---|
be54b15d | 1 | /* Copyright (C) 1995,1996,1997,1999,2000,2001 Free Software Foundation, Inc. |
0f2d19dd | 2 | * |
73be1d9e MV |
3 | * This library is free software; you can redistribute it and/or |
4 | * modify it under the terms of the GNU Lesser General Public | |
5 | * License as published by the Free Software Foundation; either | |
6 | * version 2.1 of the License, or (at your option) any later version. | |
0f2d19dd | 7 | * |
73be1d9e MV |
8 | * This library is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | * Lesser General Public License for more details. | |
0f2d19dd | 12 | * |
73be1d9e MV |
13 | * You should have received a copy of the GNU Lesser General Public |
14 | * License along with this library; if not, write to the Free Software | |
15 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
16 | */ | |
1bbd0b84 | 17 | |
1bbd0b84 | 18 | |
0f2d19dd JB |
19 | \f |
20 | ||
0f2d19dd | 21 | #include <stdio.h> |
a0599745 MD |
22 | #include "libguile/_scm.h" |
23 | #include "libguile/chars.h" | |
24 | #include "libguile/eval.h" | |
25 | #include "libguile/unif.h" | |
26 | #include "libguile/keywords.h" | |
27 | #include "libguile/alist.h" | |
28 | #include "libguile/srcprop.h" | |
29 | #include "libguile/hashtab.h" | |
30 | #include "libguile/hash.h" | |
31 | #include "libguile/ports.h" | |
32 | #include "libguile/root.h" | |
33 | #include "libguile/strings.h" | |
ba1b2226 | 34 | #include "libguile/strports.h" |
a0599745 | 35 | #include "libguile/vectors.h" |
a0599745 | 36 | #include "libguile/validate.h" |
ba1b2226 | 37 | |
a0599745 | 38 | #include "libguile/read.h" |
0f2d19dd JB |
39 | |
40 | \f | |
41 | ||
c7733771 GH |
42 | SCM_SYMBOL (scm_keyword_prefix, "prefix"); |
43 | ||
92c2555f | 44 | scm_t_option scm_read_opts[] = { |
b7ff98dd MD |
45 | { SCM_OPTION_BOOLEAN, "copy", 0, |
46 | "Copy source code expressions." }, | |
ac74fc22 | 47 | { SCM_OPTION_BOOLEAN, "positions", 0, |
deca31e1 GH |
48 | "Record positions of source code expressions." }, |
49 | { SCM_OPTION_BOOLEAN, "case-insensitive", 0, | |
c7733771 | 50 | "Convert symbols to lower case."}, |
f1267706 | 51 | { SCM_OPTION_SCM, "keywords", SCM_UNPACK (SCM_BOOL_F), |
c96d76b8 | 52 | "Style of keyword recognition: #f or 'prefix."} |
a16f6fe7 MD |
53 | }; |
54 | ||
39e8f371 HWN |
55 | /* |
56 | Give meaningful error messages for errors | |
57 | ||
58 | We use the format | |
59 | ||
ba1b2226 | 60 | FILE:LINE:COL: MESSAGE |
39e8f371 HWN |
61 | This happened in .... |
62 | ||
63 | This is not standard GNU format, but the test-suite likes the real | |
64 | message to be in front. | |
65 | ||
39e8f371 HWN |
66 | */ |
67 | ||
68 | ||
ba1b2226 HWN |
69 | static void |
70 | scm_input_error(char const * function, | |
71 | SCM port, const char * message, SCM arg) | |
72 | { | |
73 | char *fn = SCM_STRINGP (SCM_FILENAME(port)) | |
74 | ? SCM_STRING_CHARS(SCM_FILENAME(port)) | |
75 | : "#<unknown port>"; | |
76 | ||
77 | SCM string_port = scm_open_output_string (); | |
78 | SCM string = SCM_EOL; | |
79 | scm_simple_format (string_port, | |
80 | scm_makfrom0str ("~A:~S:~S: ~A"), | |
81 | scm_list_4 (scm_makfrom0str (fn), | |
82 | scm_int2num (SCM_LINUM (port) + 1), | |
83 | scm_int2num (SCM_COL (port) + 1), | |
84 | scm_makfrom0str (message))); | |
39e8f371 | 85 | |
ba1b2226 HWN |
86 | |
87 | string = scm_get_output_string (string_port); | |
88 | scm_close_output_port (string_port); | |
89 | scm_error_scm (scm_str2symbol ("read-error"), | |
90 | scm_makfrom0str (function), | |
91 | string, | |
92 | SCM_EOL, | |
93 | SCM_BOOL_F); | |
94 | } | |
39e8f371 HWN |
95 | |
96 | ||
a1ec6916 | 97 | SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0, |
1bbd0b84 | 98 | (SCM setting), |
dc7fa443 MG |
99 | "Option interface for the read options. Instead of using\n" |
100 | "this procedure directly, use the procedures @code{read-enable},\n" | |
3939e9df | 101 | "@code{read-disable}, @code{read-set!} and @code{read-options}.") |
1bbd0b84 | 102 | #define FUNC_NAME s_scm_read_options |
a16f6fe7 | 103 | { |
b7ff98dd MD |
104 | SCM ans = scm_options (setting, |
105 | scm_read_opts, | |
106 | SCM_N_READ_OPTIONS, | |
1bbd0b84 | 107 | FUNC_NAME); |
b7ff98dd MD |
108 | if (SCM_COPY_SOURCE_P) |
109 | SCM_RECORD_POSITIONS_P = 1; | |
a16f6fe7 MD |
110 | return ans; |
111 | } | |
1bbd0b84 | 112 | #undef FUNC_NAME |
a16f6fe7 | 113 | |
14de3b42 GH |
114 | /* An association list mapping extra hash characters to procedures. */ |
115 | static SCM *scm_read_hash_procedures; | |
deca31e1 | 116 | |
a1ec6916 | 117 | SCM_DEFINE (scm_read, "read", 0, 1, 0, |
1bbd0b84 | 118 | (SCM port), |
dc7fa443 MG |
119 | "Read an s-expression from the input port @var{port}, or from\n" |
120 | "the current input port if @var{port} is not specified.\n" | |
121 | "Any whitespace before the next token is discarded.") | |
1bbd0b84 | 122 | #define FUNC_NAME s_scm_read |
0f2d19dd JB |
123 | { |
124 | int c; | |
09a4f039 | 125 | SCM tok_buf, copy; |
0f2d19dd JB |
126 | |
127 | if (SCM_UNBNDP (port)) | |
128 | port = scm_cur_inp; | |
34d19ef6 | 129 | SCM_VALIDATE_OPINPORT (1, port); |
0f2d19dd | 130 | |
0f2d19dd JB |
131 | c = scm_flush_ws (port, (char *) NULL); |
132 | if (EOF == c) | |
133 | return SCM_EOF_VAL; | |
b7f3516f | 134 | scm_ungetc (c, port); |
0f2d19dd | 135 | |
be54b15d | 136 | tok_buf = scm_allocate_string (30); |
deca31e1 | 137 | return scm_lreadr (&tok_buf, port, ©); |
0f2d19dd | 138 | } |
1bbd0b84 | 139 | #undef FUNC_NAME |
0f2d19dd JB |
140 | |
141 | ||
1cc91f1b | 142 | |
0f2d19dd | 143 | char * |
6e8d25a6 | 144 | scm_grow_tok_buf (SCM *tok_buf) |
0f2d19dd | 145 | { |
1be6b49c | 146 | size_t oldlen = SCM_STRING_LENGTH (*tok_buf); |
be54b15d | 147 | SCM newstr = scm_allocate_string (2 * oldlen); |
1be6b49c | 148 | size_t i; |
94115ae3 DH |
149 | |
150 | for (i = 0; i != oldlen; ++i) | |
151 | SCM_STRING_CHARS (newstr) [i] = SCM_STRING_CHARS (*tok_buf) [i]; | |
152 | ||
153 | *tok_buf = newstr; | |
154 | return SCM_STRING_CHARS (newstr); | |
0f2d19dd JB |
155 | } |
156 | ||
157 | ||
1cc91f1b | 158 | |
0f2d19dd | 159 | int |
6e8d25a6 | 160 | scm_flush_ws (SCM port, const char *eoferr) |
0f2d19dd JB |
161 | { |
162 | register int c; | |
163 | while (1) | |
b7f3516f | 164 | switch (c = scm_getc (port)) |
0f2d19dd JB |
165 | { |
166 | case EOF: | |
167 | goteof: | |
168 | if (eoferr) | |
d156d3b7 | 169 | { |
0f8ae50a HWN |
170 | scm_input_error (eoferr, |
171 | port, | |
172 | "end of file", | |
173 | SCM_EOL); | |
d156d3b7 | 174 | } |
0f2d19dd JB |
175 | return c; |
176 | case ';': | |
177 | lp: | |
b7f3516f | 178 | switch (c = scm_getc (port)) |
0f2d19dd JB |
179 | { |
180 | case EOF: | |
181 | goto goteof; | |
182 | default: | |
183 | goto lp; | |
184 | case SCM_LINE_INCREMENTORS: | |
185 | break; | |
186 | } | |
187 | break; | |
188 | case SCM_LINE_INCREMENTORS: | |
0f2d19dd | 189 | case SCM_SINGLE_SPACES: |
0f2d19dd | 190 | case '\t': |
0f2d19dd JB |
191 | break; |
192 | default: | |
193 | return c; | |
194 | } | |
195 | } | |
196 | ||
197 | ||
1cc91f1b | 198 | |
0f2d19dd | 199 | int |
6e8d25a6 | 200 | scm_casei_streq (char *s1, char *s2) |
0f2d19dd JB |
201 | { |
202 | while (*s1 && *s2) | |
203 | if (scm_downcase((int)*s1) != scm_downcase((int)*s2)) | |
204 | return 0; | |
205 | else | |
206 | { | |
207 | ++s1; | |
208 | ++s2; | |
209 | } | |
210 | return !(*s1 || *s2); | |
211 | } | |
212 | ||
213 | ||
09a4f039 MD |
214 | /* recsexpr is used when recording expressions |
215 | * constructed by read:sharp. | |
216 | */ | |
604d4dd9 JB |
217 | #ifndef DEBUG_EXTENSIONS |
218 | #define recsexpr(obj, line, column, filename) (obj) | |
219 | #else | |
09a4f039 | 220 | static SCM |
1be6b49c | 221 | recsexpr (SCM obj, long line, int column, SCM filename) |
09a4f039 | 222 | { |
fee7ef83 | 223 | if (!SCM_CONSP(obj)) { |
09a4f039 | 224 | return obj; |
fee7ef83 | 225 | } else { |
09a4f039 MD |
226 | SCM tmp = obj, copy; |
227 | /* If this sexpr is visible in the read:sharp source, we want to | |
228 | keep that information, so only record non-constant cons cells | |
229 | which haven't previously been read by the reader. */ | |
230 | if (SCM_FALSEP (scm_whash_lookup (scm_source_whash, obj))) | |
231 | { | |
232 | if (SCM_COPY_SOURCE_P) | |
233 | { | |
234 | copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename), | |
235 | SCM_UNDEFINED); | |
0c95b57d | 236 | while ((tmp = SCM_CDR (tmp)) && SCM_CONSP (tmp)) |
a6c64c3c MD |
237 | { |
238 | SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp), | |
239 | line, | |
240 | column, | |
241 | filename), | |
242 | SCM_UNDEFINED)); | |
243 | copy = SCM_CDR (copy); | |
244 | } | |
245 | SCM_SETCDR (copy, tmp); | |
09a4f039 MD |
246 | } |
247 | else | |
248 | { | |
249 | recsexpr (SCM_CAR (obj), line, column, filename); | |
0c95b57d | 250 | while ((tmp = SCM_CDR (tmp)) && SCM_CONSP (tmp)) |
09a4f039 MD |
251 | recsexpr (SCM_CAR (tmp), line, column, filename); |
252 | copy = SCM_UNDEFINED; | |
253 | } | |
254 | scm_whash_insert (scm_source_whash, | |
255 | obj, | |
256 | scm_make_srcprops (line, | |
257 | column, | |
258 | filename, | |
259 | copy, | |
260 | SCM_EOL)); | |
261 | } | |
262 | return obj; | |
263 | } | |
264 | } | |
604d4dd9 | 265 | #endif |
f9c68a47 JB |
266 | |
267 | /* Consume an SCSH-style block comment. Assume that we've already | |
f9731264 JB |
268 | read the initial `#!', and eat characters until we get a |
269 | newline/exclamation-point/sharp-sign/newline sequence. */ | |
f9c68a47 JB |
270 | |
271 | static void | |
6e8d25a6 | 272 | skip_scsh_block_comment (SCM port) |
db4b4ca6 | 273 | #define FUNC_NAME "skip_scsh_block_comment" |
f9c68a47 | 274 | { |
f9731264 JB |
275 | /* Is this portable? Dear God, spare me from the non-eight-bit |
276 | characters. But is it tasteful? */ | |
277 | long history = 0; | |
f9c68a47 JB |
278 | |
279 | for (;;) | |
280 | { | |
b7f3516f | 281 | int c = scm_getc (port); |
f9c68a47 JB |
282 | |
283 | if (c == EOF) | |
db4b4ca6 | 284 | SCM_MISC_ERROR ("unterminated `#! ... !#' comment", SCM_EOL); |
f9731264 | 285 | history = ((history << 8) | (c & 0xff)) & 0xffffffff; |
f9c68a47 | 286 | |
f9731264 JB |
287 | /* Were the last four characters read "\n!#\n"? */ |
288 | if (history == (('\n' << 24) | ('!' << 16) | ('#' << 8) | '\n')) | |
289 | return; | |
f9c68a47 JB |
290 | } |
291 | } | |
db4b4ca6 DH |
292 | #undef FUNC_NAME |
293 | ||
f9c68a47 | 294 | |
1bbd0b84 | 295 | static SCM scm_get_hash_procedure(int c); |
f9c68a47 | 296 | |
09a4f039 | 297 | static char s_list[]="list"; |
1cc91f1b | 298 | |
0f2d19dd | 299 | SCM |
34d19ef6 | 300 | scm_lreadr (SCM *tok_buf, SCM port, SCM *copy) |
db4b4ca6 | 301 | #define FUNC_NAME "scm_lreadr" |
0f2d19dd JB |
302 | { |
303 | int c; | |
1be6b49c | 304 | size_t j; |
0f2d19dd | 305 | SCM p; |
deca31e1 | 306 | |
b858464a | 307 | tryagain: |
1bbd0b84 | 308 | c = scm_flush_ws (port, s_scm_read); |
b858464a | 309 | tryagain_no_flush_ws: |
0f2d19dd JB |
310 | switch (c) |
311 | { | |
312 | case EOF: | |
313 | return SCM_EOF_VAL; | |
314 | ||
315 | case '(': | |
09a4f039 | 316 | return SCM_RECORD_POSITIONS_P |
b858464a | 317 | ? scm_lreadrecparen (tok_buf, port, s_list, copy) |
c6a040a8 | 318 | : scm_lreadparen (tok_buf, port, s_list, copy SCM_ELISP_CLOSE); |
0f2d19dd | 319 | case ')': |
ba1b2226 | 320 | scm_input_error (FUNC_NAME, port,"unexpected \")\"", SCM_EOL); |
0f2d19dd JB |
321 | goto tryagain; |
322 | ||
c6a040a8 NJ |
323 | #ifdef SCM_ELISP_READ_EXTENSIONS |
324 | case '[': | |
325 | p = scm_lreadparen (tok_buf, port, "vector", copy, ']'); | |
326 | return SCM_NULLP (p) ? scm_nullvect : scm_vector (p); | |
327 | #endif | |
0f2d19dd | 328 | case '\'': |
92e5aa0e | 329 | p = scm_sym_quote; |
09a4f039 | 330 | goto recquote; |
0f2d19dd | 331 | case '`': |
92e5aa0e | 332 | p = scm_sym_quasiquote; |
09a4f039 | 333 | goto recquote; |
0f2d19dd | 334 | case ',': |
b7f3516f | 335 | c = scm_getc (port); |
0f2d19dd | 336 | if ('@' == c) |
92e5aa0e | 337 | p = scm_sym_uq_splicing; |
0f2d19dd JB |
338 | else |
339 | { | |
b7f3516f | 340 | scm_ungetc (c, port); |
92e5aa0e | 341 | p = scm_sym_unquote; |
0f2d19dd | 342 | } |
09a4f039 MD |
343 | recquote: |
344 | p = scm_cons2 (p, | |
deca31e1 | 345 | scm_lreadr (tok_buf, port, copy), |
09a4f039 MD |
346 | SCM_EOL); |
347 | if (SCM_RECORD_POSITIONS_P) | |
348 | scm_whash_insert (scm_source_whash, | |
349 | p, | |
350 | scm_make_srcprops (SCM_LINUM (port), | |
351 | SCM_COL (port) - 1, | |
352 | SCM_FILENAME (port), | |
353 | SCM_COPY_SOURCE_P | |
354 | ? (*copy = scm_cons2 (SCM_CAR (p), | |
355 | SCM_CAR (SCM_CDR (p)), | |
356 | SCM_EOL)) | |
357 | : SCM_UNDEFINED, | |
358 | SCM_EOL)); | |
359 | return p; | |
0f2d19dd | 360 | case '#': |
b7f3516f | 361 | c = scm_getc (port); |
b858464a MG |
362 | |
363 | { | |
364 | /* Check for user-defined hash procedure first, to allow | |
365 | overriding of builtin hash read syntaxes. */ | |
366 | SCM sharp = scm_get_hash_procedure (c); | |
367 | if (!SCM_FALSEP (sharp)) | |
368 | { | |
369 | int line = SCM_LINUM (port); | |
370 | int column = SCM_COL (port) - 2; | |
371 | SCM got; | |
372 | ||
373 | got = scm_call_2 (sharp, SCM_MAKE_CHAR (c), port); | |
374 | if (SCM_EQ_P (got, SCM_UNSPECIFIED)) | |
89759084 | 375 | goto handle_sharp; |
b858464a MG |
376 | if (SCM_RECORD_POSITIONS_P) |
377 | return *copy = recsexpr (got, line, column, | |
378 | SCM_FILENAME (port)); | |
379 | else | |
380 | return got; | |
381 | } | |
382 | } | |
89759084 | 383 | handle_sharp: |
0f2d19dd JB |
384 | switch (c) |
385 | { | |
386 | case '(': | |
c6a040a8 | 387 | p = scm_lreadparen (tok_buf, port, "vector", copy SCM_ELISP_CLOSE); |
0f2d19dd JB |
388 | return SCM_NULLP (p) ? scm_nullvect : scm_vector (p); |
389 | ||
390 | case 't': | |
391 | case 'T': | |
392 | return SCM_BOOL_T; | |
393 | case 'f': | |
394 | case 'F': | |
395 | return SCM_BOOL_F; | |
396 | ||
397 | case 'b': | |
398 | case 'B': | |
399 | case 'o': | |
400 | case 'O': | |
401 | case 'd': | |
402 | case 'D': | |
403 | case 'x': | |
404 | case 'X': | |
405 | case 'i': | |
406 | case 'I': | |
407 | case 'e': | |
408 | case 'E': | |
b7f3516f | 409 | scm_ungetc (c, port); |
0f2d19dd JB |
410 | c = '#'; |
411 | goto num; | |
412 | ||
f9c68a47 JB |
413 | case '!': |
414 | /* start of a shell script. Parse as a block comment, | |
415 | terminated by !#, just like SCSH. */ | |
416 | skip_scsh_block_comment (port); | |
b6356af7 MV |
417 | /* EOF is not an error here */ |
418 | c = scm_flush_ws (port, (char *)NULL); | |
419 | goto tryagain_no_flush_ws; | |
f9c68a47 | 420 | |
971d1e36 | 421 | #if SCM_HAVE_ARRAYS |
0f2d19dd | 422 | case '*': |
deca31e1 | 423 | j = scm_read_token (c, tok_buf, port, 0); |
405aaef9 | 424 | p = scm_istr2bve (SCM_STRING_CHARS (*tok_buf) + 1, (long) (j - 1)); |
36284627 | 425 | if (!SCM_FALSEP (p)) |
0f2d19dd JB |
426 | return p; |
427 | else | |
428 | goto unkshrp; | |
afe5177e | 429 | #endif |
0f2d19dd JB |
430 | |
431 | case '{': | |
deca31e1 | 432 | j = scm_read_token (c, tok_buf, port, 1); |
38ae064c | 433 | return scm_mem2symbol (SCM_STRING_CHARS (*tok_buf), j); |
0f2d19dd JB |
434 | |
435 | case '\\': | |
b7f3516f | 436 | c = scm_getc (port); |
deca31e1 | 437 | j = scm_read_token (c, tok_buf, port, 0); |
0f2d19dd | 438 | if (j == 1) |
7866a09b | 439 | return SCM_MAKE_CHAR (c); |
0f2d19dd JB |
440 | if (c >= '0' && c < '8') |
441 | { | |
3c9a524f DH |
442 | /* Dirk:FIXME:: This type of character syntax is not R5RS |
443 | * compliant. Further, it should be verified that the constant | |
444 | * does only consist of octal digits. Finally, it should be | |
445 | * checked whether the resulting fixnum is in the range of | |
446 | * characters. */ | |
447 | p = scm_i_mem2number (SCM_STRING_CHARS (*tok_buf), j, 8); | |
448 | if (SCM_INUMP (p)) | |
7866a09b | 449 | return SCM_MAKE_CHAR (SCM_INUM (p)); |
0f2d19dd JB |
450 | } |
451 | for (c = 0; c < scm_n_charnames; c++) | |
452 | if (scm_charnames[c] | |
405aaef9 | 453 | && (scm_casei_streq (scm_charnames[c], SCM_STRING_CHARS (*tok_buf)))) |
7866a09b | 454 | return SCM_MAKE_CHAR (scm_charnums[c]); |
ba1b2226 | 455 | scm_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL); |
0f2d19dd | 456 | |
50a095f1 JB |
457 | /* #:SYMBOL is a syntax for keywords supported in all contexts. */ |
458 | case ':': | |
459 | j = scm_read_token ('-', tok_buf, port, 0); | |
38ae064c DH |
460 | p = scm_mem2symbol (SCM_STRING_CHARS (*tok_buf), j); |
461 | return scm_make_keyword_from_dash_symbol (p); | |
0f2d19dd JB |
462 | |
463 | default: | |
464 | callshrp: | |
deca31e1 GH |
465 | { |
466 | SCM sharp = scm_get_hash_procedure (c); | |
467 | ||
36284627 | 468 | if (!SCM_FALSEP (sharp)) |
deca31e1 GH |
469 | { |
470 | int line = SCM_LINUM (port); | |
471 | int column = SCM_COL (port) - 2; | |
472 | SCM got; | |
473 | ||
fdc28395 | 474 | got = scm_call_2 (sharp, SCM_MAKE_CHAR (c), port); |
54778cd3 | 475 | if (SCM_EQ_P (got, SCM_UNSPECIFIED)) |
deca31e1 GH |
476 | goto unkshrp; |
477 | if (SCM_RECORD_POSITIONS_P) | |
478 | return *copy = recsexpr (got, line, column, | |
479 | SCM_FILENAME (port)); | |
480 | else | |
481 | return got; | |
482 | } | |
483 | } | |
03bc4386 | 484 | unkshrp: |
ba1b2226 | 485 | scm_input_error (FUNC_NAME, port, "Unknown # object: ~S", |
39e8f371 | 486 | scm_list_1 (SCM_MAKE_CHAR (c))); |
0f2d19dd JB |
487 | } |
488 | ||
489 | case '"': | |
490 | j = 0; | |
b7f3516f | 491 | while ('"' != (c = scm_getc (port))) |
0f2d19dd | 492 | { |
b3fcac34 | 493 | if (c == EOF) |
ba1b2226 | 494 | scm_input_error (FUNC_NAME, port, "end of file in string constant", SCM_EOL); |
0f2d19dd | 495 | |
94115ae3 | 496 | while (j + 2 >= SCM_STRING_LENGTH (*tok_buf)) |
0f2d19dd JB |
497 | scm_grow_tok_buf (tok_buf); |
498 | ||
499 | if (c == '\\') | |
b7f3516f | 500 | switch (c = scm_getc (port)) |
0f2d19dd JB |
501 | { |
502 | case '\n': | |
503 | continue; | |
504 | case '0': | |
505 | c = '\0'; | |
506 | break; | |
507 | case 'f': | |
508 | c = '\f'; | |
509 | break; | |
510 | case 'n': | |
511 | c = '\n'; | |
512 | break; | |
513 | case 'r': | |
514 | c = '\r'; | |
515 | break; | |
516 | case 't': | |
517 | c = '\t'; | |
518 | break; | |
519 | case 'a': | |
520 | c = '\007'; | |
521 | break; | |
522 | case 'v': | |
523 | c = '\v'; | |
524 | break; | |
525 | } | |
405aaef9 | 526 | SCM_STRING_CHARS (*tok_buf)[j] = c; |
b7f3516f | 527 | ++j; |
0f2d19dd JB |
528 | } |
529 | if (j == 0) | |
530 | return scm_nullstr; | |
405aaef9 | 531 | SCM_STRING_CHARS (*tok_buf)[j] = 0; |
36284627 | 532 | return scm_mem2string (SCM_STRING_CHARS (*tok_buf), j); |
0f2d19dd | 533 | |
3c9a524f DH |
534 | case '0': case '1': case '2': case '3': case '4': |
535 | case '5': case '6': case '7': case '8': case '9': | |
0f2d19dd JB |
536 | case '.': |
537 | case '-': | |
538 | case '+': | |
539 | num: | |
3c9a524f DH |
540 | j = scm_read_token (c, tok_buf, port, 0); |
541 | if (j == 1 && (c == '+' || c == '-')) | |
542 | /* Shortcut: Detected symbol '+ or '- */ | |
543 | goto tok; | |
544 | ||
545 | p = scm_i_mem2number (SCM_STRING_CHARS (*tok_buf), j, 10); | |
546 | if (!SCM_FALSEP (p)) | |
547 | return p; | |
548 | if (c == '#') | |
549 | { | |
550 | if ((j == 2) && (scm_getc (port) == '(')) | |
551 | { | |
552 | scm_ungetc ('(', port); | |
553 | c = SCM_STRING_CHARS (*tok_buf)[1]; | |
554 | goto callshrp; | |
555 | } | |
ba1b2226 | 556 | scm_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL); |
3c9a524f DH |
557 | } |
558 | goto tok; | |
0f2d19dd JB |
559 | |
560 | case ':': | |
fee7ef83 | 561 | if (SCM_EQ_P (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix)) |
c7733771 GH |
562 | { |
563 | j = scm_read_token ('-', tok_buf, port, 0); | |
38ae064c DH |
564 | p = scm_mem2symbol (SCM_STRING_CHARS (*tok_buf), j); |
565 | return scm_make_keyword_from_dash_symbol (p); | |
c7733771 GH |
566 | } |
567 | /* fallthrough */ | |
0f2d19dd | 568 | default: |
deca31e1 | 569 | j = scm_read_token (c, tok_buf, port, 0); |
0f2d19dd JB |
570 | /* fallthrough */ |
571 | ||
572 | tok: | |
38ae064c | 573 | return scm_mem2symbol (SCM_STRING_CHARS (*tok_buf), j); |
0f2d19dd JB |
574 | } |
575 | } | |
db4b4ca6 DH |
576 | #undef FUNC_NAME |
577 | ||
0f2d19dd JB |
578 | |
579 | #ifdef _UNICOS | |
580 | _Pragma ("noopt"); /* # pragma _CRI noopt */ | |
581 | #endif | |
1cc91f1b | 582 | |
1be6b49c | 583 | size_t |
6e8d25a6 | 584 | scm_read_token (int ic, SCM *tok_buf, SCM port, int weird) |
0f2d19dd | 585 | { |
1be6b49c | 586 | register size_t j; |
0f2d19dd JB |
587 | register int c; |
588 | register char *p; | |
589 | ||
deca31e1 | 590 | c = (SCM_CASE_INSENSITIVE_P ? scm_downcase(ic) : ic); |
405aaef9 | 591 | p = SCM_STRING_CHARS (*tok_buf); |
0f2d19dd JB |
592 | |
593 | if (weird) | |
594 | j = 0; | |
595 | else | |
596 | { | |
597 | j = 0; | |
94115ae3 | 598 | while (j + 2 >= SCM_STRING_LENGTH (*tok_buf)) |
0f2d19dd | 599 | p = scm_grow_tok_buf (tok_buf); |
b7f3516f TT |
600 | p[j] = c; |
601 | ++j; | |
0f2d19dd JB |
602 | } |
603 | ||
604 | while (1) | |
605 | { | |
94115ae3 | 606 | while (j + 2 >= SCM_STRING_LENGTH (*tok_buf)) |
0f2d19dd | 607 | p = scm_grow_tok_buf (tok_buf); |
b7f3516f | 608 | c = scm_getc (port); |
0f2d19dd JB |
609 | switch (c) |
610 | { | |
611 | case '(': | |
612 | case ')': | |
c6a040a8 NJ |
613 | #ifdef SCM_ELISP_READ_EXTENSIONS |
614 | case '[': | |
615 | case ']': | |
616 | #endif | |
0f2d19dd JB |
617 | case '"': |
618 | case ';': | |
619 | case SCM_WHITE_SPACES: | |
620 | case SCM_LINE_INCREMENTORS: | |
621 | if (weird) | |
622 | goto default_case; | |
623 | ||
b7f3516f | 624 | scm_ungetc (c, port); |
0f2d19dd JB |
625 | case EOF: |
626 | eof_case: | |
627 | p[j] = 0; | |
628 | return j; | |
629 | case '\\': | |
630 | if (!weird) | |
631 | goto default_case; | |
632 | else | |
633 | { | |
b7f3516f | 634 | c = scm_getc (port); |
0f2d19dd JB |
635 | if (c == EOF) |
636 | goto eof_case; | |
637 | else | |
638 | goto default_case; | |
639 | } | |
640 | case '}': | |
641 | if (!weird) | |
642 | goto default_case; | |
643 | ||
b7f3516f | 644 | c = scm_getc (port); |
0f2d19dd JB |
645 | if (c == '#') |
646 | { | |
647 | p[j] = 0; | |
648 | return j; | |
649 | } | |
650 | else | |
651 | { | |
b7f3516f | 652 | scm_ungetc (c, port); |
0f2d19dd JB |
653 | c = '}'; |
654 | goto default_case; | |
655 | } | |
656 | ||
657 | default: | |
658 | default_case: | |
659 | { | |
deca31e1 | 660 | c = (SCM_CASE_INSENSITIVE_P ? scm_downcase(c) : c); |
b7f3516f TT |
661 | p[j] = c; |
662 | ++j; | |
0f2d19dd JB |
663 | } |
664 | ||
665 | } | |
666 | } | |
667 | } | |
1cc91f1b | 668 | |
0f2d19dd JB |
669 | #ifdef _UNICOS |
670 | _Pragma ("opt"); /* # pragma _CRI opt */ | |
671 | #endif | |
672 | ||
0f2d19dd | 673 | SCM |
c6a040a8 NJ |
674 | scm_lreadparen (SCM *tok_buf, SCM port, char *name, SCM *copy |
675 | #ifdef SCM_ELISP_READ_EXTENSIONS | |
676 | , char term_char | |
677 | #else | |
678 | #define term_char ')' | |
679 | #endif | |
680 | ) | |
db4b4ca6 | 681 | #define FUNC_NAME "scm_lreadparen" |
0f2d19dd JB |
682 | { |
683 | SCM tmp; | |
684 | SCM tl; | |
685 | SCM ans; | |
686 | int c; | |
687 | ||
688 | c = scm_flush_ws (port, name); | |
c6a040a8 | 689 | if (term_char == c) |
0f2d19dd | 690 | return SCM_EOL; |
b7f3516f | 691 | scm_ungetc (c, port); |
54778cd3 | 692 | if (SCM_EQ_P (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy)))) |
0f2d19dd | 693 | { |
deca31e1 | 694 | ans = scm_lreadr (tok_buf, port, copy); |
0f2d19dd | 695 | closeit: |
c6a040a8 | 696 | if (term_char != (c = scm_flush_ws (port, name))) |
ba1b2226 | 697 | scm_input_error (FUNC_NAME, port, "missing close paren", SCM_EOL); |
0f2d19dd JB |
698 | return ans; |
699 | } | |
700 | ans = tl = scm_cons (tmp, SCM_EOL); | |
c6a040a8 | 701 | while (term_char != (c = scm_flush_ws (port, name))) |
0f2d19dd | 702 | { |
b7f3516f | 703 | scm_ungetc (c, port); |
54778cd3 | 704 | if (SCM_EQ_P (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy)))) |
0f2d19dd | 705 | { |
deca31e1 | 706 | SCM_SETCDR (tl, scm_lreadr (tok_buf, port, copy)); |
0f2d19dd JB |
707 | goto closeit; |
708 | } | |
a6c64c3c MD |
709 | SCM_SETCDR (tl, scm_cons (tmp, SCM_EOL)); |
710 | tl = SCM_CDR (tl); | |
0f2d19dd JB |
711 | } |
712 | return ans; | |
713 | } | |
db4b4ca6 | 714 | #undef FUNC_NAME |
c6a040a8 NJ |
715 | #ifndef SCM_ELISP_READ_EXTENSIONS |
716 | #undef term_char | |
717 | #endif | |
0f2d19dd | 718 | |
1cc91f1b | 719 | |
09a4f039 | 720 | SCM |
6e8d25a6 | 721 | scm_lreadrecparen (SCM *tok_buf, SCM port, char *name, SCM *copy) |
db4b4ca6 | 722 | #define FUNC_NAME "scm_lreadrecparen" |
09a4f039 MD |
723 | { |
724 | register int c; | |
725 | register SCM tmp; | |
4dc2435a JB |
726 | register SCM tl, tl2 = SCM_EOL; |
727 | SCM ans, ans2 = SCM_EOL; | |
09a4f039 MD |
728 | /* Need to capture line and column numbers here. */ |
729 | int line = SCM_LINUM (port); | |
730 | int column = SCM_COL (port) - 1; | |
731 | ||
732 | c = scm_flush_ws (port, name); | |
733 | if (')' == c) | |
734 | return SCM_EOL; | |
b7f3516f | 735 | scm_ungetc (c, port); |
54778cd3 | 736 | if (SCM_EQ_P (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy)))) |
09a4f039 | 737 | { |
deca31e1 | 738 | ans = scm_lreadr (tok_buf, port, copy); |
09a4f039 | 739 | if (')' != (c = scm_flush_ws (port, name))) |
ba1b2226 | 740 | scm_input_error (FUNC_NAME, port, "missing close paren", SCM_EOL); |
09a4f039 MD |
741 | return ans; |
742 | } | |
743 | /* Build the head of the list structure. */ | |
744 | ans = tl = scm_cons (tmp, SCM_EOL); | |
745 | if (SCM_COPY_SOURCE_P) | |
0c95b57d | 746 | ans2 = tl2 = scm_cons (SCM_CONSP (tmp) |
09a4f039 MD |
747 | ? *copy |
748 | : tmp, | |
749 | SCM_EOL); | |
750 | while (')' != (c = scm_flush_ws (port, name))) | |
751 | { | |
62850ef3 DH |
752 | SCM new_tail; |
753 | ||
b7f3516f | 754 | scm_ungetc (c, port); |
54778cd3 | 755 | if (SCM_EQ_P (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy)))) |
09a4f039 | 756 | { |
deca31e1 | 757 | SCM_SETCDR (tl, tmp = scm_lreadr (tok_buf, port, copy)); |
09a4f039 | 758 | if (SCM_COPY_SOURCE_P) |
0c95b57d | 759 | SCM_SETCDR (tl2, scm_cons (SCM_CONSP (tmp) |
09a4f039 MD |
760 | ? *copy |
761 | : tmp, | |
762 | SCM_EOL)); | |
763 | if (')' != (c = scm_flush_ws (port, name))) | |
ba1b2226 | 764 | scm_input_error (FUNC_NAME, port, "missing close paren", SCM_EOL); |
09a4f039 MD |
765 | goto exit; |
766 | } | |
62850ef3 DH |
767 | |
768 | new_tail = scm_cons (tmp, SCM_EOL); | |
769 | SCM_SETCDR (tl, new_tail); | |
770 | tl = new_tail; | |
771 | ||
09a4f039 | 772 | if (SCM_COPY_SOURCE_P) |
62850ef3 DH |
773 | { |
774 | SCM new_tail2 = scm_cons (SCM_CONSP (tmp) ? *copy : tmp, SCM_EOL); | |
775 | SCM_SETCDR (tl2, new_tail2); | |
776 | tl2 = new_tail2; | |
777 | } | |
09a4f039 MD |
778 | } |
779 | exit: | |
780 | scm_whash_insert (scm_source_whash, | |
781 | ans, | |
782 | scm_make_srcprops (line, | |
783 | column, | |
784 | SCM_FILENAME (port), | |
785 | SCM_COPY_SOURCE_P | |
786 | ? *copy = ans2 | |
787 | : SCM_UNDEFINED, | |
788 | SCM_EOL)); | |
789 | return ans; | |
790 | } | |
db4b4ca6 | 791 | #undef FUNC_NAME |
09a4f039 | 792 | |
0f2d19dd JB |
793 | |
794 | \f | |
795 | ||
14de3b42 GH |
796 | /* Manipulate the read-hash-procedures alist. This could be written in |
797 | Scheme, but maybe it will also be used by C code during initialisation. */ | |
a1ec6916 | 798 | SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0, |
1bbd0b84 | 799 | (SCM chr, SCM proc), |
dc7fa443 MG |
800 | "Install the procedure @var{proc} for reading expressions\n" |
801 | "starting with the character sequence @code{#} and @var{chr}.\n" | |
802 | "@var{proc} will be called with two arguments: the character\n" | |
803 | "@var{chr} and the port to read further data from. The object\n" | |
804 | "returned will be the return value of @code{read}.") | |
1bbd0b84 | 805 | #define FUNC_NAME s_scm_read_hash_extend |
deca31e1 | 806 | { |
fed9c9a2 GH |
807 | SCM this; |
808 | SCM prev; | |
809 | ||
36284627 DH |
810 | SCM_VALIDATE_CHAR (1, chr); |
811 | SCM_ASSERT (SCM_FALSEP (proc) | |
812 | || SCM_EQ_P (scm_procedure_p (proc), SCM_BOOL_T), | |
813 | proc, SCM_ARG2, FUNC_NAME); | |
fed9c9a2 | 814 | |
14de3b42 GH |
815 | /* Check if chr is already in the alist. */ |
816 | this = *scm_read_hash_procedures; | |
817 | prev = SCM_BOOL_F; | |
fed9c9a2 GH |
818 | while (1) |
819 | { | |
820 | if (SCM_NULLP (this)) | |
821 | { | |
822 | /* not found, so add it to the beginning. */ | |
36284627 | 823 | if (!SCM_FALSEP (proc)) |
fed9c9a2 | 824 | { |
14de3b42 GH |
825 | *scm_read_hash_procedures = |
826 | scm_cons (scm_cons (chr, proc), *scm_read_hash_procedures); | |
fed9c9a2 GH |
827 | } |
828 | break; | |
829 | } | |
54778cd3 | 830 | if (SCM_EQ_P (chr, SCM_CAAR (this))) |
fed9c9a2 GH |
831 | { |
832 | /* already in the alist. */ | |
833 | if (SCM_FALSEP (proc)) | |
14de3b42 GH |
834 | { |
835 | /* remove it. */ | |
54778cd3 | 836 | if (SCM_FALSEP (prev)) |
14de3b42 GH |
837 | { |
838 | *scm_read_hash_procedures = | |
839 | SCM_CDR (*scm_read_hash_procedures); | |
840 | } | |
841 | else | |
842 | scm_set_cdr_x (prev, SCM_CDR (this)); | |
843 | } | |
fed9c9a2 | 844 | else |
14de3b42 GH |
845 | { |
846 | /* replace it. */ | |
847 | scm_set_cdr_x (SCM_CAR (this), proc); | |
848 | } | |
fed9c9a2 GH |
849 | break; |
850 | } | |
851 | prev = this; | |
852 | this = SCM_CDR (this); | |
853 | } | |
deca31e1 | 854 | |
deca31e1 GH |
855 | return SCM_UNSPECIFIED; |
856 | } | |
1bbd0b84 | 857 | #undef FUNC_NAME |
0f2d19dd | 858 | |
deca31e1 GH |
859 | /* Recover the read-hash procedure corresponding to char c. */ |
860 | static SCM | |
6e8d25a6 | 861 | scm_get_hash_procedure (int c) |
deca31e1 | 862 | { |
14de3b42 | 863 | SCM rest = *scm_read_hash_procedures; |
fed9c9a2 | 864 | |
deca31e1 GH |
865 | while (1) |
866 | { | |
867 | if (SCM_NULLP (rest)) | |
868 | return SCM_BOOL_F; | |
869 | ||
7866a09b | 870 | if (SCM_CHAR (SCM_CAAR (rest)) == c) |
deca31e1 GH |
871 | return SCM_CDAR (rest); |
872 | ||
873 | rest = SCM_CDR (rest); | |
874 | } | |
875 | } | |
1cc91f1b | 876 | |
0f2d19dd JB |
877 | void |
878 | scm_init_read () | |
0f2d19dd | 879 | { |
14de3b42 | 880 | scm_read_hash_procedures = |
86d31dfe | 881 | SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL)); |
fed9c9a2 | 882 | |
b7ff98dd | 883 | scm_init_opts (scm_read_options, scm_read_opts, SCM_N_READ_OPTIONS); |
a0599745 | 884 | #include "libguile/read.x" |
0f2d19dd | 885 | } |
89e00824 ML |
886 | |
887 | /* | |
888 | Local Variables: | |
889 | c-file-style: "gnu" | |
890 | End: | |
891 | */ |