(scm_read_delimited_x): Avoid
[bpt/guile.git] / libguile / rdelim.c
1 /* Copyright (C) 1995,1996,1997,1998,1999,2000,2001 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public
5 * License as published by the Free Software Foundation; either
6 * version 2.1 of the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
17
18 #if HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21
22 #include "libguile/_scm.h"
23
24 #include <stdio.h>
25
26 #ifdef HAVE_STRING_H
27 #include <string.h>
28 #endif
29
30 #include "libguile/chars.h"
31 #include "libguile/modules.h"
32 #include "libguile/ports.h"
33 #include "libguile/rdelim.h"
34 #include "libguile/root.h"
35 #include "libguile/strings.h"
36 #include "libguile/strports.h"
37 #include "libguile/validate.h"
38
39 SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 3, 0,
40 (SCM delims, SCM str, SCM gobble, SCM port, SCM start, SCM end),
41 "Read characters from @var{port} into @var{str} until one of the\n"
42 "characters in the @var{delims} string is encountered. If\n"
43 "@var{gobble} is true, discard the delimiter character;\n"
44 "otherwise, leave it in the input stream for the next read. If\n"
45 "@var{port} is not specified, use the value of\n"
46 "@code{(current-input-port)}. If @var{start} or @var{end} are\n"
47 "specified, store data only into the substring of @var{str}\n"
48 "bounded by @var{start} and @var{end} (which default to the\n"
49 "beginning and end of the string, respectively).\n"
50 "\n"
51 " Return a pair consisting of the delimiter that terminated the\n"
52 "string and the number of characters read. If reading stopped\n"
53 "at the end of file, the delimiter returned is the\n"
54 "@var{eof-object}; if the string was filled without encountering\n"
55 "a delimiter, this value is @code{#f}.")
56 #define FUNC_NAME s_scm_read_delimited_x
57 {
58 size_t j;
59 char *buf;
60 size_t cstart;
61 size_t cend;
62 int c;
63 char *cdelims;
64 size_t num_delims;
65
66 SCM_VALIDATE_STRING (1, delims);
67 cdelims = SCM_I_STRING_CHARS (delims);
68 num_delims = SCM_I_STRING_LENGTH (delims);
69
70 SCM_VALIDATE_STRING (2, str);
71 buf = SCM_I_STRING_CHARS (str);
72 scm_i_get_substring_spec (SCM_I_STRING_LENGTH (str),
73 start, &cstart, end, &cend);
74
75 if (SCM_UNBNDP (port))
76 port = scm_cur_inp;
77 else
78 SCM_VALIDATE_OPINPORT (4, port);
79
80 for (j = cstart; j < cend; j++)
81 {
82 size_t k;
83
84 c = scm_getc (port);
85 for (k = 0; k < num_delims; k++)
86 {
87 if (cdelims[k] == c)
88 {
89 if (scm_is_false (gobble))
90 scm_ungetc (c, port);
91
92 return scm_cons (SCM_MAKE_CHAR (c),
93 scm_from_size_t (j - cstart));
94 }
95 }
96 if (c == EOF)
97 return scm_cons (SCM_EOF_VAL,
98 scm_from_size_t (j - cstart));
99
100 buf[j] = c;
101 }
102 return scm_cons (SCM_BOOL_F, scm_from_size_t (j - cstart));
103 }
104 #undef FUNC_NAME
105
106 static unsigned char *
107 scm_do_read_line (SCM port, size_t *len_p)
108 {
109 scm_t_port *pt = SCM_PTAB_ENTRY (port);
110 unsigned char *end;
111
112 /* I thought reading lines was simple. Mercy me. */
113
114 /* The common case: the buffer contains a complete line.
115 This needs to be fast. */
116 if ((end = memchr (pt->read_pos, '\n', (pt->read_end - pt->read_pos)))
117 != 0)
118 {
119 size_t buf_len = (end + 1) - pt->read_pos;
120 /* Allocate a buffer of the perfect size. */
121 unsigned char *buf = scm_malloc (buf_len + 1);
122
123 memcpy (buf, pt->read_pos, buf_len);
124 pt->read_pos += buf_len;
125
126 buf[buf_len] = '\0';
127
128 *len_p = buf_len;
129 return buf;
130 }
131
132 /* The buffer contains no newlines. */
133 {
134 /* When live, len is always the number of characters in the
135 current buffer that are part of the current line. */
136 size_t len = (pt->read_end - pt->read_pos);
137 size_t buf_size = (len < 50) ? 60 : len * 2;
138 /* Invariant: buf always has buf_size + 1 characters allocated;
139 the `+ 1' is for the final '\0'. */
140 unsigned char *buf = scm_malloc (buf_size + 1);
141 size_t buf_len = 0;
142
143 for (;;)
144 {
145 if (buf_len + len > buf_size)
146 {
147 size_t new_size = (buf_len + len) * 2;
148 buf = scm_realloc (buf, new_size + 1);
149 buf_size = new_size;
150 }
151
152 /* Copy what we've got out of the port, into our buffer. */
153 memcpy (buf + buf_len, pt->read_pos, len);
154 buf_len += len;
155 pt->read_pos += len;
156
157 /* If we had seen a newline, we're done now. */
158 if (end)
159 break;
160
161 /* Get more characters. */
162 if (scm_fill_input (port) == EOF)
163 {
164 /* If we're missing a final newline in the file, return
165 what we did get, sans newline. */
166 if (buf_len > 0)
167 break;
168
169 free (buf);
170 return 0;
171 }
172
173 /* Search the buffer for newlines. */
174 if ((end = memchr (pt->read_pos, '\n',
175 (len = (pt->read_end - pt->read_pos))))
176 != 0)
177 len = (end - pt->read_pos) + 1;
178 }
179
180 /* I wonder how expensive this realloc is. */
181 buf = scm_realloc (buf, buf_len + 1);
182 buf[buf_len] = '\0';
183 *len_p = buf_len;
184 return buf;
185 }
186 }
187
188
189 /*
190 * %read-line
191 * truncates any terminating newline from its input, and returns
192 * a cons of the string read and its terminating character. Doing
193 * so makes it easy to implement the hairy `read-line' options
194 * efficiently in Scheme.
195 */
196
197 SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
198 (SCM port),
199 "Read a newline-terminated line from @var{port}, allocating storage as\n"
200 "necessary. The newline terminator (if any) is removed from the string,\n"
201 "and a pair consisting of the line and its delimiter is returned. The\n"
202 "delimiter may be either a newline or the @var{eof-object}; if\n"
203 "@code{%read-line} is called at the end of file, it returns the pair\n"
204 "@code{(#<eof> . #<eof>)}.")
205 #define FUNC_NAME s_scm_read_line
206 {
207 scm_t_port *pt;
208 char *s;
209 size_t slen;
210 SCM line, term;
211
212 if (SCM_UNBNDP (port))
213 port = scm_cur_inp;
214 SCM_VALIDATE_OPINPORT (1,port);
215
216 pt = SCM_PTAB_ENTRY (port);
217 if (pt->rw_active == SCM_PORT_WRITE)
218 scm_ptobs[SCM_PTOBNUM (port)].flush (port);
219
220 s = (char *) scm_do_read_line (port, &slen);
221
222 if (s == NULL)
223 term = line = SCM_EOF_VAL;
224 else
225 {
226 if (s[slen-1] == '\n')
227 {
228 term = SCM_MAKE_CHAR ('\n');
229 s[slen-1] = '\0';
230 line = scm_take_str (s, slen-1);
231 SCM_INCLINE (port);
232 }
233 else
234 {
235 /* Fix: we should check for eof on the port before assuming this. */
236 term = SCM_EOF_VAL;
237 line = scm_take_str (s, slen);
238 SCM_COL (port) += slen;
239 }
240 }
241
242 if (pt->rw_random)
243 pt->rw_active = SCM_PORT_READ;
244
245 return scm_cons (line, term);
246 }
247 #undef FUNC_NAME
248
249 SCM_DEFINE (scm_write_line, "write-line", 1, 1, 0,
250 (SCM obj, SCM port),
251 "Display @var{obj} and a newline character to @var{port}. If\n"
252 "@var{port} is not specified, @code{(current-output-port)} is\n"
253 "used. This function is equivalent to:\n"
254 "@lisp\n"
255 "(display obj [port])\n"
256 "(newline [port])\n"
257 "@end lisp")
258 #define FUNC_NAME s_scm_write_line
259 {
260 scm_display (obj, port);
261 return scm_newline (port);
262 }
263 #undef FUNC_NAME
264
265 SCM
266 scm_init_rdelim_builtins (void)
267 {
268 #include "libguile/rdelim.x"
269
270 return SCM_UNSPECIFIED;
271 }
272
273 void
274 scm_init_rdelim (void)
275 {
276 scm_c_define_gsubr ("%init-rdelim-builtins", 0, 0, 0,
277 scm_init_rdelim_builtins);
278 }
279
280 /*
281 Local Variables:
282 c-file-style: "gnu"
283 End:
284 */