Merge branch 'master' into boehm-demers-weiser-gc
[bpt/guile.git] / libguile / rdelim.c
1 /* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2006 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
17 */
18
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22
23 #include "libguile/_scm.h"
24
25 #include <stdio.h>
26
27 #ifdef HAVE_STRING_H
28 #include <string.h>
29 #endif
30
31 #include "libguile/chars.h"
32 #include "libguile/modules.h"
33 #include "libguile/ports.h"
34 #include "libguile/rdelim.h"
35 #include "libguile/root.h"
36 #include "libguile/strings.h"
37 #include "libguile/strports.h"
38 #include "libguile/validate.h"
39
40 SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 3, 0,
41 (SCM delims, SCM str, SCM gobble, SCM port, SCM start, SCM end),
42 "Read characters from @var{port} into @var{str} until one of the\n"
43 "characters in the @var{delims} string is encountered. If\n"
44 "@var{gobble} is true, discard the delimiter character;\n"
45 "otherwise, leave it in the input stream for the next read. If\n"
46 "@var{port} is not specified, use the value of\n"
47 "@code{(current-input-port)}. If @var{start} or @var{end} are\n"
48 "specified, store data only into the substring of @var{str}\n"
49 "bounded by @var{start} and @var{end} (which default to the\n"
50 "beginning and end of the string, respectively).\n"
51 "\n"
52 " Return a pair consisting of the delimiter that terminated the\n"
53 "string and the number of characters read. If reading stopped\n"
54 "at the end of file, the delimiter returned is the\n"
55 "@var{eof-object}; if the string was filled without encountering\n"
56 "a delimiter, this value is @code{#f}.")
57 #define FUNC_NAME s_scm_read_delimited_x
58 {
59 size_t j;
60 size_t cstart;
61 size_t cend;
62 int c;
63 const char *cdelims;
64 size_t num_delims;
65
66 SCM_VALIDATE_STRING (1, delims);
67 cdelims = scm_i_string_chars (delims);
68 num_delims = scm_i_string_length (delims);
69
70 SCM_VALIDATE_STRING (2, str);
71 scm_i_get_substring_spec (scm_i_string_length (str),
72 start, &cstart, end, &cend);
73
74 if (SCM_UNBNDP (port))
75 port = scm_current_input_port ();
76 else
77 SCM_VALIDATE_OPINPORT (4, port);
78
79 for (j = cstart; j < cend; j++)
80 {
81 size_t k;
82
83 c = scm_getc (port);
84 for (k = 0; k < num_delims; k++)
85 {
86 if (cdelims[k] == c)
87 {
88 if (scm_is_false (gobble))
89 scm_ungetc (c, port);
90
91 return scm_cons (SCM_MAKE_CHAR (c),
92 scm_from_size_t (j - cstart));
93 }
94 }
95 if (c == EOF)
96 return scm_cons (SCM_EOF_VAL,
97 scm_from_size_t (j - cstart));
98
99 scm_c_string_set_x (str, j, SCM_MAKE_CHAR (c));
100 }
101 return scm_cons (SCM_BOOL_F, scm_from_size_t (j - cstart));
102 }
103 #undef FUNC_NAME
104
105 static unsigned char *
106 scm_do_read_line (SCM port, size_t *len_p)
107 {
108 scm_t_port *pt = SCM_PTAB_ENTRY (port);
109 unsigned char *end;
110
111 /* I thought reading lines was simple. Mercy me. */
112
113 /* The common case: the buffer contains a complete line.
114 This needs to be fast. */
115 if ((end = memchr (pt->read_pos, '\n', (pt->read_end - pt->read_pos)))
116 != 0)
117 {
118 size_t buf_len = (end + 1) - pt->read_pos;
119 /* Allocate a buffer of the perfect size. */
120 unsigned char *buf = scm_malloc (buf_len + 1);
121
122 memcpy (buf, pt->read_pos, buf_len);
123 pt->read_pos += buf_len;
124
125 buf[buf_len] = '\0';
126
127 *len_p = buf_len;
128 return buf;
129 }
130
131 /* The buffer contains no newlines. */
132 {
133 /* When live, len is always the number of characters in the
134 current buffer that are part of the current line. */
135 size_t len = (pt->read_end - pt->read_pos);
136 size_t buf_size = (len < 50) ? 60 : len * 2;
137 /* Invariant: buf always has buf_size + 1 characters allocated;
138 the `+ 1' is for the final '\0'. */
139 unsigned char *buf = scm_malloc (buf_size + 1);
140 size_t buf_len = 0;
141
142 for (;;)
143 {
144 if (buf_len + len > buf_size)
145 {
146 size_t new_size = (buf_len + len) * 2;
147 buf = scm_realloc (buf, new_size + 1);
148 buf_size = new_size;
149 }
150
151 /* Copy what we've got out of the port, into our buffer. */
152 memcpy (buf + buf_len, pt->read_pos, len);
153 buf_len += len;
154 pt->read_pos += len;
155
156 /* If we had seen a newline, we're done now. */
157 if (end)
158 break;
159
160 /* Get more characters. */
161 if (scm_fill_input (port) == EOF)
162 {
163 /* If we're missing a final newline in the file, return
164 what we did get, sans newline. */
165 if (buf_len > 0)
166 break;
167
168 free (buf);
169 return 0;
170 }
171
172 /* Search the buffer for newlines. */
173 if ((end = memchr (pt->read_pos, '\n',
174 (len = (pt->read_end - pt->read_pos))))
175 != 0)
176 len = (end - pt->read_pos) + 1;
177 }
178
179 /* I wonder how expensive this realloc is. */
180 buf = scm_realloc (buf, buf_len + 1);
181 buf[buf_len] = '\0';
182 *len_p = buf_len;
183 return buf;
184 }
185 }
186
187
188 /*
189 * %read-line
190 * truncates any terminating newline from its input, and returns
191 * a cons of the string read and its terminating character. Doing
192 * so makes it easy to implement the hairy `read-line' options
193 * efficiently in Scheme.
194 */
195
196 SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
197 (SCM port),
198 "Read a newline-terminated line from @var{port}, allocating storage as\n"
199 "necessary. The newline terminator (if any) is removed from the string,\n"
200 "and a pair consisting of the line and its delimiter is returned. The\n"
201 "delimiter may be either a newline or the @var{eof-object}; if\n"
202 "@code{%read-line} is called at the end of file, it returns the pair\n"
203 "@code{(#<eof> . #<eof>)}.")
204 #define FUNC_NAME s_scm_read_line
205 {
206 scm_t_port *pt;
207 char *s;
208 size_t slen = 0;
209 SCM line, term;
210
211 if (SCM_UNBNDP (port))
212 port = scm_current_input_port ();
213 SCM_VALIDATE_OPINPORT (1,port);
214
215 pt = SCM_PTAB_ENTRY (port);
216 if (pt->rw_active == SCM_PORT_WRITE)
217 scm_ptobs[SCM_PTOBNUM (port)].flush (port);
218
219 s = (char *) scm_do_read_line (port, &slen);
220
221 if (s == NULL)
222 term = line = SCM_EOF_VAL;
223 else
224 {
225 if (s[slen-1] == '\n')
226 {
227 term = SCM_MAKE_CHAR ('\n');
228 s[slen-1] = '\0';
229 line = scm_take_locale_stringn (s, slen-1);
230 SCM_INCLINE (port);
231 }
232 else
233 {
234 /* Fix: we should check for eof on the port before assuming this. */
235 term = SCM_EOF_VAL;
236 line = scm_take_locale_stringn (s, slen);
237 SCM_COL (port) += slen;
238 }
239 }
240
241 if (pt->rw_random)
242 pt->rw_active = SCM_PORT_READ;
243
244 return scm_cons (line, term);
245 }
246 #undef FUNC_NAME
247
248 SCM_DEFINE (scm_write_line, "write-line", 1, 1, 0,
249 (SCM obj, SCM port),
250 "Display @var{obj} and a newline character to @var{port}. If\n"
251 "@var{port} is not specified, @code{(current-output-port)} is\n"
252 "used. This function is equivalent to:\n"
253 "@lisp\n"
254 "(display obj [port])\n"
255 "(newline [port])\n"
256 "@end lisp")
257 #define FUNC_NAME s_scm_write_line
258 {
259 scm_display (obj, port);
260 return scm_newline (port);
261 }
262 #undef FUNC_NAME
263
264 SCM
265 scm_init_rdelim_builtins (void)
266 {
267 #include "libguile/rdelim.x"
268
269 return SCM_UNSPECIFIED;
270 }
271
272 void
273 scm_init_rdelim (void)
274 {
275 scm_c_define_gsubr ("%init-rdelim-builtins", 0, 0, 0,
276 scm_init_rdelim_builtins);
277 }
278
279 /*
280 Local Variables:
281 c-file-style: "gnu"
282 End:
283 */