Merge branch 'master' into boehm-demers-weiser-gc
[bpt/guile.git] / libguile / rdelim.c
1 /* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2006 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
17 */
18
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22
23 #include "libguile/_scm.h"
24
25 #include <stdio.h>
26
27 #ifdef HAVE_STRING_H
28 #include <string.h>
29 #endif
30
31 #include "libguile/chars.h"
32 #include "libguile/modules.h"
33 #include "libguile/ports.h"
34 #include "libguile/rdelim.h"
35 #include "libguile/root.h"
36 #include "libguile/strings.h"
37 #include "libguile/strports.h"
38 #include "libguile/validate.h"
39
40 SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 3, 0,
41 (SCM delims, SCM str, SCM gobble, SCM port, SCM start, SCM end),
42 "Read characters from @var{port} into @var{str} until one of the\n"
43 "characters in the @var{delims} string is encountered. If\n"
44 "@var{gobble} is true, discard the delimiter character;\n"
45 "otherwise, leave it in the input stream for the next read. If\n"
46 "@var{port} is not specified, use the value of\n"
47 "@code{(current-input-port)}. If @var{start} or @var{end} are\n"
48 "specified, store data only into the substring of @var{str}\n"
49 "bounded by @var{start} and @var{end} (which default to the\n"
50 "beginning and end of the string, respectively).\n"
51 "\n"
52 " Return a pair consisting of the delimiter that terminated the\n"
53 "string and the number of characters read. If reading stopped\n"
54 "at the end of file, the delimiter returned is the\n"
55 "@var{eof-object}; if the string was filled without encountering\n"
56 "a delimiter, this value is @code{#f}.")
57 #define FUNC_NAME s_scm_read_delimited_x
58 {
59 size_t j;
60 size_t cstart;
61 size_t cend;
62 scm_t_wchar c;
63 size_t num_delims;
64
65 SCM_VALIDATE_STRING (1, delims);
66 num_delims = scm_i_string_length (delims);
67
68 SCM_VALIDATE_STRING (2, str);
69 scm_i_get_substring_spec (scm_i_string_length (str),
70 start, &cstart, end, &cend);
71
72 if (SCM_UNBNDP (port))
73 port = scm_current_input_port ();
74 else
75 SCM_VALIDATE_OPINPORT (4, port);
76
77 for (j = cstart; j < cend; j++)
78 {
79 size_t k;
80
81 c = scm_getc (port);
82 for (k = 0; k < num_delims; k++)
83 {
84 if (scm_i_string_ref (delims, k) == c)
85 {
86 if (scm_is_false (gobble))
87 scm_ungetc (c, port);
88
89 return scm_cons (SCM_MAKE_CHAR (c),
90 scm_from_size_t (j - cstart));
91 }
92 }
93 if (c == EOF)
94 return scm_cons (SCM_EOF_VAL,
95 scm_from_size_t (j - cstart));
96
97 scm_c_string_set_x (str, j, SCM_MAKE_CHAR (c));
98 }
99 return scm_cons (SCM_BOOL_F, scm_from_size_t (j - cstart));
100 }
101 #undef FUNC_NAME
102
103 static unsigned char *
104 scm_do_read_line (SCM port, size_t *len_p)
105 {
106 scm_t_port *pt = SCM_PTAB_ENTRY (port);
107 unsigned char *end;
108
109 /* I thought reading lines was simple. Mercy me. */
110
111 /* The common case: the buffer contains a complete line.
112 This needs to be fast. */
113 if ((end = memchr (pt->read_pos, '\n', (pt->read_end - pt->read_pos)))
114 != 0)
115 {
116 size_t buf_len = (end + 1) - pt->read_pos;
117 /* Allocate a buffer of the perfect size. */
118 unsigned char *buf = scm_malloc (buf_len + 1);
119
120 memcpy (buf, pt->read_pos, buf_len);
121 pt->read_pos += buf_len;
122
123 buf[buf_len] = '\0';
124
125 *len_p = buf_len;
126 return buf;
127 }
128
129 /* The buffer contains no newlines. */
130 {
131 /* When live, len is always the number of characters in the
132 current buffer that are part of the current line. */
133 size_t len = (pt->read_end - pt->read_pos);
134 size_t buf_size = (len < 50) ? 60 : len * 2;
135 /* Invariant: buf always has buf_size + 1 characters allocated;
136 the `+ 1' is for the final '\0'. */
137 unsigned char *buf = scm_malloc (buf_size + 1);
138 size_t buf_len = 0;
139
140 for (;;)
141 {
142 if (buf_len + len > buf_size)
143 {
144 size_t new_size = (buf_len + len) * 2;
145 buf = scm_realloc (buf, new_size + 1);
146 buf_size = new_size;
147 }
148
149 /* Copy what we've got out of the port, into our buffer. */
150 memcpy (buf + buf_len, pt->read_pos, len);
151 buf_len += len;
152 pt->read_pos += len;
153
154 /* If we had seen a newline, we're done now. */
155 if (end)
156 break;
157
158 /* Get more characters. */
159 if (scm_fill_input (port) == EOF)
160 {
161 /* If we're missing a final newline in the file, return
162 what we did get, sans newline. */
163 if (buf_len > 0)
164 break;
165
166 free (buf);
167 return 0;
168 }
169
170 /* Search the buffer for newlines. */
171 if ((end = memchr (pt->read_pos, '\n',
172 (len = (pt->read_end - pt->read_pos))))
173 != 0)
174 len = (end - pt->read_pos) + 1;
175 }
176
177 /* I wonder how expensive this realloc is. */
178 buf = scm_realloc (buf, buf_len + 1);
179 buf[buf_len] = '\0';
180 *len_p = buf_len;
181 return buf;
182 }
183 }
184
185
186 /*
187 * %read-line
188 * truncates any terminating newline from its input, and returns
189 * a cons of the string read and its terminating character. Doing
190 * so makes it easy to implement the hairy `read-line' options
191 * efficiently in Scheme.
192 */
193
194 SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
195 (SCM port),
196 "Read a newline-terminated line from @var{port}, allocating storage as\n"
197 "necessary. The newline terminator (if any) is removed from the string,\n"
198 "and a pair consisting of the line and its delimiter is returned. The\n"
199 "delimiter may be either a newline or the @var{eof-object}; if\n"
200 "@code{%read-line} is called at the end of file, it returns the pair\n"
201 "@code{(#<eof> . #<eof>)}.")
202 #define FUNC_NAME s_scm_read_line
203 {
204 scm_t_port *pt;
205 char *s;
206 size_t slen = 0;
207 SCM line, term;
208
209 if (SCM_UNBNDP (port))
210 port = scm_current_input_port ();
211 SCM_VALIDATE_OPINPORT (1,port);
212
213 pt = SCM_PTAB_ENTRY (port);
214 if (pt->rw_active == SCM_PORT_WRITE)
215 scm_ptobs[SCM_PTOBNUM (port)].flush (port);
216
217 s = (char *) scm_do_read_line (port, &slen);
218
219 if (s == NULL)
220 term = line = SCM_EOF_VAL;
221 else
222 {
223 if (s[slen-1] == '\n')
224 {
225 term = SCM_MAKE_CHAR ('\n');
226 s[slen-1] = '\0';
227 line = scm_take_locale_stringn (s, slen-1);
228 SCM_INCLINE (port);
229 }
230 else
231 {
232 /* Fix: we should check for eof on the port before assuming this. */
233 term = SCM_EOF_VAL;
234 line = scm_take_locale_stringn (s, slen);
235 SCM_COL (port) += slen;
236 }
237 }
238
239 if (pt->rw_random)
240 pt->rw_active = SCM_PORT_READ;
241
242 return scm_cons (line, term);
243 }
244 #undef FUNC_NAME
245
246 SCM_DEFINE (scm_write_line, "write-line", 1, 1, 0,
247 (SCM obj, SCM port),
248 "Display @var{obj} and a newline character to @var{port}. If\n"
249 "@var{port} is not specified, @code{(current-output-port)} is\n"
250 "used. This function is equivalent to:\n"
251 "@lisp\n"
252 "(display obj [port])\n"
253 "(newline [port])\n"
254 "@end lisp")
255 #define FUNC_NAME s_scm_write_line
256 {
257 scm_display (obj, port);
258 return scm_newline (port);
259 }
260 #undef FUNC_NAME
261
262 SCM
263 scm_init_rdelim_builtins (void)
264 {
265 #include "libguile/rdelim.x"
266
267 return SCM_UNSPECIFIED;
268 }
269
270 void
271 scm_init_rdelim (void)
272 {
273 scm_c_define_gsubr ("%init-rdelim-builtins", 0, 0, 0,
274 scm_init_rdelim_builtins);
275 }
276
277 /*
278 Local Variables:
279 c-file-style: "gnu"
280 End:
281 */