c9cc0164d4c1614f840d7bebf7ba5de99ab66490
[bpt/guile.git] / libguile / rdelim.c
1 /* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2006 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public
5 * License as published by the Free Software Foundation; either
6 * version 2.1 of the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 */
17
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21
22 #include "libguile/_scm.h"
23
24 #include <stdio.h>
25
26 #ifdef HAVE_STRING_H
27 #include <string.h>
28 #endif
29
30 #include "libguile/chars.h"
31 #include "libguile/modules.h"
32 #include "libguile/ports.h"
33 #include "libguile/rdelim.h"
34 #include "libguile/root.h"
35 #include "libguile/strings.h"
36 #include "libguile/strports.h"
37 #include "libguile/validate.h"
38
39 SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 3, 0,
40 (SCM delims, SCM str, SCM gobble, SCM port, SCM start, SCM end),
41 "Read characters from @var{port} into @var{str} until one of the\n"
42 "characters in the @var{delims} string is encountered. If\n"
43 "@var{gobble} is true, discard the delimiter character;\n"
44 "otherwise, leave it in the input stream for the next read. If\n"
45 "@var{port} is not specified, use the value of\n"
46 "@code{(current-input-port)}. If @var{start} or @var{end} are\n"
47 "specified, store data only into the substring of @var{str}\n"
48 "bounded by @var{start} and @var{end} (which default to the\n"
49 "beginning and end of the string, respectively).\n"
50 "\n"
51 " Return a pair consisting of the delimiter that terminated the\n"
52 "string and the number of characters read. If reading stopped\n"
53 "at the end of file, the delimiter returned is the\n"
54 "@var{eof-object}; if the string was filled without encountering\n"
55 "a delimiter, this value is @code{#f}.")
56 #define FUNC_NAME s_scm_read_delimited_x
57 {
58 size_t j;
59 size_t cstart;
60 size_t cend;
61 int c;
62 const char *cdelims;
63 size_t num_delims;
64
65 SCM_VALIDATE_STRING (1, delims);
66 cdelims = scm_i_string_chars (delims);
67 num_delims = scm_i_string_length (delims);
68
69 SCM_VALIDATE_STRING (2, str);
70 scm_i_get_substring_spec (scm_i_string_length (str),
71 start, &cstart, end, &cend);
72
73 if (SCM_UNBNDP (port))
74 port = scm_current_input_port ();
75 else
76 SCM_VALIDATE_OPINPORT (4, port);
77
78 for (j = cstart; j < cend; j++)
79 {
80 size_t k;
81
82 c = scm_getc (port);
83 for (k = 0; k < num_delims; k++)
84 {
85 if (cdelims[k] == c)
86 {
87 if (scm_is_false (gobble))
88 scm_ungetc (c, port);
89
90 return scm_cons (SCM_MAKE_CHAR (c),
91 scm_from_size_t (j - cstart));
92 }
93 }
94 if (c == EOF)
95 return scm_cons (SCM_EOF_VAL,
96 scm_from_size_t (j - cstart));
97
98 scm_c_string_set_x (str, j, SCM_MAKE_CHAR (c));
99 }
100 return scm_cons (SCM_BOOL_F, scm_from_size_t (j - cstart));
101 }
102 #undef FUNC_NAME
103
104 static unsigned char *
105 scm_do_read_line (SCM port, size_t *len_p)
106 {
107 scm_t_port *pt = SCM_PTAB_ENTRY (port);
108 unsigned char *end;
109
110 /* I thought reading lines was simple. Mercy me. */
111
112 /* The common case: the buffer contains a complete line.
113 This needs to be fast. */
114 if ((end = memchr (pt->read_pos, '\n', (pt->read_end - pt->read_pos)))
115 != 0)
116 {
117 size_t buf_len = (end + 1) - pt->read_pos;
118 /* Allocate a buffer of the perfect size. */
119 unsigned char *buf = scm_malloc (buf_len + 1);
120
121 memcpy (buf, pt->read_pos, buf_len);
122 pt->read_pos += buf_len;
123
124 buf[buf_len] = '\0';
125
126 *len_p = buf_len;
127 return buf;
128 }
129
130 /* The buffer contains no newlines. */
131 {
132 /* When live, len is always the number of characters in the
133 current buffer that are part of the current line. */
134 size_t len = (pt->read_end - pt->read_pos);
135 size_t buf_size = (len < 50) ? 60 : len * 2;
136 /* Invariant: buf always has buf_size + 1 characters allocated;
137 the `+ 1' is for the final '\0'. */
138 unsigned char *buf = scm_malloc (buf_size + 1);
139 size_t buf_len = 0;
140
141 for (;;)
142 {
143 if (buf_len + len > buf_size)
144 {
145 size_t new_size = (buf_len + len) * 2;
146 buf = scm_realloc (buf, new_size + 1);
147 buf_size = new_size;
148 }
149
150 /* Copy what we've got out of the port, into our buffer. */
151 memcpy (buf + buf_len, pt->read_pos, len);
152 buf_len += len;
153 pt->read_pos += len;
154
155 /* If we had seen a newline, we're done now. */
156 if (end)
157 break;
158
159 /* Get more characters. */
160 if (scm_fill_input (port) == EOF)
161 {
162 /* If we're missing a final newline in the file, return
163 what we did get, sans newline. */
164 if (buf_len > 0)
165 break;
166
167 free (buf);
168 return 0;
169 }
170
171 /* Search the buffer for newlines. */
172 if ((end = memchr (pt->read_pos, '\n',
173 (len = (pt->read_end - pt->read_pos))))
174 != 0)
175 len = (end - pt->read_pos) + 1;
176 }
177
178 /* I wonder how expensive this realloc is. */
179 buf = scm_realloc (buf, buf_len + 1);
180 buf[buf_len] = '\0';
181 *len_p = buf_len;
182 return buf;
183 }
184 }
185
186
187 /*
188 * %read-line
189 * truncates any terminating newline from its input, and returns
190 * a cons of the string read and its terminating character. Doing
191 * so makes it easy to implement the hairy `read-line' options
192 * efficiently in Scheme.
193 */
194
195 SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
196 (SCM port),
197 "Read a newline-terminated line from @var{port}, allocating storage as\n"
198 "necessary. The newline terminator (if any) is removed from the string,\n"
199 "and a pair consisting of the line and its delimiter is returned. The\n"
200 "delimiter may be either a newline or the @var{eof-object}; if\n"
201 "@code{%read-line} is called at the end of file, it returns the pair\n"
202 "@code{(#<eof> . #<eof>)}.")
203 #define FUNC_NAME s_scm_read_line
204 {
205 scm_t_port *pt;
206 char *s;
207 size_t slen = 0;
208 SCM line, term;
209
210 if (SCM_UNBNDP (port))
211 port = scm_current_input_port ();
212 SCM_VALIDATE_OPINPORT (1,port);
213
214 pt = SCM_PTAB_ENTRY (port);
215 if (pt->rw_active == SCM_PORT_WRITE)
216 scm_ptobs[SCM_PTOBNUM (port)].flush (port);
217
218 s = (char *) scm_do_read_line (port, &slen);
219
220 if (s == NULL)
221 term = line = SCM_EOF_VAL;
222 else
223 {
224 if (s[slen-1] == '\n')
225 {
226 term = SCM_MAKE_CHAR ('\n');
227 s[slen-1] = '\0';
228 line = scm_take_locale_stringn (s, slen-1);
229 SCM_INCLINE (port);
230 }
231 else
232 {
233 /* Fix: we should check for eof on the port before assuming this. */
234 term = SCM_EOF_VAL;
235 line = scm_take_locale_stringn (s, slen);
236 SCM_COL (port) += slen;
237 }
238 }
239
240 if (pt->rw_random)
241 pt->rw_active = SCM_PORT_READ;
242
243 return scm_cons (line, term);
244 }
245 #undef FUNC_NAME
246
247 SCM_DEFINE (scm_write_line, "write-line", 1, 1, 0,
248 (SCM obj, SCM port),
249 "Display @var{obj} and a newline character to @var{port}. If\n"
250 "@var{port} is not specified, @code{(current-output-port)} is\n"
251 "used. This function is equivalent to:\n"
252 "@lisp\n"
253 "(display obj [port])\n"
254 "(newline [port])\n"
255 "@end lisp")
256 #define FUNC_NAME s_scm_write_line
257 {
258 scm_display (obj, port);
259 return scm_newline (port);
260 }
261 #undef FUNC_NAME
262
263 SCM
264 scm_init_rdelim_builtins (void)
265 {
266 #include "libguile/rdelim.x"
267
268 return SCM_UNSPECIFIED;
269 }
270
271 void
272 scm_init_rdelim (void)
273 {
274 scm_c_define_gsubr ("%init-rdelim-builtins", 0, 0, 0,
275 scm_init_rdelim_builtins);
276 }
277
278 /*
279 Local Variables:
280 c-file-style: "gnu"
281 End:
282 */