read-line should use port's encoding, not locale's encoding
[bpt/guile.git] / libguile / rdelim.c
1 /* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2006 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
17 */
18
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22
23 #include "libguile/_scm.h"
24
25 #include <stdio.h>
26
27 #ifdef HAVE_STRING_H
28 #include <string.h>
29 #endif
30
31 #include "libguile/chars.h"
32 #include "libguile/modules.h"
33 #include "libguile/ports.h"
34 #include "libguile/rdelim.h"
35 #include "libguile/root.h"
36 #include "libguile/strings.h"
37 #include "libguile/strports.h"
38 #include "libguile/validate.h"
39
40 SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 3, 0,
41 (SCM delims, SCM str, SCM gobble, SCM port, SCM start, SCM end),
42 "Read characters from @var{port} into @var{str} until one of the\n"
43 "characters in the @var{delims} string is encountered. If\n"
44 "@var{gobble} is true, discard the delimiter character;\n"
45 "otherwise, leave it in the input stream for the next read. If\n"
46 "@var{port} is not specified, use the value of\n"
47 "@code{(current-input-port)}. If @var{start} or @var{end} are\n"
48 "specified, store data only into the substring of @var{str}\n"
49 "bounded by @var{start} and @var{end} (which default to the\n"
50 "beginning and end of the string, respectively).\n"
51 "\n"
52 " Return a pair consisting of the delimiter that terminated the\n"
53 "string and the number of characters read. If reading stopped\n"
54 "at the end of file, the delimiter returned is the\n"
55 "@var{eof-object}; if the string was filled without encountering\n"
56 "a delimiter, this value is @code{#f}.")
57 #define FUNC_NAME s_scm_read_delimited_x
58 {
59 size_t j;
60 size_t cstart;
61 size_t cend;
62 scm_t_wchar c;
63 size_t num_delims;
64
65 SCM_VALIDATE_STRING (1, delims);
66 num_delims = scm_i_string_length (delims);
67
68 SCM_VALIDATE_STRING (2, str);
69 scm_i_get_substring_spec (scm_i_string_length (str),
70 start, &cstart, end, &cend);
71
72 if (SCM_UNBNDP (port))
73 port = scm_current_input_port ();
74 else
75 SCM_VALIDATE_OPINPORT (4, port);
76
77 for (j = cstart; j < cend; j++)
78 {
79 size_t k;
80
81 c = scm_getc (port);
82 for (k = 0; k < num_delims; k++)
83 {
84 if (scm_i_string_ref (delims, k) == c)
85 {
86 if (scm_is_false (gobble))
87 scm_ungetc (c, port);
88
89 return scm_cons (SCM_MAKE_CHAR (c),
90 scm_from_size_t (j - cstart));
91 }
92 }
93 if (c == EOF)
94 return scm_cons (SCM_EOF_VAL,
95 scm_from_size_t (j - cstart));
96
97 scm_c_string_set_x (str, j, SCM_MAKE_CHAR (c));
98 }
99 return scm_cons (SCM_BOOL_F, scm_from_size_t (j - cstart));
100 }
101 #undef FUNC_NAME
102
103 static unsigned char *
104 scm_do_read_line (SCM port, size_t *len_p)
105 {
106 scm_t_port *pt = SCM_PTAB_ENTRY (port);
107 unsigned char *end;
108
109 /* I thought reading lines was simple. Mercy me. */
110
111 /* The common case: the buffer contains a complete line.
112 This needs to be fast. */
113 if ((end = memchr (pt->read_pos, '\n', (pt->read_end - pt->read_pos)))
114 != 0)
115 {
116 size_t buf_len = (end + 1) - pt->read_pos;
117 /* Allocate a buffer of the perfect size. */
118 unsigned char *buf = scm_malloc (buf_len + 1);
119
120 memcpy (buf, pt->read_pos, buf_len);
121 pt->read_pos += buf_len;
122
123 buf[buf_len] = '\0';
124
125 *len_p = buf_len;
126 return buf;
127 }
128
129 /* The buffer contains no newlines. */
130 {
131 /* When live, len is always the number of characters in the
132 current buffer that are part of the current line. */
133 size_t len = (pt->read_end - pt->read_pos);
134 size_t buf_size = (len < 50) ? 60 : len * 2;
135 /* Invariant: buf always has buf_size + 1 characters allocated;
136 the `+ 1' is for the final '\0'. */
137 unsigned char *buf = scm_malloc (buf_size + 1);
138 size_t buf_len = 0;
139
140 for (;;)
141 {
142 if (buf_len + len > buf_size)
143 {
144 size_t new_size = (buf_len + len) * 2;
145 buf = scm_realloc (buf, new_size + 1);
146 buf_size = new_size;
147 }
148
149 /* Copy what we've got out of the port, into our buffer. */
150 memcpy (buf + buf_len, pt->read_pos, len);
151 buf_len += len;
152 pt->read_pos += len;
153
154 /* If we had seen a newline, we're done now. */
155 if (end)
156 break;
157
158 /* Get more characters. */
159 if (scm_fill_input (port) == EOF)
160 {
161 /* If we're missing a final newline in the file, return
162 what we did get, sans newline. */
163 if (buf_len > 0)
164 break;
165
166 free (buf);
167 return 0;
168 }
169
170 /* Search the buffer for newlines. */
171 if ((end = memchr (pt->read_pos, '\n',
172 (len = (pt->read_end - pt->read_pos))))
173 != 0)
174 len = (end - pt->read_pos) + 1;
175 }
176
177 /* I wonder how expensive this realloc is. */
178 buf = scm_realloc (buf, buf_len + 1);
179 buf[buf_len] = '\0';
180 *len_p = buf_len;
181 return buf;
182 }
183 }
184
185
186 /*
187 * %read-line
188 * truncates any terminating newline from its input, and returns
189 * a cons of the string read and its terminating character. Doing
190 * so makes it easy to implement the hairy `read-line' options
191 * efficiently in Scheme.
192 */
193
194 SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
195 (SCM port),
196 "Read a newline-terminated line from @var{port}, allocating storage as\n"
197 "necessary. The newline terminator (if any) is removed from the string,\n"
198 "and a pair consisting of the line and its delimiter is returned. The\n"
199 "delimiter may be either a newline or the @var{eof-object}; if\n"
200 "@code{%read-line} is called at the end of file, it returns the pair\n"
201 "@code{(#<eof> . #<eof>)}.")
202 #define FUNC_NAME s_scm_read_line
203 {
204 scm_t_port *pt;
205 char *s;
206 size_t slen = 0;
207 SCM line, term;
208 const char *enc;
209 scm_t_string_failed_conversion_handler hndl;
210
211 if (SCM_UNBNDP (port))
212 port = scm_current_input_port ();
213 SCM_VALIDATE_OPINPORT (1,port);
214
215 pt = SCM_PTAB_ENTRY (port);
216 enc = pt->encoding;
217 hndl = pt->ilseq_handler;
218 if (pt->rw_active == SCM_PORT_WRITE)
219 scm_ptobs[SCM_PTOBNUM (port)].flush (port);
220
221 s = (char *) scm_do_read_line (port, &slen);
222
223 if (s == NULL)
224 term = line = SCM_EOF_VAL;
225 else
226 {
227 if (s[slen - 1] == '\n')
228 {
229 term = SCM_MAKE_CHAR ('\n');
230 s[slen - 1] = '\0';
231
232 line = scm_from_stringn (s, slen - 1, enc, hndl);
233 free (s);
234 SCM_INCLINE (port);
235 }
236 else
237 {
238 /* Fix: we should check for eof on the port before assuming this. */
239 term = SCM_EOF_VAL;
240 line = scm_from_stringn (s, slen, enc, hndl);
241 free (s);
242 SCM_COL (port) += scm_i_string_length (line);
243 }
244 }
245
246 if (pt->rw_random)
247 pt->rw_active = SCM_PORT_READ;
248
249 return scm_cons (line, term);
250 }
251 #undef FUNC_NAME
252
253 SCM_DEFINE (scm_write_line, "write-line", 1, 1, 0,
254 (SCM obj, SCM port),
255 "Display @var{obj} and a newline character to @var{port}. If\n"
256 "@var{port} is not specified, @code{(current-output-port)} is\n"
257 "used. This function is equivalent to:\n"
258 "@lisp\n"
259 "(display obj [port])\n"
260 "(newline [port])\n"
261 "@end lisp")
262 #define FUNC_NAME s_scm_write_line
263 {
264 scm_display (obj, port);
265 return scm_newline (port);
266 }
267 #undef FUNC_NAME
268
269 SCM
270 scm_init_rdelim_builtins (void)
271 {
272 #include "libguile/rdelim.x"
273
274 return SCM_UNSPECIFIED;
275 }
276
277 void
278 scm_init_rdelim (void)
279 {
280 scm_c_define_gsubr ("%init-rdelim-builtins", 0, 0, 0,
281 scm_init_rdelim_builtins);
282 }
283
284 /*
285 Local Variables:
286 c-file-style: "gnu"
287 End:
288 */