Add full Unicode capability to ports and the default reader
[bpt/guile.git] / libguile / rdelim.c
CommitLineData
2b829bbb 1/* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2006 Free Software Foundation, Inc.
6d36532c 2 *
73be1d9e 3 * This library is free software; you can redistribute it and/or
53befeb7
NJ
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
6d36532c 7 *
53befeb7
NJ
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
6d36532c 12 *
73be1d9e
MV
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
53befeb7
NJ
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
73be1d9e 17 */
6d36532c 18
dbb605f5 19#ifdef HAVE_CONFIG_H
f3447317
RB
20# include <config.h>
21#endif
22
6d36532c
GH
23#include "libguile/_scm.h"
24
25#include <stdio.h>
26
27#ifdef HAVE_STRING_H
28#include <string.h>
29#endif
30
31#include "libguile/chars.h"
32#include "libguile/modules.h"
33#include "libguile/ports.h"
34#include "libguile/rdelim.h"
35#include "libguile/root.h"
36#include "libguile/strings.h"
37#include "libguile/strports.h"
38#include "libguile/validate.h"
39
40SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 3, 0,
41 (SCM delims, SCM str, SCM gobble, SCM port, SCM start, SCM end),
42 "Read characters from @var{port} into @var{str} until one of the\n"
5352393c
MG
43 "characters in the @var{delims} string is encountered. If\n"
44 "@var{gobble} is true, discard the delimiter character;\n"
45 "otherwise, leave it in the input stream for the next read. If\n"
46 "@var{port} is not specified, use the value of\n"
47 "@code{(current-input-port)}. If @var{start} or @var{end} are\n"
48 "specified, store data only into the substring of @var{str}\n"
49 "bounded by @var{start} and @var{end} (which default to the\n"
50 "beginning and end of the string, respectively).\n"
51 "\n"
52 " Return a pair consisting of the delimiter that terminated the\n"
53 "string and the number of characters read. If reading stopped\n"
54 "at the end of file, the delimiter returned is the\n"
55 "@var{eof-object}; if the string was filled without encountering\n"
56 "a delimiter, this value is @code{#f}.")
6d36532c
GH
57#define FUNC_NAME s_scm_read_delimited_x
58{
1be6b49c 59 size_t j;
1be6b49c
ML
60 size_t cstart;
61 size_t cend;
889975e5 62 scm_t_wchar c;
1be6b49c 63 size_t num_delims;
6d36532c 64
3eb1e2aa 65 SCM_VALIDATE_STRING (1, delims);
cc95e00a 66 num_delims = scm_i_string_length (delims);
3eb1e2aa
MV
67
68 SCM_VALIDATE_STRING (2, str);
cc95e00a 69 scm_i_get_substring_spec (scm_i_string_length (str),
3eb1e2aa
MV
70 start, &cstart, end, &cend);
71
6d36532c 72 if (SCM_UNBNDP (port))
9de87eea 73 port = scm_current_input_port ();
6d36532c 74 else
3eb1e2aa 75 SCM_VALIDATE_OPINPORT (4, port);
6d36532c
GH
76
77 for (j = cstart; j < cend; j++)
78 {
1be6b49c 79 size_t k;
6d36532c
GH
80
81 c = scm_getc (port);
82 for (k = 0; k < num_delims; k++)
83 {
889975e5 84 if (scm_i_string_ref (delims, k) == c)
6d36532c 85 {
7888309b 86 if (scm_is_false (gobble))
6d36532c
GH
87 scm_ungetc (c, port);
88
89 return scm_cons (SCM_MAKE_CHAR (c),
3eb1e2aa 90 scm_from_size_t (j - cstart));
6d36532c
GH
91 }
92 }
93 if (c == EOF)
94 return scm_cons (SCM_EOF_VAL,
3eb1e2aa 95 scm_from_size_t (j - cstart));
6d36532c 96
cc95e00a 97 scm_c_string_set_x (str, j, SCM_MAKE_CHAR (c));
6d36532c 98 }
3eb1e2aa 99 return scm_cons (SCM_BOOL_F, scm_from_size_t (j - cstart));
6d36532c
GH
100}
101#undef FUNC_NAME
102
103static unsigned char *
1be6b49c 104scm_do_read_line (SCM port, size_t *len_p)
6d36532c 105{
92c2555f 106 scm_t_port *pt = SCM_PTAB_ENTRY (port);
6d36532c
GH
107 unsigned char *end;
108
109 /* I thought reading lines was simple. Mercy me. */
110
111 /* The common case: the buffer contains a complete line.
112 This needs to be fast. */
113 if ((end = memchr (pt->read_pos, '\n', (pt->read_end - pt->read_pos)))
114 != 0)
115 {
1be6b49c 116 size_t buf_len = (end + 1) - pt->read_pos;
6d36532c 117 /* Allocate a buffer of the perfect size. */
4c9419ac 118 unsigned char *buf = scm_malloc (buf_len + 1);
6d36532c
GH
119
120 memcpy (buf, pt->read_pos, buf_len);
121 pt->read_pos += buf_len;
122
123 buf[buf_len] = '\0';
124
125 *len_p = buf_len;
126 return buf;
127 }
128
129 /* The buffer contains no newlines. */
130 {
131 /* When live, len is always the number of characters in the
132 current buffer that are part of the current line. */
1be6b49c
ML
133 size_t len = (pt->read_end - pt->read_pos);
134 size_t buf_size = (len < 50) ? 60 : len * 2;
6d36532c
GH
135 /* Invariant: buf always has buf_size + 1 characters allocated;
136 the `+ 1' is for the final '\0'. */
4c9419ac 137 unsigned char *buf = scm_malloc (buf_size + 1);
1be6b49c 138 size_t buf_len = 0;
6d36532c
GH
139
140 for (;;)
141 {
142 if (buf_len + len > buf_size)
143 {
1be6b49c 144 size_t new_size = (buf_len + len) * 2;
4c9419ac 145 buf = scm_realloc (buf, new_size + 1);
6d36532c
GH
146 buf_size = new_size;
147 }
148
149 /* Copy what we've got out of the port, into our buffer. */
150 memcpy (buf + buf_len, pt->read_pos, len);
151 buf_len += len;
152 pt->read_pos += len;
153
154 /* If we had seen a newline, we're done now. */
155 if (end)
156 break;
157
158 /* Get more characters. */
159 if (scm_fill_input (port) == EOF)
160 {
161 /* If we're missing a final newline in the file, return
162 what we did get, sans newline. */
163 if (buf_len > 0)
164 break;
165
166 free (buf);
167 return 0;
168 }
169
170 /* Search the buffer for newlines. */
171 if ((end = memchr (pt->read_pos, '\n',
172 (len = (pt->read_end - pt->read_pos))))
173 != 0)
174 len = (end - pt->read_pos) + 1;
175 }
176
177 /* I wonder how expensive this realloc is. */
4c9419ac 178 buf = scm_realloc (buf, buf_len + 1);
6d36532c
GH
179 buf[buf_len] = '\0';
180 *len_p = buf_len;
181 return buf;
182 }
4c9419ac 183}
6d36532c
GH
184
185
186/*
187 * %read-line
188 * truncates any terminating newline from its input, and returns
189 * a cons of the string read and its terminating character. Doing
190 * so makes it easy to implement the hairy `read-line' options
191 * efficiently in Scheme.
192 */
193
194SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
195 (SCM port),
196 "Read a newline-terminated line from @var{port}, allocating storage as\n"
197 "necessary. The newline terminator (if any) is removed from the string,\n"
198 "and a pair consisting of the line and its delimiter is returned. The\n"
199 "delimiter may be either a newline or the @var{eof-object}; if\n"
200 "@code{%read-line} is called at the end of file, it returns the pair\n"
201 "@code{(#<eof> . #<eof>)}.")
202#define FUNC_NAME s_scm_read_line
203{
92c2555f 204 scm_t_port *pt;
6d36532c 205 char *s;
5a6d139b 206 size_t slen = 0;
6d36532c
GH
207 SCM line, term;
208
209 if (SCM_UNBNDP (port))
9de87eea 210 port = scm_current_input_port ();
6d36532c
GH
211 SCM_VALIDATE_OPINPORT (1,port);
212
213 pt = SCM_PTAB_ENTRY (port);
214 if (pt->rw_active == SCM_PORT_WRITE)
215 scm_ptobs[SCM_PTOBNUM (port)].flush (port);
216
217 s = (char *) scm_do_read_line (port, &slen);
218
219 if (s == NULL)
220 term = line = SCM_EOF_VAL;
221 else
222 {
223 if (s[slen-1] == '\n')
224 {
225 term = SCM_MAKE_CHAR ('\n');
226 s[slen-1] = '\0';
cc95e00a 227 line = scm_take_locale_stringn (s, slen-1);
6d36532c
GH
228 SCM_INCLINE (port);
229 }
230 else
231 {
232 /* Fix: we should check for eof on the port before assuming this. */
233 term = SCM_EOF_VAL;
cc95e00a 234 line = scm_take_locale_stringn (s, slen);
6d36532c 235 SCM_COL (port) += slen;
4c9419ac 236 }
6d36532c
GH
237 }
238
239 if (pt->rw_random)
240 pt->rw_active = SCM_PORT_READ;
241
242 return scm_cons (line, term);
243}
244#undef FUNC_NAME
245
246SCM_DEFINE (scm_write_line, "write-line", 1, 1, 0,
247 (SCM obj, SCM port),
1e6808ea
MG
248 "Display @var{obj} and a newline character to @var{port}. If\n"
249 "@var{port} is not specified, @code{(current-output-port)} is\n"
250 "used. This function is equivalent to:\n"
251 "@lisp\n"
6d36532c
GH
252 "(display obj [port])\n"
253 "(newline [port])\n"
1e6808ea 254 "@end lisp")
6d36532c
GH
255#define FUNC_NAME s_scm_write_line
256{
257 scm_display (obj, port);
258 return scm_newline (port);
259}
260#undef FUNC_NAME
261
6280d429
MV
262SCM
263scm_init_rdelim_builtins (void)
6d36532c 264{
6d36532c 265#include "libguile/rdelim.x"
6d36532c 266
6280d429
MV
267 return SCM_UNSPECIFIED;
268}
269
270void
271scm_init_rdelim (void)
272{
9a441ddb
MV
273 scm_c_define_gsubr ("%init-rdelim-builtins", 0, 0, 0,
274 scm_init_rdelim_builtins);
6d36532c
GH
275}
276
277/*
278 Local Variables:
279 c-file-style: "gnu"
280 End:
281*/