Merge remote-tracking branch 'origin/stable-2.0'
[bpt/guile.git] / libguile / strports.c
1 /* Copyright (C) 1995,1996,1998,1999,2000,2001,2002, 2003, 2005, 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
17 */
18
19
20 \f
21
22 #ifdef HAVE_CONFIG_H
23 # include <config.h>
24 #endif
25
26 #include "libguile/_scm.h"
27
28 #include <stdio.h>
29 #ifdef HAVE_UNISTD_H
30 #include <unistd.h>
31 #endif
32
33 #include "libguile/bytevectors.h"
34 #include "libguile/eval.h"
35 #include "libguile/ports.h"
36 #include "libguile/read.h"
37 #include "libguile/root.h"
38 #include "libguile/strings.h"
39 #include "libguile/modules.h"
40 #include "libguile/validate.h"
41 #include "libguile/deprecation.h"
42 #include "libguile/srfi-4.h"
43
44 #include "libguile/strports.h"
45
46 #ifdef HAVE_STRING_H
47 #include <string.h>
48 #endif
49
50 \f
51
52 /* {Ports - string ports}
53 *
54 */
55
56 /* NOTES:
57
58 write_buf/write_end point to the ends of the allocated bytevector.
59 read_buf/read_end point to the part of the bytevector which has been
60 written to. read_pos and write_pos are always equal.
61
62 ENHANCE-ME - output blocks:
63
64 The current code keeps an output string as a single block. That means
65 when the size is increased the entire old contents must be copied. It'd
66 be more efficient to begin a new block when the old one is full, so
67 there's no re-copying of previous data.
68
69 To make seeking efficient, keeping the pieces in a vector might be best,
70 though appending is probably the most common operation. The size of each
71 block could be progressively increased, so the bigger the string the
72 bigger the blocks.
73
74 When `get-output-string' is called the blocks have to be coalesced into a
75 string, the result could be kept as a single big block. If blocks were
76 strings then `get-output-string' could notice when there's just one and
77 return that with a copy-on-write (though repeated calls to
78 `get-output-string' are probably unlikely).
79
80 Another possibility would be to extend the port mechanism to let SCM
81 strings come through directly from `display' and friends. That way if a
82 big string is written it can be kept as a copy-on-write, saving time
83 copying and maybe saving some space. */
84
85
86 scm_t_bits scm_tc16_strport;
87
88
89 static int
90 st_fill_input (SCM port)
91 {
92 scm_t_port *pt = SCM_PTAB_ENTRY (port);
93
94 if (pt->read_pos >= pt->read_end)
95 return EOF;
96 else
97 return *pt->read_pos;
98 }
99
100 /* Change the size of a port's bytevector to NEW_SIZE. This doesn't
101 change `read_buf_size'. */
102 static void
103 st_resize_port (scm_t_port *pt, scm_t_off new_size)
104 {
105 SCM old_stream = SCM_PACK (pt->stream);
106 const signed char *src = SCM_BYTEVECTOR_CONTENTS (old_stream);
107 SCM new_stream = scm_c_make_bytevector (new_size);
108 signed char *dst = SCM_BYTEVECTOR_CONTENTS (new_stream);
109 unsigned long int old_size = SCM_BYTEVECTOR_LENGTH (old_stream);
110 unsigned long int min_size = min (old_size, new_size);
111
112 scm_t_off offset = pt->write_pos - pt->write_buf;
113
114 pt->write_buf_size = new_size;
115
116 memcpy (dst, src, min_size);
117
118 scm_remember_upto_here_1 (old_stream);
119
120 /* reset buffer. */
121 {
122 pt->stream = SCM_UNPACK (new_stream);
123 pt->read_buf = pt->write_buf = (unsigned char *)dst;
124 pt->read_pos = pt->write_pos = pt->write_buf + offset;
125 pt->write_end = pt->write_buf + pt->write_buf_size;
126 pt->read_end = pt->read_buf + pt->read_buf_size;
127 }
128 }
129
130 static void
131 st_write (SCM port, const void *data, size_t size)
132 {
133 scm_t_port *pt = SCM_PTAB_ENTRY (port);
134
135 if (size > pt->write_end - pt->write_pos)
136 st_resize_port (pt, max (pt->write_buf_size * 2,
137 pt->write_end - pt->write_pos + size));
138
139 memcpy ((char *) pt->write_pos, data, size);
140 pt->read_pos = (pt->write_pos += size);
141
142 if (pt->read_pos > pt->read_end)
143 {
144 pt->read_end = (unsigned char *) pt->read_pos;
145 pt->read_buf_size = pt->read_end - pt->read_buf;
146 }
147 }
148
149 static void
150 st_end_input (SCM port, int offset)
151 {
152 scm_t_port *pt = SCM_PTAB_ENTRY (port);
153
154 if (pt->read_pos - pt->read_buf < offset)
155 scm_misc_error ("st_end_input", "negative position", SCM_EOL);
156
157 pt->write_pos = (unsigned char *) (pt->read_pos = pt->read_pos - offset);
158 pt->rw_active = SCM_PORT_NEITHER;
159 }
160
161 static scm_t_off
162 st_seek (SCM port, scm_t_off offset, int whence)
163 {
164 scm_t_port *pt = SCM_PTAB_ENTRY (port);
165 scm_t_off target;
166
167 if (pt->rw_active == SCM_PORT_READ && offset == 0 && whence == SEEK_CUR)
168 /* special case to avoid disturbing the unread-char buffer. */
169 {
170 if (pt->read_buf == pt->putback_buf)
171 {
172 target = pt->saved_read_pos - pt->saved_read_buf
173 - (pt->read_end - pt->read_pos);
174 }
175 else
176 {
177 target = pt->read_pos - pt->read_buf;
178 }
179 }
180 else
181 /* all other cases. */
182 {
183 if (pt->rw_active == SCM_PORT_READ)
184 scm_end_input_unlocked (port);
185
186 pt->rw_active = SCM_PORT_NEITHER;
187
188 switch (whence)
189 {
190 case SEEK_CUR:
191 target = pt->read_pos - pt->read_buf + offset;
192 break;
193 case SEEK_END:
194 target = pt->read_end - pt->read_buf + offset;
195 break;
196 default: /* SEEK_SET */
197 target = offset;
198 break;
199 }
200
201 if (target < 0)
202 scm_misc_error ("st_seek", "negative offset", SCM_EOL);
203
204 if (target >= pt->write_buf_size)
205 {
206 if (!(SCM_CELL_WORD_0 (port) & SCM_WRTNG))
207 {
208 if (target > pt->write_buf_size)
209 {
210 scm_misc_error ("st_seek",
211 "seek past end of read-only strport",
212 SCM_EOL);
213 }
214 }
215 else if (target == pt->write_buf_size)
216 st_resize_port (pt, target * 2);
217 }
218 pt->read_pos = pt->write_pos = pt->read_buf + target;
219 if (pt->read_pos > pt->read_end)
220 {
221 pt->read_end = (unsigned char *) pt->read_pos;
222 pt->read_buf_size = pt->read_end - pt->read_buf;
223 }
224 }
225 return target;
226 }
227
228 static void
229 st_truncate (SCM port, scm_t_off length)
230 {
231 scm_t_port *pt = SCM_PTAB_ENTRY (port);
232
233 if (length > pt->write_buf_size)
234 st_resize_port (pt, length);
235
236 pt->read_buf_size = length;
237 pt->read_end = pt->read_buf + length;
238 if (pt->read_pos > pt->read_end)
239 pt->read_pos = pt->write_pos = pt->read_end;
240 }
241
242 /* The initial size in bytes of a string port's buffer. */
243 #define INITIAL_BUFFER_SIZE 128
244
245 /* Return a new string port with MODES. If STR is #f, a new backing
246 buffer is allocated; otherwise STR must be a string and a copy of it
247 serves as the buffer for the new port. */
248 SCM
249 scm_mkstrport (SCM pos, SCM str, long modes, const char *caller)
250 {
251 SCM z, buf;
252 scm_t_port *pt;
253 const char *encoding;
254 size_t read_buf_size, str_len, c_pos;
255 char *c_buf;
256
257 if (!((modes & SCM_WRTNG) || (modes & SCM_RDNG)))
258 scm_misc_error ("scm_mkstrport", "port must read or write", SCM_EOL);
259
260 encoding = scm_i_default_port_encoding ();
261
262 if (scm_is_false (str))
263 {
264 /* Allocate a new buffer to write to. */
265 str_len = INITIAL_BUFFER_SIZE;
266 buf = scm_c_make_bytevector (str_len);
267 c_buf = (char *) SCM_BYTEVECTOR_CONTENTS (buf);
268
269 /* Reset `read_buf_size'. It will contain the actual number of
270 bytes written to the port. */
271 read_buf_size = 0;
272 c_pos = 0;
273 }
274 else
275 {
276 /* STR is a string. */
277 char *copy;
278
279 SCM_ASSERT (scm_is_string (str), str, SCM_ARG1, caller);
280
281 /* Create a copy of STR in ENCODING. */
282 copy = scm_to_stringn (str, &str_len, encoding,
283 SCM_FAILED_CONVERSION_ERROR);
284 buf = scm_c_make_bytevector (str_len);
285 c_buf = (char *) SCM_BYTEVECTOR_CONTENTS (buf);
286 memcpy (c_buf, copy, str_len);
287 free (copy);
288
289 c_pos = scm_to_unsigned_integer (pos, 0, str_len);
290 read_buf_size = str_len;
291 }
292
293 z = scm_c_make_port_with_encoding (scm_tc16_strport, modes,
294 encoding,
295 SCM_FAILED_CONVERSION_ERROR,
296 (scm_t_bits)buf);
297
298 pt = SCM_PTAB_ENTRY (z);
299 pt->write_buf = pt->read_buf = (unsigned char *) c_buf;
300 pt->read_pos = pt->write_pos = pt->read_buf + c_pos;
301 pt->read_buf_size = read_buf_size;
302 pt->write_buf_size = str_len;
303 pt->write_end = pt->read_end = pt->read_buf + pt->read_buf_size;
304 pt->rw_random = 1;
305
306 return z;
307 }
308
309 /* Create a new string from the buffer of PORT, a string port, converting from
310 PORT's encoding to the standard string representation. */
311 SCM
312 scm_strport_to_string (SCM port)
313 {
314 scm_t_port *pt = SCM_PTAB_ENTRY (port);
315
316 if (pt->read_buf_size == 0)
317 return scm_nullstr;
318
319 return scm_from_stringn ((char *)pt->read_buf, pt->read_buf_size,
320 pt->encoding, pt->ilseq_handler);
321 }
322
323 SCM_DEFINE (scm_object_to_string, "object->string", 1, 1, 0,
324 (SCM obj, SCM printer),
325 "Return a Scheme string obtained by printing @var{obj}.\n"
326 "Printing function can be specified by the optional second\n"
327 "argument @var{printer} (default: @code{write}).")
328 #define FUNC_NAME s_scm_object_to_string
329 {
330 SCM port, result;
331
332 if (!SCM_UNBNDP (printer))
333 SCM_VALIDATE_PROC (2, printer);
334
335 port = scm_mkstrport (SCM_INUM0, SCM_BOOL_F,
336 SCM_OPN | SCM_WRTNG, FUNC_NAME);
337
338 if (SCM_UNBNDP (printer))
339 scm_write (obj, port);
340 else
341 scm_call_2 (printer, obj, port);
342
343 result = scm_strport_to_string (port);
344
345 /* Explicitly close PORT so that the iconv CDs associated with it are
346 deallocated right away. This is important because CDs use a lot of
347 memory that's not visible to the GC, so not freeing them can lead
348 to almost large heap usage. See
349 <http://wingolog.org/archives/2011/02/25/ports-weaks-gc-and-dark-matter>
350 for details. */
351 scm_close_port (port);
352
353 return result;
354 }
355 #undef FUNC_NAME
356
357 SCM
358 scm_call_with_output_string (SCM proc)
359 {
360 static SCM var = SCM_BOOL_F;
361
362 if (scm_is_false (var))
363 var = scm_c_private_lookup ("guile", "call-with-output-string");
364
365 return scm_call_1 (scm_variable_ref (var), proc);
366 }
367
368 SCM
369 scm_call_with_input_string (SCM string, SCM proc)
370 {
371 static SCM var = SCM_BOOL_F;
372
373 if (scm_is_false (var))
374 var = scm_c_private_lookup ("guile", "call-with-input-string");
375
376 return scm_call_2 (scm_variable_ref (var), string, proc);
377 }
378
379 SCM_DEFINE (scm_open_input_string, "open-input-string", 1, 0, 0,
380 (SCM str),
381 "Take a string and return an input port that delivers characters\n"
382 "from the string. The port can be closed by\n"
383 "@code{close-input-port}, though its storage will be reclaimed\n"
384 "by the garbage collector if it becomes inaccessible.")
385 #define FUNC_NAME s_scm_open_input_string
386 {
387 SCM p = scm_mkstrport(SCM_INUM0, str, SCM_OPN | SCM_RDNG, FUNC_NAME);
388 return p;
389 }
390 #undef FUNC_NAME
391
392 SCM_DEFINE (scm_open_output_string, "open-output-string", 0, 0, 0,
393 (void),
394 "Return an output port that will accumulate characters for\n"
395 "retrieval by @code{get-output-string}. The port can be closed\n"
396 "by the procedure @code{close-output-port}, though its storage\n"
397 "will be reclaimed by the garbage collector if it becomes\n"
398 "inaccessible.")
399 #define FUNC_NAME s_scm_open_output_string
400 {
401 SCM p;
402
403 p = scm_mkstrport (SCM_INUM0, SCM_BOOL_F,
404 SCM_OPN | SCM_WRTNG,
405 FUNC_NAME);
406 return p;
407 }
408 #undef FUNC_NAME
409
410 SCM_DEFINE (scm_get_output_string, "get-output-string", 1, 0, 0,
411 (SCM port),
412 "Given an output port created by @code{open-output-string},\n"
413 "return a string consisting of the characters that have been\n"
414 "output to the port so far.")
415 #define FUNC_NAME s_scm_get_output_string
416 {
417 SCM_VALIDATE_OPOUTSTRPORT (1, port);
418 return scm_strport_to_string (port);
419 }
420 #undef FUNC_NAME
421
422
423 /* Given a null-terminated string EXPR containing a Scheme expression
424 read it, and return it as an SCM value. */
425 SCM
426 scm_c_read_string (const char *expr)
427 {
428 SCM port = scm_mkstrport (SCM_INUM0,
429 scm_from_locale_string (expr),
430 SCM_OPN | SCM_RDNG,
431 "scm_c_read_string");
432 SCM form;
433
434 form = scm_read (port);
435
436 scm_close_port (port);
437 return form;
438 }
439
440 /* Given a null-terminated string EXPR containing Scheme program text,
441 evaluate it, and return the result of the last expression evaluated. */
442 SCM
443 scm_c_eval_string (const char *expr)
444 {
445 return scm_eval_string (scm_from_locale_string (expr));
446 }
447
448 SCM
449 scm_c_eval_string_in_module (const char *expr, SCM module)
450 {
451 return scm_eval_string_in_module (scm_from_locale_string (expr), module);
452 }
453
454
455 SCM_DEFINE (scm_eval_string_in_module, "eval-string", 1, 1, 0,
456 (SCM string, SCM module),
457 "Evaluate @var{string} as the text representation of a Scheme\n"
458 "form or forms, and return whatever value they produce.\n"
459 "Evaluation takes place in the given module, or the current\n"
460 "module when no module is given.\n"
461 "While the code is evaluated, the given module is made the\n"
462 "current one. The current module is restored when this\n"
463 "procedure returns.")
464 #define FUNC_NAME s_scm_eval_string_in_module
465 {
466 static SCM eval_string = SCM_BOOL_F, k_module = SCM_BOOL_F;
467
468 if (scm_is_false (eval_string))
469 {
470 eval_string = scm_c_public_lookup ("ice-9 eval-string", "eval-string");
471 k_module = scm_from_locale_keyword ("module");
472 }
473
474 if (SCM_UNBNDP (module))
475 module = scm_current_module ();
476 else
477 SCM_VALIDATE_MODULE (2, module);
478
479 return scm_call_3 (scm_variable_ref (eval_string), string, k_module, module);
480 }
481 #undef FUNC_NAME
482
483 SCM
484 scm_eval_string (SCM string)
485 {
486 return scm_eval_string_in_module (string, SCM_UNDEFINED);
487 }
488
489 static scm_t_bits
490 scm_make_stptob ()
491 {
492 scm_t_bits tc = scm_make_port_type ("string", st_fill_input, st_write);
493
494 scm_set_port_end_input (tc, st_end_input);
495 scm_set_port_seek (tc, st_seek);
496 scm_set_port_truncate (tc, st_truncate);
497
498 return tc;
499 }
500
501 void
502 scm_init_strports ()
503 {
504 scm_tc16_strport = scm_make_stptob ();
505
506 #include "libguile/strports.x"
507 }
508
509
510 /*
511 Local Variables:
512 c-file-style: "gnu"
513 End:
514 */