micro-optimizations to string-trim-both, and to (web http)
[bpt/guile.git] / libguile / strports.c
CommitLineData
ca2ec018 1/* Copyright (C) 1995,1996,1998,1999,2000,2001,2002, 2003, 2005, 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
0f2d19dd 2 *
73be1d9e 3 * This library is free software; you can redistribute it and/or
53befeb7
NJ
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
0f2d19dd 7 *
53befeb7
NJ
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
0f2d19dd 12 *
73be1d9e
MV
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
53befeb7
NJ
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
73be1d9e 17 */
1bbd0b84 18
1bbd0b84 19
0f2d19dd
JB
20\f
21
dbb605f5 22#ifdef HAVE_CONFIG_H
c986274f
RB
23# include <config.h>
24#endif
25
a0599745 26#include "libguile/_scm.h"
681b9005
MD
27
28#include <stdio.h>
29#ifdef HAVE_UNISTD_H
30#include <unistd.h>
31#endif
32
691fcf66 33#include "libguile/bytevectors.h"
a0599745
MD
34#include "libguile/eval.h"
35#include "libguile/ports.h"
36#include "libguile/read.h"
37#include "libguile/root.h"
38#include "libguile/strings.h"
07bcf91d 39#include "libguile/modules.h"
fe78b6c0 40#include "libguile/validate.h"
ca13a04a 41#include "libguile/deprecation.h"
889975e5 42#include "libguile/srfi-4.h"
20e6290e 43
a0599745 44#include "libguile/strports.h"
0f2d19dd 45
95b88819
GH
46#ifdef HAVE_STRING_H
47#include <string.h>
48#endif
49
0f2d19dd
JB
50\f
51
52/* {Ports - string ports}
53 *
54 */
55
3fe6190f 56/* NOTES:
cc95e00a 57
691fcf66 58 write_buf/write_end point to the ends of the allocated bytevector.
ca2ec018
AW
59 read_buf/read_end point to the part of the bytevector which has been
60 written to. read_pos and write_pos are always equal.
bf5ad0da
KR
61
62 ENHANCE-ME - output blocks:
63
64 The current code keeps an output string as a single block. That means
65 when the size is increased the entire old contents must be copied. It'd
66 be more efficient to begin a new block when the old one is full, so
67 there's no re-copying of previous data.
68
69 To make seeking efficient, keeping the pieces in a vector might be best,
70 though appending is probably the most common operation. The size of each
71 block could be progressively increased, so the bigger the string the
72 bigger the blocks.
73
74 When `get-output-string' is called the blocks have to be coalesced into a
75 string, the result could be kept as a single big block. If blocks were
76 strings then `get-output-string' could notice when there's just one and
77 return that with a copy-on-write (though repeated calls to
78 `get-output-string' are probably unlikely).
79
80 Another possibility would be to extend the port mechanism to let SCM
81 strings come through directly from `display' and friends. That way if a
82 big string is written it can be kept as a copy-on-write, saving time
83 copying and maybe saving some space. */
84
1cc91f1b 85
92c2555f 86scm_t_bits scm_tc16_strport;
a98bddfd
DH
87
88
ee149d03 89static int
ca2ec018 90st_fill_input (SCM port)
0f2d19dd 91{
92c2555f 92 scm_t_port *pt = SCM_PTAB_ENTRY (port);
ee149d03 93
ee149d03
JB
94 if (pt->read_pos >= pt->read_end)
95 return EOF;
96 else
ca2ec018 97 return *pt->read_pos;
0f2d19dd
JB
98}
99
691fcf66
LC
100/* Change the size of a port's bytevector to NEW_SIZE. This doesn't
101 change `read_buf_size'. */
102static void
f1ce9199 103st_resize_port (scm_t_port *pt, scm_t_off new_size)
754c9491 104{
94115ae3 105 SCM old_stream = SCM_PACK (pt->stream);
691fcf66
LC
106 const signed char *src = SCM_BYTEVECTOR_CONTENTS (old_stream);
107 SCM new_stream = scm_c_make_bytevector (new_size);
108 signed char *dst = SCM_BYTEVECTOR_CONTENTS (new_stream);
109 unsigned long int old_size = SCM_BYTEVECTOR_LENGTH (old_stream);
c014a02e 110 unsigned long int min_size = min (old_size, new_size);
74a16888 111
ca2ec018 112 scm_t_off offset = pt->write_pos - pt->write_buf;
3fe6190f
GH
113
114 pt->write_buf_size = new_size;
115
691fcf66 116 memcpy (dst, src, min_size);
8824ac88
MV
117
118 scm_remember_upto_here_1 (old_stream);
754c9491 119
94115ae3 120 /* reset buffer. */
754c9491 121 {
729dbac3 122 pt->stream = SCM_UNPACK (new_stream);
5a6d139b 123 pt->read_buf = pt->write_buf = (unsigned char *)dst;
ca2ec018 124 pt->read_pos = pt->write_pos = pt->write_buf + offset;
3fe6190f
GH
125 pt->write_end = pt->write_buf + pt->write_buf_size;
126 pt->read_end = pt->read_buf + pt->read_buf_size;
754c9491
JB
127 }
128}
129
ee149d03 130static void
ca2ec018 131st_write (SCM port, const void *data, size_t size)
0f2d19dd 132{
92c2555f 133 scm_t_port *pt = SCM_PTAB_ENTRY (port);
ee149d03 134
ca2ec018
AW
135 if (size > pt->write_end - pt->write_pos)
136 st_resize_port (pt, max (pt->write_buf_size * 2,
137 pt->write_end - pt->write_pos + size));
138
139 memcpy ((char *) pt->write_pos, data, size);
140 pt->read_pos = (pt->write_pos += size);
d8f1c216 141
3fe6190f
GH
142 if (pt->read_pos > pt->read_end)
143 {
144 pt->read_end = (unsigned char *) pt->read_pos;
145 pt->read_buf_size = pt->read_end - pt->read_buf;
146 }
31703ab8
GH
147}
148
754c9491 149static void
affc96b5 150st_end_input (SCM port, int offset)
754c9491 151{
92c2555f 152 scm_t_port *pt = SCM_PTAB_ENTRY (port);
7dcb364d 153
cd19d608
GH
154 if (pt->read_pos - pt->read_buf < offset)
155 scm_misc_error ("st_end_input", "negative position", SCM_EOL);
156
a3c8b9fc 157 pt->write_pos = (unsigned char *) (pt->read_pos = pt->read_pos - offset);
61e452ba 158 pt->rw_active = SCM_PORT_NEITHER;
754c9491
JB
159}
160
f1ce9199
LC
161static scm_t_off
162st_seek (SCM port, scm_t_off offset, int whence)
754c9491 163{
92c2555f 164 scm_t_port *pt = SCM_PTAB_ENTRY (port);
f1ce9199 165 scm_t_off target;
754c9491 166
7dcb364d
GH
167 if (pt->rw_active == SCM_PORT_READ && offset == 0 && whence == SEEK_CUR)
168 /* special case to avoid disturbing the unread-char buffer. */
754c9491 169 {
7dcb364d
GH
170 if (pt->read_buf == pt->putback_buf)
171 {
172 target = pt->saved_read_pos - pt->saved_read_buf
173 - (pt->read_end - pt->read_pos);
174 }
175 else
176 {
177 target = pt->read_pos - pt->read_buf;
178 }
754c9491 179 }
7dcb364d
GH
180 else
181 /* all other cases. */
754c9491 182 {
7dcb364d 183 if (pt->rw_active == SCM_PORT_READ)
4251ae2e 184 scm_end_input_unlocked (port);
7dcb364d 185
ca2ec018
AW
186 pt->rw_active = SCM_PORT_NEITHER;
187
7dcb364d
GH
188 switch (whence)
189 {
190 case SEEK_CUR:
191 target = pt->read_pos - pt->read_buf + offset;
192 break;
193 case SEEK_END:
194 target = pt->read_end - pt->read_buf + offset;
195 break;
196 default: /* SEEK_SET */
197 target = offset;
198 break;
199 }
200
201 if (target < 0)
202 scm_misc_error ("st_seek", "negative offset", SCM_EOL);
203
204 if (target >= pt->write_buf_size)
3fe6190f 205 {
206d3de3 206 if (!(SCM_CELL_WORD_0 (port) & SCM_WRTNG))
3fe6190f 207 {
7dcb364d
GH
208 if (target > pt->write_buf_size)
209 {
210 scm_misc_error ("st_seek",
211 "seek past end of read-only strport",
212 SCM_EOL);
213 }
214 }
d8f1c216
LC
215 else if (target == pt->write_buf_size)
216 st_resize_port (pt, target * 2);
3fe6190f 217 }
7dcb364d
GH
218 pt->read_pos = pt->write_pos = pt->read_buf + target;
219 if (pt->read_pos > pt->read_end)
3fe6190f 220 {
7dcb364d
GH
221 pt->read_end = (unsigned char *) pt->read_pos;
222 pt->read_buf_size = pt->read_end - pt->read_buf;
3fe6190f 223 }
ee149d03 224 }
754c9491
JB
225 return target;
226}
227
228static void
f1ce9199 229st_truncate (SCM port, scm_t_off length)
754c9491 230{
92c2555f 231 scm_t_port *pt = SCM_PTAB_ENTRY (port);
3fe6190f
GH
232
233 if (length > pt->write_buf_size)
234 st_resize_port (pt, length);
235
236 pt->read_buf_size = length;
237 pt->read_end = pt->read_buf + length;
238 if (pt->read_pos > pt->read_end)
ca2ec018 239 pt->read_pos = pt->write_pos = pt->read_end;
0f2d19dd
JB
240}
241
0b2c2ba3
LC
242/* The initial size in bytes of a string port's buffer. */
243#define INITIAL_BUFFER_SIZE 128
244
245/* Return a new string port with MODES. If STR is #f, a new backing
246 buffer is allocated; otherwise STR must be a string and a copy of it
247 serves as the buffer for the new port. */
7b041912
LC
248SCM
249scm_mkstrport (SCM pos, SCM str, long modes, const char *caller)
0f2d19dd 250{
691fcf66 251 SCM z, buf;
92c2555f 252 scm_t_port *pt;
2721f918
AW
253 const char *encoding;
254 size_t read_buf_size, str_len, c_pos;
691fcf66 255 char *c_buf;
e11e83f3 256
754c9491
JB
257 if (!((modes & SCM_WRTNG) || (modes & SCM_RDNG)))
258 scm_misc_error ("scm_mkstrport", "port must read or write", SCM_EOL);
5f16b897 259
2721f918 260 encoding = scm_i_default_port_encoding ();
7b041912 261
0b2c2ba3
LC
262 if (scm_is_false (str))
263 {
264 /* Allocate a new buffer to write to. */
265 str_len = INITIAL_BUFFER_SIZE;
266 buf = scm_c_make_bytevector (str_len);
267 c_buf = (char *) SCM_BYTEVECTOR_CONTENTS (buf);
268
269 /* Reset `read_buf_size'. It will contain the actual number of
2721f918
AW
270 bytes written to the port. */
271 read_buf_size = 0;
0b2c2ba3
LC
272 c_pos = 0;
273 }
274 else
275 {
276 /* STR is a string. */
277 char *copy;
691fcf66 278
0b2c2ba3 279 SCM_ASSERT (scm_is_string (str), str, SCM_ARG1, caller);
691fcf66 280
2721f918
AW
281 /* Create a copy of STR in ENCODING. */
282 copy = scm_to_stringn (str, &str_len, encoding,
0b2c2ba3
LC
283 SCM_FAILED_CONVERSION_ERROR);
284 buf = scm_c_make_bytevector (str_len);
285 c_buf = (char *) SCM_BYTEVECTOR_CONTENTS (buf);
286 memcpy (c_buf, copy, str_len);
287 free (copy);
691fcf66 288
0b2c2ba3 289 c_pos = scm_to_unsigned_integer (pos, 0, str_len);
2721f918 290 read_buf_size = str_len;
0b2c2ba3 291 }
691fcf66 292
2721f918
AW
293 z = scm_c_make_port_with_encoding (scm_tc16_strport, modes,
294 encoding,
295 SCM_FAILED_CONVERSION_ERROR,
296 (scm_t_bits)buf);
7b041912 297
2721f918 298 pt = SCM_PTAB_ENTRY (z);
691fcf66 299 pt->write_buf = pt->read_buf = (unsigned char *) c_buf;
e11e83f3 300 pt->read_pos = pt->write_pos = pt->read_buf + c_pos;
2721f918 301 pt->read_buf_size = read_buf_size;
691fcf66 302 pt->write_buf_size = str_len;
754c9491 303 pt->write_end = pt->read_end = pt->read_buf + pt->read_buf_size;
0de97b83 304 pt->rw_random = 1;
3fe6190f 305
0f2d19dd
JB
306 return z;
307}
308
7b041912
LC
309/* Create a new string from the buffer of PORT, a string port, converting from
310 PORT's encoding to the standard string representation. */
311SCM
312scm_strport_to_string (SCM port)
3fe6190f 313{
7b041912
LC
314 scm_t_port *pt = SCM_PTAB_ENTRY (port);
315
fac32b51
MG
316 if (pt->read_buf_size == 0)
317 return scm_nullstr;
318
ca2ec018
AW
319 return scm_from_stringn ((char *)pt->read_buf, pt->read_buf_size,
320 pt->encoding, pt->ilseq_handler);
3fe6190f
GH
321}
322
fe78b6c0
KN
323SCM_DEFINE (scm_object_to_string, "object->string", 1, 1, 0,
324 (SCM obj, SCM printer),
325 "Return a Scheme string obtained by printing @var{obj}.\n"
326 "Printing function can be specified by the optional second\n"
327 "argument @var{printer} (default: @code{write}).")
1f3908c4
KN
328#define FUNC_NAME s_scm_object_to_string
329{
8b263377 330 SCM port, result;
fe78b6c0
KN
331
332 if (!SCM_UNBNDP (printer))
333 SCM_VALIDATE_PROC (2, printer);
1f3908c4 334
0b2c2ba3
LC
335 port = scm_mkstrport (SCM_INUM0, SCM_BOOL_F,
336 SCM_OPN | SCM_WRTNG, FUNC_NAME);
fe78b6c0
KN
337
338 if (SCM_UNBNDP (printer))
339 scm_write (obj, port);
340 else
fdc28395 341 scm_call_2 (printer, obj, port);
fe78b6c0 342
8b263377
LC
343 result = scm_strport_to_string (port);
344
345 /* Explicitly close PORT so that the iconv CDs associated with it are
346 deallocated right away. This is important because CDs use a lot of
347 memory that's not visible to the GC, so not freeing them can lead
348 to almost large heap usage. See
349 <http://wingolog.org/archives/2011/02/25/ports-weaks-gc-and-dark-matter>
350 for details. */
351 scm_close_port (port);
352
353 return result;
1f3908c4
KN
354}
355#undef FUNC_NAME
356
3b3b36dd 357SCM_DEFINE (scm_call_with_output_string, "call-with-output-string", 1, 0, 0,
1bbd0b84 358 (SCM proc),
b380b885
MD
359 "Calls the one-argument procedure @var{proc} with a newly created output\n"
360 "port. When the function returns, the string composed of the characters\n"
361 "written into the port is returned.")
1bbd0b84 362#define FUNC_NAME s_scm_call_with_output_string
0f2d19dd
JB
363{
364 SCM p;
754c9491 365
0b2c2ba3 366 p = scm_mkstrport (SCM_INUM0, SCM_BOOL_F,
754c9491 367 SCM_OPN | SCM_WRTNG,
1bbd0b84 368 FUNC_NAME);
fdc28395 369 scm_call_1 (proc, p);
3fe6190f 370
184b85a3 371 return scm_get_output_string (p);
0f2d19dd 372}
1bbd0b84 373#undef FUNC_NAME
0f2d19dd 374
3b3b36dd 375SCM_DEFINE (scm_call_with_input_string, "call-with-input-string", 2, 0, 0,
1e6808ea
MG
376 (SCM string, SCM proc),
377 "Calls the one-argument procedure @var{proc} with a newly\n"
378 "created input port from which @var{string}'s contents may be\n"
379 "read. The value yielded by the @var{proc} is returned.")
1bbd0b84 380#define FUNC_NAME s_scm_call_with_input_string
0f2d19dd 381{
1e6808ea 382 SCM p = scm_mkstrport(SCM_INUM0, string, SCM_OPN | SCM_RDNG, FUNC_NAME);
fdc28395 383 return scm_call_1 (proc, p);
0f2d19dd 384}
1bbd0b84 385#undef FUNC_NAME
0f2d19dd 386
e87a03fc
MG
387SCM_DEFINE (scm_open_input_string, "open-input-string", 1, 0, 0,
388 (SCM str),
1e6808ea
MG
389 "Take a string and return an input port that delivers characters\n"
390 "from the string. The port can be closed by\n"
e87a03fc
MG
391 "@code{close-input-port}, though its storage will be reclaimed\n"
392 "by the garbage collector if it becomes inaccessible.")
393#define FUNC_NAME s_scm_open_input_string
394{
395 SCM p = scm_mkstrport(SCM_INUM0, str, SCM_OPN | SCM_RDNG, FUNC_NAME);
396 return p;
397}
398#undef FUNC_NAME
399
400SCM_DEFINE (scm_open_output_string, "open-output-string", 0, 0, 0,
401 (void),
1e6808ea 402 "Return an output port that will accumulate characters for\n"
e87a03fc
MG
403 "retrieval by @code{get-output-string}. The port can be closed\n"
404 "by the procedure @code{close-output-port}, though its storage\n"
405 "will be reclaimed by the garbage collector if it becomes\n"
406 "inaccessible.")
407#define FUNC_NAME s_scm_open_output_string
408{
409 SCM p;
410
0b2c2ba3 411 p = scm_mkstrport (SCM_INUM0, SCM_BOOL_F,
e87a03fc
MG
412 SCM_OPN | SCM_WRTNG,
413 FUNC_NAME);
414 return p;
415}
416#undef FUNC_NAME
417
418SCM_DEFINE (scm_get_output_string, "get-output-string", 1, 0, 0,
419 (SCM port),
420 "Given an output port created by @code{open-output-string},\n"
1e6808ea 421 "return a string consisting of the characters that have been\n"
e87a03fc
MG
422 "output to the port so far.")
423#define FUNC_NAME s_scm_get_output_string
424{
425 SCM_VALIDATE_OPOUTSTRPORT (1, port);
426 return scm_strport_to_string (port);
427}
428#undef FUNC_NAME
1cc91f1b 429
cd07a097 430
a8aa30d8
MD
431/* Given a null-terminated string EXPR containing a Scheme expression
432 read it, and return it as an SCM value. */
433SCM
ca13a04a 434scm_c_read_string (const char *expr)
a8aa30d8 435{
3fe6190f 436 SCM port = scm_mkstrport (SCM_INUM0,
cc95e00a 437 scm_from_locale_string (expr),
a8aa30d8 438 SCM_OPN | SCM_RDNG,
ca13a04a 439 "scm_c_read_string");
a8aa30d8
MD
440 SCM form;
441
deca31e1 442 form = scm_read (port);
a8aa30d8
MD
443
444 scm_close_port (port);
445 return form;
446}
447
cd07a097 448/* Given a null-terminated string EXPR containing Scheme program text,
a8aa30d8
MD
449 evaluate it, and return the result of the last expression evaluated. */
450SCM
ca13a04a 451scm_c_eval_string (const char *expr)
cd07a097 452{
cc95e00a 453 return scm_eval_string (scm_from_locale_string (expr));
b377f53e
JB
454}
455
209b52fe
MV
456SCM
457scm_c_eval_string_in_module (const char *expr, SCM module)
458{
cc95e00a 459 return scm_eval_string_in_module (scm_from_locale_string (expr), module);
209b52fe
MV
460}
461
462
209b52fe
MV
463SCM_DEFINE (scm_eval_string_in_module, "eval-string", 1, 1, 0,
464 (SCM string, SCM module),
96e83482
MV
465 "Evaluate @var{string} as the text representation of a Scheme\n"
466 "form or forms, and return whatever value they produce.\n"
209b52fe
MV
467 "Evaluation takes place in the given module, or the current\n"
468 "module when no module is given.\n"
469 "While the code is evaluated, the given module is made the\n"
470 "current one. The current module is restored when this\n"
471 "procedure returns.")
472#define FUNC_NAME s_scm_eval_string_in_module
96e83482 473{
0b0e066a
AW
474 static SCM eval_string = SCM_BOOL_F, k_module = SCM_BOOL_F;
475
476 if (scm_is_false (eval_string))
477 {
478 eval_string = scm_c_public_lookup ("ice-9 eval-string", "eval-string");
479 k_module = scm_from_locale_keyword ("module");
480 }
481
209b52fe
MV
482 if (SCM_UNBNDP (module))
483 module = scm_current_module ();
aeec5be1
MV
484 else
485 SCM_VALIDATE_MODULE (2, module);
0b0e066a
AW
486
487 return scm_call_3 (scm_variable_ref (eval_string), string, k_module, module);
96e83482 488}
1bbd0b84 489#undef FUNC_NAME
cd07a097 490
209b52fe
MV
491SCM
492scm_eval_string (SCM string)
493{
494 return scm_eval_string_in_module (string, SCM_UNDEFINED);
495}
496
92c2555f 497static scm_t_bits
0b6881fa 498scm_make_stptob ()
0f2d19dd 499{
ca2ec018 500 scm_t_bits tc = scm_make_port_type ("string", st_fill_input, st_write);
a98bddfd 501
affc96b5 502 scm_set_port_end_input (tc, st_end_input);
6c747373 503 scm_set_port_seek (tc, st_seek);
affc96b5 504 scm_set_port_truncate (tc, st_truncate);
a98bddfd
DH
505
506 return tc;
0f2d19dd
JB
507}
508
0f2d19dd
JB
509void
510scm_init_strports ()
0f2d19dd 511{
a98bddfd
DH
512 scm_tc16_strport = scm_make_stptob ();
513
a0599745 514#include "libguile/strports.x"
0f2d19dd
JB
515}
516
89e00824
ML
517
518/*
519 Local Variables:
520 c-file-style: "gnu"
521 End:
522*/