threadsafe access to scm_ptobs
[bpt/guile.git] / libguile / ports.c
CommitLineData
f4bc4e59
LC
1/* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004,
2 * 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
3 *
73be1d9e 4 * This library is free software; you can redistribute it and/or
53befeb7
NJ
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
0f2d19dd 8 *
53befeb7
NJ
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
0f2d19dd 13 *
73be1d9e
MV
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
53befeb7
NJ
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
73be1d9e 18 */
1bbd0b84 19
1bbd0b84 20
0f2d19dd 21\f
d68fee48
JB
22/* Headers. */
23
2b829bbb
KR
24#define _LARGEFILE64_SOURCE /* ask for stat64 etc */
25
dbb605f5 26#ifdef HAVE_CONFIG_H
bd515f37
RB
27# include <config.h>
28#endif
29
0f2d19dd 30#include <stdio.h>
e6e2e95a 31#include <errno.h>
8ab3d8a0 32#include <fcntl.h> /* for chsize on mingw */
b5cb4464 33#include <assert.h>
f4bc4e59 34#include <iconv.h>
889975e5
MG
35#include <uniconv.h>
36#include <unistr.h>
37#include <striconveh.h>
e6e2e95a 38
fca43887
LC
39#include <assert.h>
40
a0599745 41#include "libguile/_scm.h"
4e047c3e 42#include "libguile/async.h"
8269ba5b 43#include "libguile/deprecation.h"
f0942910 44#include "libguile/eval.h"
8ab3d8a0 45#include "libguile/fports.h" /* direct access for seek and truncate */
9511876f 46#include "libguile/goops.h"
a0599745
MD
47#include "libguile/smob.h"
48#include "libguile/chars.h"
185e369a 49#include "libguile/dynwind.h"
0f2d19dd 50
a0599745 51#include "libguile/keywords.h"
5dbc6c06 52#include "libguile/hashtab.h"
a0599745
MD
53#include "libguile/root.h"
54#include "libguile/strings.h"
b42170a4 55#include "libguile/mallocs.h"
a0599745
MD
56#include "libguile/validate.h"
57#include "libguile/ports.h"
3a5fb14d 58#include "libguile/vectors.h"
2721f918 59#include "libguile/weak-set.h"
9de87eea 60#include "libguile/fluids.h"
889975e5 61#include "libguile/eq.h"
0f2d19dd 62
bd9e24b3
GH
63#ifdef HAVE_STRING_H
64#include <string.h>
65#endif
66
ec65f5da
MV
67#ifdef HAVE_IO_H
68#include <io.h>
69#endif
70
0f2d19dd
JB
71#ifdef HAVE_UNISTD_H
72#include <unistd.h>
73#endif
74
95b88819
GH
75#ifdef HAVE_SYS_IOCTL_H
76#include <sys/ioctl.h>
77#endif
d68fee48 78
8ab3d8a0
KR
79/* Mingw (version 3.4.5, circa 2006) has ftruncate as an alias for chsize
80 already, but have this code here in case that wasn't so in past versions,
81 or perhaps to help other minimal DOS environments.
82
83 gnulib ftruncate.c has code using fcntl F_CHSIZE and F_FREESP, which
84 might be possibilities if we've got other systems without ftruncate. */
85
56a3dcd4 86#if defined HAVE_CHSIZE && ! defined HAVE_FTRUNCATE
82893676 87#define ftruncate(fd, size) chsize (fd, size)
8ab3d8a0
KR
88#undef HAVE_FTRUNCATE
89#define HAVE_FTRUNCATE 1
82893676
MG
90#endif
91
0f2d19dd 92\f
d68fee48 93/* The port kind table --- a dynamically resized array of port types. */
0f2d19dd
JB
94
95
96/* scm_ptobs scm_numptob
5dbc6c06 97 * implement a dynamically resized array of ptob records.
0f2d19dd
JB
98 * Indexes into this table are used when generating type
99 * tags for smobjects (if you know a tag you can get an index and conversely).
100 */
62bd5d66
AW
101static scm_t_ptob_descriptor **scm_ptobs = NULL;
102static long scm_numptob = 0; /* Number of port types. */
103static long scm_ptobs_size = 0; /* Number of slots in the port type
104 table. */
105static scm_i_pthread_mutex_t scm_ptobs_lock = SCM_I_PTHREAD_MUTEX_INITIALIZER;
106
107long
108scm_c_num_port_types (void)
109{
110 long ret;
111
112 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
113 ret = scm_numptob;
114 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
115
116 return ret;
117}
118
119scm_t_ptob_descriptor*
120scm_c_port_type_ref (long ptobnum)
121{
122 scm_t_ptob_descriptor *ret = NULL;
123
124 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
125
126 if (0 <= ptobnum && ptobnum < scm_numptob)
127 ret = scm_ptobs[ptobnum];
128
129 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
130
131 if (!ret)
132 scm_out_of_range ("scm_c_port_type_ref", scm_from_long (ptobnum));
133
134 return ret;
135}
136
137long
138scm_c_port_type_add_x (scm_t_ptob_descriptor *desc)
139{
140 long ret = -1;
141
142 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
143
144 if (scm_numptob + 1 < SCM_I_MAX_PORT_TYPE_COUNT)
145 {
146 if (scm_numptob == scm_ptobs_size)
147 {
148 unsigned long old_size = scm_ptobs_size;
149 scm_t_ptob_descriptor **old_ptobs = scm_ptobs;
150
151 /* Currently there are only 9 predefined port types, so one
152 resize will cover it. */
153 scm_ptobs_size = old_size + 10;
154
155 if (scm_ptobs_size >= SCM_I_MAX_PORT_TYPE_COUNT)
156 scm_ptobs_size = SCM_I_MAX_PORT_TYPE_COUNT;
157
158 scm_ptobs = scm_gc_malloc (sizeof (*scm_ptobs) * scm_ptobs_size,
159 "scm_ptobs");
160
161 memcpy (scm_ptobs, old_ptobs, sizeof (*scm_ptobs) * scm_numptob);
162 }
163
164 ret = scm_numptob++;
165 scm_ptobs[ret] = desc;
166 }
167
168 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
169
170 if (ret < 0)
171 scm_out_of_range ("scm_c_port_type_add_x", scm_from_long (scm_numptob));
172
173 return ret;
174}
0f2d19dd 175
f12733c9 176/*
f12733c9 177 * We choose to use an interface similar to the smob interface with
affc96b5 178 * fill_input and write as standard fields, passed to the port
f12733c9
MD
179 * type constructor, and optional fields set by setters.
180 */
181
70df8af6 182static void
e81d98ec 183flush_port_default (SCM port SCM_UNUSED)
70df8af6
GH
184{
185}
186
187static void
e81d98ec 188end_input_default (SCM port SCM_UNUSED, int offset SCM_UNUSED)
70df8af6
GH
189{
190}
0f2d19dd 191
92c2555f 192scm_t_bits
f12733c9 193scm_make_port_type (char *name,
affc96b5 194 int (*fill_input) (SCM port),
8aa011a1 195 void (*write) (SCM port, const void *data, size_t size))
0f2d19dd 196{
62bd5d66
AW
197 scm_t_ptob_descriptor *desc;
198 long ptobnum;
affc96b5 199
62bd5d66
AW
200 desc = scm_gc_malloc_pointerless (sizeof (*desc), "port-type");
201 memset (desc, 0, sizeof (*desc));
affc96b5 202
62bd5d66
AW
203 desc->name = name;
204 desc->print = scm_port_print;
205 desc->write = write;
206 desc->flush = flush_port_default;
207 desc->end_input = end_input_default;
208 desc->fill_input = fill_input;
affc96b5 209
62bd5d66 210 ptobnum = scm_c_port_type_add_x (desc);
affc96b5 211
62bd5d66 212 /* Make a class object if GOOPS is present. */
63385df2 213 if (SCM_UNPACK (scm_port_class[0]) != 0)
62bd5d66
AW
214 scm_make_port_classes (ptobnum, name);
215
216 return scm_tc7_port + ptobnum * 256;
0f2d19dd
JB
217}
218
f12733c9 219void
23f2b9a3 220scm_set_port_mark (scm_t_bits tc, SCM (*mark) (SCM))
f12733c9 221{
62bd5d66 222 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->mark = mark;
f12733c9
MD
223}
224
225void
23f2b9a3 226scm_set_port_free (scm_t_bits tc, size_t (*free) (SCM))
f12733c9 227{
62bd5d66 228 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->free = free;
f12733c9
MD
229}
230
231void
23f2b9a3 232scm_set_port_print (scm_t_bits tc, int (*print) (SCM exp, SCM port,
f12733c9
MD
233 scm_print_state *pstate))
234{
62bd5d66 235 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->print = print;
f12733c9
MD
236}
237
238void
23f2b9a3 239scm_set_port_equalp (scm_t_bits tc, SCM (*equalp) (SCM, SCM))
f12733c9 240{
62bd5d66 241 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->equalp = equalp;
f12733c9
MD
242}
243
31703ab8 244void
23f2b9a3 245scm_set_port_flush (scm_t_bits tc, void (*flush) (SCM port))
31703ab8 246{
62bd5d66 247 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->flush = flush;
31703ab8
GH
248}
249
f12733c9 250void
23f2b9a3 251scm_set_port_end_input (scm_t_bits tc, void (*end_input) (SCM port, int offset))
f12733c9 252{
62bd5d66 253 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->end_input = end_input;
f12733c9
MD
254}
255
256void
23f2b9a3 257scm_set_port_close (scm_t_bits tc, int (*close) (SCM))
f12733c9 258{
62bd5d66 259 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->close = close;
f12733c9
MD
260}
261
262void
f1ce9199
LC
263scm_set_port_seek (scm_t_bits tc,
264 scm_t_off (*seek) (SCM, scm_t_off, int))
f12733c9 265{
62bd5d66 266 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->seek = seek;
f12733c9
MD
267}
268
269void
f1ce9199 270scm_set_port_truncate (scm_t_bits tc, void (*truncate) (SCM, scm_t_off))
f12733c9 271{
62bd5d66 272 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->truncate = truncate;
f12733c9
MD
273}
274
275void
23f2b9a3 276scm_set_port_input_waiting (scm_t_bits tc, int (*input_waiting) (SCM))
f12733c9 277{
62bd5d66 278 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->input_waiting = input_waiting;
f12733c9
MD
279}
280
0f2d19dd 281\f
0f2d19dd 282
3b3b36dd 283SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0,
1e6808ea
MG
284 (SCM port),
285 "Return @code{#t} if a character is ready on input @var{port}\n"
286 "and return @code{#f} otherwise. If @code{char-ready?} returns\n"
287 "@code{#t} then the next @code{read-char} operation on\n"
288 "@var{port} is guaranteed not to hang. If @var{port} is a file\n"
289 "port at end of file then @code{char-ready?} returns @code{#t}.\n"
c2dfff19
KR
290 "\n"
291 "@code{char-ready?} exists to make it possible for a\n"
1e6808ea
MG
292 "program to accept characters from interactive ports without\n"
293 "getting stuck waiting for input. Any input editors associated\n"
294 "with such ports must make sure that characters whose existence\n"
295 "has been asserted by @code{char-ready?} cannot be rubbed out.\n"
296 "If @code{char-ready?} were to return @code{#f} at end of file,\n"
297 "a port at end of file would be indistinguishable from an\n"
c2dfff19 298 "interactive port that has no ready characters.")
1bbd0b84 299#define FUNC_NAME s_scm_char_ready_p
0f2d19dd 300{
92c2555f 301 scm_t_port *pt;
6c951427 302
0f2d19dd 303 if (SCM_UNBNDP (port))
9de87eea 304 port = scm_current_input_port ();
b2456dd4
AW
305 /* It's possible to close the current input port, so validate even in
306 this case. */
307 SCM_VALIDATE_OPINPORT (1, port);
d68fee48 308
ae4c4016
JB
309 pt = SCM_PTAB_ENTRY (port);
310
6c951427
GH
311 /* if the current read buffer is filled, or the
312 last pushed-back char has been read and the saved buffer is
313 filled, result is true. */
314 if (pt->read_pos < pt->read_end
315 || (pt->read_buf == pt->putback_buf
316 && pt->saved_read_pos < pt->saved_read_end))
0f2d19dd 317 return SCM_BOOL_T;
ee149d03
JB
318 else
319 {
62bd5d66 320 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
ee149d03 321
affc96b5 322 if (ptob->input_waiting)
7888309b 323 return scm_from_bool(ptob->input_waiting (port));
ee149d03 324 else
6c951427 325 return SCM_BOOL_T;
ee149d03 326 }
0f2d19dd 327}
1bbd0b84 328#undef FUNC_NAME
0f2d19dd 329
c2da2648
GH
330/* move up to read_len chars from port's putback and/or read buffers
331 into memory starting at dest. returns the number of chars moved. */
62bd5d66
AW
332size_t
333scm_take_from_input_buffers (SCM port, char *dest, size_t read_len)
c2da2648 334{
92c2555f 335 scm_t_port *pt = SCM_PTAB_ENTRY (port);
c2da2648
GH
336 size_t chars_read = 0;
337 size_t from_buf = min (pt->read_end - pt->read_pos, read_len);
338
339 if (from_buf > 0)
340 {
341 memcpy (dest, pt->read_pos, from_buf);
342 pt->read_pos += from_buf;
343 chars_read += from_buf;
344 read_len -= from_buf;
345 dest += from_buf;
346 }
347
348 /* if putback was active, try the real input buffer too. */
349 if (pt->read_buf == pt->putback_buf)
350 {
351 from_buf = min (pt->saved_read_end - pt->saved_read_pos, read_len);
352 if (from_buf > 0)
353 {
354 memcpy (dest, pt->saved_read_pos, from_buf);
355 pt->saved_read_pos += from_buf;
356 chars_read += from_buf;
357 }
358 }
359 return chars_read;
360}
361
6c951427 362/* Clear a port's read buffers, returning the contents. */
a1ec6916 363SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0,
1bbd0b84 364 (SCM port),
4a151b3d
GH
365 "This procedure clears a port's input buffers, similar\n"
366 "to the way that force-output clears the output buffer. The\n"
367 "contents of the buffers are returned as a single string, e.g.,\n"
368 "\n"
369 "@lisp\n"
370 "(define p (open-input-file ...))\n"
371 "(drain-input p) => empty string, nothing buffered yet.\n"
372 "(unread-char (read-char p) p)\n"
373 "(drain-input p) => initial chars from p, up to the buffer size.\n"
374 "@end lisp\n\n"
375 "Draining the buffers may be useful for cleanly finishing\n"
376 "buffered I/O so that the file descriptor can be used directly\n"
377 "for further input.")
1bbd0b84 378#define FUNC_NAME s_scm_drain_input
ee149d03 379{
840ae05d 380 SCM result;
3a5fb14d 381 char *data;
28a6e1b0 382 scm_t_port *pt;
c014a02e 383 long count;
ee149d03 384
34d19ef6 385 SCM_VALIDATE_OPINPORT (1, port);
28a6e1b0 386 pt = SCM_PTAB_ENTRY (port);
840ae05d 387
6c951427
GH
388 count = pt->read_end - pt->read_pos;
389 if (pt->read_buf == pt->putback_buf)
390 count += pt->saved_read_end - pt->saved_read_pos;
840ae05d 391
67a72dc1
AW
392 if (count)
393 {
190d4b0d 394 result = scm_i_make_string (count, &data, 0);
67a72dc1
AW
395 scm_take_from_input_buffers (port, data, count);
396 }
397 else
398 result = scm_nullstr;
399
840ae05d 400 return result;
ee149d03 401}
1bbd0b84 402#undef FUNC_NAME
0f2d19dd
JB
403
404\f
d68fee48 405/* Standard ports --- current input, output, error, and more(!). */
0f2d19dd 406
34297700
AW
407static SCM cur_inport_fluid = SCM_BOOL_F;
408static SCM cur_outport_fluid = SCM_BOOL_F;
409static SCM cur_errport_fluid = SCM_BOOL_F;
410static SCM cur_loadport_fluid = SCM_BOOL_F;
9de87eea 411
3b3b36dd 412SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
e1546b65
MG
413 (),
414 "Return the current input port. This is the default port used\n"
415 "by many input procedures. Initially, @code{current-input-port}\n"
416 "returns the @dfn{standard input} in Unix and C terminology.")
1bbd0b84 417#define FUNC_NAME s_scm_current_input_port
0f2d19dd 418{
34297700 419 if (scm_is_true (cur_inport_fluid))
889975e5
MG
420 return scm_fluid_ref (cur_inport_fluid);
421 else
422 return SCM_BOOL_F;
0f2d19dd 423}
1bbd0b84 424#undef FUNC_NAME
0f2d19dd 425
3b3b36dd 426SCM_DEFINE (scm_current_output_port, "current-output-port", 0, 0, 0,
e1546b65
MG
427 (),
428 "Return the current output port. This is the default port used\n"
9401323e 429 "by many output procedures. Initially,\n"
e1546b65
MG
430 "@code{current-output-port} returns the @dfn{standard output} in\n"
431 "Unix and C terminology.")
1bbd0b84 432#define FUNC_NAME s_scm_current_output_port
0f2d19dd 433{
34297700 434 if (scm_is_true (cur_outport_fluid))
889975e5
MG
435 return scm_fluid_ref (cur_outport_fluid);
436 else
437 return SCM_BOOL_F;
0f2d19dd 438}
1bbd0b84 439#undef FUNC_NAME
0f2d19dd 440
3b3b36dd 441SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0,
1bbd0b84 442 (),
b380b885
MD
443 "Return the port to which errors and warnings should be sent (the\n"
444 "@dfn{standard error} in Unix and C terminology).")
1bbd0b84 445#define FUNC_NAME s_scm_current_error_port
0f2d19dd 446{
34297700 447 if (scm_is_true (cur_errport_fluid))
889975e5
MG
448 return scm_fluid_ref (cur_errport_fluid);
449 else
450 return SCM_BOOL_F;
0f2d19dd 451}
1bbd0b84 452#undef FUNC_NAME
0f2d19dd 453
3b3b36dd 454SCM_DEFINE (scm_current_load_port, "current-load-port", 0, 0, 0,
e1546b65 455 (),
b450f070 456 "Return the current-load-port.\n"
e1546b65 457 "The load port is used internally by @code{primitive-load}.")
1bbd0b84 458#define FUNC_NAME s_scm_current_load_port
31614d8e 459{
9de87eea 460 return scm_fluid_ref (cur_loadport_fluid);
31614d8e 461}
1bbd0b84 462#undef FUNC_NAME
31614d8e 463
3b3b36dd 464SCM_DEFINE (scm_set_current_input_port, "set-current-input-port", 1, 0, 0,
1bbd0b84 465 (SCM port),
8f85c0c6
NJ
466 "@deffnx {Scheme Procedure} set-current-output-port port\n"
467 "@deffnx {Scheme Procedure} set-current-error-port port\n"
b380b885
MD
468 "Change the ports returned by @code{current-input-port},\n"
469 "@code{current-output-port} and @code{current-error-port}, respectively,\n"
470 "so that they use the supplied @var{port} for input or output.")
1bbd0b84 471#define FUNC_NAME s_scm_set_current_input_port
0f2d19dd 472{
9de87eea 473 SCM oinp = scm_fluid_ref (cur_inport_fluid);
34d19ef6 474 SCM_VALIDATE_OPINPORT (1, port);
9de87eea 475 scm_fluid_set_x (cur_inport_fluid, port);
0f2d19dd
JB
476 return oinp;
477}
1bbd0b84 478#undef FUNC_NAME
0f2d19dd
JB
479
480
3b3b36dd 481SCM_DEFINE (scm_set_current_output_port, "set-current-output-port", 1, 0, 0,
e1546b65
MG
482 (SCM port),
483 "Set the current default output port to @var{port}.")
1bbd0b84 484#define FUNC_NAME s_scm_set_current_output_port
0f2d19dd 485{
9de87eea 486 SCM ooutp = scm_fluid_ref (cur_outport_fluid);
78446828 487 port = SCM_COERCE_OUTPORT (port);
34d19ef6 488 SCM_VALIDATE_OPOUTPORT (1, port);
9de87eea 489 scm_fluid_set_x (cur_outport_fluid, port);
0f2d19dd
JB
490 return ooutp;
491}
1bbd0b84 492#undef FUNC_NAME
0f2d19dd
JB
493
494
3b3b36dd 495SCM_DEFINE (scm_set_current_error_port, "set-current-error-port", 1, 0, 0,
e1546b65
MG
496 (SCM port),
497 "Set the current default error port to @var{port}.")
1bbd0b84 498#define FUNC_NAME s_scm_set_current_error_port
0f2d19dd 499{
9de87eea 500 SCM oerrp = scm_fluid_ref (cur_errport_fluid);
78446828 501 port = SCM_COERCE_OUTPORT (port);
34d19ef6 502 SCM_VALIDATE_OPOUTPORT (1, port);
9de87eea 503 scm_fluid_set_x (cur_errport_fluid, port);
0f2d19dd
JB
504 return oerrp;
505}
1bbd0b84 506#undef FUNC_NAME
0f2d19dd 507
185e369a 508void
661ae7ab 509scm_dynwind_current_input_port (SCM port)
9de87eea 510#define FUNC_NAME NULL
185e369a 511{
9de87eea 512 SCM_VALIDATE_OPINPORT (1, port);
661ae7ab 513 scm_dynwind_fluid (cur_inport_fluid, port);
185e369a 514}
9de87eea 515#undef FUNC_NAME
185e369a
MV
516
517void
661ae7ab 518scm_dynwind_current_output_port (SCM port)
9de87eea 519#define FUNC_NAME NULL
185e369a 520{
9de87eea
MV
521 port = SCM_COERCE_OUTPORT (port);
522 SCM_VALIDATE_OPOUTPORT (1, port);
661ae7ab 523 scm_dynwind_fluid (cur_outport_fluid, port);
185e369a 524}
9de87eea 525#undef FUNC_NAME
185e369a
MV
526
527void
661ae7ab 528scm_dynwind_current_error_port (SCM port)
9de87eea
MV
529#define FUNC_NAME NULL
530{
531 port = SCM_COERCE_OUTPORT (port);
532 SCM_VALIDATE_OPOUTPORT (1, port);
661ae7ab 533 scm_dynwind_fluid (cur_errport_fluid, port);
9de87eea
MV
534}
535#undef FUNC_NAME
536
537void
661ae7ab 538scm_i_dynwind_current_load_port (SCM port)
185e369a 539{
661ae7ab 540 scm_dynwind_fluid (cur_loadport_fluid, port);
185e369a
MV
541}
542
0f2d19dd 543\f
840ae05d 544/* The port table --- an array of pointers to ports. */
0f2d19dd 545
5dbc6c06
HWN
546/*
547 We need a global registry of ports to flush them all at exit, and to
548 get all the ports matching a file descriptor.
549 */
2721f918 550SCM scm_i_port_weak_set;
b9ad392e 551
651a0735
LC
552\f
553/* Port finalization. */
554
1cc91f1b 555
651a0735
LC
556static void finalize_port (GC_PTR, GC_PTR);
557
f4bc4e59 558/* Register a finalizer for PORT. */
651a0735
LC
559static SCM_C_INLINE_KEYWORD void
560register_finalizer_for_port (SCM port)
561{
f4bc4e59
LC
562 GC_finalization_proc prev_finalizer;
563 GC_PTR prev_finalization_data;
651a0735 564
f4bc4e59
LC
565 /* Register a finalizer for PORT so that its iconv CDs get freed and
566 optionally its type's `free' function gets called. */
47ed8656
AW
567 GC_REGISTER_FINALIZER_NO_ORDER (SCM_HEAP_OBJECT_BASE (port),
568 finalize_port, 0,
f4bc4e59
LC
569 &prev_finalizer,
570 &prev_finalization_data);
651a0735
LC
571}
572
573/* Finalize the object (a port) pointed to by PTR. */
574static void
575finalize_port (GC_PTR ptr, GC_PTR data)
576{
21041372 577 SCM port = SCM_PACK_POINTER (ptr);
651a0735
LC
578
579 if (!SCM_PORTP (port))
580 abort ();
581
582 if (SCM_OPENP (port))
583 {
584 if (SCM_REVEALED (port) > 0)
585 /* Keep "revealed" ports alive and re-register a finalizer. */
586 register_finalizer_for_port (port);
587 else
588 {
62bd5d66 589 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
f4bc4e59
LC
590 scm_t_port *entry;
591
62bd5d66
AW
592 if (ptob->free)
593 /* Yes, I really do mean `free' rather than `close'. `close'
651a0735 594 is for explicit `close-port' by user. */
62bd5d66 595 ptob->free (port);
651a0735 596
f4bc4e59
LC
597 entry = SCM_PTAB_ENTRY (port);
598
599 if (entry->input_cd != (iconv_t) -1)
600 iconv_close (entry->input_cd);
601 if (entry->output_cd != (iconv_t) -1)
602 iconv_close (entry->output_cd);
603
651a0735
LC
604 SCM_SETSTREAM (port, 0);
605 SCM_CLR_PORT_OPEN_FLAG (port);
651a0735
LC
606
607 scm_gc_ports_collected++;
608 }
609 }
610}
611
612
613
614\f
615
da220f27 616SCM
2721f918
AW
617scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits,
618 const char *encoding,
619 scm_t_string_failed_conversion_handler handler,
620 scm_t_bits stream)
0f2d19dd 621{
2721f918
AW
622 SCM ret;
623 scm_t_port *entry;
62bd5d66 624 scm_t_ptob_descriptor *ptob;
2721f918
AW
625
626 entry = (scm_t_port *) scm_gc_calloc (sizeof (scm_t_port), "port");
62bd5d66
AW
627 ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tag));
628
629 ret = scm_words (tag | mode_bits, 3);
630 SCM_SET_CELL_WORD_1 (ret, (scm_t_bits) entry);
631 SCM_SET_CELL_WORD_2 (ret, (scm_t_bits) ptob);
5f16b897 632
30b126d2
AW
633#if SCM_USE_PTHREAD_THREADS
634 scm_i_pthread_mutex_init (&entry->lock, scm_i_pthread_mutexattr_recursive);
635#endif
636
840ae05d 637 entry->file_name = SCM_BOOL_F;
61e452ba 638 entry->rw_active = SCM_PORT_NEITHER;
2721f918
AW
639 entry->port = ret;
640 entry->stream = stream;
641 entry->encoding = encoding ? scm_gc_strdup (encoding, "port") : NULL;
f4bc4e59
LC
642 /* The conversion descriptors will be opened lazily. */
643 entry->input_cd = (iconv_t) -1;
644 entry->output_cd = (iconv_t) -1;
2721f918 645 entry->ilseq_handler = handler;
f4bc4e59 646
2721f918 647 scm_weak_set_add_x (scm_i_port_weak_set, ret);
651a0735
LC
648
649 /* For each new port, register a finalizer so that it port type's free
650 function can be invoked eventually. */
2721f918 651 register_finalizer_for_port (ret);
651a0735 652
2721f918
AW
653 return ret;
654}
655
656SCM
657scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream)
658{
659 return scm_c_make_port_with_encoding (tag, mode_bits,
660 scm_i_default_port_encoding (),
661 scm_i_get_conversion_strategy (SCM_BOOL_F),
662 stream);
663}
664
665SCM
666scm_new_port_table_entry (scm_t_bits tag)
667{
668 return scm_c_make_port (tag, 0, 0);
0f2d19dd
JB
669}
670
6c951427 671/* Remove a port from the table and destroy it. */
b9ad392e 672
8269ba5b 673static void
5dbc6c06
HWN
674scm_i_remove_port (SCM port)
675#define FUNC_NAME "scm_remove_port"
0f2d19dd 676{
8269ba5b 677 scm_t_port *p;
682d78d0 678
8269ba5b
AW
679 p = SCM_PTAB_ENTRY (port);
680 scm_port_non_buffer (p);
2721f918
AW
681 SCM_SETPTAB_ENTRY (port, 0);
682 scm_weak_set_remove_x (scm_i_port_weak_set, port);
683
682d78d0
LC
684 p->putback_buf = NULL;
685 p->putback_buf_size = 0;
3e05fc04
AW
686
687 if (p->input_cd != (iconv_t) -1)
688 {
689 iconv_close (p->input_cd);
690 p->input_cd = (iconv_t) -1;
691 }
692
693 if (p->output_cd != (iconv_t) -1)
694 {
695 iconv_close (p->output_cd);
696 p->output_cd = (iconv_t) -1;
697 }
0f2d19dd 698}
db4b4ca6
DH
699#undef FUNC_NAME
700
0f2d19dd 701
70df8af6 702void
92c2555f 703scm_port_non_buffer (scm_t_port *pt)
70df8af6
GH
704{
705 pt->read_pos = pt->read_buf = pt->read_end = &pt->shortbuf;
706 pt->write_buf = pt->write_pos = &pt->shortbuf;
707 pt->read_buf_size = pt->write_buf_size = 1;
708 pt->write_end = pt->write_buf + pt->write_buf_size;
709}
0f2d19dd 710
d68fee48
JB
711\f
712/* Revealed counts --- an oddity inherited from SCSH. */
713
8b13c6b3
GH
714/* Find a port in the table and return its revealed count.
715 Also used by the garbage collector.
0f2d19dd 716 */
1cc91f1b 717
0f2d19dd 718int
a284e297 719scm_revealed_count (SCM port)
0f2d19dd
JB
720{
721 return SCM_REVEALED(port);
722}
723
724
725
726/* Return the revealed count for a port. */
727
3b3b36dd 728SCM_DEFINE (scm_port_revealed, "port-revealed", 1, 0, 0,
1bbd0b84 729 (SCM port),
1e6808ea 730 "Return the revealed count for @var{port}.")
1bbd0b84 731#define FUNC_NAME s_scm_port_revealed
0f2d19dd 732{
78446828 733 port = SCM_COERCE_OUTPORT (port);
34d19ef6 734 SCM_VALIDATE_OPENPORT (1, port);
e11e83f3 735 return scm_from_int (scm_revealed_count (port));
0f2d19dd 736}
1bbd0b84 737#undef FUNC_NAME
0f2d19dd
JB
738
739/* Set the revealed count for a port. */
3b3b36dd 740SCM_DEFINE (scm_set_port_revealed_x, "set-port-revealed!", 2, 0, 0,
1bbd0b84 741 (SCM port, SCM rcount),
b450f070 742 "Sets the revealed count for a port to a given value.\n"
b380b885 743 "The return value is unspecified.")
1bbd0b84 744#define FUNC_NAME s_scm_set_port_revealed_x
0f2d19dd 745{
78446828 746 port = SCM_COERCE_OUTPORT (port);
34d19ef6 747 SCM_VALIDATE_OPENPORT (1, port);
a55c2b68 748 SCM_REVEALED (port) = scm_to_int (rcount);
8b13c6b3 749 return SCM_UNSPECIFIED;
0f2d19dd 750}
1bbd0b84 751#undef FUNC_NAME
0f2d19dd 752
d68fee48
JB
753
754\f
755/* Retrieving a port's mode. */
756
eadd48de
GH
757/* Return the flags that characterize a port based on the mode
758 * string used to open a file for that port.
759 *
760 * See PORT FLAGS in scm.h
761 */
762
3a5fb14d 763static long
889975e5 764scm_i_mode_bits_n (SCM modes)
3a5fb14d
MV
765{
766 return (SCM_OPN
889975e5
MG
767 | (scm_i_string_contains_char (modes, 'r')
768 || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0)
769 | (scm_i_string_contains_char (modes, 'w')
770 || scm_i_string_contains_char (modes, 'a')
771 || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0)
772 | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0)
773 | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0));
3a5fb14d
MV
774}
775
eadd48de 776long
a284e297 777scm_mode_bits (char *modes)
eadd48de 778{
889975e5 779 return scm_i_mode_bits (scm_from_locale_string (modes));
eadd48de
GH
780}
781
d617ee18
MV
782long
783scm_i_mode_bits (SCM modes)
784{
785 long bits;
786
787 if (!scm_is_string (modes))
788 scm_wrong_type_arg_msg (NULL, 0, modes, "string");
789
889975e5 790 bits = scm_i_mode_bits_n (modes);
d617ee18
MV
791 scm_remember_upto_here_1 (modes);
792 return bits;
793}
eadd48de
GH
794
795/* Return the mode flags from an open port.
796 * Some modes such as "append" are only used when opening
797 * a file and are not returned here. */
798
3b3b36dd 799SCM_DEFINE (scm_port_mode, "port-mode", 1, 0, 0,
1bbd0b84 800 (SCM port),
1e6808ea
MG
801 "Return the port modes associated with the open port @var{port}.\n"
802 "These will not necessarily be identical to the modes used when\n"
803 "the port was opened, since modes such as \"append\" which are\n"
804 "used only during port creation are not retained.")
1bbd0b84 805#define FUNC_NAME s_scm_port_mode
eadd48de 806{
26a3038d 807 char modes[4];
eadd48de 808 modes[0] = '\0';
78446828
MV
809
810 port = SCM_COERCE_OUTPORT (port);
34d19ef6 811 SCM_VALIDATE_OPPORT (1, port);
f9a64404
DH
812 if (SCM_CELL_WORD_0 (port) & SCM_RDNG) {
813 if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
eadd48de
GH
814 strcpy (modes, "r+");
815 else
816 strcpy (modes, "r");
817 }
f9a64404 818 else if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
eadd48de 819 strcpy (modes, "w");
f9a64404 820 if (SCM_CELL_WORD_0 (port) & SCM_BUF0)
eadd48de 821 strcat (modes, "0");
62bd5d66
AW
822
823 return scm_from_latin1_string (modes);
eadd48de 824}
1bbd0b84 825#undef FUNC_NAME
eadd48de
GH
826
827
d68fee48
JB
828\f
829/* Closing ports. */
830
0f2d19dd
JB
831/* scm_close_port
832 * Call the close operation on a port object.
eadd48de 833 * see also scm_close.
0f2d19dd 834 */
3b3b36dd 835SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0,
1bbd0b84 836 (SCM port),
1e6808ea
MG
837 "Close the specified port object. Return @code{#t} if it\n"
838 "successfully closes a port or @code{#f} if it was already\n"
839 "closed. An exception may be raised if an error occurs, for\n"
840 "example when flushing buffered output. See also @ref{Ports and\n"
841 "File Descriptors, close}, for a procedure which can close file\n"
842 "descriptors.")
1bbd0b84 843#define FUNC_NAME s_scm_close_port
0f2d19dd 844{
eadd48de
GH
845 int rv;
846
78446828
MV
847 port = SCM_COERCE_OUTPORT (port);
848
7a754ca6 849 SCM_VALIDATE_PORT (1, port);
0f2d19dd 850 if (SCM_CLOSEDP (port))
eadd48de 851 return SCM_BOOL_F;
62bd5d66
AW
852 if (SCM_PORT_DESCRIPTOR (port)->close)
853 rv = SCM_PORT_DESCRIPTOR (port)->close (port);
eadd48de
GH
854 else
855 rv = 0;
5dbc6c06 856 scm_i_remove_port (port);
22a52da1 857 SCM_CLR_PORT_OPEN_FLAG (port);
7888309b 858 return scm_from_bool (rv >= 0);
7a754ca6
MD
859}
860#undef FUNC_NAME
861
862SCM_DEFINE (scm_close_input_port, "close-input-port", 1, 0, 0,
863 (SCM port),
864 "Close the specified input port object. The routine has no effect if\n"
865 "the file has already been closed. An exception may be raised if an\n"
866 "error occurs. The value returned is unspecified.\n\n"
867 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
868 "which can close file descriptors.")
869#define FUNC_NAME s_scm_close_input_port
870{
871 SCM_VALIDATE_INPUT_PORT (1, port);
872 scm_close_port (port);
873 return SCM_UNSPECIFIED;
874}
875#undef FUNC_NAME
876
877SCM_DEFINE (scm_close_output_port, "close-output-port", 1, 0, 0,
878 (SCM port),
879 "Close the specified output port object. The routine has no effect if\n"
880 "the file has already been closed. An exception may be raised if an\n"
881 "error occurs. The value returned is unspecified.\n\n"
882 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
883 "which can close file descriptors.")
884#define FUNC_NAME s_scm_close_output_port
885{
886 port = SCM_COERCE_OUTPORT (port);
887 SCM_VALIDATE_OUTPUT_PORT (1, port);
888 scm_close_port (port);
889 return SCM_UNSPECIFIED;
0f2d19dd 890}
1bbd0b84 891#undef FUNC_NAME
0f2d19dd 892
2721f918
AW
893struct for_each_data
894{
895 void (*proc) (void *data, SCM p);
896 void *data;
897};
898
5dbc6c06 899static SCM
2721f918 900for_each_trampoline (void *data, SCM port, SCM result)
5dbc6c06 901{
2721f918
AW
902 struct for_each_data *d = data;
903
904 d->proc (d->data, port);
905
906 return result;
5dbc6c06
HWN
907}
908
c536b4b3
MV
909void
910scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data)
c2ca4493 911{
2721f918
AW
912 struct for_each_data d;
913
914 d.proc = proc;
915 d.data = data;
fdfe6305 916
2721f918
AW
917 scm_c_weak_set_fold (for_each_trampoline, &d, SCM_EOL,
918 scm_i_port_weak_set);
919}
3a5fb14d 920
2721f918
AW
921static void
922scm_for_each_trampoline (void *data, SCM port)
923{
21041372 924 scm_call_1 (SCM_PACK_POINTER (data), port);
c536b4b3 925}
fdfe6305 926
c536b4b3
MV
927SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0,
928 (SCM proc),
929 "Apply @var{proc} to each port in the Guile port table\n"
930 "in turn. The return value is unspecified. More specifically,\n"
931 "@var{proc} is applied exactly once to every port that exists\n"
932 "in the system at the time @var{port-for-each} is invoked.\n"
933 "Changes to the port table while @var{port-for-each} is running\n"
934 "have no effect as far as @var{port-for-each} is concerned.")
935#define FUNC_NAME s_scm_port_for_each
936{
937 SCM_VALIDATE_PROC (1, proc);
938
21041372 939 scm_c_port_for_each (scm_for_each_trampoline, SCM_UNPACK_POINTER (proc));
2721f918 940
c2ca4493
GH
941 return SCM_UNSPECIFIED;
942}
943#undef FUNC_NAME
944
c536b4b3 945
d68fee48
JB
946\f
947/* Utter miscellany. Gosh, we should clean this up some time. */
948
3b3b36dd 949SCM_DEFINE (scm_input_port_p, "input-port?", 1, 0, 0,
1bbd0b84 950 (SCM x),
1e6808ea
MG
951 "Return @code{#t} if @var{x} is an input port, otherwise return\n"
952 "@code{#f}. Any object satisfying this predicate also satisfies\n"
953 "@code{port?}.")
1bbd0b84 954#define FUNC_NAME s_scm_input_port_p
0f2d19dd 955{
7888309b 956 return scm_from_bool (SCM_INPUT_PORT_P (x));
0f2d19dd 957}
1bbd0b84 958#undef FUNC_NAME
0f2d19dd 959
3b3b36dd 960SCM_DEFINE (scm_output_port_p, "output-port?", 1, 0, 0,
1bbd0b84 961 (SCM x),
1e6808ea
MG
962 "Return @code{#t} if @var{x} is an output port, otherwise return\n"
963 "@code{#f}. Any object satisfying this predicate also satisfies\n"
964 "@code{port?}.")
1bbd0b84 965#define FUNC_NAME s_scm_output_port_p
0f2d19dd 966{
82893676 967 x = SCM_COERCE_OUTPORT (x);
7888309b 968 return scm_from_bool (SCM_OUTPUT_PORT_P (x));
0f2d19dd 969}
1bbd0b84 970#undef FUNC_NAME
0f2d19dd 971
eb5c0a2a
GH
972SCM_DEFINE (scm_port_p, "port?", 1, 0, 0,
973 (SCM x),
1e6808ea 974 "Return a boolean indicating whether @var{x} is a port.\n"
5352393c
MG
975 "Equivalent to @code{(or (input-port? @var{x}) (output-port?\n"
976 "@var{x}))}.")
eb5c0a2a
GH
977#define FUNC_NAME s_scm_port_p
978{
7888309b 979 return scm_from_bool (SCM_PORTP (x));
eb5c0a2a
GH
980}
981#undef FUNC_NAME
982
3b3b36dd 983SCM_DEFINE (scm_port_closed_p, "port-closed?", 1, 0, 0,
1bbd0b84 984 (SCM port),
1e6808ea
MG
985 "Return @code{#t} if @var{port} is closed or @code{#f} if it is\n"
986 "open.")
1bbd0b84 987#define FUNC_NAME s_scm_port_closed_p
60d0643d 988{
34d19ef6 989 SCM_VALIDATE_PORT (1, port);
7888309b 990 return scm_from_bool (!SCM_OPPORTP (port));
60d0643d 991}
1bbd0b84 992#undef FUNC_NAME
0f2d19dd 993
3b3b36dd 994SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0,
1bbd0b84 995 (SCM x),
1e6808ea
MG
996 "Return @code{#t} if @var{x} is an end-of-file object; otherwise\n"
997 "return @code{#f}.")
1bbd0b84 998#define FUNC_NAME s_scm_eof_object_p
0f2d19dd 999{
7888309b 1000 return scm_from_bool(SCM_EOF_OBJECT_P (x));
0f2d19dd 1001}
1bbd0b84 1002#undef FUNC_NAME
0f2d19dd 1003
3b3b36dd 1004SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0,
1bbd0b84 1005 (SCM port),
b380b885 1006 "Flush the specified output port, or the current output port if @var{port}\n"
9401323e 1007 "is omitted. The current output buffer contents are passed to the\n"
b380b885
MD
1008 "underlying port implementation (e.g., in the case of fports, the\n"
1009 "data will be written to the file and the output buffer will be cleared.)\n"
1010 "It has no effect on an unbuffered port.\n\n"
1011 "The return value is unspecified.")
1bbd0b84 1012#define FUNC_NAME s_scm_force_output
0f2d19dd
JB
1013{
1014 if (SCM_UNBNDP (port))
9de87eea 1015 port = scm_current_output_port ();
0f2d19dd 1016 else
78446828
MV
1017 {
1018 port = SCM_COERCE_OUTPORT (port);
34d19ef6 1019 SCM_VALIDATE_OPOUTPORT (1, port);
78446828 1020 }
affc96b5 1021 scm_flush (port);
ee149d03 1022 return SCM_UNSPECIFIED;
0f2d19dd 1023}
1bbd0b84 1024#undef FUNC_NAME
0f2d19dd 1025
5dbc6c06
HWN
1026
1027static void
61d3568b 1028flush_output_port (void *closure, SCM port)
5dbc6c06 1029{
5dbc6c06
HWN
1030 if (SCM_OPOUTPORTP (port))
1031 scm_flush (port);
1032}
1033
a1ec6916 1034SCM_DEFINE (scm_flush_all_ports, "flush-all-ports", 0, 0, 0,
1bbd0b84 1035 (),
b380b885
MD
1036 "Equivalent to calling @code{force-output} on\n"
1037 "all open output ports. The return value is unspecified.")
1bbd0b84 1038#define FUNC_NAME s_scm_flush_all_ports
89ea5b7c 1039{
5dbc6c06 1040 scm_c_port_for_each (&flush_output_port, NULL);
89ea5b7c
GH
1041 return SCM_UNSPECIFIED;
1042}
1bbd0b84 1043#undef FUNC_NAME
0f2d19dd 1044
3b3b36dd 1045SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
1bbd0b84 1046 (SCM port),
1e6808ea
MG
1047 "Return the next character available from @var{port}, updating\n"
1048 "@var{port} to point to the following character. If no more\n"
c62da8f8
LC
1049 "characters are available, the end-of-file object is returned.\n"
1050 "\n"
1051 "When @var{port}'s data cannot be decoded according to its\n"
1052 "character encoding, a @code{decoding-error} is raised and\n"
1053 "@var{port} points past the erroneous byte sequence.\n")
1bbd0b84 1054#define FUNC_NAME s_scm_read_char
0f2d19dd 1055{
889975e5 1056 scm_t_wchar c;
0f2d19dd 1057 if (SCM_UNBNDP (port))
9de87eea 1058 port = scm_current_input_port ();
34d19ef6 1059 SCM_VALIDATE_OPINPORT (1, port);
b7f3516f 1060 c = scm_getc (port);
0f2d19dd
JB
1061 if (EOF == c)
1062 return SCM_EOF_VAL;
7866a09b 1063 return SCM_MAKE_CHAR (c);
0f2d19dd 1064}
1bbd0b84 1065#undef FUNC_NAME
0f2d19dd 1066
f4bc4e59
LC
1067/* Update the line and column number of PORT after consumption of C. */
1068static inline void
1069update_port_lf (scm_t_wchar c, SCM port)
1070{
1071 switch (c)
1072 {
1073 case '\a':
7b292a9d 1074 case EOF:
f4bc4e59
LC
1075 break;
1076 case '\b':
1077 SCM_DECCOL (port);
1078 break;
1079 case '\n':
1080 SCM_INCLINE (port);
1081 break;
1082 case '\r':
1083 SCM_ZEROCOL (port);
1084 break;
1085 case '\t':
1086 SCM_TABCOL (port);
1087 break;
1088 default:
1089 SCM_INCCOL (port);
1090 break;
1091 }
1092}
1093
889975e5
MG
1094#define SCM_MBCHAR_BUF_SIZE (4)
1095
f4bc4e59
LC
1096/* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint.
1097 UTF8_BUF is assumed to contain a valid UTF-8 sequence. */
1098static scm_t_wchar
1099utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
1100{
1101 scm_t_wchar codepoint;
1102
1103 if (utf8_buf[0] <= 0x7f)
1104 {
1105 assert (size == 1);
1106 codepoint = utf8_buf[0];
1107 }
1108 else if ((utf8_buf[0] & 0xe0) == 0xc0)
1109 {
1110 assert (size == 2);
1111 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL
1112 | (utf8_buf[1] & 0x3f);
1113 }
1114 else if ((utf8_buf[0] & 0xf0) == 0xe0)
1115 {
1116 assert (size == 3);
1117 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL
1118 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL
1119 | (utf8_buf[2] & 0x3f);
1120 }
1121 else
1122 {
1123 assert (size == 4);
1124 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL
1125 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL
1126 | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL
1127 | (utf8_buf[3] & 0x3f);
1128 }
1129
1130 return codepoint;
1131}
1132
7b292a9d
LC
1133/* Read a UTF-8 sequence from PORT. On success, return 0 and set
1134 *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8
1135 representation, and set *LEN to the length in bytes. Return
1136 `EILSEQ' on error. */
c62da8f8 1137static int
7b292a9d
LC
1138get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
1139 scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
889975e5 1140{
7b292a9d
LC
1141#define ASSERT_NOT_EOF(b) \
1142 if (SCM_UNLIKELY ((b) == EOF)) \
1143 goto invalid_seq
7be1705d
LC
1144#define CONSUME_PEEKED_BYTE() \
1145 pt->read_pos++
7b292a9d
LC
1146
1147 int byte;
7be1705d 1148 scm_t_port *pt;
7b292a9d
LC
1149
1150 *len = 0;
7be1705d 1151 pt = SCM_PTAB_ENTRY (port);
7b292a9d
LC
1152
1153 byte = scm_get_byte_or_eof (port);
1154 if (byte == EOF)
1155 {
1156 *codepoint = EOF;
1157 return 0;
1158 }
1159
1160 buf[0] = (scm_t_uint8) byte;
1161 *len = 1;
1162
1163 if (buf[0] <= 0x7f)
1164 /* 1-byte form. */
1165 *codepoint = buf[0];
1166 else if (buf[0] >= 0xc2 && buf[0] <= 0xdf)
1167 {
1168 /* 2-byte form. */
7be1705d 1169 byte = scm_peek_byte_or_eof (port);
7b292a9d
LC
1170 ASSERT_NOT_EOF (byte);
1171
7b292a9d
LC
1172 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1173 goto invalid_seq;
1174
7be1705d
LC
1175 CONSUME_PEEKED_BYTE ();
1176 buf[1] = (scm_t_uint8) byte;
1177 *len = 2;
1178
7b292a9d
LC
1179 *codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL
1180 | (buf[1] & 0x3f);
1181 }
1182 else if ((buf[0] & 0xf0) == 0xe0)
1183 {
1184 /* 3-byte form. */
7be1705d
LC
1185 byte = scm_peek_byte_or_eof (port);
1186 ASSERT_NOT_EOF (byte);
7b292a9d
LC
1187
1188 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80
1189 || (buf[0] == 0xe0 && byte < 0xa0)
1190 || (buf[0] == 0xed && byte > 0x9f)))
7be1705d 1191 goto invalid_seq;
7b292a9d 1192
7be1705d
LC
1193 CONSUME_PEEKED_BYTE ();
1194 buf[1] = (scm_t_uint8) byte;
1195 *len = 2;
7b292a9d 1196
7be1705d 1197 byte = scm_peek_byte_or_eof (port);
7b292a9d
LC
1198 ASSERT_NOT_EOF (byte);
1199
7b292a9d
LC
1200 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1201 goto invalid_seq;
1202
7be1705d
LC
1203 CONSUME_PEEKED_BYTE ();
1204 buf[2] = (scm_t_uint8) byte;
1205 *len = 3;
1206
7b292a9d
LC
1207 *codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL
1208 | ((scm_t_wchar) buf[1] & 0x3f) << 6UL
1209 | (buf[2] & 0x3f);
1210 }
1211 else if (buf[0] >= 0xf0 && buf[0] <= 0xf4)
1212 {
1213 /* 4-byte form. */
7be1705d 1214 byte = scm_peek_byte_or_eof (port);
7b292a9d
LC
1215 ASSERT_NOT_EOF (byte);
1216
7b292a9d
LC
1217 if (SCM_UNLIKELY (((byte & 0xc0) != 0x80)
1218 || (buf[0] == 0xf0 && byte < 0x90)
1219 || (buf[0] == 0xf4 && byte > 0x8f)))
7be1705d 1220 goto invalid_seq;
7b292a9d 1221
7be1705d
LC
1222 CONSUME_PEEKED_BYTE ();
1223 buf[1] = (scm_t_uint8) byte;
1224 *len = 2;
1225
1226 byte = scm_peek_byte_or_eof (port);
7b292a9d
LC
1227 ASSERT_NOT_EOF (byte);
1228
7be1705d
LC
1229 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1230 goto invalid_seq;
1231
1232 CONSUME_PEEKED_BYTE ();
7b292a9d
LC
1233 buf[2] = (scm_t_uint8) byte;
1234 *len = 3;
1235
7be1705d 1236 byte = scm_peek_byte_or_eof (port);
7b292a9d
LC
1237 ASSERT_NOT_EOF (byte);
1238
7b292a9d
LC
1239 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1240 goto invalid_seq;
1241
7be1705d
LC
1242 CONSUME_PEEKED_BYTE ();
1243 buf[3] = (scm_t_uint8) byte;
1244 *len = 4;
1245
7b292a9d
LC
1246 *codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL
1247 | ((scm_t_wchar) buf[1] & 0x3f) << 12UL
1248 | ((scm_t_wchar) buf[2] & 0x3f) << 6UL
1249 | (buf[3] & 0x3f);
1250 }
1251 else
1252 goto invalid_seq;
1253
1254 return 0;
1255
1256 invalid_seq:
7be1705d
LC
1257 /* Here we could choose the consume the faulty byte when it's not a
1258 valid starting byte, but it's not a requirement. What Section 3.9
1259 of Unicode 6.0.0 mandates, though, is to not consume a byte that
1260 would otherwise be a valid starting byte. */
1261
7b292a9d
LC
1262 return EILSEQ;
1263
7be1705d 1264#undef CONSUME_PEEKED_BYTE
7b292a9d
LC
1265#undef ASSERT_NOT_EOF
1266}
1267
1268/* Likewise, read a byte sequence from PORT, passing it through its
1269 input conversion descriptor. */
1270static int
1271get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
1272 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1273{
1274 scm_t_port *pt;
f4bc4e59
LC
1275 int err, byte_read;
1276 size_t bytes_consumed, output_size;
f4bc4e59
LC
1277 char *output;
1278 scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
889975e5 1279
7b292a9d 1280 pt = SCM_PTAB_ENTRY (port);
889975e5 1281
f4bc4e59
LC
1282 for (output_size = 0, output = (char *) utf8_buf,
1283 bytes_consumed = 0, err = 0;
1284 err == 0 && output_size == 0
1285 && (bytes_consumed == 0 || byte_read != EOF);
1286 bytes_consumed++)
63479112 1287 {
f4bc4e59
LC
1288 char *input;
1289 size_t input_left, output_left, done;
889975e5 1290
f4bc4e59
LC
1291 byte_read = scm_get_byte_or_eof (port);
1292 if (byte_read == EOF)
889975e5 1293 {
f4bc4e59 1294 if (bytes_consumed == 0)
c62da8f8
LC
1295 {
1296 *codepoint = (scm_t_wchar) EOF;
1297 *len = 0;
1298 return 0;
1299 }
f4bc4e59
LC
1300 else
1301 continue;
889975e5
MG
1302 }
1303
f4bc4e59 1304 buf[bytes_consumed] = byte_read;
889975e5 1305
f4bc4e59
LC
1306 input = buf;
1307 input_left = bytes_consumed + 1;
1308 output_left = sizeof (utf8_buf);
889975e5 1309
f4bc4e59
LC
1310 done = iconv (pt->input_cd, &input, &input_left,
1311 &output, &output_left);
1312 if (done == (size_t) -1)
889975e5 1313 {
f4bc4e59
LC
1314 err = errno;
1315 if (err == EINVAL)
1316 /* Missing input: keep trying. */
1317 err = 0;
889975e5 1318 }
f4bc4e59
LC
1319 else
1320 output_size = sizeof (utf8_buf) - output_left;
889975e5
MG
1321 }
1322
a42d7971
LC
1323 if (SCM_UNLIKELY (output_size == 0))
1324 /* An unterminated sequence. */
1325 err = EILSEQ;
7b292a9d 1326 else if (SCM_LIKELY (err == 0))
cc540d0b 1327 {
7b292a9d
LC
1328 /* Convert the UTF8_BUF sequence to a Unicode code point. */
1329 *codepoint = utf8_to_codepoint (utf8_buf, output_size);
1330 *len = bytes_consumed;
1331 }
cc540d0b 1332
7b292a9d
LC
1333 return err;
1334}
c62da8f8 1335
7b292a9d
LC
1336/* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
1337 with the byte representation of the codepoint in PORT's encoding, and
1338 set *LEN to the length in bytes of that representation. Return 0 on
1339 success and an errno value on error. */
1340static int
1341get_codepoint (SCM port, scm_t_wchar *codepoint,
1342 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1343{
1344 int err;
1345 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1346
1347 if (pt->input_cd == (iconv_t) -1)
1348 /* Initialize the conversion descriptors, if needed. */
1349 scm_i_set_port_encoding_x (port, pt->encoding);
1350
1351 /* FIXME: In 2.1, add a flag to determine whether a port is UTF-8. */
1352 if (pt->input_cd == (iconv_t) -1)
1353 err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
cc540d0b 1354 else
7b292a9d
LC
1355 err = get_iconv_codepoint (port, codepoint, buf, len);
1356
1357 if (SCM_LIKELY (err == 0))
1358 update_port_lf (*codepoint, port);
1359 else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
a42d7971 1360 {
7b292a9d
LC
1361 *codepoint = '?';
1362 err = 0;
a42d7971
LC
1363 update_port_lf (*codepoint, port);
1364 }
889975e5 1365
c62da8f8 1366 return err;
889975e5
MG
1367}
1368
fd5eec2b
LC
1369/* Read a codepoint from PORT and return it. */
1370scm_t_wchar
1371scm_getc (SCM port)
c62da8f8 1372#define FUNC_NAME "scm_getc"
fd5eec2b 1373{
c62da8f8 1374 int err;
fd5eec2b 1375 size_t len;
c62da8f8 1376 scm_t_wchar codepoint;
fd5eec2b
LC
1377 char buf[SCM_MBCHAR_BUF_SIZE];
1378
c62da8f8
LC
1379 err = get_codepoint (port, &codepoint, buf, &len);
1380 if (SCM_UNLIKELY (err != 0))
1381 /* At this point PORT should point past the invalid encoding, as per
1382 R6RS-lib Section 8.2.4. */
1383 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
1384
1385 return codepoint;
fd5eec2b 1386}
c62da8f8 1387#undef FUNC_NAME
889975e5 1388
5c070ca7 1389/* this should only be called when the read buffer is empty. it
affc96b5 1390 tries to refill the read buffer. it returns the first char from
5c070ca7 1391 the port, which is either EOF or *(pt->read_pos). */
6c951427 1392int
affc96b5 1393scm_fill_input (SCM port)
6c951427 1394{
92c2555f 1395 scm_t_port *pt = SCM_PTAB_ENTRY (port);
283a1a0e 1396
b5cb4464
NJ
1397 assert (pt->read_pos == pt->read_end);
1398
6c951427
GH
1399 if (pt->read_buf == pt->putback_buf)
1400 {
1401 /* finished reading put-back chars. */
1402 pt->read_buf = pt->saved_read_buf;
1403 pt->read_pos = pt->saved_read_pos;
1404 pt->read_end = pt->saved_read_end;
1405 pt->read_buf_size = pt->saved_read_buf_size;
1406 if (pt->read_pos < pt->read_end)
5c070ca7 1407 return *(pt->read_pos);
6c951427 1408 }
62bd5d66 1409 return SCM_PORT_DESCRIPTOR (port)->fill_input (port);
6c951427
GH
1410}
1411
3cb988bd 1412
6fe692e9
MD
1413/* scm_lfwrite
1414 *
53802ea8
MV
1415 * This function differs from scm_c_write; it updates port line and
1416 * column. */
6fe692e9 1417
9c44cd45 1418void
1be6b49c 1419scm_lfwrite (const char *ptr, size_t size, SCM port)
ee149d03 1420{
92c2555f 1421 scm_t_port *pt = SCM_PTAB_ENTRY (port);
62bd5d66 1422 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
3e2043c4 1423
840ae05d 1424 if (pt->rw_active == SCM_PORT_READ)
affc96b5 1425 scm_end_input (port);
283a1a0e 1426
31703ab8 1427 ptob->write (port, ptr, size);
840ae05d 1428
9c44cd45
MG
1429 for (; size; ptr++, size--)
1430 update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port);
1431
1432 if (pt->rw_random)
1433 pt->rw_active = SCM_PORT_WRITE;
1434}
1435
4325620f 1436/* Write STR to PORT from START inclusive to END exclusive. */
9c44cd45
MG
1437void
1438scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
1439{
9c44cd45 1440 scm_t_port *pt = SCM_PTAB_ENTRY (port);
9c44cd45
MG
1441
1442 if (pt->rw_active == SCM_PORT_READ)
1443 scm_end_input (port);
1444
4325620f
LC
1445 if (end == (size_t) -1)
1446 end = scm_i_string_length (str);
9c44cd45 1447
4325620f 1448 scm_display (scm_c_substring (str, start, end), port);
53802ea8 1449
840ae05d
JB
1450 if (pt->rw_random)
1451 pt->rw_active = SCM_PORT_WRITE;
ee149d03 1452}
3cb988bd 1453
6fe692e9
MD
1454/* scm_c_read
1455 *
1456 * Used by an application to read arbitrary number of bytes from an
1457 * SCM port. Same semantics as libc read, except that scm_c_read only
1458 * returns less than SIZE bytes if at end-of-file.
1459 *
1460 * Warning: Doesn't update port line and column counts! */
1461
b5cb4464
NJ
1462/* This structure, and the following swap_buffer function, are used
1463 for temporarily swapping a port's own read buffer, and the buffer
1464 that the caller of scm_c_read provides. */
1465struct port_and_swap_buffer
1466{
1467 scm_t_port *pt;
1468 unsigned char *buffer;
1469 size_t size;
1470};
1471
1472static void
1473swap_buffer (void *data)
1474{
1475 struct port_and_swap_buffer *psb = (struct port_and_swap_buffer *) data;
1476 unsigned char *old_buf = psb->pt->read_buf;
1477 size_t old_size = psb->pt->read_buf_size;
1478
1479 /* Make the port use (buffer, size) from the struct. */
1480 psb->pt->read_pos = psb->pt->read_buf = psb->pt->read_end = psb->buffer;
1481 psb->pt->read_buf_size = psb->size;
1482
1483 /* Save the port's old (buffer, size) in the struct. */
1484 psb->buffer = old_buf;
1485 psb->size = old_size;
1486}
1487
1be6b49c
ML
1488size_t
1489scm_c_read (SCM port, void *buffer, size_t size)
693758d5 1490#define FUNC_NAME "scm_c_read"
6fe692e9 1491{
693758d5 1492 scm_t_port *pt;
1be6b49c 1493 size_t n_read = 0, n_available;
b5cb4464 1494 struct port_and_swap_buffer psb;
6fe692e9 1495
693758d5
LC
1496 SCM_VALIDATE_OPINPORT (1, port);
1497
1498 pt = SCM_PTAB_ENTRY (port);
6fe692e9 1499 if (pt->rw_active == SCM_PORT_WRITE)
62bd5d66 1500 SCM_PORT_DESCRIPTOR (port)->flush (port);
6fe692e9
MD
1501
1502 if (pt->rw_random)
1503 pt->rw_active = SCM_PORT_READ;
1504
b5cb4464
NJ
1505 /* Take bytes first from the port's read buffer. */
1506 if (pt->read_pos < pt->read_end)
6fe692e9 1507 {
b5cb4464 1508 n_available = min (size, pt->read_end - pt->read_pos);
6fe692e9 1509 memcpy (buffer, pt->read_pos, n_available);
910d1e40 1510 buffer = (char *) buffer + n_available;
6fe692e9
MD
1511 pt->read_pos += n_available;
1512 n_read += n_available;
6fe692e9 1513 size -= n_available;
6fe692e9
MD
1514 }
1515
b5cb4464
NJ
1516 /* Avoid the scm_dynwind_* costs if we now have enough data. */
1517 if (size == 0)
1518 return n_read;
1519
1520 /* Now we will call scm_fill_input repeatedly until we have read the
1521 requested number of bytes. (Note that a single scm_fill_input
1522 call does not guarantee to fill the whole of the port's read
6d227556 1523 buffer.) */
75192345 1524 if (pt->read_buf_size <= 1 && pt->encoding == NULL)
b5cb4464 1525 {
6d227556
NJ
1526 /* The port that we are reading from is unbuffered - i.e. does
1527 not have its own persistent buffer - but we have a buffer,
1528 provided by our caller, that is the right size for the data
1529 that is wanted. For the following scm_fill_input calls,
1530 therefore, we use the buffer in hand as the port's read
1531 buffer.
1532
1533 We need to make sure that the port's normal (1 byte) buffer
1534 is reinstated in case one of the scm_fill_input () calls
1535 throws an exception; we use the scm_dynwind_* API to achieve
75192345
MG
1536 that.
1537
1538 A consequence of this optimization is that the fill_input
1539 functions can't unget characters. That'll push data to the
1540 pushback buffer instead of this psb buffer. */
1541#if SCM_DEBUG == 1
1542 unsigned char *pback = pt->putback_buf;
1543#endif
6d227556
NJ
1544 psb.pt = pt;
1545 psb.buffer = buffer;
1546 psb.size = size;
1547 scm_dynwind_begin (SCM_F_DYNWIND_REWINDABLE);
1548 scm_dynwind_rewind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1549 scm_dynwind_unwind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1550
1551 /* Call scm_fill_input until we have all the bytes that we need,
1552 or we hit EOF. */
1553 while (pt->read_buf_size && (scm_fill_input (port) != EOF))
1554 {
1555 pt->read_buf_size -= (pt->read_end - pt->read_pos);
1556 pt->read_pos = pt->read_buf = pt->read_end;
1557 }
75192345
MG
1558#if SCM_DEBUG == 1
1559 if (pback != pt->putback_buf
1560 || pt->read_buf - (unsigned char *) buffer < 0)
1561 scm_misc_error (FUNC_NAME,
1562 "scm_c_read must not call a fill function that pushes "
1563 "back characters onto an unbuffered port", SCM_EOL);
1564#endif
6d227556 1565 n_read += pt->read_buf - (unsigned char *) buffer;
75192345 1566
6d227556
NJ
1567 /* Reinstate the port's normal buffer. */
1568 scm_dynwind_end ();
1569 }
1570 else
1571 {
1572 /* The port has its own buffer. It is important that we use it,
1573 even if it happens to be smaller than our caller's buffer, so
1574 that a custom port implementation's entry points (in
1575 particular, fill_input) can rely on the buffer always being
1576 the same as they first set up. */
1577 while (size && (scm_fill_input (port) != EOF))
1578 {
1579 n_available = min (size, pt->read_end - pt->read_pos);
1580 memcpy (buffer, pt->read_pos, n_available);
1581 buffer = (char *) buffer + n_available;
1582 pt->read_pos += n_available;
1583 n_read += n_available;
1584 size -= n_available;
1585 }
1586 }
6fe692e9 1587
b5cb4464 1588 return n_read;
6fe692e9 1589}
693758d5 1590#undef FUNC_NAME
6fe692e9
MD
1591
1592/* scm_c_write
1593 *
1594 * Used by an application to write arbitrary number of bytes to an SCM
1595 * port. Similar semantics as libc write. However, unlike libc
1596 * write, scm_c_write writes the requested number of bytes and has no
1597 * return value.
1598 *
1599 * Warning: Doesn't update port line and column counts!
1600 */
1601
693758d5 1602void
1be6b49c 1603scm_c_write (SCM port, const void *ptr, size_t size)
693758d5 1604#define FUNC_NAME "scm_c_write"
6fe692e9 1605{
693758d5
LC
1606 scm_t_port *pt;
1607 scm_t_ptob_descriptor *ptob;
1608
1609 SCM_VALIDATE_OPOUTPORT (1, port);
1610
1611 pt = SCM_PTAB_ENTRY (port);
62bd5d66 1612 ptob = SCM_PORT_DESCRIPTOR (port);
6fe692e9
MD
1613
1614 if (pt->rw_active == SCM_PORT_READ)
1615 scm_end_input (port);
1616
1617 ptob->write (port, ptr, size);
1618
1619 if (pt->rw_random)
1620 pt->rw_active = SCM_PORT_WRITE;
1621}
693758d5 1622#undef FUNC_NAME
3cb988bd 1623
fca43887 1624void
a284e297 1625scm_flush (SCM port)
ee149d03 1626{
62bd5d66 1627 SCM_PORT_DESCRIPTOR (port)->flush (port);
ee149d03
JB
1628}
1629
283a1a0e 1630void
a284e297 1631scm_end_input (SCM port)
283a1a0e 1632{
c014a02e 1633 long offset;
92c2555f 1634 scm_t_port *pt = SCM_PTAB_ENTRY (port);
283a1a0e
GH
1635
1636 if (pt->read_buf == pt->putback_buf)
1637 {
1638 offset = pt->read_end - pt->read_pos;
1639 pt->read_buf = pt->saved_read_buf;
1640 pt->read_pos = pt->saved_read_pos;
1641 pt->read_end = pt->saved_read_end;
1642 pt->read_buf_size = pt->saved_read_buf_size;
1643 }
1644 else
1645 offset = 0;
1646
62bd5d66 1647 SCM_PORT_DESCRIPTOR (port)->end_input (port, offset);
283a1a0e
GH
1648}
1649
ee149d03
JB
1650\f
1651
1652
1653void
889975e5
MG
1654scm_unget_byte (int c, SCM port)
1655#define FUNC_NAME "scm_unget_byte"
ee149d03 1656{
92c2555f 1657 scm_t_port *pt = SCM_PTAB_ENTRY (port);
840ae05d 1658
6c951427
GH
1659 if (pt->read_buf == pt->putback_buf)
1660 /* already using the put-back buffer. */
1661 {
1662 /* enlarge putback_buf if necessary. */
1663 if (pt->read_end == pt->read_buf + pt->read_buf_size
1664 && pt->read_buf == pt->read_pos)
1665 {
1be6b49c 1666 size_t new_size = pt->read_buf_size * 2;
c6c79933 1667 unsigned char *tmp = (unsigned char *)
4c9419ac
MV
1668 scm_gc_realloc (pt->putback_buf, pt->read_buf_size, new_size,
1669 "putback buffer");
6c951427 1670
6c951427
GH
1671 pt->read_pos = pt->read_buf = pt->putback_buf = tmp;
1672 pt->read_end = pt->read_buf + pt->read_buf_size;
1673 pt->read_buf_size = pt->putback_buf_size = new_size;
1674 }
1675
1676 /* shift any existing bytes to buffer + 1. */
1677 if (pt->read_pos == pt->read_end)
1678 pt->read_end = pt->read_buf + 1;
1679 else if (pt->read_pos != pt->read_buf + 1)
1680 {
1681 int count = pt->read_end - pt->read_pos;
1682
1683 memmove (pt->read_buf + 1, pt->read_pos, count);
1684 pt->read_end = pt->read_buf + 1 + count;
1685 }
1686
1687 pt->read_pos = pt->read_buf;
1688 }
1689 else
1690 /* switch to the put-back buffer. */
1691 {
1692 if (pt->putback_buf == NULL)
1693 {
c357d546 1694 pt->putback_buf
92d8fd32
LC
1695 = (unsigned char *) scm_gc_malloc_pointerless
1696 (SCM_INITIAL_PUTBACK_BUF_SIZE, "putback buffer");
6c951427
GH
1697 pt->putback_buf_size = SCM_INITIAL_PUTBACK_BUF_SIZE;
1698 }
1699
1700 pt->saved_read_buf = pt->read_buf;
1701 pt->saved_read_pos = pt->read_pos;
1702 pt->saved_read_end = pt->read_end;
1703 pt->saved_read_buf_size = pt->read_buf_size;
1704
1705 pt->read_pos = pt->read_buf = pt->putback_buf;
1706 pt->read_end = pt->read_buf + 1;
1707 pt->read_buf_size = pt->putback_buf_size;
1708 }
1709
1710 *pt->read_buf = c;
ee149d03 1711
840ae05d
JB
1712 if (pt->rw_random)
1713 pt->rw_active = SCM_PORT_READ;
889975e5
MG
1714}
1715#undef FUNC_NAME
1716
63479112 1717void
889975e5
MG
1718scm_ungetc (scm_t_wchar c, SCM port)
1719#define FUNC_NAME "scm_ungetc"
1720{
1721 scm_t_port *pt = SCM_PTAB_ENTRY (port);
63479112
LC
1722 char *result;
1723 char result_buf[10];
1724 const char *encoding;
889975e5
MG
1725 size_t len;
1726 int i;
1727
63479112
LC
1728 if (pt->encoding != NULL)
1729 encoding = pt->encoding;
1730 else
1731 encoding = "ISO-8859-1";
1732
1733 len = sizeof (result_buf);
1734 result = u32_conv_to_encoding (encoding,
1735 (enum iconv_ilseq_handler) pt->ilseq_handler,
1736 (uint32_t *) &c, 1, NULL,
1737 result_buf, &len);
1738
1739 if (SCM_UNLIKELY (result == NULL || len == 0))
6851d3be
LC
1740 scm_encoding_error (FUNC_NAME, errno,
1741 "conversion to port encoding failed",
1742 SCM_BOOL_F, SCM_MAKE_CHAR (c));
63479112 1743
889975e5 1744 for (i = len - 1; i >= 0; i--)
63479112
LC
1745 scm_unget_byte (result[i], port);
1746
1747 if (SCM_UNLIKELY (result != result_buf))
1748 free (result);
840ae05d 1749
ee149d03
JB
1750 if (c == '\n')
1751 {
1752 /* What should col be in this case?
1753 * We'll leave it at -1.
1754 */
1755 SCM_LINUM (port) -= 1;
1756 }
1757 else
1758 SCM_COL(port) -= 1;
1759}
c6c79933 1760#undef FUNC_NAME
ee149d03
JB
1761
1762
1763void
70d63753 1764scm_ungets (const char *s, int n, SCM port)
ee149d03
JB
1765{
1766 /* This is simple minded and inefficient, but unreading strings is
1767 * probably not a common operation, and remember that line and
1768 * column numbers have to be handled...
1769 *
1770 * Please feel free to write an optimized version!
1771 */
1772 while (n--)
1773 scm_ungetc (s[n], port);
1774}
1775
1776
3b3b36dd 1777SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
1bbd0b84 1778 (SCM port),
1e6808ea
MG
1779 "Return the next character available from @var{port},\n"
1780 "@emph{without} updating @var{port} to point to the following\n"
1781 "character. If no more characters are available, the\n"
c2dfff19
KR
1782 "end-of-file object is returned.\n"
1783 "\n"
1784 "The value returned by\n"
1e6808ea
MG
1785 "a call to @code{peek-char} is the same as the value that would\n"
1786 "have been returned by a call to @code{read-char} on the same\n"
1787 "port. The only difference is that the very next call to\n"
1788 "@code{read-char} or @code{peek-char} on that @var{port} will\n"
1789 "return the value returned by the preceding call to\n"
1790 "@code{peek-char}. In particular, a call to @code{peek-char} on\n"
1791 "an interactive port will hang waiting for input whenever a call\n"
c62da8f8
LC
1792 "to @code{read-char} would have hung.\n"
1793 "\n"
1794 "As for @code{read-char}, a @code{decoding-error} may be raised\n"
1795 "if such a situation occurs. However, unlike with @code{read-char},\n"
1796 "@var{port} still points at the beginning of the erroneous byte\n"
1797 "sequence when the error is raised.\n")
1bbd0b84 1798#define FUNC_NAME s_scm_peek_char
ee149d03 1799{
c62da8f8 1800 int err;
fd5eec2b
LC
1801 SCM result;
1802 scm_t_wchar c;
1803 char bytes[SCM_MBCHAR_BUF_SIZE];
c62da8f8 1804 long column, line, i;
fd5eec2b
LC
1805 size_t len;
1806
ee149d03 1807 if (SCM_UNBNDP (port))
9de87eea 1808 port = scm_current_input_port ();
b2456dd4 1809 SCM_VALIDATE_OPINPORT (1, port);
fd5eec2b
LC
1810
1811 column = SCM_COL (port);
1812 line = SCM_LINUM (port);
1813
c62da8f8 1814 err = get_codepoint (port, &c, bytes, &len);
fd5eec2b 1815
c62da8f8
LC
1816 for (i = len - 1; i >= 0; i--)
1817 scm_unget_byte (bytes[i], port);
fd5eec2b 1818
c62da8f8
LC
1819 SCM_COL (port) = column;
1820 SCM_LINUM (port) = line;
fd5eec2b 1821
c62da8f8
LC
1822 if (SCM_UNLIKELY (err != 0))
1823 {
1824 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
1825
1826 /* Shouldn't happen since `catch' always aborts to prompt. */
1827 result = SCM_BOOL_F;
fd5eec2b 1828 }
c62da8f8
LC
1829 else if (c == EOF)
1830 result = SCM_EOF_VAL;
1831 else
1832 result = SCM_MAKE_CHAR (c);
fd5eec2b
LC
1833
1834 return result;
3cb988bd 1835}
1bbd0b84 1836#undef FUNC_NAME
3cb988bd 1837
1be4270a 1838SCM_DEFINE (scm_unread_char, "unread-char", 1, 1, 0,
1bbd0b84 1839 (SCM cobj, SCM port),
b380b885
MD
1840 "Place @var{char} in @var{port} so that it will be read by the\n"
1841 "next read operation. If called multiple times, the unread characters\n"
1842 "will be read again in last-in first-out order. If @var{port} is\n"
1843 "not supplied, the current input port is used.")
1bbd0b84 1844#define FUNC_NAME s_scm_unread_char
0f2d19dd
JB
1845{
1846 int c;
1847
34d19ef6 1848 SCM_VALIDATE_CHAR (1, cobj);
0f2d19dd 1849 if (SCM_UNBNDP (port))
9de87eea 1850 port = scm_current_input_port ();
b2456dd4 1851 SCM_VALIDATE_OPINPORT (2, port);
0f2d19dd 1852
7866a09b 1853 c = SCM_CHAR (cobj);
0f2d19dd 1854
b7f3516f 1855 scm_ungetc (c, port);
0f2d19dd
JB
1856 return cobj;
1857}
1bbd0b84 1858#undef FUNC_NAME
0f2d19dd 1859
a1ec6916 1860SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0,
1bbd0b84 1861 (SCM str, SCM port),
b380b885
MD
1862 "Place the string @var{str} in @var{port} so that its characters will be\n"
1863 "read in subsequent read operations. If called multiple times, the\n"
1864 "unread characters will be read again in last-in first-out order. If\n"
1865 "@var{port} is not supplied, the current-input-port is used.")
1bbd0b84 1866#define FUNC_NAME s_scm_unread_string
ee1e7e13 1867{
889975e5 1868 int n;
34d19ef6 1869 SCM_VALIDATE_STRING (1, str);
ee1e7e13 1870 if (SCM_UNBNDP (port))
9de87eea 1871 port = scm_current_input_port ();
b2456dd4 1872 SCM_VALIDATE_OPINPORT (2, port);
ee1e7e13 1873
889975e5
MG
1874 n = scm_i_string_length (str);
1875
1876 while (n--)
1877 scm_ungetc (scm_i_string_ref (str, n), port);
ee1e7e13
MD
1878
1879 return str;
1880}
1bbd0b84 1881#undef FUNC_NAME
ee1e7e13 1882
a1ec6916 1883SCM_DEFINE (scm_seek, "seek", 3, 0, 0,
1e6808ea
MG
1884 (SCM fd_port, SCM offset, SCM whence),
1885 "Sets the current position of @var{fd/port} to the integer\n"
1886 "@var{offset}, which is interpreted according to the value of\n"
1887 "@var{whence}.\n"
1888 "\n"
1889 "One of the following variables should be supplied for\n"
1890 "@var{whence}:\n"
b380b885
MD
1891 "@defvar SEEK_SET\n"
1892 "Seek from the beginning of the file.\n"
1893 "@end defvar\n"
1894 "@defvar SEEK_CUR\n"
1895 "Seek from the current position.\n"
1896 "@end defvar\n"
1897 "@defvar SEEK_END\n"
1898 "Seek from the end of the file.\n"
1e6808ea
MG
1899 "@end defvar\n"
1900 "If @var{fd/port} is a file descriptor, the underlying system\n"
1901 "call is @code{lseek}. @var{port} may be a string port.\n"
1902 "\n"
1903 "The value returned is the new position in the file. This means\n"
1904 "that the current position of a port can be obtained using:\n"
1905 "@lisp\n"
b380b885 1906 "(seek port 0 SEEK_CUR)\n"
1e6808ea 1907 "@end lisp")
1bbd0b84 1908#define FUNC_NAME s_scm_seek
840ae05d 1909{
840ae05d
JB
1910 int how;
1911
1e6808ea 1912 fd_port = SCM_COERCE_OUTPORT (fd_port);
840ae05d 1913
a55c2b68 1914 how = scm_to_int (whence);
840ae05d 1915 if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END)
1bbd0b84 1916 SCM_OUT_OF_RANGE (3, whence);
23f2b9a3 1917
0a94eb00 1918 if (SCM_OPPORTP (fd_port))
840ae05d 1919 {
62bd5d66 1920 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (fd_port);
f1ce9199
LC
1921 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
1922 off_t_or_off64_t rv;
840ae05d
JB
1923
1924 if (!ptob->seek)
1bbd0b84 1925 SCM_MISC_ERROR ("port is not seekable",
1e6808ea 1926 scm_cons (fd_port, SCM_EOL));
840ae05d 1927 else
1e6808ea 1928 rv = ptob->seek (fd_port, off, how);
f1ce9199 1929 return scm_from_off_t_or_off64_t (rv);
840ae05d
JB
1930 }
1931 else /* file descriptor?. */
1932 {
23f2b9a3
KR
1933 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
1934 off_t_or_off64_t rv;
1935 rv = lseek_or_lseek64 (scm_to_int (fd_port), off, how);
840ae05d 1936 if (rv == -1)
1bbd0b84 1937 SCM_SYSERROR;
23f2b9a3 1938 return scm_from_off_t_or_off64_t (rv);
840ae05d 1939 }
840ae05d 1940}
1bbd0b84 1941#undef FUNC_NAME
840ae05d 1942
8ab3d8a0
KR
1943#ifndef O_BINARY
1944#define O_BINARY 0
1945#endif
1946
1947/* Mingw has ftruncate(), perhaps implemented above using chsize, but
1948 doesn't have the filename version truncate(), hence this code. */
1949#if HAVE_FTRUNCATE && ! HAVE_TRUNCATE
1950static int
1951truncate (const char *file, off_t length)
82893676 1952{
8ab3d8a0
KR
1953 int ret, fdes;
1954
1955 fdes = open (file, O_BINARY | O_WRONLY);
1956 if (fdes == -1)
1957 return -1;
1958
1959 ret = ftruncate (fdes, length);
1960 if (ret == -1)
82893676 1961 {
8ab3d8a0 1962 int save_errno = errno;
82893676 1963 close (fdes);
8ab3d8a0
KR
1964 errno = save_errno;
1965 return -1;
82893676 1966 }
8ab3d8a0
KR
1967
1968 return close (fdes);
82893676 1969}
8ab3d8a0 1970#endif /* HAVE_FTRUNCATE && ! HAVE_TRUNCATE */
82893676 1971
a1ec6916 1972SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0,
1bbd0b84 1973 (SCM object, SCM length),
8ab3d8a0
KR
1974 "Truncate @var{file} to @var{length} bytes. @var{file} can be a\n"
1975 "filename string, a port object, or an integer file descriptor.\n"
1976 "The return value is unspecified.\n"
1977 "\n"
1978 "For a port or file descriptor @var{length} can be omitted, in\n"
1979 "which case the file is truncated at the current position (per\n"
1980 "@code{ftell} above).\n"
1981 "\n"
1982 "On most systems a file can be extended by giving a length\n"
1983 "greater than the current size, but this is not mandatory in the\n"
1984 "POSIX standard.")
1bbd0b84 1985#define FUNC_NAME s_scm_truncate_file
840ae05d 1986{
69bc9ff3 1987 int rv;
69bc9ff3 1988
2b829bbb
KR
1989 /* "object" can be a port, fdes or filename.
1990
1991 Negative "length" makes no sense, but it's left to truncate() or
1992 ftruncate() to give back an error for that (normally EINVAL).
1993 */
840ae05d 1994
840ae05d
JB
1995 if (SCM_UNBNDP (length))
1996 {
69bc9ff3 1997 /* must supply length if object is a filename. */
7f9994d9 1998 if (scm_is_string (object))
34d19ef6 1999 SCM_MISC_ERROR("must supply length if OBJECT is a filename", SCM_EOL);
69bc9ff3 2000
e11e83f3 2001 length = scm_seek (object, SCM_INUM0, scm_from_int (SEEK_CUR));
840ae05d 2002 }
3fe6190f 2003
69bc9ff3 2004 object = SCM_COERCE_OUTPORT (object);
e11e83f3 2005 if (scm_is_integer (object))
69bc9ff3 2006 {
23f2b9a3
KR
2007 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2008 SCM_SYSCALL (rv = ftruncate_or_ftruncate64 (scm_to_int (object),
2009 c_length));
69bc9ff3 2010 }
0c95b57d 2011 else if (SCM_OPOUTPORTP (object))
69bc9ff3 2012 {
f1ce9199 2013 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
92c2555f 2014 scm_t_port *pt = SCM_PTAB_ENTRY (object);
62bd5d66 2015 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (object);
69bc9ff3 2016
affc96b5 2017 if (!ptob->truncate)
1bbd0b84 2018 SCM_MISC_ERROR ("port is not truncatable", SCM_EOL);
69bc9ff3 2019 if (pt->rw_active == SCM_PORT_READ)
affc96b5 2020 scm_end_input (object);
69bc9ff3 2021 else if (pt->rw_active == SCM_PORT_WRITE)
affc96b5 2022 ptob->flush (object);
69bc9ff3 2023
affc96b5 2024 ptob->truncate (object, c_length);
69bc9ff3
GH
2025 rv = 0;
2026 }
2027 else
2028 {
2b829bbb 2029 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
7f9994d9
MV
2030 char *str = scm_to_locale_string (object);
2031 int eno;
2b829bbb 2032 SCM_SYSCALL (rv = truncate_or_truncate64 (str, c_length));
7f9994d9
MV
2033 eno = errno;
2034 free (str);
2035 errno = eno;
69bc9ff3
GH
2036 }
2037 if (rv == -1)
1bbd0b84 2038 SCM_SYSERROR;
840ae05d
JB
2039 return SCM_UNSPECIFIED;
2040}
1bbd0b84 2041#undef FUNC_NAME
840ae05d 2042
a1ec6916 2043SCM_DEFINE (scm_port_line, "port-line", 1, 0, 0,
1bbd0b84 2044 (SCM port),
a150979d
KR
2045 "Return the current line number for @var{port}.\n"
2046 "\n"
2047 "The first line of a file is 0. But you might want to add 1\n"
2048 "when printing line numbers, since starting from 1 is\n"
2049 "traditional in error messages, and likely to be more natural to\n"
2050 "non-programmers.")
1bbd0b84 2051#define FUNC_NAME s_scm_port_line
0f2d19dd 2052{
78446828 2053 port = SCM_COERCE_OUTPORT (port);
34d19ef6 2054 SCM_VALIDATE_OPENPORT (1, port);
651f2cd2 2055 return scm_from_long (SCM_LINUM (port));
0f2d19dd 2056}
1bbd0b84 2057#undef FUNC_NAME
0f2d19dd 2058
a1ec6916 2059SCM_DEFINE (scm_set_port_line_x, "set-port-line!", 2, 0, 0,
1bbd0b84 2060 (SCM port, SCM line),
a150979d
KR
2061 "Set the current line number for @var{port} to @var{line}. The\n"
2062 "first line of a file is 0.")
1bbd0b84 2063#define FUNC_NAME s_scm_set_port_line_x
d043d8c2 2064{
360fc44c 2065 port = SCM_COERCE_OUTPORT (port);
34d19ef6 2066 SCM_VALIDATE_OPENPORT (1, port);
651f2cd2 2067 SCM_PTAB_ENTRY (port)->line_number = scm_to_long (line);
564478fd 2068 return SCM_UNSPECIFIED;
d043d8c2 2069}
1bbd0b84 2070#undef FUNC_NAME
d043d8c2 2071
a1ec6916 2072SCM_DEFINE (scm_port_column, "port-column", 1, 0, 0,
1bbd0b84 2073 (SCM port),
a150979d
KR
2074 "Return the current column number of @var{port}.\n"
2075 "If the number is\n"
b380b885
MD
2076 "unknown, the result is #f. Otherwise, the result is a 0-origin integer\n"
2077 "- i.e. the first character of the first line is line 0, column 0.\n"
2078 "(However, when you display a file position, for example in an error\n"
650a1cf9 2079 "message, we recommend you add 1 to get 1-origin integers. This is\n"
b380b885
MD
2080 "because lines and column numbers traditionally start with 1, and that is\n"
2081 "what non-programmers will find most natural.)")
1bbd0b84 2082#define FUNC_NAME s_scm_port_column
0f2d19dd 2083{
78446828 2084 port = SCM_COERCE_OUTPORT (port);
34d19ef6 2085 SCM_VALIDATE_OPENPORT (1, port);
e11e83f3 2086 return scm_from_int (SCM_COL (port));
0f2d19dd 2087}
1bbd0b84 2088#undef FUNC_NAME
0f2d19dd 2089
a1ec6916 2090SCM_DEFINE (scm_set_port_column_x, "set-port-column!", 2, 0, 0,
1bbd0b84 2091 (SCM port, SCM column),
a150979d
KR
2092 "Set the current column of @var{port}. Before reading the first\n"
2093 "character on a line the column should be 0.")
1bbd0b84 2094#define FUNC_NAME s_scm_set_port_column_x
d043d8c2 2095{
360fc44c 2096 port = SCM_COERCE_OUTPORT (port);
34d19ef6 2097 SCM_VALIDATE_OPENPORT (1, port);
a55c2b68 2098 SCM_PTAB_ENTRY (port)->column_number = scm_to_int (column);
564478fd 2099 return SCM_UNSPECIFIED;
d043d8c2 2100}
1bbd0b84 2101#undef FUNC_NAME
d043d8c2 2102
a1ec6916 2103SCM_DEFINE (scm_port_filename, "port-filename", 1, 0, 0,
1bbd0b84 2104 (SCM port),
ac012a27
AW
2105 "Return the filename associated with @var{port}, or @code{#f}\n"
2106 "if no filename is associated with the port.")
1bbd0b84 2107#define FUNC_NAME s_scm_port_filename
0f2d19dd 2108{
78446828 2109 port = SCM_COERCE_OUTPORT (port);
34d19ef6 2110 SCM_VALIDATE_OPENPORT (1, port);
b24b5e13 2111 return SCM_FILENAME (port);
0f2d19dd 2112}
1bbd0b84 2113#undef FUNC_NAME
0f2d19dd 2114
a1ec6916 2115SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0,
1bbd0b84 2116 (SCM port, SCM filename),
b380b885
MD
2117 "Change the filename associated with @var{port}, using the current input\n"
2118 "port if none is specified. Note that this does not change the port's\n"
2119 "source of data, but only the value that is returned by\n"
2120 "@code{port-filename} and reported in diagnostic output.")
1bbd0b84 2121#define FUNC_NAME s_scm_set_port_filename_x
d14af9f2 2122{
360fc44c 2123 port = SCM_COERCE_OUTPORT (port);
34d19ef6 2124 SCM_VALIDATE_OPENPORT (1, port);
360fc44c 2125 /* We allow the user to set the filename to whatever he likes. */
b24b5e13
DH
2126 SCM_SET_FILENAME (port, filename);
2127 return SCM_UNSPECIFIED;
d14af9f2 2128}
1bbd0b84 2129#undef FUNC_NAME
d14af9f2 2130
d6a6989e
LC
2131/* A fluid specifying the default encoding for newly created ports. If it is
2132 a string, that is the encoding. If it is #f, it is in the "native"
2133 (Latin-1) encoding. */
2134SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding");
2135
889975e5
MG
2136static int scm_port_encoding_init = 0;
2137
9d9c66ba
LC
2138/* Use ENCODING as the default encoding for future ports. */
2139void
2140scm_i_set_default_port_encoding (const char *encoding)
2141{
2142 if (!scm_port_encoding_init
2143 || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
2144 scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
2145 SCM_EOL);
2146
2147 if (encoding == NULL
2148 || !strcmp (encoding, "ASCII")
2149 || !strcmp (encoding, "ANSI_X3.4-1968")
2150 || !strcmp (encoding, "ISO-8859-1"))
2151 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
2152 else
2153 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
2154 scm_from_locale_string (encoding));
2155}
2156
2157/* Return the name of the default encoding for newly created ports; a
2158 return value of NULL means "ISO-8859-1". */
889975e5 2159const char *
9d9c66ba 2160scm_i_default_port_encoding (void)
889975e5 2161{
9d9c66ba
LC
2162 if (!scm_port_encoding_init)
2163 return NULL;
2164 else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
2165 return NULL;
2166 else
889975e5 2167 {
9d9c66ba
LC
2168 SCM encoding;
2169
2170 encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
2171 if (!scm_is_string (encoding))
889975e5
MG
2172 return NULL;
2173 else
9d9c66ba 2174 return scm_i_string_chars (encoding);
889975e5
MG
2175 }
2176}
2177
889975e5 2178void
064c27c4 2179scm_i_set_port_encoding_x (SCM port, const char *encoding)
889975e5 2180{
889975e5 2181 scm_t_port *pt;
9d9c66ba 2182 iconv_t new_input_cd, new_output_cd;
889975e5 2183
9d9c66ba
LC
2184 new_input_cd = (iconv_t) -1;
2185 new_output_cd = (iconv_t) -1;
f4bc4e59 2186
9d9c66ba
LC
2187 /* Set the character encoding for this port. */
2188 pt = SCM_PTAB_ENTRY (port);
f4bc4e59 2189
9d9c66ba
LC
2190 if (encoding == NULL)
2191 encoding = "ISO-8859-1";
f4bc4e59 2192
7b292a9d
LC
2193 if (pt->encoding != encoding)
2194 pt->encoding = scm_gc_strdup (encoding, "port");
d9544bf0 2195
7b292a9d
LC
2196 /* If ENCODING is UTF-8, then no conversion descriptor is opened
2197 because we do I/O ourselves. This saves 100+ KiB for each
2198 descriptor. */
2199 if (strcmp (encoding, "UTF-8"))
9d9c66ba 2200 {
7b292a9d
LC
2201 if (SCM_CELL_WORD_0 (port) & SCM_RDNG)
2202 {
2203 /* Open an input iconv conversion descriptor, from ENCODING
2204 to UTF-8. We choose UTF-8, not UTF-32, because iconv
2205 implementations can typically convert from anything to
2206 UTF-8, but not to UTF-32 (see
2207 <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
2208 new_input_cd = iconv_open ("UTF-8", encoding);
2209 if (new_input_cd == (iconv_t) -1)
2210 goto invalid_encoding;
2211 }
f4bc4e59 2212
7b292a9d 2213 if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
f4bc4e59 2214 {
7b292a9d
LC
2215 new_output_cd = iconv_open (encoding, "UTF-8");
2216 if (new_output_cd == (iconv_t) -1)
2217 {
2218 if (new_input_cd != (iconv_t) -1)
2219 iconv_close (new_input_cd);
2220 goto invalid_encoding;
2221 }
f4bc4e59 2222 }
9d9c66ba 2223 }
f4bc4e59 2224
9d9c66ba
LC
2225 if (pt->input_cd != (iconv_t) -1)
2226 iconv_close (pt->input_cd);
2227 if (pt->output_cd != (iconv_t) -1)
2228 iconv_close (pt->output_cd);
f4bc4e59 2229
9d9c66ba
LC
2230 pt->input_cd = new_input_cd;
2231 pt->output_cd = new_output_cd;
f4bc4e59
LC
2232
2233 return;
2234
2235 invalid_encoding:
2236 {
2237 SCM err;
064c27c4
LC
2238 err = scm_from_locale_string (encoding);
2239 scm_misc_error ("scm_i_set_port_encoding_x",
2240 "invalid or unknown character encoding ~s",
f4bc4e59
LC
2241 scm_list_1 (err));
2242 }
889975e5
MG
2243}
2244
2245SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
2246 (SCM port),
2247 "Returns, as a string, the character encoding that @var{port}\n"
2248 "uses to interpret its input and output.\n")
2249#define FUNC_NAME s_scm_port_encoding
2250{
2251 scm_t_port *pt;
2252 const char *enc;
2253
2254 SCM_VALIDATE_PORT (1, port);
2255
2256 pt = SCM_PTAB_ENTRY (port);
9d9c66ba 2257 enc = pt->encoding;
889975e5
MG
2258 if (enc)
2259 return scm_from_locale_string (pt->encoding);
2260 else
0af34a3f 2261 return SCM_BOOL_F;
889975e5
MG
2262}
2263#undef FUNC_NAME
d6a6989e 2264
889975e5
MG
2265SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
2266 (SCM port, SCM enc),
2267 "Sets the character encoding that will be used to interpret all\n"
2268 "port I/O. New ports are created with the encoding\n"
2269 "appropriate for the current locale if @code{setlocale} has \n"
2270 "been called or ISO-8859-1 otherwise\n"
2271 "and this procedure can be used to modify that encoding.\n")
889975e5
MG
2272#define FUNC_NAME s_scm_set_port_encoding_x
2273{
2274 char *enc_str;
889975e5
MG
2275
2276 SCM_VALIDATE_PORT (1, port);
2277 SCM_VALIDATE_STRING (2, enc);
2278
2279 enc_str = scm_to_locale_string (enc);
da288f50 2280 scm_i_set_port_encoding_x (port, enc_str);
3e05fc04 2281 free (enc_str);
da288f50 2282
889975e5
MG
2283 return SCM_UNSPECIFIED;
2284}
2285#undef FUNC_NAME
2286
2287
2288/* This determines how conversions handle unconvertible characters. */
2289SCM_GLOBAL_VARIABLE (scm_conversion_strategy, "%port-conversion-strategy");
2290static int scm_conversion_strategy_init = 0;
2291
2292scm_t_string_failed_conversion_handler
2293scm_i_get_conversion_strategy (SCM port)
2294{
2295 SCM encoding;
2296
2297 if (scm_is_false (port))
2298 {
2299 if (!scm_conversion_strategy_init
2300 || !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy)))
2301 return SCM_FAILED_CONVERSION_QUESTION_MARK;
2302 else
2303 {
2304 encoding = scm_fluid_ref (SCM_VARIABLE_REF (scm_conversion_strategy));
2305 if (scm_is_false (encoding))
2306 return SCM_FAILED_CONVERSION_QUESTION_MARK;
2307 else
2308 return (scm_t_string_failed_conversion_handler) scm_to_int (encoding);
2309 }
2310 }
2311 else
2312 {
2313 scm_t_port *pt;
2314 pt = SCM_PTAB_ENTRY (port);
e1ee45e7 2315 return pt->ilseq_handler;
889975e5
MG
2316 }
2317
2318}
2319
2320void
2321scm_i_set_conversion_strategy_x (SCM port,
2322 scm_t_string_failed_conversion_handler handler)
2323{
2324 SCM strategy;
2325 scm_t_port *pt;
2326
2327 strategy = scm_from_int ((int) handler);
2328
2329 if (scm_is_false (port))
2330 {
2331 /* Set the default encoding for future ports. */
2e16a342 2332 if (!scm_conversion_strategy_init
889975e5
MG
2333 || !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy)))
2334 scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
2335 SCM_EOL);
2336 scm_fluid_set_x (SCM_VARIABLE_REF (scm_conversion_strategy), strategy);
2337 }
2338 else
2339 {
2340 /* Set the character encoding for this port. */
2341 pt = SCM_PTAB_ENTRY (port);
2342 pt->ilseq_handler = handler;
2343 }
2344}
2345
2346SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
2347 1, 0, 0, (SCM port),
2348 "Returns the behavior of the port when handling a character that\n"
2349 "is not representable in the port's current encoding.\n"
2350 "It returns the symbol @code{error} if unrepresentable characters\n"
2351 "should cause exceptions, @code{substitute} if the port should\n"
2352 "try to replace unrepresentable characters with question marks or\n"
2353 "approximate characters, or @code{escape} if unrepresentable\n"
2354 "characters should be converted to string escapes.\n"
2355 "\n"
2356 "If @var{port} is @code{#f}, then the current default behavior\n"
2357 "will be returned. New ports will have this default behavior\n"
2358 "when they are created.\n")
2359#define FUNC_NAME s_scm_port_conversion_strategy
2360{
2361 scm_t_string_failed_conversion_handler h;
2362
2363 SCM_VALIDATE_OPPORT (1, port);
2364
2365 if (!scm_is_false (port))
2366 {
2367 SCM_VALIDATE_OPPORT (1, port);
2368 }
2369
2370 h = scm_i_get_conversion_strategy (port);
2371 if (h == SCM_FAILED_CONVERSION_ERROR)
4a655e50 2372 return scm_from_latin1_symbol ("error");
889975e5 2373 else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
4a655e50 2374 return scm_from_latin1_symbol ("substitute");
889975e5 2375 else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
4a655e50 2376 return scm_from_latin1_symbol ("escape");
889975e5
MG
2377 else
2378 abort ();
2379
2380 /* Never gets here. */
2381 return SCM_UNDEFINED;
2382}
2383#undef FUNC_NAME
2384
2385SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!",
2386 2, 0, 0,
2387 (SCM port, SCM sym),
2388 "Sets the behavior of the interpreter when outputting a character\n"
2389 "that is not representable in the port's current encoding.\n"
2390 "@var{sym} can be either @code{'error}, @code{'substitute}, or\n"
2391 "@code{'escape}. If it is @code{'error}, an error will be thrown\n"
2392 "when an unconvertible character is encountered. If it is\n"
2393 "@code{'substitute}, then unconvertible characters will \n"
2394 "be replaced with approximate characters, or with question marks\n"
2395 "if no approximately correct character is available.\n"
2396 "If it is @code{'escape},\n"
2397 "it will appear as a hex escape when output.\n"
2398 "\n"
2399 "If @var{port} is an open port, the conversion error behavior\n"
2400 "is set for that port. If it is @code{#f}, it is set as the\n"
2401 "default behavior for any future ports that get created in\n"
2402 "this thread.\n")
2403#define FUNC_NAME s_scm_set_port_conversion_strategy_x
2404{
2405 SCM err;
2406 SCM qm;
2407 SCM esc;
2408
2409 if (!scm_is_false (port))
2410 {
2411 SCM_VALIDATE_OPPORT (1, port);
2412 }
2413
4a655e50 2414 err = scm_from_latin1_symbol ("error");
889975e5
MG
2415 if (scm_is_true (scm_eqv_p (sym, err)))
2416 {
2417 scm_i_set_conversion_strategy_x (port, SCM_FAILED_CONVERSION_ERROR);
2418 return SCM_UNSPECIFIED;
2419 }
2420
4a655e50 2421 qm = scm_from_latin1_symbol ("substitute");
889975e5
MG
2422 if (scm_is_true (scm_eqv_p (sym, qm)))
2423 {
2424 scm_i_set_conversion_strategy_x (port,
2425 SCM_FAILED_CONVERSION_QUESTION_MARK);
2426 return SCM_UNSPECIFIED;
2427 }
2428
4a655e50 2429 esc = scm_from_latin1_symbol ("escape");
889975e5
MG
2430 if (scm_is_true (scm_eqv_p (sym, esc)))
2431 {
2432 scm_i_set_conversion_strategy_x (port,
2433 SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
2434 return SCM_UNSPECIFIED;
2435 }
2436
2437 SCM_MISC_ERROR ("unknown conversion behavior ~s", scm_list_1 (sym));
2438
2439 return SCM_UNSPECIFIED;
2440}
2441#undef FUNC_NAME
2442
2443
2444
f12733c9
MD
2445void
2446scm_print_port_mode (SCM exp, SCM port)
2447{
2448 scm_puts (SCM_CLOSEDP (exp)
2449 ? "closed: "
f9a64404
DH
2450 : (SCM_RDNG & SCM_CELL_WORD_0 (exp)
2451 ? (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
f12733c9
MD
2452 ? "input-output: "
2453 : "input: ")
f9a64404 2454 : (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
f12733c9
MD
2455 ? "output: "
2456 : "bogus: ")),
2457 port);
2458}
1cc91f1b 2459
f12733c9 2460int
e81d98ec 2461scm_port_print (SCM exp, SCM port, scm_print_state *pstate SCM_UNUSED)
0f2d19dd 2462{
f12733c9
MD
2463 char *type = SCM_PTOBNAME (SCM_PTOBNUM (exp));
2464 if (!type)
2465 type = "port";
b7f3516f 2466 scm_puts ("#<", port);
f12733c9 2467 scm_print_port_mode (exp, port);
b7f3516f
TT
2468 scm_puts (type, port);
2469 scm_putc (' ', port);
0345e278 2470 scm_uintprint (SCM_CELL_WORD_1 (exp), 16, port);
b7f3516f 2471 scm_putc ('>', port);
f12733c9 2472 return 1;
0f2d19dd
JB
2473}
2474
0f2d19dd 2475\f
ee149d03 2476
d68fee48 2477/* Void ports. */
0f2d19dd 2478
92c2555f 2479scm_t_bits scm_tc16_void_port = 0;
0f2d19dd 2480
e81d98ec 2481static int fill_input_void_port (SCM port SCM_UNUSED)
283a1a0e 2482{
70df8af6 2483 return EOF;
283a1a0e
GH
2484}
2485
31703ab8 2486static void
e81d98ec
DH
2487write_void_port (SCM port SCM_UNUSED,
2488 const void *data SCM_UNUSED,
2489 size_t size SCM_UNUSED)
31703ab8
GH
2490{
2491}
2492
d617ee18
MV
2493static SCM
2494scm_i_void_port (long mode_bits)
0f2d19dd 2495{
2721f918
AW
2496 SCM ret;
2497
2498 ret = scm_c_make_port (scm_tc16_void_port, mode_bits, 0);
da220f27 2499
2721f918 2500 scm_port_non_buffer (SCM_PTAB_ENTRY (ret));
402788a9 2501
2721f918 2502 return ret;
0f2d19dd
JB
2503}
2504
d617ee18
MV
2505SCM
2506scm_void_port (char *mode_str)
2507{
2508 return scm_i_void_port (scm_mode_bits (mode_str));
2509}
2510
a1ec6916 2511SCM_DEFINE (scm_sys_make_void_port, "%make-void-port", 1, 0, 0,
1bbd0b84 2512 (SCM mode),
70df8af6 2513 "Create and return a new void port. A void port acts like\n"
bb2c02f2 2514 "@file{/dev/null}. The @var{mode} argument\n"
70df8af6 2515 "specifies the input/output modes for this port: see the\n"
b380b885 2516 "documentation for @code{open-file} in @ref{File Ports}.")
1bbd0b84 2517#define FUNC_NAME s_scm_sys_make_void_port
0f2d19dd 2518{
d617ee18 2519 return scm_i_void_port (scm_i_mode_bits (mode));
0f2d19dd 2520}
1bbd0b84 2521#undef FUNC_NAME
0f2d19dd 2522
0f2d19dd 2523\f
89545eba 2524/* Initialization. */
1cc91f1b 2525
0f2d19dd
JB
2526void
2527scm_init_ports ()
0f2d19dd 2528{
840ae05d 2529 /* lseek() symbols. */
e11e83f3
MV
2530 scm_c_define ("SEEK_SET", scm_from_int (SEEK_SET));
2531 scm_c_define ("SEEK_CUR", scm_from_int (SEEK_CUR));
2532 scm_c_define ("SEEK_END", scm_from_int (SEEK_END));
840ae05d 2533
70df8af6
GH
2534 scm_tc16_void_port = scm_make_port_type ("void", fill_input_void_port,
2535 write_void_port);
9de87eea 2536
f39448c5
AW
2537 cur_inport_fluid = scm_make_fluid ();
2538 cur_outport_fluid = scm_make_fluid ();
2539 cur_errport_fluid = scm_make_fluid ();
2540 cur_loadport_fluid = scm_make_fluid ();
9de87eea 2541
2721f918 2542 scm_i_port_weak_set = scm_c_make_weak_set (31);
d6a6989e 2543
a0599745 2544#include "libguile/ports.x"
889975e5 2545
d6a6989e
LC
2546 /* Use Latin-1 as the default port encoding. */
2547 SCM_VARIABLE_SET (default_port_encoding_var, scm_make_fluid ());
2548 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
889975e5 2549 scm_port_encoding_init = 1;
d6a6989e 2550
889975e5
MG
2551 SCM_VARIABLE_SET (scm_conversion_strategy, scm_make_fluid ());
2552 scm_fluid_set_x (SCM_VARIABLE_REF (scm_conversion_strategy),
2553 scm_from_int ((int) SCM_FAILED_CONVERSION_QUESTION_MARK));
2554 scm_conversion_strategy_init = 1;
2555
0f2d19dd 2556}
89e00824
ML
2557
2558/*
2559 Local Variables:
2560 c-file-style: "gnu"
2561 End:
2562*/