Merge remote-tracking branch 'origin/stable-2.0'
[bpt/guile.git] / libguile / ports.c
CommitLineData
f4bc4e59 1/* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004,
93c4fa21 2 * 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
f4bc4e59 3 *
73be1d9e 4 * This library is free software; you can redistribute it and/or
53befeb7
NJ
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
0f2d19dd 8 *
53befeb7
NJ
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
0f2d19dd 13 *
73be1d9e
MV
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
53befeb7
NJ
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
73be1d9e 18 */
1bbd0b84 19
1bbd0b84 20
0f2d19dd 21\f
d68fee48
JB
22/* Headers. */
23
2b829bbb
KR
24#define _LARGEFILE64_SOURCE /* ask for stat64 etc */
25
dbb605f5 26#ifdef HAVE_CONFIG_H
bd515f37
RB
27# include <config.h>
28#endif
29
0f2d19dd 30#include <stdio.h>
e6e2e95a 31#include <errno.h>
8ab3d8a0 32#include <fcntl.h> /* for chsize on mingw */
b5cb4464 33#include <assert.h>
f4bc4e59 34#include <iconv.h>
889975e5
MG
35#include <uniconv.h>
36#include <unistr.h>
37#include <striconveh.h>
e6e2e95a 38
fca43887
LC
39#include <assert.h>
40
a0599745 41#include "libguile/_scm.h"
4e047c3e 42#include "libguile/async.h"
8269ba5b 43#include "libguile/deprecation.h"
f0942910 44#include "libguile/eval.h"
8ab3d8a0 45#include "libguile/fports.h" /* direct access for seek and truncate */
9511876f 46#include "libguile/goops.h"
a0599745
MD
47#include "libguile/smob.h"
48#include "libguile/chars.h"
185e369a 49#include "libguile/dynwind.h"
0f2d19dd 50
a0599745 51#include "libguile/keywords.h"
5dbc6c06 52#include "libguile/hashtab.h"
a0599745
MD
53#include "libguile/root.h"
54#include "libguile/strings.h"
b42170a4 55#include "libguile/mallocs.h"
a0599745
MD
56#include "libguile/validate.h"
57#include "libguile/ports.h"
e4598559 58#include "libguile/ports-internal.h"
3a5fb14d 59#include "libguile/vectors.h"
2721f918 60#include "libguile/weak-set.h"
9de87eea 61#include "libguile/fluids.h"
889975e5 62#include "libguile/eq.h"
a38024ba 63#include "libguile/alist.h"
0f2d19dd 64
bd9e24b3
GH
65#ifdef HAVE_STRING_H
66#include <string.h>
67#endif
68
ec65f5da
MV
69#ifdef HAVE_IO_H
70#include <io.h>
71#endif
72
0f2d19dd
JB
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76
95b88819
GH
77#ifdef HAVE_SYS_IOCTL_H
78#include <sys/ioctl.h>
79#endif
d68fee48 80
8ab3d8a0
KR
81/* Mingw (version 3.4.5, circa 2006) has ftruncate as an alias for chsize
82 already, but have this code here in case that wasn't so in past versions,
83 or perhaps to help other minimal DOS environments.
84
85 gnulib ftruncate.c has code using fcntl F_CHSIZE and F_FREESP, which
86 might be possibilities if we've got other systems without ftruncate. */
87
56a3dcd4 88#if defined HAVE_CHSIZE && ! defined HAVE_FTRUNCATE
82893676 89#define ftruncate(fd, size) chsize (fd, size)
8ab3d8a0
KR
90#undef HAVE_FTRUNCATE
91#define HAVE_FTRUNCATE 1
82893676
MG
92#endif
93
0f2d19dd 94\f
93c4fa21
AW
95/* Port encodings are case-insensitive ASCII strings. */
96static char
97ascii_toupper (char c)
98{
99 return (c < 'a' || c > 'z') ? c : ('A' + (c - 'a'));
100}
101
102/* It is only necessary to use this function on encodings that come from
103 the user and have not been canonicalized yet. Encodings that are set
104 on ports or in the default encoding fluid are in upper-case, and can
105 be compared with strcmp. */
106static int
107encoding_matches (const char *enc, const char *upper)
108{
109 if (!enc)
110 enc = "ISO-8859-1";
111
112 while (*enc)
113 if (ascii_toupper (*enc++) != *upper++)
114 return 0;
115
116 return !*upper;
117}
118
119static char*
120canonicalize_encoding (const char *enc)
121{
122 char *ret;
123 int i;
124
125 if (!enc)
126 return "ISO-8859-1";
127
128 ret = scm_gc_strdup (enc, "port");
129
130 for (i = 0; ret[i]; i++)
131 {
132 if (ret[i] > 127)
133 /* Restrict to ASCII. */
134 scm_misc_error (NULL, "invalid character encoding ~s",
135 scm_list_1 (scm_from_latin1_string (enc)));
136 else
137 ret[i] = ascii_toupper (ret[i]);
138 }
139
140 return ret;
141}
142
143
144\f
d68fee48 145/* The port kind table --- a dynamically resized array of port types. */
0f2d19dd
JB
146
147
148/* scm_ptobs scm_numptob
5dbc6c06 149 * implement a dynamically resized array of ptob records.
0f2d19dd
JB
150 * Indexes into this table are used when generating type
151 * tags for smobjects (if you know a tag you can get an index and conversely).
152 */
62bd5d66
AW
153static scm_t_ptob_descriptor **scm_ptobs = NULL;
154static long scm_numptob = 0; /* Number of port types. */
155static long scm_ptobs_size = 0; /* Number of slots in the port type
156 table. */
157static scm_i_pthread_mutex_t scm_ptobs_lock = SCM_I_PTHREAD_MUTEX_INITIALIZER;
158
159long
160scm_c_num_port_types (void)
161{
162 long ret;
163
164 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
165 ret = scm_numptob;
166 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
167
168 return ret;
169}
170
171scm_t_ptob_descriptor*
172scm_c_port_type_ref (long ptobnum)
173{
174 scm_t_ptob_descriptor *ret = NULL;
175
176 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
177
178 if (0 <= ptobnum && ptobnum < scm_numptob)
179 ret = scm_ptobs[ptobnum];
180
181 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
182
183 if (!ret)
184 scm_out_of_range ("scm_c_port_type_ref", scm_from_long (ptobnum));
185
186 return ret;
187}
188
189long
190scm_c_port_type_add_x (scm_t_ptob_descriptor *desc)
191{
192 long ret = -1;
193
194 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
195
196 if (scm_numptob + 1 < SCM_I_MAX_PORT_TYPE_COUNT)
197 {
198 if (scm_numptob == scm_ptobs_size)
199 {
200 unsigned long old_size = scm_ptobs_size;
201 scm_t_ptob_descriptor **old_ptobs = scm_ptobs;
202
203 /* Currently there are only 9 predefined port types, so one
204 resize will cover it. */
205 scm_ptobs_size = old_size + 10;
206
207 if (scm_ptobs_size >= SCM_I_MAX_PORT_TYPE_COUNT)
208 scm_ptobs_size = SCM_I_MAX_PORT_TYPE_COUNT;
209
210 scm_ptobs = scm_gc_malloc (sizeof (*scm_ptobs) * scm_ptobs_size,
211 "scm_ptobs");
212
213 memcpy (scm_ptobs, old_ptobs, sizeof (*scm_ptobs) * scm_numptob);
214 }
215
216 ret = scm_numptob++;
217 scm_ptobs[ret] = desc;
218 }
219
220 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
221
222 if (ret < 0)
223 scm_out_of_range ("scm_c_port_type_add_x", scm_from_long (scm_numptob));
224
225 return ret;
226}
0f2d19dd 227
f12733c9 228/*
f12733c9 229 * We choose to use an interface similar to the smob interface with
affc96b5 230 * fill_input and write as standard fields, passed to the port
f12733c9
MD
231 * type constructor, and optional fields set by setters.
232 */
233
70df8af6 234static void
e81d98ec 235flush_port_default (SCM port SCM_UNUSED)
70df8af6
GH
236{
237}
238
239static void
e81d98ec 240end_input_default (SCM port SCM_UNUSED, int offset SCM_UNUSED)
70df8af6
GH
241{
242}
0f2d19dd 243
92c2555f 244scm_t_bits
f12733c9 245scm_make_port_type (char *name,
affc96b5 246 int (*fill_input) (SCM port),
8aa011a1 247 void (*write) (SCM port, const void *data, size_t size))
0f2d19dd 248{
62bd5d66
AW
249 scm_t_ptob_descriptor *desc;
250 long ptobnum;
affc96b5 251
62bd5d66
AW
252 desc = scm_gc_malloc_pointerless (sizeof (*desc), "port-type");
253 memset (desc, 0, sizeof (*desc));
affc96b5 254
62bd5d66
AW
255 desc->name = name;
256 desc->print = scm_port_print;
257 desc->write = write;
258 desc->flush = flush_port_default;
259 desc->end_input = end_input_default;
260 desc->fill_input = fill_input;
affc96b5 261
62bd5d66 262 ptobnum = scm_c_port_type_add_x (desc);
affc96b5 263
62bd5d66 264 /* Make a class object if GOOPS is present. */
63385df2 265 if (SCM_UNPACK (scm_port_class[0]) != 0)
62bd5d66
AW
266 scm_make_port_classes (ptobnum, name);
267
268 return scm_tc7_port + ptobnum * 256;
0f2d19dd
JB
269}
270
f12733c9 271void
23f2b9a3 272scm_set_port_mark (scm_t_bits tc, SCM (*mark) (SCM))
f12733c9 273{
62bd5d66 274 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->mark = mark;
f12733c9
MD
275}
276
277void
23f2b9a3 278scm_set_port_free (scm_t_bits tc, size_t (*free) (SCM))
f12733c9 279{
62bd5d66 280 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->free = free;
f12733c9
MD
281}
282
283void
23f2b9a3 284scm_set_port_print (scm_t_bits tc, int (*print) (SCM exp, SCM port,
19b8d12b 285 scm_print_state *pstate))
f12733c9 286{
62bd5d66 287 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->print = print;
f12733c9
MD
288}
289
290void
23f2b9a3 291scm_set_port_equalp (scm_t_bits tc, SCM (*equalp) (SCM, SCM))
f12733c9 292{
62bd5d66 293 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->equalp = equalp;
f12733c9
MD
294}
295
31703ab8 296void
19b8d12b 297scm_set_port_close (scm_t_bits tc, int (*close) (SCM))
31703ab8 298{
19b8d12b 299 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->close = close;
31703ab8
GH
300}
301
f12733c9 302void
19b8d12b 303scm_set_port_flush (scm_t_bits tc, void (*flush) (SCM port))
f12733c9 304{
03a2eeb0
AW
305 scm_t_ptob_descriptor *ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tc));
306 ptob->flush = flush;
307 ptob->flags |= SCM_PORT_TYPE_HAS_FLUSH;
f12733c9
MD
308}
309
310void
19b8d12b 311scm_set_port_end_input (scm_t_bits tc, void (*end_input) (SCM port, int offset))
f12733c9 312{
19b8d12b 313 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->end_input = end_input;
f12733c9
MD
314}
315
316void
19b8d12b 317scm_set_port_seek (scm_t_bits tc, scm_t_off (*seek) (SCM, scm_t_off, int))
f12733c9 318{
62bd5d66 319 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->seek = seek;
f12733c9
MD
320}
321
322void
f1ce9199 323scm_set_port_truncate (scm_t_bits tc, void (*truncate) (SCM, scm_t_off))
f12733c9 324{
62bd5d66 325 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->truncate = truncate;
f12733c9
MD
326}
327
328void
23f2b9a3 329scm_set_port_input_waiting (scm_t_bits tc, int (*input_waiting) (SCM))
f12733c9 330{
62bd5d66 331 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->input_waiting = input_waiting;
f12733c9
MD
332}
333
45c0878b
MW
334static void
335scm_i_set_pending_eof (SCM port)
336{
337 SCM_PORT_GET_INTERNAL (port)->pending_eof = 1;
338}
339
340static void
341scm_i_clear_pending_eof (SCM port)
342{
343 SCM_PORT_GET_INTERNAL (port)->pending_eof = 0;
344}
345
a38024ba
MW
346SCM_DEFINE (scm_i_port_property, "%port-property", 2, 0, 0,
347 (SCM port, SCM key),
348 "Return the property of @var{port} associated with @var{key}.")
349#define FUNC_NAME s_scm_i_port_property
05d7f762 350{
a38024ba
MW
351 SCM_VALIDATE_OPPORT (1, port);
352 return scm_assq_ref (SCM_PORT_GET_INTERNAL (port)->alist, key);
05d7f762 353}
a38024ba 354#undef FUNC_NAME
05d7f762 355
a38024ba
MW
356SCM_DEFINE (scm_i_set_port_property_x, "%set-port-property!", 3, 0, 0,
357 (SCM port, SCM key, SCM value),
358 "Set the property of @var{port} associated with @var{key} to @var{value}.")
359#define FUNC_NAME s_scm_i_set_port_property_x
05d7f762 360{
a38024ba
MW
361 scm_t_port_internal *pti;
362
363 SCM_VALIDATE_OPPORT (1, port);
364 pti = SCM_PORT_GET_INTERNAL (port);
365 pti->alist = scm_assq_set_x (pti->alist, key, value);
366 return SCM_UNSPECIFIED;
05d7f762 367}
a38024ba 368#undef FUNC_NAME
05d7f762 369
0f2d19dd 370\f
0f2d19dd 371
d68fee48 372/* Standard ports --- current input, output, error, and more(!). */
0f2d19dd 373
34297700
AW
374static SCM cur_inport_fluid = SCM_BOOL_F;
375static SCM cur_outport_fluid = SCM_BOOL_F;
376static SCM cur_errport_fluid = SCM_BOOL_F;
377static SCM cur_loadport_fluid = SCM_BOOL_F;
9de87eea 378
3b3b36dd 379SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
e1546b65
MG
380 (),
381 "Return the current input port. This is the default port used\n"
382 "by many input procedures. Initially, @code{current-input-port}\n"
383 "returns the @dfn{standard input} in Unix and C terminology.")
1bbd0b84 384#define FUNC_NAME s_scm_current_input_port
0f2d19dd 385{
34297700 386 if (scm_is_true (cur_inport_fluid))
889975e5
MG
387 return scm_fluid_ref (cur_inport_fluid);
388 else
389 return SCM_BOOL_F;
0f2d19dd 390}
1bbd0b84 391#undef FUNC_NAME
0f2d19dd 392
3b3b36dd 393SCM_DEFINE (scm_current_output_port, "current-output-port", 0, 0, 0,
e1546b65
MG
394 (),
395 "Return the current output port. This is the default port used\n"
9401323e 396 "by many output procedures. Initially,\n"
e1546b65
MG
397 "@code{current-output-port} returns the @dfn{standard output} in\n"
398 "Unix and C terminology.")
1bbd0b84 399#define FUNC_NAME s_scm_current_output_port
0f2d19dd 400{
34297700 401 if (scm_is_true (cur_outport_fluid))
889975e5
MG
402 return scm_fluid_ref (cur_outport_fluid);
403 else
404 return SCM_BOOL_F;
0f2d19dd 405}
1bbd0b84 406#undef FUNC_NAME
0f2d19dd 407
3b3b36dd 408SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0,
1bbd0b84 409 (),
b380b885
MD
410 "Return the port to which errors and warnings should be sent (the\n"
411 "@dfn{standard error} in Unix and C terminology).")
1bbd0b84 412#define FUNC_NAME s_scm_current_error_port
0f2d19dd 413{
34297700 414 if (scm_is_true (cur_errport_fluid))
889975e5
MG
415 return scm_fluid_ref (cur_errport_fluid);
416 else
417 return SCM_BOOL_F;
0f2d19dd 418}
1bbd0b84 419#undef FUNC_NAME
0f2d19dd 420
3972de76
AW
421SCM
422scm_current_warning_port (void)
423{
f57ea23a
MW
424 static SCM cwp_var = SCM_UNDEFINED;
425 static scm_i_pthread_mutex_t cwp_var_mutex
426 = SCM_I_PTHREAD_MUTEX_INITIALIZER;
427
428 scm_i_scm_pthread_mutex_lock (&cwp_var_mutex);
429 if (SCM_UNBNDP (cwp_var))
430 cwp_var = scm_c_private_variable ("guile", "current-warning-port");
431 scm_i_pthread_mutex_unlock (&cwp_var_mutex);
3972de76
AW
432
433 return scm_call_0 (scm_variable_ref (cwp_var));
434}
435
3b3b36dd 436SCM_DEFINE (scm_current_load_port, "current-load-port", 0, 0, 0,
e1546b65 437 (),
b450f070 438 "Return the current-load-port.\n"
e1546b65 439 "The load port is used internally by @code{primitive-load}.")
1bbd0b84 440#define FUNC_NAME s_scm_current_load_port
31614d8e 441{
9de87eea 442 return scm_fluid_ref (cur_loadport_fluid);
31614d8e 443}
1bbd0b84 444#undef FUNC_NAME
31614d8e 445
3b3b36dd 446SCM_DEFINE (scm_set_current_input_port, "set-current-input-port", 1, 0, 0,
1bbd0b84 447 (SCM port),
8f85c0c6
NJ
448 "@deffnx {Scheme Procedure} set-current-output-port port\n"
449 "@deffnx {Scheme Procedure} set-current-error-port port\n"
b380b885
MD
450 "Change the ports returned by @code{current-input-port},\n"
451 "@code{current-output-port} and @code{current-error-port}, respectively,\n"
452 "so that they use the supplied @var{port} for input or output.")
1bbd0b84 453#define FUNC_NAME s_scm_set_current_input_port
0f2d19dd 454{
9de87eea 455 SCM oinp = scm_fluid_ref (cur_inport_fluid);
34d19ef6 456 SCM_VALIDATE_OPINPORT (1, port);
9de87eea 457 scm_fluid_set_x (cur_inport_fluid, port);
0f2d19dd
JB
458 return oinp;
459}
1bbd0b84 460#undef FUNC_NAME
0f2d19dd
JB
461
462
3b3b36dd 463SCM_DEFINE (scm_set_current_output_port, "set-current-output-port", 1, 0, 0,
e1546b65
MG
464 (SCM port),
465 "Set the current default output port to @var{port}.")
1bbd0b84 466#define FUNC_NAME s_scm_set_current_output_port
0f2d19dd 467{
9de87eea 468 SCM ooutp = scm_fluid_ref (cur_outport_fluid);
78446828 469 port = SCM_COERCE_OUTPORT (port);
34d19ef6 470 SCM_VALIDATE_OPOUTPORT (1, port);
9de87eea 471 scm_fluid_set_x (cur_outport_fluid, port);
0f2d19dd
JB
472 return ooutp;
473}
1bbd0b84 474#undef FUNC_NAME
0f2d19dd
JB
475
476
3b3b36dd 477SCM_DEFINE (scm_set_current_error_port, "set-current-error-port", 1, 0, 0,
e1546b65
MG
478 (SCM port),
479 "Set the current default error port to @var{port}.")
1bbd0b84 480#define FUNC_NAME s_scm_set_current_error_port
0f2d19dd 481{
9de87eea 482 SCM oerrp = scm_fluid_ref (cur_errport_fluid);
78446828 483 port = SCM_COERCE_OUTPORT (port);
34d19ef6 484 SCM_VALIDATE_OPOUTPORT (1, port);
9de87eea 485 scm_fluid_set_x (cur_errport_fluid, port);
0f2d19dd
JB
486 return oerrp;
487}
1bbd0b84 488#undef FUNC_NAME
0f2d19dd 489
3972de76
AW
490
491SCM
492scm_set_current_warning_port (SCM port)
493{
494 static SCM cwp_var = SCM_BOOL_F;
495
496 if (scm_is_false (cwp_var))
497 cwp_var = scm_c_private_lookup ("guile", "current-warning-port");
498
499 return scm_call_1 (scm_variable_ref (cwp_var), port);
500}
501
502
185e369a 503void
661ae7ab 504scm_dynwind_current_input_port (SCM port)
9de87eea 505#define FUNC_NAME NULL
185e369a 506{
9de87eea 507 SCM_VALIDATE_OPINPORT (1, port);
661ae7ab 508 scm_dynwind_fluid (cur_inport_fluid, port);
185e369a 509}
9de87eea 510#undef FUNC_NAME
185e369a
MV
511
512void
661ae7ab 513scm_dynwind_current_output_port (SCM port)
9de87eea 514#define FUNC_NAME NULL
185e369a 515{
9de87eea
MV
516 port = SCM_COERCE_OUTPORT (port);
517 SCM_VALIDATE_OPOUTPORT (1, port);
661ae7ab 518 scm_dynwind_fluid (cur_outport_fluid, port);
185e369a 519}
9de87eea 520#undef FUNC_NAME
185e369a
MV
521
522void
661ae7ab 523scm_dynwind_current_error_port (SCM port)
9de87eea
MV
524#define FUNC_NAME NULL
525{
526 port = SCM_COERCE_OUTPORT (port);
527 SCM_VALIDATE_OPOUTPORT (1, port);
661ae7ab 528 scm_dynwind_fluid (cur_errport_fluid, port);
9de87eea
MV
529}
530#undef FUNC_NAME
531
532void
661ae7ab 533scm_i_dynwind_current_load_port (SCM port)
185e369a 534{
661ae7ab 535 scm_dynwind_fluid (cur_loadport_fluid, port);
185e369a
MV
536}
537
19b8d12b 538
0f2d19dd 539\f
0f2d19dd 540
19b8d12b
AW
541/* Retrieving a port's mode. */
542
543/* Return the flags that characterize a port based on the mode
544 * string used to open a file for that port.
545 *
546 * See PORT FLAGS in scm.h
5dbc6c06 547 */
19b8d12b
AW
548
549static long
550scm_i_mode_bits_n (SCM modes)
551{
552 return (SCM_OPN
553 | (scm_i_string_contains_char (modes, 'r')
554 || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0)
555 | (scm_i_string_contains_char (modes, 'w')
556 || scm_i_string_contains_char (modes, 'a')
557 || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0)
558 | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0)
559 | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0));
560}
561
562long
563scm_mode_bits (char *modes)
564{
8ebd06c6
AW
565 /* Valid characters are rw+a0l. So, use latin1. */
566 return scm_i_mode_bits (scm_from_latin1_string (modes));
19b8d12b
AW
567}
568
569long
570scm_i_mode_bits (SCM modes)
571{
572 long bits;
573
574 if (!scm_is_string (modes))
575 scm_wrong_type_arg_msg (NULL, 0, modes, "string");
576
577 bits = scm_i_mode_bits_n (modes);
578 scm_remember_upto_here_1 (modes);
579 return bits;
580}
581
582/* Return the mode flags from an open port.
583 * Some modes such as "append" are only used when opening
584 * a file and are not returned here. */
585
586SCM_DEFINE (scm_port_mode, "port-mode", 1, 0, 0,
587 (SCM port),
588 "Return the port modes associated with the open port @var{port}.\n"
589 "These will not necessarily be identical to the modes used when\n"
590 "the port was opened, since modes such as \"append\" which are\n"
591 "used only during port creation are not retained.")
592#define FUNC_NAME s_scm_port_mode
593{
594 char modes[4];
595 modes[0] = '\0';
596
597 port = SCM_COERCE_OUTPORT (port);
598 SCM_VALIDATE_OPPORT (1, port);
599 if (SCM_CELL_WORD_0 (port) & SCM_RDNG) {
600 if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
601 strcpy (modes, "r+");
602 else
603 strcpy (modes, "r");
604 }
605 else if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
606 strcpy (modes, "w");
607 if (SCM_CELL_WORD_0 (port) & SCM_BUF0)
608 strcat (modes, "0");
609
610 return scm_from_latin1_string (modes);
611}
612#undef FUNC_NAME
613
614
615\f
616
617/* The port table --- a weak set of all ports.
618
619 We need a global registry of ports to flush them all at exit, and to
620 get all the ports matching a file descriptor. */
2721f918 621SCM scm_i_port_weak_set;
b9ad392e 622
19b8d12b 623
651a0735 624\f
651a0735 625
19b8d12b 626/* Port finalization. */
1cc91f1b 627
5a771d5f
AW
628struct do_free_data
629{
630 scm_t_ptob_descriptor *ptob;
631 SCM port;
632};
633
634static SCM
635do_free (void *body_data)
636{
637 struct do_free_data *data = body_data;
638
639 /* `close' is for explicit `close-port' by user. `free' is for this
640 purpose: ports collected by the GC. */
641 data->ptob->free (data->port);
642
643 return SCM_BOOL_T;
644}
645
651a0735
LC
646/* Finalize the object (a port) pointed to by PTR. */
647static void
6922d92f 648finalize_port (void *ptr, void *data)
651a0735 649{
21041372 650 SCM port = SCM_PACK_POINTER (ptr);
651a0735
LC
651
652 if (!SCM_PORTP (port))
653 abort ();
654
655 if (SCM_OPENP (port))
656 {
3753e227 657 struct do_free_data data;
651a0735 658
3753e227 659 SCM_CLR_PORT_OPEN_FLAG (port);
651a0735 660
3753e227
AW
661 data.ptob = SCM_PORT_DESCRIPTOR (port);
662 data.port = port;
5a771d5f 663
3753e227
AW
664 scm_internal_catch (SCM_BOOL_T, do_free, &data,
665 scm_handle_by_message_noexit, NULL);
5a771d5f 666
3753e227 667 scm_gc_ports_collected++;
651a0735
LC
668 }
669}
670
671
651a0735
LC
672\f
673
da220f27 674SCM
2721f918
AW
675scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits,
676 const char *encoding,
677 scm_t_string_failed_conversion_handler handler,
678 scm_t_bits stream)
0f2d19dd 679{
2721f918
AW
680 SCM ret;
681 scm_t_port *entry;
f6f4feb0 682 scm_t_port_internal *pti;
62bd5d66 683 scm_t_ptob_descriptor *ptob;
2721f918 684
f6f4feb0
MW
685 entry = scm_gc_typed_calloc (scm_t_port);
686 pti = scm_gc_typed_calloc (scm_t_port_internal);
62bd5d66
AW
687 ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tag));
688
689 ret = scm_words (tag | mode_bits, 3);
690 SCM_SET_CELL_WORD_1 (ret, (scm_t_bits) entry);
691 SCM_SET_CELL_WORD_2 (ret, (scm_t_bits) ptob);
5f16b897 692
92c0ebac
AW
693 entry->lock = scm_gc_malloc_pointerless (sizeof (*entry->lock), "port lock");
694 scm_i_pthread_mutex_init (entry->lock, scm_i_pthread_mutexattr_recursive);
30b126d2 695
f6f4feb0 696 entry->internal = pti;
840ae05d 697 entry->file_name = SCM_BOOL_F;
61e452ba 698 entry->rw_active = SCM_PORT_NEITHER;
2721f918
AW
699 entry->port = ret;
700 entry->stream = stream;
93c4fa21
AW
701
702 if (encoding_matches (encoding, "UTF-8"))
703 {
f6f4feb0 704 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
93c4fa21
AW
705 entry->encoding = "UTF-8";
706 }
707 else if (encoding_matches (encoding, "ISO-8859-1"))
708 {
f6f4feb0 709 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
93c4fa21
AW
710 entry->encoding = "ISO-8859-1";
711 }
6c98257f 712 else
93c4fa21 713 {
f6f4feb0 714 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
93c4fa21
AW
715 entry->encoding = canonicalize_encoding (encoding);
716 }
717
2721f918 718 entry->ilseq_handler = handler;
f6f4feb0 719 pti->iconv_descriptors = NULL;
0f2d19dd 720
f6f4feb0
MW
721 pti->at_stream_start_for_bom_read = 1;
722 pti->at_stream_start_for_bom_write = 1;
f4bc4e59 723
f6f4feb0
MW
724 pti->pending_eof = 0;
725 pti->alist = SCM_EOL;
3e05fc04 726
6978c673
AW
727 if (SCM_PORT_DESCRIPTOR (ret)->free)
728 scm_i_set_finalizer (SCM2PTR (ret), finalize_port, NULL);
729
03a2eeb0
AW
730 if (SCM_PORT_DESCRIPTOR (ret)->flags & SCM_PORT_TYPE_HAS_FLUSH)
731 scm_weak_set_add_x (scm_i_port_weak_set, ret);
651a0735 732
2721f918
AW
733 return ret;
734}
735
736SCM
737scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream)
738{
739 return scm_c_make_port_with_encoding (tag, mode_bits,
740 scm_i_default_port_encoding (),
0dd7c540 741 scm_i_default_port_conversion_handler (),
2721f918
AW
742 stream);
743}
744
745SCM
746scm_new_port_table_entry (scm_t_bits tag)
747{
748 return scm_c_make_port (tag, 0, 0);
0f2d19dd
JB
749}
750
d68fee48 751\f
d68fee48 752
19b8d12b 753/* Predicates. */
1cc91f1b 754
19b8d12b
AW
755SCM_DEFINE (scm_port_p, "port?", 1, 0, 0,
756 (SCM x),
757 "Return a boolean indicating whether @var{x} is a port.\n"
758 "Equivalent to @code{(or (input-port? @var{x}) (output-port?\n"
759 "@var{x}))}.")
760#define FUNC_NAME s_scm_port_p
0f2d19dd 761{
19b8d12b 762 return scm_from_bool (SCM_PORTP (x));
0f2d19dd 763}
19b8d12b 764#undef FUNC_NAME
0f2d19dd 765
19b8d12b
AW
766SCM_DEFINE (scm_input_port_p, "input-port?", 1, 0, 0,
767 (SCM x),
768 "Return @code{#t} if @var{x} is an input port, otherwise return\n"
769 "@code{#f}. Any object satisfying this predicate also satisfies\n"
770 "@code{port?}.")
771#define FUNC_NAME s_scm_input_port_p
0f2d19dd 772{
19b8d12b 773 return scm_from_bool (SCM_INPUT_PORT_P (x));
0f2d19dd 774}
1bbd0b84 775#undef FUNC_NAME
0f2d19dd 776
19b8d12b
AW
777SCM_DEFINE (scm_output_port_p, "output-port?", 1, 0, 0,
778 (SCM x),
779 "Return @code{#t} if @var{x} is an output port, otherwise return\n"
780 "@code{#f}. Any object satisfying this predicate also satisfies\n"
781 "@code{port?}.")
782#define FUNC_NAME s_scm_output_port_p
0f2d19dd 783{
19b8d12b
AW
784 x = SCM_COERCE_OUTPORT (x);
785 return scm_from_bool (SCM_OUTPUT_PORT_P (x));
0f2d19dd 786}
1bbd0b84 787#undef FUNC_NAME
0f2d19dd 788
19b8d12b
AW
789SCM_DEFINE (scm_port_closed_p, "port-closed?", 1, 0, 0,
790 (SCM port),
791 "Return @code{#t} if @var{port} is closed or @code{#f} if it is\n"
792 "open.")
793#define FUNC_NAME s_scm_port_closed_p
d617ee18 794{
19b8d12b
AW
795 SCM_VALIDATE_PORT (1, port);
796 return scm_from_bool (!SCM_OPPORTP (port));
d617ee18 797}
19b8d12b 798#undef FUNC_NAME
eadd48de 799
19b8d12b
AW
800SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0,
801 (SCM x),
802 "Return @code{#t} if @var{x} is an end-of-file object; otherwise\n"
803 "return @code{#f}.")
804#define FUNC_NAME s_scm_eof_object_p
eadd48de 805{
19b8d12b 806 return scm_from_bool (SCM_EOF_OBJECT_P (x));
eadd48de 807}
1bbd0b84 808#undef FUNC_NAME
eadd48de
GH
809
810
d68fee48 811\f
19b8d12b 812
d68fee48
JB
813/* Closing ports. */
814
03a2eeb0
AW
815static void close_iconv_descriptors (scm_t_iconv_descriptors *id);
816
0f2d19dd
JB
817/* scm_close_port
818 * Call the close operation on a port object.
eadd48de 819 * see also scm_close.
0f2d19dd 820 */
3b3b36dd 821SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0,
1bbd0b84 822 (SCM port),
1e6808ea
MG
823 "Close the specified port object. Return @code{#t} if it\n"
824 "successfully closes a port or @code{#f} if it was already\n"
825 "closed. An exception may be raised if an error occurs, for\n"
826 "example when flushing buffered output. See also @ref{Ports and\n"
827 "File Descriptors, close}, for a procedure which can close file\n"
828 "descriptors.")
1bbd0b84 829#define FUNC_NAME s_scm_close_port
0f2d19dd 830{
f6f4feb0 831 scm_t_port_internal *pti;
eadd48de
GH
832 int rv;
833
78446828
MV
834 port = SCM_COERCE_OUTPORT (port);
835
7a754ca6 836 SCM_VALIDATE_PORT (1, port);
0f2d19dd 837 if (SCM_CLOSEDP (port))
eadd48de 838 return SCM_BOOL_F;
03a2eeb0 839
f6f4feb0 840 pti = SCM_PORT_GET_INTERNAL (port);
5a771d5f 841 SCM_CLR_PORT_OPEN_FLAG (port);
03a2eeb0
AW
842
843 if (SCM_PORT_DESCRIPTOR (port)->flags & SCM_PORT_TYPE_HAS_FLUSH)
844 scm_weak_set_remove_x (scm_i_port_weak_set, port);
845
5a771d5f
AW
846 if (SCM_PORT_DESCRIPTOR (port)->close)
847 /* Note! This may throw an exception. Anything after this point
848 should be resilient to non-local exits. */
849 rv = SCM_PORT_DESCRIPTOR (port)->close (port);
850 else
851 rv = 0;
03a2eeb0 852
f6f4feb0 853 if (pti->iconv_descriptors)
03a2eeb0 854 {
5a771d5f
AW
855 /* If we don't get here, the iconv_descriptors finalizer will
856 clean up. */
f6f4feb0
MW
857 close_iconv_descriptors (pti->iconv_descriptors);
858 pti->iconv_descriptors = NULL;
03a2eeb0
AW
859 }
860
7888309b 861 return scm_from_bool (rv >= 0);
7a754ca6
MD
862}
863#undef FUNC_NAME
864
865SCM_DEFINE (scm_close_input_port, "close-input-port", 1, 0, 0,
866 (SCM port),
867 "Close the specified input port object. The routine has no effect if\n"
868 "the file has already been closed. An exception may be raised if an\n"
869 "error occurs. The value returned is unspecified.\n\n"
870 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
871 "which can close file descriptors.")
872#define FUNC_NAME s_scm_close_input_port
873{
874 SCM_VALIDATE_INPUT_PORT (1, port);
875 scm_close_port (port);
876 return SCM_UNSPECIFIED;
877}
878#undef FUNC_NAME
879
880SCM_DEFINE (scm_close_output_port, "close-output-port", 1, 0, 0,
881 (SCM port),
882 "Close the specified output port object. The routine has no effect if\n"
883 "the file has already been closed. An exception may be raised if an\n"
884 "error occurs. The value returned is unspecified.\n\n"
885 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
886 "which can close file descriptors.")
887#define FUNC_NAME s_scm_close_output_port
888{
889 port = SCM_COERCE_OUTPORT (port);
890 SCM_VALIDATE_OUTPUT_PORT (1, port);
891 scm_close_port (port);
892 return SCM_UNSPECIFIED;
0f2d19dd 893}
1bbd0b84 894#undef FUNC_NAME
0f2d19dd 895
2721f918 896
19b8d12b 897\f
2721f918 898
19b8d12b
AW
899/* Encoding characters to byte streams, and decoding byte streams to
900 characters. */
5dbc6c06 901
19b8d12b
AW
902/* A fluid specifying the default encoding for newly created ports. If it is
903 a string, that is the encoding. If it is #f, it is in the "native"
904 (Latin-1) encoding. */
905SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding");
906
907static int scm_port_encoding_init = 0;
908
909/* Use ENCODING as the default encoding for future ports. */
c536b4b3 910void
19b8d12b 911scm_i_set_default_port_encoding (const char *encoding)
c2ca4493 912{
19b8d12b
AW
913 if (!scm_port_encoding_init
914 || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
915 scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
916 SCM_EOL);
fdfe6305 917
449ca87b 918 if (encoding_matches (encoding, "ISO-8859-1"))
19b8d12b
AW
919 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
920 else
93c4fa21
AW
921 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
922 scm_from_latin1_string (canonicalize_encoding (encoding)));
2721f918 923}
3a5fb14d 924
93c4fa21 925/* Return the name of the default encoding for newly created ports. */
19b8d12b
AW
926const char *
927scm_i_default_port_encoding (void)
2721f918 928{
19b8d12b 929 if (!scm_port_encoding_init)
93c4fa21 930 return "ISO-8859-1";
19b8d12b 931 else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
93c4fa21 932 return "ISO-8859-1";
19b8d12b
AW
933 else
934 {
935 SCM encoding;
936
937 encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
938 if (!scm_is_string (encoding))
93c4fa21 939 return "ISO-8859-1";
19b8d12b
AW
940 else
941 return scm_i_string_chars (encoding);
942 }
c536b4b3 943}
fdfe6305 944
0dd7c540
AW
945/* A fluid specifying the default conversion handler for newly created
946 ports. Its value should be one of the symbols below. */
947SCM_VARIABLE (default_conversion_strategy_var,
948 "%default-port-conversion-strategy");
1b3daef0 949
0dd7c540
AW
950/* Whether the above fluid is initialized. */
951static int scm_conversion_strategy_init = 0;
c2ca4493 952
0dd7c540
AW
953/* The possible conversion strategies. */
954SCM_SYMBOL (sym_error, "error");
955SCM_SYMBOL (sym_substitute, "substitute");
956SCM_SYMBOL (sym_escape, "escape");
c536b4b3 957
0dd7c540
AW
958/* Return the default failed encoding conversion policy for new created
959 ports. */
960scm_t_string_failed_conversion_handler
961scm_i_default_port_conversion_handler (void)
962{
963 scm_t_string_failed_conversion_handler handler;
d68fee48 964
0dd7c540
AW
965 if (!scm_conversion_strategy_init
966 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
967 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
968 else
969 {
970 SCM fluid, value;
0f2d19dd 971
0dd7c540
AW
972 fluid = SCM_VARIABLE_REF (default_conversion_strategy_var);
973 value = scm_fluid_ref (fluid);
0f2d19dd 974
0dd7c540
AW
975 if (scm_is_eq (sym_substitute, value))
976 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
977 else if (scm_is_eq (sym_escape, value))
978 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
979 else
980 /* Default to 'error also when the fluid's value is not one of
981 the valid symbols. */
982 handler = SCM_FAILED_CONVERSION_ERROR;
983 }
eb5c0a2a 984
0dd7c540 985 return handler;
60d0643d 986}
0f2d19dd 987
0dd7c540
AW
988/* Use HANDLER as the default conversion strategy for future ports. */
989void
990scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler
991 handler)
0f2d19dd 992{
0dd7c540 993 SCM strategy;
0f2d19dd 994
0dd7c540
AW
995 if (!scm_conversion_strategy_init
996 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
997 scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
998 SCM_EOL);
999
1000 switch (handler)
78446828 1001 {
0dd7c540
AW
1002 case SCM_FAILED_CONVERSION_ERROR:
1003 strategy = sym_error;
1004 break;
1005
1006 case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE:
1007 strategy = sym_escape;
1008 break;
1009
1010 case SCM_FAILED_CONVERSION_QUESTION_MARK:
1011 strategy = sym_substitute;
1012 break;
1013
1014 default:
1015 abort ();
78446828 1016 }
0f2d19dd 1017
0dd7c540
AW
1018 scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var),
1019 strategy);
1020}
5dbc6c06 1021
f6f4feb0
MW
1022static void
1023scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port);
1024
1025/* If the next LEN bytes from PORT are equal to those in BYTES, then
1026 return 1, else return 0. Leave the port position unchanged. */
1027static int
1028looking_at_bytes (SCM port, const unsigned char *bytes, int len)
0f2d19dd 1029{
f6f4feb0
MW
1030 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1031 int i = 0;
1032
1033 while (i < len && scm_peek_byte_or_eof_unlocked (port) == bytes[i])
78446828 1034 {
f6f4feb0
MW
1035 pt->read_pos++;
1036 i++;
78446828 1037 }
f6f4feb0
MW
1038 scm_i_unget_bytes_unlocked (bytes, i, port);
1039 return (i == len);
0f2d19dd
JB
1040}
1041
f6f4feb0
MW
1042static const unsigned char scm_utf8_bom[3] = {0xEF, 0xBB, 0xBF};
1043static const unsigned char scm_utf16be_bom[2] = {0xFE, 0xFF};
1044static const unsigned char scm_utf16le_bom[2] = {0xFF, 0xFE};
1045static const unsigned char scm_utf32be_bom[4] = {0x00, 0x00, 0xFE, 0xFF};
1046static const unsigned char scm_utf32le_bom[4] = {0xFF, 0xFE, 0x00, 0x00};
5dbc6c06 1047
f6f4feb0
MW
1048/* Decide what byte order to use for a UTF-16 port. Return "UTF-16BE"
1049 or "UTF-16LE". MODE must be either SCM_PORT_READ or SCM_PORT_WRITE,
1050 and specifies which operation is about to be done. The MODE
1051 determines how we will decide the byte order. We deliberately avoid
1052 reading from the port unless the user is about to do so. If the user
1053 is about to read, then we look for a BOM, and if present, we use it
1054 to determine the byte order. Otherwise we choose big endian, as
1055 recommended by the Unicode Standard. Note that the BOM (if any) is
1056 not consumed here. */
1057static const char *
1058decide_utf16_encoding (SCM port, scm_t_port_rw_active mode)
5dbc6c06 1059{
f6f4feb0
MW
1060 if (mode == SCM_PORT_READ
1061 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1062 && looking_at_bytes (port, scm_utf16le_bom, sizeof scm_utf16le_bom))
1063 return "UTF-16LE";
1064 else
1065 return "UTF-16BE";
5dbc6c06
HWN
1066}
1067
f6f4feb0
MW
1068/* Decide what byte order to use for a UTF-32 port. Return "UTF-32BE"
1069 or "UTF-32LE". See the comment above 'decide_utf16_encoding' for
1070 details. */
1071static const char *
1072decide_utf32_encoding (SCM port, scm_t_port_rw_active mode)
89ea5b7c 1073{
f6f4feb0
MW
1074 if (mode == SCM_PORT_READ
1075 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1076 && looking_at_bytes (port, scm_utf32le_bom, sizeof scm_utf32le_bom))
1077 return "UTF-32LE";
1078 else
1079 return "UTF-32BE";
89ea5b7c 1080}
0f2d19dd 1081
6c98257f 1082static void
2aed2667 1083finalize_iconv_descriptors (void *ptr, void *data)
c536b4b3 1084{
6c98257f
AW
1085 close_iconv_descriptors (ptr);
1086}
c2ca4493 1087
6c98257f
AW
1088static scm_t_iconv_descriptors *
1089open_iconv_descriptors (const char *encoding, int reading, int writing)
1090{
1091 scm_t_iconv_descriptors *id;
1092 iconv_t input_cd, output_cd;
8ebd06c6 1093 size_t i;
c536b4b3 1094
6c98257f
AW
1095 input_cd = (iconv_t) -1;
1096 output_cd = (iconv_t) -1;
d68fee48 1097
8ebd06c6
AW
1098 for (i = 0; encoding[i]; i++)
1099 if (encoding[i] > 127)
1100 goto invalid_encoding;
1101
6c98257f
AW
1102 if (reading)
1103 {
1104 /* Open an input iconv conversion descriptor, from ENCODING
1105 to UTF-8. We choose UTF-8, not UTF-32, because iconv
1106 implementations can typically convert from anything to
1107 UTF-8, but not to UTF-32 (see
1108 <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
1109
1110 /* Assume opening an iconv descriptor causes about 16 KB of
1111 allocation. */
1112 scm_gc_register_allocation (16 * 1024);
1113
1114 input_cd = iconv_open ("UTF-8", encoding);
1115 if (input_cd == (iconv_t) -1)
1116 goto invalid_encoding;
1117 }
0f2d19dd 1118
6c98257f 1119 if (writing)
19b8d12b 1120 {
6c98257f
AW
1121 /* Assume opening an iconv descriptor causes about 16 KB of
1122 allocation. */
1123 scm_gc_register_allocation (16 * 1024);
0f2d19dd 1124
6c98257f
AW
1125 output_cd = iconv_open (encoding, "UTF-8");
1126 if (output_cd == (iconv_t) -1)
1127 {
1128 if (input_cd != (iconv_t) -1)
1129 iconv_close (input_cd);
1130 goto invalid_encoding;
1131 }
19b8d12b 1132 }
eb5c0a2a 1133
6c98257f
AW
1134 id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors");
1135 id->input_cd = input_cd;
1136 id->output_cd = output_cd;
1137
6978c673
AW
1138 /* Register a finalizer to close the descriptors. */
1139 scm_i_set_finalizer (id, finalize_iconv_descriptors, NULL);
19b8d12b 1140
6c98257f 1141 return id;
19b8d12b
AW
1142
1143 invalid_encoding:
1144 {
1145 SCM err;
8ebd06c6 1146 err = scm_from_latin1_string (encoding);
6c98257f 1147 scm_misc_error ("open_iconv_descriptors",
19b8d12b
AW
1148 "invalid or unknown character encoding ~s",
1149 scm_list_1 (err));
1150 }
0f2d19dd
JB
1151}
1152
6c98257f
AW
1153static void
1154close_iconv_descriptors (scm_t_iconv_descriptors *id)
1155{
1156 if (id->input_cd != (iconv_t) -1)
1157 iconv_close (id->input_cd);
1158 if (id->output_cd != (iconv_t) -1)
1159 iconv_close (id->output_cd);
1160 id->input_cd = (void *) -1;
1161 id->output_cd = (void *) -1;
1162}
1163
1164scm_t_iconv_descriptors *
f6f4feb0 1165scm_i_port_iconv_descriptors (SCM port, scm_t_port_rw_active mode)
6c98257f 1166{
f6f4feb0 1167 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
6c98257f 1168
f6f4feb0 1169 assert (pti->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV);
6c98257f 1170
f6f4feb0 1171 if (!pti->iconv_descriptors)
6c98257f 1172 {
f6f4feb0
MW
1173 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1174 const char *precise_encoding;
7b292a9d 1175
6c98257f
AW
1176 if (!pt->encoding)
1177 pt->encoding = "ISO-8859-1";
7be1705d 1178
f6f4feb0
MW
1179 /* If the specified encoding is UTF-16 or UTF-32, then make
1180 that more precise by deciding what byte order to use. */
1181 if (strcmp (pt->encoding, "UTF-16") == 0)
1182 precise_encoding = decide_utf16_encoding (port, mode);
1183 else if (strcmp (pt->encoding, "UTF-32") == 0)
1184 precise_encoding = decide_utf32_encoding (port, mode);
1185 else
1186 precise_encoding = pt->encoding;
7b292a9d 1187
f6f4feb0
MW
1188 pti->iconv_descriptors =
1189 open_iconv_descriptors (precise_encoding,
6c98257f
AW
1190 SCM_INPUT_PORT_P (port),
1191 SCM_OUTPUT_PORT_P (port));
1192 }
1193
f6f4feb0 1194 return pti->iconv_descriptors;
6c98257f
AW
1195}
1196
8ebd06c6 1197/* The name of the encoding is itself encoded in ASCII. */
6c98257f
AW
1198void
1199scm_i_set_port_encoding_x (SCM port, const char *encoding)
1200{
1201 scm_t_port *pt;
f6f4feb0 1202 scm_t_port_internal *pti;
6c98257f
AW
1203 scm_t_iconv_descriptors *prev;
1204
1205 /* Set the character encoding for this port. */
1206 pt = SCM_PTAB_ENTRY (port);
f6f4feb0
MW
1207 pti = SCM_PORT_GET_INTERNAL (port);
1208 prev = pti->iconv_descriptors;
7b292a9d 1209
f6f4feb0
MW
1210 /* In order to handle cases where the encoding changes mid-stream
1211 (e.g. within an HTTP stream, or within a file that is composed of
1212 segments with different encodings), we consider this to be "stream
1213 start" for purposes of BOM handling, regardless of our actual file
1214 position. */
1215 pti->at_stream_start_for_bom_read = 1;
1216 pti->at_stream_start_for_bom_write = 1;
6c98257f 1217
93c4fa21 1218 if (encoding_matches (encoding, "UTF-8"))
6c98257f
AW
1219 {
1220 pt->encoding = "UTF-8";
f6f4feb0 1221 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
6c98257f 1222 }
93c4fa21 1223 else if (encoding_matches (encoding, "ISO-8859-1"))
79eb47ea
AW
1224 {
1225 pt->encoding = "ISO-8859-1";
f6f4feb0 1226 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
79eb47ea 1227 }
6c98257f
AW
1228 else
1229 {
f6f4feb0
MW
1230 pt->encoding = canonicalize_encoding (encoding);
1231 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
6c98257f
AW
1232 }
1233
f6f4feb0 1234 pti->iconv_descriptors = NULL;
6c98257f
AW
1235 if (prev)
1236 close_iconv_descriptors (prev);
1237}
1238
19b8d12b
AW
1239SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
1240 (SCM port),
1241 "Returns, as a string, the character encoding that @var{port}\n"
1242 "uses to interpret its input and output.\n")
1243#define FUNC_NAME s_scm_port_encoding
0f2d19dd 1244{
19b8d12b
AW
1245 SCM_VALIDATE_PORT (1, port);
1246
93c4fa21 1247 return scm_from_latin1_string (SCM_PTAB_ENTRY (port)->encoding);
0f2d19dd 1248}
1bbd0b84 1249#undef FUNC_NAME
0f2d19dd 1250
19b8d12b
AW
1251SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
1252 (SCM port, SCM enc),
1253 "Sets the character encoding that will be used to interpret all\n"
1254 "port I/O. New ports are created with the encoding\n"
1255 "appropriate for the current locale if @code{setlocale} has \n"
1256 "been called or ISO-8859-1 otherwise\n"
1257 "and this procedure can be used to modify that encoding.\n")
1258#define FUNC_NAME s_scm_set_port_encoding_x
5dbc6c06 1259{
19b8d12b 1260 char *enc_str;
5dbc6c06 1261
19b8d12b
AW
1262 SCM_VALIDATE_PORT (1, port);
1263 SCM_VALIDATE_STRING (2, enc);
0f2d19dd 1264
8ebd06c6 1265 enc_str = scm_to_latin1_string (enc);
19b8d12b
AW
1266 scm_i_set_port_encoding_x (port, enc_str);
1267 free (enc_str);
1268
1269 return SCM_UNSPECIFIED;
0f2d19dd 1270}
1bbd0b84 1271#undef FUNC_NAME
0f2d19dd 1272
19b8d12b
AW
1273SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
1274 1, 0, 0, (SCM port),
1275 "Returns the behavior of the port when handling a character that\n"
1276 "is not representable in the port's current encoding.\n"
1277 "It returns the symbol @code{error} if unrepresentable characters\n"
1278 "should cause exceptions, @code{substitute} if the port should\n"
1279 "try to replace unrepresentable characters with question marks or\n"
1280 "approximate characters, or @code{escape} if unrepresentable\n"
1281 "characters should be converted to string escapes.\n"
1282 "\n"
1283 "If @var{port} is @code{#f}, then the current default behavior\n"
1284 "will be returned. New ports will have this default behavior\n"
1285 "when they are created.\n")
1286#define FUNC_NAME s_scm_port_conversion_strategy
889975e5 1287{
19b8d12b 1288 scm_t_string_failed_conversion_handler h;
7b292a9d 1289
19b8d12b 1290 SCM_VALIDATE_OPPORT (1, port);
7b292a9d 1291
0dd7c540
AW
1292 if (scm_is_false (port))
1293 h = scm_i_default_port_conversion_handler ();
1294 else
7b292a9d 1295 {
0dd7c540
AW
1296 scm_t_port *pt;
1297
19b8d12b 1298 SCM_VALIDATE_OPPORT (1, port);
0dd7c540
AW
1299 pt = SCM_PTAB_ENTRY (port);
1300
1301 h = pt->ilseq_handler;
7b292a9d
LC
1302 }
1303
19b8d12b
AW
1304 if (h == SCM_FAILED_CONVERSION_ERROR)
1305 return scm_from_latin1_symbol ("error");
1306 else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
1307 return scm_from_latin1_symbol ("substitute");
1308 else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
1309 return scm_from_latin1_symbol ("escape");
1310 else
1311 abort ();
7b292a9d 1312
19b8d12b
AW
1313 /* Never gets here. */
1314 return SCM_UNDEFINED;
1315}
1316#undef FUNC_NAME
7b292a9d 1317
19b8d12b
AW
1318SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!",
1319 2, 0, 0,
1320 (SCM port, SCM sym),
1321 "Sets the behavior of the interpreter when outputting a character\n"
1322 "that is not representable in the port's current encoding.\n"
1323 "@var{sym} can be either @code{'error}, @code{'substitute}, or\n"
1324 "@code{'escape}. If it is @code{'error}, an error will be thrown\n"
1325 "when an unconvertible character is encountered. If it is\n"
1326 "@code{'substitute}, then unconvertible characters will \n"
1327 "be replaced with approximate characters, or with question marks\n"
1328 "if no approximately correct character is available.\n"
1329 "If it is @code{'escape},\n"
1330 "it will appear as a hex escape when output.\n"
1331 "\n"
1332 "If @var{port} is an open port, the conversion error behavior\n"
1333 "is set for that port. If it is @code{#f}, it is set as the\n"
1334 "default behavior for any future ports that get created in\n"
1335 "this thread.\n")
1336#define FUNC_NAME s_scm_set_port_conversion_strategy_x
1337{
0dd7c540 1338 scm_t_string_failed_conversion_handler handler;
7b292a9d 1339
0dd7c540
AW
1340 if (scm_is_eq (sym, sym_error))
1341 handler = SCM_FAILED_CONVERSION_ERROR;
1342 else if (scm_is_eq (sym, sym_substitute))
1343 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
1344 else if (scm_is_eq (sym, sym_escape))
1345 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
1346 else
1347 SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym));
7b292a9d 1348
0dd7c540
AW
1349 if (scm_is_false (port))
1350 scm_i_set_default_port_conversion_handler (handler);
1351 else
19b8d12b 1352 {
0dd7c540
AW
1353 SCM_VALIDATE_OPPORT (1, port);
1354 SCM_PTAB_ENTRY (port)->ilseq_handler = handler;
19b8d12b 1355 }
7b292a9d 1356
19b8d12b
AW
1357 return SCM_UNSPECIFIED;
1358}
1359#undef FUNC_NAME
7be1705d 1360
7b292a9d 1361
19b8d12b 1362\f
7b292a9d 1363
14dcb5cc
AW
1364/* The port lock. */
1365
1366static void
92c0ebac 1367lock_port (void *mutex)
14dcb5cc 1368{
789dd40b 1369 scm_i_pthread_mutex_lock ((scm_i_pthread_mutex_t *) mutex);
14dcb5cc
AW
1370}
1371
1372static void
92c0ebac 1373unlock_port (void *mutex)
14dcb5cc 1374{
789dd40b 1375 scm_i_pthread_mutex_unlock ((scm_i_pthread_mutex_t *) mutex);
14dcb5cc
AW
1376}
1377
1378void
1379scm_dynwind_lock_port (SCM port)
92c0ebac 1380#define FUNC_NAME "dynwind-lock-port"
14dcb5cc 1381{
92c0ebac
AW
1382 scm_i_pthread_mutex_t *lock;
1383 SCM_VALIDATE_OPPORT (SCM_ARG1, port);
1384 scm_c_lock_port (port, &lock);
1385 if (lock)
1386 {
1387 scm_dynwind_unwind_handler (unlock_port, lock, SCM_F_WIND_EXPLICITLY);
1388 scm_dynwind_rewind_handler (lock_port, lock, 0);
1389 }
14dcb5cc 1390}
92c0ebac 1391#undef FUNC_NAME
14dcb5cc
AW
1392
1393
1394\f
1395
19b8d12b 1396/* Input. */
7be1705d 1397
0d959103
AW
1398int
1399scm_get_byte_or_eof (SCM port)
1400{
92c0ebac 1401 scm_i_pthread_mutex_t *lock;
0d959103
AW
1402 int ret;
1403
92c0ebac 1404 scm_c_lock_port (port, &lock);
0d959103 1405 ret = scm_get_byte_or_eof_unlocked (port);
92c0ebac
AW
1406 if (lock)
1407 scm_i_pthread_mutex_unlock (lock);
0d959103
AW
1408
1409 return ret;
1410}
1411
1412int
1413scm_peek_byte_or_eof (SCM port)
1414{
92c0ebac 1415 scm_i_pthread_mutex_t *lock;
0d959103
AW
1416 int ret;
1417
92c0ebac 1418 scm_c_lock_port (port, &lock);
0d959103 1419 ret = scm_peek_byte_or_eof_unlocked (port);
92c0ebac
AW
1420 if (lock)
1421 scm_i_pthread_mutex_unlock (lock);
0d959103
AW
1422
1423 return ret;
1424}
1425
19b8d12b
AW
1426/* scm_c_read
1427 *
1428 * Used by an application to read arbitrary number of bytes from an
1429 * SCM port. Same semantics as libc read, except that scm_c_read only
1430 * returns less than SIZE bytes if at end-of-file.
1431 *
1432 * Warning: Doesn't update port line and column counts! */
7b292a9d 1433
19b8d12b
AW
1434/* This structure, and the following swap_buffer function, are used
1435 for temporarily swapping a port's own read buffer, and the buffer
1436 that the caller of scm_c_read provides. */
1437struct port_and_swap_buffer
1438{
1439 scm_t_port *pt;
1440 unsigned char *buffer;
1441 size_t size;
1442};
7b292a9d 1443
19b8d12b
AW
1444static void
1445swap_buffer (void *data)
1446{
1447 struct port_and_swap_buffer *psb = (struct port_and_swap_buffer *) data;
1448 unsigned char *old_buf = psb->pt->read_buf;
1449 size_t old_size = psb->pt->read_buf_size;
7be1705d 1450
19b8d12b
AW
1451 /* Make the port use (buffer, size) from the struct. */
1452 psb->pt->read_pos = psb->pt->read_buf = psb->pt->read_end = psb->buffer;
1453 psb->pt->read_buf_size = psb->size;
7b292a9d 1454
19b8d12b
AW
1455 /* Save the port's old (buffer, size) in the struct. */
1456 psb->buffer = old_buf;
1457 psb->size = old_size;
7b292a9d
LC
1458}
1459
f6f4feb0
MW
1460static int scm_i_fill_input_unlocked (SCM port);
1461
19b8d12b 1462size_t
be632904 1463scm_c_read_unlocked (SCM port, void *buffer, size_t size)
19b8d12b 1464#define FUNC_NAME "scm_c_read"
7b292a9d
LC
1465{
1466 scm_t_port *pt;
f6f4feb0 1467 scm_t_port_internal *pti;
19b8d12b
AW
1468 size_t n_read = 0, n_available;
1469 struct port_and_swap_buffer psb;
889975e5 1470
19b8d12b 1471 SCM_VALIDATE_OPINPORT (1, port);
889975e5 1472
19b8d12b 1473 pt = SCM_PTAB_ENTRY (port);
f6f4feb0 1474 pti = SCM_PORT_GET_INTERNAL (port);
19b8d12b
AW
1475 if (pt->rw_active == SCM_PORT_WRITE)
1476 SCM_PORT_DESCRIPTOR (port)->flush (port);
889975e5 1477
19b8d12b
AW
1478 if (pt->rw_random)
1479 pt->rw_active = SCM_PORT_READ;
889975e5 1480
19b8d12b
AW
1481 /* Take bytes first from the port's read buffer. */
1482 if (pt->read_pos < pt->read_end)
1483 {
1484 n_available = min (size, pt->read_end - pt->read_pos);
1485 memcpy (buffer, pt->read_pos, n_available);
1486 buffer = (char *) buffer + n_available;
1487 pt->read_pos += n_available;
1488 n_read += n_available;
1489 size -= n_available;
1490 }
889975e5 1491
19b8d12b
AW
1492 /* Avoid the scm_dynwind_* costs if we now have enough data. */
1493 if (size == 0)
1494 return n_read;
b5cb4464 1495
f6f4feb0
MW
1496 /* Now we will call scm_i_fill_input_unlocked repeatedly until we have
1497 read the requested number of bytes. (Note that a single
1498 scm_i_fill_input_unlocked call does not guarantee to fill the whole
1499 of the port's read buffer.) */
d1b9f8ac 1500 if (pt->read_buf_size <= 1
f6f4feb0 1501 && pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
b5cb4464 1502 {
f6f4feb0
MW
1503 /* The port that we are reading from is unbuffered - i.e. does not
1504 have its own persistent buffer - but we have a buffer, provided
1505 by our caller, that is the right size for the data that is
1506 wanted. For the following scm_i_fill_input_unlocked calls,
1507 therefore, we use the buffer in hand as the port's read buffer.
1508
1509 We need to make sure that the port's normal (1 byte) buffer is
1510 reinstated in case one of the scm_i_fill_input_unlocked ()
1511 calls throws an exception; we use the scm_dynwind_* API to
1512 achieve that.
75192345
MG
1513
1514 A consequence of this optimization is that the fill_input
1515 functions can't unget characters. That'll push data to the
1516 pushback buffer instead of this psb buffer. */
1517#if SCM_DEBUG == 1
1518 unsigned char *pback = pt->putback_buf;
1519#endif
6d227556
NJ
1520 psb.pt = pt;
1521 psb.buffer = buffer;
1522 psb.size = size;
1523 scm_dynwind_begin (SCM_F_DYNWIND_REWINDABLE);
1524 scm_dynwind_rewind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1525 scm_dynwind_unwind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1526
f6f4feb0
MW
1527 /* Call scm_i_fill_input_unlocked until we have all the bytes that
1528 we need, or we hit EOF. */
1529 while (pt->read_buf_size && (scm_i_fill_input_unlocked (port) != EOF))
6d227556
NJ
1530 {
1531 pt->read_buf_size -= (pt->read_end - pt->read_pos);
1532 pt->read_pos = pt->read_buf = pt->read_end;
1533 }
75192345
MG
1534#if SCM_DEBUG == 1
1535 if (pback != pt->putback_buf
1536 || pt->read_buf - (unsigned char *) buffer < 0)
1537 scm_misc_error (FUNC_NAME,
1538 "scm_c_read must not call a fill function that pushes "
1539 "back characters onto an unbuffered port", SCM_EOL);
1540#endif
6d227556 1541 n_read += pt->read_buf - (unsigned char *) buffer;
75192345 1542
6d227556
NJ
1543 /* Reinstate the port's normal buffer. */
1544 scm_dynwind_end ();
1545 }
1546 else
1547 {
1548 /* The port has its own buffer. It is important that we use it,
1549 even if it happens to be smaller than our caller's buffer, so
1550 that a custom port implementation's entry points (in
1551 particular, fill_input) can rely on the buffer always being
1552 the same as they first set up. */
f6f4feb0 1553 while (size && (scm_i_fill_input_unlocked (port) != EOF))
6d227556
NJ
1554 {
1555 n_available = min (size, pt->read_end - pt->read_pos);
1556 memcpy (buffer, pt->read_pos, n_available);
1557 buffer = (char *) buffer + n_available;
1558 pt->read_pos += n_available;
1559 n_read += n_available;
1560 size -= n_available;
1561 }
1562 }
6fe692e9 1563
b5cb4464 1564 return n_read;
6fe692e9 1565}
693758d5 1566#undef FUNC_NAME
6fe692e9 1567
be632904
AW
1568size_t
1569scm_c_read (SCM port, void *buffer, size_t size)
1570{
92c0ebac 1571 scm_i_pthread_mutex_t *lock;
be632904
AW
1572 size_t ret;
1573
92c0ebac 1574 scm_c_lock_port (port, &lock);
be632904 1575 ret = scm_c_read_unlocked (port, buffer, size);
92c0ebac
AW
1576 if (lock)
1577 scm_i_pthread_mutex_unlock (lock);
1578
be632904
AW
1579
1580 return ret;
1581}
1582
19b8d12b
AW
1583/* Update the line and column number of PORT after consumption of C. */
1584static inline void
1585update_port_lf (scm_t_wchar c, SCM port)
6fe692e9 1586{
19b8d12b
AW
1587 switch (c)
1588 {
1589 case '\a':
1590 case EOF:
1591 break;
1592 case '\b':
1593 SCM_DECCOL (port);
1594 break;
1595 case '\n':
1596 SCM_INCLINE (port);
1597 break;
1598 case '\r':
1599 SCM_ZEROCOL (port);
1600 break;
1601 case '\t':
1602 SCM_TABCOL (port);
1603 break;
1604 default:
1605 SCM_INCCOL (port);
1606 break;
1607 }
1608}
6fe692e9 1609
19b8d12b 1610#define SCM_MBCHAR_BUF_SIZE (4)
6fe692e9 1611
19b8d12b
AW
1612/* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint.
1613 UTF8_BUF is assumed to contain a valid UTF-8 sequence. */
1614static scm_t_wchar
1615utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
283a1a0e 1616{
19b8d12b 1617 scm_t_wchar codepoint;
283a1a0e 1618
19b8d12b 1619 if (utf8_buf[0] <= 0x7f)
283a1a0e 1620 {
19b8d12b
AW
1621 assert (size == 1);
1622 codepoint = utf8_buf[0];
1623 }
1624 else if ((utf8_buf[0] & 0xe0) == 0xc0)
1625 {
1626 assert (size == 2);
1627 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL
1628 | (utf8_buf[1] & 0x3f);
1629 }
1630 else if ((utf8_buf[0] & 0xf0) == 0xe0)
1631 {
1632 assert (size == 3);
1633 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL
1634 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL
1635 | (utf8_buf[2] & 0x3f);
283a1a0e
GH
1636 }
1637 else
19b8d12b
AW
1638 {
1639 assert (size == 4);
1640 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL
1641 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL
1642 | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL
1643 | (utf8_buf[3] & 0x3f);
1644 }
283a1a0e 1645
19b8d12b 1646 return codepoint;
283a1a0e
GH
1647}
1648
19b8d12b
AW
1649/* Read a UTF-8 sequence from PORT. On success, return 0 and set
1650 *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8
1651 representation, and set *LEN to the length in bytes. Return
1652 `EILSEQ' on error. */
1653static int
1654get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
1655 scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1656{
1657#define ASSERT_NOT_EOF(b) \
1658 if (SCM_UNLIKELY ((b) == EOF)) \
1659 goto invalid_seq
1660#define CONSUME_PEEKED_BYTE() \
1661 pt->read_pos++
ee149d03 1662
19b8d12b
AW
1663 int byte;
1664 scm_t_port *pt;
ee149d03 1665
19b8d12b
AW
1666 *len = 0;
1667 pt = SCM_PTAB_ENTRY (port);
840ae05d 1668
0d959103 1669 byte = scm_get_byte_or_eof_unlocked (port);
19b8d12b 1670 if (byte == EOF)
6c951427 1671 {
19b8d12b
AW
1672 *codepoint = EOF;
1673 return 0;
1674 }
6c951427 1675
19b8d12b
AW
1676 buf[0] = (scm_t_uint8) byte;
1677 *len = 1;
6c951427 1678
19b8d12b
AW
1679 if (buf[0] <= 0x7f)
1680 /* 1-byte form. */
1681 *codepoint = buf[0];
1682 else if (buf[0] >= 0xc2 && buf[0] <= 0xdf)
1683 {
1684 /* 2-byte form. */
0d959103 1685 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b 1686 ASSERT_NOT_EOF (byte);
6c951427 1687
19b8d12b
AW
1688 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1689 goto invalid_seq;
6c951427 1690
19b8d12b
AW
1691 CONSUME_PEEKED_BYTE ();
1692 buf[1] = (scm_t_uint8) byte;
1693 *len = 2;
1694
1695 *codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL
1696 | (buf[1] & 0x3f);
6c951427 1697 }
19b8d12b 1698 else if ((buf[0] & 0xf0) == 0xe0)
6c951427 1699 {
19b8d12b 1700 /* 3-byte form. */
0d959103 1701 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b 1702 ASSERT_NOT_EOF (byte);
6c951427 1703
19b8d12b
AW
1704 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80
1705 || (buf[0] == 0xe0 && byte < 0xa0)
1706 || (buf[0] == 0xed && byte > 0x9f)))
1707 goto invalid_seq;
6c951427 1708
19b8d12b
AW
1709 CONSUME_PEEKED_BYTE ();
1710 buf[1] = (scm_t_uint8) byte;
1711 *len = 2;
1712
0d959103 1713 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b
AW
1714 ASSERT_NOT_EOF (byte);
1715
1716 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1717 goto invalid_seq;
1718
1719 CONSUME_PEEKED_BYTE ();
1720 buf[2] = (scm_t_uint8) byte;
1721 *len = 3;
1722
1723 *codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL
1724 | ((scm_t_wchar) buf[1] & 0x3f) << 6UL
1725 | (buf[2] & 0x3f);
6c951427 1726 }
19b8d12b
AW
1727 else if (buf[0] >= 0xf0 && buf[0] <= 0xf4)
1728 {
1729 /* 4-byte form. */
0d959103 1730 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b 1731 ASSERT_NOT_EOF (byte);
6c951427 1732
19b8d12b
AW
1733 if (SCM_UNLIKELY (((byte & 0xc0) != 0x80)
1734 || (buf[0] == 0xf0 && byte < 0x90)
1735 || (buf[0] == 0xf4 && byte > 0x8f)))
1736 goto invalid_seq;
ee149d03 1737
19b8d12b
AW
1738 CONSUME_PEEKED_BYTE ();
1739 buf[1] = (scm_t_uint8) byte;
1740 *len = 2;
889975e5 1741
0d959103 1742 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b 1743 ASSERT_NOT_EOF (byte);
889975e5 1744
19b8d12b
AW
1745 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1746 goto invalid_seq;
63479112 1747
19b8d12b
AW
1748 CONSUME_PEEKED_BYTE ();
1749 buf[2] = (scm_t_uint8) byte;
1750 *len = 3;
63479112 1751
0d959103 1752 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b 1753 ASSERT_NOT_EOF (byte);
63479112 1754
19b8d12b
AW
1755 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1756 goto invalid_seq;
63479112 1757
19b8d12b
AW
1758 CONSUME_PEEKED_BYTE ();
1759 buf[3] = (scm_t_uint8) byte;
1760 *len = 4;
840ae05d 1761
19b8d12b
AW
1762 *codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL
1763 | ((scm_t_wchar) buf[1] & 0x3f) << 12UL
1764 | ((scm_t_wchar) buf[2] & 0x3f) << 6UL
1765 | (buf[3] & 0x3f);
ee149d03
JB
1766 }
1767 else
19b8d12b 1768 goto invalid_seq;
ee149d03 1769
19b8d12b 1770 return 0;
ee149d03 1771
19b8d12b
AW
1772 invalid_seq:
1773 /* Here we could choose the consume the faulty byte when it's not a
1774 valid starting byte, but it's not a requirement. What Section 3.9
1775 of Unicode 6.0.0 mandates, though, is to not consume a byte that
1776 would otherwise be a valid starting byte. */
ee149d03 1777
19b8d12b 1778 return EILSEQ;
ee149d03 1779
19b8d12b
AW
1780#undef CONSUME_PEEKED_BYTE
1781#undef ASSERT_NOT_EOF
1782}
1783
79eb47ea 1784/* Read an ISO-8859-1 codepoint (a byte) from PORT. On success, return
05b4d910
AW
1785 0 and set *CODEPOINT to the codepoint that was read, fill BUF with
1786 its UTF-8 representation, and set *LEN to the length in bytes.
1787 Return `EILSEQ' on error. */
79eb47ea
AW
1788static int
1789get_latin1_codepoint (SCM port, scm_t_wchar *codepoint,
1790 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1791{
1792 *codepoint = scm_get_byte_or_eof_unlocked (port);
1793
1794 if (*codepoint == EOF)
1795 *len = 0;
1796 else
1797 {
1798 *len = 1;
1799 buf[0] = *codepoint;
1800 }
1801 return 0;
1802}
1803
19b8d12b
AW
1804/* Likewise, read a byte sequence from PORT, passing it through its
1805 input conversion descriptor. */
1806static int
1807get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
1808 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1809{
6c98257f 1810 scm_t_iconv_descriptors *id;
19b8d12b 1811 scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
f6f4feb0 1812 size_t input_size = 0;
19b8d12b 1813
f6f4feb0 1814 id = scm_i_port_iconv_descriptors (port, SCM_PORT_READ);
19b8d12b 1815
f6f4feb0 1816 for (;;)
19b8d12b 1817 {
f6f4feb0
MW
1818 int byte_read;
1819 char *input, *output;
19b8d12b
AW
1820 size_t input_left, output_left, done;
1821
0d959103 1822 byte_read = scm_get_byte_or_eof_unlocked (port);
f6f4feb0 1823 if (SCM_UNLIKELY (byte_read == EOF))
19b8d12b 1824 {
f6f4feb0
MW
1825 if (SCM_LIKELY (input_size == 0))
1826 {
1827 *codepoint = (scm_t_wchar) EOF;
1828 *len = input_size;
1829 return 0;
1830 }
1831 else
1832 {
1833 /* EOF found in the middle of a multibyte character. */
1834 scm_i_set_pending_eof (port);
1835 return EILSEQ;
1836 }
19b8d12b
AW
1837 }
1838
f6f4feb0 1839 buf[input_size++] = byte_read;
19b8d12b
AW
1840
1841 input = buf;
f6f4feb0
MW
1842 input_left = input_size;
1843 output = (char *) utf8_buf;
19b8d12b
AW
1844 output_left = sizeof (utf8_buf);
1845
6c98257f 1846 done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
f6f4feb0 1847
19b8d12b
AW
1848 if (done == (size_t) -1)
1849 {
f6f4feb0
MW
1850 int err = errno;
1851 if (SCM_LIKELY (err == EINVAL))
1852 /* The input byte sequence did not form a complete
1853 character. Read another byte and try again. */
1854 continue;
1855 else
1856 return err;
19b8d12b
AW
1857 }
1858 else
f6f4feb0
MW
1859 {
1860 size_t output_size = sizeof (utf8_buf) - output_left;
1861 if (SCM_LIKELY (output_size > 0))
1862 {
1863 /* iconv generated output. Convert the UTF8_BUF sequence
1864 to a Unicode code point. */
1865 *codepoint = utf8_to_codepoint (utf8_buf, output_size);
1866 *len = input_size;
1867 return 0;
1868 }
1869 else
1870 {
1871 /* iconv consumed some bytes without producing any output.
1872 Most likely this means that a Unicode byte-order mark
1873 (BOM) was consumed, which should not be included in the
1874 returned buf. Shift any remaining bytes to the beginning
1875 of buf, and continue the loop. */
1876 memmove (buf, input, input_left);
1877 input_size = input_left;
1878 continue;
1879 }
1880 }
19b8d12b 1881 }
19b8d12b
AW
1882}
1883
1884/* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
1885 with the byte representation of the codepoint in PORT's encoding, and
1886 set *LEN to the length in bytes of that representation. Return 0 on
1887 success and an errno value on error. */
24ea9f9c 1888static SCM_C_INLINE int
19b8d12b
AW
1889get_codepoint (SCM port, scm_t_wchar *codepoint,
1890 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1891{
1892 int err;
1893 scm_t_port *pt = SCM_PTAB_ENTRY (port);
f6f4feb0 1894 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
19b8d12b 1895
f6f4feb0 1896 if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
19b8d12b 1897 err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
f6f4feb0 1898 else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
79eb47ea 1899 err = get_latin1_codepoint (port, codepoint, buf, len);
19b8d12b
AW
1900 else
1901 err = get_iconv_codepoint (port, codepoint, buf, len);
1902
1903 if (SCM_LIKELY (err == 0))
f6f4feb0
MW
1904 {
1905 if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read))
1906 {
1907 /* Record that we're no longer at stream start. */
1908 pti->at_stream_start_for_bom_read = 0;
1909 if (pt->rw_random)
1910 pti->at_stream_start_for_bom_write = 0;
1911
1912 /* If we just read a BOM in an encoding that recognizes them,
1913 then silently consume it and read another code point. */
1914 if (SCM_UNLIKELY
1915 (*codepoint == SCM_UNICODE_BOM
1916 && (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8
1917 || strcmp (pt->encoding, "UTF-16") == 0
1918 || strcmp (pt->encoding, "UTF-32") == 0)))
1919 return get_codepoint (port, codepoint, buf, len);
1920 }
1921 update_port_lf (*codepoint, port);
1922 }
19b8d12b
AW
1923 else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
1924 {
1925 *codepoint = '?';
1926 err = 0;
1927 update_port_lf (*codepoint, port);
1928 }
1929
1930 return err;
1931}
1932
1933/* Read a codepoint from PORT and return it. */
1934scm_t_wchar
be632904 1935scm_getc_unlocked (SCM port)
19b8d12b
AW
1936#define FUNC_NAME "scm_getc"
1937{
1938 int err;
1939 size_t len;
1940 scm_t_wchar codepoint;
1941 char buf[SCM_MBCHAR_BUF_SIZE];
1942
1943 err = get_codepoint (port, &codepoint, buf, &len);
1944 if (SCM_UNLIKELY (err != 0))
1945 /* At this point PORT should point past the invalid encoding, as per
1946 R6RS-lib Section 8.2.4. */
1947 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
1948
1949 return codepoint;
1950}
1951#undef FUNC_NAME
1952
be632904
AW
1953scm_t_wchar
1954scm_getc (SCM port)
1955{
92c0ebac 1956 scm_i_pthread_mutex_t *lock;
be632904
AW
1957 scm_t_wchar ret;
1958
92c0ebac 1959 scm_c_lock_port (port, &lock);
be632904 1960 ret = scm_getc_unlocked (port);
92c0ebac
AW
1961 if (lock)
1962 scm_i_pthread_mutex_unlock (lock);
1963
be632904
AW
1964
1965 return ret;
1966}
1967
19b8d12b
AW
1968SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
1969 (SCM port),
1970 "Return the next character available from @var{port}, updating\n"
1971 "@var{port} to point to the following character. If no more\n"
1972 "characters are available, the end-of-file object is returned.\n"
1973 "\n"
1974 "When @var{port}'s data cannot be decoded according to its\n"
1975 "character encoding, a @code{decoding-error} is raised and\n"
1976 "@var{port} points past the erroneous byte sequence.\n")
1977#define FUNC_NAME s_scm_read_char
1978{
1979 scm_t_wchar c;
1980 if (SCM_UNBNDP (port))
1981 port = scm_current_input_port ();
1982 SCM_VALIDATE_OPINPORT (1, port);
be632904 1983 c = scm_getc_unlocked (port);
19b8d12b
AW
1984 if (EOF == c)
1985 return SCM_EOF_VAL;
1986 return SCM_MAKE_CHAR (c);
1987}
1988#undef FUNC_NAME
1989
1990
1991\f
1992
1993/* Pushback. */
f6f4feb0 1994\f
19b8d12b 1995
f6f4feb0
MW
1996
1997static void
1998scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
1999#define FUNC_NAME "scm_unget_bytes"
19b8d12b
AW
2000{
2001 scm_t_port *pt = SCM_PTAB_ENTRY (port);
f6f4feb0 2002 size_t old_len, new_len;
19b8d12b 2003
f6f4feb0 2004 scm_i_clear_pending_eof (port);
19b8d12b 2005
f6f4feb0 2006 if (pt->read_buf != pt->putback_buf)
19b8d12b
AW
2007 /* switch to the put-back buffer. */
2008 {
2009 if (pt->putback_buf == NULL)
2010 {
f6f4feb0
MW
2011 pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
2012 ? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
19b8d12b
AW
2013 pt->putback_buf
2014 = (unsigned char *) scm_gc_malloc_pointerless
f6f4feb0 2015 (pt->putback_buf_size, "putback buffer");
19b8d12b
AW
2016 }
2017
2018 pt->saved_read_buf = pt->read_buf;
2019 pt->saved_read_pos = pt->read_pos;
2020 pt->saved_read_end = pt->read_end;
2021 pt->saved_read_buf_size = pt->read_buf_size;
2022
f6f4feb0
MW
2023 /* Put read_pos at the end of the buffer, so that ungets will not
2024 have to shift the buffer contents each time. */
2025 pt->read_buf = pt->putback_buf;
2026 pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
19b8d12b
AW
2027 pt->read_buf_size = pt->putback_buf_size;
2028 }
2029
f6f4feb0
MW
2030 old_len = pt->read_end - pt->read_pos;
2031 new_len = old_len + len;
2032
2033 if (new_len > pt->read_buf_size)
2034 /* The putback buffer needs to be enlarged. */
2035 {
2036 size_t new_buf_size;
2037 unsigned char *new_buf, *new_end, *new_pos;
2038
2039 new_buf_size = pt->read_buf_size * 2;
2040 if (new_buf_size < new_len)
2041 new_buf_size = new_len;
2042
2043 new_buf = (unsigned char *)
2044 scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
2045
2046 /* Put the bytes at the end of the buffer, so that future
2047 ungets won't need to shift the buffer. */
2048 new_end = new_buf + new_buf_size;
2049 new_pos = new_end - old_len;
2050 memcpy (new_pos, pt->read_pos, old_len);
2051
2052 pt->read_buf = pt->putback_buf = new_buf;
2053 pt->read_pos = new_pos;
2054 pt->read_end = new_end;
2055 pt->read_buf_size = pt->putback_buf_size = new_buf_size;
2056 }
2057 else if (pt->read_buf + len < pt->read_pos)
2058 /* If needed, shift the existing buffer contents up.
2059 This should not happen unless some external code
2060 manipulates the putback buffer pointers. */
2061 {
2062 unsigned char *new_end = pt->read_buf + pt->read_buf_size;
2063 unsigned char *new_pos = new_end - old_len;
2064
2065 memmove (new_pos, pt->read_pos, old_len);
2066 pt->read_pos = new_pos;
2067 pt->read_end = new_end;
2068 }
2069
2070 /* Move read_pos back and copy the bytes there. */
2071 pt->read_pos -= len;
2072 memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
2073
2074 if (pt->rw_active == SCM_PORT_WRITE)
2075 scm_flush (port);
19b8d12b
AW
2076
2077 if (pt->rw_random)
2078 pt->rw_active = SCM_PORT_READ;
2079}
2080#undef FUNC_NAME
2081
f6f4feb0
MW
2082void
2083scm_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
7f6c3f8f 2084{
f6f4feb0 2085 scm_i_unget_bytes_unlocked (buf, len, port);
7f6c3f8f
MW
2086}
2087
2088void
f6f4feb0
MW
2089scm_unget_byte_unlocked (int c, SCM port)
2090{
2091 unsigned char byte = c;
2092 scm_i_unget_bytes_unlocked (&byte, 1, port);
2093}
2094
2095void
2096scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
7f6c3f8f 2097{
f6f4feb0
MW
2098 scm_i_pthread_mutex_t *lock;
2099 scm_c_lock_port (port, &lock);
2100 scm_i_unget_bytes_unlocked (buf, len, port);
2101 if (lock)
2102 scm_i_pthread_mutex_unlock (lock);
2103}
7f6c3f8f 2104
c932ce0b
AW
2105void
2106scm_unget_byte (int c, SCM port)
2107{
f6f4feb0 2108 unsigned char byte = c;
92c0ebac
AW
2109 scm_i_pthread_mutex_t *lock;
2110 scm_c_lock_port (port, &lock);
f6f4feb0 2111 scm_i_unget_bytes_unlocked (&byte, 1, port);
92c0ebac
AW
2112 if (lock)
2113 scm_i_pthread_mutex_unlock (lock);
c932ce0b
AW
2114}
2115
19b8d12b 2116void
c932ce0b 2117scm_ungetc_unlocked (scm_t_wchar c, SCM port)
19b8d12b
AW
2118#define FUNC_NAME "scm_ungetc"
2119{
2120 scm_t_port *pt = SCM_PTAB_ENTRY (port);
be7ecef0 2121 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
19b8d12b
AW
2122 char *result;
2123 char result_buf[10];
19b8d12b 2124 size_t len;
19b8d12b 2125
19b8d12b 2126 len = sizeof (result_buf);
be7ecef0
AW
2127
2128 if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
2129 {
2130 if (c < 0xf0)
2131 {
2132 result_buf[0] = (char) c;
2133 result = result_buf;
2134 len = 1;
2135 }
2136 else
2137 result =
2138 (char *) u32_to_u8 ((uint32_t *) &c, 1, (uint8_t *) result_buf, &len);
2139 }
2140 else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1 && c <= 0xff)
2141 {
2142 result_buf[0] = (char) c;
2143 result = result_buf;
2144 len = 1;
2145 }
2146 else
2147 result = u32_conv_to_encoding (pt->encoding,
2148 (enum iconv_ilseq_handler) pt->ilseq_handler,
2149 (uint32_t *) &c, 1, NULL,
2150 result_buf, &len);
19b8d12b
AW
2151
2152 if (SCM_UNLIKELY (result == NULL || len == 0))
2153 scm_encoding_error (FUNC_NAME, errno,
2154 "conversion to port encoding failed",
2155 SCM_BOOL_F, SCM_MAKE_CHAR (c));
2156
f6f4feb0 2157 scm_i_unget_bytes_unlocked ((unsigned char *) result, len, port);
19b8d12b
AW
2158
2159 if (SCM_UNLIKELY (result != result_buf))
2160 free (result);
2161
2162 if (c == '\n')
2163 {
2164 /* What should col be in this case?
2165 * We'll leave it at -1.
2166 */
2167 SCM_LINUM (port) -= 1;
2168 }
2169 else
2170 SCM_COL(port) -= 1;
2171}
2172#undef FUNC_NAME
2173
c932ce0b
AW
2174void
2175scm_ungetc (scm_t_wchar c, SCM port)
2176{
92c0ebac
AW
2177 scm_i_pthread_mutex_t *lock;
2178 scm_c_lock_port (port, &lock);
c932ce0b 2179 scm_ungetc_unlocked (c, port);
92c0ebac
AW
2180 if (lock)
2181 scm_i_pthread_mutex_unlock (lock);
2182
c932ce0b 2183}
19b8d12b
AW
2184
2185void
c932ce0b 2186scm_ungets_unlocked (const char *s, int n, SCM port)
19b8d12b
AW
2187{
2188 /* This is simple minded and inefficient, but unreading strings is
2189 * probably not a common operation, and remember that line and
2190 * column numbers have to be handled...
2191 *
2192 * Please feel free to write an optimized version!
2193 */
2194 while (n--)
c932ce0b 2195 scm_ungetc_unlocked (s[n], port);
19b8d12b
AW
2196}
2197
c932ce0b
AW
2198void
2199scm_ungets (const char *s, int n, SCM port)
2200{
92c0ebac
AW
2201 scm_i_pthread_mutex_t *lock;
2202 scm_c_lock_port (port, &lock);
c932ce0b 2203 scm_ungets_unlocked (s, n, port);
92c0ebac
AW
2204 if (lock)
2205 scm_i_pthread_mutex_unlock (lock);
2206
c932ce0b 2207}
19b8d12b
AW
2208
2209SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
1bbd0b84 2210 (SCM port),
1e6808ea
MG
2211 "Return the next character available from @var{port},\n"
2212 "@emph{without} updating @var{port} to point to the following\n"
2213 "character. If no more characters are available, the\n"
c2dfff19
KR
2214 "end-of-file object is returned.\n"
2215 "\n"
2216 "The value returned by\n"
1e6808ea
MG
2217 "a call to @code{peek-char} is the same as the value that would\n"
2218 "have been returned by a call to @code{read-char} on the same\n"
2219 "port. The only difference is that the very next call to\n"
2220 "@code{read-char} or @code{peek-char} on that @var{port} will\n"
2221 "return the value returned by the preceding call to\n"
2222 "@code{peek-char}. In particular, a call to @code{peek-char} on\n"
2223 "an interactive port will hang waiting for input whenever a call\n"
c62da8f8
LC
2224 "to @code{read-char} would have hung.\n"
2225 "\n"
2226 "As for @code{read-char}, a @code{decoding-error} may be raised\n"
2227 "if such a situation occurs. However, unlike with @code{read-char},\n"
2228 "@var{port} still points at the beginning of the erroneous byte\n"
2229 "sequence when the error is raised.\n")
1bbd0b84 2230#define FUNC_NAME s_scm_peek_char
ee149d03 2231{
c62da8f8 2232 int err;
fd5eec2b
LC
2233 SCM result;
2234 scm_t_wchar c;
2235 char bytes[SCM_MBCHAR_BUF_SIZE];
7f6c3f8f 2236 long column, line;
fe8935d4 2237 size_t len = 0;
fd5eec2b 2238
ee149d03 2239 if (SCM_UNBNDP (port))
9de87eea 2240 port = scm_current_input_port ();
b2456dd4 2241 SCM_VALIDATE_OPINPORT (1, port);
fd5eec2b
LC
2242
2243 column = SCM_COL (port);
2244 line = SCM_LINUM (port);
2245
c62da8f8 2246 err = get_codepoint (port, &c, bytes, &len);
fd5eec2b 2247
f6f4feb0 2248 scm_i_unget_bytes_unlocked ((unsigned char *) bytes, len, port);
fd5eec2b 2249
c62da8f8
LC
2250 SCM_COL (port) = column;
2251 SCM_LINUM (port) = line;
fd5eec2b 2252
c62da8f8
LC
2253 if (SCM_UNLIKELY (err != 0))
2254 {
2255 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
2256
2257 /* Shouldn't happen since `catch' always aborts to prompt. */
2258 result = SCM_BOOL_F;
fd5eec2b 2259 }
c62da8f8 2260 else if (c == EOF)
69bc9ff3 2261 {
f6f4feb0
MW
2262 scm_i_set_pending_eof (port);
2263 result = SCM_EOF_VAL;
69bc9ff3 2264 }
c62da8f8
LC
2265 else
2266 result = SCM_MAKE_CHAR (c);
fd5eec2b
LC
2267
2268 return result;
3cb988bd 2269}
1bbd0b84 2270#undef FUNC_NAME
3cb988bd 2271
1be4270a 2272SCM_DEFINE (scm_unread_char, "unread-char", 1, 1, 0,
1bbd0b84 2273 (SCM cobj, SCM port),
b7e64f8b
BT
2274 "Place character @var{cobj} in @var{port} so that it will be\n"
2275 "read by the next read operation. If called multiple times, the\n"
2276 "unread characters will be read again in last-in first-out\n"
2277 "order. If @var{port} is not supplied, the current input port\n"
2278 "is used.")
1bbd0b84 2279#define FUNC_NAME s_scm_unread_char
0f2d19dd
JB
2280{
2281 int c;
2282
34d19ef6 2283 SCM_VALIDATE_CHAR (1, cobj);
0f2d19dd 2284 if (SCM_UNBNDP (port))
9de87eea 2285 port = scm_current_input_port ();
b2456dd4 2286 SCM_VALIDATE_OPINPORT (2, port);
0f2d19dd 2287
7866a09b 2288 c = SCM_CHAR (cobj);
0f2d19dd 2289
c932ce0b 2290 scm_ungetc_unlocked (c, port);
0f2d19dd
JB
2291 return cobj;
2292}
1bbd0b84 2293#undef FUNC_NAME
0f2d19dd 2294
a1ec6916 2295SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0,
1bbd0b84 2296 (SCM str, SCM port),
b380b885
MD
2297 "Place the string @var{str} in @var{port} so that its characters will be\n"
2298 "read in subsequent read operations. If called multiple times, the\n"
2299 "unread characters will be read again in last-in first-out order. If\n"
2300 "@var{port} is not supplied, the current-input-port is used.")
1bbd0b84 2301#define FUNC_NAME s_scm_unread_string
ee1e7e13 2302{
889975e5 2303 int n;
34d19ef6 2304 SCM_VALIDATE_STRING (1, str);
ee1e7e13 2305 if (SCM_UNBNDP (port))
9de87eea 2306 port = scm_current_input_port ();
b2456dd4 2307 SCM_VALIDATE_OPINPORT (2, port);
ee1e7e13 2308
889975e5
MG
2309 n = scm_i_string_length (str);
2310
2311 while (n--)
c932ce0b 2312 scm_ungetc_unlocked (scm_i_string_ref (str, n), port);
ee1e7e13
MD
2313
2314 return str;
2315}
1bbd0b84 2316#undef FUNC_NAME
ee1e7e13 2317
840ae05d 2318
19b8d12b 2319\f
23f2b9a3 2320
19b8d12b 2321/* Manipulating the buffers. */
840ae05d 2322
4251ae2e
AW
2323/* This routine does not take any locks, as it is usually called as part
2324 of a port implementation. */
19b8d12b
AW
2325void
2326scm_port_non_buffer (scm_t_port *pt)
2327{
2328 pt->read_pos = pt->read_buf = pt->read_end = &pt->shortbuf;
2329 pt->write_buf = pt->write_pos = &pt->shortbuf;
2330 pt->read_buf_size = pt->write_buf_size = 1;
2331 pt->write_end = pt->write_buf + pt->write_buf_size;
840ae05d 2332}
8ab3d8a0 2333
19b8d12b
AW
2334/* this should only be called when the read buffer is empty. it
2335 tries to refill the read buffer. it returns the first char from
2336 the port, which is either EOF or *(pt->read_pos). */
f6f4feb0
MW
2337static int
2338scm_i_fill_input_unlocked (SCM port)
82893676 2339{
19b8d12b 2340 scm_t_port *pt = SCM_PTAB_ENTRY (port);
f6f4feb0 2341 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
8ab3d8a0 2342
19b8d12b 2343 assert (pt->read_pos == pt->read_end);
8ab3d8a0 2344
f6f4feb0
MW
2345 if (pti->pending_eof)
2346 {
2347 pti->pending_eof = 0;
2348 return EOF;
2349 }
2350
19b8d12b 2351 if (pt->read_buf == pt->putback_buf)
82893676 2352 {
19b8d12b
AW
2353 /* finished reading put-back chars. */
2354 pt->read_buf = pt->saved_read_buf;
2355 pt->read_pos = pt->saved_read_pos;
2356 pt->read_end = pt->saved_read_end;
2357 pt->read_buf_size = pt->saved_read_buf_size;
2358 if (pt->read_pos < pt->read_end)
2359 return *(pt->read_pos);
82893676 2360 }
19b8d12b 2361 return SCM_PORT_DESCRIPTOR (port)->fill_input (port);
82893676 2362}
82893676 2363
4251ae2e
AW
2364int
2365scm_fill_input (SCM port)
2366{
92c0ebac 2367 scm_i_pthread_mutex_t *lock;
4251ae2e
AW
2368 int ret;
2369
92c0ebac 2370 scm_c_lock_port (port, &lock);
4251ae2e 2371 ret = scm_fill_input_unlocked (port);
92c0ebac
AW
2372 if (lock)
2373 scm_i_pthread_mutex_unlock (lock);
2374
4251ae2e
AW
2375
2376 return ret;
2377}
2378
f6f4feb0
MW
2379/* Slow-path fallback for 'scm_get_byte_or_eof_unlocked' */
2380int
2381scm_slow_get_byte_or_eof_unlocked (SCM port)
d14af9f2 2382{
f6f4feb0
MW
2383 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2384
2385 if (pt->rw_active == SCM_PORT_WRITE)
2386 scm_flush_unlocked (port);
2387
2388 if (pt->rw_random)
2389 pt->rw_active = SCM_PORT_READ;
2390
2391 if (pt->read_pos >= pt->read_end)
2392 {
2393 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2394 return EOF;
2395 }
2396
2397 return *pt->read_pos++;
d14af9f2
MD
2398}
2399
f6f4feb0
MW
2400/* Slow-path fallback for 'scm_peek_byte_or_eof_unlocked' */
2401int
2402scm_slow_peek_byte_or_eof_unlocked (SCM port)
2403{
2404 scm_t_port *pt = SCM_PTAB_ENTRY (port);
d6a6989e 2405
f6f4feb0
MW
2406 if (pt->rw_active == SCM_PORT_WRITE)
2407 scm_flush_unlocked (port);
889975e5 2408
f6f4feb0
MW
2409 if (pt->rw_random)
2410 pt->rw_active = SCM_PORT_READ;
2411
2412 if (pt->read_pos >= pt->read_end)
2413 {
2414 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2415 {
2416 scm_i_set_pending_eof (port);
2417 return EOF;
2418 }
2419 }
2420
2421 return *pt->read_pos;
2422}
2423
a3ded465
AW
2424/* Move up to READ_LEN bytes from PORT's putback and/or read buffers
2425 into memory starting at DEST. Return the number of bytes moved.
2426 PORT's line/column numbers are left unchanged. */
19b8d12b
AW
2427size_t
2428scm_take_from_input_buffers (SCM port, char *dest, size_t read_len)
840ae05d 2429{
19b8d12b 2430 scm_t_port *pt = SCM_PTAB_ENTRY (port);
a3ded465 2431 size_t bytes_read = 0;
19b8d12b 2432 size_t from_buf = min (pt->read_end - pt->read_pos, read_len);
840ae05d 2433
19b8d12b 2434 if (from_buf > 0)
840ae05d 2435 {
19b8d12b
AW
2436 memcpy (dest, pt->read_pos, from_buf);
2437 pt->read_pos += from_buf;
a3ded465 2438 bytes_read += from_buf;
19b8d12b
AW
2439 read_len -= from_buf;
2440 dest += from_buf;
840ae05d 2441 }
3fe6190f 2442
19b8d12b
AW
2443 /* if putback was active, try the real input buffer too. */
2444 if (pt->read_buf == pt->putback_buf)
69bc9ff3 2445 {
19b8d12b
AW
2446 from_buf = min (pt->saved_read_end - pt->saved_read_pos, read_len);
2447 if (from_buf > 0)
2448 {
2449 memcpy (dest, pt->saved_read_pos, from_buf);
2450 pt->saved_read_pos += from_buf;
a3ded465 2451 bytes_read += from_buf;
19b8d12b 2452 }
69bc9ff3 2453 }
8ab3d8a0 2454
a3ded465 2455 return bytes_read;
840ae05d
JB
2456}
2457
19b8d12b
AW
2458/* Clear a port's read buffers, returning the contents. */
2459SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0,
1bbd0b84 2460 (SCM port),
19b8d12b
AW
2461 "This procedure clears a port's input buffers, similar\n"
2462 "to the way that force-output clears the output buffer. The\n"
2463 "contents of the buffers are returned as a single string, e.g.,\n"
a150979d 2464 "\n"
19b8d12b
AW
2465 "@lisp\n"
2466 "(define p (open-input-file ...))\n"
2467 "(drain-input p) => empty string, nothing buffered yet.\n"
2468 "(unread-char (read-char p) p)\n"
2469 "(drain-input p) => initial chars from p, up to the buffer size.\n"
2470 "@end lisp\n\n"
2471 "Draining the buffers may be useful for cleanly finishing\n"
2472 "buffered I/O so that the file descriptor can be used directly\n"
2473 "for further input.")
2474#define FUNC_NAME s_scm_drain_input
0f2d19dd 2475{
19b8d12b
AW
2476 SCM result;
2477 char *data;
2478 scm_t_port *pt;
2479 long count;
0f2d19dd 2480
19b8d12b
AW
2481 SCM_VALIDATE_OPINPORT (1, port);
2482 pt = SCM_PTAB_ENTRY (port);
2483
2484 count = pt->read_end - pt->read_pos;
2485 if (pt->read_buf == pt->putback_buf)
2486 count += pt->saved_read_end - pt->saved_read_pos;
2487
2488 if (count)
2489 {
2490 result = scm_i_make_string (count, &data, 0);
2491 scm_take_from_input_buffers (port, data, count);
2492 }
2493 else
2494 result = scm_nullstr;
2495
2496 return result;
d043d8c2 2497}
1bbd0b84 2498#undef FUNC_NAME
d043d8c2 2499
19b8d12b 2500void
4251ae2e 2501scm_end_input_unlocked (SCM port)
0f2d19dd 2502{
19b8d12b
AW
2503 long offset;
2504 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2505
f6f4feb0 2506 scm_i_clear_pending_eof (port);
19b8d12b
AW
2507 if (pt->read_buf == pt->putback_buf)
2508 {
2509 offset = pt->read_end - pt->read_pos;
2510 pt->read_buf = pt->saved_read_buf;
2511 pt->read_pos = pt->saved_read_pos;
2512 pt->read_end = pt->saved_read_end;
2513 pt->read_buf_size = pt->saved_read_buf_size;
2514 }
2515 else
2516 offset = 0;
2517
2518 SCM_PORT_DESCRIPTOR (port)->end_input (port, offset);
0f2d19dd
JB
2519}
2520
4251ae2e
AW
2521void
2522scm_end_input (SCM port)
2523{
92c0ebac
AW
2524 scm_i_pthread_mutex_t *lock;
2525 scm_c_lock_port (port, &lock);
4251ae2e 2526 scm_end_input_unlocked (port);
92c0ebac
AW
2527 if (lock)
2528 scm_i_pthread_mutex_unlock (lock);
2529
4251ae2e
AW
2530}
2531
19b8d12b
AW
2532SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0,
2533 (SCM port),
2534 "Flush the specified output port, or the current output port if @var{port}\n"
2535 "is omitted. The current output buffer contents are passed to the\n"
2536 "underlying port implementation (e.g., in the case of fports, the\n"
2537 "data will be written to the file and the output buffer will be cleared.)\n"
2538 "It has no effect on an unbuffered port.\n\n"
2539 "The return value is unspecified.")
2540#define FUNC_NAME s_scm_force_output
d043d8c2 2541{
19b8d12b
AW
2542 if (SCM_UNBNDP (port))
2543 port = scm_current_output_port ();
2544 else
2545 {
2546 port = SCM_COERCE_OUTPORT (port);
2547 SCM_VALIDATE_OPOUTPORT (1, port);
2548 }
4251ae2e 2549 scm_flush_unlocked (port);
564478fd 2550 return SCM_UNSPECIFIED;
d043d8c2 2551}
1bbd0b84 2552#undef FUNC_NAME
d043d8c2 2553
19b8d12b 2554void
4251ae2e 2555scm_flush_unlocked (SCM port)
0f2d19dd 2556{
19b8d12b 2557 SCM_PORT_DESCRIPTOR (port)->flush (port);
0f2d19dd
JB
2558}
2559
4251ae2e
AW
2560void
2561scm_flush (SCM port)
2562{
92c0ebac
AW
2563 scm_i_pthread_mutex_t *lock;
2564 scm_c_lock_port (port, &lock);
4251ae2e 2565 scm_flush_unlocked (port);
92c0ebac
AW
2566 if (lock)
2567 scm_i_pthread_mutex_unlock (lock);
2568
4251ae2e
AW
2569}
2570
f6f4feb0
MW
2571int
2572scm_fill_input_unlocked (SCM port)
337edc59 2573{
f6f4feb0
MW
2574 return scm_i_fill_input_unlocked (port);
2575}
337edc59 2576
d14af9f2 2577
19b8d12b 2578\f
d6a6989e 2579
19b8d12b 2580/* Output. */
889975e5 2581
0607ebbf
AW
2582void
2583scm_putc (char c, SCM port)
2584{
92c0ebac
AW
2585 scm_i_pthread_mutex_t *lock;
2586 scm_c_lock_port (port, &lock);
0607ebbf 2587 scm_putc_unlocked (c, port);
92c0ebac
AW
2588 if (lock)
2589 scm_i_pthread_mutex_unlock (lock);
2590
0607ebbf
AW
2591}
2592
2593void
2594scm_puts (const char *s, SCM port)
2595{
92c0ebac
AW
2596 scm_i_pthread_mutex_t *lock;
2597 scm_c_lock_port (port, &lock);
0607ebbf 2598 scm_puts_unlocked (s, port);
92c0ebac
AW
2599 if (lock)
2600 scm_i_pthread_mutex_unlock (lock);
2601
0607ebbf
AW
2602}
2603
19b8d12b
AW
2604/* scm_c_write
2605 *
2606 * Used by an application to write arbitrary number of bytes to an SCM
2607 * port. Similar semantics as libc write. However, unlike libc
2608 * write, scm_c_write writes the requested number of bytes and has no
2609 * return value.
2610 *
2611 * Warning: Doesn't update port line and column counts!
2612 */
9d9c66ba 2613void
f209aeee 2614scm_c_write_unlocked (SCM port, const void *ptr, size_t size)
19b8d12b 2615#define FUNC_NAME "scm_c_write"
9d9c66ba 2616{
19b8d12b
AW
2617 scm_t_port *pt;
2618 scm_t_ptob_descriptor *ptob;
9d9c66ba 2619
19b8d12b 2620 SCM_VALIDATE_OPOUTPORT (1, port);
9d9c66ba 2621
19b8d12b
AW
2622 pt = SCM_PTAB_ENTRY (port);
2623 ptob = SCM_PORT_DESCRIPTOR (port);
9d9c66ba 2624
19b8d12b 2625 if (pt->rw_active == SCM_PORT_READ)
4251ae2e 2626 scm_end_input_unlocked (port);
19b8d12b
AW
2627
2628 ptob->write (port, ptr, size);
2629
2630 if (pt->rw_random)
2631 pt->rw_active = SCM_PORT_WRITE;
889975e5 2632}
19b8d12b 2633#undef FUNC_NAME
889975e5 2634
f209aeee
AW
2635void
2636scm_c_write (SCM port, const void *ptr, size_t size)
2637{
92c0ebac
AW
2638 scm_i_pthread_mutex_t *lock;
2639 scm_c_lock_port (port, &lock);
f209aeee 2640 scm_c_write_unlocked (port, ptr, size);
92c0ebac
AW
2641 if (lock)
2642 scm_i_pthread_mutex_unlock (lock);
2643
f209aeee
AW
2644}
2645
19b8d12b
AW
2646/* scm_lfwrite
2647 *
2648 * This function differs from scm_c_write; it updates port line and
2649 * column. */
889975e5 2650void
f209aeee 2651scm_lfwrite_unlocked (const char *ptr, size_t size, SCM port)
889975e5 2652{
19b8d12b
AW
2653 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2654 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
f4bc4e59 2655
19b8d12b 2656 if (pt->rw_active == SCM_PORT_READ)
4251ae2e 2657 scm_end_input_unlocked (port);
f4bc4e59 2658
19b8d12b 2659 ptob->write (port, ptr, size);
f4bc4e59 2660
19b8d12b
AW
2661 for (; size; ptr++, size--)
2662 update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port);
d9544bf0 2663
19b8d12b
AW
2664 if (pt->rw_random)
2665 pt->rw_active = SCM_PORT_WRITE;
2666}
f4bc4e59 2667
f209aeee
AW
2668void
2669scm_lfwrite (const char *ptr, size_t size, SCM port)
2670{
92c0ebac
AW
2671 scm_i_pthread_mutex_t *lock;
2672 scm_c_lock_port (port, &lock);
f209aeee 2673 scm_lfwrite_unlocked (ptr, size, port);
92c0ebac
AW
2674 if (lock)
2675 scm_i_pthread_mutex_unlock (lock);
2676
f209aeee
AW
2677}
2678
19b8d12b
AW
2679/* Write STR to PORT from START inclusive to END exclusive. */
2680void
2681scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
2682{
2683 scm_t_port *pt = SCM_PTAB_ENTRY (port);
f4bc4e59 2684
19b8d12b 2685 if (pt->rw_active == SCM_PORT_READ)
4251ae2e 2686 scm_end_input_unlocked (port);
f4bc4e59 2687
19b8d12b
AW
2688 if (end == (size_t) -1)
2689 end = scm_i_string_length (str);
f4bc4e59 2690
fa980bcc 2691 scm_i_display_substring (str, start, end, port);
f4bc4e59 2692
19b8d12b
AW
2693 if (pt->rw_random)
2694 pt->rw_active = SCM_PORT_WRITE;
889975e5
MG
2695}
2696
19b8d12b
AW
2697
2698\f
2699
2700/* Querying and setting positions, and character availability. */
2701
2702SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0,
889975e5 2703 (SCM port),
19b8d12b
AW
2704 "Return @code{#t} if a character is ready on input @var{port}\n"
2705 "and return @code{#f} otherwise. If @code{char-ready?} returns\n"
2706 "@code{#t} then the next @code{read-char} operation on\n"
2707 "@var{port} is guaranteed not to hang. If @var{port} is a file\n"
2708 "port at end of file then @code{char-ready?} returns @code{#t}.\n"
2709 "\n"
2710 "@code{char-ready?} exists to make it possible for a\n"
2711 "program to accept characters from interactive ports without\n"
2712 "getting stuck waiting for input. Any input editors associated\n"
2713 "with such ports must make sure that characters whose existence\n"
2714 "has been asserted by @code{char-ready?} cannot be rubbed out.\n"
2715 "If @code{char-ready?} were to return @code{#f} at end of file,\n"
2716 "a port at end of file would be indistinguishable from an\n"
2717 "interactive port that has no ready characters.")
2718#define FUNC_NAME s_scm_char_ready_p
889975e5
MG
2719{
2720 scm_t_port *pt;
889975e5 2721
19b8d12b
AW
2722 if (SCM_UNBNDP (port))
2723 port = scm_current_input_port ();
2724 /* It's possible to close the current input port, so validate even in
2725 this case. */
2726 SCM_VALIDATE_OPINPORT (1, port);
889975e5
MG
2727
2728 pt = SCM_PTAB_ENTRY (port);
19b8d12b
AW
2729
2730 /* if the current read buffer is filled, or the
2731 last pushed-back char has been read and the saved buffer is
2732 filled, result is true. */
2733 if (pt->read_pos < pt->read_end
2734 || (pt->read_buf == pt->putback_buf
2735 && pt->saved_read_pos < pt->saved_read_end))
2736 return SCM_BOOL_T;
889975e5 2737 else
19b8d12b
AW
2738 {
2739 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
2740
2741 if (ptob->input_waiting)
2742 return scm_from_bool(ptob->input_waiting (port));
2743 else
2744 return SCM_BOOL_T;
2745 }
889975e5
MG
2746}
2747#undef FUNC_NAME
d6a6989e 2748
19b8d12b
AW
2749SCM_DEFINE (scm_seek, "seek", 3, 0, 0,
2750 (SCM fd_port, SCM offset, SCM whence),
0858753e 2751 "Sets the current position of @var{fd_port} to the integer\n"
19b8d12b
AW
2752 "@var{offset}, which is interpreted according to the value of\n"
2753 "@var{whence}.\n"
2754 "\n"
2755 "One of the following variables should be supplied for\n"
2756 "@var{whence}:\n"
2757 "@defvar SEEK_SET\n"
2758 "Seek from the beginning of the file.\n"
2759 "@end defvar\n"
2760 "@defvar SEEK_CUR\n"
2761 "Seek from the current position.\n"
2762 "@end defvar\n"
2763 "@defvar SEEK_END\n"
2764 "Seek from the end of the file.\n"
2765 "@end defvar\n"
0858753e 2766 "If @var{fd_port} is a file descriptor, the underlying system\n"
19b8d12b
AW
2767 "call is @code{lseek}. @var{port} may be a string port.\n"
2768 "\n"
2769 "The value returned is the new position in the file. This means\n"
2770 "that the current position of a port can be obtained using:\n"
2771 "@lisp\n"
2772 "(seek port 0 SEEK_CUR)\n"
2773 "@end lisp")
2774#define FUNC_NAME s_scm_seek
889975e5 2775{
19b8d12b 2776 int how;
889975e5 2777
19b8d12b 2778 fd_port = SCM_COERCE_OUTPORT (fd_port);
889975e5 2779
19b8d12b
AW
2780 how = scm_to_int (whence);
2781 if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END)
2782 SCM_OUT_OF_RANGE (3, whence);
da288f50 2783
19b8d12b
AW
2784 if (SCM_OPPORTP (fd_port))
2785 {
f6f4feb0 2786 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (fd_port);
19b8d12b
AW
2787 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (fd_port);
2788 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2789 off_t_or_off64_t rv;
2790
2791 if (!ptob->seek)
2792 SCM_MISC_ERROR ("port is not seekable",
2793 scm_cons (fd_port, SCM_EOL));
2794 else
f6f4feb0 2795 rv = ptob->seek (fd_port, off, how);
889975e5 2796
f6f4feb0
MW
2797 /* Set stream-start flags according to new position. */
2798 pti->at_stream_start_for_bom_read = (rv == 0);
2799 pti->at_stream_start_for_bom_write = (rv == 0);
b22e94db 2800
f6f4feb0 2801 scm_i_clear_pending_eof (fd_port);
889975e5 2802
19b8d12b
AW
2803 return scm_from_off_t_or_off64_t (rv);
2804 }
2805 else /* file descriptor?. */
2806 {
2807 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2808 off_t_or_off64_t rv;
2809 rv = lseek_or_lseek64 (scm_to_int (fd_port), off, how);
2810 if (rv == -1)
2811 SCM_SYSERROR;
2812 return scm_from_off_t_or_off64_t (rv);
2813 }
889975e5
MG
2814}
2815#undef FUNC_NAME
2816
19b8d12b
AW
2817#ifndef O_BINARY
2818#define O_BINARY 0
2819#endif
889975e5 2820
19b8d12b
AW
2821/* Mingw has ftruncate(), perhaps implemented above using chsize, but
2822 doesn't have the filename version truncate(), hence this code. */
2823#if HAVE_FTRUNCATE && ! HAVE_TRUNCATE
2824static int
2825truncate (const char *file, off_t length)
889975e5 2826{
19b8d12b
AW
2827 int ret, fdes;
2828
2829 fdes = open (file, O_BINARY | O_WRONLY);
2830 if (fdes == -1)
2831 return -1;
2832
2833 ret = ftruncate (fdes, length);
2834 if (ret == -1)
889975e5 2835 {
19b8d12b
AW
2836 int save_errno = errno;
2837 close (fdes);
2838 errno = save_errno;
2839 return -1;
889975e5 2840 }
19b8d12b
AW
2841
2842 return close (fdes);
889975e5 2843}
19b8d12b 2844#endif /* HAVE_FTRUNCATE && ! HAVE_TRUNCATE */
889975e5 2845
19b8d12b
AW
2846SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0,
2847 (SCM object, SCM length),
0858753e
AW
2848 "Truncate file @var{object} to @var{length} bytes. @var{object}\n"
2849 "can be a filename string, a port object, or an integer file\n"
2850 "descriptor.\n"
19b8d12b
AW
2851 "The return value is unspecified.\n"
2852 "\n"
2853 "For a port or file descriptor @var{length} can be omitted, in\n"
2854 "which case the file is truncated at the current position (per\n"
2855 "@code{ftell} above).\n"
2856 "\n"
2857 "On most systems a file can be extended by giving a length\n"
2858 "greater than the current size, but this is not mandatory in the\n"
2859 "POSIX standard.")
2860#define FUNC_NAME s_scm_truncate_file
889975e5 2861{
19b8d12b
AW
2862 int rv;
2863
2864 /* "object" can be a port, fdes or filename.
2865
2866 Negative "length" makes no sense, but it's left to truncate() or
2867 ftruncate() to give back an error for that (normally EINVAL).
2868 */
2869
2870 if (SCM_UNBNDP (length))
889975e5 2871 {
19b8d12b
AW
2872 /* must supply length if object is a filename. */
2873 if (scm_is_string (object))
2874 SCM_MISC_ERROR("must supply length if OBJECT is a filename", SCM_EOL);
2875
2876 length = scm_seek (object, SCM_INUM0, scm_from_int (SEEK_CUR));
2877 }
2878
2879 object = SCM_COERCE_OUTPORT (object);
2880 if (scm_is_integer (object))
2881 {
2882 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2883 SCM_SYSCALL (rv = ftruncate_or_ftruncate64 (scm_to_int (object),
2884 c_length));
2885 }
2886 else if (SCM_OPOUTPORTP (object))
2887 {
2888 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2889 scm_t_port *pt = SCM_PTAB_ENTRY (object);
2890 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (object);
b22e94db 2891
19b8d12b
AW
2892 if (!ptob->truncate)
2893 SCM_MISC_ERROR ("port is not truncatable", SCM_EOL);
889975e5 2894
f6f4feb0 2895 scm_i_clear_pending_eof (object);
19b8d12b 2896 if (pt->rw_active == SCM_PORT_READ)
4251ae2e 2897 scm_end_input_unlocked (object);
19b8d12b
AW
2898 else if (pt->rw_active == SCM_PORT_WRITE)
2899 ptob->flush (object);
889975e5 2900
19b8d12b
AW
2901 ptob->truncate (object, c_length);
2902 rv = 0;
889975e5
MG
2903 }
2904 else
2905 {
19b8d12b
AW
2906 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2907 char *str = scm_to_locale_string (object);
2908 int eno;
2909 SCM_SYSCALL (rv = truncate_or_truncate64 (str, c_length));
2910 eno = errno;
2911 free (str);
2912 errno = eno;
889975e5 2913 }
19b8d12b
AW
2914 if (rv == -1)
2915 SCM_SYSERROR;
2916 return SCM_UNSPECIFIED;
889975e5 2917}
19b8d12b 2918#undef FUNC_NAME
889975e5 2919
19b8d12b
AW
2920SCM_DEFINE (scm_port_line, "port-line", 1, 0, 0,
2921 (SCM port),
2922 "Return the current line number for @var{port}.\n"
889975e5 2923 "\n"
19b8d12b
AW
2924 "The first line of a file is 0. But you might want to add 1\n"
2925 "when printing line numbers, since starting from 1 is\n"
2926 "traditional in error messages, and likely to be more natural to\n"
2927 "non-programmers.")
2928#define FUNC_NAME s_scm_port_line
889975e5 2929{
19b8d12b
AW
2930 port = SCM_COERCE_OUTPORT (port);
2931 SCM_VALIDATE_OPENPORT (1, port);
2932 return scm_from_long (SCM_LINUM (port));
889975e5
MG
2933}
2934#undef FUNC_NAME
2935
19b8d12b
AW
2936SCM_DEFINE (scm_set_port_line_x, "set-port-line!", 2, 0, 0,
2937 (SCM port, SCM line),
2938 "Set the current line number for @var{port} to @var{line}. The\n"
2939 "first line of a file is 0.")
2940#define FUNC_NAME s_scm_set_port_line_x
889975e5 2941{
19b8d12b
AW
2942 port = SCM_COERCE_OUTPORT (port);
2943 SCM_VALIDATE_OPENPORT (1, port);
2944 SCM_PTAB_ENTRY (port)->line_number = scm_to_long (line);
2945 return SCM_UNSPECIFIED;
2946}
2947#undef FUNC_NAME
889975e5 2948
19b8d12b
AW
2949SCM_DEFINE (scm_port_column, "port-column", 1, 0, 0,
2950 (SCM port),
2951 "Return the current column number of @var{port}.\n"
2952 "If the number is\n"
2953 "unknown, the result is #f. Otherwise, the result is a 0-origin integer\n"
2954 "- i.e. the first character of the first line is line 0, column 0.\n"
2955 "(However, when you display a file position, for example in an error\n"
2956 "message, we recommend you add 1 to get 1-origin integers. This is\n"
2957 "because lines and column numbers traditionally start with 1, and that is\n"
2958 "what non-programmers will find most natural.)")
2959#define FUNC_NAME s_scm_port_column
2960{
2961 port = SCM_COERCE_OUTPORT (port);
2962 SCM_VALIDATE_OPENPORT (1, port);
2963 return scm_from_int (SCM_COL (port));
2964}
2965#undef FUNC_NAME
889975e5 2966
19b8d12b
AW
2967SCM_DEFINE (scm_set_port_column_x, "set-port-column!", 2, 0, 0,
2968 (SCM port, SCM column),
2969 "Set the current column of @var{port}. Before reading the first\n"
2970 "character on a line the column should be 0.")
2971#define FUNC_NAME s_scm_set_port_column_x
2972{
2973 port = SCM_COERCE_OUTPORT (port);
2974 SCM_VALIDATE_OPENPORT (1, port);
2975 SCM_PTAB_ENTRY (port)->column_number = scm_to_int (column);
2976 return SCM_UNSPECIFIED;
2977}
2978#undef FUNC_NAME
889975e5 2979
19b8d12b
AW
2980SCM_DEFINE (scm_port_filename, "port-filename", 1, 0, 0,
2981 (SCM port),
2982 "Return the filename associated with @var{port}, or @code{#f}\n"
2983 "if no filename is associated with the port.")
2984#define FUNC_NAME s_scm_port_filename
2985{
2986 port = SCM_COERCE_OUTPORT (port);
2987 SCM_VALIDATE_OPENPORT (1, port);
2988 return SCM_FILENAME (port);
2989}
2990#undef FUNC_NAME
889975e5 2991
19b8d12b
AW
2992SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0,
2993 (SCM port, SCM filename),
2994 "Change the filename associated with @var{port}, using the current input\n"
2995 "port if none is specified. Note that this does not change the port's\n"
2996 "source of data, but only the value that is returned by\n"
2997 "@code{port-filename} and reported in diagnostic output.")
2998#define FUNC_NAME s_scm_set_port_filename_x
2999{
3000 port = SCM_COERCE_OUTPORT (port);
3001 SCM_VALIDATE_OPENPORT (1, port);
3002 /* We allow the user to set the filename to whatever he likes. */
3003 SCM_SET_FILENAME (port, filename);
889975e5
MG
3004 return SCM_UNSPECIFIED;
3005}
3006#undef FUNC_NAME
3007
3008
19b8d12b
AW
3009\f
3010
3011/* Implementation helpers for port printing functions. */
889975e5 3012
f12733c9
MD
3013void
3014scm_print_port_mode (SCM exp, SCM port)
3015{
0607ebbf 3016 scm_puts_unlocked (SCM_CLOSEDP (exp)
f12733c9 3017 ? "closed: "
f9a64404
DH
3018 : (SCM_RDNG & SCM_CELL_WORD_0 (exp)
3019 ? (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
f12733c9
MD
3020 ? "input-output: "
3021 : "input: ")
f9a64404 3022 : (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
f12733c9
MD
3023 ? "output: "
3024 : "bogus: ")),
3025 port);
3026}
1cc91f1b 3027
f12733c9 3028int
e81d98ec 3029scm_port_print (SCM exp, SCM port, scm_print_state *pstate SCM_UNUSED)
0f2d19dd 3030{
f12733c9
MD
3031 char *type = SCM_PTOBNAME (SCM_PTOBNUM (exp));
3032 if (!type)
3033 type = "port";
0607ebbf 3034 scm_puts_unlocked ("#<", port);
f12733c9 3035 scm_print_port_mode (exp, port);
0607ebbf
AW
3036 scm_puts_unlocked (type, port);
3037 scm_putc_unlocked (' ', port);
0345e278 3038 scm_uintprint (SCM_CELL_WORD_1 (exp), 16, port);
0607ebbf 3039 scm_putc_unlocked ('>', port);
f12733c9 3040 return 1;
0f2d19dd
JB
3041}
3042
19b8d12b
AW
3043
3044\f
3045
3046/* Iterating over all ports. */
3047
3048struct for_each_data
3049{
3050 void (*proc) (void *data, SCM p);
3051 void *data;
3052};
3053
3054static SCM
3055for_each_trampoline (void *data, SCM port, SCM result)
3056{
3057 struct for_each_data *d = data;
3058
3059 d->proc (d->data, port);
3060
3061 return result;
3062}
3063
3064void
3065scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data)
3066{
3067 struct for_each_data d;
3068
3069 d.proc = proc;
3070 d.data = data;
3071
3072 scm_c_weak_set_fold (for_each_trampoline, &d, SCM_EOL,
3073 scm_i_port_weak_set);
3074}
3075
3076static void
3077scm_for_each_trampoline (void *data, SCM port)
3078{
3079 scm_call_1 (SCM_PACK_POINTER (data), port);
3080}
3081
3082SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0,
3083 (SCM proc),
3084 "Apply @var{proc} to each port in the Guile port table\n"
3085 "in turn. The return value is unspecified. More specifically,\n"
3086 "@var{proc} is applied exactly once to every port that exists\n"
0858753e
AW
3087 "in the system at the time @code{port-for-each} is invoked.\n"
3088 "Changes to the port table while @code{port-for-each} is running\n"
3089 "have no effect as far as @code{port-for-each} is concerned.")
19b8d12b
AW
3090#define FUNC_NAME s_scm_port_for_each
3091{
3092 SCM_VALIDATE_PROC (1, proc);
3093
3094 scm_c_port_for_each (scm_for_each_trampoline, SCM_UNPACK_POINTER (proc));
3095
3096 return SCM_UNSPECIFIED;
3097}
3098#undef FUNC_NAME
3099
3100static void
3101flush_output_port (void *closure, SCM port)
3102{
3103 if (SCM_OPOUTPORTP (port))
4251ae2e 3104 scm_flush_unlocked (port);
19b8d12b
AW
3105}
3106
3107SCM_DEFINE (scm_flush_all_ports, "flush-all-ports", 0, 0, 0,
3108 (),
3109 "Equivalent to calling @code{force-output} on\n"
3110 "all open output ports. The return value is unspecified.")
3111#define FUNC_NAME s_scm_flush_all_ports
3112{
3113 scm_c_port_for_each (&flush_output_port, NULL);
3114 return SCM_UNSPECIFIED;
3115}
3116#undef FUNC_NAME
3117
3118
0f2d19dd 3119\f
ee149d03 3120
d68fee48 3121/* Void ports. */
0f2d19dd 3122
92c2555f 3123scm_t_bits scm_tc16_void_port = 0;
0f2d19dd 3124
e81d98ec 3125static int fill_input_void_port (SCM port SCM_UNUSED)
283a1a0e 3126{
70df8af6 3127 return EOF;
283a1a0e
GH
3128}
3129
31703ab8 3130static void
e81d98ec
DH
3131write_void_port (SCM port SCM_UNUSED,
3132 const void *data SCM_UNUSED,
3133 size_t size SCM_UNUSED)
31703ab8
GH
3134{
3135}
3136
d617ee18
MV
3137static SCM
3138scm_i_void_port (long mode_bits)
0f2d19dd 3139{
2721f918
AW
3140 SCM ret;
3141
3142 ret = scm_c_make_port (scm_tc16_void_port, mode_bits, 0);
da220f27 3143
2721f918 3144 scm_port_non_buffer (SCM_PTAB_ENTRY (ret));
402788a9 3145
2721f918 3146 return ret;
0f2d19dd
JB
3147}
3148
d617ee18
MV
3149SCM
3150scm_void_port (char *mode_str)
3151{
3152 return scm_i_void_port (scm_mode_bits (mode_str));
3153}
3154
a1ec6916 3155SCM_DEFINE (scm_sys_make_void_port, "%make-void-port", 1, 0, 0,
1bbd0b84 3156 (SCM mode),
70df8af6 3157 "Create and return a new void port. A void port acts like\n"
bb2c02f2 3158 "@file{/dev/null}. The @var{mode} argument\n"
70df8af6 3159 "specifies the input/output modes for this port: see the\n"
b380b885 3160 "documentation for @code{open-file} in @ref{File Ports}.")
1bbd0b84 3161#define FUNC_NAME s_scm_sys_make_void_port
0f2d19dd 3162{
d617ee18 3163 return scm_i_void_port (scm_i_mode_bits (mode));
0f2d19dd 3164}
1bbd0b84 3165#undef FUNC_NAME
0f2d19dd 3166
19b8d12b 3167
0f2d19dd 3168\f
19b8d12b 3169
89545eba 3170/* Initialization. */
1cc91f1b 3171
0f2d19dd
JB
3172void
3173scm_init_ports ()
0f2d19dd 3174{
840ae05d 3175 /* lseek() symbols. */
e11e83f3
MV
3176 scm_c_define ("SEEK_SET", scm_from_int (SEEK_SET));
3177 scm_c_define ("SEEK_CUR", scm_from_int (SEEK_CUR));
3178 scm_c_define ("SEEK_END", scm_from_int (SEEK_END));
840ae05d 3179
70df8af6
GH
3180 scm_tc16_void_port = scm_make_port_type ("void", fill_input_void_port,
3181 write_void_port);
9de87eea 3182
f39448c5
AW
3183 cur_inport_fluid = scm_make_fluid ();
3184 cur_outport_fluid = scm_make_fluid ();
3185 cur_errport_fluid = scm_make_fluid ();
3186 cur_loadport_fluid = scm_make_fluid ();
9de87eea 3187
2721f918 3188 scm_i_port_weak_set = scm_c_make_weak_set (31);
d6a6989e 3189
a0599745 3190#include "libguile/ports.x"
889975e5 3191
d6a6989e 3192 /* Use Latin-1 as the default port encoding. */
c81c2ad3
AW
3193 SCM_VARIABLE_SET (default_port_encoding_var,
3194 scm_make_fluid_with_default (SCM_BOOL_F));
889975e5 3195 scm_port_encoding_init = 1;
d6a6989e 3196
b22e94db
LC
3197 SCM_VARIABLE_SET (default_conversion_strategy_var,
3198 scm_make_fluid_with_default (sym_substitute));
889975e5 3199 scm_conversion_strategy_init = 1;
b22e94db 3200
9670f238
AW
3201 /* These bindings are used when boot-9 turns `current-input-port' et
3202 al into parameters. They are then removed from the guile module. */
3203 scm_c_define ("%current-input-port-fluid", cur_inport_fluid);
3204 scm_c_define ("%current-output-port-fluid", cur_outport_fluid);
3205 scm_c_define ("%current-error-port-fluid", cur_errport_fluid);
0f2d19dd 3206}
89e00824
ML
3207
3208/*
3209 Local Variables:
3210 c-file-style: "gnu"
3211 End:
3212*/