temporarily disable elisp exception tests
[bpt/guile.git] / libguile / ports.c
CommitLineData
e4598559 1/* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2006,
bc8e6d7d 2 * 2007, 2008, 2009, 2010, 2011, 2012, 2013,
57898597 3 * 2014, 2015 Free Software Foundation, Inc.
f4bc4e59 4 *
73be1d9e 5 * This library is free software; you can redistribute it and/or
53befeb7
NJ
6 * modify it under the terms of the GNU Lesser General Public License
7 * as published by the Free Software Foundation; either version 3 of
8 * the License, or (at your option) any later version.
0f2d19dd 9 *
53befeb7
NJ
10 * This library is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
0f2d19dd 14 *
73be1d9e
MV
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
53befeb7
NJ
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301 USA
73be1d9e 19 */
1bbd0b84 20
1bbd0b84 21
0f2d19dd 22\f
d68fee48
JB
23/* Headers. */
24
2b829bbb
KR
25#define _LARGEFILE64_SOURCE /* ask for stat64 etc */
26
dbb605f5 27#ifdef HAVE_CONFIG_H
bd515f37
RB
28# include <config.h>
29#endif
30
0f2d19dd 31#include <stdio.h>
e6e2e95a 32#include <errno.h>
8ab3d8a0 33#include <fcntl.h> /* for chsize on mingw */
b5cb4464 34#include <assert.h>
f4bc4e59 35#include <iconv.h>
889975e5
MG
36#include <uniconv.h>
37#include <unistr.h>
38#include <striconveh.h>
e6e2e95a 39
fca43887
LC
40#include <assert.h>
41
a0599745 42#include "libguile/_scm.h"
4e047c3e 43#include "libguile/async.h"
8269ba5b 44#include "libguile/deprecation.h"
f0942910 45#include "libguile/eval.h"
8ab3d8a0 46#include "libguile/fports.h" /* direct access for seek and truncate */
9511876f 47#include "libguile/goops.h"
a0599745
MD
48#include "libguile/smob.h"
49#include "libguile/chars.h"
185e369a 50#include "libguile/dynwind.h"
0f2d19dd 51
a0599745 52#include "libguile/keywords.h"
5dbc6c06 53#include "libguile/hashtab.h"
a0599745
MD
54#include "libguile/root.h"
55#include "libguile/strings.h"
b42170a4 56#include "libguile/mallocs.h"
a0599745
MD
57#include "libguile/validate.h"
58#include "libguile/ports.h"
e4598559 59#include "libguile/ports-internal.h"
3a5fb14d 60#include "libguile/vectors.h"
2721f918 61#include "libguile/weak-set.h"
9de87eea 62#include "libguile/fluids.h"
889975e5 63#include "libguile/eq.h"
a38024ba 64#include "libguile/alist.h"
0f2d19dd 65
bd9e24b3
GH
66#ifdef HAVE_STRING_H
67#include <string.h>
68#endif
69
ec65f5da
MV
70#ifdef HAVE_IO_H
71#include <io.h>
72#endif
73
0f2d19dd 74#include <unistd.h>
0f2d19dd 75
95b88819
GH
76#ifdef HAVE_SYS_IOCTL_H
77#include <sys/ioctl.h>
78#endif
d68fee48 79
8ab3d8a0
KR
/* Fallback for systems that provide chsize but not ftruncate.

   Mingw (version 3.4.5, circa 2006) already has ftruncate as an alias
   for chsize, but this is kept in case that was not so in past
   versions, or to help other minimal DOS environments.

   gnulib's ftruncate.c has code using fcntl F_CHSIZE and F_FREESP,
   which might be possibilities for other systems without ftruncate.  */

#if defined HAVE_CHSIZE && ! defined HAVE_FTRUNCATE
# define ftruncate(fd, size) chsize (fd, size)
# undef HAVE_FTRUNCATE
# define HAVE_FTRUNCATE 1
#endif
92
0f2d19dd 93\f
93c4fa21
AW
/* Port encodings are case-insensitive ASCII strings.

   Return C folded to upper case if it is an ASCII lower-case letter;
   any other character is returned unchanged.  */
static char
ascii_toupper (char c)
{
  if (c >= 'a' && c <= 'z')
    return 'A' + (c - 'a');

  return c;
}
100
/* Return non-zero if the encoding name ENC equals UPPER when ENC's
   ASCII letters are folded to upper case; UPPER itself is compared
   verbatim.  A NULL ENC is treated as "ISO-8859-1".

   It is only necessary to use this function on encodings that come
   from the user and have not been canonicalized yet.  Encodings that
   are set on ports or in the default encoding fluid are in upper-case,
   and can be compared with strcmp.  */
static int
encoding_matches (const char *enc, const char *upper)
{
  const char *p = enc ? enc : "ISO-8859-1";

  for (; *p != '\0'; p++, upper++)
    {
      char ch = *p;

      /* ASCII-only case folding.  */
      if (ch >= 'a' && ch <= 'z')
        ch = 'A' + (ch - 'a');

      /* Also catches UPPER ending first: CH is never NUL here.  */
      if (ch != *upper)
        return 0;
    }

  /* ENC is exhausted; it matches only if UPPER is exhausted too.  */
  return *upper == '\0';
}
117
/* Return the canonical (upper-case) spelling of the encoding name ENC.

   A NULL ENC yields the constant string "ISO-8859-1".  Otherwise the
   result is a copy of ENC made with scm_gc_strdup in which every ASCII
   lower-case letter has been folded to upper case.

   Encoding names are restricted to ASCII: if ENC contains a byte
   outside the range 0..127, scm_misc_error is called with ENC as the
   offending value.  */
static char*
canonicalize_encoding (const char *enc)
{
  char *ret;
  int i;

  if (!enc)
    return "ISO-8859-1";

  ret = scm_gc_strdup (enc, "port");

  for (i = 0; ret[i]; i++)
    {
      /* Compare as unsigned char.  Where plain `char' is signed, bytes
         >= 0x80 are negative, so `ret[i] > 127' could never be true
         and non-ASCII names would be accepted silently.  */
      if ((unsigned char) ret[i] > 127)
        /* Restrict to ASCII. */
        scm_misc_error (NULL, "invalid character encoding ~s",
                        scm_list_1 (scm_from_latin1_string (enc)));
      else
        ret[i] = ascii_toupper (ret[i]);
    }

  return ret;
}
141
142
143\f
d68fee48 144/* The port kind table --- a dynamically resized array of port types. */
0f2d19dd
JB
145
146
147/* scm_ptobs scm_numptob
5dbc6c06 148 * implement a dynamically resized array of ptob records.
0f2d19dd
JB
149 * Indexes into this table are used when generating type
150 * tags for smobjects (if you know a tag you can get an index and conversely).
151 */
62bd5d66
AW
152static scm_t_ptob_descriptor **scm_ptobs = NULL;
153static long scm_numptob = 0; /* Number of port types. */
154static long scm_ptobs_size = 0; /* Number of slots in the port type
155 table. */
156static scm_i_pthread_mutex_t scm_ptobs_lock = SCM_I_PTHREAD_MUTEX_INITIALIZER;
157
158long
159scm_c_num_port_types (void)
160{
161 long ret;
162
163 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
164 ret = scm_numptob;
165 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
166
167 return ret;
168}
169
170scm_t_ptob_descriptor*
171scm_c_port_type_ref (long ptobnum)
172{
173 scm_t_ptob_descriptor *ret = NULL;
174
175 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
176
177 if (0 <= ptobnum && ptobnum < scm_numptob)
178 ret = scm_ptobs[ptobnum];
179
180 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
181
182 if (!ret)
183 scm_out_of_range ("scm_c_port_type_ref", scm_from_long (ptobnum));
184
185 return ret;
186}
187
188long
189scm_c_port_type_add_x (scm_t_ptob_descriptor *desc)
190{
191 long ret = -1;
192
193 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
194
195 if (scm_numptob + 1 < SCM_I_MAX_PORT_TYPE_COUNT)
196 {
197 if (scm_numptob == scm_ptobs_size)
198 {
199 unsigned long old_size = scm_ptobs_size;
200 scm_t_ptob_descriptor **old_ptobs = scm_ptobs;
201
202 /* Currently there are only 9 predefined port types, so one
203 resize will cover it. */
204 scm_ptobs_size = old_size + 10;
205
206 if (scm_ptobs_size >= SCM_I_MAX_PORT_TYPE_COUNT)
207 scm_ptobs_size = SCM_I_MAX_PORT_TYPE_COUNT;
208
209 scm_ptobs = scm_gc_malloc (sizeof (*scm_ptobs) * scm_ptobs_size,
210 "scm_ptobs");
211
212 memcpy (scm_ptobs, old_ptobs, sizeof (*scm_ptobs) * scm_numptob);
213 }
214
215 ret = scm_numptob++;
216 scm_ptobs[ret] = desc;
217 }
218
219 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
220
221 if (ret < 0)
222 scm_out_of_range ("scm_c_port_type_add_x", scm_from_long (scm_numptob));
223
224 return ret;
225}
0f2d19dd 226
f12733c9 227/*
f12733c9 228 * We choose to use an interface similar to the smob interface with
affc96b5 229 * fill_input and write as standard fields, passed to the port
f12733c9
MD
230 * type constructor, and optional fields set by setters.
231 */
232
70df8af6 233static void
e81d98ec 234flush_port_default (SCM port SCM_UNUSED)
70df8af6
GH
235{
236}
237
238static void
e81d98ec 239end_input_default (SCM port SCM_UNUSED, int offset SCM_UNUSED)
70df8af6
GH
240{
241}
0f2d19dd 242
92c2555f 243scm_t_bits
f12733c9 244scm_make_port_type (char *name,
affc96b5 245 int (*fill_input) (SCM port),
8aa011a1 246 void (*write) (SCM port, const void *data, size_t size))
0f2d19dd 247{
62bd5d66
AW
248 scm_t_ptob_descriptor *desc;
249 long ptobnum;
affc96b5 250
62bd5d66
AW
251 desc = scm_gc_malloc_pointerless (sizeof (*desc), "port-type");
252 memset (desc, 0, sizeof (*desc));
affc96b5 253
62bd5d66
AW
254 desc->name = name;
255 desc->print = scm_port_print;
256 desc->write = write;
257 desc->flush = flush_port_default;
258 desc->end_input = end_input_default;
259 desc->fill_input = fill_input;
affc96b5 260
62bd5d66 261 ptobnum = scm_c_port_type_add_x (desc);
affc96b5 262
62bd5d66 263 /* Make a class object if GOOPS is present. */
57898597 264 if (SCM_UNPACK (scm_i_port_class[0]) != 0)
62bd5d66
AW
265 scm_make_port_classes (ptobnum, name);
266
267 return scm_tc7_port + ptobnum * 256;
0f2d19dd
JB
268}
269
f12733c9 270void
23f2b9a3 271scm_set_port_mark (scm_t_bits tc, SCM (*mark) (SCM))
f12733c9 272{
62bd5d66 273 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->mark = mark;
f12733c9
MD
274}
275
276void
23f2b9a3 277scm_set_port_free (scm_t_bits tc, size_t (*free) (SCM))
f12733c9 278{
62bd5d66 279 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->free = free;
f12733c9
MD
280}
281
282void
23f2b9a3 283scm_set_port_print (scm_t_bits tc, int (*print) (SCM exp, SCM port,
19b8d12b 284 scm_print_state *pstate))
f12733c9 285{
62bd5d66 286 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->print = print;
f12733c9
MD
287}
288
289void
23f2b9a3 290scm_set_port_equalp (scm_t_bits tc, SCM (*equalp) (SCM, SCM))
f12733c9 291{
62bd5d66 292 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->equalp = equalp;
f12733c9
MD
293}
294
31703ab8 295void
19b8d12b 296scm_set_port_close (scm_t_bits tc, int (*close) (SCM))
31703ab8 297{
19b8d12b 298 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->close = close;
31703ab8
GH
299}
300
f12733c9 301void
19b8d12b 302scm_set_port_flush (scm_t_bits tc, void (*flush) (SCM port))
f12733c9 303{
03a2eeb0
AW
304 scm_t_ptob_descriptor *ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tc));
305 ptob->flush = flush;
306 ptob->flags |= SCM_PORT_TYPE_HAS_FLUSH;
f12733c9
MD
307}
308
309void
19b8d12b 310scm_set_port_end_input (scm_t_bits tc, void (*end_input) (SCM port, int offset))
f12733c9 311{
19b8d12b 312 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->end_input = end_input;
f12733c9
MD
313}
314
315void
19b8d12b 316scm_set_port_seek (scm_t_bits tc, scm_t_off (*seek) (SCM, scm_t_off, int))
f12733c9 317{
62bd5d66 318 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->seek = seek;
f12733c9
MD
319}
320
321void
f1ce9199 322scm_set_port_truncate (scm_t_bits tc, void (*truncate) (SCM, scm_t_off))
f12733c9 323{
62bd5d66 324 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->truncate = truncate;
f12733c9
MD
325}
326
327void
23f2b9a3 328scm_set_port_input_waiting (scm_t_bits tc, int (*input_waiting) (SCM))
f12733c9 329{
62bd5d66 330 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->input_waiting = input_waiting;
f12733c9
MD
331}
332
e140d85d
AW
333void
334scm_set_port_setvbuf (scm_t_bits tc, void (*setvbuf) (SCM, long, long))
335{
336 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->setvbuf = setvbuf;
f12733c9
MD
337}
338
45c0878b
MW
339static void
340scm_i_set_pending_eof (SCM port)
341{
342 SCM_PORT_GET_INTERNAL (port)->pending_eof = 1;
343}
344
345static void
346scm_i_clear_pending_eof (SCM port)
347{
348 SCM_PORT_GET_INTERNAL (port)->pending_eof = 0;
349}
350
a38024ba
MW
351SCM_DEFINE (scm_i_port_property, "%port-property", 2, 0, 0,
352 (SCM port, SCM key),
353 "Return the property of @var{port} associated with @var{key}.")
354#define FUNC_NAME s_scm_i_port_property
05d7f762 355{
79657fd3
MW
356 scm_i_pthread_mutex_t *lock;
357 SCM result;
358
a38024ba 359 SCM_VALIDATE_OPPORT (1, port);
79657fd3
MW
360 scm_c_lock_port (port, &lock);
361 result = scm_assq_ref (SCM_PORT_GET_INTERNAL (port)->alist, key);
362 if (lock)
363 scm_i_pthread_mutex_unlock (lock);
364 return result;
05d7f762 365}
a38024ba 366#undef FUNC_NAME
05d7f762 367
a38024ba
MW
368SCM_DEFINE (scm_i_set_port_property_x, "%set-port-property!", 3, 0, 0,
369 (SCM port, SCM key, SCM value),
370 "Set the property of @var{port} associated with @var{key} to @var{value}.")
371#define FUNC_NAME s_scm_i_set_port_property_x
05d7f762 372{
79657fd3 373 scm_i_pthread_mutex_t *lock;
a38024ba
MW
374 scm_t_port_internal *pti;
375
376 SCM_VALIDATE_OPPORT (1, port);
79657fd3 377 scm_c_lock_port (port, &lock);
a38024ba
MW
378 pti = SCM_PORT_GET_INTERNAL (port);
379 pti->alist = scm_assq_set_x (pti->alist, key, value);
79657fd3
MW
380 if (lock)
381 scm_i_pthread_mutex_unlock (lock);
a38024ba 382 return SCM_UNSPECIFIED;
05d7f762 383}
a38024ba 384#undef FUNC_NAME
05d7f762 385
0f2d19dd 386\f
0f2d19dd 387
d68fee48 388/* Standard ports --- current input, output, error, and more(!). */
0f2d19dd 389
34297700
AW
390static SCM cur_inport_fluid = SCM_BOOL_F;
391static SCM cur_outport_fluid = SCM_BOOL_F;
392static SCM cur_errport_fluid = SCM_BOOL_F;
0463a927 393static SCM cur_warnport_fluid = SCM_BOOL_F;
34297700 394static SCM cur_loadport_fluid = SCM_BOOL_F;
9de87eea 395
3b3b36dd 396SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
0463a927 397 (void),
e1546b65
MG
398 "Return the current input port. This is the default port used\n"
399 "by many input procedures. Initially, @code{current-input-port}\n"
400 "returns the @dfn{standard input} in Unix and C terminology.")
1bbd0b84 401#define FUNC_NAME s_scm_current_input_port
0f2d19dd 402{
34297700 403 if (scm_is_true (cur_inport_fluid))
889975e5
MG
404 return scm_fluid_ref (cur_inport_fluid);
405 else
406 return SCM_BOOL_F;
0f2d19dd 407}
1bbd0b84 408#undef FUNC_NAME
0f2d19dd 409
3b3b36dd 410SCM_DEFINE (scm_current_output_port, "current-output-port", 0, 0, 0,
0463a927 411 (void),
e1546b65 412 "Return the current output port. This is the default port used\n"
9401323e 413 "by many output procedures. Initially,\n"
e1546b65
MG
414 "@code{current-output-port} returns the @dfn{standard output} in\n"
415 "Unix and C terminology.")
1bbd0b84 416#define FUNC_NAME s_scm_current_output_port
0f2d19dd 417{
34297700 418 if (scm_is_true (cur_outport_fluid))
889975e5
MG
419 return scm_fluid_ref (cur_outport_fluid);
420 else
421 return SCM_BOOL_F;
0f2d19dd 422}
1bbd0b84 423#undef FUNC_NAME
0f2d19dd 424
3b3b36dd 425SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0,
0463a927 426 (void),
b380b885
MD
427 "Return the port to which errors and warnings should be sent (the\n"
428 "@dfn{standard error} in Unix and C terminology).")
1bbd0b84 429#define FUNC_NAME s_scm_current_error_port
0f2d19dd 430{
34297700 431 if (scm_is_true (cur_errport_fluid))
889975e5
MG
432 return scm_fluid_ref (cur_errport_fluid);
433 else
434 return SCM_BOOL_F;
0f2d19dd 435}
1bbd0b84 436#undef FUNC_NAME
0f2d19dd 437
0463a927
AW
438SCM_DEFINE (scm_current_warning_port, "current-warning-port", 0, 0, 0,
439 (void),
440 "Return the port to which diagnostic warnings should be sent.")
441#define FUNC_NAME s_scm_current_warning_port
3972de76 442{
0463a927
AW
443 if (scm_is_true (cur_warnport_fluid))
444 return scm_fluid_ref (cur_warnport_fluid);
445 else
446 return SCM_BOOL_F;
3972de76 447}
0463a927 448#undef FUNC_NAME
3972de76 449
3b3b36dd 450SCM_DEFINE (scm_current_load_port, "current-load-port", 0, 0, 0,
e1546b65 451 (),
b450f070 452 "Return the current-load-port.\n"
e1546b65 453 "The load port is used internally by @code{primitive-load}.")
1bbd0b84 454#define FUNC_NAME s_scm_current_load_port
31614d8e 455{
9de87eea 456 return scm_fluid_ref (cur_loadport_fluid);
31614d8e 457}
1bbd0b84 458#undef FUNC_NAME
31614d8e 459
3b3b36dd 460SCM_DEFINE (scm_set_current_input_port, "set-current-input-port", 1, 0, 0,
1bbd0b84 461 (SCM port),
8f85c0c6
NJ
462 "@deffnx {Scheme Procedure} set-current-output-port port\n"
463 "@deffnx {Scheme Procedure} set-current-error-port port\n"
b380b885
MD
464 "Change the ports returned by @code{current-input-port},\n"
465 "@code{current-output-port} and @code{current-error-port}, respectively,\n"
466 "so that they use the supplied @var{port} for input or output.")
1bbd0b84 467#define FUNC_NAME s_scm_set_current_input_port
0f2d19dd 468{
9de87eea 469 SCM oinp = scm_fluid_ref (cur_inport_fluid);
34d19ef6 470 SCM_VALIDATE_OPINPORT (1, port);
9de87eea 471 scm_fluid_set_x (cur_inport_fluid, port);
0f2d19dd
JB
472 return oinp;
473}
1bbd0b84 474#undef FUNC_NAME
0f2d19dd
JB
475
476
3b3b36dd 477SCM_DEFINE (scm_set_current_output_port, "set-current-output-port", 1, 0, 0,
e1546b65
MG
478 (SCM port),
479 "Set the current default output port to @var{port}.")
1bbd0b84 480#define FUNC_NAME s_scm_set_current_output_port
0f2d19dd 481{
9de87eea 482 SCM ooutp = scm_fluid_ref (cur_outport_fluid);
78446828 483 port = SCM_COERCE_OUTPORT (port);
34d19ef6 484 SCM_VALIDATE_OPOUTPORT (1, port);
9de87eea 485 scm_fluid_set_x (cur_outport_fluid, port);
0f2d19dd
JB
486 return ooutp;
487}
1bbd0b84 488#undef FUNC_NAME
0f2d19dd
JB
489
490
3b3b36dd 491SCM_DEFINE (scm_set_current_error_port, "set-current-error-port", 1, 0, 0,
e1546b65
MG
492 (SCM port),
493 "Set the current default error port to @var{port}.")
1bbd0b84 494#define FUNC_NAME s_scm_set_current_error_port
0f2d19dd 495{
9de87eea 496 SCM oerrp = scm_fluid_ref (cur_errport_fluid);
78446828 497 port = SCM_COERCE_OUTPORT (port);
34d19ef6 498 SCM_VALIDATE_OPOUTPORT (1, port);
9de87eea 499 scm_fluid_set_x (cur_errport_fluid, port);
0f2d19dd
JB
500 return oerrp;
501}
1bbd0b84 502#undef FUNC_NAME
0f2d19dd 503
3972de76
AW
504
505SCM
506scm_set_current_warning_port (SCM port)
0463a927 507#define FUNC_NAME "set-current-warning-port"
3972de76 508{
0463a927
AW
509 SCM owarnp = scm_fluid_ref (cur_warnport_fluid);
510 port = SCM_COERCE_OUTPORT (port);
511 SCM_VALIDATE_OPOUTPORT (1, port);
512 scm_fluid_set_x (cur_warnport_fluid, port);
513 return owarnp;
3972de76 514}
0463a927 515#undef FUNC_NAME
3972de76
AW
516
517
185e369a 518void
661ae7ab 519scm_dynwind_current_input_port (SCM port)
9de87eea 520#define FUNC_NAME NULL
185e369a 521{
9de87eea 522 SCM_VALIDATE_OPINPORT (1, port);
661ae7ab 523 scm_dynwind_fluid (cur_inport_fluid, port);
185e369a 524}
9de87eea 525#undef FUNC_NAME
185e369a
MV
526
527void
661ae7ab 528scm_dynwind_current_output_port (SCM port)
9de87eea 529#define FUNC_NAME NULL
185e369a 530{
9de87eea
MV
531 port = SCM_COERCE_OUTPORT (port);
532 SCM_VALIDATE_OPOUTPORT (1, port);
661ae7ab 533 scm_dynwind_fluid (cur_outport_fluid, port);
185e369a 534}
9de87eea 535#undef FUNC_NAME
185e369a
MV
536
537void
661ae7ab 538scm_dynwind_current_error_port (SCM port)
9de87eea
MV
539#define FUNC_NAME NULL
540{
541 port = SCM_COERCE_OUTPORT (port);
542 SCM_VALIDATE_OPOUTPORT (1, port);
661ae7ab 543 scm_dynwind_fluid (cur_errport_fluid, port);
9de87eea
MV
544}
545#undef FUNC_NAME
546
547void
661ae7ab 548scm_i_dynwind_current_load_port (SCM port)
185e369a 549{
661ae7ab 550 scm_dynwind_fluid (cur_loadport_fluid, port);
185e369a
MV
551}
552
19b8d12b 553
0f2d19dd 554\f
0f2d19dd 555
19b8d12b
AW
556/* Retrieving a port's mode. */
557
558/* Return the flags that characterize a port based on the mode
559 * string used to open a file for that port.
560 *
561 * See PORT FLAGS in scm.h
5dbc6c06 562 */
19b8d12b
AW
563
564static long
565scm_i_mode_bits_n (SCM modes)
566{
567 return (SCM_OPN
568 | (scm_i_string_contains_char (modes, 'r')
569 || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0)
570 | (scm_i_string_contains_char (modes, 'w')
571 || scm_i_string_contains_char (modes, 'a')
572 || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0)
573 | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0)
574 | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0));
575}
576
577long
578scm_mode_bits (char *modes)
579{
8ebd06c6
AW
580 /* Valid characters are rw+a0l. So, use latin1. */
581 return scm_i_mode_bits (scm_from_latin1_string (modes));
19b8d12b
AW
582}
583
584long
585scm_i_mode_bits (SCM modes)
586{
587 long bits;
588
589 if (!scm_is_string (modes))
590 scm_wrong_type_arg_msg (NULL, 0, modes, "string");
591
592 bits = scm_i_mode_bits_n (modes);
593 scm_remember_upto_here_1 (modes);
594 return bits;
595}
596
597/* Return the mode flags from an open port.
598 * Some modes such as "append" are only used when opening
599 * a file and are not returned here. */
600
601SCM_DEFINE (scm_port_mode, "port-mode", 1, 0, 0,
602 (SCM port),
603 "Return the port modes associated with the open port @var{port}.\n"
604 "These will not necessarily be identical to the modes used when\n"
605 "the port was opened, since modes such as \"append\" which are\n"
606 "used only during port creation are not retained.")
607#define FUNC_NAME s_scm_port_mode
608{
609 char modes[4];
610 modes[0] = '\0';
611
612 port = SCM_COERCE_OUTPORT (port);
613 SCM_VALIDATE_OPPORT (1, port);
614 if (SCM_CELL_WORD_0 (port) & SCM_RDNG) {
615 if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
616 strcpy (modes, "r+");
617 else
618 strcpy (modes, "r");
619 }
620 else if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
621 strcpy (modes, "w");
622 if (SCM_CELL_WORD_0 (port) & SCM_BUF0)
623 strcat (modes, "0");
624
625 return scm_from_latin1_string (modes);
626}
627#undef FUNC_NAME
628
19b8d12b
AW
629\f
630
631/* The port table --- a weak set of all ports.
632
633 We need a global registry of ports to flush them all at exit, and to
634 get all the ports matching a file descriptor. */
2721f918 635SCM scm_i_port_weak_set;
b9ad392e 636
19b8d12b 637
651a0735 638\f
651a0735 639
19b8d12b 640/* Port finalization. */
1cc91f1b 641
5a771d5f
AW
642struct do_free_data
643{
644 scm_t_ptob_descriptor *ptob;
645 SCM port;
646};
647
648static SCM
649do_free (void *body_data)
650{
651 struct do_free_data *data = body_data;
652
653 /* `close' is for explicit `close-port' by user. `free' is for this
654 purpose: ports collected by the GC. */
655 data->ptob->free (data->port);
656
657 return SCM_BOOL_T;
658}
659
651a0735
LC
660/* Finalize the object (a port) pointed to by PTR. */
661static void
6922d92f 662finalize_port (void *ptr, void *data)
651a0735 663{
21041372 664 SCM port = SCM_PACK_POINTER (ptr);
651a0735
LC
665
666 if (!SCM_PORTP (port))
667 abort ();
668
669 if (SCM_OPENP (port))
670 {
3753e227 671 struct do_free_data data;
651a0735 672
3753e227 673 SCM_CLR_PORT_OPEN_FLAG (port);
651a0735 674
3753e227
AW
675 data.ptob = SCM_PORT_DESCRIPTOR (port);
676 data.port = port;
5a771d5f 677
3753e227
AW
678 scm_internal_catch (SCM_BOOL_T, do_free, &data,
679 scm_handle_by_message_noexit, NULL);
5a771d5f 680
3753e227 681 scm_gc_ports_collected++;
651a0735
LC
682 }
683}
684
685
651a0735
LC
686\f
687
da220f27 688SCM
2721f918
AW
689scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits,
690 const char *encoding,
691 scm_t_string_failed_conversion_handler handler,
692 scm_t_bits stream)
0f2d19dd 693{
2721f918
AW
694 SCM ret;
695 scm_t_port *entry;
f6f4feb0 696 scm_t_port_internal *pti;
62bd5d66 697 scm_t_ptob_descriptor *ptob;
2721f918 698
f6f4feb0
MW
699 entry = scm_gc_typed_calloc (scm_t_port);
700 pti = scm_gc_typed_calloc (scm_t_port_internal);
62bd5d66
AW
701 ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tag));
702
703 ret = scm_words (tag | mode_bits, 3);
704 SCM_SET_CELL_WORD_1 (ret, (scm_t_bits) entry);
705 SCM_SET_CELL_WORD_2 (ret, (scm_t_bits) ptob);
5f16b897 706
92c0ebac
AW
707 entry->lock = scm_gc_malloc_pointerless (sizeof (*entry->lock), "port lock");
708 scm_i_pthread_mutex_init (entry->lock, scm_i_pthread_mutexattr_recursive);
30b126d2 709
f6f4feb0 710 entry->internal = pti;
840ae05d 711 entry->file_name = SCM_BOOL_F;
61e452ba 712 entry->rw_active = SCM_PORT_NEITHER;
2721f918
AW
713 entry->port = ret;
714 entry->stream = stream;
93c4fa21
AW
715
716 if (encoding_matches (encoding, "UTF-8"))
717 {
f6f4feb0 718 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
93c4fa21
AW
719 entry->encoding = "UTF-8";
720 }
721 else if (encoding_matches (encoding, "ISO-8859-1"))
722 {
f6f4feb0 723 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
93c4fa21
AW
724 entry->encoding = "ISO-8859-1";
725 }
6c98257f 726 else
93c4fa21 727 {
f6f4feb0 728 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
93c4fa21
AW
729 entry->encoding = canonicalize_encoding (encoding);
730 }
731
2721f918 732 entry->ilseq_handler = handler;
f6f4feb0 733 pti->iconv_descriptors = NULL;
0f2d19dd 734
f6f4feb0
MW
735 pti->at_stream_start_for_bom_read = 1;
736 pti->at_stream_start_for_bom_write = 1;
f4bc4e59 737
f6f4feb0
MW
738 pti->pending_eof = 0;
739 pti->alist = SCM_EOL;
3e05fc04 740
6978c673
AW
741 if (SCM_PORT_DESCRIPTOR (ret)->free)
742 scm_i_set_finalizer (SCM2PTR (ret), finalize_port, NULL);
743
03a2eeb0
AW
744 if (SCM_PORT_DESCRIPTOR (ret)->flags & SCM_PORT_TYPE_HAS_FLUSH)
745 scm_weak_set_add_x (scm_i_port_weak_set, ret);
651a0735 746
2721f918
AW
747 return ret;
748}
749
750SCM
751scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream)
752{
753 return scm_c_make_port_with_encoding (tag, mode_bits,
754 scm_i_default_port_encoding (),
0dd7c540 755 scm_i_default_port_conversion_handler (),
2721f918
AW
756 stream);
757}
758
759SCM
760scm_new_port_table_entry (scm_t_bits tag)
761{
762 return scm_c_make_port (tag, 0, 0);
0f2d19dd
JB
763}
764
d68fee48 765\f
d68fee48 766
19b8d12b 767/* Predicates. */
1cc91f1b 768
19b8d12b
AW
769SCM_DEFINE (scm_port_p, "port?", 1, 0, 0,
770 (SCM x),
771 "Return a boolean indicating whether @var{x} is a port.\n"
772 "Equivalent to @code{(or (input-port? @var{x}) (output-port?\n"
773 "@var{x}))}.")
774#define FUNC_NAME s_scm_port_p
0f2d19dd 775{
19b8d12b 776 return scm_from_bool (SCM_PORTP (x));
0f2d19dd 777}
19b8d12b 778#undef FUNC_NAME
0f2d19dd 779
19b8d12b
AW
780SCM_DEFINE (scm_input_port_p, "input-port?", 1, 0, 0,
781 (SCM x),
782 "Return @code{#t} if @var{x} is an input port, otherwise return\n"
783 "@code{#f}. Any object satisfying this predicate also satisfies\n"
784 "@code{port?}.")
785#define FUNC_NAME s_scm_input_port_p
0f2d19dd 786{
19b8d12b 787 return scm_from_bool (SCM_INPUT_PORT_P (x));
0f2d19dd 788}
1bbd0b84 789#undef FUNC_NAME
0f2d19dd 790
19b8d12b
AW
791SCM_DEFINE (scm_output_port_p, "output-port?", 1, 0, 0,
792 (SCM x),
793 "Return @code{#t} if @var{x} is an output port, otherwise return\n"
794 "@code{#f}. Any object satisfying this predicate also satisfies\n"
795 "@code{port?}.")
796#define FUNC_NAME s_scm_output_port_p
0f2d19dd 797{
19b8d12b
AW
798 x = SCM_COERCE_OUTPORT (x);
799 return scm_from_bool (SCM_OUTPUT_PORT_P (x));
0f2d19dd 800}
1bbd0b84 801#undef FUNC_NAME
0f2d19dd 802
19b8d12b
AW
803SCM_DEFINE (scm_port_closed_p, "port-closed?", 1, 0, 0,
804 (SCM port),
805 "Return @code{#t} if @var{port} is closed or @code{#f} if it is\n"
806 "open.")
807#define FUNC_NAME s_scm_port_closed_p
d617ee18 808{
19b8d12b
AW
809 SCM_VALIDATE_PORT (1, port);
810 return scm_from_bool (!SCM_OPPORTP (port));
d617ee18 811}
19b8d12b 812#undef FUNC_NAME
eadd48de 813
19b8d12b
AW
814SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0,
815 (SCM x),
816 "Return @code{#t} if @var{x} is an end-of-file object; otherwise\n"
817 "return @code{#f}.")
818#define FUNC_NAME s_scm_eof_object_p
eadd48de 819{
19b8d12b 820 return scm_from_bool (SCM_EOF_OBJECT_P (x));
eadd48de 821}
1bbd0b84 822#undef FUNC_NAME
eadd48de
GH
823
824
d68fee48 825\f
19b8d12b 826
d68fee48
JB
827/* Closing ports. */
828
03a2eeb0
AW
829static void close_iconv_descriptors (scm_t_iconv_descriptors *id);
830
0f2d19dd
JB
831/* scm_close_port
832 * Call the close operation on a port object.
eadd48de 833 * see also scm_close.
0f2d19dd 834 */
3b3b36dd 835SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0,
1bbd0b84 836 (SCM port),
1e6808ea
MG
837 "Close the specified port object. Return @code{#t} if it\n"
838 "successfully closes a port or @code{#f} if it was already\n"
839 "closed. An exception may be raised if an error occurs, for\n"
840 "example when flushing buffered output. See also @ref{Ports and\n"
841 "File Descriptors, close}, for a procedure which can close file\n"
842 "descriptors.")
1bbd0b84 843#define FUNC_NAME s_scm_close_port
0f2d19dd 844{
f6f4feb0 845 scm_t_port_internal *pti;
eadd48de
GH
846 int rv;
847
78446828
MV
848 port = SCM_COERCE_OUTPORT (port);
849
7a754ca6 850 SCM_VALIDATE_PORT (1, port);
0f2d19dd 851 if (SCM_CLOSEDP (port))
eadd48de 852 return SCM_BOOL_F;
03a2eeb0 853
f6f4feb0 854 pti = SCM_PORT_GET_INTERNAL (port);
5a771d5f 855 SCM_CLR_PORT_OPEN_FLAG (port);
03a2eeb0
AW
856
857 if (SCM_PORT_DESCRIPTOR (port)->flags & SCM_PORT_TYPE_HAS_FLUSH)
858 scm_weak_set_remove_x (scm_i_port_weak_set, port);
859
5a771d5f
AW
860 if (SCM_PORT_DESCRIPTOR (port)->close)
861 /* Note! This may throw an exception. Anything after this point
862 should be resilient to non-local exits. */
863 rv = SCM_PORT_DESCRIPTOR (port)->close (port);
864 else
865 rv = 0;
03a2eeb0 866
f6f4feb0 867 if (pti->iconv_descriptors)
03a2eeb0 868 {
5a771d5f
AW
869 /* If we don't get here, the iconv_descriptors finalizer will
870 clean up. */
f6f4feb0
MW
871 close_iconv_descriptors (pti->iconv_descriptors);
872 pti->iconv_descriptors = NULL;
03a2eeb0
AW
873 }
874
7888309b 875 return scm_from_bool (rv >= 0);
7a754ca6
MD
876}
877#undef FUNC_NAME
878
879SCM_DEFINE (scm_close_input_port, "close-input-port", 1, 0, 0,
880 (SCM port),
881 "Close the specified input port object. The routine has no effect if\n"
882 "the file has already been closed. An exception may be raised if an\n"
883 "error occurs. The value returned is unspecified.\n\n"
884 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
885 "which can close file descriptors.")
886#define FUNC_NAME s_scm_close_input_port
887{
888 SCM_VALIDATE_INPUT_PORT (1, port);
889 scm_close_port (port);
890 return SCM_UNSPECIFIED;
891}
892#undef FUNC_NAME
893
894SCM_DEFINE (scm_close_output_port, "close-output-port", 1, 0, 0,
895 (SCM port),
896 "Close the specified output port object. The routine has no effect if\n"
897 "the file has already been closed. An exception may be raised if an\n"
898 "error occurs. The value returned is unspecified.\n\n"
899 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
900 "which can close file descriptors.")
901#define FUNC_NAME s_scm_close_output_port
902{
903 port = SCM_COERCE_OUTPORT (port);
904 SCM_VALIDATE_OUTPUT_PORT (1, port);
905 scm_close_port (port);
906 return SCM_UNSPECIFIED;
0f2d19dd 907}
1bbd0b84 908#undef FUNC_NAME
0f2d19dd 909
2721f918 910
19b8d12b 911\f
2721f918 912
19b8d12b
AW
913/* Encoding characters to byte streams, and decoding byte streams to
914 characters. */
5dbc6c06 915
19b8d12b
AW
916/* A fluid specifying the default encoding for newly created ports. If it is
917 a string, that is the encoding. If it is #f, it is in the "native"
918 (Latin-1) encoding. */
919SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding");
920
921static int scm_port_encoding_init = 0;
922
923/* Use ENCODING as the default encoding for future ports. */
c536b4b3 924void
19b8d12b 925scm_i_set_default_port_encoding (const char *encoding)
c2ca4493 926{
19b8d12b
AW
927 if (!scm_port_encoding_init
928 || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
929 scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
930 SCM_EOL);
fdfe6305 931
449ca87b 932 if (encoding_matches (encoding, "ISO-8859-1"))
19b8d12b
AW
933 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
934 else
93c4fa21
AW
935 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
936 scm_from_latin1_string (canonicalize_encoding (encoding)));
2721f918 937}
3a5fb14d 938
93c4fa21 939/* Return the name of the default encoding for newly created ports. */
19b8d12b
AW
940const char *
941scm_i_default_port_encoding (void)
2721f918 942{
19b8d12b 943 if (!scm_port_encoding_init)
93c4fa21 944 return "ISO-8859-1";
19b8d12b 945 else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
93c4fa21 946 return "ISO-8859-1";
19b8d12b
AW
947 else
948 {
949 SCM encoding;
950
951 encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
952 if (!scm_is_string (encoding))
93c4fa21 953 return "ISO-8859-1";
19b8d12b
AW
954 else
955 return scm_i_string_chars (encoding);
956 }
c536b4b3 957}
fdfe6305 958
0dd7c540
AW
959/* A fluid specifying the default conversion handler for newly created
960 ports. Its value should be one of the symbols below. */
961SCM_VARIABLE (default_conversion_strategy_var,
962 "%default-port-conversion-strategy");
1b3daef0 963
0dd7c540
AW
964/* Whether the above fluid is initialized. */
965static int scm_conversion_strategy_init = 0;
c2ca4493 966
0dd7c540
AW
967/* The possible conversion strategies. */
968SCM_SYMBOL (sym_error, "error");
969SCM_SYMBOL (sym_substitute, "substitute");
970SCM_SYMBOL (sym_escape, "escape");
c536b4b3 971
0dd7c540
AW
972/* Return the default failed encoding conversion policy for new created
973 ports. */
974scm_t_string_failed_conversion_handler
975scm_i_default_port_conversion_handler (void)
976{
977 scm_t_string_failed_conversion_handler handler;
d68fee48 978
0dd7c540
AW
979 if (!scm_conversion_strategy_init
980 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
981 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
982 else
983 {
984 SCM fluid, value;
0f2d19dd 985
0dd7c540
AW
986 fluid = SCM_VARIABLE_REF (default_conversion_strategy_var);
987 value = scm_fluid_ref (fluid);
0f2d19dd 988
0dd7c540
AW
989 if (scm_is_eq (sym_substitute, value))
990 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
991 else if (scm_is_eq (sym_escape, value))
992 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
993 else
994 /* Default to 'error also when the fluid's value is not one of
995 the valid symbols. */
996 handler = SCM_FAILED_CONVERSION_ERROR;
997 }
eb5c0a2a 998
0dd7c540 999 return handler;
60d0643d 1000}
0f2d19dd 1001
0dd7c540
AW
1002/* Use HANDLER as the default conversion strategy for future ports. */
1003void
1004scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler
1005 handler)
0f2d19dd 1006{
0dd7c540 1007 SCM strategy;
0f2d19dd 1008
0dd7c540
AW
1009 if (!scm_conversion_strategy_init
1010 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
1011 scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
1012 SCM_EOL);
1013
1014 switch (handler)
78446828 1015 {
0dd7c540
AW
1016 case SCM_FAILED_CONVERSION_ERROR:
1017 strategy = sym_error;
1018 break;
1019
1020 case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE:
1021 strategy = sym_escape;
1022 break;
1023
1024 case SCM_FAILED_CONVERSION_QUESTION_MARK:
1025 strategy = sym_substitute;
1026 break;
1027
1028 default:
1029 abort ();
78446828 1030 }
0f2d19dd 1031
0dd7c540
AW
1032 scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var),
1033 strategy);
1034}
5dbc6c06 1035
f6f4feb0
MW
1036static void
1037scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port);
1038
1039/* If the next LEN bytes from PORT are equal to those in BYTES, then
1040 return 1, else return 0. Leave the port position unchanged. */
1041static int
1042looking_at_bytes (SCM port, const unsigned char *bytes, int len)
0f2d19dd 1043{
f6f4feb0
MW
1044 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1045 int i = 0;
1046
1047 while (i < len && scm_peek_byte_or_eof_unlocked (port) == bytes[i])
78446828 1048 {
f6f4feb0
MW
1049 pt->read_pos++;
1050 i++;
78446828 1051 }
f6f4feb0
MW
1052 scm_i_unget_bytes_unlocked (bytes, i, port);
1053 return (i == len);
0f2d19dd
JB
1054}
1055
f6f4feb0
MW
1056static const unsigned char scm_utf8_bom[3] = {0xEF, 0xBB, 0xBF};
1057static const unsigned char scm_utf16be_bom[2] = {0xFE, 0xFF};
1058static const unsigned char scm_utf16le_bom[2] = {0xFF, 0xFE};
1059static const unsigned char scm_utf32be_bom[4] = {0x00, 0x00, 0xFE, 0xFF};
1060static const unsigned char scm_utf32le_bom[4] = {0xFF, 0xFE, 0x00, 0x00};
5dbc6c06 1061
f6f4feb0
MW
1062/* Decide what byte order to use for a UTF-16 port. Return "UTF-16BE"
1063 or "UTF-16LE". MODE must be either SCM_PORT_READ or SCM_PORT_WRITE,
1064 and specifies which operation is about to be done. The MODE
1065 determines how we will decide the byte order. We deliberately avoid
1066 reading from the port unless the user is about to do so. If the user
1067 is about to read, then we look for a BOM, and if present, we use it
1068 to determine the byte order. Otherwise we choose big endian, as
1069 recommended by the Unicode Standard. Note that the BOM (if any) is
1070 not consumed here. */
1071static const char *
1072decide_utf16_encoding (SCM port, scm_t_port_rw_active mode)
5dbc6c06 1073{
f6f4feb0
MW
1074 if (mode == SCM_PORT_READ
1075 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1076 && looking_at_bytes (port, scm_utf16le_bom, sizeof scm_utf16le_bom))
1077 return "UTF-16LE";
1078 else
1079 return "UTF-16BE";
5dbc6c06
HWN
1080}
1081
f6f4feb0
MW
1082/* Decide what byte order to use for a UTF-32 port. Return "UTF-32BE"
1083 or "UTF-32LE". See the comment above 'decide_utf16_encoding' for
1084 details. */
1085static const char *
1086decide_utf32_encoding (SCM port, scm_t_port_rw_active mode)
89ea5b7c 1087{
f6f4feb0
MW
1088 if (mode == SCM_PORT_READ
1089 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1090 && looking_at_bytes (port, scm_utf32le_bom, sizeof scm_utf32le_bom))
1091 return "UTF-32LE";
1092 else
1093 return "UTF-32BE";
89ea5b7c 1094}
0f2d19dd 1095
6c98257f 1096static void
2aed2667 1097finalize_iconv_descriptors (void *ptr, void *data)
c536b4b3 1098{
6c98257f
AW
1099 close_iconv_descriptors (ptr);
1100}
c2ca4493 1101
6c98257f
AW
1102static scm_t_iconv_descriptors *
1103open_iconv_descriptors (const char *encoding, int reading, int writing)
1104{
1105 scm_t_iconv_descriptors *id;
1106 iconv_t input_cd, output_cd;
8ebd06c6 1107 size_t i;
c536b4b3 1108
6c98257f
AW
1109 input_cd = (iconv_t) -1;
1110 output_cd = (iconv_t) -1;
d68fee48 1111
8ebd06c6
AW
1112 for (i = 0; encoding[i]; i++)
1113 if (encoding[i] > 127)
1114 goto invalid_encoding;
1115
6c98257f
AW
1116 if (reading)
1117 {
1118 /* Open an input iconv conversion descriptor, from ENCODING
1119 to UTF-8. We choose UTF-8, not UTF-32, because iconv
1120 implementations can typically convert from anything to
1121 UTF-8, but not to UTF-32 (see
1122 <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
1123
1124 /* Assume opening an iconv descriptor causes about 16 KB of
1125 allocation. */
1126 scm_gc_register_allocation (16 * 1024);
1127
1128 input_cd = iconv_open ("UTF-8", encoding);
1129 if (input_cd == (iconv_t) -1)
1130 goto invalid_encoding;
1131 }
0f2d19dd 1132
6c98257f 1133 if (writing)
19b8d12b 1134 {
6c98257f
AW
1135 /* Assume opening an iconv descriptor causes about 16 KB of
1136 allocation. */
1137 scm_gc_register_allocation (16 * 1024);
0f2d19dd 1138
6c98257f
AW
1139 output_cd = iconv_open (encoding, "UTF-8");
1140 if (output_cd == (iconv_t) -1)
1141 {
1142 if (input_cd != (iconv_t) -1)
1143 iconv_close (input_cd);
1144 goto invalid_encoding;
1145 }
19b8d12b 1146 }
eb5c0a2a 1147
6c98257f
AW
1148 id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors");
1149 id->input_cd = input_cd;
1150 id->output_cd = output_cd;
1151
6978c673
AW
1152 /* Register a finalizer to close the descriptors. */
1153 scm_i_set_finalizer (id, finalize_iconv_descriptors, NULL);
19b8d12b 1154
6c98257f 1155 return id;
19b8d12b
AW
1156
1157 invalid_encoding:
1158 {
1159 SCM err;
8ebd06c6 1160 err = scm_from_latin1_string (encoding);
6c98257f 1161 scm_misc_error ("open_iconv_descriptors",
19b8d12b
AW
1162 "invalid or unknown character encoding ~s",
1163 scm_list_1 (err));
1164 }
0f2d19dd
JB
1165}
1166
6c98257f
AW
1167static void
1168close_iconv_descriptors (scm_t_iconv_descriptors *id)
1169{
1170 if (id->input_cd != (iconv_t) -1)
1171 iconv_close (id->input_cd);
1172 if (id->output_cd != (iconv_t) -1)
1173 iconv_close (id->output_cd);
1174 id->input_cd = (void *) -1;
1175 id->output_cd = (void *) -1;
1176}
1177
1178scm_t_iconv_descriptors *
f6f4feb0 1179scm_i_port_iconv_descriptors (SCM port, scm_t_port_rw_active mode)
6c98257f 1180{
f6f4feb0 1181 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
6c98257f 1182
f6f4feb0 1183 assert (pti->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV);
6c98257f 1184
f6f4feb0 1185 if (!pti->iconv_descriptors)
6c98257f 1186 {
f6f4feb0
MW
1187 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1188 const char *precise_encoding;
7b292a9d 1189
6c98257f
AW
1190 if (!pt->encoding)
1191 pt->encoding = "ISO-8859-1";
7be1705d 1192
f6f4feb0
MW
1193 /* If the specified encoding is UTF-16 or UTF-32, then make
1194 that more precise by deciding what byte order to use. */
1195 if (strcmp (pt->encoding, "UTF-16") == 0)
1196 precise_encoding = decide_utf16_encoding (port, mode);
1197 else if (strcmp (pt->encoding, "UTF-32") == 0)
1198 precise_encoding = decide_utf32_encoding (port, mode);
1199 else
1200 precise_encoding = pt->encoding;
7b292a9d 1201
f6f4feb0
MW
1202 pti->iconv_descriptors =
1203 open_iconv_descriptors (precise_encoding,
6c98257f
AW
1204 SCM_INPUT_PORT_P (port),
1205 SCM_OUTPUT_PORT_P (port));
1206 }
1207
f6f4feb0 1208 return pti->iconv_descriptors;
6c98257f
AW
1209}
1210
8ebd06c6 1211/* The name of the encoding is itself encoded in ASCII. */
6c98257f
AW
1212void
1213scm_i_set_port_encoding_x (SCM port, const char *encoding)
1214{
1215 scm_t_port *pt;
f6f4feb0 1216 scm_t_port_internal *pti;
6c98257f
AW
1217 scm_t_iconv_descriptors *prev;
1218
1219 /* Set the character encoding for this port. */
1220 pt = SCM_PTAB_ENTRY (port);
f6f4feb0
MW
1221 pti = SCM_PORT_GET_INTERNAL (port);
1222 prev = pti->iconv_descriptors;
7b292a9d 1223
f6f4feb0
MW
1224 /* In order to handle cases where the encoding changes mid-stream
1225 (e.g. within an HTTP stream, or within a file that is composed of
1226 segments with different encodings), we consider this to be "stream
1227 start" for purposes of BOM handling, regardless of our actual file
1228 position. */
1229 pti->at_stream_start_for_bom_read = 1;
1230 pti->at_stream_start_for_bom_write = 1;
6c98257f 1231
93c4fa21 1232 if (encoding_matches (encoding, "UTF-8"))
6c98257f
AW
1233 {
1234 pt->encoding = "UTF-8";
f6f4feb0 1235 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
6c98257f 1236 }
93c4fa21 1237 else if (encoding_matches (encoding, "ISO-8859-1"))
79eb47ea
AW
1238 {
1239 pt->encoding = "ISO-8859-1";
f6f4feb0 1240 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
79eb47ea 1241 }
6c98257f
AW
1242 else
1243 {
f6f4feb0
MW
1244 pt->encoding = canonicalize_encoding (encoding);
1245 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
6c98257f
AW
1246 }
1247
f6f4feb0 1248 pti->iconv_descriptors = NULL;
6c98257f
AW
1249 if (prev)
1250 close_iconv_descriptors (prev);
1251}
1252
19b8d12b
AW
1253SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
1254 (SCM port),
1255 "Returns, as a string, the character encoding that @var{port}\n"
1256 "uses to interpret its input and output.\n")
1257#define FUNC_NAME s_scm_port_encoding
0f2d19dd 1258{
19b8d12b
AW
1259 SCM_VALIDATE_PORT (1, port);
1260
93c4fa21 1261 return scm_from_latin1_string (SCM_PTAB_ENTRY (port)->encoding);
0f2d19dd 1262}
1bbd0b84 1263#undef FUNC_NAME
0f2d19dd 1264
19b8d12b
AW
1265SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
1266 (SCM port, SCM enc),
1267 "Sets the character encoding that will be used to interpret all\n"
1268 "port I/O. New ports are created with the encoding\n"
1269 "appropriate for the current locale if @code{setlocale} has \n"
1270 "been called or ISO-8859-1 otherwise\n"
1271 "and this procedure can be used to modify that encoding.\n")
1272#define FUNC_NAME s_scm_set_port_encoding_x
5dbc6c06 1273{
19b8d12b 1274 char *enc_str;
5dbc6c06 1275
19b8d12b
AW
1276 SCM_VALIDATE_PORT (1, port);
1277 SCM_VALIDATE_STRING (2, enc);
0f2d19dd 1278
8ebd06c6 1279 enc_str = scm_to_latin1_string (enc);
19b8d12b
AW
1280 scm_i_set_port_encoding_x (port, enc_str);
1281 free (enc_str);
1282
1283 return SCM_UNSPECIFIED;
0f2d19dd 1284}
1bbd0b84 1285#undef FUNC_NAME
0f2d19dd 1286
19b8d12b
AW
1287SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
1288 1, 0, 0, (SCM port),
1289 "Returns the behavior of the port when handling a character that\n"
1290 "is not representable in the port's current encoding.\n"
1291 "It returns the symbol @code{error} if unrepresentable characters\n"
1292 "should cause exceptions, @code{substitute} if the port should\n"
1293 "try to replace unrepresentable characters with question marks or\n"
1294 "approximate characters, or @code{escape} if unrepresentable\n"
1295 "characters should be converted to string escapes.\n"
1296 "\n"
1297 "If @var{port} is @code{#f}, then the current default behavior\n"
1298 "will be returned. New ports will have this default behavior\n"
1299 "when they are created.\n")
1300#define FUNC_NAME s_scm_port_conversion_strategy
889975e5 1301{
19b8d12b 1302 scm_t_string_failed_conversion_handler h;
7b292a9d 1303
0dd7c540
AW
1304 if (scm_is_false (port))
1305 h = scm_i_default_port_conversion_handler ();
1306 else
7b292a9d 1307 {
0dd7c540
AW
1308 scm_t_port *pt;
1309
19b8d12b 1310 SCM_VALIDATE_OPPORT (1, port);
0dd7c540
AW
1311 pt = SCM_PTAB_ENTRY (port);
1312
1313 h = pt->ilseq_handler;
7b292a9d
LC
1314 }
1315
19b8d12b
AW
1316 if (h == SCM_FAILED_CONVERSION_ERROR)
1317 return scm_from_latin1_symbol ("error");
1318 else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
1319 return scm_from_latin1_symbol ("substitute");
1320 else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
1321 return scm_from_latin1_symbol ("escape");
1322 else
1323 abort ();
7b292a9d 1324
19b8d12b
AW
1325 /* Never gets here. */
1326 return SCM_UNDEFINED;
1327}
1328#undef FUNC_NAME
7b292a9d 1329
19b8d12b
AW
1330SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!",
1331 2, 0, 0,
1332 (SCM port, SCM sym),
1333 "Sets the behavior of the interpreter when outputting a character\n"
1334 "that is not representable in the port's current encoding.\n"
1335 "@var{sym} can be either @code{'error}, @code{'substitute}, or\n"
1336 "@code{'escape}. If it is @code{'error}, an error will be thrown\n"
1337 "when an unconvertible character is encountered. If it is\n"
1338 "@code{'substitute}, then unconvertible characters will \n"
1339 "be replaced with approximate characters, or with question marks\n"
1340 "if no approximately correct character is available.\n"
1341 "If it is @code{'escape},\n"
1342 "it will appear as a hex escape when output.\n"
1343 "\n"
1344 "If @var{port} is an open port, the conversion error behavior\n"
1345 "is set for that port. If it is @code{#f}, it is set as the\n"
1346 "default behavior for any future ports that get created in\n"
1347 "this thread.\n")
1348#define FUNC_NAME s_scm_set_port_conversion_strategy_x
1349{
0dd7c540 1350 scm_t_string_failed_conversion_handler handler;
7b292a9d 1351
0dd7c540
AW
1352 if (scm_is_eq (sym, sym_error))
1353 handler = SCM_FAILED_CONVERSION_ERROR;
1354 else if (scm_is_eq (sym, sym_substitute))
1355 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
1356 else if (scm_is_eq (sym, sym_escape))
1357 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
1358 else
1359 SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym));
7b292a9d 1360
0dd7c540
AW
1361 if (scm_is_false (port))
1362 scm_i_set_default_port_conversion_handler (handler);
1363 else
19b8d12b 1364 {
0dd7c540
AW
1365 SCM_VALIDATE_OPPORT (1, port);
1366 SCM_PTAB_ENTRY (port)->ilseq_handler = handler;
19b8d12b 1367 }
7b292a9d 1368
19b8d12b
AW
1369 return SCM_UNSPECIFIED;
1370}
1371#undef FUNC_NAME
7be1705d 1372
7b292a9d 1373
19b8d12b 1374\f
7b292a9d 1375
14dcb5cc
AW
1376/* The port lock. */
1377
1378static void
92c0ebac 1379lock_port (void *mutex)
14dcb5cc 1380{
789dd40b 1381 scm_i_pthread_mutex_lock ((scm_i_pthread_mutex_t *) mutex);
14dcb5cc
AW
1382}
1383
1384static void
92c0ebac 1385unlock_port (void *mutex)
14dcb5cc 1386{
789dd40b 1387 scm_i_pthread_mutex_unlock ((scm_i_pthread_mutex_t *) mutex);
14dcb5cc
AW
1388}
1389
1390void
1391scm_dynwind_lock_port (SCM port)
92c0ebac 1392#define FUNC_NAME "dynwind-lock-port"
14dcb5cc 1393{
92c0ebac
AW
1394 scm_i_pthread_mutex_t *lock;
1395 SCM_VALIDATE_OPPORT (SCM_ARG1, port);
1396 scm_c_lock_port (port, &lock);
1397 if (lock)
1398 {
1399 scm_dynwind_unwind_handler (unlock_port, lock, SCM_F_WIND_EXPLICITLY);
1400 scm_dynwind_rewind_handler (lock_port, lock, 0);
1401 }
14dcb5cc 1402}
92c0ebac 1403#undef FUNC_NAME
14dcb5cc
AW
1404
1405
1406\f
1407
19b8d12b 1408/* Input. */
7be1705d 1409
0d959103
AW
1410int
1411scm_get_byte_or_eof (SCM port)
1412{
92c0ebac 1413 scm_i_pthread_mutex_t *lock;
0d959103
AW
1414 int ret;
1415
92c0ebac 1416 scm_c_lock_port (port, &lock);
0d959103 1417 ret = scm_get_byte_or_eof_unlocked (port);
92c0ebac
AW
1418 if (lock)
1419 scm_i_pthread_mutex_unlock (lock);
0d959103
AW
1420
1421 return ret;
1422}
1423
1424int
1425scm_peek_byte_or_eof (SCM port)
1426{
92c0ebac 1427 scm_i_pthread_mutex_t *lock;
0d959103
AW
1428 int ret;
1429
92c0ebac 1430 scm_c_lock_port (port, &lock);
0d959103 1431 ret = scm_peek_byte_or_eof_unlocked (port);
92c0ebac
AW
1432 if (lock)
1433 scm_i_pthread_mutex_unlock (lock);
0d959103
AW
1434
1435 return ret;
1436}
1437
19b8d12b
AW
1438/* scm_c_read
1439 *
1440 * Used by an application to read arbitrary number of bytes from an
1441 * SCM port. Same semantics as libc read, except that scm_c_read only
1442 * returns less than SIZE bytes if at end-of-file.
1443 *
1444 * Warning: Doesn't update port line and column counts! */
7b292a9d 1445
19b8d12b
AW
1446/* This structure, and the following swap_buffer function, are used
1447 for temporarily swapping a port's own read buffer, and the buffer
1448 that the caller of scm_c_read provides. */
1449struct port_and_swap_buffer
1450{
1451 scm_t_port *pt;
1452 unsigned char *buffer;
1453 size_t size;
1454};
7b292a9d 1455
19b8d12b
AW
1456static void
1457swap_buffer (void *data)
1458{
1459 struct port_and_swap_buffer *psb = (struct port_and_swap_buffer *) data;
1460 unsigned char *old_buf = psb->pt->read_buf;
1461 size_t old_size = psb->pt->read_buf_size;
7be1705d 1462
19b8d12b
AW
1463 /* Make the port use (buffer, size) from the struct. */
1464 psb->pt->read_pos = psb->pt->read_buf = psb->pt->read_end = psb->buffer;
1465 psb->pt->read_buf_size = psb->size;
7b292a9d 1466
19b8d12b
AW
1467 /* Save the port's old (buffer, size) in the struct. */
1468 psb->buffer = old_buf;
1469 psb->size = old_size;
7b292a9d
LC
1470}
1471
f6f4feb0
MW
1472static int scm_i_fill_input_unlocked (SCM port);
1473
19b8d12b 1474size_t
be632904 1475scm_c_read_unlocked (SCM port, void *buffer, size_t size)
19b8d12b 1476#define FUNC_NAME "scm_c_read"
7b292a9d
LC
1477{
1478 scm_t_port *pt;
f6f4feb0 1479 scm_t_port_internal *pti;
19b8d12b
AW
1480 size_t n_read = 0, n_available;
1481 struct port_and_swap_buffer psb;
889975e5 1482
19b8d12b 1483 SCM_VALIDATE_OPINPORT (1, port);
889975e5 1484
19b8d12b 1485 pt = SCM_PTAB_ENTRY (port);
f6f4feb0 1486 pti = SCM_PORT_GET_INTERNAL (port);
19b8d12b
AW
1487 if (pt->rw_active == SCM_PORT_WRITE)
1488 SCM_PORT_DESCRIPTOR (port)->flush (port);
889975e5 1489
19b8d12b
AW
1490 if (pt->rw_random)
1491 pt->rw_active = SCM_PORT_READ;
889975e5 1492
19b8d12b
AW
1493 /* Take bytes first from the port's read buffer. */
1494 if (pt->read_pos < pt->read_end)
1495 {
1496 n_available = min (size, pt->read_end - pt->read_pos);
1497 memcpy (buffer, pt->read_pos, n_available);
1498 buffer = (char *) buffer + n_available;
1499 pt->read_pos += n_available;
1500 n_read += n_available;
1501 size -= n_available;
1502 }
889975e5 1503
19b8d12b
AW
1504 /* Avoid the scm_dynwind_* costs if we now have enough data. */
1505 if (size == 0)
1506 return n_read;
b5cb4464 1507
f6f4feb0
MW
1508 /* Now we will call scm_i_fill_input_unlocked repeatedly until we have
1509 read the requested number of bytes. (Note that a single
1510 scm_i_fill_input_unlocked call does not guarantee to fill the whole
1511 of the port's read buffer.) */
d1b9f8ac 1512 if (pt->read_buf_size <= 1
f6f4feb0 1513 && pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
b5cb4464 1514 {
f6f4feb0
MW
1515 /* The port that we are reading from is unbuffered - i.e. does not
1516 have its own persistent buffer - but we have a buffer, provided
1517 by our caller, that is the right size for the data that is
1518 wanted. For the following scm_i_fill_input_unlocked calls,
1519 therefore, we use the buffer in hand as the port's read buffer.
1520
1521 We need to make sure that the port's normal (1 byte) buffer is
1522 reinstated in case one of the scm_i_fill_input_unlocked ()
1523 calls throws an exception; we use the scm_dynwind_* API to
1524 achieve that.
75192345
MG
1525
1526 A consequence of this optimization is that the fill_input
1527 functions can't unget characters. That'll push data to the
1528 pushback buffer instead of this psb buffer. */
1529#if SCM_DEBUG == 1
1530 unsigned char *pback = pt->putback_buf;
1531#endif
6d227556
NJ
1532 psb.pt = pt;
1533 psb.buffer = buffer;
1534 psb.size = size;
1535 scm_dynwind_begin (SCM_F_DYNWIND_REWINDABLE);
1536 scm_dynwind_rewind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1537 scm_dynwind_unwind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1538
f6f4feb0
MW
1539 /* Call scm_i_fill_input_unlocked until we have all the bytes that
1540 we need, or we hit EOF. */
1541 while (pt->read_buf_size && (scm_i_fill_input_unlocked (port) != EOF))
6d227556
NJ
1542 {
1543 pt->read_buf_size -= (pt->read_end - pt->read_pos);
1544 pt->read_pos = pt->read_buf = pt->read_end;
1545 }
75192345
MG
1546#if SCM_DEBUG == 1
1547 if (pback != pt->putback_buf
1548 || pt->read_buf - (unsigned char *) buffer < 0)
1549 scm_misc_error (FUNC_NAME,
1550 "scm_c_read must not call a fill function that pushes "
1551 "back characters onto an unbuffered port", SCM_EOL);
1552#endif
6d227556 1553 n_read += pt->read_buf - (unsigned char *) buffer;
75192345 1554
6d227556
NJ
1555 /* Reinstate the port's normal buffer. */
1556 scm_dynwind_end ();
1557 }
1558 else
1559 {
1560 /* The port has its own buffer. It is important that we use it,
1561 even if it happens to be smaller than our caller's buffer, so
1562 that a custom port implementation's entry points (in
1563 particular, fill_input) can rely on the buffer always being
1564 the same as they first set up. */
f6f4feb0 1565 while (size && (scm_i_fill_input_unlocked (port) != EOF))
6d227556
NJ
1566 {
1567 n_available = min (size, pt->read_end - pt->read_pos);
1568 memcpy (buffer, pt->read_pos, n_available);
1569 buffer = (char *) buffer + n_available;
1570 pt->read_pos += n_available;
1571 n_read += n_available;
1572 size -= n_available;
1573 }
1574 }
6fe692e9 1575
b5cb4464 1576 return n_read;
6fe692e9 1577}
693758d5 1578#undef FUNC_NAME
6fe692e9 1579
be632904
AW
1580size_t
1581scm_c_read (SCM port, void *buffer, size_t size)
1582{
92c0ebac 1583 scm_i_pthread_mutex_t *lock;
be632904
AW
1584 size_t ret;
1585
92c0ebac 1586 scm_c_lock_port (port, &lock);
be632904 1587 ret = scm_c_read_unlocked (port, buffer, size);
92c0ebac
AW
1588 if (lock)
1589 scm_i_pthread_mutex_unlock (lock);
1590
be632904
AW
1591
1592 return ret;
1593}
1594
19b8d12b
AW
1595/* Update the line and column number of PORT after consumption of C. */
1596static inline void
1597update_port_lf (scm_t_wchar c, SCM port)
6fe692e9 1598{
19b8d12b
AW
1599 switch (c)
1600 {
1601 case '\a':
1602 case EOF:
1603 break;
1604 case '\b':
1605 SCM_DECCOL (port);
1606 break;
1607 case '\n':
1608 SCM_INCLINE (port);
1609 break;
1610 case '\r':
1611 SCM_ZEROCOL (port);
1612 break;
1613 case '\t':
1614 SCM_TABCOL (port);
1615 break;
1616 default:
1617 SCM_INCCOL (port);
1618 break;
1619 }
1620}
6fe692e9 1621
19b8d12b 1622#define SCM_MBCHAR_BUF_SIZE (4)
6fe692e9 1623
19b8d12b
AW
1624/* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint.
1625 UTF8_BUF is assumed to contain a valid UTF-8 sequence. */
1626static scm_t_wchar
1627utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
283a1a0e 1628{
19b8d12b 1629 scm_t_wchar codepoint;
283a1a0e 1630
19b8d12b 1631 if (utf8_buf[0] <= 0x7f)
283a1a0e 1632 {
19b8d12b
AW
1633 assert (size == 1);
1634 codepoint = utf8_buf[0];
1635 }
1636 else if ((utf8_buf[0] & 0xe0) == 0xc0)
1637 {
1638 assert (size == 2);
1639 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL
1640 | (utf8_buf[1] & 0x3f);
1641 }
1642 else if ((utf8_buf[0] & 0xf0) == 0xe0)
1643 {
1644 assert (size == 3);
1645 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL
1646 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL
1647 | (utf8_buf[2] & 0x3f);
283a1a0e
GH
1648 }
1649 else
19b8d12b
AW
1650 {
1651 assert (size == 4);
1652 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL
1653 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL
1654 | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL
1655 | (utf8_buf[3] & 0x3f);
1656 }
283a1a0e 1657
19b8d12b 1658 return codepoint;
283a1a0e
GH
1659}
1660
19b8d12b
AW
1661/* Read a UTF-8 sequence from PORT. On success, return 0 and set
1662 *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8
1663 representation, and set *LEN to the length in bytes. Return
1664 `EILSEQ' on error. */
1665static int
1666get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
1667 scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1668{
1669#define ASSERT_NOT_EOF(b) \
1670 if (SCM_UNLIKELY ((b) == EOF)) \
1671 goto invalid_seq
1672#define CONSUME_PEEKED_BYTE() \
1673 pt->read_pos++
ee149d03 1674
19b8d12b
AW
1675 int byte;
1676 scm_t_port *pt;
ee149d03 1677
19b8d12b
AW
1678 *len = 0;
1679 pt = SCM_PTAB_ENTRY (port);
840ae05d 1680
0d959103 1681 byte = scm_get_byte_or_eof_unlocked (port);
19b8d12b 1682 if (byte == EOF)
6c951427 1683 {
19b8d12b
AW
1684 *codepoint = EOF;
1685 return 0;
1686 }
6c951427 1687
19b8d12b
AW
1688 buf[0] = (scm_t_uint8) byte;
1689 *len = 1;
6c951427 1690
19b8d12b
AW
1691 if (buf[0] <= 0x7f)
1692 /* 1-byte form. */
1693 *codepoint = buf[0];
1694 else if (buf[0] >= 0xc2 && buf[0] <= 0xdf)
1695 {
1696 /* 2-byte form. */
0d959103 1697 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b 1698 ASSERT_NOT_EOF (byte);
6c951427 1699
19b8d12b
AW
1700 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1701 goto invalid_seq;
6c951427 1702
19b8d12b
AW
1703 CONSUME_PEEKED_BYTE ();
1704 buf[1] = (scm_t_uint8) byte;
1705 *len = 2;
1706
1707 *codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL
1708 | (buf[1] & 0x3f);
6c951427 1709 }
19b8d12b 1710 else if ((buf[0] & 0xf0) == 0xe0)
6c951427 1711 {
19b8d12b 1712 /* 3-byte form. */
0d959103 1713 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b 1714 ASSERT_NOT_EOF (byte);
6c951427 1715
19b8d12b
AW
1716 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80
1717 || (buf[0] == 0xe0 && byte < 0xa0)
1718 || (buf[0] == 0xed && byte > 0x9f)))
1719 goto invalid_seq;
6c951427 1720
19b8d12b
AW
1721 CONSUME_PEEKED_BYTE ();
1722 buf[1] = (scm_t_uint8) byte;
1723 *len = 2;
1724
0d959103 1725 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b
AW
1726 ASSERT_NOT_EOF (byte);
1727
1728 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1729 goto invalid_seq;
1730
1731 CONSUME_PEEKED_BYTE ();
1732 buf[2] = (scm_t_uint8) byte;
1733 *len = 3;
1734
1735 *codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL
1736 | ((scm_t_wchar) buf[1] & 0x3f) << 6UL
1737 | (buf[2] & 0x3f);
6c951427 1738 }
19b8d12b
AW
1739 else if (buf[0] >= 0xf0 && buf[0] <= 0xf4)
1740 {
1741 /* 4-byte form. */
0d959103 1742 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b 1743 ASSERT_NOT_EOF (byte);
6c951427 1744
19b8d12b
AW
1745 if (SCM_UNLIKELY (((byte & 0xc0) != 0x80)
1746 || (buf[0] == 0xf0 && byte < 0x90)
1747 || (buf[0] == 0xf4 && byte > 0x8f)))
1748 goto invalid_seq;
ee149d03 1749
19b8d12b
AW
1750 CONSUME_PEEKED_BYTE ();
1751 buf[1] = (scm_t_uint8) byte;
1752 *len = 2;
889975e5 1753
0d959103 1754 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b 1755 ASSERT_NOT_EOF (byte);
889975e5 1756
19b8d12b
AW
1757 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1758 goto invalid_seq;
63479112 1759
19b8d12b
AW
1760 CONSUME_PEEKED_BYTE ();
1761 buf[2] = (scm_t_uint8) byte;
1762 *len = 3;
63479112 1763
0d959103 1764 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b 1765 ASSERT_NOT_EOF (byte);
63479112 1766
19b8d12b
AW
1767 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1768 goto invalid_seq;
63479112 1769
19b8d12b
AW
1770 CONSUME_PEEKED_BYTE ();
1771 buf[3] = (scm_t_uint8) byte;
1772 *len = 4;
840ae05d 1773
19b8d12b
AW
1774 *codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL
1775 | ((scm_t_wchar) buf[1] & 0x3f) << 12UL
1776 | ((scm_t_wchar) buf[2] & 0x3f) << 6UL
1777 | (buf[3] & 0x3f);
ee149d03
JB
1778 }
1779 else
19b8d12b 1780 goto invalid_seq;
ee149d03 1781
19b8d12b 1782 return 0;
ee149d03 1783
19b8d12b
AW
1784 invalid_seq:
1785 /* Here we could choose the consume the faulty byte when it's not a
1786 valid starting byte, but it's not a requirement. What Section 3.9
1787 of Unicode 6.0.0 mandates, though, is to not consume a byte that
1788 would otherwise be a valid starting byte. */
ee149d03 1789
19b8d12b 1790 return EILSEQ;
ee149d03 1791
19b8d12b
AW
1792#undef CONSUME_PEEKED_BYTE
1793#undef ASSERT_NOT_EOF
1794}
1795
79eb47ea 1796/* Read an ISO-8859-1 codepoint (a byte) from PORT. On success, return
05b4d910
AW
1797 0 and set *CODEPOINT to the codepoint that was read, fill BUF with
1798 its UTF-8 representation, and set *LEN to the length in bytes.
1799 Return `EILSEQ' on error. */
79eb47ea
AW
1800static int
1801get_latin1_codepoint (SCM port, scm_t_wchar *codepoint,
1802 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1803{
1804 *codepoint = scm_get_byte_or_eof_unlocked (port);
1805
1806 if (*codepoint == EOF)
1807 *len = 0;
1808 else
1809 {
1810 *len = 1;
1811 buf[0] = *codepoint;
1812 }
1813 return 0;
1814}
1815
19b8d12b
AW
1816/* Likewise, read a byte sequence from PORT, passing it through its
1817 input conversion descriptor. */
1818static int
1819get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
1820 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1821{
6c98257f 1822 scm_t_iconv_descriptors *id;
19b8d12b 1823 scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
f6f4feb0 1824 size_t input_size = 0;
19b8d12b 1825
f6f4feb0 1826 id = scm_i_port_iconv_descriptors (port, SCM_PORT_READ);
19b8d12b 1827
f6f4feb0 1828 for (;;)
19b8d12b 1829 {
f6f4feb0
MW
1830 int byte_read;
1831 char *input, *output;
19b8d12b
AW
1832 size_t input_left, output_left, done;
1833
0d959103 1834 byte_read = scm_get_byte_or_eof_unlocked (port);
f6f4feb0 1835 if (SCM_UNLIKELY (byte_read == EOF))
19b8d12b 1836 {
f6f4feb0
MW
1837 if (SCM_LIKELY (input_size == 0))
1838 {
1839 *codepoint = (scm_t_wchar) EOF;
1840 *len = input_size;
1841 return 0;
1842 }
1843 else
1844 {
1845 /* EOF found in the middle of a multibyte character. */
1846 scm_i_set_pending_eof (port);
1847 return EILSEQ;
1848 }
19b8d12b
AW
1849 }
1850
f6f4feb0 1851 buf[input_size++] = byte_read;
19b8d12b
AW
1852
1853 input = buf;
f6f4feb0
MW
1854 input_left = input_size;
1855 output = (char *) utf8_buf;
19b8d12b
AW
1856 output_left = sizeof (utf8_buf);
1857
6c98257f 1858 done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
f6f4feb0 1859
19b8d12b
AW
1860 if (done == (size_t) -1)
1861 {
f6f4feb0
MW
1862 int err = errno;
1863 if (SCM_LIKELY (err == EINVAL))
1864 /* The input byte sequence did not form a complete
1865 character. Read another byte and try again. */
1866 continue;
1867 else
1868 return err;
19b8d12b
AW
1869 }
1870 else
f6f4feb0
MW
1871 {
1872 size_t output_size = sizeof (utf8_buf) - output_left;
1873 if (SCM_LIKELY (output_size > 0))
1874 {
1875 /* iconv generated output. Convert the UTF8_BUF sequence
1876 to a Unicode code point. */
1877 *codepoint = utf8_to_codepoint (utf8_buf, output_size);
1878 *len = input_size;
1879 return 0;
1880 }
1881 else
1882 {
1883 /* iconv consumed some bytes without producing any output.
1884 Most likely this means that a Unicode byte-order mark
1885 (BOM) was consumed, which should not be included in the
1886 returned buf. Shift any remaining bytes to the beginning
1887 of buf, and continue the loop. */
1888 memmove (buf, input, input_left);
1889 input_size = input_left;
1890 continue;
1891 }
1892 }
19b8d12b 1893 }
19b8d12b
AW
1894}
1895
1896/* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
1897 with the byte representation of the codepoint in PORT's encoding, and
1898 set *LEN to the length in bytes of that representation. Return 0 on
1899 success and an errno value on error. */
24ea9f9c 1900static SCM_C_INLINE int
19b8d12b
AW
1901get_codepoint (SCM port, scm_t_wchar *codepoint,
1902 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1903{
1904 int err;
1905 scm_t_port *pt = SCM_PTAB_ENTRY (port);
f6f4feb0 1906 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
19b8d12b 1907
f6f4feb0 1908 if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
19b8d12b 1909 err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
f6f4feb0 1910 else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
79eb47ea 1911 err = get_latin1_codepoint (port, codepoint, buf, len);
19b8d12b
AW
1912 else
1913 err = get_iconv_codepoint (port, codepoint, buf, len);
1914
1915 if (SCM_LIKELY (err == 0))
f6f4feb0
MW
1916 {
1917 if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read))
1918 {
1919 /* Record that we're no longer at stream start. */
1920 pti->at_stream_start_for_bom_read = 0;
1921 if (pt->rw_random)
1922 pti->at_stream_start_for_bom_write = 0;
1923
1924 /* If we just read a BOM in an encoding that recognizes them,
1925 then silently consume it and read another code point. */
1926 if (SCM_UNLIKELY
1927 (*codepoint == SCM_UNICODE_BOM
1928 && (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8
1929 || strcmp (pt->encoding, "UTF-16") == 0
1930 || strcmp (pt->encoding, "UTF-32") == 0)))
1931 return get_codepoint (port, codepoint, buf, len);
1932 }
1933 update_port_lf (*codepoint, port);
1934 }
19b8d12b
AW
1935 else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
1936 {
1937 *codepoint = '?';
1938 err = 0;
1939 update_port_lf (*codepoint, port);
1940 }
1941
1942 return err;
1943}
1944
1945/* Read a codepoint from PORT and return it. */
1946scm_t_wchar
be632904 1947scm_getc_unlocked (SCM port)
19b8d12b
AW
1948#define FUNC_NAME "scm_getc"
1949{
1950 int err;
1951 size_t len;
1952 scm_t_wchar codepoint;
1953 char buf[SCM_MBCHAR_BUF_SIZE];
1954
1955 err = get_codepoint (port, &codepoint, buf, &len);
1956 if (SCM_UNLIKELY (err != 0))
1957 /* At this point PORT should point past the invalid encoding, as per
1958 R6RS-lib Section 8.2.4. */
1959 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
1960
1961 return codepoint;
1962}
1963#undef FUNC_NAME
1964
be632904
AW
1965scm_t_wchar
1966scm_getc (SCM port)
1967{
92c0ebac 1968 scm_i_pthread_mutex_t *lock;
be632904
AW
1969 scm_t_wchar ret;
1970
92c0ebac 1971 scm_c_lock_port (port, &lock);
be632904 1972 ret = scm_getc_unlocked (port);
92c0ebac
AW
1973 if (lock)
1974 scm_i_pthread_mutex_unlock (lock);
1975
be632904
AW
1976
1977 return ret;
1978}
1979
19b8d12b
AW
1980SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
1981 (SCM port),
1982 "Return the next character available from @var{port}, updating\n"
1983 "@var{port} to point to the following character. If no more\n"
1984 "characters are available, the end-of-file object is returned.\n"
1985 "\n"
1986 "When @var{port}'s data cannot be decoded according to its\n"
1987 "character encoding, a @code{decoding-error} is raised and\n"
1988 "@var{port} points past the erroneous byte sequence.\n")
1989#define FUNC_NAME s_scm_read_char
1990{
1991 scm_t_wchar c;
1992 if (SCM_UNBNDP (port))
1993 port = scm_current_input_port ();
1994 SCM_VALIDATE_OPINPORT (1, port);
be632904 1995 c = scm_getc_unlocked (port);
19b8d12b
AW
1996 if (EOF == c)
1997 return SCM_EOF_VAL;
1998 return SCM_MAKE_CHAR (c);
1999}
2000#undef FUNC_NAME
2001
2002
2003\f
2004
2005/* Pushback. */
f6f4feb0 2006\f
19b8d12b 2007
f6f4feb0
MW
2008
2009static void
2010scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
2011#define FUNC_NAME "scm_unget_bytes"
19b8d12b
AW
2012{
2013 scm_t_port *pt = SCM_PTAB_ENTRY (port);
f6f4feb0 2014 size_t old_len, new_len;
19b8d12b 2015
f6f4feb0 2016 scm_i_clear_pending_eof (port);
19b8d12b 2017
f6f4feb0 2018 if (pt->read_buf != pt->putback_buf)
19b8d12b
AW
2019 /* switch to the put-back buffer. */
2020 {
2021 if (pt->putback_buf == NULL)
2022 {
f6f4feb0
MW
2023 pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
2024 ? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
19b8d12b
AW
2025 pt->putback_buf
2026 = (unsigned char *) scm_gc_malloc_pointerless
f6f4feb0 2027 (pt->putback_buf_size, "putback buffer");
19b8d12b
AW
2028 }
2029
2030 pt->saved_read_buf = pt->read_buf;
2031 pt->saved_read_pos = pt->read_pos;
2032 pt->saved_read_end = pt->read_end;
2033 pt->saved_read_buf_size = pt->read_buf_size;
2034
f6f4feb0
MW
2035 /* Put read_pos at the end of the buffer, so that ungets will not
2036 have to shift the buffer contents each time. */
2037 pt->read_buf = pt->putback_buf;
2038 pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
19b8d12b
AW
2039 pt->read_buf_size = pt->putback_buf_size;
2040 }
2041
f6f4feb0
MW
2042 old_len = pt->read_end - pt->read_pos;
2043 new_len = old_len + len;
2044
2045 if (new_len > pt->read_buf_size)
2046 /* The putback buffer needs to be enlarged. */
2047 {
2048 size_t new_buf_size;
2049 unsigned char *new_buf, *new_end, *new_pos;
2050
2051 new_buf_size = pt->read_buf_size * 2;
2052 if (new_buf_size < new_len)
2053 new_buf_size = new_len;
2054
2055 new_buf = (unsigned char *)
2056 scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
2057
2058 /* Put the bytes at the end of the buffer, so that future
2059 ungets won't need to shift the buffer. */
2060 new_end = new_buf + new_buf_size;
2061 new_pos = new_end - old_len;
2062 memcpy (new_pos, pt->read_pos, old_len);
2063
2064 pt->read_buf = pt->putback_buf = new_buf;
2065 pt->read_pos = new_pos;
2066 pt->read_end = new_end;
2067 pt->read_buf_size = pt->putback_buf_size = new_buf_size;
2068 }
2069 else if (pt->read_buf + len < pt->read_pos)
2070 /* If needed, shift the existing buffer contents up.
2071 This should not happen unless some external code
2072 manipulates the putback buffer pointers. */
2073 {
2074 unsigned char *new_end = pt->read_buf + pt->read_buf_size;
2075 unsigned char *new_pos = new_end - old_len;
2076
2077 memmove (new_pos, pt->read_pos, old_len);
2078 pt->read_pos = new_pos;
2079 pt->read_end = new_end;
2080 }
2081
2082 /* Move read_pos back and copy the bytes there. */
2083 pt->read_pos -= len;
2084 memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
2085
2086 if (pt->rw_active == SCM_PORT_WRITE)
2087 scm_flush (port);
19b8d12b
AW
2088
2089 if (pt->rw_random)
2090 pt->rw_active = SCM_PORT_READ;
2091}
2092#undef FUNC_NAME
2093
f6f4feb0
MW
2094void
2095scm_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
7f6c3f8f 2096{
f6f4feb0 2097 scm_i_unget_bytes_unlocked (buf, len, port);
7f6c3f8f
MW
2098}
2099
2100void
f6f4feb0
MW
2101scm_unget_byte_unlocked (int c, SCM port)
2102{
2103 unsigned char byte = c;
2104 scm_i_unget_bytes_unlocked (&byte, 1, port);
2105}
2106
2107void
2108scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
7f6c3f8f 2109{
f6f4feb0
MW
2110 scm_i_pthread_mutex_t *lock;
2111 scm_c_lock_port (port, &lock);
2112 scm_i_unget_bytes_unlocked (buf, len, port);
2113 if (lock)
2114 scm_i_pthread_mutex_unlock (lock);
2115}
7f6c3f8f 2116
c932ce0b
AW
2117void
2118scm_unget_byte (int c, SCM port)
2119{
f6f4feb0 2120 unsigned char byte = c;
92c0ebac
AW
2121 scm_i_pthread_mutex_t *lock;
2122 scm_c_lock_port (port, &lock);
f6f4feb0 2123 scm_i_unget_bytes_unlocked (&byte, 1, port);
92c0ebac
AW
2124 if (lock)
2125 scm_i_pthread_mutex_unlock (lock);
c932ce0b
AW
2126}
2127
19b8d12b 2128void
c932ce0b 2129scm_ungetc_unlocked (scm_t_wchar c, SCM port)
19b8d12b
AW
2130#define FUNC_NAME "scm_ungetc"
2131{
2132 scm_t_port *pt = SCM_PTAB_ENTRY (port);
be7ecef0 2133 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
19b8d12b
AW
2134 char *result;
2135 char result_buf[10];
19b8d12b 2136 size_t len;
19b8d12b 2137
19b8d12b 2138 len = sizeof (result_buf);
be7ecef0
AW
2139
2140 if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
2141 {
f7582f98 2142 if (c < 0x80)
be7ecef0
AW
2143 {
2144 result_buf[0] = (char) c;
2145 result = result_buf;
2146 len = 1;
2147 }
2148 else
2149 result =
2150 (char *) u32_to_u8 ((uint32_t *) &c, 1, (uint8_t *) result_buf, &len);
2151 }
2152 else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1 && c <= 0xff)
2153 {
2154 result_buf[0] = (char) c;
2155 result = result_buf;
2156 len = 1;
2157 }
2158 else
2159 result = u32_conv_to_encoding (pt->encoding,
2160 (enum iconv_ilseq_handler) pt->ilseq_handler,
2161 (uint32_t *) &c, 1, NULL,
2162 result_buf, &len);
19b8d12b
AW
2163
2164 if (SCM_UNLIKELY (result == NULL || len == 0))
2165 scm_encoding_error (FUNC_NAME, errno,
2166 "conversion to port encoding failed",
2167 SCM_BOOL_F, SCM_MAKE_CHAR (c));
2168
f6f4feb0 2169 scm_i_unget_bytes_unlocked ((unsigned char *) result, len, port);
19b8d12b
AW
2170
2171 if (SCM_UNLIKELY (result != result_buf))
2172 free (result);
2173
2174 if (c == '\n')
681f2b85
AW
2175 SCM_LINUM (port) -= 1;
2176 SCM_DECCOL (port);
19b8d12b
AW
2177}
2178#undef FUNC_NAME
2179
c932ce0b
AW
2180void
2181scm_ungetc (scm_t_wchar c, SCM port)
2182{
92c0ebac
AW
2183 scm_i_pthread_mutex_t *lock;
2184 scm_c_lock_port (port, &lock);
c932ce0b 2185 scm_ungetc_unlocked (c, port);
92c0ebac
AW
2186 if (lock)
2187 scm_i_pthread_mutex_unlock (lock);
2188
c932ce0b 2189}
19b8d12b
AW
2190
2191void
c932ce0b 2192scm_ungets_unlocked (const char *s, int n, SCM port)
19b8d12b
AW
2193{
2194 /* This is simple minded and inefficient, but unreading strings is
2195 * probably not a common operation, and remember that line and
2196 * column numbers have to be handled...
2197 *
2198 * Please feel free to write an optimized version!
2199 */
2200 while (n--)
c932ce0b 2201 scm_ungetc_unlocked (s[n], port);
19b8d12b
AW
2202}
2203
c932ce0b
AW
2204void
2205scm_ungets (const char *s, int n, SCM port)
2206{
92c0ebac
AW
2207 scm_i_pthread_mutex_t *lock;
2208 scm_c_lock_port (port, &lock);
c932ce0b 2209 scm_ungets_unlocked (s, n, port);
92c0ebac
AW
2210 if (lock)
2211 scm_i_pthread_mutex_unlock (lock);
2212
c932ce0b 2213}
19b8d12b
AW
2214
2215SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
1bbd0b84 2216 (SCM port),
1e6808ea
MG
2217 "Return the next character available from @var{port},\n"
2218 "@emph{without} updating @var{port} to point to the following\n"
2219 "character. If no more characters are available, the\n"
c2dfff19
KR
2220 "end-of-file object is returned.\n"
2221 "\n"
2222 "The value returned by\n"
1e6808ea
MG
2223 "a call to @code{peek-char} is the same as the value that would\n"
2224 "have been returned by a call to @code{read-char} on the same\n"
2225 "port. The only difference is that the very next call to\n"
2226 "@code{read-char} or @code{peek-char} on that @var{port} will\n"
2227 "return the value returned by the preceding call to\n"
2228 "@code{peek-char}. In particular, a call to @code{peek-char} on\n"
2229 "an interactive port will hang waiting for input whenever a call\n"
c62da8f8
LC
2230 "to @code{read-char} would have hung.\n"
2231 "\n"
2232 "As for @code{read-char}, a @code{decoding-error} may be raised\n"
2233 "if such a situation occurs. However, unlike with @code{read-char},\n"
2234 "@var{port} still points at the beginning of the erroneous byte\n"
2235 "sequence when the error is raised.\n")
1bbd0b84 2236#define FUNC_NAME s_scm_peek_char
ee149d03 2237{
c62da8f8 2238 int err;
fd5eec2b
LC
2239 SCM result;
2240 scm_t_wchar c;
2241 char bytes[SCM_MBCHAR_BUF_SIZE];
7f6c3f8f 2242 long column, line;
fe8935d4 2243 size_t len = 0;
fd5eec2b 2244
ee149d03 2245 if (SCM_UNBNDP (port))
9de87eea 2246 port = scm_current_input_port ();
b2456dd4 2247 SCM_VALIDATE_OPINPORT (1, port);
fd5eec2b
LC
2248
2249 column = SCM_COL (port);
2250 line = SCM_LINUM (port);
2251
c62da8f8 2252 err = get_codepoint (port, &c, bytes, &len);
fd5eec2b 2253
f6f4feb0 2254 scm_i_unget_bytes_unlocked ((unsigned char *) bytes, len, port);
fd5eec2b 2255
c62da8f8
LC
2256 SCM_COL (port) = column;
2257 SCM_LINUM (port) = line;
fd5eec2b 2258
c62da8f8
LC
2259 if (SCM_UNLIKELY (err != 0))
2260 {
2261 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
2262
2263 /* Shouldn't happen since `catch' always aborts to prompt. */
2264 result = SCM_BOOL_F;
fd5eec2b 2265 }
c62da8f8 2266 else if (c == EOF)
69bc9ff3 2267 {
f6f4feb0
MW
2268 scm_i_set_pending_eof (port);
2269 result = SCM_EOF_VAL;
69bc9ff3 2270 }
c62da8f8
LC
2271 else
2272 result = SCM_MAKE_CHAR (c);
fd5eec2b
LC
2273
2274 return result;
3cb988bd 2275}
1bbd0b84 2276#undef FUNC_NAME
3cb988bd 2277
1be4270a 2278SCM_DEFINE (scm_unread_char, "unread-char", 1, 1, 0,
1bbd0b84 2279 (SCM cobj, SCM port),
b7e64f8b
BT
2280 "Place character @var{cobj} in @var{port} so that it will be\n"
2281 "read by the next read operation. If called multiple times, the\n"
2282 "unread characters will be read again in last-in first-out\n"
2283 "order. If @var{port} is not supplied, the current input port\n"
2284 "is used.")
1bbd0b84 2285#define FUNC_NAME s_scm_unread_char
0f2d19dd
JB
2286{
2287 int c;
2288
34d19ef6 2289 SCM_VALIDATE_CHAR (1, cobj);
0f2d19dd 2290 if (SCM_UNBNDP (port))
9de87eea 2291 port = scm_current_input_port ();
b2456dd4 2292 SCM_VALIDATE_OPINPORT (2, port);
0f2d19dd 2293
7866a09b 2294 c = SCM_CHAR (cobj);
0f2d19dd 2295
c932ce0b 2296 scm_ungetc_unlocked (c, port);
0f2d19dd
JB
2297 return cobj;
2298}
1bbd0b84 2299#undef FUNC_NAME
0f2d19dd 2300
a1ec6916 2301SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0,
1bbd0b84 2302 (SCM str, SCM port),
b380b885
MD
2303 "Place the string @var{str} in @var{port} so that its characters will be\n"
2304 "read in subsequent read operations. If called multiple times, the\n"
2305 "unread characters will be read again in last-in first-out order. If\n"
2306 "@var{port} is not supplied, the current-input-port is used.")
1bbd0b84 2307#define FUNC_NAME s_scm_unread_string
ee1e7e13 2308{
889975e5 2309 int n;
34d19ef6 2310 SCM_VALIDATE_STRING (1, str);
ee1e7e13 2311 if (SCM_UNBNDP (port))
9de87eea 2312 port = scm_current_input_port ();
b2456dd4 2313 SCM_VALIDATE_OPINPORT (2, port);
ee1e7e13 2314
889975e5
MG
2315 n = scm_i_string_length (str);
2316
2317 while (n--)
c932ce0b 2318 scm_ungetc_unlocked (scm_i_string_ref (str, n), port);
ee1e7e13
MD
2319
2320 return str;
2321}
1bbd0b84 2322#undef FUNC_NAME
ee1e7e13 2323
840ae05d 2324
19b8d12b 2325\f
23f2b9a3 2326
19b8d12b 2327/* Manipulating the buffers. */
840ae05d 2328
4251ae2e
AW
2329/* This routine does not take any locks, as it is usually called as part
2330 of a port implementation. */
19b8d12b
AW
2331void
2332scm_port_non_buffer (scm_t_port *pt)
2333{
2334 pt->read_pos = pt->read_buf = pt->read_end = &pt->shortbuf;
2335 pt->write_buf = pt->write_pos = &pt->shortbuf;
2336 pt->read_buf_size = pt->write_buf_size = 1;
2337 pt->write_end = pt->write_buf + pt->write_buf_size;
840ae05d 2338}
8ab3d8a0 2339
19b8d12b
AW
2340/* this should only be called when the read buffer is empty. it
2341 tries to refill the read buffer. it returns the first char from
2342 the port, which is either EOF or *(pt->read_pos). */
f6f4feb0
MW
2343static int
2344scm_i_fill_input_unlocked (SCM port)
82893676 2345{
19b8d12b 2346 scm_t_port *pt = SCM_PTAB_ENTRY (port);
f6f4feb0 2347 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
8ab3d8a0 2348
19b8d12b 2349 assert (pt->read_pos == pt->read_end);
8ab3d8a0 2350
f6f4feb0
MW
2351 if (pti->pending_eof)
2352 {
2353 pti->pending_eof = 0;
2354 return EOF;
2355 }
2356
19b8d12b 2357 if (pt->read_buf == pt->putback_buf)
82893676 2358 {
19b8d12b
AW
2359 /* finished reading put-back chars. */
2360 pt->read_buf = pt->saved_read_buf;
2361 pt->read_pos = pt->saved_read_pos;
2362 pt->read_end = pt->saved_read_end;
2363 pt->read_buf_size = pt->saved_read_buf_size;
2364 if (pt->read_pos < pt->read_end)
2365 return *(pt->read_pos);
82893676 2366 }
19b8d12b 2367 return SCM_PORT_DESCRIPTOR (port)->fill_input (port);
82893676 2368}
82893676 2369
4251ae2e
AW
2370int
2371scm_fill_input (SCM port)
2372{
92c0ebac 2373 scm_i_pthread_mutex_t *lock;
4251ae2e
AW
2374 int ret;
2375
92c0ebac 2376 scm_c_lock_port (port, &lock);
4251ae2e 2377 ret = scm_fill_input_unlocked (port);
92c0ebac
AW
2378 if (lock)
2379 scm_i_pthread_mutex_unlock (lock);
2380
4251ae2e
AW
2381
2382 return ret;
2383}
2384
f6f4feb0
MW
2385/* Slow-path fallback for 'scm_get_byte_or_eof_unlocked' */
2386int
2387scm_slow_get_byte_or_eof_unlocked (SCM port)
d14af9f2 2388{
f6f4feb0
MW
2389 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2390
2391 if (pt->rw_active == SCM_PORT_WRITE)
2392 scm_flush_unlocked (port);
2393
2394 if (pt->rw_random)
2395 pt->rw_active = SCM_PORT_READ;
2396
2397 if (pt->read_pos >= pt->read_end)
2398 {
2399 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2400 return EOF;
2401 }
2402
2403 return *pt->read_pos++;
d14af9f2
MD
2404}
2405
f6f4feb0
MW
2406/* Slow-path fallback for 'scm_peek_byte_or_eof_unlocked' */
2407int
2408scm_slow_peek_byte_or_eof_unlocked (SCM port)
2409{
2410 scm_t_port *pt = SCM_PTAB_ENTRY (port);
d6a6989e 2411
f6f4feb0
MW
2412 if (pt->rw_active == SCM_PORT_WRITE)
2413 scm_flush_unlocked (port);
889975e5 2414
f6f4feb0
MW
2415 if (pt->rw_random)
2416 pt->rw_active = SCM_PORT_READ;
2417
2418 if (pt->read_pos >= pt->read_end)
2419 {
2420 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2421 {
2422 scm_i_set_pending_eof (port);
2423 return EOF;
2424 }
2425 }
2426
2427 return *pt->read_pos;
2428}
2429
a3ded465
AW
2430/* Move up to READ_LEN bytes from PORT's putback and/or read buffers
2431 into memory starting at DEST. Return the number of bytes moved.
2432 PORT's line/column numbers are left unchanged. */
19b8d12b
AW
2433size_t
2434scm_take_from_input_buffers (SCM port, char *dest, size_t read_len)
840ae05d 2435{
19b8d12b 2436 scm_t_port *pt = SCM_PTAB_ENTRY (port);
a3ded465 2437 size_t bytes_read = 0;
19b8d12b 2438 size_t from_buf = min (pt->read_end - pt->read_pos, read_len);
840ae05d 2439
19b8d12b 2440 if (from_buf > 0)
840ae05d 2441 {
19b8d12b
AW
2442 memcpy (dest, pt->read_pos, from_buf);
2443 pt->read_pos += from_buf;
a3ded465 2444 bytes_read += from_buf;
19b8d12b
AW
2445 read_len -= from_buf;
2446 dest += from_buf;
840ae05d 2447 }
3fe6190f 2448
19b8d12b
AW
2449 /* if putback was active, try the real input buffer too. */
2450 if (pt->read_buf == pt->putback_buf)
69bc9ff3 2451 {
19b8d12b
AW
2452 from_buf = min (pt->saved_read_end - pt->saved_read_pos, read_len);
2453 if (from_buf > 0)
2454 {
2455 memcpy (dest, pt->saved_read_pos, from_buf);
2456 pt->saved_read_pos += from_buf;
a3ded465 2457 bytes_read += from_buf;
19b8d12b 2458 }
69bc9ff3 2459 }
8ab3d8a0 2460
a3ded465 2461 return bytes_read;
840ae05d
JB
2462}
2463
19b8d12b
AW
2464/* Clear a port's read buffers, returning the contents. */
2465SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0,
1bbd0b84 2466 (SCM port),
19b8d12b
AW
2467 "This procedure clears a port's input buffers, similar\n"
2468 "to the way that force-output clears the output buffer. The\n"
2469 "contents of the buffers are returned as a single string, e.g.,\n"
a150979d 2470 "\n"
19b8d12b
AW
2471 "@lisp\n"
2472 "(define p (open-input-file ...))\n"
2473 "(drain-input p) => empty string, nothing buffered yet.\n"
2474 "(unread-char (read-char p) p)\n"
2475 "(drain-input p) => initial chars from p, up to the buffer size.\n"
2476 "@end lisp\n\n"
2477 "Draining the buffers may be useful for cleanly finishing\n"
2478 "buffered I/O so that the file descriptor can be used directly\n"
2479 "for further input.")
2480#define FUNC_NAME s_scm_drain_input
0f2d19dd 2481{
19b8d12b
AW
2482 SCM result;
2483 char *data;
2484 scm_t_port *pt;
2485 long count;
0f2d19dd 2486
19b8d12b
AW
2487 SCM_VALIDATE_OPINPORT (1, port);
2488 pt = SCM_PTAB_ENTRY (port);
2489
2490 count = pt->read_end - pt->read_pos;
2491 if (pt->read_buf == pt->putback_buf)
2492 count += pt->saved_read_end - pt->saved_read_pos;
2493
2494 if (count)
2495 {
2496 result = scm_i_make_string (count, &data, 0);
2497 scm_take_from_input_buffers (port, data, count);
2498 }
2499 else
2500 result = scm_nullstr;
2501
2502 return result;
d043d8c2 2503}
1bbd0b84 2504#undef FUNC_NAME
d043d8c2 2505
19b8d12b 2506void
4251ae2e 2507scm_end_input_unlocked (SCM port)
0f2d19dd 2508{
19b8d12b
AW
2509 long offset;
2510 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2511
f6f4feb0 2512 scm_i_clear_pending_eof (port);
19b8d12b
AW
2513 if (pt->read_buf == pt->putback_buf)
2514 {
2515 offset = pt->read_end - pt->read_pos;
2516 pt->read_buf = pt->saved_read_buf;
2517 pt->read_pos = pt->saved_read_pos;
2518 pt->read_end = pt->saved_read_end;
2519 pt->read_buf_size = pt->saved_read_buf_size;
2520 }
2521 else
2522 offset = 0;
2523
2524 SCM_PORT_DESCRIPTOR (port)->end_input (port, offset);
0f2d19dd
JB
2525}
2526
4251ae2e
AW
2527void
2528scm_end_input (SCM port)
2529{
92c0ebac
AW
2530 scm_i_pthread_mutex_t *lock;
2531 scm_c_lock_port (port, &lock);
4251ae2e 2532 scm_end_input_unlocked (port);
92c0ebac
AW
2533 if (lock)
2534 scm_i_pthread_mutex_unlock (lock);
2535
4251ae2e
AW
2536}
2537
19b8d12b
AW
2538SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0,
2539 (SCM port),
2540 "Flush the specified output port, or the current output port if @var{port}\n"
2541 "is omitted. The current output buffer contents are passed to the\n"
2542 "underlying port implementation (e.g., in the case of fports, the\n"
2543 "data will be written to the file and the output buffer will be cleared.)\n"
2544 "It has no effect on an unbuffered port.\n\n"
2545 "The return value is unspecified.")
2546#define FUNC_NAME s_scm_force_output
d043d8c2 2547{
19b8d12b
AW
2548 if (SCM_UNBNDP (port))
2549 port = scm_current_output_port ();
2550 else
2551 {
2552 port = SCM_COERCE_OUTPORT (port);
2553 SCM_VALIDATE_OPOUTPORT (1, port);
2554 }
4251ae2e 2555 scm_flush_unlocked (port);
564478fd 2556 return SCM_UNSPECIFIED;
d043d8c2 2557}
1bbd0b84 2558#undef FUNC_NAME
d043d8c2 2559
19b8d12b 2560void
4251ae2e 2561scm_flush_unlocked (SCM port)
0f2d19dd 2562{
19b8d12b 2563 SCM_PORT_DESCRIPTOR (port)->flush (port);
0f2d19dd
JB
2564}
2565
4251ae2e
AW
2566void
2567scm_flush (SCM port)
2568{
92c0ebac
AW
2569 scm_i_pthread_mutex_t *lock;
2570 scm_c_lock_port (port, &lock);
4251ae2e 2571 scm_flush_unlocked (port);
92c0ebac
AW
2572 if (lock)
2573 scm_i_pthread_mutex_unlock (lock);
2574
4251ae2e
AW
2575}
2576
f6f4feb0
MW
2577int
2578scm_fill_input_unlocked (SCM port)
337edc59 2579{
f6f4feb0
MW
2580 return scm_i_fill_input_unlocked (port);
2581}
337edc59 2582
d14af9f2 2583
19b8d12b 2584\f
d6a6989e 2585
19b8d12b 2586/* Output. */
889975e5 2587
0607ebbf
AW
2588void
2589scm_putc (char c, SCM port)
2590{
92c0ebac
AW
2591 scm_i_pthread_mutex_t *lock;
2592 scm_c_lock_port (port, &lock);
0607ebbf 2593 scm_putc_unlocked (c, port);
92c0ebac
AW
2594 if (lock)
2595 scm_i_pthread_mutex_unlock (lock);
2596
0607ebbf
AW
2597}
2598
2599void
2600scm_puts (const char *s, SCM port)
2601{
92c0ebac
AW
2602 scm_i_pthread_mutex_t *lock;
2603 scm_c_lock_port (port, &lock);
0607ebbf 2604 scm_puts_unlocked (s, port);
92c0ebac
AW
2605 if (lock)
2606 scm_i_pthread_mutex_unlock (lock);
2607
0607ebbf
AW
2608}
2609
19b8d12b
AW
2610/* scm_c_write
2611 *
2612 * Used by an application to write arbitrary number of bytes to an SCM
2613 * port. Similar semantics as libc write. However, unlike libc
2614 * write, scm_c_write writes the requested number of bytes and has no
2615 * return value.
2616 *
2617 * Warning: Doesn't update port line and column counts!
2618 */
9d9c66ba 2619void
f209aeee 2620scm_c_write_unlocked (SCM port, const void *ptr, size_t size)
19b8d12b 2621#define FUNC_NAME "scm_c_write"
9d9c66ba 2622{
19b8d12b
AW
2623 scm_t_port *pt;
2624 scm_t_ptob_descriptor *ptob;
9d9c66ba 2625
19b8d12b 2626 SCM_VALIDATE_OPOUTPORT (1, port);
9d9c66ba 2627
19b8d12b
AW
2628 pt = SCM_PTAB_ENTRY (port);
2629 ptob = SCM_PORT_DESCRIPTOR (port);
9d9c66ba 2630
19b8d12b 2631 if (pt->rw_active == SCM_PORT_READ)
4251ae2e 2632 scm_end_input_unlocked (port);
19b8d12b
AW
2633
2634 ptob->write (port, ptr, size);
2635
2636 if (pt->rw_random)
2637 pt->rw_active = SCM_PORT_WRITE;
889975e5 2638}
19b8d12b 2639#undef FUNC_NAME
889975e5 2640
f209aeee
AW
2641void
2642scm_c_write (SCM port, const void *ptr, size_t size)
2643{
92c0ebac
AW
2644 scm_i_pthread_mutex_t *lock;
2645 scm_c_lock_port (port, &lock);
f209aeee 2646 scm_c_write_unlocked (port, ptr, size);
92c0ebac
AW
2647 if (lock)
2648 scm_i_pthread_mutex_unlock (lock);
2649
f209aeee
AW
2650}
2651
19b8d12b
AW
2652/* scm_lfwrite
2653 *
2654 * This function differs from scm_c_write; it updates port line and
2655 * column. */
889975e5 2656void
f209aeee 2657scm_lfwrite_unlocked (const char *ptr, size_t size, SCM port)
889975e5 2658{
19b8d12b
AW
2659 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2660 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
f4bc4e59 2661
19b8d12b 2662 if (pt->rw_active == SCM_PORT_READ)
4251ae2e 2663 scm_end_input_unlocked (port);
f4bc4e59 2664
19b8d12b 2665 ptob->write (port, ptr, size);
f4bc4e59 2666
19b8d12b
AW
2667 for (; size; ptr++, size--)
2668 update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port);
d9544bf0 2669
19b8d12b
AW
2670 if (pt->rw_random)
2671 pt->rw_active = SCM_PORT_WRITE;
2672}
f4bc4e59 2673
f209aeee
AW
2674void
2675scm_lfwrite (const char *ptr, size_t size, SCM port)
2676{
92c0ebac
AW
2677 scm_i_pthread_mutex_t *lock;
2678 scm_c_lock_port (port, &lock);
f209aeee 2679 scm_lfwrite_unlocked (ptr, size, port);
92c0ebac
AW
2680 if (lock)
2681 scm_i_pthread_mutex_unlock (lock);
2682
f209aeee
AW
2683}
2684
19b8d12b
AW
2685/* Write STR to PORT from START inclusive to END exclusive. */
2686void
2687scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
2688{
2689 scm_t_port *pt = SCM_PTAB_ENTRY (port);
f4bc4e59 2690
19b8d12b 2691 if (pt->rw_active == SCM_PORT_READ)
4251ae2e 2692 scm_end_input_unlocked (port);
f4bc4e59 2693
19b8d12b
AW
2694 if (end == (size_t) -1)
2695 end = scm_i_string_length (str);
f4bc4e59 2696
fa980bcc 2697 scm_i_display_substring (str, start, end, port);
f4bc4e59 2698
19b8d12b
AW
2699 if (pt->rw_random)
2700 pt->rw_active = SCM_PORT_WRITE;
889975e5
MG
2701}
2702
19b8d12b
AW
2703
2704\f
2705
2706/* Querying and setting positions, and character availability. */
2707
2708SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0,
889975e5 2709 (SCM port),
19b8d12b
AW
2710 "Return @code{#t} if a character is ready on input @var{port}\n"
2711 "and return @code{#f} otherwise. If @code{char-ready?} returns\n"
2712 "@code{#t} then the next @code{read-char} operation on\n"
2713 "@var{port} is guaranteed not to hang. If @var{port} is a file\n"
2714 "port at end of file then @code{char-ready?} returns @code{#t}.\n"
2715 "\n"
2716 "@code{char-ready?} exists to make it possible for a\n"
2717 "program to accept characters from interactive ports without\n"
2718 "getting stuck waiting for input. Any input editors associated\n"
2719 "with such ports must make sure that characters whose existence\n"
2720 "has been asserted by @code{char-ready?} cannot be rubbed out.\n"
2721 "If @code{char-ready?} were to return @code{#f} at end of file,\n"
2722 "a port at end of file would be indistinguishable from an\n"
2723 "interactive port that has no ready characters.")
2724#define FUNC_NAME s_scm_char_ready_p
889975e5
MG
2725{
2726 scm_t_port *pt;
889975e5 2727
19b8d12b
AW
2728 if (SCM_UNBNDP (port))
2729 port = scm_current_input_port ();
2730 /* It's possible to close the current input port, so validate even in
2731 this case. */
2732 SCM_VALIDATE_OPINPORT (1, port);
889975e5
MG
2733
2734 pt = SCM_PTAB_ENTRY (port);
19b8d12b
AW
2735
2736 /* if the current read buffer is filled, or the
2737 last pushed-back char has been read and the saved buffer is
2738 filled, result is true. */
2739 if (pt->read_pos < pt->read_end
2740 || (pt->read_buf == pt->putback_buf
2741 && pt->saved_read_pos < pt->saved_read_end))
2742 return SCM_BOOL_T;
889975e5 2743 else
19b8d12b
AW
2744 {
2745 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
2746
2747 if (ptob->input_waiting)
2748 return scm_from_bool(ptob->input_waiting (port));
2749 else
2750 return SCM_BOOL_T;
2751 }
889975e5
MG
2752}
2753#undef FUNC_NAME
d6a6989e 2754
19b8d12b
AW
2755SCM_DEFINE (scm_seek, "seek", 3, 0, 0,
2756 (SCM fd_port, SCM offset, SCM whence),
0858753e 2757 "Sets the current position of @var{fd_port} to the integer\n"
19b8d12b
AW
2758 "@var{offset}, which is interpreted according to the value of\n"
2759 "@var{whence}.\n"
2760 "\n"
2761 "One of the following variables should be supplied for\n"
2762 "@var{whence}:\n"
2763 "@defvar SEEK_SET\n"
2764 "Seek from the beginning of the file.\n"
2765 "@end defvar\n"
2766 "@defvar SEEK_CUR\n"
2767 "Seek from the current position.\n"
2768 "@end defvar\n"
2769 "@defvar SEEK_END\n"
2770 "Seek from the end of the file.\n"
2771 "@end defvar\n"
0858753e 2772 "If @var{fd_port} is a file descriptor, the underlying system\n"
19b8d12b
AW
2773 "call is @code{lseek}. @var{port} may be a string port.\n"
2774 "\n"
2775 "The value returned is the new position in the file. This means\n"
2776 "that the current position of a port can be obtained using:\n"
2777 "@lisp\n"
2778 "(seek port 0 SEEK_CUR)\n"
2779 "@end lisp")
2780#define FUNC_NAME s_scm_seek
889975e5 2781{
19b8d12b 2782 int how;
889975e5 2783
19b8d12b 2784 fd_port = SCM_COERCE_OUTPORT (fd_port);
889975e5 2785
19b8d12b
AW
2786 how = scm_to_int (whence);
2787 if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END)
2788 SCM_OUT_OF_RANGE (3, whence);
da288f50 2789
19b8d12b
AW
2790 if (SCM_OPPORTP (fd_port))
2791 {
f6f4feb0 2792 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (fd_port);
19b8d12b
AW
2793 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (fd_port);
2794 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2795 off_t_or_off64_t rv;
2796
2797 if (!ptob->seek)
2798 SCM_MISC_ERROR ("port is not seekable",
2799 scm_cons (fd_port, SCM_EOL));
2800 else
f6f4feb0 2801 rv = ptob->seek (fd_port, off, how);
889975e5 2802
f6f4feb0
MW
2803 /* Set stream-start flags according to new position. */
2804 pti->at_stream_start_for_bom_read = (rv == 0);
2805 pti->at_stream_start_for_bom_write = (rv == 0);
b22e94db 2806
f6f4feb0 2807 scm_i_clear_pending_eof (fd_port);
889975e5 2808
19b8d12b
AW
2809 return scm_from_off_t_or_off64_t (rv);
2810 }
2811 else /* file descriptor?. */
2812 {
2813 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2814 off_t_or_off64_t rv;
2815 rv = lseek_or_lseek64 (scm_to_int (fd_port), off, how);
2816 if (rv == -1)
2817 SCM_SYSERROR;
2818 return scm_from_off_t_or_off64_t (rv);
2819 }
889975e5
MG
2820}
2821#undef FUNC_NAME
2822
19b8d12b
AW
#ifndef O_BINARY
#define O_BINARY 0
#endif

/* Mingw has ftruncate(), perhaps implemented above using chsize, but
   doesn't have the filename version truncate(), hence this code.

   Open FILE write-only, ftruncate it to LENGTH bytes and close it
   again.  Returns -1 if the open or the ftruncate fails (errno from a
   failed ftruncate is preserved across the close); otherwise returns
   the result of close.  */
#if HAVE_FTRUNCATE && ! HAVE_TRUNCATE
static int
truncate (const char *file, off_t length)
{
  int fdes = open (file, O_BINARY | O_WRONLY);

  if (fdes == -1)
    return -1;

  if (ftruncate (fdes, length) == -1)
    {
      /* Don't let close clobber the errno set by ftruncate.  */
      int ftruncate_errno = errno;

      close (fdes);
      errno = ftruncate_errno;
      return -1;
    }

  return close (fdes);
}
#endif /* HAVE_FTRUNCATE && ! HAVE_TRUNCATE */
889975e5 2851
19b8d12b
AW
2852SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0,
2853 (SCM object, SCM length),
0858753e
AW
2854 "Truncate file @var{object} to @var{length} bytes. @var{object}\n"
2855 "can be a filename string, a port object, or an integer file\n"
2856 "descriptor.\n"
19b8d12b
AW
2857 "The return value is unspecified.\n"
2858 "\n"
2859 "For a port or file descriptor @var{length} can be omitted, in\n"
2860 "which case the file is truncated at the current position (per\n"
2861 "@code{ftell} above).\n"
2862 "\n"
2863 "On most systems a file can be extended by giving a length\n"
2864 "greater than the current size, but this is not mandatory in the\n"
2865 "POSIX standard.")
2866#define FUNC_NAME s_scm_truncate_file
889975e5 2867{
19b8d12b
AW
2868 int rv;
2869
2870 /* "object" can be a port, fdes or filename.
2871
2872 Negative "length" makes no sense, but it's left to truncate() or
2873 ftruncate() to give back an error for that (normally EINVAL).
2874 */
2875
2876 if (SCM_UNBNDP (length))
889975e5 2877 {
19b8d12b
AW
2878 /* must supply length if object is a filename. */
2879 if (scm_is_string (object))
2880 SCM_MISC_ERROR("must supply length if OBJECT is a filename", SCM_EOL);
2881
2882 length = scm_seek (object, SCM_INUM0, scm_from_int (SEEK_CUR));
2883 }
2884
2885 object = SCM_COERCE_OUTPORT (object);
2886 if (scm_is_integer (object))
2887 {
2888 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2889 SCM_SYSCALL (rv = ftruncate_or_ftruncate64 (scm_to_int (object),
2890 c_length));
2891 }
2892 else if (SCM_OPOUTPORTP (object))
2893 {
2894 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2895 scm_t_port *pt = SCM_PTAB_ENTRY (object);
2896 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (object);
b22e94db 2897
19b8d12b
AW
2898 if (!ptob->truncate)
2899 SCM_MISC_ERROR ("port is not truncatable", SCM_EOL);
889975e5 2900
f6f4feb0 2901 scm_i_clear_pending_eof (object);
19b8d12b 2902 if (pt->rw_active == SCM_PORT_READ)
4251ae2e 2903 scm_end_input_unlocked (object);
19b8d12b
AW
2904 else if (pt->rw_active == SCM_PORT_WRITE)
2905 ptob->flush (object);
889975e5 2906
19b8d12b
AW
2907 ptob->truncate (object, c_length);
2908 rv = 0;
889975e5
MG
2909 }
2910 else
2911 {
19b8d12b
AW
2912 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2913 char *str = scm_to_locale_string (object);
2914 int eno;
2915 SCM_SYSCALL (rv = truncate_or_truncate64 (str, c_length));
2916 eno = errno;
2917 free (str);
2918 errno = eno;
889975e5 2919 }
19b8d12b
AW
2920 if (rv == -1)
2921 SCM_SYSERROR;
2922 return SCM_UNSPECIFIED;
889975e5 2923}
19b8d12b 2924#undef FUNC_NAME
889975e5 2925
19b8d12b
AW
2926SCM_DEFINE (scm_port_line, "port-line", 1, 0, 0,
2927 (SCM port),
2928 "Return the current line number for @var{port}.\n"
889975e5 2929 "\n"
19b8d12b
AW
2930 "The first line of a file is 0. But you might want to add 1\n"
2931 "when printing line numbers, since starting from 1 is\n"
2932 "traditional in error messages, and likely to be more natural to\n"
2933 "non-programmers.")
2934#define FUNC_NAME s_scm_port_line
889975e5 2935{
19b8d12b
AW
2936 port = SCM_COERCE_OUTPORT (port);
2937 SCM_VALIDATE_OPENPORT (1, port);
2938 return scm_from_long (SCM_LINUM (port));
889975e5
MG
2939}
2940#undef FUNC_NAME
2941
19b8d12b
AW
2942SCM_DEFINE (scm_set_port_line_x, "set-port-line!", 2, 0, 0,
2943 (SCM port, SCM line),
2944 "Set the current line number for @var{port} to @var{line}. The\n"
2945 "first line of a file is 0.")
2946#define FUNC_NAME s_scm_set_port_line_x
889975e5 2947{
19b8d12b
AW
2948 port = SCM_COERCE_OUTPORT (port);
2949 SCM_VALIDATE_OPENPORT (1, port);
2950 SCM_PTAB_ENTRY (port)->line_number = scm_to_long (line);
2951 return SCM_UNSPECIFIED;
2952}
2953#undef FUNC_NAME
889975e5 2954
19b8d12b
AW
2955SCM_DEFINE (scm_port_column, "port-column", 1, 0, 0,
2956 (SCM port),
2957 "Return the current column number of @var{port}.\n"
2958 "If the number is\n"
2959 "unknown, the result is #f. Otherwise, the result is a 0-origin integer\n"
2960 "- i.e. the first character of the first line is line 0, column 0.\n"
2961 "(However, when you display a file position, for example in an error\n"
2962 "message, we recommend you add 1 to get 1-origin integers. This is\n"
2963 "because lines and column numbers traditionally start with 1, and that is\n"
2964 "what non-programmers will find most natural.)")
2965#define FUNC_NAME s_scm_port_column
2966{
2967 port = SCM_COERCE_OUTPORT (port);
2968 SCM_VALIDATE_OPENPORT (1, port);
2969 return scm_from_int (SCM_COL (port));
2970}
2971#undef FUNC_NAME
889975e5 2972
19b8d12b
AW
2973SCM_DEFINE (scm_set_port_column_x, "set-port-column!", 2, 0, 0,
2974 (SCM port, SCM column),
2975 "Set the current column of @var{port}. Before reading the first\n"
2976 "character on a line the column should be 0.")
2977#define FUNC_NAME s_scm_set_port_column_x
2978{
2979 port = SCM_COERCE_OUTPORT (port);
2980 SCM_VALIDATE_OPENPORT (1, port);
2981 SCM_PTAB_ENTRY (port)->column_number = scm_to_int (column);
2982 return SCM_UNSPECIFIED;
2983}
2984#undef FUNC_NAME
889975e5 2985
19b8d12b
AW
2986SCM_DEFINE (scm_port_filename, "port-filename", 1, 0, 0,
2987 (SCM port),
2988 "Return the filename associated with @var{port}, or @code{#f}\n"
2989 "if no filename is associated with the port.")
2990#define FUNC_NAME s_scm_port_filename
2991{
2992 port = SCM_COERCE_OUTPORT (port);
2993 SCM_VALIDATE_OPENPORT (1, port);
2994 return SCM_FILENAME (port);
2995}
2996#undef FUNC_NAME
889975e5 2997
19b8d12b
AW
2998SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0,
2999 (SCM port, SCM filename),
3000 "Change the filename associated with @var{port}, using the current input\n"
3001 "port if none is specified. Note that this does not change the port's\n"
3002 "source of data, but only the value that is returned by\n"
3003 "@code{port-filename} and reported in diagnostic output.")
3004#define FUNC_NAME s_scm_set_port_filename_x
3005{
3006 port = SCM_COERCE_OUTPORT (port);
3007 SCM_VALIDATE_OPENPORT (1, port);
3008 /* We allow the user to set the filename to whatever he likes. */
3009 SCM_SET_FILENAME (port, filename);
889975e5
MG
3010 return SCM_UNSPECIFIED;
3011}
3012#undef FUNC_NAME
3013
3014
19b8d12b
AW
3015\f
3016
3017/* Implementation helpers for port printing functions. */
889975e5 3018
f12733c9
MD
3019void
3020scm_print_port_mode (SCM exp, SCM port)
3021{
0607ebbf 3022 scm_puts_unlocked (SCM_CLOSEDP (exp)
f12733c9 3023 ? "closed: "
f9a64404
DH
3024 : (SCM_RDNG & SCM_CELL_WORD_0 (exp)
3025 ? (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
f12733c9
MD
3026 ? "input-output: "
3027 : "input: ")
f9a64404 3028 : (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
f12733c9
MD
3029 ? "output: "
3030 : "bogus: ")),
3031 port);
3032}
1cc91f1b 3033
f12733c9 3034int
e81d98ec 3035scm_port_print (SCM exp, SCM port, scm_print_state *pstate SCM_UNUSED)
0f2d19dd 3036{
f12733c9
MD
3037 char *type = SCM_PTOBNAME (SCM_PTOBNUM (exp));
3038 if (!type)
3039 type = "port";
0607ebbf 3040 scm_puts_unlocked ("#<", port);
f12733c9 3041 scm_print_port_mode (exp, port);
0607ebbf
AW
3042 scm_puts_unlocked (type, port);
3043 scm_putc_unlocked (' ', port);
0345e278 3044 scm_uintprint (SCM_CELL_WORD_1 (exp), 16, port);
0607ebbf 3045 scm_putc_unlocked ('>', port);
f12733c9 3046 return 1;
0f2d19dd
JB
3047}
3048
19b8d12b
AW
3049
3050\f
3051
3052/* Iterating over all ports. */
3053
3054struct for_each_data
3055{
3056 void (*proc) (void *data, SCM p);
3057 void *data;
3058};
3059
3060static SCM
3061for_each_trampoline (void *data, SCM port, SCM result)
3062{
3063 struct for_each_data *d = data;
3064
3065 d->proc (d->data, port);
3066
3067 return result;
3068}
3069
3070void
3071scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data)
3072{
3073 struct for_each_data d;
3074
3075 d.proc = proc;
3076 d.data = data;
3077
3078 scm_c_weak_set_fold (for_each_trampoline, &d, SCM_EOL,
3079 scm_i_port_weak_set);
3080}
3081
3082static void
3083scm_for_each_trampoline (void *data, SCM port)
3084{
3085 scm_call_1 (SCM_PACK_POINTER (data), port);
3086}
3087
3088SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0,
3089 (SCM proc),
3090 "Apply @var{proc} to each port in the Guile port table\n"
3091 "in turn. The return value is unspecified. More specifically,\n"
3092 "@var{proc} is applied exactly once to every port that exists\n"
0858753e
AW
3093 "in the system at the time @code{port-for-each} is invoked.\n"
3094 "Changes to the port table while @code{port-for-each} is running\n"
3095 "have no effect as far as @code{port-for-each} is concerned.")
19b8d12b
AW
3096#define FUNC_NAME s_scm_port_for_each
3097{
3098 SCM_VALIDATE_PROC (1, proc);
3099
3100 scm_c_port_for_each (scm_for_each_trampoline, SCM_UNPACK_POINTER (proc));
3101
3102 return SCM_UNSPECIFIED;
3103}
3104#undef FUNC_NAME
3105
3106static void
3107flush_output_port (void *closure, SCM port)
3108{
3109 if (SCM_OPOUTPORTP (port))
4251ae2e 3110 scm_flush_unlocked (port);
19b8d12b
AW
3111}
3112
3113SCM_DEFINE (scm_flush_all_ports, "flush-all-ports", 0, 0, 0,
3114 (),
3115 "Equivalent to calling @code{force-output} on\n"
3116 "all open output ports. The return value is unspecified.")
3117#define FUNC_NAME s_scm_flush_all_ports
3118{
3119 scm_c_port_for_each (&flush_output_port, NULL);
3120 return SCM_UNSPECIFIED;
3121}
3122#undef FUNC_NAME
3123
3124
0f2d19dd 3125\f
ee149d03 3126
d68fee48 3127/* Void ports. */
0f2d19dd 3128
92c2555f 3129scm_t_bits scm_tc16_void_port = 0;
0f2d19dd 3130
e81d98ec 3131static int fill_input_void_port (SCM port SCM_UNUSED)
283a1a0e 3132{
70df8af6 3133 return EOF;
283a1a0e
GH
3134}
3135
31703ab8 3136static void
e81d98ec
DH
3137write_void_port (SCM port SCM_UNUSED,
3138 const void *data SCM_UNUSED,
3139 size_t size SCM_UNUSED)
31703ab8
GH
3140{
3141}
3142
d617ee18
MV
3143static SCM
3144scm_i_void_port (long mode_bits)
0f2d19dd 3145{
2721f918
AW
3146 SCM ret;
3147
3148 ret = scm_c_make_port (scm_tc16_void_port, mode_bits, 0);
da220f27 3149
2721f918 3150 scm_port_non_buffer (SCM_PTAB_ENTRY (ret));
402788a9 3151
2721f918 3152 return ret;
0f2d19dd
JB
3153}
3154
d617ee18
MV
3155SCM
3156scm_void_port (char *mode_str)
3157{
3158 return scm_i_void_port (scm_mode_bits (mode_str));
3159}
3160
a1ec6916 3161SCM_DEFINE (scm_sys_make_void_port, "%make-void-port", 1, 0, 0,
1bbd0b84 3162 (SCM mode),
70df8af6 3163 "Create and return a new void port. A void port acts like\n"
bb2c02f2 3164 "@file{/dev/null}. The @var{mode} argument\n"
70df8af6 3165 "specifies the input/output modes for this port: see the\n"
b380b885 3166 "documentation for @code{open-file} in @ref{File Ports}.")
1bbd0b84 3167#define FUNC_NAME s_scm_sys_make_void_port
0f2d19dd 3168{
d617ee18 3169 return scm_i_void_port (scm_i_mode_bits (mode));
0f2d19dd 3170}
1bbd0b84 3171#undef FUNC_NAME
0f2d19dd 3172
19b8d12b 3173
0f2d19dd 3174\f
19b8d12b 3175
89545eba 3176/* Initialization. */
1cc91f1b 3177
0f2d19dd
JB
3178void
3179scm_init_ports ()
0f2d19dd 3180{
840ae05d 3181 /* lseek() symbols. */
e11e83f3
MV
3182 scm_c_define ("SEEK_SET", scm_from_int (SEEK_SET));
3183 scm_c_define ("SEEK_CUR", scm_from_int (SEEK_CUR));
3184 scm_c_define ("SEEK_END", scm_from_int (SEEK_END));
840ae05d 3185
70df8af6
GH
3186 scm_tc16_void_port = scm_make_port_type ("void", fill_input_void_port,
3187 write_void_port);
9de87eea 3188
f39448c5
AW
3189 cur_inport_fluid = scm_make_fluid ();
3190 cur_outport_fluid = scm_make_fluid ();
3191 cur_errport_fluid = scm_make_fluid ();
0463a927 3192 cur_warnport_fluid = scm_make_fluid ();
f39448c5 3193 cur_loadport_fluid = scm_make_fluid ();
9de87eea 3194
2721f918 3195 scm_i_port_weak_set = scm_c_make_weak_set (31);
d6a6989e 3196
a0599745 3197#include "libguile/ports.x"
889975e5 3198
d6a6989e 3199 /* Use Latin-1 as the default port encoding. */
c81c2ad3
AW
3200 SCM_VARIABLE_SET (default_port_encoding_var,
3201 scm_make_fluid_with_default (SCM_BOOL_F));
889975e5 3202 scm_port_encoding_init = 1;
d6a6989e 3203
b22e94db
LC
3204 SCM_VARIABLE_SET (default_conversion_strategy_var,
3205 scm_make_fluid_with_default (sym_substitute));
889975e5 3206 scm_conversion_strategy_init = 1;
b22e94db 3207
9670f238
AW
3208 /* These bindings are used when boot-9 turns `current-input-port' et
3209 al into parameters. They are then removed from the guile module. */
3210 scm_c_define ("%current-input-port-fluid", cur_inport_fluid);
3211 scm_c_define ("%current-output-port-fluid", cur_outport_fluid);
3212 scm_c_define ("%current-error-port-fluid", cur_errport_fluid);
0463a927 3213 scm_c_define ("%current-warning-port-fluid", cur_warnport_fluid);
0f2d19dd 3214}
89e00824
ML
3215
3216/*
3217 Local Variables:
3218 c-file-style: "gnu"
3219 End:
3220*/