Various VM stack management fixes
[bpt/guile.git] / libguile / ports.c
CommitLineData
f4bc4e59 1/* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004,
93c4fa21 2 * 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
f4bc4e59 3 *
73be1d9e 4 * This library is free software; you can redistribute it and/or
53befeb7
NJ
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
0f2d19dd 8 *
53befeb7
NJ
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
0f2d19dd 13 *
73be1d9e
MV
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
53befeb7
NJ
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
73be1d9e 18 */
1bbd0b84 19
1bbd0b84 20
0f2d19dd 21\f
d68fee48
JB
22/* Headers. */
23
2b829bbb
KR
24#define _LARGEFILE64_SOURCE /* ask for stat64 etc */
25
dbb605f5 26#ifdef HAVE_CONFIG_H
bd515f37
RB
27# include <config.h>
28#endif
29
0f2d19dd 30#include <stdio.h>
e6e2e95a 31#include <errno.h>
8ab3d8a0 32#include <fcntl.h> /* for chsize on mingw */
b5cb4464 33#include <assert.h>
f4bc4e59 34#include <iconv.h>
889975e5
MG
35#include <uniconv.h>
36#include <unistr.h>
37#include <striconveh.h>
e6e2e95a 38
fca43887
LC
39#include <assert.h>
40
a0599745 41#include "libguile/_scm.h"
4e047c3e 42#include "libguile/async.h"
8269ba5b 43#include "libguile/deprecation.h"
f0942910 44#include "libguile/eval.h"
8ab3d8a0 45#include "libguile/fports.h" /* direct access for seek and truncate */
9511876f 46#include "libguile/goops.h"
a0599745
MD
47#include "libguile/smob.h"
48#include "libguile/chars.h"
185e369a 49#include "libguile/dynwind.h"
0f2d19dd 50
a0599745 51#include "libguile/keywords.h"
5dbc6c06 52#include "libguile/hashtab.h"
a0599745
MD
53#include "libguile/root.h"
54#include "libguile/strings.h"
b42170a4 55#include "libguile/mallocs.h"
a0599745
MD
56#include "libguile/validate.h"
57#include "libguile/ports.h"
e4598559 58#include "libguile/ports-internal.h"
3a5fb14d 59#include "libguile/vectors.h"
2721f918 60#include "libguile/weak-set.h"
9de87eea 61#include "libguile/fluids.h"
889975e5 62#include "libguile/eq.h"
a38024ba 63#include "libguile/alist.h"
0f2d19dd 64
bd9e24b3
GH
65#ifdef HAVE_STRING_H
66#include <string.h>
67#endif
68
ec65f5da
MV
69#ifdef HAVE_IO_H
70#include <io.h>
71#endif
72
0f2d19dd
JB
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76
95b88819
GH
77#ifdef HAVE_SYS_IOCTL_H
78#include <sys/ioctl.h>
79#endif
d68fee48 80
8ab3d8a0
KR
81/* Mingw (version 3.4.5, circa 2006) has ftruncate as an alias for chsize
82 already, but have this code here in case that wasn't so in past versions,
83 or perhaps to help other minimal DOS environments.
84
85 gnulib ftruncate.c has code using fcntl F_CHSIZE and F_FREESP, which
86 might be possibilities if we've got other systems without ftruncate. */
87
56a3dcd4 88#if defined HAVE_CHSIZE && ! defined HAVE_FTRUNCATE
82893676 89#define ftruncate(fd, size) chsize (fd, size)
8ab3d8a0
KR
90#undef HAVE_FTRUNCATE
91#define HAVE_FTRUNCATE 1
82893676
MG
92#endif
93
0f2d19dd 94\f
93c4fa21
AW
95/* Port encodings are case-insensitive ASCII strings. */
/* Map an ASCII lower-case letter to the matching upper-case letter.
   Any other character is returned unchanged.  No locale is consulted.  */
static char
ascii_toupper (char c)
{
  if (c >= 'a' && c <= 'z')
    return 'A' + (c - 'a');

  return c;
}
101
102/* It is only necessary to use this function on encodings that come from
103 the user and have not been canonicalized yet. Encodings that are set
104 on ports or in the default encoding fluid are in upper-case, and can
105 be compared with strcmp. */
/* Return 1 if ENC, folded to upper case with ascii_toupper, is exactly
   the string UPPER, and 0 otherwise.  A NULL ENC is treated as
   "ISO-8859-1".  UPPER is compared as given.  */
static int
encoding_matches (const char *enc, const char *upper)
{
  const char *p = enc ? enc : "ISO-8859-1";
  const char *q = upper;

  for (; *p != '\0'; p++, q++)
    {
      if (ascii_toupper (*p) != *q)
        return 0;
    }

  /* ENC is exhausted; it matches only if UPPER is exhausted too.  */
  return *q == '\0';
}
118
/* Return the canonical (upper-case) spelling of the encoding name ENC.

   A NULL ENC yields the string literal "ISO-8859-1".  Otherwise the
   result is a copy of ENC obtained from scm_gc_strdup, with every
   character passed through ascii_toupper.  If ENC contains a byte
   outside the ASCII range, scm_misc_error is called with ENC as the
   offending value.  */
static char*
canonicalize_encoding (const char *enc)
{
  char *ret;
  size_t i;

  if (!enc)
    return "ISO-8859-1";

  ret = scm_gc_strdup (enc, "port");

  for (i = 0; ret[i]; i++)
    {
      /* Restrict to ASCII.  Compare as unsigned char: where plain
         `char' is signed, bytes >= 0x80 are negative, so a bare
         `ret[i] > 127' could never be true.  */
      if ((unsigned char) ret[i] > 127)
        scm_misc_error (NULL, "invalid character encoding ~s",
                        scm_list_1 (scm_from_latin1_string (enc)));
      else
        ret[i] = ascii_toupper (ret[i]);
    }

  return ret;
}
142
143
144\f
d68fee48 145/* The port kind table --- a dynamically resized array of port types. */
0f2d19dd
JB
146
147
148/* scm_ptobs scm_numptob
5dbc6c06 149 * implement a dynamically resized array of ptob records.
0f2d19dd
JB
150 * Indexes into this table are used when generating type
151 * tags for smobjects (if you know a tag you can get an index and conversely).
152 */
62bd5d66
AW
153static scm_t_ptob_descriptor **scm_ptobs = NULL;
154static long scm_numptob = 0; /* Number of port types. */
155static long scm_ptobs_size = 0; /* Number of slots in the port type
156 table. */
157static scm_i_pthread_mutex_t scm_ptobs_lock = SCM_I_PTHREAD_MUTEX_INITIALIZER;
158
159long
160scm_c_num_port_types (void)
161{
162 long ret;
163
164 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
165 ret = scm_numptob;
166 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
167
168 return ret;
169}
170
171scm_t_ptob_descriptor*
172scm_c_port_type_ref (long ptobnum)
173{
174 scm_t_ptob_descriptor *ret = NULL;
175
176 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
177
178 if (0 <= ptobnum && ptobnum < scm_numptob)
179 ret = scm_ptobs[ptobnum];
180
181 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
182
183 if (!ret)
184 scm_out_of_range ("scm_c_port_type_ref", scm_from_long (ptobnum));
185
186 return ret;
187}
188
189long
190scm_c_port_type_add_x (scm_t_ptob_descriptor *desc)
191{
192 long ret = -1;
193
194 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
195
196 if (scm_numptob + 1 < SCM_I_MAX_PORT_TYPE_COUNT)
197 {
198 if (scm_numptob == scm_ptobs_size)
199 {
200 unsigned long old_size = scm_ptobs_size;
201 scm_t_ptob_descriptor **old_ptobs = scm_ptobs;
202
203 /* Currently there are only 9 predefined port types, so one
204 resize will cover it. */
205 scm_ptobs_size = old_size + 10;
206
207 if (scm_ptobs_size >= SCM_I_MAX_PORT_TYPE_COUNT)
208 scm_ptobs_size = SCM_I_MAX_PORT_TYPE_COUNT;
209
210 scm_ptobs = scm_gc_malloc (sizeof (*scm_ptobs) * scm_ptobs_size,
211 "scm_ptobs");
212
213 memcpy (scm_ptobs, old_ptobs, sizeof (*scm_ptobs) * scm_numptob);
214 }
215
216 ret = scm_numptob++;
217 scm_ptobs[ret] = desc;
218 }
219
220 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
221
222 if (ret < 0)
223 scm_out_of_range ("scm_c_port_type_add_x", scm_from_long (scm_numptob));
224
225 return ret;
226}
0f2d19dd 227
f12733c9 228/*
f12733c9 229 * We choose to use an interface similar to the smob interface with
affc96b5 230 * fill_input and write as standard fields, passed to the port
f12733c9
MD
231 * type constructor, and optional fields set by setters.
232 */
233
70df8af6 234static void
e81d98ec 235flush_port_default (SCM port SCM_UNUSED)
70df8af6
GH
236{
237}
238
239static void
e81d98ec 240end_input_default (SCM port SCM_UNUSED, int offset SCM_UNUSED)
70df8af6
GH
241{
242}
0f2d19dd 243
92c2555f 244scm_t_bits
f12733c9 245scm_make_port_type (char *name,
affc96b5 246 int (*fill_input) (SCM port),
8aa011a1 247 void (*write) (SCM port, const void *data, size_t size))
0f2d19dd 248{
62bd5d66
AW
249 scm_t_ptob_descriptor *desc;
250 long ptobnum;
affc96b5 251
62bd5d66
AW
252 desc = scm_gc_malloc_pointerless (sizeof (*desc), "port-type");
253 memset (desc, 0, sizeof (*desc));
affc96b5 254
62bd5d66
AW
255 desc->name = name;
256 desc->print = scm_port_print;
257 desc->write = write;
258 desc->flush = flush_port_default;
259 desc->end_input = end_input_default;
260 desc->fill_input = fill_input;
affc96b5 261
62bd5d66 262 ptobnum = scm_c_port_type_add_x (desc);
affc96b5 263
62bd5d66 264 /* Make a class object if GOOPS is present. */
63385df2 265 if (SCM_UNPACK (scm_port_class[0]) != 0)
62bd5d66
AW
266 scm_make_port_classes (ptobnum, name);
267
268 return scm_tc7_port + ptobnum * 256;
0f2d19dd
JB
269}
270
f12733c9 271void
23f2b9a3 272scm_set_port_mark (scm_t_bits tc, SCM (*mark) (SCM))
f12733c9 273{
62bd5d66 274 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->mark = mark;
f12733c9
MD
275}
276
277void
23f2b9a3 278scm_set_port_free (scm_t_bits tc, size_t (*free) (SCM))
f12733c9 279{
62bd5d66 280 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->free = free;
f12733c9
MD
281}
282
283void
23f2b9a3 284scm_set_port_print (scm_t_bits tc, int (*print) (SCM exp, SCM port,
19b8d12b 285 scm_print_state *pstate))
f12733c9 286{
62bd5d66 287 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->print = print;
f12733c9
MD
288}
289
290void
23f2b9a3 291scm_set_port_equalp (scm_t_bits tc, SCM (*equalp) (SCM, SCM))
f12733c9 292{
62bd5d66 293 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->equalp = equalp;
f12733c9
MD
294}
295
31703ab8 296void
19b8d12b 297scm_set_port_close (scm_t_bits tc, int (*close) (SCM))
31703ab8 298{
19b8d12b 299 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->close = close;
31703ab8
GH
300}
301
f12733c9 302void
19b8d12b 303scm_set_port_flush (scm_t_bits tc, void (*flush) (SCM port))
f12733c9 304{
03a2eeb0
AW
305 scm_t_ptob_descriptor *ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tc));
306 ptob->flush = flush;
307 ptob->flags |= SCM_PORT_TYPE_HAS_FLUSH;
f12733c9
MD
308}
309
310void
19b8d12b 311scm_set_port_end_input (scm_t_bits tc, void (*end_input) (SCM port, int offset))
f12733c9 312{
19b8d12b 313 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->end_input = end_input;
f12733c9
MD
314}
315
316void
19b8d12b 317scm_set_port_seek (scm_t_bits tc, scm_t_off (*seek) (SCM, scm_t_off, int))
f12733c9 318{
62bd5d66 319 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->seek = seek;
f12733c9
MD
320}
321
322void
f1ce9199 323scm_set_port_truncate (scm_t_bits tc, void (*truncate) (SCM, scm_t_off))
f12733c9 324{
62bd5d66 325 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->truncate = truncate;
f12733c9
MD
326}
327
328void
23f2b9a3 329scm_set_port_input_waiting (scm_t_bits tc, int (*input_waiting) (SCM))
f12733c9 330{
62bd5d66 331 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->input_waiting = input_waiting;
f12733c9
MD
332}
333
45c0878b
MW
334static void
335scm_i_set_pending_eof (SCM port)
336{
337 SCM_PORT_GET_INTERNAL (port)->pending_eof = 1;
338}
339
340static void
341scm_i_clear_pending_eof (SCM port)
342{
343 SCM_PORT_GET_INTERNAL (port)->pending_eof = 0;
344}
345
a38024ba
MW
346SCM_DEFINE (scm_i_port_property, "%port-property", 2, 0, 0,
347 (SCM port, SCM key),
348 "Return the property of @var{port} associated with @var{key}.")
349#define FUNC_NAME s_scm_i_port_property
05d7f762 350{
79657fd3
MW
351 scm_i_pthread_mutex_t *lock;
352 SCM result;
353
a38024ba 354 SCM_VALIDATE_OPPORT (1, port);
79657fd3
MW
355 scm_c_lock_port (port, &lock);
356 result = scm_assq_ref (SCM_PORT_GET_INTERNAL (port)->alist, key);
357 if (lock)
358 scm_i_pthread_mutex_unlock (lock);
359 return result;
05d7f762 360}
a38024ba 361#undef FUNC_NAME
05d7f762 362
a38024ba
MW
363SCM_DEFINE (scm_i_set_port_property_x, "%set-port-property!", 3, 0, 0,
364 (SCM port, SCM key, SCM value),
365 "Set the property of @var{port} associated with @var{key} to @var{value}.")
366#define FUNC_NAME s_scm_i_set_port_property_x
05d7f762 367{
79657fd3 368 scm_i_pthread_mutex_t *lock;
a38024ba
MW
369 scm_t_port_internal *pti;
370
371 SCM_VALIDATE_OPPORT (1, port);
79657fd3 372 scm_c_lock_port (port, &lock);
a38024ba
MW
373 pti = SCM_PORT_GET_INTERNAL (port);
374 pti->alist = scm_assq_set_x (pti->alist, key, value);
79657fd3
MW
375 if (lock)
376 scm_i_pthread_mutex_unlock (lock);
a38024ba 377 return SCM_UNSPECIFIED;
05d7f762 378}
a38024ba 379#undef FUNC_NAME
05d7f762 380
0f2d19dd 381\f
0f2d19dd 382
d68fee48 383/* Standard ports --- current input, output, error, and more(!). */
0f2d19dd 384
34297700
AW
385static SCM cur_inport_fluid = SCM_BOOL_F;
386static SCM cur_outport_fluid = SCM_BOOL_F;
387static SCM cur_errport_fluid = SCM_BOOL_F;
388static SCM cur_loadport_fluid = SCM_BOOL_F;
9de87eea 389
3b3b36dd 390SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
e1546b65
MG
391 (),
392 "Return the current input port. This is the default port used\n"
393 "by many input procedures. Initially, @code{current-input-port}\n"
394 "returns the @dfn{standard input} in Unix and C terminology.")
1bbd0b84 395#define FUNC_NAME s_scm_current_input_port
0f2d19dd 396{
34297700 397 if (scm_is_true (cur_inport_fluid))
889975e5
MG
398 return scm_fluid_ref (cur_inport_fluid);
399 else
400 return SCM_BOOL_F;
0f2d19dd 401}
1bbd0b84 402#undef FUNC_NAME
0f2d19dd 403
3b3b36dd 404SCM_DEFINE (scm_current_output_port, "current-output-port", 0, 0, 0,
e1546b65
MG
405 (),
406 "Return the current output port. This is the default port used\n"
9401323e 407 "by many output procedures. Initially,\n"
e1546b65
MG
408 "@code{current-output-port} returns the @dfn{standard output} in\n"
409 "Unix and C terminology.")
1bbd0b84 410#define FUNC_NAME s_scm_current_output_port
0f2d19dd 411{
34297700 412 if (scm_is_true (cur_outport_fluid))
889975e5
MG
413 return scm_fluid_ref (cur_outport_fluid);
414 else
415 return SCM_BOOL_F;
0f2d19dd 416}
1bbd0b84 417#undef FUNC_NAME
0f2d19dd 418
3b3b36dd 419SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0,
1bbd0b84 420 (),
b380b885
MD
421 "Return the port to which errors and warnings should be sent (the\n"
422 "@dfn{standard error} in Unix and C terminology).")
1bbd0b84 423#define FUNC_NAME s_scm_current_error_port
0f2d19dd 424{
34297700 425 if (scm_is_true (cur_errport_fluid))
889975e5
MG
426 return scm_fluid_ref (cur_errport_fluid);
427 else
428 return SCM_BOOL_F;
0f2d19dd 429}
1bbd0b84 430#undef FUNC_NAME
0f2d19dd 431
3972de76
AW
432SCM
433scm_current_warning_port (void)
434{
f57ea23a
MW
435 static SCM cwp_var = SCM_UNDEFINED;
436 static scm_i_pthread_mutex_t cwp_var_mutex
437 = SCM_I_PTHREAD_MUTEX_INITIALIZER;
438
439 scm_i_scm_pthread_mutex_lock (&cwp_var_mutex);
440 if (SCM_UNBNDP (cwp_var))
441 cwp_var = scm_c_private_variable ("guile", "current-warning-port");
442 scm_i_pthread_mutex_unlock (&cwp_var_mutex);
3972de76
AW
443
444 return scm_call_0 (scm_variable_ref (cwp_var));
445}
446
3b3b36dd 447SCM_DEFINE (scm_current_load_port, "current-load-port", 0, 0, 0,
e1546b65 448 (),
b450f070 449 "Return the current-load-port.\n"
e1546b65 450 "The load port is used internally by @code{primitive-load}.")
1bbd0b84 451#define FUNC_NAME s_scm_current_load_port
31614d8e 452{
9de87eea 453 return scm_fluid_ref (cur_loadport_fluid);
31614d8e 454}
1bbd0b84 455#undef FUNC_NAME
31614d8e 456
3b3b36dd 457SCM_DEFINE (scm_set_current_input_port, "set-current-input-port", 1, 0, 0,
1bbd0b84 458 (SCM port),
8f85c0c6
NJ
459 "@deffnx {Scheme Procedure} set-current-output-port port\n"
460 "@deffnx {Scheme Procedure} set-current-error-port port\n"
b380b885
MD
461 "Change the ports returned by @code{current-input-port},\n"
462 "@code{current-output-port} and @code{current-error-port}, respectively,\n"
463 "so that they use the supplied @var{port} for input or output.")
1bbd0b84 464#define FUNC_NAME s_scm_set_current_input_port
0f2d19dd 465{
9de87eea 466 SCM oinp = scm_fluid_ref (cur_inport_fluid);
34d19ef6 467 SCM_VALIDATE_OPINPORT (1, port);
9de87eea 468 scm_fluid_set_x (cur_inport_fluid, port);
0f2d19dd
JB
469 return oinp;
470}
1bbd0b84 471#undef FUNC_NAME
0f2d19dd
JB
472
473
3b3b36dd 474SCM_DEFINE (scm_set_current_output_port, "set-current-output-port", 1, 0, 0,
e1546b65
MG
475 (SCM port),
476 "Set the current default output port to @var{port}.")
1bbd0b84 477#define FUNC_NAME s_scm_set_current_output_port
0f2d19dd 478{
9de87eea 479 SCM ooutp = scm_fluid_ref (cur_outport_fluid);
78446828 480 port = SCM_COERCE_OUTPORT (port);
34d19ef6 481 SCM_VALIDATE_OPOUTPORT (1, port);
9de87eea 482 scm_fluid_set_x (cur_outport_fluid, port);
0f2d19dd
JB
483 return ooutp;
484}
1bbd0b84 485#undef FUNC_NAME
0f2d19dd
JB
486
487
3b3b36dd 488SCM_DEFINE (scm_set_current_error_port, "set-current-error-port", 1, 0, 0,
e1546b65
MG
489 (SCM port),
490 "Set the current default error port to @var{port}.")
1bbd0b84 491#define FUNC_NAME s_scm_set_current_error_port
0f2d19dd 492{
9de87eea 493 SCM oerrp = scm_fluid_ref (cur_errport_fluid);
78446828 494 port = SCM_COERCE_OUTPORT (port);
34d19ef6 495 SCM_VALIDATE_OPOUTPORT (1, port);
9de87eea 496 scm_fluid_set_x (cur_errport_fluid, port);
0f2d19dd
JB
497 return oerrp;
498}
1bbd0b84 499#undef FUNC_NAME
0f2d19dd 500
3972de76
AW
501
502SCM
503scm_set_current_warning_port (SCM port)
504{
505 static SCM cwp_var = SCM_BOOL_F;
506
507 if (scm_is_false (cwp_var))
508 cwp_var = scm_c_private_lookup ("guile", "current-warning-port");
509
510 return scm_call_1 (scm_variable_ref (cwp_var), port);
511}
512
513
185e369a 514void
661ae7ab 515scm_dynwind_current_input_port (SCM port)
9de87eea 516#define FUNC_NAME NULL
185e369a 517{
9de87eea 518 SCM_VALIDATE_OPINPORT (1, port);
661ae7ab 519 scm_dynwind_fluid (cur_inport_fluid, port);
185e369a 520}
9de87eea 521#undef FUNC_NAME
185e369a
MV
522
523void
661ae7ab 524scm_dynwind_current_output_port (SCM port)
9de87eea 525#define FUNC_NAME NULL
185e369a 526{
9de87eea
MV
527 port = SCM_COERCE_OUTPORT (port);
528 SCM_VALIDATE_OPOUTPORT (1, port);
661ae7ab 529 scm_dynwind_fluid (cur_outport_fluid, port);
185e369a 530}
9de87eea 531#undef FUNC_NAME
185e369a
MV
532
533void
661ae7ab 534scm_dynwind_current_error_port (SCM port)
9de87eea
MV
535#define FUNC_NAME NULL
536{
537 port = SCM_COERCE_OUTPORT (port);
538 SCM_VALIDATE_OPOUTPORT (1, port);
661ae7ab 539 scm_dynwind_fluid (cur_errport_fluid, port);
9de87eea
MV
540}
541#undef FUNC_NAME
542
543void
661ae7ab 544scm_i_dynwind_current_load_port (SCM port)
185e369a 545{
661ae7ab 546 scm_dynwind_fluid (cur_loadport_fluid, port);
185e369a
MV
547}
548
19b8d12b 549
0f2d19dd 550\f
0f2d19dd 551
19b8d12b
AW
552/* Retrieving a port's mode. */
553
554/* Return the flags that characterize a port based on the mode
555 * string used to open a file for that port.
556 *
557 * See PORT FLAGS in scm.h
5dbc6c06 558 */
19b8d12b
AW
559
560static long
561scm_i_mode_bits_n (SCM modes)
562{
563 return (SCM_OPN
564 | (scm_i_string_contains_char (modes, 'r')
565 || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0)
566 | (scm_i_string_contains_char (modes, 'w')
567 || scm_i_string_contains_char (modes, 'a')
568 || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0)
569 | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0)
570 | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0));
571}
572
573long
574scm_mode_bits (char *modes)
575{
8ebd06c6
AW
576 /* Valid characters are rw+a0l. So, use latin1. */
577 return scm_i_mode_bits (scm_from_latin1_string (modes));
19b8d12b
AW
578}
579
580long
581scm_i_mode_bits (SCM modes)
582{
583 long bits;
584
585 if (!scm_is_string (modes))
586 scm_wrong_type_arg_msg (NULL, 0, modes, "string");
587
588 bits = scm_i_mode_bits_n (modes);
589 scm_remember_upto_here_1 (modes);
590 return bits;
591}
592
593/* Return the mode flags from an open port.
594 * Some modes such as "append" are only used when opening
595 * a file and are not returned here. */
596
597SCM_DEFINE (scm_port_mode, "port-mode", 1, 0, 0,
598 (SCM port),
599 "Return the port modes associated with the open port @var{port}.\n"
600 "These will not necessarily be identical to the modes used when\n"
601 "the port was opened, since modes such as \"append\" which are\n"
602 "used only during port creation are not retained.")
603#define FUNC_NAME s_scm_port_mode
604{
605 char modes[4];
606 modes[0] = '\0';
607
608 port = SCM_COERCE_OUTPORT (port);
609 SCM_VALIDATE_OPPORT (1, port);
610 if (SCM_CELL_WORD_0 (port) & SCM_RDNG) {
611 if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
612 strcpy (modes, "r+");
613 else
614 strcpy (modes, "r");
615 }
616 else if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
617 strcpy (modes, "w");
618 if (SCM_CELL_WORD_0 (port) & SCM_BUF0)
619 strcat (modes, "0");
620
621 return scm_from_latin1_string (modes);
622}
623#undef FUNC_NAME
624
625
626\f
627
628/* The port table --- a weak set of all ports.
629
630 We need a global registry of ports to flush them all at exit, and to
631 get all the ports matching a file descriptor. */
2721f918 632SCM scm_i_port_weak_set;
b9ad392e 633
19b8d12b 634
651a0735 635\f
651a0735 636
19b8d12b 637/* Port finalization. */
1cc91f1b 638
5a771d5f
AW
639struct do_free_data
640{
641 scm_t_ptob_descriptor *ptob;
642 SCM port;
643};
644
645static SCM
646do_free (void *body_data)
647{
648 struct do_free_data *data = body_data;
649
650 /* `close' is for explicit `close-port' by user. `free' is for this
651 purpose: ports collected by the GC. */
652 data->ptob->free (data->port);
653
654 return SCM_BOOL_T;
655}
656
651a0735
LC
657/* Finalize the object (a port) pointed to by PTR. */
658static void
6922d92f 659finalize_port (void *ptr, void *data)
651a0735 660{
21041372 661 SCM port = SCM_PACK_POINTER (ptr);
651a0735
LC
662
663 if (!SCM_PORTP (port))
664 abort ();
665
666 if (SCM_OPENP (port))
667 {
3753e227 668 struct do_free_data data;
651a0735 669
3753e227 670 SCM_CLR_PORT_OPEN_FLAG (port);
651a0735 671
3753e227
AW
672 data.ptob = SCM_PORT_DESCRIPTOR (port);
673 data.port = port;
5a771d5f 674
3753e227
AW
675 scm_internal_catch (SCM_BOOL_T, do_free, &data,
676 scm_handle_by_message_noexit, NULL);
5a771d5f 677
3753e227 678 scm_gc_ports_collected++;
651a0735
LC
679 }
680}
681
682
651a0735
LC
683\f
684
da220f27 685SCM
2721f918
AW
686scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits,
687 const char *encoding,
688 scm_t_string_failed_conversion_handler handler,
689 scm_t_bits stream)
0f2d19dd 690{
2721f918
AW
691 SCM ret;
692 scm_t_port *entry;
f6f4feb0 693 scm_t_port_internal *pti;
62bd5d66 694 scm_t_ptob_descriptor *ptob;
2721f918 695
f6f4feb0
MW
696 entry = scm_gc_typed_calloc (scm_t_port);
697 pti = scm_gc_typed_calloc (scm_t_port_internal);
62bd5d66
AW
698 ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tag));
699
700 ret = scm_words (tag | mode_bits, 3);
701 SCM_SET_CELL_WORD_1 (ret, (scm_t_bits) entry);
702 SCM_SET_CELL_WORD_2 (ret, (scm_t_bits) ptob);
5f16b897 703
92c0ebac
AW
704 entry->lock = scm_gc_malloc_pointerless (sizeof (*entry->lock), "port lock");
705 scm_i_pthread_mutex_init (entry->lock, scm_i_pthread_mutexattr_recursive);
30b126d2 706
f6f4feb0 707 entry->internal = pti;
840ae05d 708 entry->file_name = SCM_BOOL_F;
61e452ba 709 entry->rw_active = SCM_PORT_NEITHER;
2721f918
AW
710 entry->port = ret;
711 entry->stream = stream;
93c4fa21
AW
712
713 if (encoding_matches (encoding, "UTF-8"))
714 {
f6f4feb0 715 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
93c4fa21
AW
716 entry->encoding = "UTF-8";
717 }
718 else if (encoding_matches (encoding, "ISO-8859-1"))
719 {
f6f4feb0 720 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
93c4fa21
AW
721 entry->encoding = "ISO-8859-1";
722 }
6c98257f 723 else
93c4fa21 724 {
f6f4feb0 725 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
93c4fa21
AW
726 entry->encoding = canonicalize_encoding (encoding);
727 }
728
2721f918 729 entry->ilseq_handler = handler;
f6f4feb0 730 pti->iconv_descriptors = NULL;
0f2d19dd 731
f6f4feb0
MW
732 pti->at_stream_start_for_bom_read = 1;
733 pti->at_stream_start_for_bom_write = 1;
f4bc4e59 734
f6f4feb0
MW
735 pti->pending_eof = 0;
736 pti->alist = SCM_EOL;
3e05fc04 737
6978c673
AW
738 if (SCM_PORT_DESCRIPTOR (ret)->free)
739 scm_i_set_finalizer (SCM2PTR (ret), finalize_port, NULL);
740
03a2eeb0
AW
741 if (SCM_PORT_DESCRIPTOR (ret)->flags & SCM_PORT_TYPE_HAS_FLUSH)
742 scm_weak_set_add_x (scm_i_port_weak_set, ret);
651a0735 743
2721f918
AW
744 return ret;
745}
746
747SCM
748scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream)
749{
750 return scm_c_make_port_with_encoding (tag, mode_bits,
751 scm_i_default_port_encoding (),
0dd7c540 752 scm_i_default_port_conversion_handler (),
2721f918
AW
753 stream);
754}
755
756SCM
757scm_new_port_table_entry (scm_t_bits tag)
758{
759 return scm_c_make_port (tag, 0, 0);
0f2d19dd
JB
760}
761
d68fee48 762\f
d68fee48 763
19b8d12b 764/* Predicates. */
1cc91f1b 765
19b8d12b
AW
766SCM_DEFINE (scm_port_p, "port?", 1, 0, 0,
767 (SCM x),
768 "Return a boolean indicating whether @var{x} is a port.\n"
769 "Equivalent to @code{(or (input-port? @var{x}) (output-port?\n"
770 "@var{x}))}.")
771#define FUNC_NAME s_scm_port_p
0f2d19dd 772{
19b8d12b 773 return scm_from_bool (SCM_PORTP (x));
0f2d19dd 774}
19b8d12b 775#undef FUNC_NAME
0f2d19dd 776
19b8d12b
AW
777SCM_DEFINE (scm_input_port_p, "input-port?", 1, 0, 0,
778 (SCM x),
779 "Return @code{#t} if @var{x} is an input port, otherwise return\n"
780 "@code{#f}. Any object satisfying this predicate also satisfies\n"
781 "@code{port?}.")
782#define FUNC_NAME s_scm_input_port_p
0f2d19dd 783{
19b8d12b 784 return scm_from_bool (SCM_INPUT_PORT_P (x));
0f2d19dd 785}
1bbd0b84 786#undef FUNC_NAME
0f2d19dd 787
19b8d12b
AW
788SCM_DEFINE (scm_output_port_p, "output-port?", 1, 0, 0,
789 (SCM x),
790 "Return @code{#t} if @var{x} is an output port, otherwise return\n"
791 "@code{#f}. Any object satisfying this predicate also satisfies\n"
792 "@code{port?}.")
793#define FUNC_NAME s_scm_output_port_p
0f2d19dd 794{
19b8d12b
AW
795 x = SCM_COERCE_OUTPORT (x);
796 return scm_from_bool (SCM_OUTPUT_PORT_P (x));
0f2d19dd 797}
1bbd0b84 798#undef FUNC_NAME
0f2d19dd 799
19b8d12b
AW
800SCM_DEFINE (scm_port_closed_p, "port-closed?", 1, 0, 0,
801 (SCM port),
802 "Return @code{#t} if @var{port} is closed or @code{#f} if it is\n"
803 "open.")
804#define FUNC_NAME s_scm_port_closed_p
d617ee18 805{
19b8d12b
AW
806 SCM_VALIDATE_PORT (1, port);
807 return scm_from_bool (!SCM_OPPORTP (port));
d617ee18 808}
19b8d12b 809#undef FUNC_NAME
eadd48de 810
19b8d12b
AW
811SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0,
812 (SCM x),
813 "Return @code{#t} if @var{x} is an end-of-file object; otherwise\n"
814 "return @code{#f}.")
815#define FUNC_NAME s_scm_eof_object_p
eadd48de 816{
19b8d12b 817 return scm_from_bool (SCM_EOF_OBJECT_P (x));
eadd48de 818}
1bbd0b84 819#undef FUNC_NAME
eadd48de
GH
820
821
d68fee48 822\f
19b8d12b 823
d68fee48
JB
824/* Closing ports. */
825
03a2eeb0
AW
826static void close_iconv_descriptors (scm_t_iconv_descriptors *id);
827
0f2d19dd
JB
828/* scm_close_port
829 * Call the close operation on a port object.
eadd48de 830 * see also scm_close.
0f2d19dd 831 */
3b3b36dd 832SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0,
1bbd0b84 833 (SCM port),
1e6808ea
MG
834 "Close the specified port object. Return @code{#t} if it\n"
835 "successfully closes a port or @code{#f} if it was already\n"
836 "closed. An exception may be raised if an error occurs, for\n"
837 "example when flushing buffered output. See also @ref{Ports and\n"
838 "File Descriptors, close}, for a procedure which can close file\n"
839 "descriptors.")
1bbd0b84 840#define FUNC_NAME s_scm_close_port
0f2d19dd 841{
f6f4feb0 842 scm_t_port_internal *pti;
eadd48de
GH
843 int rv;
844
78446828
MV
845 port = SCM_COERCE_OUTPORT (port);
846
7a754ca6 847 SCM_VALIDATE_PORT (1, port);
0f2d19dd 848 if (SCM_CLOSEDP (port))
eadd48de 849 return SCM_BOOL_F;
03a2eeb0 850
f6f4feb0 851 pti = SCM_PORT_GET_INTERNAL (port);
5a771d5f 852 SCM_CLR_PORT_OPEN_FLAG (port);
03a2eeb0
AW
853
854 if (SCM_PORT_DESCRIPTOR (port)->flags & SCM_PORT_TYPE_HAS_FLUSH)
855 scm_weak_set_remove_x (scm_i_port_weak_set, port);
856
5a771d5f
AW
857 if (SCM_PORT_DESCRIPTOR (port)->close)
858 /* Note! This may throw an exception. Anything after this point
859 should be resilient to non-local exits. */
860 rv = SCM_PORT_DESCRIPTOR (port)->close (port);
861 else
862 rv = 0;
03a2eeb0 863
f6f4feb0 864 if (pti->iconv_descriptors)
03a2eeb0 865 {
5a771d5f
AW
866 /* If we don't get here, the iconv_descriptors finalizer will
867 clean up. */
f6f4feb0
MW
868 close_iconv_descriptors (pti->iconv_descriptors);
869 pti->iconv_descriptors = NULL;
03a2eeb0
AW
870 }
871
7888309b 872 return scm_from_bool (rv >= 0);
7a754ca6
MD
873}
874#undef FUNC_NAME
875
876SCM_DEFINE (scm_close_input_port, "close-input-port", 1, 0, 0,
877 (SCM port),
878 "Close the specified input port object. The routine has no effect if\n"
879 "the file has already been closed. An exception may be raised if an\n"
880 "error occurs. The value returned is unspecified.\n\n"
881 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
882 "which can close file descriptors.")
883#define FUNC_NAME s_scm_close_input_port
884{
885 SCM_VALIDATE_INPUT_PORT (1, port);
886 scm_close_port (port);
887 return SCM_UNSPECIFIED;
888}
889#undef FUNC_NAME
890
891SCM_DEFINE (scm_close_output_port, "close-output-port", 1, 0, 0,
892 (SCM port),
893 "Close the specified output port object. The routine has no effect if\n"
894 "the file has already been closed. An exception may be raised if an\n"
895 "error occurs. The value returned is unspecified.\n\n"
896 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
897 "which can close file descriptors.")
898#define FUNC_NAME s_scm_close_output_port
899{
900 port = SCM_COERCE_OUTPORT (port);
901 SCM_VALIDATE_OUTPUT_PORT (1, port);
902 scm_close_port (port);
903 return SCM_UNSPECIFIED;
0f2d19dd 904}
1bbd0b84 905#undef FUNC_NAME
0f2d19dd 906
2721f918 907
19b8d12b 908\f
2721f918 909
19b8d12b
AW
910/* Encoding characters to byte streams, and decoding byte streams to
911 characters. */
5dbc6c06 912
19b8d12b
AW
913/* A fluid specifying the default encoding for newly created ports. If it is
914 a string, that is the encoding. If it is #f, it is in the "native"
915 (Latin-1) encoding. */
916SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding");
917
918static int scm_port_encoding_init = 0;
919
920/* Use ENCODING as the default encoding for future ports. */
c536b4b3 921void
19b8d12b 922scm_i_set_default_port_encoding (const char *encoding)
c2ca4493 923{
19b8d12b
AW
924 if (!scm_port_encoding_init
925 || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
926 scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
927 SCM_EOL);
fdfe6305 928
449ca87b 929 if (encoding_matches (encoding, "ISO-8859-1"))
19b8d12b
AW
930 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
931 else
93c4fa21
AW
932 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
933 scm_from_latin1_string (canonicalize_encoding (encoding)));
2721f918 934}
3a5fb14d 935
93c4fa21 936/* Return the name of the default encoding for newly created ports. */
19b8d12b
AW
937const char *
938scm_i_default_port_encoding (void)
2721f918 939{
19b8d12b 940 if (!scm_port_encoding_init)
93c4fa21 941 return "ISO-8859-1";
19b8d12b 942 else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
93c4fa21 943 return "ISO-8859-1";
19b8d12b
AW
944 else
945 {
946 SCM encoding;
947
948 encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
949 if (!scm_is_string (encoding))
93c4fa21 950 return "ISO-8859-1";
19b8d12b
AW
951 else
952 return scm_i_string_chars (encoding);
953 }
c536b4b3 954}
fdfe6305 955
0dd7c540
AW
956/* A fluid specifying the default conversion handler for newly created
957 ports. Its value should be one of the symbols below. */
958SCM_VARIABLE (default_conversion_strategy_var,
959 "%default-port-conversion-strategy");
1b3daef0 960
0dd7c540
AW
961/* Whether the above fluid is initialized. */
962static int scm_conversion_strategy_init = 0;
c2ca4493 963
0dd7c540
AW
964/* The possible conversion strategies. */
965SCM_SYMBOL (sym_error, "error");
966SCM_SYMBOL (sym_substitute, "substitute");
967SCM_SYMBOL (sym_escape, "escape");
c536b4b3 968
0dd7c540
AW
/* Return the default policy for handling failed encoding conversions
   on newly created ports.  */
971scm_t_string_failed_conversion_handler
972scm_i_default_port_conversion_handler (void)
973{
974 scm_t_string_failed_conversion_handler handler;
d68fee48 975
0dd7c540
AW
976 if (!scm_conversion_strategy_init
977 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
978 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
979 else
980 {
981 SCM fluid, value;
0f2d19dd 982
0dd7c540
AW
983 fluid = SCM_VARIABLE_REF (default_conversion_strategy_var);
984 value = scm_fluid_ref (fluid);
0f2d19dd 985
0dd7c540
AW
986 if (scm_is_eq (sym_substitute, value))
987 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
988 else if (scm_is_eq (sym_escape, value))
989 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
990 else
991 /* Default to 'error also when the fluid's value is not one of
992 the valid symbols. */
993 handler = SCM_FAILED_CONVERSION_ERROR;
994 }
eb5c0a2a 995
0dd7c540 996 return handler;
60d0643d 997}
0f2d19dd 998
0dd7c540
AW
999/* Use HANDLER as the default conversion strategy for future ports. */
1000void
1001scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler
1002 handler)
0f2d19dd 1003{
0dd7c540 1004 SCM strategy;
0f2d19dd 1005
0dd7c540
AW
1006 if (!scm_conversion_strategy_init
1007 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
1008 scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
1009 SCM_EOL);
1010
1011 switch (handler)
78446828 1012 {
0dd7c540
AW
1013 case SCM_FAILED_CONVERSION_ERROR:
1014 strategy = sym_error;
1015 break;
1016
1017 case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE:
1018 strategy = sym_escape;
1019 break;
1020
1021 case SCM_FAILED_CONVERSION_QUESTION_MARK:
1022 strategy = sym_substitute;
1023 break;
1024
1025 default:
1026 abort ();
78446828 1027 }
0f2d19dd 1028
0dd7c540
AW
1029 scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var),
1030 strategy);
1031}
5dbc6c06 1032
f6f4feb0
MW
1033static void
1034scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port);
1035
1036/* If the next LEN bytes from PORT are equal to those in BYTES, then
1037 return 1, else return 0. Leave the port position unchanged. */
1038static int
1039looking_at_bytes (SCM port, const unsigned char *bytes, int len)
0f2d19dd 1040{
f6f4feb0
MW
1041 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1042 int i = 0;
1043
1044 while (i < len && scm_peek_byte_or_eof_unlocked (port) == bytes[i])
78446828 1045 {
f6f4feb0
MW
1046 pt->read_pos++;
1047 i++;
78446828 1048 }
f6f4feb0
MW
1049 scm_i_unget_bytes_unlocked (bytes, i, port);
1050 return (i == len);
0f2d19dd
JB
1051}
1052
f6f4feb0
MW
/* Byte-order mark (BOM) byte sequences, one table per encoding form
   and byte order named in the identifier.  */
1053static const unsigned char scm_utf8_bom[3] = {0xEF, 0xBB, 0xBF};
1054static const unsigned char scm_utf16be_bom[2] = {0xFE, 0xFF};
1055static const unsigned char scm_utf16le_bom[2] = {0xFF, 0xFE};
1056static const unsigned char scm_utf32be_bom[4] = {0x00, 0x00, 0xFE, 0xFF};
1057static const unsigned char scm_utf32le_bom[4] = {0xFF, 0xFE, 0x00, 0x00};
5dbc6c06 1058
f6f4feb0
MW
1059/* Decide what byte order to use for a UTF-16 port. Return "UTF-16BE"
1060 or "UTF-16LE". MODE must be either SCM_PORT_READ or SCM_PORT_WRITE,
1061 and specifies which operation is about to be done. The MODE
1062 determines how we will decide the byte order. We deliberately avoid
1063 reading from the port unless the user is about to do so. If the user
1064 is about to read, then we look for a BOM, and if present, we use it
1065 to determine the byte order. Otherwise we choose big endian, as
1066 recommended by the Unicode Standard. Note that the BOM (if any) is
1067 not consumed here. */
1068static const char *
1069decide_utf16_encoding (SCM port, scm_t_port_rw_active mode)
5dbc6c06 1070{
f6f4feb0
MW
1071 if (mode == SCM_PORT_READ
1072 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1073 && looking_at_bytes (port, scm_utf16le_bom, sizeof scm_utf16le_bom))
1074 return "UTF-16LE";
1075 else
1076 return "UTF-16BE";
5dbc6c06
HWN
1077}
1078
f6f4feb0
MW
1079/* Decide what byte order to use for a UTF-32 port. Return "UTF-32BE"
1080 or "UTF-32LE". See the comment above 'decide_utf16_encoding' for
1081 details. */
1082static const char *
1083decide_utf32_encoding (SCM port, scm_t_port_rw_active mode)
89ea5b7c 1084{
f6f4feb0
MW
1085 if (mode == SCM_PORT_READ
1086 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1087 && looking_at_bytes (port, scm_utf32le_bom, sizeof scm_utf32le_bom))
1088 return "UTF-32LE";
1089 else
1090 return "UTF-32BE";
89ea5b7c 1091}
0f2d19dd 1092
6c98257f 1093static void
2aed2667 1094finalize_iconv_descriptors (void *ptr, void *data)
c536b4b3 1095{
6c98257f
AW
1096 close_iconv_descriptors (ptr);
1097}
c2ca4493 1098
6c98257f
AW
1099static scm_t_iconv_descriptors *
1100open_iconv_descriptors (const char *encoding, int reading, int writing)
1101{
1102 scm_t_iconv_descriptors *id;
1103 iconv_t input_cd, output_cd;
8ebd06c6 1104 size_t i;
c536b4b3 1105
6c98257f
AW
1106 input_cd = (iconv_t) -1;
1107 output_cd = (iconv_t) -1;
d68fee48 1108
8ebd06c6
AW
1109 for (i = 0; encoding[i]; i++)
1110 if (encoding[i] > 127)
1111 goto invalid_encoding;
1112
6c98257f
AW
1113 if (reading)
1114 {
1115 /* Open an input iconv conversion descriptor, from ENCODING
1116 to UTF-8. We choose UTF-8, not UTF-32, because iconv
1117 implementations can typically convert from anything to
1118 UTF-8, but not to UTF-32 (see
1119 <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
1120
1121 /* Assume opening an iconv descriptor causes about 16 KB of
1122 allocation. */
1123 scm_gc_register_allocation (16 * 1024);
1124
1125 input_cd = iconv_open ("UTF-8", encoding);
1126 if (input_cd == (iconv_t) -1)
1127 goto invalid_encoding;
1128 }
0f2d19dd 1129
6c98257f 1130 if (writing)
19b8d12b 1131 {
6c98257f
AW
1132 /* Assume opening an iconv descriptor causes about 16 KB of
1133 allocation. */
1134 scm_gc_register_allocation (16 * 1024);
0f2d19dd 1135
6c98257f
AW
1136 output_cd = iconv_open (encoding, "UTF-8");
1137 if (output_cd == (iconv_t) -1)
1138 {
1139 if (input_cd != (iconv_t) -1)
1140 iconv_close (input_cd);
1141 goto invalid_encoding;
1142 }
19b8d12b 1143 }
eb5c0a2a 1144
6c98257f
AW
1145 id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors");
1146 id->input_cd = input_cd;
1147 id->output_cd = output_cd;
1148
6978c673
AW
1149 /* Register a finalizer to close the descriptors. */
1150 scm_i_set_finalizer (id, finalize_iconv_descriptors, NULL);
19b8d12b 1151
6c98257f 1152 return id;
19b8d12b
AW
1153
1154 invalid_encoding:
1155 {
1156 SCM err;
8ebd06c6 1157 err = scm_from_latin1_string (encoding);
6c98257f 1158 scm_misc_error ("open_iconv_descriptors",
19b8d12b
AW
1159 "invalid or unknown character encoding ~s",
1160 scm_list_1 (err));
1161 }
0f2d19dd
JB
1162}
1163
6c98257f
AW
1164static void
1165close_iconv_descriptors (scm_t_iconv_descriptors *id)
1166{
1167 if (id->input_cd != (iconv_t) -1)
1168 iconv_close (id->input_cd);
1169 if (id->output_cd != (iconv_t) -1)
1170 iconv_close (id->output_cd);
1171 id->input_cd = (void *) -1;
1172 id->output_cd = (void *) -1;
1173}
1174
1175scm_t_iconv_descriptors *
f6f4feb0 1176scm_i_port_iconv_descriptors (SCM port, scm_t_port_rw_active mode)
6c98257f 1177{
f6f4feb0 1178 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
6c98257f 1179
f6f4feb0 1180 assert (pti->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV);
6c98257f 1181
f6f4feb0 1182 if (!pti->iconv_descriptors)
6c98257f 1183 {
f6f4feb0
MW
1184 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1185 const char *precise_encoding;
7b292a9d 1186
6c98257f
AW
1187 if (!pt->encoding)
1188 pt->encoding = "ISO-8859-1";
7be1705d 1189
f6f4feb0
MW
1190 /* If the specified encoding is UTF-16 or UTF-32, then make
1191 that more precise by deciding what byte order to use. */
1192 if (strcmp (pt->encoding, "UTF-16") == 0)
1193 precise_encoding = decide_utf16_encoding (port, mode);
1194 else if (strcmp (pt->encoding, "UTF-32") == 0)
1195 precise_encoding = decide_utf32_encoding (port, mode);
1196 else
1197 precise_encoding = pt->encoding;
7b292a9d 1198
f6f4feb0
MW
1199 pti->iconv_descriptors =
1200 open_iconv_descriptors (precise_encoding,
6c98257f
AW
1201 SCM_INPUT_PORT_P (port),
1202 SCM_OUTPUT_PORT_P (port));
1203 }
1204
f6f4feb0 1205 return pti->iconv_descriptors;
6c98257f
AW
1206}
1207
8ebd06c6 1208/* The name of the encoding is itself encoded in ASCII. */
6c98257f
AW
1209void
1210scm_i_set_port_encoding_x (SCM port, const char *encoding)
1211{
1212 scm_t_port *pt;
f6f4feb0 1213 scm_t_port_internal *pti;
6c98257f
AW
1214 scm_t_iconv_descriptors *prev;
1215
1216 /* Set the character encoding for this port. */
1217 pt = SCM_PTAB_ENTRY (port);
f6f4feb0
MW
1218 pti = SCM_PORT_GET_INTERNAL (port);
1219 prev = pti->iconv_descriptors;
7b292a9d 1220
f6f4feb0
MW
1221 /* In order to handle cases where the encoding changes mid-stream
1222 (e.g. within an HTTP stream, or within a file that is composed of
1223 segments with different encodings), we consider this to be "stream
1224 start" for purposes of BOM handling, regardless of our actual file
1225 position. */
1226 pti->at_stream_start_for_bom_read = 1;
1227 pti->at_stream_start_for_bom_write = 1;
6c98257f 1228
93c4fa21 1229 if (encoding_matches (encoding, "UTF-8"))
6c98257f
AW
1230 {
1231 pt->encoding = "UTF-8";
f6f4feb0 1232 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
6c98257f 1233 }
93c4fa21 1234 else if (encoding_matches (encoding, "ISO-8859-1"))
79eb47ea
AW
1235 {
1236 pt->encoding = "ISO-8859-1";
f6f4feb0 1237 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
79eb47ea 1238 }
6c98257f
AW
1239 else
1240 {
f6f4feb0
MW
1241 pt->encoding = canonicalize_encoding (encoding);
1242 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
6c98257f
AW
1243 }
1244
f6f4feb0 1245 pti->iconv_descriptors = NULL;
6c98257f
AW
1246 if (prev)
1247 close_iconv_descriptors (prev);
1248}
1249
19b8d12b
AW
1250SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
1251 (SCM port),
1252 "Returns, as a string, the character encoding that @var{port}\n"
1253 "uses to interpret its input and output.\n")
1254#define FUNC_NAME s_scm_port_encoding
0f2d19dd 1255{
19b8d12b
AW
1256 SCM_VALIDATE_PORT (1, port);
1257
93c4fa21 1258 return scm_from_latin1_string (SCM_PTAB_ENTRY (port)->encoding);
0f2d19dd 1259}
1bbd0b84 1260#undef FUNC_NAME
0f2d19dd 1261
19b8d12b
AW
1262SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
1263 (SCM port, SCM enc),
1264 "Sets the character encoding that will be used to interpret all\n"
1265 "port I/O. New ports are created with the encoding\n"
1266 "appropriate for the current locale if @code{setlocale} has \n"
1267 "been called or ISO-8859-1 otherwise\n"
1268 "and this procedure can be used to modify that encoding.\n")
1269#define FUNC_NAME s_scm_set_port_encoding_x
5dbc6c06 1270{
19b8d12b 1271 char *enc_str;
5dbc6c06 1272
19b8d12b
AW
1273 SCM_VALIDATE_PORT (1, port);
1274 SCM_VALIDATE_STRING (2, enc);
0f2d19dd 1275
8ebd06c6 1276 enc_str = scm_to_latin1_string (enc);
19b8d12b
AW
1277 scm_i_set_port_encoding_x (port, enc_str);
1278 free (enc_str);
1279
1280 return SCM_UNSPECIFIED;
0f2d19dd 1281}
1bbd0b84 1282#undef FUNC_NAME
0f2d19dd 1283
19b8d12b
AW
1284SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
1285 1, 0, 0, (SCM port),
1286 "Returns the behavior of the port when handling a character that\n"
1287 "is not representable in the port's current encoding.\n"
1288 "It returns the symbol @code{error} if unrepresentable characters\n"
1289 "should cause exceptions, @code{substitute} if the port should\n"
1290 "try to replace unrepresentable characters with question marks or\n"
1291 "approximate characters, or @code{escape} if unrepresentable\n"
1292 "characters should be converted to string escapes.\n"
1293 "\n"
1294 "If @var{port} is @code{#f}, then the current default behavior\n"
1295 "will be returned. New ports will have this default behavior\n"
1296 "when they are created.\n")
1297#define FUNC_NAME s_scm_port_conversion_strategy
889975e5 1298{
19b8d12b 1299 scm_t_string_failed_conversion_handler h;
7b292a9d 1300
0dd7c540
AW
1301 if (scm_is_false (port))
1302 h = scm_i_default_port_conversion_handler ();
1303 else
7b292a9d 1304 {
0dd7c540
AW
1305 scm_t_port *pt;
1306
19b8d12b 1307 SCM_VALIDATE_OPPORT (1, port);
0dd7c540
AW
1308 pt = SCM_PTAB_ENTRY (port);
1309
1310 h = pt->ilseq_handler;
7b292a9d
LC
1311 }
1312
19b8d12b
AW
1313 if (h == SCM_FAILED_CONVERSION_ERROR)
1314 return scm_from_latin1_symbol ("error");
1315 else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
1316 return scm_from_latin1_symbol ("substitute");
1317 else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
1318 return scm_from_latin1_symbol ("escape");
1319 else
1320 abort ();
7b292a9d 1321
19b8d12b
AW
1322 /* Never gets here. */
1323 return SCM_UNDEFINED;
1324}
1325#undef FUNC_NAME
7b292a9d 1326
19b8d12b
AW
1327SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!",
1328 2, 0, 0,
1329 (SCM port, SCM sym),
1330 "Sets the behavior of the interpreter when outputting a character\n"
1331 "that is not representable in the port's current encoding.\n"
1332 "@var{sym} can be either @code{'error}, @code{'substitute}, or\n"
1333 "@code{'escape}. If it is @code{'error}, an error will be thrown\n"
1334 "when an unconvertible character is encountered. If it is\n"
1335 "@code{'substitute}, then unconvertible characters will \n"
1336 "be replaced with approximate characters, or with question marks\n"
1337 "if no approximately correct character is available.\n"
1338 "If it is @code{'escape},\n"
1339 "it will appear as a hex escape when output.\n"
1340 "\n"
1341 "If @var{port} is an open port, the conversion error behavior\n"
1342 "is set for that port. If it is @code{#f}, it is set as the\n"
1343 "default behavior for any future ports that get created in\n"
1344 "this thread.\n")
1345#define FUNC_NAME s_scm_set_port_conversion_strategy_x
1346{
0dd7c540 1347 scm_t_string_failed_conversion_handler handler;
7b292a9d 1348
0dd7c540
AW
1349 if (scm_is_eq (sym, sym_error))
1350 handler = SCM_FAILED_CONVERSION_ERROR;
1351 else if (scm_is_eq (sym, sym_substitute))
1352 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
1353 else if (scm_is_eq (sym, sym_escape))
1354 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
1355 else
1356 SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym));
7b292a9d 1357
0dd7c540
AW
1358 if (scm_is_false (port))
1359 scm_i_set_default_port_conversion_handler (handler);
1360 else
19b8d12b 1361 {
0dd7c540
AW
1362 SCM_VALIDATE_OPPORT (1, port);
1363 SCM_PTAB_ENTRY (port)->ilseq_handler = handler;
19b8d12b 1364 }
7b292a9d 1365
19b8d12b
AW
1366 return SCM_UNSPECIFIED;
1367}
1368#undef FUNC_NAME
7be1705d 1369
7b292a9d 1370
19b8d12b 1371\f
7b292a9d 1372
14dcb5cc
AW
1373/* The port lock. */
1374
1375static void
92c0ebac 1376lock_port (void *mutex)
14dcb5cc 1377{
789dd40b 1378 scm_i_pthread_mutex_lock ((scm_i_pthread_mutex_t *) mutex);
14dcb5cc
AW
1379}
1380
1381static void
92c0ebac 1382unlock_port (void *mutex)
14dcb5cc 1383{
789dd40b 1384 scm_i_pthread_mutex_unlock ((scm_i_pthread_mutex_t *) mutex);
14dcb5cc
AW
1385}
1386
1387void
1388scm_dynwind_lock_port (SCM port)
92c0ebac 1389#define FUNC_NAME "dynwind-lock-port"
14dcb5cc 1390{
92c0ebac
AW
1391 scm_i_pthread_mutex_t *lock;
1392 SCM_VALIDATE_OPPORT (SCM_ARG1, port);
1393 scm_c_lock_port (port, &lock);
1394 if (lock)
1395 {
1396 scm_dynwind_unwind_handler (unlock_port, lock, SCM_F_WIND_EXPLICITLY);
1397 scm_dynwind_rewind_handler (lock_port, lock, 0);
1398 }
14dcb5cc 1399}
92c0ebac 1400#undef FUNC_NAME
14dcb5cc
AW
1401
1402
1403\f
1404
19b8d12b 1405/* Input. */
7be1705d 1406
0d959103
AW
1407int
1408scm_get_byte_or_eof (SCM port)
1409{
92c0ebac 1410 scm_i_pthread_mutex_t *lock;
0d959103
AW
1411 int ret;
1412
92c0ebac 1413 scm_c_lock_port (port, &lock);
0d959103 1414 ret = scm_get_byte_or_eof_unlocked (port);
92c0ebac
AW
1415 if (lock)
1416 scm_i_pthread_mutex_unlock (lock);
0d959103
AW
1417
1418 return ret;
1419}
1420
1421int
1422scm_peek_byte_or_eof (SCM port)
1423{
92c0ebac 1424 scm_i_pthread_mutex_t *lock;
0d959103
AW
1425 int ret;
1426
92c0ebac 1427 scm_c_lock_port (port, &lock);
0d959103 1428 ret = scm_peek_byte_or_eof_unlocked (port);
92c0ebac
AW
1429 if (lock)
1430 scm_i_pthread_mutex_unlock (lock);
0d959103
AW
1431
1432 return ret;
1433}
1434
19b8d12b
AW
1435/* scm_c_read
1436 *
1437 * Used by an application to read an arbitrary number of bytes from an
1438 * SCM port. Same semantics as libc read, except that scm_c_read only
1439 * returns less than SIZE bytes if at end-of-file.
1440 *
1441 * Warning: Doesn't update port line and column counts! */
7b292a9d 1442
19b8d12b
AW
1443/* This structure, and the following swap_buffer function, are used
1444 for temporarily swapping a port's own read buffer, and the buffer
1445 that the caller of scm_c_read provides. */
1446struct port_and_swap_buffer
1447{
1448 scm_t_port *pt; /* The port whose read buffer is exchanged. */
1449 unsigned char *buffer; /* The buffer that PT is not using right now. */
1450 size_t size; /* Size of BUFFER, in bytes. */
1451};
7b292a9d 1452
19b8d12b
AW
1453static void
1454swap_buffer (void *data)
1455{
1456 struct port_and_swap_buffer *psb = (struct port_and_swap_buffer *) data;
1457 unsigned char *old_buf = psb->pt->read_buf;
1458 size_t old_size = psb->pt->read_buf_size;
7be1705d 1459
19b8d12b
AW
1460 /* Make the port use (buffer, size) from the struct. */
1461 psb->pt->read_pos = psb->pt->read_buf = psb->pt->read_end = psb->buffer;
1462 psb->pt->read_buf_size = psb->size;
7b292a9d 1463
19b8d12b
AW
1464 /* Save the port's old (buffer, size) in the struct. */
1465 psb->buffer = old_buf;
1466 psb->size = old_size;
7b292a9d
LC
1467}
1468
f6f4feb0
MW
1469static int scm_i_fill_input_unlocked (SCM port);
1470
19b8d12b 1471size_t
be632904 1472scm_c_read_unlocked (SCM port, void *buffer, size_t size)
19b8d12b 1473#define FUNC_NAME "scm_c_read"
7b292a9d
LC
1474{
1475 scm_t_port *pt;
f6f4feb0 1476 scm_t_port_internal *pti;
19b8d12b
AW
1477 size_t n_read = 0, n_available;
1478 struct port_and_swap_buffer psb;
889975e5 1479
19b8d12b 1480 SCM_VALIDATE_OPINPORT (1, port);
889975e5 1481
19b8d12b 1482 pt = SCM_PTAB_ENTRY (port);
f6f4feb0 1483 pti = SCM_PORT_GET_INTERNAL (port);
19b8d12b
AW
1484 if (pt->rw_active == SCM_PORT_WRITE)
1485 SCM_PORT_DESCRIPTOR (port)->flush (port);
889975e5 1486
19b8d12b
AW
1487 if (pt->rw_random)
1488 pt->rw_active = SCM_PORT_READ;
889975e5 1489
19b8d12b
AW
1490 /* Take bytes first from the port's read buffer. */
1491 if (pt->read_pos < pt->read_end)
1492 {
1493 n_available = min (size, pt->read_end - pt->read_pos);
1494 memcpy (buffer, pt->read_pos, n_available);
1495 buffer = (char *) buffer + n_available;
1496 pt->read_pos += n_available;
1497 n_read += n_available;
1498 size -= n_available;
1499 }
889975e5 1500
19b8d12b
AW
1501 /* Avoid the scm_dynwind_* costs if we now have enough data. */
1502 if (size == 0)
1503 return n_read;
b5cb4464 1504
f6f4feb0
MW
1505 /* Now we will call scm_i_fill_input_unlocked repeatedly until we have
1506 read the requested number of bytes. (Note that a single
1507 scm_i_fill_input_unlocked call does not guarantee to fill the whole
1508 of the port's read buffer.) */
d1b9f8ac 1509 if (pt->read_buf_size <= 1
f6f4feb0 1510 && pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
b5cb4464 1511 {
f6f4feb0
MW
1512 /* The port that we are reading from is unbuffered - i.e. does not
1513 have its own persistent buffer - but we have a buffer, provided
1514 by our caller, that is the right size for the data that is
1515 wanted. For the following scm_i_fill_input_unlocked calls,
1516 therefore, we use the buffer in hand as the port's read buffer.
1517
1518 We need to make sure that the port's normal (1 byte) buffer is
1519 reinstated in case one of the scm_i_fill_input_unlocked ()
1520 calls throws an exception; we use the scm_dynwind_* API to
1521 achieve that.
75192345
MG
1522
1523 A consequence of this optimization is that the fill_input
1524 functions can't unget characters. That'll push data to the
1525 pushback buffer instead of this psb buffer. */
1526#if SCM_DEBUG == 1
1527 unsigned char *pback = pt->putback_buf;
1528#endif
6d227556
NJ
1529 psb.pt = pt;
1530 psb.buffer = buffer;
1531 psb.size = size;
1532 scm_dynwind_begin (SCM_F_DYNWIND_REWINDABLE);
1533 scm_dynwind_rewind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1534 scm_dynwind_unwind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1535
f6f4feb0
MW
1536 /* Call scm_i_fill_input_unlocked until we have all the bytes that
1537 we need, or we hit EOF. */
1538 while (pt->read_buf_size && (scm_i_fill_input_unlocked (port) != EOF))
6d227556
NJ
1539 {
1540 pt->read_buf_size -= (pt->read_end - pt->read_pos);
1541 pt->read_pos = pt->read_buf = pt->read_end;
1542 }
75192345
MG
1543#if SCM_DEBUG == 1
1544 if (pback != pt->putback_buf
1545 || pt->read_buf - (unsigned char *) buffer < 0)
1546 scm_misc_error (FUNC_NAME,
1547 "scm_c_read must not call a fill function that pushes "
1548 "back characters onto an unbuffered port", SCM_EOL);
1549#endif
6d227556 1550 n_read += pt->read_buf - (unsigned char *) buffer;
75192345 1551
6d227556
NJ
1552 /* Reinstate the port's normal buffer. */
1553 scm_dynwind_end ();
1554 }
1555 else
1556 {
1557 /* The port has its own buffer. It is important that we use it,
1558 even if it happens to be smaller than our caller's buffer, so
1559 that a custom port implementation's entry points (in
1560 particular, fill_input) can rely on the buffer always being
1561 the same as they first set up. */
f6f4feb0 1562 while (size && (scm_i_fill_input_unlocked (port) != EOF))
6d227556
NJ
1563 {
1564 n_available = min (size, pt->read_end - pt->read_pos);
1565 memcpy (buffer, pt->read_pos, n_available);
1566 buffer = (char *) buffer + n_available;
1567 pt->read_pos += n_available;
1568 n_read += n_available;
1569 size -= n_available;
1570 }
1571 }
6fe692e9 1572
b5cb4464 1573 return n_read;
6fe692e9 1574}
693758d5 1575#undef FUNC_NAME
6fe692e9 1576
be632904
AW
1577size_t
1578scm_c_read (SCM port, void *buffer, size_t size)
1579{
92c0ebac 1580 scm_i_pthread_mutex_t *lock;
be632904
AW
1581 size_t ret;
1582
92c0ebac 1583 scm_c_lock_port (port, &lock);
be632904 1584 ret = scm_c_read_unlocked (port, buffer, size);
92c0ebac
AW
1585 if (lock)
1586 scm_i_pthread_mutex_unlock (lock);
1587
be632904
AW
1588
1589 return ret;
1590}
1591
19b8d12b
AW
1592/* Update the line and column number of PORT after consumption of C. */
1593static inline void
1594update_port_lf (scm_t_wchar c, SCM port)
6fe692e9 1595{
19b8d12b
AW
1596 switch (c)
1597 {
1598 case '\a':
1599 case EOF:
1600 break;
1601 case '\b':
1602 SCM_DECCOL (port);
1603 break;
1604 case '\n':
1605 SCM_INCLINE (port);
1606 break;
1607 case '\r':
1608 SCM_ZEROCOL (port);
1609 break;
1610 case '\t':
1611 SCM_TABCOL (port);
1612 break;
1613 default:
1614 SCM_INCCOL (port);
1615 break;
1616 }
1617}
6fe692e9 1618
/* Capacity, in bytes, of the buffers that hold the byte sequence of a
   single character in the decoding functions below.  */
19b8d12b 1619#define SCM_MBCHAR_BUF_SIZE (4)
6fe692e9 1620
19b8d12b
AW
1621/* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint.
1622 UTF8_BUF is assumed to contain a valid UTF-8 sequence. */
1623static scm_t_wchar
1624utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
283a1a0e 1625{
19b8d12b 1626 scm_t_wchar codepoint;
283a1a0e 1627
19b8d12b 1628 if (utf8_buf[0] <= 0x7f)
283a1a0e 1629 {
19b8d12b
AW
1630 assert (size == 1);
1631 codepoint = utf8_buf[0];
1632 }
1633 else if ((utf8_buf[0] & 0xe0) == 0xc0)
1634 {
1635 assert (size == 2);
1636 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL
1637 | (utf8_buf[1] & 0x3f);
1638 }
1639 else if ((utf8_buf[0] & 0xf0) == 0xe0)
1640 {
1641 assert (size == 3);
1642 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL
1643 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL
1644 | (utf8_buf[2] & 0x3f);
283a1a0e
GH
1645 }
1646 else
19b8d12b
AW
1647 {
1648 assert (size == 4);
1649 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL
1650 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL
1651 | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL
1652 | (utf8_buf[3] & 0x3f);
1653 }
283a1a0e 1654
19b8d12b 1655 return codepoint;
283a1a0e
GH
1656}
1657
19b8d12b
AW
1658/* Read a UTF-8 sequence from PORT. On success, return 0 and set
1659 *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8
1660 representation, and set *LEN to the length in bytes. Return
1661 `EILSEQ' on error. */
1662static int
1663get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
1664 scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1665{
1666#define ASSERT_NOT_EOF(b) \
1667 if (SCM_UNLIKELY ((b) == EOF)) \
1668 goto invalid_seq
1669#define CONSUME_PEEKED_BYTE() \
1670 pt->read_pos++
ee149d03 1671
19b8d12b
AW
1672 int byte;
1673 scm_t_port *pt;
ee149d03 1674
19b8d12b
AW
1675 *len = 0;
1676 pt = SCM_PTAB_ENTRY (port);
840ae05d 1677
0d959103 1678 byte = scm_get_byte_or_eof_unlocked (port);
19b8d12b 1679 if (byte == EOF)
6c951427 1680 {
19b8d12b
AW
1681 *codepoint = EOF;
1682 return 0;
1683 }
6c951427 1684
19b8d12b
AW
1685 buf[0] = (scm_t_uint8) byte;
1686 *len = 1;
6c951427 1687
19b8d12b
AW
1688 if (buf[0] <= 0x7f)
1689 /* 1-byte form. */
1690 *codepoint = buf[0];
1691 else if (buf[0] >= 0xc2 && buf[0] <= 0xdf)
1692 {
1693 /* 2-byte form. */
0d959103 1694 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b 1695 ASSERT_NOT_EOF (byte);
6c951427 1696
19b8d12b
AW
1697 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1698 goto invalid_seq;
6c951427 1699
19b8d12b
AW
1700 CONSUME_PEEKED_BYTE ();
1701 buf[1] = (scm_t_uint8) byte;
1702 *len = 2;
1703
1704 *codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL
1705 | (buf[1] & 0x3f);
6c951427 1706 }
19b8d12b 1707 else if ((buf[0] & 0xf0) == 0xe0)
6c951427 1708 {
19b8d12b 1709 /* 3-byte form. */
0d959103 1710 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b 1711 ASSERT_NOT_EOF (byte);
6c951427 1712
19b8d12b
AW
1713 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80
1714 || (buf[0] == 0xe0 && byte < 0xa0)
1715 || (buf[0] == 0xed && byte > 0x9f)))
1716 goto invalid_seq;
6c951427 1717
19b8d12b
AW
1718 CONSUME_PEEKED_BYTE ();
1719 buf[1] = (scm_t_uint8) byte;
1720 *len = 2;
1721
0d959103 1722 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b
AW
1723 ASSERT_NOT_EOF (byte);
1724
1725 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1726 goto invalid_seq;
1727
1728 CONSUME_PEEKED_BYTE ();
1729 buf[2] = (scm_t_uint8) byte;
1730 *len = 3;
1731
1732 *codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL
1733 | ((scm_t_wchar) buf[1] & 0x3f) << 6UL
1734 | (buf[2] & 0x3f);
6c951427 1735 }
19b8d12b
AW
1736 else if (buf[0] >= 0xf0 && buf[0] <= 0xf4)
1737 {
1738 /* 4-byte form. */
0d959103 1739 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b 1740 ASSERT_NOT_EOF (byte);
6c951427 1741
19b8d12b
AW
1742 if (SCM_UNLIKELY (((byte & 0xc0) != 0x80)
1743 || (buf[0] == 0xf0 && byte < 0x90)
1744 || (buf[0] == 0xf4 && byte > 0x8f)))
1745 goto invalid_seq;
ee149d03 1746
19b8d12b
AW
1747 CONSUME_PEEKED_BYTE ();
1748 buf[1] = (scm_t_uint8) byte;
1749 *len = 2;
889975e5 1750
0d959103 1751 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b 1752 ASSERT_NOT_EOF (byte);
889975e5 1753
19b8d12b
AW
1754 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1755 goto invalid_seq;
63479112 1756
19b8d12b
AW
1757 CONSUME_PEEKED_BYTE ();
1758 buf[2] = (scm_t_uint8) byte;
1759 *len = 3;
63479112 1760
0d959103 1761 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b 1762 ASSERT_NOT_EOF (byte);
63479112 1763
19b8d12b
AW
1764 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1765 goto invalid_seq;
63479112 1766
19b8d12b
AW
1767 CONSUME_PEEKED_BYTE ();
1768 buf[3] = (scm_t_uint8) byte;
1769 *len = 4;
840ae05d 1770
19b8d12b
AW
1771 *codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL
1772 | ((scm_t_wchar) buf[1] & 0x3f) << 12UL
1773 | ((scm_t_wchar) buf[2] & 0x3f) << 6UL
1774 | (buf[3] & 0x3f);
ee149d03
JB
1775 }
1776 else
19b8d12b 1777 goto invalid_seq;
ee149d03 1778
19b8d12b 1779 return 0;
ee149d03 1780
19b8d12b
AW
1781 invalid_seq:
1782 /* Here we could choose the consume the faulty byte when it's not a
1783 valid starting byte, but it's not a requirement. What Section 3.9
1784 of Unicode 6.0.0 mandates, though, is to not consume a byte that
1785 would otherwise be a valid starting byte. */
ee149d03 1786
19b8d12b 1787 return EILSEQ;
ee149d03 1788
19b8d12b
AW
1789#undef CONSUME_PEEKED_BYTE
1790#undef ASSERT_NOT_EOF
1791}
1792
79eb47ea 1793/* Read an ISO-8859-1 codepoint (a byte) from PORT. On success, return
05b4d910
AW
1794 0 and set *CODEPOINT to the codepoint that was read, fill BUF with
1795 its UTF-8 representation, and set *LEN to the length in bytes.
1796 Return `EILSEQ' on error. */
79eb47ea
AW
1797static int
1798get_latin1_codepoint (SCM port, scm_t_wchar *codepoint,
1799 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1800{
1801 *codepoint = scm_get_byte_or_eof_unlocked (port);
1802
1803 if (*codepoint == EOF)
1804 *len = 0;
1805 else
1806 {
1807 *len = 1;
1808 buf[0] = *codepoint;
1809 }
1810 return 0;
1811}
1812
19b8d12b
AW
1813/* Likewise, read a byte sequence from PORT, passing it through its
1814 input conversion descriptor. */
1815static int
1816get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
1817 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1818{
6c98257f 1819 scm_t_iconv_descriptors *id;
19b8d12b 1820 scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
f6f4feb0 1821 size_t input_size = 0;
19b8d12b 1822
f6f4feb0 1823 id = scm_i_port_iconv_descriptors (port, SCM_PORT_READ);
19b8d12b 1824
f6f4feb0 1825 for (;;)
19b8d12b 1826 {
f6f4feb0
MW
1827 int byte_read;
1828 char *input, *output;
19b8d12b
AW
1829 size_t input_left, output_left, done;
1830
0d959103 1831 byte_read = scm_get_byte_or_eof_unlocked (port);
f6f4feb0 1832 if (SCM_UNLIKELY (byte_read == EOF))
19b8d12b 1833 {
f6f4feb0
MW
1834 if (SCM_LIKELY (input_size == 0))
1835 {
1836 *codepoint = (scm_t_wchar) EOF;
1837 *len = input_size;
1838 return 0;
1839 }
1840 else
1841 {
1842 /* EOF found in the middle of a multibyte character. */
1843 scm_i_set_pending_eof (port);
1844 return EILSEQ;
1845 }
19b8d12b
AW
1846 }
1847
f6f4feb0 1848 buf[input_size++] = byte_read;
19b8d12b
AW
1849
1850 input = buf;
f6f4feb0
MW
1851 input_left = input_size;
1852 output = (char *) utf8_buf;
19b8d12b
AW
1853 output_left = sizeof (utf8_buf);
1854
6c98257f 1855 done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
f6f4feb0 1856
19b8d12b
AW
1857 if (done == (size_t) -1)
1858 {
f6f4feb0
MW
1859 int err = errno;
1860 if (SCM_LIKELY (err == EINVAL))
1861 /* The input byte sequence did not form a complete
1862 character. Read another byte and try again. */
1863 continue;
1864 else
1865 return err;
19b8d12b
AW
1866 }
1867 else
f6f4feb0
MW
1868 {
1869 size_t output_size = sizeof (utf8_buf) - output_left;
1870 if (SCM_LIKELY (output_size > 0))
1871 {
1872 /* iconv generated output. Convert the UTF8_BUF sequence
1873 to a Unicode code point. */
1874 *codepoint = utf8_to_codepoint (utf8_buf, output_size);
1875 *len = input_size;
1876 return 0;
1877 }
1878 else
1879 {
1880 /* iconv consumed some bytes without producing any output.
1881 Most likely this means that a Unicode byte-order mark
1882 (BOM) was consumed, which should not be included in the
1883 returned buf. Shift any remaining bytes to the beginning
1884 of buf, and continue the loop. */
1885 memmove (buf, input, input_left);
1886 input_size = input_left;
1887 continue;
1888 }
1889 }
19b8d12b 1890 }
19b8d12b
AW
1891}
1892
1893/* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
1894 with the byte representation of the codepoint in PORT's encoding, and
1895 set *LEN to the length in bytes of that representation. Return 0 on
1896 success and an errno value on error. */
24ea9f9c 1897static SCM_C_INLINE int
19b8d12b
AW
1898get_codepoint (SCM port, scm_t_wchar *codepoint,
1899 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1900{
1901 int err;
1902 scm_t_port *pt = SCM_PTAB_ENTRY (port);
f6f4feb0 1903 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
19b8d12b 1904
f6f4feb0 1905 if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
19b8d12b 1906 err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
f6f4feb0 1907 else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
79eb47ea 1908 err = get_latin1_codepoint (port, codepoint, buf, len);
19b8d12b
AW
1909 else
1910 err = get_iconv_codepoint (port, codepoint, buf, len);
1911
1912 if (SCM_LIKELY (err == 0))
f6f4feb0
MW
1913 {
1914 if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read))
1915 {
1916 /* Record that we're no longer at stream start. */
1917 pti->at_stream_start_for_bom_read = 0;
1918 if (pt->rw_random)
1919 pti->at_stream_start_for_bom_write = 0;
1920
1921 /* If we just read a BOM in an encoding that recognizes them,
1922 then silently consume it and read another code point. */
1923 if (SCM_UNLIKELY
1924 (*codepoint == SCM_UNICODE_BOM
1925 && (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8
1926 || strcmp (pt->encoding, "UTF-16") == 0
1927 || strcmp (pt->encoding, "UTF-32") == 0)))
1928 return get_codepoint (port, codepoint, buf, len);
1929 }
1930 update_port_lf (*codepoint, port);
1931 }
19b8d12b
AW
1932 else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
1933 {
1934 *codepoint = '?';
1935 err = 0;
1936 update_port_lf (*codepoint, port);
1937 }
1938
1939 return err;
1940}
1941
1942/* Read a codepoint from PORT and return it. */
1943scm_t_wchar
be632904 1944scm_getc_unlocked (SCM port)
19b8d12b
AW
1945#define FUNC_NAME "scm_getc"
1946{
1947 int err;
1948 size_t len;
1949 scm_t_wchar codepoint;
1950 char buf[SCM_MBCHAR_BUF_SIZE];
1951
1952 err = get_codepoint (port, &codepoint, buf, &len);
1953 if (SCM_UNLIKELY (err != 0))
1954 /* At this point PORT should point past the invalid encoding, as per
1955 R6RS-lib Section 8.2.4. */
1956 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
1957
1958 return codepoint;
1959}
1960#undef FUNC_NAME
1961
be632904
AW
1962scm_t_wchar
1963scm_getc (SCM port)
1964{
92c0ebac 1965 scm_i_pthread_mutex_t *lock;
be632904
AW
1966 scm_t_wchar ret;
1967
92c0ebac 1968 scm_c_lock_port (port, &lock);
be632904 1969 ret = scm_getc_unlocked (port);
92c0ebac
AW
1970 if (lock)
1971 scm_i_pthread_mutex_unlock (lock);
1972
be632904
AW
1973
1974 return ret;
1975}
1976
19b8d12b
AW
1977SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
1978 (SCM port),
1979 "Return the next character available from @var{port}, updating\n"
1980 "@var{port} to point to the following character. If no more\n"
1981 "characters are available, the end-of-file object is returned.\n"
1982 "\n"
1983 "When @var{port}'s data cannot be decoded according to its\n"
1984 "character encoding, a @code{decoding-error} is raised and\n"
1985 "@var{port} points past the erroneous byte sequence.\n")
1986#define FUNC_NAME s_scm_read_char
1987{
1988 scm_t_wchar c;
1989 if (SCM_UNBNDP (port))
1990 port = scm_current_input_port ();
1991 SCM_VALIDATE_OPINPORT (1, port);
be632904 1992 c = scm_getc_unlocked (port);
19b8d12b
AW
1993 if (EOF == c)
1994 return SCM_EOF_VAL;
1995 return SCM_MAKE_CHAR (c);
1996}
1997#undef FUNC_NAME
1998
1999
2000\f
2001
2002/* Pushback. */
f6f4feb0 2003\f
19b8d12b 2004
f6f4feb0
MW
2005
2006static void
2007scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
2008#define FUNC_NAME "scm_unget_bytes"
19b8d12b
AW
2009{
2010 scm_t_port *pt = SCM_PTAB_ENTRY (port);
f6f4feb0 2011 size_t old_len, new_len;
19b8d12b 2012
f6f4feb0 2013 scm_i_clear_pending_eof (port);
19b8d12b 2014
f6f4feb0 2015 if (pt->read_buf != pt->putback_buf)
19b8d12b
AW
2016 /* switch to the put-back buffer. */
2017 {
2018 if (pt->putback_buf == NULL)
2019 {
f6f4feb0
MW
2020 pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
2021 ? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
19b8d12b
AW
2022 pt->putback_buf
2023 = (unsigned char *) scm_gc_malloc_pointerless
f6f4feb0 2024 (pt->putback_buf_size, "putback buffer");
19b8d12b
AW
2025 }
2026
2027 pt->saved_read_buf = pt->read_buf;
2028 pt->saved_read_pos = pt->read_pos;
2029 pt->saved_read_end = pt->read_end;
2030 pt->saved_read_buf_size = pt->read_buf_size;
2031
f6f4feb0
MW
2032 /* Put read_pos at the end of the buffer, so that ungets will not
2033 have to shift the buffer contents each time. */
2034 pt->read_buf = pt->putback_buf;
2035 pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
19b8d12b
AW
2036 pt->read_buf_size = pt->putback_buf_size;
2037 }
2038
f6f4feb0
MW
2039 old_len = pt->read_end - pt->read_pos;
2040 new_len = old_len + len;
2041
2042 if (new_len > pt->read_buf_size)
2043 /* The putback buffer needs to be enlarged. */
2044 {
2045 size_t new_buf_size;
2046 unsigned char *new_buf, *new_end, *new_pos;
2047
2048 new_buf_size = pt->read_buf_size * 2;
2049 if (new_buf_size < new_len)
2050 new_buf_size = new_len;
2051
2052 new_buf = (unsigned char *)
2053 scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
2054
2055 /* Put the bytes at the end of the buffer, so that future
2056 ungets won't need to shift the buffer. */
2057 new_end = new_buf + new_buf_size;
2058 new_pos = new_end - old_len;
2059 memcpy (new_pos, pt->read_pos, old_len);
2060
2061 pt->read_buf = pt->putback_buf = new_buf;
2062 pt->read_pos = new_pos;
2063 pt->read_end = new_end;
2064 pt->read_buf_size = pt->putback_buf_size = new_buf_size;
2065 }
2066 else if (pt->read_buf + len < pt->read_pos)
2067 /* If needed, shift the existing buffer contents up.
2068 This should not happen unless some external code
2069 manipulates the putback buffer pointers. */
2070 {
2071 unsigned char *new_end = pt->read_buf + pt->read_buf_size;
2072 unsigned char *new_pos = new_end - old_len;
2073
2074 memmove (new_pos, pt->read_pos, old_len);
2075 pt->read_pos = new_pos;
2076 pt->read_end = new_end;
2077 }
2078
2079 /* Move read_pos back and copy the bytes there. */
2080 pt->read_pos -= len;
2081 memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
2082
2083 if (pt->rw_active == SCM_PORT_WRITE)
2084 scm_flush (port);
19b8d12b
AW
2085
2086 if (pt->rw_random)
2087 pt->rw_active = SCM_PORT_READ;
2088}
2089#undef FUNC_NAME
2090
f6f4feb0
MW
2091void
2092scm_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
7f6c3f8f 2093{
f6f4feb0 2094 scm_i_unget_bytes_unlocked (buf, len, port);
7f6c3f8f
MW
2095}
2096
2097void
f6f4feb0
MW
2098scm_unget_byte_unlocked (int c, SCM port)
2099{
2100 unsigned char byte = c;
2101 scm_i_unget_bytes_unlocked (&byte, 1, port);
2102}
2103
2104void
2105scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
7f6c3f8f 2106{
f6f4feb0
MW
2107 scm_i_pthread_mutex_t *lock;
2108 scm_c_lock_port (port, &lock);
2109 scm_i_unget_bytes_unlocked (buf, len, port);
2110 if (lock)
2111 scm_i_pthread_mutex_unlock (lock);
2112}
7f6c3f8f 2113
c932ce0b
AW
2114void
2115scm_unget_byte (int c, SCM port)
2116{
f6f4feb0 2117 unsigned char byte = c;
92c0ebac
AW
2118 scm_i_pthread_mutex_t *lock;
2119 scm_c_lock_port (port, &lock);
f6f4feb0 2120 scm_i_unget_bytes_unlocked (&byte, 1, port);
92c0ebac
AW
2121 if (lock)
2122 scm_i_pthread_mutex_unlock (lock);
c932ce0b
AW
2123}
2124
19b8d12b 2125void
c932ce0b 2126scm_ungetc_unlocked (scm_t_wchar c, SCM port)
19b8d12b
AW
2127#define FUNC_NAME "scm_ungetc"
2128{
2129 scm_t_port *pt = SCM_PTAB_ENTRY (port);
be7ecef0 2130 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
19b8d12b
AW
2131 char *result;
2132 char result_buf[10];
19b8d12b 2133 size_t len;
19b8d12b 2134
19b8d12b 2135 len = sizeof (result_buf);
be7ecef0
AW
2136
2137 if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
2138 {
2139 if (c < 0xf0)
2140 {
2141 result_buf[0] = (char) c;
2142 result = result_buf;
2143 len = 1;
2144 }
2145 else
2146 result =
2147 (char *) u32_to_u8 ((uint32_t *) &c, 1, (uint8_t *) result_buf, &len);
2148 }
2149 else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1 && c <= 0xff)
2150 {
2151 result_buf[0] = (char) c;
2152 result = result_buf;
2153 len = 1;
2154 }
2155 else
2156 result = u32_conv_to_encoding (pt->encoding,
2157 (enum iconv_ilseq_handler) pt->ilseq_handler,
2158 (uint32_t *) &c, 1, NULL,
2159 result_buf, &len);
19b8d12b
AW
2160
2161 if (SCM_UNLIKELY (result == NULL || len == 0))
2162 scm_encoding_error (FUNC_NAME, errno,
2163 "conversion to port encoding failed",
2164 SCM_BOOL_F, SCM_MAKE_CHAR (c));
2165
f6f4feb0 2166 scm_i_unget_bytes_unlocked ((unsigned char *) result, len, port);
19b8d12b
AW
2167
2168 if (SCM_UNLIKELY (result != result_buf))
2169 free (result);
2170
2171 if (c == '\n')
2172 {
2173 /* What should col be in this case?
2174 * We'll leave it at -1.
2175 */
2176 SCM_LINUM (port) -= 1;
2177 }
2178 else
2179 SCM_COL(port) -= 1;
2180}
2181#undef FUNC_NAME
2182
c932ce0b
AW
2183void
2184scm_ungetc (scm_t_wchar c, SCM port)
2185{
92c0ebac
AW
2186 scm_i_pthread_mutex_t *lock;
2187 scm_c_lock_port (port, &lock);
c932ce0b 2188 scm_ungetc_unlocked (c, port);
92c0ebac
AW
2189 if (lock)
2190 scm_i_pthread_mutex_unlock (lock);
2191
c932ce0b 2192}
19b8d12b
AW
2193
2194void
c932ce0b 2195scm_ungets_unlocked (const char *s, int n, SCM port)
19b8d12b
AW
2196{
2197 /* This is simple minded and inefficient, but unreading strings is
2198 * probably not a common operation, and remember that line and
2199 * column numbers have to be handled...
2200 *
2201 * Please feel free to write an optimized version!
2202 */
2203 while (n--)
c932ce0b 2204 scm_ungetc_unlocked (s[n], port);
19b8d12b
AW
2205}
2206
c932ce0b
AW
2207void
2208scm_ungets (const char *s, int n, SCM port)
2209{
92c0ebac
AW
2210 scm_i_pthread_mutex_t *lock;
2211 scm_c_lock_port (port, &lock);
c932ce0b 2212 scm_ungets_unlocked (s, n, port);
92c0ebac
AW
2213 if (lock)
2214 scm_i_pthread_mutex_unlock (lock);
2215
c932ce0b 2216}
19b8d12b
AW
2217
2218SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
1bbd0b84 2219 (SCM port),
1e6808ea
MG
2220 "Return the next character available from @var{port},\n"
2221 "@emph{without} updating @var{port} to point to the following\n"
2222 "character. If no more characters are available, the\n"
c2dfff19
KR
2223 "end-of-file object is returned.\n"
2224 "\n"
2225 "The value returned by\n"
1e6808ea
MG
2226 "a call to @code{peek-char} is the same as the value that would\n"
2227 "have been returned by a call to @code{read-char} on the same\n"
2228 "port. The only difference is that the very next call to\n"
2229 "@code{read-char} or @code{peek-char} on that @var{port} will\n"
2230 "return the value returned by the preceding call to\n"
2231 "@code{peek-char}. In particular, a call to @code{peek-char} on\n"
2232 "an interactive port will hang waiting for input whenever a call\n"
c62da8f8
LC
2233 "to @code{read-char} would have hung.\n"
2234 "\n"
2235 "As for @code{read-char}, a @code{decoding-error} may be raised\n"
2236 "if such a situation occurs. However, unlike with @code{read-char},\n"
2237 "@var{port} still points at the beginning of the erroneous byte\n"
2238 "sequence when the error is raised.\n")
1bbd0b84 2239#define FUNC_NAME s_scm_peek_char
ee149d03 2240{
c62da8f8 2241 int err;
fd5eec2b
LC
2242 SCM result;
2243 scm_t_wchar c;
2244 char bytes[SCM_MBCHAR_BUF_SIZE];
7f6c3f8f 2245 long column, line;
fe8935d4 2246 size_t len = 0;
fd5eec2b 2247
ee149d03 2248 if (SCM_UNBNDP (port))
9de87eea 2249 port = scm_current_input_port ();
b2456dd4 2250 SCM_VALIDATE_OPINPORT (1, port);
fd5eec2b
LC
2251
2252 column = SCM_COL (port);
2253 line = SCM_LINUM (port);
2254
c62da8f8 2255 err = get_codepoint (port, &c, bytes, &len);
fd5eec2b 2256
f6f4feb0 2257 scm_i_unget_bytes_unlocked ((unsigned char *) bytes, len, port);
fd5eec2b 2258
c62da8f8
LC
2259 SCM_COL (port) = column;
2260 SCM_LINUM (port) = line;
fd5eec2b 2261
c62da8f8
LC
2262 if (SCM_UNLIKELY (err != 0))
2263 {
2264 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
2265
2266 /* Shouldn't happen since `catch' always aborts to prompt. */
2267 result = SCM_BOOL_F;
fd5eec2b 2268 }
c62da8f8 2269 else if (c == EOF)
69bc9ff3 2270 {
f6f4feb0
MW
2271 scm_i_set_pending_eof (port);
2272 result = SCM_EOF_VAL;
69bc9ff3 2273 }
c62da8f8
LC
2274 else
2275 result = SCM_MAKE_CHAR (c);
fd5eec2b
LC
2276
2277 return result;
3cb988bd 2278}
1bbd0b84 2279#undef FUNC_NAME
3cb988bd 2280
1be4270a 2281SCM_DEFINE (scm_unread_char, "unread-char", 1, 1, 0,
1bbd0b84 2282 (SCM cobj, SCM port),
b7e64f8b
BT
2283 "Place character @var{cobj} in @var{port} so that it will be\n"
2284 "read by the next read operation. If called multiple times, the\n"
2285 "unread characters will be read again in last-in first-out\n"
2286 "order. If @var{port} is not supplied, the current input port\n"
2287 "is used.")
1bbd0b84 2288#define FUNC_NAME s_scm_unread_char
0f2d19dd
JB
2289{
2290 int c;
2291
34d19ef6 2292 SCM_VALIDATE_CHAR (1, cobj);
0f2d19dd 2293 if (SCM_UNBNDP (port))
9de87eea 2294 port = scm_current_input_port ();
b2456dd4 2295 SCM_VALIDATE_OPINPORT (2, port);
0f2d19dd 2296
7866a09b 2297 c = SCM_CHAR (cobj);
0f2d19dd 2298
c932ce0b 2299 scm_ungetc_unlocked (c, port);
0f2d19dd
JB
2300 return cobj;
2301}
1bbd0b84 2302#undef FUNC_NAME
0f2d19dd 2303
a1ec6916 2304SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0,
1bbd0b84 2305 (SCM str, SCM port),
b380b885
MD
2306 "Place the string @var{str} in @var{port} so that its characters will be\n"
2307 "read in subsequent read operations. If called multiple times, the\n"
2308 "unread characters will be read again in last-in first-out order. If\n"
2309 "@var{port} is not supplied, the current-input-port is used.")
1bbd0b84 2310#define FUNC_NAME s_scm_unread_string
ee1e7e13 2311{
889975e5 2312 int n;
34d19ef6 2313 SCM_VALIDATE_STRING (1, str);
ee1e7e13 2314 if (SCM_UNBNDP (port))
9de87eea 2315 port = scm_current_input_port ();
b2456dd4 2316 SCM_VALIDATE_OPINPORT (2, port);
ee1e7e13 2317
889975e5
MG
2318 n = scm_i_string_length (str);
2319
2320 while (n--)
c932ce0b 2321 scm_ungetc_unlocked (scm_i_string_ref (str, n), port);
ee1e7e13
MD
2322
2323 return str;
2324}
1bbd0b84 2325#undef FUNC_NAME
ee1e7e13 2326
840ae05d 2327
19b8d12b 2328\f
23f2b9a3 2329
19b8d12b 2330/* Manipulating the buffers. */
840ae05d 2331
4251ae2e
AW
2332/* This routine does not take any locks, as it is usually called as part
2333 of a port implementation. */
19b8d12b
AW
2334void
2335scm_port_non_buffer (scm_t_port *pt)
2336{
2337 pt->read_pos = pt->read_buf = pt->read_end = &pt->shortbuf;
2338 pt->write_buf = pt->write_pos = &pt->shortbuf;
2339 pt->read_buf_size = pt->write_buf_size = 1;
2340 pt->write_end = pt->write_buf + pt->write_buf_size;
840ae05d 2341}
8ab3d8a0 2342
19b8d12b
AW
2343/* this should only be called when the read buffer is empty. it
2344 tries to refill the read buffer. it returns the first char from
2345 the port, which is either EOF or *(pt->read_pos). */
f6f4feb0
MW
2346static int
2347scm_i_fill_input_unlocked (SCM port)
82893676 2348{
19b8d12b 2349 scm_t_port *pt = SCM_PTAB_ENTRY (port);
f6f4feb0 2350 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
8ab3d8a0 2351
19b8d12b 2352 assert (pt->read_pos == pt->read_end);
8ab3d8a0 2353
f6f4feb0
MW
2354 if (pti->pending_eof)
2355 {
2356 pti->pending_eof = 0;
2357 return EOF;
2358 }
2359
19b8d12b 2360 if (pt->read_buf == pt->putback_buf)
82893676 2361 {
19b8d12b
AW
2362 /* finished reading put-back chars. */
2363 pt->read_buf = pt->saved_read_buf;
2364 pt->read_pos = pt->saved_read_pos;
2365 pt->read_end = pt->saved_read_end;
2366 pt->read_buf_size = pt->saved_read_buf_size;
2367 if (pt->read_pos < pt->read_end)
2368 return *(pt->read_pos);
82893676 2369 }
19b8d12b 2370 return SCM_PORT_DESCRIPTOR (port)->fill_input (port);
82893676 2371}
82893676 2372
4251ae2e
AW
2373int
2374scm_fill_input (SCM port)
2375{
92c0ebac 2376 scm_i_pthread_mutex_t *lock;
4251ae2e
AW
2377 int ret;
2378
92c0ebac 2379 scm_c_lock_port (port, &lock);
4251ae2e 2380 ret = scm_fill_input_unlocked (port);
92c0ebac
AW
2381 if (lock)
2382 scm_i_pthread_mutex_unlock (lock);
2383
4251ae2e
AW
2384
2385 return ret;
2386}
2387
f6f4feb0
MW
2388/* Slow-path fallback for 'scm_get_byte_or_eof_unlocked' */
2389int
2390scm_slow_get_byte_or_eof_unlocked (SCM port)
d14af9f2 2391{
f6f4feb0
MW
2392 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2393
2394 if (pt->rw_active == SCM_PORT_WRITE)
2395 scm_flush_unlocked (port);
2396
2397 if (pt->rw_random)
2398 pt->rw_active = SCM_PORT_READ;
2399
2400 if (pt->read_pos >= pt->read_end)
2401 {
2402 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2403 return EOF;
2404 }
2405
2406 return *pt->read_pos++;
d14af9f2
MD
2407}
2408
f6f4feb0
MW
2409/* Slow-path fallback for 'scm_peek_byte_or_eof_unlocked' */
2410int
2411scm_slow_peek_byte_or_eof_unlocked (SCM port)
2412{
2413 scm_t_port *pt = SCM_PTAB_ENTRY (port);
d6a6989e 2414
f6f4feb0
MW
2415 if (pt->rw_active == SCM_PORT_WRITE)
2416 scm_flush_unlocked (port);
889975e5 2417
f6f4feb0
MW
2418 if (pt->rw_random)
2419 pt->rw_active = SCM_PORT_READ;
2420
2421 if (pt->read_pos >= pt->read_end)
2422 {
2423 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2424 {
2425 scm_i_set_pending_eof (port);
2426 return EOF;
2427 }
2428 }
2429
2430 return *pt->read_pos;
2431}
2432
a3ded465
AW
2433/* Move up to READ_LEN bytes from PORT's putback and/or read buffers
2434 into memory starting at DEST. Return the number of bytes moved.
2435 PORT's line/column numbers are left unchanged. */
19b8d12b
AW
2436size_t
2437scm_take_from_input_buffers (SCM port, char *dest, size_t read_len)
840ae05d 2438{
19b8d12b 2439 scm_t_port *pt = SCM_PTAB_ENTRY (port);
a3ded465 2440 size_t bytes_read = 0;
19b8d12b 2441 size_t from_buf = min (pt->read_end - pt->read_pos, read_len);
840ae05d 2442
19b8d12b 2443 if (from_buf > 0)
840ae05d 2444 {
19b8d12b
AW
2445 memcpy (dest, pt->read_pos, from_buf);
2446 pt->read_pos += from_buf;
a3ded465 2447 bytes_read += from_buf;
19b8d12b
AW
2448 read_len -= from_buf;
2449 dest += from_buf;
840ae05d 2450 }
3fe6190f 2451
19b8d12b
AW
2452 /* if putback was active, try the real input buffer too. */
2453 if (pt->read_buf == pt->putback_buf)
69bc9ff3 2454 {
19b8d12b
AW
2455 from_buf = min (pt->saved_read_end - pt->saved_read_pos, read_len);
2456 if (from_buf > 0)
2457 {
2458 memcpy (dest, pt->saved_read_pos, from_buf);
2459 pt->saved_read_pos += from_buf;
a3ded465 2460 bytes_read += from_buf;
19b8d12b 2461 }
69bc9ff3 2462 }
8ab3d8a0 2463
a3ded465 2464 return bytes_read;
840ae05d
JB
2465}
2466
19b8d12b
AW
2467/* Clear a port's read buffers, returning the contents. */
2468SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0,
1bbd0b84 2469 (SCM port),
19b8d12b
AW
2470 "This procedure clears a port's input buffers, similar\n"
2471 "to the way that force-output clears the output buffer. The\n"
2472 "contents of the buffers are returned as a single string, e.g.,\n"
a150979d 2473 "\n"
19b8d12b
AW
2474 "@lisp\n"
2475 "(define p (open-input-file ...))\n"
2476 "(drain-input p) => empty string, nothing buffered yet.\n"
2477 "(unread-char (read-char p) p)\n"
2478 "(drain-input p) => initial chars from p, up to the buffer size.\n"
2479 "@end lisp\n\n"
2480 "Draining the buffers may be useful for cleanly finishing\n"
2481 "buffered I/O so that the file descriptor can be used directly\n"
2482 "for further input.")
2483#define FUNC_NAME s_scm_drain_input
0f2d19dd 2484{
19b8d12b
AW
2485 SCM result;
2486 char *data;
2487 scm_t_port *pt;
2488 long count;
0f2d19dd 2489
19b8d12b
AW
2490 SCM_VALIDATE_OPINPORT (1, port);
2491 pt = SCM_PTAB_ENTRY (port);
2492
2493 count = pt->read_end - pt->read_pos;
2494 if (pt->read_buf == pt->putback_buf)
2495 count += pt->saved_read_end - pt->saved_read_pos;
2496
2497 if (count)
2498 {
2499 result = scm_i_make_string (count, &data, 0);
2500 scm_take_from_input_buffers (port, data, count);
2501 }
2502 else
2503 result = scm_nullstr;
2504
2505 return result;
d043d8c2 2506}
1bbd0b84 2507#undef FUNC_NAME
d043d8c2 2508
19b8d12b 2509void
4251ae2e 2510scm_end_input_unlocked (SCM port)
0f2d19dd 2511{
19b8d12b
AW
2512 long offset;
2513 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2514
f6f4feb0 2515 scm_i_clear_pending_eof (port);
19b8d12b
AW
2516 if (pt->read_buf == pt->putback_buf)
2517 {
2518 offset = pt->read_end - pt->read_pos;
2519 pt->read_buf = pt->saved_read_buf;
2520 pt->read_pos = pt->saved_read_pos;
2521 pt->read_end = pt->saved_read_end;
2522 pt->read_buf_size = pt->saved_read_buf_size;
2523 }
2524 else
2525 offset = 0;
2526
2527 SCM_PORT_DESCRIPTOR (port)->end_input (port, offset);
0f2d19dd
JB
2528}
2529
4251ae2e
AW
2530void
2531scm_end_input (SCM port)
2532{
92c0ebac
AW
2533 scm_i_pthread_mutex_t *lock;
2534 scm_c_lock_port (port, &lock);
4251ae2e 2535 scm_end_input_unlocked (port);
92c0ebac
AW
2536 if (lock)
2537 scm_i_pthread_mutex_unlock (lock);
2538
4251ae2e
AW
2539}
2540
19b8d12b
AW
2541SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0,
2542 (SCM port),
2543 "Flush the specified output port, or the current output port if @var{port}\n"
2544 "is omitted. The current output buffer contents are passed to the\n"
2545 "underlying port implementation (e.g., in the case of fports, the\n"
2546 "data will be written to the file and the output buffer will be cleared.)\n"
2547 "It has no effect on an unbuffered port.\n\n"
2548 "The return value is unspecified.")
2549#define FUNC_NAME s_scm_force_output
d043d8c2 2550{
19b8d12b
AW
2551 if (SCM_UNBNDP (port))
2552 port = scm_current_output_port ();
2553 else
2554 {
2555 port = SCM_COERCE_OUTPORT (port);
2556 SCM_VALIDATE_OPOUTPORT (1, port);
2557 }
4251ae2e 2558 scm_flush_unlocked (port);
564478fd 2559 return SCM_UNSPECIFIED;
d043d8c2 2560}
1bbd0b84 2561#undef FUNC_NAME
d043d8c2 2562
19b8d12b 2563void
4251ae2e 2564scm_flush_unlocked (SCM port)
0f2d19dd 2565{
19b8d12b 2566 SCM_PORT_DESCRIPTOR (port)->flush (port);
0f2d19dd
JB
2567}
2568
4251ae2e
AW
2569void
2570scm_flush (SCM port)
2571{
92c0ebac
AW
2572 scm_i_pthread_mutex_t *lock;
2573 scm_c_lock_port (port, &lock);
4251ae2e 2574 scm_flush_unlocked (port);
92c0ebac
AW
2575 if (lock)
2576 scm_i_pthread_mutex_unlock (lock);
2577
4251ae2e
AW
2578}
2579
f6f4feb0
MW
2580int
2581scm_fill_input_unlocked (SCM port)
337edc59 2582{
f6f4feb0
MW
2583 return scm_i_fill_input_unlocked (port);
2584}
337edc59 2585
d14af9f2 2586
19b8d12b 2587\f
d6a6989e 2588
19b8d12b 2589/* Output. */
889975e5 2590
0607ebbf
AW
2591void
2592scm_putc (char c, SCM port)
2593{
92c0ebac
AW
2594 scm_i_pthread_mutex_t *lock;
2595 scm_c_lock_port (port, &lock);
0607ebbf 2596 scm_putc_unlocked (c, port);
92c0ebac
AW
2597 if (lock)
2598 scm_i_pthread_mutex_unlock (lock);
2599
0607ebbf
AW
2600}
2601
2602void
2603scm_puts (const char *s, SCM port)
2604{
92c0ebac
AW
2605 scm_i_pthread_mutex_t *lock;
2606 scm_c_lock_port (port, &lock);
0607ebbf 2607 scm_puts_unlocked (s, port);
92c0ebac
AW
2608 if (lock)
2609 scm_i_pthread_mutex_unlock (lock);
2610
0607ebbf
AW
2611}
2612
19b8d12b
AW
2613/* scm_c_write
2614 *
2615 * Used by an application to write arbitrary number of bytes to an SCM
2616 * port. Similar semantics as libc write. However, unlike libc
2617 * write, scm_c_write writes the requested number of bytes and has no
2618 * return value.
2619 *
2620 * Warning: Doesn't update port line and column counts!
2621 */
9d9c66ba 2622void
f209aeee 2623scm_c_write_unlocked (SCM port, const void *ptr, size_t size)
19b8d12b 2624#define FUNC_NAME "scm_c_write"
9d9c66ba 2625{
19b8d12b
AW
2626 scm_t_port *pt;
2627 scm_t_ptob_descriptor *ptob;
9d9c66ba 2628
19b8d12b 2629 SCM_VALIDATE_OPOUTPORT (1, port);
9d9c66ba 2630
19b8d12b
AW
2631 pt = SCM_PTAB_ENTRY (port);
2632 ptob = SCM_PORT_DESCRIPTOR (port);
9d9c66ba 2633
19b8d12b 2634 if (pt->rw_active == SCM_PORT_READ)
4251ae2e 2635 scm_end_input_unlocked (port);
19b8d12b
AW
2636
2637 ptob->write (port, ptr, size);
2638
2639 if (pt->rw_random)
2640 pt->rw_active = SCM_PORT_WRITE;
889975e5 2641}
19b8d12b 2642#undef FUNC_NAME
889975e5 2643
f209aeee
AW
2644void
2645scm_c_write (SCM port, const void *ptr, size_t size)
2646{
92c0ebac
AW
2647 scm_i_pthread_mutex_t *lock;
2648 scm_c_lock_port (port, &lock);
f209aeee 2649 scm_c_write_unlocked (port, ptr, size);
92c0ebac
AW
2650 if (lock)
2651 scm_i_pthread_mutex_unlock (lock);
2652
f209aeee
AW
2653}
2654
19b8d12b
AW
2655/* scm_lfwrite
2656 *
2657 * This function differs from scm_c_write; it updates port line and
2658 * column. */
889975e5 2659void
f209aeee 2660scm_lfwrite_unlocked (const char *ptr, size_t size, SCM port)
889975e5 2661{
19b8d12b
AW
2662 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2663 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
f4bc4e59 2664
19b8d12b 2665 if (pt->rw_active == SCM_PORT_READ)
4251ae2e 2666 scm_end_input_unlocked (port);
f4bc4e59 2667
19b8d12b 2668 ptob->write (port, ptr, size);
f4bc4e59 2669
19b8d12b
AW
2670 for (; size; ptr++, size--)
2671 update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port);
d9544bf0 2672
19b8d12b
AW
2673 if (pt->rw_random)
2674 pt->rw_active = SCM_PORT_WRITE;
2675}
f4bc4e59 2676
f209aeee
AW
2677void
2678scm_lfwrite (const char *ptr, size_t size, SCM port)
2679{
92c0ebac
AW
2680 scm_i_pthread_mutex_t *lock;
2681 scm_c_lock_port (port, &lock);
f209aeee 2682 scm_lfwrite_unlocked (ptr, size, port);
92c0ebac
AW
2683 if (lock)
2684 scm_i_pthread_mutex_unlock (lock);
2685
f209aeee
AW
2686}
2687
19b8d12b
AW
2688/* Write STR to PORT from START inclusive to END exclusive. */
2689void
2690scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
2691{
2692 scm_t_port *pt = SCM_PTAB_ENTRY (port);
f4bc4e59 2693
19b8d12b 2694 if (pt->rw_active == SCM_PORT_READ)
4251ae2e 2695 scm_end_input_unlocked (port);
f4bc4e59 2696
19b8d12b
AW
2697 if (end == (size_t) -1)
2698 end = scm_i_string_length (str);
f4bc4e59 2699
fa980bcc 2700 scm_i_display_substring (str, start, end, port);
f4bc4e59 2701
19b8d12b
AW
2702 if (pt->rw_random)
2703 pt->rw_active = SCM_PORT_WRITE;
889975e5
MG
2704}
2705
19b8d12b
AW
2706
2707\f
2708
2709/* Querying and setting positions, and character availability. */
2710
2711SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0,
889975e5 2712 (SCM port),
19b8d12b
AW
2713 "Return @code{#t} if a character is ready on input @var{port}\n"
2714 "and return @code{#f} otherwise. If @code{char-ready?} returns\n"
2715 "@code{#t} then the next @code{read-char} operation on\n"
2716 "@var{port} is guaranteed not to hang. If @var{port} is a file\n"
2717 "port at end of file then @code{char-ready?} returns @code{#t}.\n"
2718 "\n"
2719 "@code{char-ready?} exists to make it possible for a\n"
2720 "program to accept characters from interactive ports without\n"
2721 "getting stuck waiting for input. Any input editors associated\n"
2722 "with such ports must make sure that characters whose existence\n"
2723 "has been asserted by @code{char-ready?} cannot be rubbed out.\n"
2724 "If @code{char-ready?} were to return @code{#f} at end of file,\n"
2725 "a port at end of file would be indistinguishable from an\n"
2726 "interactive port that has no ready characters.")
2727#define FUNC_NAME s_scm_char_ready_p
889975e5
MG
2728{
2729 scm_t_port *pt;
889975e5 2730
19b8d12b
AW
2731 if (SCM_UNBNDP (port))
2732 port = scm_current_input_port ();
2733 /* It's possible to close the current input port, so validate even in
2734 this case. */
2735 SCM_VALIDATE_OPINPORT (1, port);
889975e5
MG
2736
2737 pt = SCM_PTAB_ENTRY (port);
19b8d12b
AW
2738
2739 /* if the current read buffer is filled, or the
2740 last pushed-back char has been read and the saved buffer is
2741 filled, result is true. */
2742 if (pt->read_pos < pt->read_end
2743 || (pt->read_buf == pt->putback_buf
2744 && pt->saved_read_pos < pt->saved_read_end))
2745 return SCM_BOOL_T;
889975e5 2746 else
19b8d12b
AW
2747 {
2748 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
2749
2750 if (ptob->input_waiting)
2751 return scm_from_bool(ptob->input_waiting (port));
2752 else
2753 return SCM_BOOL_T;
2754 }
889975e5
MG
2755}
2756#undef FUNC_NAME
d6a6989e 2757
19b8d12b
AW
2758SCM_DEFINE (scm_seek, "seek", 3, 0, 0,
2759 (SCM fd_port, SCM offset, SCM whence),
0858753e 2760 "Sets the current position of @var{fd_port} to the integer\n"
19b8d12b
AW
2761 "@var{offset}, which is interpreted according to the value of\n"
2762 "@var{whence}.\n"
2763 "\n"
2764 "One of the following variables should be supplied for\n"
2765 "@var{whence}:\n"
2766 "@defvar SEEK_SET\n"
2767 "Seek from the beginning of the file.\n"
2768 "@end defvar\n"
2769 "@defvar SEEK_CUR\n"
2770 "Seek from the current position.\n"
2771 "@end defvar\n"
2772 "@defvar SEEK_END\n"
2773 "Seek from the end of the file.\n"
2774 "@end defvar\n"
0858753e 2775 "If @var{fd_port} is a file descriptor, the underlying system\n"
19b8d12b
AW
2776 "call is @code{lseek}. @var{port} may be a string port.\n"
2777 "\n"
2778 "The value returned is the new position in the file. This means\n"
2779 "that the current position of a port can be obtained using:\n"
2780 "@lisp\n"
2781 "(seek port 0 SEEK_CUR)\n"
2782 "@end lisp")
2783#define FUNC_NAME s_scm_seek
889975e5 2784{
19b8d12b 2785 int how;
889975e5 2786
19b8d12b 2787 fd_port = SCM_COERCE_OUTPORT (fd_port);
889975e5 2788
19b8d12b
AW
2789 how = scm_to_int (whence);
2790 if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END)
2791 SCM_OUT_OF_RANGE (3, whence);
da288f50 2792
19b8d12b
AW
2793 if (SCM_OPPORTP (fd_port))
2794 {
f6f4feb0 2795 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (fd_port);
19b8d12b
AW
2796 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (fd_port);
2797 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2798 off_t_or_off64_t rv;
2799
2800 if (!ptob->seek)
2801 SCM_MISC_ERROR ("port is not seekable",
2802 scm_cons (fd_port, SCM_EOL));
2803 else
f6f4feb0 2804 rv = ptob->seek (fd_port, off, how);
889975e5 2805
f6f4feb0
MW
2806 /* Set stream-start flags according to new position. */
2807 pti->at_stream_start_for_bom_read = (rv == 0);
2808 pti->at_stream_start_for_bom_write = (rv == 0);
b22e94db 2809
f6f4feb0 2810 scm_i_clear_pending_eof (fd_port);
889975e5 2811
19b8d12b
AW
2812 return scm_from_off_t_or_off64_t (rv);
2813 }
2814 else /* file descriptor?. */
2815 {
2816 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2817 off_t_or_off64_t rv;
2818 rv = lseek_or_lseek64 (scm_to_int (fd_port), off, how);
2819 if (rv == -1)
2820 SCM_SYSERROR;
2821 return scm_from_off_t_or_off64_t (rv);
2822 }
889975e5
MG
2823}
2824#undef FUNC_NAME
2825
19b8d12b
AW
/* Provide a no-op O_BINARY when the platform headers do not define
   one, so it can be or-ed into open() flags unconditionally.  */
#ifndef O_BINARY
#define O_BINARY 0
#endif

/* Mingw has ftruncate(), perhaps implemented above using chsize, but
   doesn't have the filename version truncate(), hence this code.  */
#if HAVE_FTRUNCATE && ! HAVE_TRUNCATE
/* Open FILE write-only, set its size to LENGTH with ftruncate, and
   close it again.  Returns -1 if the open or the ftruncate fails (in
   the latter case with errno preserved across the close); otherwise
   returns the result of close.  */
static int
truncate (const char *file, off_t length)
{
  int fdes = open (file, O_BINARY | O_WRONLY);

  if (fdes == -1)
    return -1;

  if (ftruncate (fdes, length) == -1)
    {
      /* Keep ftruncate's errno; close may overwrite it.  */
      int saved_errno = errno;
      close (fdes);
      errno = saved_errno;
      return -1;
    }

  return close (fdes);
}
#endif /* HAVE_FTRUNCATE && ! HAVE_TRUNCATE */
889975e5 2854
19b8d12b
AW
2855SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0,
2856 (SCM object, SCM length),
0858753e
AW
2857 "Truncate file @var{object} to @var{length} bytes. @var{object}\n"
2858 "can be a filename string, a port object, or an integer file\n"
2859 "descriptor.\n"
19b8d12b
AW
2860 "The return value is unspecified.\n"
2861 "\n"
2862 "For a port or file descriptor @var{length} can be omitted, in\n"
2863 "which case the file is truncated at the current position (per\n"
2864 "@code{ftell} above).\n"
2865 "\n"
2866 "On most systems a file can be extended by giving a length\n"
2867 "greater than the current size, but this is not mandatory in the\n"
2868 "POSIX standard.")
2869#define FUNC_NAME s_scm_truncate_file
889975e5 2870{
19b8d12b
AW
2871 int rv;
2872
2873 /* "object" can be a port, fdes or filename.
2874
2875 Negative "length" makes no sense, but it's left to truncate() or
2876 ftruncate() to give back an error for that (normally EINVAL).
2877 */
2878
2879 if (SCM_UNBNDP (length))
889975e5 2880 {
19b8d12b
AW
2881 /* must supply length if object is a filename. */
2882 if (scm_is_string (object))
2883 SCM_MISC_ERROR("must supply length if OBJECT is a filename", SCM_EOL);
2884
2885 length = scm_seek (object, SCM_INUM0, scm_from_int (SEEK_CUR));
2886 }
2887
2888 object = SCM_COERCE_OUTPORT (object);
2889 if (scm_is_integer (object))
2890 {
2891 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2892 SCM_SYSCALL (rv = ftruncate_or_ftruncate64 (scm_to_int (object),
2893 c_length));
2894 }
2895 else if (SCM_OPOUTPORTP (object))
2896 {
2897 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2898 scm_t_port *pt = SCM_PTAB_ENTRY (object);
2899 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (object);
b22e94db 2900
19b8d12b
AW
2901 if (!ptob->truncate)
2902 SCM_MISC_ERROR ("port is not truncatable", SCM_EOL);
889975e5 2903
f6f4feb0 2904 scm_i_clear_pending_eof (object);
19b8d12b 2905 if (pt->rw_active == SCM_PORT_READ)
4251ae2e 2906 scm_end_input_unlocked (object);
19b8d12b
AW
2907 else if (pt->rw_active == SCM_PORT_WRITE)
2908 ptob->flush (object);
889975e5 2909
19b8d12b
AW
2910 ptob->truncate (object, c_length);
2911 rv = 0;
889975e5
MG
2912 }
2913 else
2914 {
19b8d12b
AW
2915 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2916 char *str = scm_to_locale_string (object);
2917 int eno;
2918 SCM_SYSCALL (rv = truncate_or_truncate64 (str, c_length));
2919 eno = errno;
2920 free (str);
2921 errno = eno;
889975e5 2922 }
19b8d12b
AW
2923 if (rv == -1)
2924 SCM_SYSERROR;
2925 return SCM_UNSPECIFIED;
889975e5 2926}
19b8d12b 2927#undef FUNC_NAME
889975e5 2928
19b8d12b
AW
2929SCM_DEFINE (scm_port_line, "port-line", 1, 0, 0,
2930 (SCM port),
2931 "Return the current line number for @var{port}.\n"
889975e5 2932 "\n"
19b8d12b
AW
2933 "The first line of a file is 0. But you might want to add 1\n"
2934 "when printing line numbers, since starting from 1 is\n"
2935 "traditional in error messages, and likely to be more natural to\n"
2936 "non-programmers.")
2937#define FUNC_NAME s_scm_port_line
889975e5 2938{
19b8d12b
AW
2939 port = SCM_COERCE_OUTPORT (port);
2940 SCM_VALIDATE_OPENPORT (1, port);
2941 return scm_from_long (SCM_LINUM (port));
889975e5
MG
2942}
2943#undef FUNC_NAME
2944
19b8d12b
AW
2945SCM_DEFINE (scm_set_port_line_x, "set-port-line!", 2, 0, 0,
2946 (SCM port, SCM line),
2947 "Set the current line number for @var{port} to @var{line}. The\n"
2948 "first line of a file is 0.")
2949#define FUNC_NAME s_scm_set_port_line_x
889975e5 2950{
19b8d12b
AW
2951 port = SCM_COERCE_OUTPORT (port);
2952 SCM_VALIDATE_OPENPORT (1, port);
2953 SCM_PTAB_ENTRY (port)->line_number = scm_to_long (line);
2954 return SCM_UNSPECIFIED;
2955}
2956#undef FUNC_NAME
889975e5 2957
19b8d12b
AW
2958SCM_DEFINE (scm_port_column, "port-column", 1, 0, 0,
2959 (SCM port),
2960 "Return the current column number of @var{port}.\n"
2961 "If the number is\n"
2962 "unknown, the result is #f. Otherwise, the result is a 0-origin integer\n"
2963 "- i.e. the first character of the first line is line 0, column 0.\n"
2964 "(However, when you display a file position, for example in an error\n"
2965 "message, we recommend you add 1 to get 1-origin integers. This is\n"
2966 "because lines and column numbers traditionally start with 1, and that is\n"
2967 "what non-programmers will find most natural.)")
2968#define FUNC_NAME s_scm_port_column
2969{
2970 port = SCM_COERCE_OUTPORT (port);
2971 SCM_VALIDATE_OPENPORT (1, port);
2972 return scm_from_int (SCM_COL (port));
2973}
2974#undef FUNC_NAME
889975e5 2975
19b8d12b
AW
2976SCM_DEFINE (scm_set_port_column_x, "set-port-column!", 2, 0, 0,
2977 (SCM port, SCM column),
2978 "Set the current column of @var{port}. Before reading the first\n"
2979 "character on a line the column should be 0.")
2980#define FUNC_NAME s_scm_set_port_column_x
2981{
2982 port = SCM_COERCE_OUTPORT (port);
2983 SCM_VALIDATE_OPENPORT (1, port);
2984 SCM_PTAB_ENTRY (port)->column_number = scm_to_int (column);
2985 return SCM_UNSPECIFIED;
2986}
2987#undef FUNC_NAME
889975e5 2988
19b8d12b
AW
2989SCM_DEFINE (scm_port_filename, "port-filename", 1, 0, 0,
2990 (SCM port),
2991 "Return the filename associated with @var{port}, or @code{#f}\n"
2992 "if no filename is associated with the port.")
2993#define FUNC_NAME s_scm_port_filename
2994{
2995 port = SCM_COERCE_OUTPORT (port);
2996 SCM_VALIDATE_OPENPORT (1, port);
2997 return SCM_FILENAME (port);
2998}
2999#undef FUNC_NAME
889975e5 3000
19b8d12b
AW
3001SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0,
3002 (SCM port, SCM filename),
3003 "Change the filename associated with @var{port}, using the current input\n"
3004 "port if none is specified. Note that this does not change the port's\n"
3005 "source of data, but only the value that is returned by\n"
3006 "@code{port-filename} and reported in diagnostic output.")
3007#define FUNC_NAME s_scm_set_port_filename_x
3008{
3009 port = SCM_COERCE_OUTPORT (port);
3010 SCM_VALIDATE_OPENPORT (1, port);
3011 /* We allow the user to set the filename to whatever he likes. */
3012 SCM_SET_FILENAME (port, filename);
889975e5
MG
3013 return SCM_UNSPECIFIED;
3014}
3015#undef FUNC_NAME
3016
3017
19b8d12b
AW
3018\f
3019
3020/* Implementation helpers for port printing functions. */
889975e5 3021
f12733c9
MD
3022void
3023scm_print_port_mode (SCM exp, SCM port)
3024{
0607ebbf 3025 scm_puts_unlocked (SCM_CLOSEDP (exp)
f12733c9 3026 ? "closed: "
f9a64404
DH
3027 : (SCM_RDNG & SCM_CELL_WORD_0 (exp)
3028 ? (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
f12733c9
MD
3029 ? "input-output: "
3030 : "input: ")
f9a64404 3031 : (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
f12733c9
MD
3032 ? "output: "
3033 : "bogus: ")),
3034 port);
3035}
1cc91f1b 3036
f12733c9 3037int
e81d98ec 3038scm_port_print (SCM exp, SCM port, scm_print_state *pstate SCM_UNUSED)
0f2d19dd 3039{
f12733c9
MD
3040 char *type = SCM_PTOBNAME (SCM_PTOBNUM (exp));
3041 if (!type)
3042 type = "port";
0607ebbf 3043 scm_puts_unlocked ("#<", port);
f12733c9 3044 scm_print_port_mode (exp, port);
0607ebbf
AW
3045 scm_puts_unlocked (type, port);
3046 scm_putc_unlocked (' ', port);
0345e278 3047 scm_uintprint (SCM_CELL_WORD_1 (exp), 16, port);
0607ebbf 3048 scm_putc_unlocked ('>', port);
f12733c9 3049 return 1;
0f2d19dd
JB
3050}
3051
19b8d12b
AW
3052
3053\f
3054
3055/* Iterating over all ports. */
3056
3057struct for_each_data
3058{
3059 void (*proc) (void *data, SCM p);
3060 void *data;
3061};
3062
3063static SCM
3064for_each_trampoline (void *data, SCM port, SCM result)
3065{
3066 struct for_each_data *d = data;
3067
3068 d->proc (d->data, port);
3069
3070 return result;
3071}
3072
3073void
3074scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data)
3075{
3076 struct for_each_data d;
3077
3078 d.proc = proc;
3079 d.data = data;
3080
3081 scm_c_weak_set_fold (for_each_trampoline, &d, SCM_EOL,
3082 scm_i_port_weak_set);
3083}
3084
3085static void
3086scm_for_each_trampoline (void *data, SCM port)
3087{
3088 scm_call_1 (SCM_PACK_POINTER (data), port);
3089}
3090
3091SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0,
3092 (SCM proc),
3093 "Apply @var{proc} to each port in the Guile port table\n"
3094 "in turn. The return value is unspecified. More specifically,\n"
3095 "@var{proc} is applied exactly once to every port that exists\n"
0858753e
AW
3096 "in the system at the time @code{port-for-each} is invoked.\n"
3097 "Changes to the port table while @code{port-for-each} is running\n"
3098 "have no effect as far as @code{port-for-each} is concerned.")
19b8d12b
AW
3099#define FUNC_NAME s_scm_port_for_each
3100{
3101 SCM_VALIDATE_PROC (1, proc);
3102
3103 scm_c_port_for_each (scm_for_each_trampoline, SCM_UNPACK_POINTER (proc));
3104
3105 return SCM_UNSPECIFIED;
3106}
3107#undef FUNC_NAME
3108
3109static void
3110flush_output_port (void *closure, SCM port)
3111{
3112 if (SCM_OPOUTPORTP (port))
4251ae2e 3113 scm_flush_unlocked (port);
19b8d12b
AW
3114}
3115
3116SCM_DEFINE (scm_flush_all_ports, "flush-all-ports", 0, 0, 0,
3117 (),
3118 "Equivalent to calling @code{force-output} on\n"
3119 "all open output ports. The return value is unspecified.")
3120#define FUNC_NAME s_scm_flush_all_ports
3121{
3122 scm_c_port_for_each (&flush_output_port, NULL);
3123 return SCM_UNSPECIFIED;
3124}
3125#undef FUNC_NAME
3126
3127
0f2d19dd 3128\f
ee149d03 3129
d68fee48 3130/* Void ports. */
0f2d19dd 3131
92c2555f 3132scm_t_bits scm_tc16_void_port = 0;
0f2d19dd 3133
e81d98ec 3134static int fill_input_void_port (SCM port SCM_UNUSED)
283a1a0e 3135{
70df8af6 3136 return EOF;
283a1a0e
GH
3137}
3138
31703ab8 3139static void
e81d98ec
DH
3140write_void_port (SCM port SCM_UNUSED,
3141 const void *data SCM_UNUSED,
3142 size_t size SCM_UNUSED)
31703ab8
GH
3143{
3144}
3145
d617ee18
MV
3146static SCM
3147scm_i_void_port (long mode_bits)
0f2d19dd 3148{
2721f918
AW
3149 SCM ret;
3150
3151 ret = scm_c_make_port (scm_tc16_void_port, mode_bits, 0);
da220f27 3152
2721f918 3153 scm_port_non_buffer (SCM_PTAB_ENTRY (ret));
402788a9 3154
2721f918 3155 return ret;
0f2d19dd
JB
3156}
3157
d617ee18
MV
3158SCM
3159scm_void_port (char *mode_str)
3160{
3161 return scm_i_void_port (scm_mode_bits (mode_str));
3162}
3163
a1ec6916 3164SCM_DEFINE (scm_sys_make_void_port, "%make-void-port", 1, 0, 0,
1bbd0b84 3165 (SCM mode),
70df8af6 3166 "Create and return a new void port. A void port acts like\n"
bb2c02f2 3167 "@file{/dev/null}. The @var{mode} argument\n"
70df8af6 3168 "specifies the input/output modes for this port: see the\n"
b380b885 3169 "documentation for @code{open-file} in @ref{File Ports}.")
1bbd0b84 3170#define FUNC_NAME s_scm_sys_make_void_port
0f2d19dd 3171{
d617ee18 3172 return scm_i_void_port (scm_i_mode_bits (mode));
0f2d19dd 3173}
1bbd0b84 3174#undef FUNC_NAME
0f2d19dd 3175
19b8d12b 3176
0f2d19dd 3177\f
19b8d12b 3178
89545eba 3179/* Initialization. */
1cc91f1b 3180
0f2d19dd
JB
3181void
3182scm_init_ports ()
0f2d19dd 3183{
840ae05d 3184 /* lseek() symbols. */
e11e83f3
MV
3185 scm_c_define ("SEEK_SET", scm_from_int (SEEK_SET));
3186 scm_c_define ("SEEK_CUR", scm_from_int (SEEK_CUR));
3187 scm_c_define ("SEEK_END", scm_from_int (SEEK_END));
840ae05d 3188
70df8af6
GH
3189 scm_tc16_void_port = scm_make_port_type ("void", fill_input_void_port,
3190 write_void_port);
9de87eea 3191
f39448c5
AW
3192 cur_inport_fluid = scm_make_fluid ();
3193 cur_outport_fluid = scm_make_fluid ();
3194 cur_errport_fluid = scm_make_fluid ();
3195 cur_loadport_fluid = scm_make_fluid ();
9de87eea 3196
2721f918 3197 scm_i_port_weak_set = scm_c_make_weak_set (31);
d6a6989e 3198
a0599745 3199#include "libguile/ports.x"
889975e5 3200
d6a6989e 3201 /* Use Latin-1 as the default port encoding. */
c81c2ad3
AW
3202 SCM_VARIABLE_SET (default_port_encoding_var,
3203 scm_make_fluid_with_default (SCM_BOOL_F));
889975e5 3204 scm_port_encoding_init = 1;
d6a6989e 3205
b22e94db
LC
3206 SCM_VARIABLE_SET (default_conversion_strategy_var,
3207 scm_make_fluid_with_default (sym_substitute));
889975e5 3208 scm_conversion_strategy_init = 1;
b22e94db 3209
9670f238
AW
3210 /* These bindings are used when boot-9 turns `current-input-port' et
3211 al into parameters. They are then removed from the guile module. */
3212 scm_c_define ("%current-input-port-fluid", cur_inport_fluid);
3213 scm_c_define ("%current-output-port-fluid", cur_outport_fluid);
3214 scm_c_define ("%current-error-port-fluid", cur_errport_fluid);
0f2d19dd 3215}
89e00824
ML
3216
3217/*
3218 Local Variables:
3219 c-file-style: "gnu"
3220 End:
3221*/