Merge commit 'ca5e0414e96886177d883a249edd957d2331db65'
[bpt/guile.git] / libguile / ports.c
CommitLineData
e4598559 1/* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2006,
122f24cc 2 * 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.
f4bc4e59 3 *
73be1d9e 4 * This library is free software; you can redistribute it and/or
53befeb7
NJ
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
0f2d19dd 8 *
53befeb7
NJ
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
0f2d19dd 13 *
73be1d9e
MV
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
53befeb7
NJ
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
73be1d9e 18 */
1bbd0b84 19
1bbd0b84 20
0f2d19dd 21\f
d68fee48
JB
22/* Headers. */
23
2b829bbb
KR
24#define _LARGEFILE64_SOURCE /* ask for stat64 etc */
25
dbb605f5 26#ifdef HAVE_CONFIG_H
bd515f37
RB
27# include <config.h>
28#endif
29
0f2d19dd 30#include <stdio.h>
e6e2e95a 31#include <errno.h>
8ab3d8a0 32#include <fcntl.h> /* for chsize on mingw */
b5cb4464 33#include <assert.h>
f4bc4e59 34#include <iconv.h>
889975e5
MG
35#include <uniconv.h>
36#include <unistr.h>
37#include <striconveh.h>
e6e2e95a 38
fca43887
LC
39#include <assert.h>
40
a0599745 41#include "libguile/_scm.h"
4e047c3e 42#include "libguile/async.h"
8269ba5b 43#include "libguile/deprecation.h"
f0942910 44#include "libguile/eval.h"
8ab3d8a0 45#include "libguile/fports.h" /* direct access for seek and truncate */
9511876f 46#include "libguile/goops.h"
a0599745
MD
47#include "libguile/smob.h"
48#include "libguile/chars.h"
185e369a 49#include "libguile/dynwind.h"
0f2d19dd 50
a0599745 51#include "libguile/keywords.h"
5dbc6c06 52#include "libguile/hashtab.h"
a0599745
MD
53#include "libguile/root.h"
54#include "libguile/strings.h"
b42170a4 55#include "libguile/mallocs.h"
a0599745
MD
56#include "libguile/validate.h"
57#include "libguile/ports.h"
e4598559 58#include "libguile/ports-internal.h"
3a5fb14d 59#include "libguile/vectors.h"
2721f918 60#include "libguile/weak-set.h"
9de87eea 61#include "libguile/fluids.h"
889975e5 62#include "libguile/eq.h"
a38024ba 63#include "libguile/alist.h"
0f2d19dd 64
bd9e24b3
GH
65#ifdef HAVE_STRING_H
66#include <string.h>
67#endif
68
ec65f5da
MV
69#ifdef HAVE_IO_H
70#include <io.h>
71#endif
72
0f2d19dd
JB
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76
95b88819
GH
77#ifdef HAVE_SYS_IOCTL_H
78#include <sys/ioctl.h>
79#endif
d68fee48 80
8ab3d8a0
KR
81/* Mingw (version 3.4.5, circa 2006) has ftruncate as an alias for chsize
82 already, but have this code here in case that wasn't so in past versions,
83 or perhaps to help other minimal DOS environments.
84
85 gnulib ftruncate.c has code using fcntl F_CHSIZE and F_FREESP, which
86 might be possibilities if we've got other systems without ftruncate. */
87
56a3dcd4 88#if defined HAVE_CHSIZE && ! defined HAVE_FTRUNCATE
82893676 89#define ftruncate(fd, size) chsize (fd, size)
8ab3d8a0
KR
90#undef HAVE_FTRUNCATE
91#define HAVE_FTRUNCATE 1
82893676
MG
92#endif
93
0f2d19dd 94\f
93c4fa21
AW
95/* Port encodings are case-insensitive ASCII strings. */
96static char
97ascii_toupper (char c)
98{
99 return (c < 'a' || c > 'z') ? c : ('A' + (c - 'a'));
100}
101
102/* It is only necessary to use this function on encodings that come from
103 the user and have not been canonicalized yet. Encodings that are set
104 on ports or in the default encoding fluid are in upper-case, and can
105 be compared with strcmp. */
106static int
107encoding_matches (const char *enc, const char *upper)
108{
109 if (!enc)
110 enc = "ISO-8859-1";
111
112 while (*enc)
113 if (ascii_toupper (*enc++) != *upper++)
114 return 0;
115
116 return !*upper;
117}
118
119static char*
120canonicalize_encoding (const char *enc)
121{
122 char *ret;
123 int i;
124
125 if (!enc)
126 return "ISO-8859-1";
127
128 ret = scm_gc_strdup (enc, "port");
129
130 for (i = 0; ret[i]; i++)
131 {
132 if (ret[i] > 127)
133 /* Restrict to ASCII. */
134 scm_misc_error (NULL, "invalid character encoding ~s",
135 scm_list_1 (scm_from_latin1_string (enc)));
136 else
137 ret[i] = ascii_toupper (ret[i]);
138 }
139
140 return ret;
141}
142
143
144\f
d68fee48 145/* The port kind table --- a dynamically resized array of port types. */
0f2d19dd
JB
146
147
148/* scm_ptobs scm_numptob
5dbc6c06 149 * implement a dynamically resized array of ptob records.
0f2d19dd
JB
150 * Indexes into this table are used when generating type
151 * tags for smobjects (if you know a tag you can get an index and conversely).
152 */
62bd5d66
AW
153static scm_t_ptob_descriptor **scm_ptobs = NULL;
154static long scm_numptob = 0; /* Number of port types. */
155static long scm_ptobs_size = 0; /* Number of slots in the port type
156 table. */
157static scm_i_pthread_mutex_t scm_ptobs_lock = SCM_I_PTHREAD_MUTEX_INITIALIZER;
158
159long
160scm_c_num_port_types (void)
161{
162 long ret;
163
164 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
165 ret = scm_numptob;
166 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
167
168 return ret;
169}
170
171scm_t_ptob_descriptor*
172scm_c_port_type_ref (long ptobnum)
173{
174 scm_t_ptob_descriptor *ret = NULL;
175
176 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
177
178 if (0 <= ptobnum && ptobnum < scm_numptob)
179 ret = scm_ptobs[ptobnum];
180
181 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
182
183 if (!ret)
184 scm_out_of_range ("scm_c_port_type_ref", scm_from_long (ptobnum));
185
186 return ret;
187}
188
189long
190scm_c_port_type_add_x (scm_t_ptob_descriptor *desc)
191{
192 long ret = -1;
193
194 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
195
196 if (scm_numptob + 1 < SCM_I_MAX_PORT_TYPE_COUNT)
197 {
198 if (scm_numptob == scm_ptobs_size)
199 {
200 unsigned long old_size = scm_ptobs_size;
201 scm_t_ptob_descriptor **old_ptobs = scm_ptobs;
202
203 /* Currently there are only 9 predefined port types, so one
204 resize will cover it. */
205 scm_ptobs_size = old_size + 10;
206
207 if (scm_ptobs_size >= SCM_I_MAX_PORT_TYPE_COUNT)
208 scm_ptobs_size = SCM_I_MAX_PORT_TYPE_COUNT;
209
210 scm_ptobs = scm_gc_malloc (sizeof (*scm_ptobs) * scm_ptobs_size,
211 "scm_ptobs");
212
213 memcpy (scm_ptobs, old_ptobs, sizeof (*scm_ptobs) * scm_numptob);
214 }
215
216 ret = scm_numptob++;
217 scm_ptobs[ret] = desc;
218 }
219
220 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
221
222 if (ret < 0)
223 scm_out_of_range ("scm_c_port_type_add_x", scm_from_long (scm_numptob));
224
225 return ret;
226}
0f2d19dd 227
f12733c9 228/*
f12733c9 229 * We choose to use an interface similar to the smob interface with
affc96b5 230 * fill_input and write as standard fields, passed to the port
f12733c9
MD
231 * type constructor, and optional fields set by setters.
232 */
233
70df8af6 234static void
e81d98ec 235flush_port_default (SCM port SCM_UNUSED)
70df8af6
GH
236{
237}
238
239static void
e81d98ec 240end_input_default (SCM port SCM_UNUSED, int offset SCM_UNUSED)
70df8af6
GH
241{
242}
0f2d19dd 243
92c2555f 244scm_t_bits
f12733c9 245scm_make_port_type (char *name,
affc96b5 246 int (*fill_input) (SCM port),
8aa011a1 247 void (*write) (SCM port, const void *data, size_t size))
0f2d19dd 248{
62bd5d66
AW
249 scm_t_ptob_descriptor *desc;
250 long ptobnum;
affc96b5 251
62bd5d66
AW
252 desc = scm_gc_malloc_pointerless (sizeof (*desc), "port-type");
253 memset (desc, 0, sizeof (*desc));
affc96b5 254
62bd5d66
AW
255 desc->name = name;
256 desc->print = scm_port_print;
257 desc->write = write;
258 desc->flush = flush_port_default;
259 desc->end_input = end_input_default;
260 desc->fill_input = fill_input;
affc96b5 261
62bd5d66 262 ptobnum = scm_c_port_type_add_x (desc);
affc96b5 263
62bd5d66 264 /* Make a class object if GOOPS is present. */
63385df2 265 if (SCM_UNPACK (scm_port_class[0]) != 0)
62bd5d66
AW
266 scm_make_port_classes (ptobnum, name);
267
268 return scm_tc7_port + ptobnum * 256;
0f2d19dd
JB
269}
270
f12733c9 271void
23f2b9a3 272scm_set_port_mark (scm_t_bits tc, SCM (*mark) (SCM))
f12733c9 273{
62bd5d66 274 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->mark = mark;
f12733c9
MD
275}
276
277void
23f2b9a3 278scm_set_port_free (scm_t_bits tc, size_t (*free) (SCM))
f12733c9 279{
62bd5d66 280 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->free = free;
f12733c9
MD
281}
282
283void
23f2b9a3 284scm_set_port_print (scm_t_bits tc, int (*print) (SCM exp, SCM port,
19b8d12b 285 scm_print_state *pstate))
f12733c9 286{
62bd5d66 287 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->print = print;
f12733c9
MD
288}
289
290void
23f2b9a3 291scm_set_port_equalp (scm_t_bits tc, SCM (*equalp) (SCM, SCM))
f12733c9 292{
62bd5d66 293 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->equalp = equalp;
f12733c9
MD
294}
295
31703ab8 296void
19b8d12b 297scm_set_port_close (scm_t_bits tc, int (*close) (SCM))
31703ab8 298{
19b8d12b 299 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->close = close;
31703ab8
GH
300}
301
f12733c9 302void
19b8d12b 303scm_set_port_flush (scm_t_bits tc, void (*flush) (SCM port))
f12733c9 304{
03a2eeb0
AW
305 scm_t_ptob_descriptor *ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tc));
306 ptob->flush = flush;
307 ptob->flags |= SCM_PORT_TYPE_HAS_FLUSH;
f12733c9
MD
308}
309
310void
19b8d12b 311scm_set_port_end_input (scm_t_bits tc, void (*end_input) (SCM port, int offset))
f12733c9 312{
19b8d12b 313 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->end_input = end_input;
f12733c9
MD
314}
315
316void
19b8d12b 317scm_set_port_seek (scm_t_bits tc, scm_t_off (*seek) (SCM, scm_t_off, int))
f12733c9 318{
62bd5d66 319 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->seek = seek;
f12733c9
MD
320}
321
322void
f1ce9199 323scm_set_port_truncate (scm_t_bits tc, void (*truncate) (SCM, scm_t_off))
f12733c9 324{
62bd5d66 325 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->truncate = truncate;
f12733c9
MD
326}
327
328void
23f2b9a3 329scm_set_port_input_waiting (scm_t_bits tc, int (*input_waiting) (SCM))
f12733c9 330{
62bd5d66 331 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->input_waiting = input_waiting;
f12733c9
MD
332}
333
e140d85d
AW
334void
335scm_set_port_setvbuf (scm_t_bits tc, void (*setvbuf) (SCM, long, long))
336{
337 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->setvbuf = setvbuf;
f12733c9
MD
338}
339
45c0878b
MW
340static void
341scm_i_set_pending_eof (SCM port)
342{
343 SCM_PORT_GET_INTERNAL (port)->pending_eof = 1;
344}
345
346static void
347scm_i_clear_pending_eof (SCM port)
348{
349 SCM_PORT_GET_INTERNAL (port)->pending_eof = 0;
350}
351
a38024ba
MW
352SCM_DEFINE (scm_i_port_property, "%port-property", 2, 0, 0,
353 (SCM port, SCM key),
354 "Return the property of @var{port} associated with @var{key}.")
355#define FUNC_NAME s_scm_i_port_property
05d7f762 356{
79657fd3
MW
357 scm_i_pthread_mutex_t *lock;
358 SCM result;
359
a38024ba 360 SCM_VALIDATE_OPPORT (1, port);
79657fd3
MW
361 scm_c_lock_port (port, &lock);
362 result = scm_assq_ref (SCM_PORT_GET_INTERNAL (port)->alist, key);
363 if (lock)
364 scm_i_pthread_mutex_unlock (lock);
365 return result;
05d7f762 366}
a38024ba 367#undef FUNC_NAME
05d7f762 368
a38024ba
MW
369SCM_DEFINE (scm_i_set_port_property_x, "%set-port-property!", 3, 0, 0,
370 (SCM port, SCM key, SCM value),
371 "Set the property of @var{port} associated with @var{key} to @var{value}.")
372#define FUNC_NAME s_scm_i_set_port_property_x
05d7f762 373{
79657fd3 374 scm_i_pthread_mutex_t *lock;
a38024ba
MW
375 scm_t_port_internal *pti;
376
377 SCM_VALIDATE_OPPORT (1, port);
79657fd3 378 scm_c_lock_port (port, &lock);
a38024ba
MW
379 pti = SCM_PORT_GET_INTERNAL (port);
380 pti->alist = scm_assq_set_x (pti->alist, key, value);
79657fd3
MW
381 if (lock)
382 scm_i_pthread_mutex_unlock (lock);
a38024ba 383 return SCM_UNSPECIFIED;
05d7f762 384}
a38024ba 385#undef FUNC_NAME
05d7f762 386
0f2d19dd 387\f
0f2d19dd 388
d68fee48 389/* Standard ports --- current input, output, error, and more(!). */
0f2d19dd 390
34297700
AW
391static SCM cur_inport_fluid = SCM_BOOL_F;
392static SCM cur_outport_fluid = SCM_BOOL_F;
393static SCM cur_errport_fluid = SCM_BOOL_F;
394static SCM cur_loadport_fluid = SCM_BOOL_F;
9de87eea 395
3b3b36dd 396SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
e1546b65
MG
397 (),
398 "Return the current input port. This is the default port used\n"
399 "by many input procedures. Initially, @code{current-input-port}\n"
400 "returns the @dfn{standard input} in Unix and C terminology.")
1bbd0b84 401#define FUNC_NAME s_scm_current_input_port
0f2d19dd 402{
34297700 403 if (scm_is_true (cur_inport_fluid))
889975e5
MG
404 return scm_fluid_ref (cur_inport_fluid);
405 else
406 return SCM_BOOL_F;
0f2d19dd 407}
1bbd0b84 408#undef FUNC_NAME
0f2d19dd 409
3b3b36dd 410SCM_DEFINE (scm_current_output_port, "current-output-port", 0, 0, 0,
e1546b65
MG
411 (),
412 "Return the current output port. This is the default port used\n"
9401323e 413 "by many output procedures. Initially,\n"
e1546b65
MG
414 "@code{current-output-port} returns the @dfn{standard output} in\n"
415 "Unix and C terminology.")
1bbd0b84 416#define FUNC_NAME s_scm_current_output_port
0f2d19dd 417{
34297700 418 if (scm_is_true (cur_outport_fluid))
889975e5
MG
419 return scm_fluid_ref (cur_outport_fluid);
420 else
421 return SCM_BOOL_F;
0f2d19dd 422}
1bbd0b84 423#undef FUNC_NAME
0f2d19dd 424
3b3b36dd 425SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0,
1bbd0b84 426 (),
b380b885
MD
427 "Return the port to which errors and warnings should be sent (the\n"
428 "@dfn{standard error} in Unix and C terminology).")
1bbd0b84 429#define FUNC_NAME s_scm_current_error_port
0f2d19dd 430{
34297700 431 if (scm_is_true (cur_errport_fluid))
889975e5
MG
432 return scm_fluid_ref (cur_errport_fluid);
433 else
434 return SCM_BOOL_F;
0f2d19dd 435}
1bbd0b84 436#undef FUNC_NAME
0f2d19dd 437
60617d81
MW
438static SCM current_warning_port_var;
439static scm_i_pthread_once_t current_warning_port_once = SCM_I_PTHREAD_ONCE_INIT;
440
441static void
442init_current_warning_port_var (void)
443{
444 current_warning_port_var
445 = scm_c_private_variable ("guile", "current-warning-port");
446}
447
3972de76
AW
448SCM
449scm_current_warning_port (void)
450{
60617d81
MW
451 scm_i_pthread_once (&current_warning_port_once,
452 init_current_warning_port_var);
453 return scm_call_0 (scm_variable_ref (current_warning_port_var));
3972de76
AW
454}
455
3b3b36dd 456SCM_DEFINE (scm_current_load_port, "current-load-port", 0, 0, 0,
e1546b65 457 (),
b450f070 458 "Return the current-load-port.\n"
e1546b65 459 "The load port is used internally by @code{primitive-load}.")
1bbd0b84 460#define FUNC_NAME s_scm_current_load_port
31614d8e 461{
9de87eea 462 return scm_fluid_ref (cur_loadport_fluid);
31614d8e 463}
1bbd0b84 464#undef FUNC_NAME
31614d8e 465
3b3b36dd 466SCM_DEFINE (scm_set_current_input_port, "set-current-input-port", 1, 0, 0,
1bbd0b84 467 (SCM port),
8f85c0c6
NJ
468 "@deffnx {Scheme Procedure} set-current-output-port port\n"
469 "@deffnx {Scheme Procedure} set-current-error-port port\n"
b380b885
MD
470 "Change the ports returned by @code{current-input-port},\n"
471 "@code{current-output-port} and @code{current-error-port}, respectively,\n"
472 "so that they use the supplied @var{port} for input or output.")
1bbd0b84 473#define FUNC_NAME s_scm_set_current_input_port
0f2d19dd 474{
9de87eea 475 SCM oinp = scm_fluid_ref (cur_inport_fluid);
34d19ef6 476 SCM_VALIDATE_OPINPORT (1, port);
9de87eea 477 scm_fluid_set_x (cur_inport_fluid, port);
0f2d19dd
JB
478 return oinp;
479}
1bbd0b84 480#undef FUNC_NAME
0f2d19dd
JB
481
482
3b3b36dd 483SCM_DEFINE (scm_set_current_output_port, "set-current-output-port", 1, 0, 0,
e1546b65
MG
484 (SCM port),
485 "Set the current default output port to @var{port}.")
1bbd0b84 486#define FUNC_NAME s_scm_set_current_output_port
0f2d19dd 487{
9de87eea 488 SCM ooutp = scm_fluid_ref (cur_outport_fluid);
78446828 489 port = SCM_COERCE_OUTPORT (port);
34d19ef6 490 SCM_VALIDATE_OPOUTPORT (1, port);
9de87eea 491 scm_fluid_set_x (cur_outport_fluid, port);
0f2d19dd
JB
492 return ooutp;
493}
1bbd0b84 494#undef FUNC_NAME
0f2d19dd
JB
495
496
3b3b36dd 497SCM_DEFINE (scm_set_current_error_port, "set-current-error-port", 1, 0, 0,
e1546b65
MG
498 (SCM port),
499 "Set the current default error port to @var{port}.")
1bbd0b84 500#define FUNC_NAME s_scm_set_current_error_port
0f2d19dd 501{
9de87eea 502 SCM oerrp = scm_fluid_ref (cur_errport_fluid);
78446828 503 port = SCM_COERCE_OUTPORT (port);
34d19ef6 504 SCM_VALIDATE_OPOUTPORT (1, port);
9de87eea 505 scm_fluid_set_x (cur_errport_fluid, port);
0f2d19dd
JB
506 return oerrp;
507}
1bbd0b84 508#undef FUNC_NAME
0f2d19dd 509
3972de76
AW
510
511SCM
512scm_set_current_warning_port (SCM port)
513{
60617d81
MW
514 scm_i_pthread_once (&current_warning_port_once,
515 init_current_warning_port_var);
516 return scm_call_1 (scm_variable_ref (current_warning_port_var), port);
3972de76
AW
517}
518
519
185e369a 520void
661ae7ab 521scm_dynwind_current_input_port (SCM port)
9de87eea 522#define FUNC_NAME NULL
185e369a 523{
9de87eea 524 SCM_VALIDATE_OPINPORT (1, port);
661ae7ab 525 scm_dynwind_fluid (cur_inport_fluid, port);
185e369a 526}
9de87eea 527#undef FUNC_NAME
185e369a
MV
528
529void
661ae7ab 530scm_dynwind_current_output_port (SCM port)
9de87eea 531#define FUNC_NAME NULL
185e369a 532{
9de87eea
MV
533 port = SCM_COERCE_OUTPORT (port);
534 SCM_VALIDATE_OPOUTPORT (1, port);
661ae7ab 535 scm_dynwind_fluid (cur_outport_fluid, port);
185e369a 536}
9de87eea 537#undef FUNC_NAME
185e369a
MV
538
539void
661ae7ab 540scm_dynwind_current_error_port (SCM port)
9de87eea
MV
541#define FUNC_NAME NULL
542{
543 port = SCM_COERCE_OUTPORT (port);
544 SCM_VALIDATE_OPOUTPORT (1, port);
661ae7ab 545 scm_dynwind_fluid (cur_errport_fluid, port);
9de87eea
MV
546}
547#undef FUNC_NAME
548
549void
661ae7ab 550scm_i_dynwind_current_load_port (SCM port)
185e369a 551{
661ae7ab 552 scm_dynwind_fluid (cur_loadport_fluid, port);
185e369a
MV
553}
554
19b8d12b 555
0f2d19dd 556\f
0f2d19dd 557
19b8d12b
AW
558/* Retrieving a port's mode. */
559
560/* Return the flags that characterize a port based on the mode
561 * string used to open a file for that port.
562 *
563 * See PORT FLAGS in scm.h
5dbc6c06 564 */
19b8d12b
AW
565
566static long
567scm_i_mode_bits_n (SCM modes)
568{
569 return (SCM_OPN
570 | (scm_i_string_contains_char (modes, 'r')
571 || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0)
572 | (scm_i_string_contains_char (modes, 'w')
573 || scm_i_string_contains_char (modes, 'a')
574 || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0)
575 | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0)
576 | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0));
577}
578
579long
580scm_mode_bits (char *modes)
581{
8ebd06c6
AW
582 /* Valid characters are rw+a0l. So, use latin1. */
583 return scm_i_mode_bits (scm_from_latin1_string (modes));
19b8d12b
AW
584}
585
586long
587scm_i_mode_bits (SCM modes)
588{
589 long bits;
590
591 if (!scm_is_string (modes))
592 scm_wrong_type_arg_msg (NULL, 0, modes, "string");
593
594 bits = scm_i_mode_bits_n (modes);
595 scm_remember_upto_here_1 (modes);
596 return bits;
597}
598
599/* Return the mode flags from an open port.
600 * Some modes such as "append" are only used when opening
601 * a file and are not returned here. */
602
603SCM_DEFINE (scm_port_mode, "port-mode", 1, 0, 0,
604 (SCM port),
605 "Return the port modes associated with the open port @var{port}.\n"
606 "These will not necessarily be identical to the modes used when\n"
607 "the port was opened, since modes such as \"append\" which are\n"
608 "used only during port creation are not retained.")
609#define FUNC_NAME s_scm_port_mode
610{
611 char modes[4];
612 modes[0] = '\0';
613
614 port = SCM_COERCE_OUTPORT (port);
615 SCM_VALIDATE_OPPORT (1, port);
616 if (SCM_CELL_WORD_0 (port) & SCM_RDNG) {
617 if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
618 strcpy (modes, "r+");
619 else
620 strcpy (modes, "r");
621 }
622 else if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
623 strcpy (modes, "w");
624 if (SCM_CELL_WORD_0 (port) & SCM_BUF0)
625 strcat (modes, "0");
626
627 return scm_from_latin1_string (modes);
628}
629#undef FUNC_NAME
630
19b8d12b
AW
631\f
632
633/* The port table --- a weak set of all ports.
634
635 We need a global registry of ports to flush them all at exit, and to
636 get all the ports matching a file descriptor. */
2721f918 637SCM scm_i_port_weak_set;
b9ad392e 638
19b8d12b 639
651a0735 640\f
651a0735 641
19b8d12b 642/* Port finalization. */
1cc91f1b 643
5a771d5f
AW
644struct do_free_data
645{
646 scm_t_ptob_descriptor *ptob;
647 SCM port;
648};
649
650static SCM
651do_free (void *body_data)
652{
653 struct do_free_data *data = body_data;
654
655 /* `close' is for explicit `close-port' by user. `free' is for this
656 purpose: ports collected by the GC. */
657 data->ptob->free (data->port);
658
659 return SCM_BOOL_T;
660}
661
651a0735
LC
662/* Finalize the object (a port) pointed to by PTR. */
663static void
6922d92f 664finalize_port (void *ptr, void *data)
651a0735 665{
21041372 666 SCM port = SCM_PACK_POINTER (ptr);
651a0735
LC
667
668 if (!SCM_PORTP (port))
669 abort ();
670
671 if (SCM_OPENP (port))
672 {
3753e227 673 struct do_free_data data;
651a0735 674
3753e227 675 SCM_CLR_PORT_OPEN_FLAG (port);
651a0735 676
3753e227
AW
677 data.ptob = SCM_PORT_DESCRIPTOR (port);
678 data.port = port;
5a771d5f 679
3753e227
AW
680 scm_internal_catch (SCM_BOOL_T, do_free, &data,
681 scm_handle_by_message_noexit, NULL);
5a771d5f 682
3753e227 683 scm_gc_ports_collected++;
651a0735
LC
684 }
685}
686
687
651a0735
LC
688\f
689
da220f27 690SCM
2721f918
AW
691scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits,
692 const char *encoding,
693 scm_t_string_failed_conversion_handler handler,
694 scm_t_bits stream)
0f2d19dd 695{
2721f918
AW
696 SCM ret;
697 scm_t_port *entry;
f6f4feb0 698 scm_t_port_internal *pti;
62bd5d66 699 scm_t_ptob_descriptor *ptob;
2721f918 700
f6f4feb0
MW
701 entry = scm_gc_typed_calloc (scm_t_port);
702 pti = scm_gc_typed_calloc (scm_t_port_internal);
62bd5d66
AW
703 ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tag));
704
705 ret = scm_words (tag | mode_bits, 3);
706 SCM_SET_CELL_WORD_1 (ret, (scm_t_bits) entry);
707 SCM_SET_CELL_WORD_2 (ret, (scm_t_bits) ptob);
5f16b897 708
92c0ebac
AW
709 entry->lock = scm_gc_malloc_pointerless (sizeof (*entry->lock), "port lock");
710 scm_i_pthread_mutex_init (entry->lock, scm_i_pthread_mutexattr_recursive);
30b126d2 711
f6f4feb0 712 entry->internal = pti;
840ae05d 713 entry->file_name = SCM_BOOL_F;
61e452ba 714 entry->rw_active = SCM_PORT_NEITHER;
2721f918
AW
715 entry->port = ret;
716 entry->stream = stream;
93c4fa21
AW
717
718 if (encoding_matches (encoding, "UTF-8"))
719 {
f6f4feb0 720 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
93c4fa21
AW
721 entry->encoding = "UTF-8";
722 }
723 else if (encoding_matches (encoding, "ISO-8859-1"))
724 {
f6f4feb0 725 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
93c4fa21
AW
726 entry->encoding = "ISO-8859-1";
727 }
6c98257f 728 else
93c4fa21 729 {
f6f4feb0 730 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
93c4fa21
AW
731 entry->encoding = canonicalize_encoding (encoding);
732 }
733
2721f918 734 entry->ilseq_handler = handler;
f6f4feb0 735 pti->iconv_descriptors = NULL;
0f2d19dd 736
f6f4feb0
MW
737 pti->at_stream_start_for_bom_read = 1;
738 pti->at_stream_start_for_bom_write = 1;
f4bc4e59 739
f6f4feb0
MW
740 pti->pending_eof = 0;
741 pti->alist = SCM_EOL;
3e05fc04 742
6978c673
AW
743 if (SCM_PORT_DESCRIPTOR (ret)->free)
744 scm_i_set_finalizer (SCM2PTR (ret), finalize_port, NULL);
745
03a2eeb0
AW
746 if (SCM_PORT_DESCRIPTOR (ret)->flags & SCM_PORT_TYPE_HAS_FLUSH)
747 scm_weak_set_add_x (scm_i_port_weak_set, ret);
651a0735 748
2721f918
AW
749 return ret;
750}
751
752SCM
753scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream)
754{
755 return scm_c_make_port_with_encoding (tag, mode_bits,
756 scm_i_default_port_encoding (),
0dd7c540 757 scm_i_default_port_conversion_handler (),
2721f918
AW
758 stream);
759}
760
761SCM
762scm_new_port_table_entry (scm_t_bits tag)
763{
764 return scm_c_make_port (tag, 0, 0);
0f2d19dd
JB
765}
766
d68fee48 767\f
d68fee48 768
19b8d12b 769/* Predicates. */
1cc91f1b 770
19b8d12b
AW
771SCM_DEFINE (scm_port_p, "port?", 1, 0, 0,
772 (SCM x),
773 "Return a boolean indicating whether @var{x} is a port.\n"
774 "Equivalent to @code{(or (input-port? @var{x}) (output-port?\n"
775 "@var{x}))}.")
776#define FUNC_NAME s_scm_port_p
0f2d19dd 777{
19b8d12b 778 return scm_from_bool (SCM_PORTP (x));
0f2d19dd 779}
19b8d12b 780#undef FUNC_NAME
0f2d19dd 781
19b8d12b
AW
782SCM_DEFINE (scm_input_port_p, "input-port?", 1, 0, 0,
783 (SCM x),
784 "Return @code{#t} if @var{x} is an input port, otherwise return\n"
785 "@code{#f}. Any object satisfying this predicate also satisfies\n"
786 "@code{port?}.")
787#define FUNC_NAME s_scm_input_port_p
0f2d19dd 788{
19b8d12b 789 return scm_from_bool (SCM_INPUT_PORT_P (x));
0f2d19dd 790}
1bbd0b84 791#undef FUNC_NAME
0f2d19dd 792
19b8d12b
AW
793SCM_DEFINE (scm_output_port_p, "output-port?", 1, 0, 0,
794 (SCM x),
795 "Return @code{#t} if @var{x} is an output port, otherwise return\n"
796 "@code{#f}. Any object satisfying this predicate also satisfies\n"
797 "@code{port?}.")
798#define FUNC_NAME s_scm_output_port_p
0f2d19dd 799{
19b8d12b
AW
800 x = SCM_COERCE_OUTPORT (x);
801 return scm_from_bool (SCM_OUTPUT_PORT_P (x));
0f2d19dd 802}
1bbd0b84 803#undef FUNC_NAME
0f2d19dd 804
19b8d12b
AW
805SCM_DEFINE (scm_port_closed_p, "port-closed?", 1, 0, 0,
806 (SCM port),
807 "Return @code{#t} if @var{port} is closed or @code{#f} if it is\n"
808 "open.")
809#define FUNC_NAME s_scm_port_closed_p
d617ee18 810{
19b8d12b
AW
811 SCM_VALIDATE_PORT (1, port);
812 return scm_from_bool (!SCM_OPPORTP (port));
d617ee18 813}
19b8d12b 814#undef FUNC_NAME
eadd48de 815
19b8d12b
AW
816SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0,
817 (SCM x),
818 "Return @code{#t} if @var{x} is an end-of-file object; otherwise\n"
819 "return @code{#f}.")
820#define FUNC_NAME s_scm_eof_object_p
eadd48de 821{
19b8d12b 822 return scm_from_bool (SCM_EOF_OBJECT_P (x));
eadd48de 823}
1bbd0b84 824#undef FUNC_NAME
eadd48de
GH
825
826
d68fee48 827\f
19b8d12b 828
d68fee48
JB
829/* Closing ports. */
830
03a2eeb0
AW
831static void close_iconv_descriptors (scm_t_iconv_descriptors *id);
832
0f2d19dd
JB
833/* scm_close_port
834 * Call the close operation on a port object.
eadd48de 835 * see also scm_close.
0f2d19dd 836 */
3b3b36dd 837SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0,
1bbd0b84 838 (SCM port),
1e6808ea
MG
839 "Close the specified port object. Return @code{#t} if it\n"
840 "successfully closes a port or @code{#f} if it was already\n"
841 "closed. An exception may be raised if an error occurs, for\n"
842 "example when flushing buffered output. See also @ref{Ports and\n"
843 "File Descriptors, close}, for a procedure which can close file\n"
844 "descriptors.")
1bbd0b84 845#define FUNC_NAME s_scm_close_port
0f2d19dd 846{
f6f4feb0 847 scm_t_port_internal *pti;
eadd48de
GH
848 int rv;
849
78446828
MV
850 port = SCM_COERCE_OUTPORT (port);
851
7a754ca6 852 SCM_VALIDATE_PORT (1, port);
0f2d19dd 853 if (SCM_CLOSEDP (port))
eadd48de 854 return SCM_BOOL_F;
03a2eeb0 855
f6f4feb0 856 pti = SCM_PORT_GET_INTERNAL (port);
5a771d5f 857 SCM_CLR_PORT_OPEN_FLAG (port);
03a2eeb0
AW
858
859 if (SCM_PORT_DESCRIPTOR (port)->flags & SCM_PORT_TYPE_HAS_FLUSH)
860 scm_weak_set_remove_x (scm_i_port_weak_set, port);
861
5a771d5f
AW
862 if (SCM_PORT_DESCRIPTOR (port)->close)
863 /* Note! This may throw an exception. Anything after this point
864 should be resilient to non-local exits. */
865 rv = SCM_PORT_DESCRIPTOR (port)->close (port);
866 else
867 rv = 0;
03a2eeb0 868
f6f4feb0 869 if (pti->iconv_descriptors)
03a2eeb0 870 {
5a771d5f
AW
871 /* If we don't get here, the iconv_descriptors finalizer will
872 clean up. */
f6f4feb0
MW
873 close_iconv_descriptors (pti->iconv_descriptors);
874 pti->iconv_descriptors = NULL;
03a2eeb0
AW
875 }
876
7888309b 877 return scm_from_bool (rv >= 0);
7a754ca6
MD
878}
879#undef FUNC_NAME
880
881SCM_DEFINE (scm_close_input_port, "close-input-port", 1, 0, 0,
882 (SCM port),
883 "Close the specified input port object. The routine has no effect if\n"
884 "the file has already been closed. An exception may be raised if an\n"
885 "error occurs. The value returned is unspecified.\n\n"
886 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
887 "which can close file descriptors.")
888#define FUNC_NAME s_scm_close_input_port
889{
890 SCM_VALIDATE_INPUT_PORT (1, port);
891 scm_close_port (port);
892 return SCM_UNSPECIFIED;
893}
894#undef FUNC_NAME
895
896SCM_DEFINE (scm_close_output_port, "close-output-port", 1, 0, 0,
897 (SCM port),
898 "Close the specified output port object. The routine has no effect if\n"
899 "the file has already been closed. An exception may be raised if an\n"
900 "error occurs. The value returned is unspecified.\n\n"
901 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
902 "which can close file descriptors.")
903#define FUNC_NAME s_scm_close_output_port
904{
905 port = SCM_COERCE_OUTPORT (port);
906 SCM_VALIDATE_OUTPUT_PORT (1, port);
907 scm_close_port (port);
908 return SCM_UNSPECIFIED;
0f2d19dd 909}
1bbd0b84 910#undef FUNC_NAME
0f2d19dd 911
2721f918 912
19b8d12b 913\f
2721f918 914
19b8d12b
AW
915/* Encoding characters to byte streams, and decoding byte streams to
916 characters. */
5dbc6c06 917
19b8d12b
AW
918/* A fluid specifying the default encoding for newly created ports. If it is
919 a string, that is the encoding. If it is #f, it is in the "native"
920 (Latin-1) encoding. */
921SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding");
922
923static int scm_port_encoding_init = 0;
924
925/* Use ENCODING as the default encoding for future ports. */
c536b4b3 926void
19b8d12b 927scm_i_set_default_port_encoding (const char *encoding)
c2ca4493 928{
19b8d12b
AW
929 if (!scm_port_encoding_init
930 || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
931 scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
932 SCM_EOL);
fdfe6305 933
449ca87b 934 if (encoding_matches (encoding, "ISO-8859-1"))
19b8d12b
AW
935 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
936 else
93c4fa21
AW
937 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
938 scm_from_latin1_string (canonicalize_encoding (encoding)));
2721f918 939}
3a5fb14d 940
93c4fa21 941/* Return the name of the default encoding for newly created ports. */
19b8d12b
AW
942const char *
943scm_i_default_port_encoding (void)
2721f918 944{
19b8d12b 945 if (!scm_port_encoding_init)
93c4fa21 946 return "ISO-8859-1";
19b8d12b 947 else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
93c4fa21 948 return "ISO-8859-1";
19b8d12b
AW
949 else
950 {
951 SCM encoding;
952
953 encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
954 if (!scm_is_string (encoding))
93c4fa21 955 return "ISO-8859-1";
19b8d12b
AW
956 else
957 return scm_i_string_chars (encoding);
958 }
c536b4b3 959}
fdfe6305 960
0dd7c540
AW
961/* A fluid specifying the default conversion handler for newly created
962 ports. Its value should be one of the symbols below. */
963SCM_VARIABLE (default_conversion_strategy_var,
964 "%default-port-conversion-strategy");
1b3daef0 965
0dd7c540
AW
966/* Whether the above fluid is initialized. */
967static int scm_conversion_strategy_init = 0;
c2ca4493 968
0dd7c540
AW
969/* The possible conversion strategies. */
970SCM_SYMBOL (sym_error, "error");
971SCM_SYMBOL (sym_substitute, "substitute");
972SCM_SYMBOL (sym_escape, "escape");
c536b4b3 973
0dd7c540
AW
974/* Return the default failed encoding conversion policy for new created
975 ports. */
976scm_t_string_failed_conversion_handler
977scm_i_default_port_conversion_handler (void)
978{
979 scm_t_string_failed_conversion_handler handler;
d68fee48 980
0dd7c540
AW
981 if (!scm_conversion_strategy_init
982 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
983 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
984 else
985 {
986 SCM fluid, value;
0f2d19dd 987
0dd7c540
AW
988 fluid = SCM_VARIABLE_REF (default_conversion_strategy_var);
989 value = scm_fluid_ref (fluid);
0f2d19dd 990
0dd7c540
AW
991 if (scm_is_eq (sym_substitute, value))
992 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
993 else if (scm_is_eq (sym_escape, value))
994 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
995 else
996 /* Default to 'error also when the fluid's value is not one of
997 the valid symbols. */
998 handler = SCM_FAILED_CONVERSION_ERROR;
999 }
eb5c0a2a 1000
0dd7c540 1001 return handler;
60d0643d 1002}
0f2d19dd 1003
0dd7c540
AW
1004/* Use HANDLER as the default conversion strategy for future ports. */
1005void
1006scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler
1007 handler)
0f2d19dd 1008{
0dd7c540 1009 SCM strategy;
0f2d19dd 1010
0dd7c540
AW
1011 if (!scm_conversion_strategy_init
1012 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
1013 scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
1014 SCM_EOL);
1015
1016 switch (handler)
78446828 1017 {
0dd7c540
AW
1018 case SCM_FAILED_CONVERSION_ERROR:
1019 strategy = sym_error;
1020 break;
1021
1022 case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE:
1023 strategy = sym_escape;
1024 break;
1025
1026 case SCM_FAILED_CONVERSION_QUESTION_MARK:
1027 strategy = sym_substitute;
1028 break;
1029
1030 default:
1031 abort ();
78446828 1032 }
0f2d19dd 1033
0dd7c540
AW
1034 scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var),
1035 strategy);
1036}
5dbc6c06 1037
f6f4feb0
MW
1038static void
1039scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port);
1040
1041/* If the next LEN bytes from PORT are equal to those in BYTES, then
1042 return 1, else return 0. Leave the port position unchanged. */
1043static int
1044looking_at_bytes (SCM port, const unsigned char *bytes, int len)
0f2d19dd 1045{
f6f4feb0
MW
1046 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1047 int i = 0;
1048
1049 while (i < len && scm_peek_byte_or_eof_unlocked (port) == bytes[i])
78446828 1050 {
f6f4feb0
MW
1051 pt->read_pos++;
1052 i++;
78446828 1053 }
f6f4feb0
MW
1054 scm_i_unget_bytes_unlocked (bytes, i, port);
1055 return (i == len);
0f2d19dd
JB
1056}
1057
f6f4feb0
MW
1058static const unsigned char scm_utf8_bom[3] = {0xEF, 0xBB, 0xBF};
1059static const unsigned char scm_utf16be_bom[2] = {0xFE, 0xFF};
1060static const unsigned char scm_utf16le_bom[2] = {0xFF, 0xFE};
1061static const unsigned char scm_utf32be_bom[4] = {0x00, 0x00, 0xFE, 0xFF};
1062static const unsigned char scm_utf32le_bom[4] = {0xFF, 0xFE, 0x00, 0x00};
5dbc6c06 1063
f6f4feb0
MW
1064/* Decide what byte order to use for a UTF-16 port. Return "UTF-16BE"
1065 or "UTF-16LE". MODE must be either SCM_PORT_READ or SCM_PORT_WRITE,
1066 and specifies which operation is about to be done. The MODE
1067 determines how we will decide the byte order. We deliberately avoid
1068 reading from the port unless the user is about to do so. If the user
1069 is about to read, then we look for a BOM, and if present, we use it
1070 to determine the byte order. Otherwise we choose big endian, as
1071 recommended by the Unicode Standard. Note that the BOM (if any) is
1072 not consumed here. */
1073static const char *
1074decide_utf16_encoding (SCM port, scm_t_port_rw_active mode)
5dbc6c06 1075{
f6f4feb0
MW
1076 if (mode == SCM_PORT_READ
1077 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1078 && looking_at_bytes (port, scm_utf16le_bom, sizeof scm_utf16le_bom))
1079 return "UTF-16LE";
1080 else
1081 return "UTF-16BE";
5dbc6c06
HWN
1082}
1083
f6f4feb0
MW
1084/* Decide what byte order to use for a UTF-32 port. Return "UTF-32BE"
1085 or "UTF-32LE". See the comment above 'decide_utf16_encoding' for
1086 details. */
1087static const char *
1088decide_utf32_encoding (SCM port, scm_t_port_rw_active mode)
89ea5b7c 1089{
f6f4feb0
MW
1090 if (mode == SCM_PORT_READ
1091 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1092 && looking_at_bytes (port, scm_utf32le_bom, sizeof scm_utf32le_bom))
1093 return "UTF-32LE";
1094 else
1095 return "UTF-32BE";
89ea5b7c 1096}
0f2d19dd 1097
6c98257f 1098static void
2aed2667 1099finalize_iconv_descriptors (void *ptr, void *data)
c536b4b3 1100{
6c98257f
AW
1101 close_iconv_descriptors (ptr);
1102}
c2ca4493 1103
6c98257f
AW
1104static scm_t_iconv_descriptors *
1105open_iconv_descriptors (const char *encoding, int reading, int writing)
1106{
1107 scm_t_iconv_descriptors *id;
1108 iconv_t input_cd, output_cd;
8ebd06c6 1109 size_t i;
c536b4b3 1110
6c98257f
AW
1111 input_cd = (iconv_t) -1;
1112 output_cd = (iconv_t) -1;
d68fee48 1113
8ebd06c6
AW
1114 for (i = 0; encoding[i]; i++)
1115 if (encoding[i] > 127)
1116 goto invalid_encoding;
1117
6c98257f
AW
1118 if (reading)
1119 {
1120 /* Open an input iconv conversion descriptor, from ENCODING
1121 to UTF-8. We choose UTF-8, not UTF-32, because iconv
1122 implementations can typically convert from anything to
1123 UTF-8, but not to UTF-32 (see
1124 <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
1125
1126 /* Assume opening an iconv descriptor causes about 16 KB of
1127 allocation. */
1128 scm_gc_register_allocation (16 * 1024);
1129
1130 input_cd = iconv_open ("UTF-8", encoding);
1131 if (input_cd == (iconv_t) -1)
1132 goto invalid_encoding;
1133 }
0f2d19dd 1134
6c98257f 1135 if (writing)
19b8d12b 1136 {
6c98257f
AW
1137 /* Assume opening an iconv descriptor causes about 16 KB of
1138 allocation. */
1139 scm_gc_register_allocation (16 * 1024);
0f2d19dd 1140
6c98257f
AW
1141 output_cd = iconv_open (encoding, "UTF-8");
1142 if (output_cd == (iconv_t) -1)
1143 {
1144 if (input_cd != (iconv_t) -1)
1145 iconv_close (input_cd);
1146 goto invalid_encoding;
1147 }
19b8d12b 1148 }
eb5c0a2a 1149
6c98257f
AW
1150 id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors");
1151 id->input_cd = input_cd;
1152 id->output_cd = output_cd;
1153
6978c673
AW
1154 /* Register a finalizer to close the descriptors. */
1155 scm_i_set_finalizer (id, finalize_iconv_descriptors, NULL);
19b8d12b 1156
6c98257f 1157 return id;
19b8d12b
AW
1158
1159 invalid_encoding:
1160 {
1161 SCM err;
8ebd06c6 1162 err = scm_from_latin1_string (encoding);
6c98257f 1163 scm_misc_error ("open_iconv_descriptors",
19b8d12b
AW
1164 "invalid or unknown character encoding ~s",
1165 scm_list_1 (err));
1166 }
0f2d19dd
JB
1167}
1168
6c98257f
AW
1169static void
1170close_iconv_descriptors (scm_t_iconv_descriptors *id)
1171{
1172 if (id->input_cd != (iconv_t) -1)
1173 iconv_close (id->input_cd);
1174 if (id->output_cd != (iconv_t) -1)
1175 iconv_close (id->output_cd);
1176 id->input_cd = (void *) -1;
1177 id->output_cd = (void *) -1;
1178}
1179
1180scm_t_iconv_descriptors *
f6f4feb0 1181scm_i_port_iconv_descriptors (SCM port, scm_t_port_rw_active mode)
6c98257f 1182{
f6f4feb0 1183 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
6c98257f 1184
f6f4feb0 1185 assert (pti->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV);
6c98257f 1186
f6f4feb0 1187 if (!pti->iconv_descriptors)
6c98257f 1188 {
f6f4feb0
MW
1189 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1190 const char *precise_encoding;
7b292a9d 1191
6c98257f
AW
1192 if (!pt->encoding)
1193 pt->encoding = "ISO-8859-1";
7be1705d 1194
f6f4feb0
MW
1195 /* If the specified encoding is UTF-16 or UTF-32, then make
1196 that more precise by deciding what byte order to use. */
1197 if (strcmp (pt->encoding, "UTF-16") == 0)
1198 precise_encoding = decide_utf16_encoding (port, mode);
1199 else if (strcmp (pt->encoding, "UTF-32") == 0)
1200 precise_encoding = decide_utf32_encoding (port, mode);
1201 else
1202 precise_encoding = pt->encoding;
7b292a9d 1203
f6f4feb0
MW
1204 pti->iconv_descriptors =
1205 open_iconv_descriptors (precise_encoding,
6c98257f
AW
1206 SCM_INPUT_PORT_P (port),
1207 SCM_OUTPUT_PORT_P (port));
1208 }
1209
f6f4feb0 1210 return pti->iconv_descriptors;
6c98257f
AW
1211}
1212
8ebd06c6 1213/* The name of the encoding is itself encoded in ASCII. */
6c98257f
AW
1214void
1215scm_i_set_port_encoding_x (SCM port, const char *encoding)
1216{
1217 scm_t_port *pt;
f6f4feb0 1218 scm_t_port_internal *pti;
6c98257f
AW
1219 scm_t_iconv_descriptors *prev;
1220
1221 /* Set the character encoding for this port. */
1222 pt = SCM_PTAB_ENTRY (port);
f6f4feb0
MW
1223 pti = SCM_PORT_GET_INTERNAL (port);
1224 prev = pti->iconv_descriptors;
7b292a9d 1225
f6f4feb0
MW
1226 /* In order to handle cases where the encoding changes mid-stream
1227 (e.g. within an HTTP stream, or within a file that is composed of
1228 segments with different encodings), we consider this to be "stream
1229 start" for purposes of BOM handling, regardless of our actual file
1230 position. */
1231 pti->at_stream_start_for_bom_read = 1;
1232 pti->at_stream_start_for_bom_write = 1;
6c98257f 1233
93c4fa21 1234 if (encoding_matches (encoding, "UTF-8"))
6c98257f
AW
1235 {
1236 pt->encoding = "UTF-8";
f6f4feb0 1237 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
6c98257f 1238 }
93c4fa21 1239 else if (encoding_matches (encoding, "ISO-8859-1"))
79eb47ea
AW
1240 {
1241 pt->encoding = "ISO-8859-1";
f6f4feb0 1242 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
79eb47ea 1243 }
6c98257f
AW
1244 else
1245 {
f6f4feb0
MW
1246 pt->encoding = canonicalize_encoding (encoding);
1247 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
6c98257f
AW
1248 }
1249
f6f4feb0 1250 pti->iconv_descriptors = NULL;
6c98257f
AW
1251 if (prev)
1252 close_iconv_descriptors (prev);
1253}
1254
19b8d12b
AW
1255SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
1256 (SCM port),
1257 "Returns, as a string, the character encoding that @var{port}\n"
1258 "uses to interpret its input and output.\n")
1259#define FUNC_NAME s_scm_port_encoding
0f2d19dd 1260{
19b8d12b
AW
1261 SCM_VALIDATE_PORT (1, port);
1262
93c4fa21 1263 return scm_from_latin1_string (SCM_PTAB_ENTRY (port)->encoding);
0f2d19dd 1264}
1bbd0b84 1265#undef FUNC_NAME
0f2d19dd 1266
19b8d12b
AW
1267SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
1268 (SCM port, SCM enc),
1269 "Sets the character encoding that will be used to interpret all\n"
1270 "port I/O. New ports are created with the encoding\n"
1271 "appropriate for the current locale if @code{setlocale} has \n"
1272 "been called or ISO-8859-1 otherwise\n"
1273 "and this procedure can be used to modify that encoding.\n")
1274#define FUNC_NAME s_scm_set_port_encoding_x
5dbc6c06 1275{
19b8d12b 1276 char *enc_str;
5dbc6c06 1277
19b8d12b
AW
1278 SCM_VALIDATE_PORT (1, port);
1279 SCM_VALIDATE_STRING (2, enc);
0f2d19dd 1280
8ebd06c6 1281 enc_str = scm_to_latin1_string (enc);
19b8d12b
AW
1282 scm_i_set_port_encoding_x (port, enc_str);
1283 free (enc_str);
1284
1285 return SCM_UNSPECIFIED;
0f2d19dd 1286}
1bbd0b84 1287#undef FUNC_NAME
0f2d19dd 1288
19b8d12b
AW
1289SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
1290 1, 0, 0, (SCM port),
1291 "Returns the behavior of the port when handling a character that\n"
1292 "is not representable in the port's current encoding.\n"
1293 "It returns the symbol @code{error} if unrepresentable characters\n"
1294 "should cause exceptions, @code{substitute} if the port should\n"
1295 "try to replace unrepresentable characters with question marks or\n"
1296 "approximate characters, or @code{escape} if unrepresentable\n"
1297 "characters should be converted to string escapes.\n"
1298 "\n"
1299 "If @var{port} is @code{#f}, then the current default behavior\n"
1300 "will be returned. New ports will have this default behavior\n"
1301 "when they are created.\n")
1302#define FUNC_NAME s_scm_port_conversion_strategy
889975e5 1303{
19b8d12b 1304 scm_t_string_failed_conversion_handler h;
7b292a9d 1305
0dd7c540
AW
1306 if (scm_is_false (port))
1307 h = scm_i_default_port_conversion_handler ();
1308 else
7b292a9d 1309 {
0dd7c540
AW
1310 scm_t_port *pt;
1311
19b8d12b 1312 SCM_VALIDATE_OPPORT (1, port);
0dd7c540
AW
1313 pt = SCM_PTAB_ENTRY (port);
1314
1315 h = pt->ilseq_handler;
7b292a9d
LC
1316 }
1317
19b8d12b
AW
1318 if (h == SCM_FAILED_CONVERSION_ERROR)
1319 return scm_from_latin1_symbol ("error");
1320 else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
1321 return scm_from_latin1_symbol ("substitute");
1322 else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
1323 return scm_from_latin1_symbol ("escape");
1324 else
1325 abort ();
7b292a9d 1326
19b8d12b
AW
1327 /* Never gets here. */
1328 return SCM_UNDEFINED;
1329}
1330#undef FUNC_NAME
7b292a9d 1331
19b8d12b
AW
1332SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!",
1333 2, 0, 0,
1334 (SCM port, SCM sym),
1335 "Sets the behavior of the interpreter when outputting a character\n"
1336 "that is not representable in the port's current encoding.\n"
1337 "@var{sym} can be either @code{'error}, @code{'substitute}, or\n"
1338 "@code{'escape}. If it is @code{'error}, an error will be thrown\n"
1339 "when an unconvertible character is encountered. If it is\n"
1340 "@code{'substitute}, then unconvertible characters will \n"
1341 "be replaced with approximate characters, or with question marks\n"
1342 "if no approximately correct character is available.\n"
1343 "If it is @code{'escape},\n"
1344 "it will appear as a hex escape when output.\n"
1345 "\n"
1346 "If @var{port} is an open port, the conversion error behavior\n"
1347 "is set for that port. If it is @code{#f}, it is set as the\n"
1348 "default behavior for any future ports that get created in\n"
1349 "this thread.\n")
1350#define FUNC_NAME s_scm_set_port_conversion_strategy_x
1351{
0dd7c540 1352 scm_t_string_failed_conversion_handler handler;
7b292a9d 1353
0dd7c540
AW
1354 if (scm_is_eq (sym, sym_error))
1355 handler = SCM_FAILED_CONVERSION_ERROR;
1356 else if (scm_is_eq (sym, sym_substitute))
1357 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
1358 else if (scm_is_eq (sym, sym_escape))
1359 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
1360 else
1361 SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym));
7b292a9d 1362
0dd7c540
AW
1363 if (scm_is_false (port))
1364 scm_i_set_default_port_conversion_handler (handler);
1365 else
19b8d12b 1366 {
0dd7c540
AW
1367 SCM_VALIDATE_OPPORT (1, port);
1368 SCM_PTAB_ENTRY (port)->ilseq_handler = handler;
19b8d12b 1369 }
7b292a9d 1370
19b8d12b
AW
1371 return SCM_UNSPECIFIED;
1372}
1373#undef FUNC_NAME
7be1705d 1374
7b292a9d 1375
19b8d12b 1376\f
7b292a9d 1377
14dcb5cc
AW
1378/* The port lock. */
1379
1380static void
92c0ebac 1381lock_port (void *mutex)
14dcb5cc 1382{
789dd40b 1383 scm_i_pthread_mutex_lock ((scm_i_pthread_mutex_t *) mutex);
14dcb5cc
AW
1384}
1385
1386static void
92c0ebac 1387unlock_port (void *mutex)
14dcb5cc 1388{
789dd40b 1389 scm_i_pthread_mutex_unlock ((scm_i_pthread_mutex_t *) mutex);
14dcb5cc
AW
1390}
1391
1392void
1393scm_dynwind_lock_port (SCM port)
92c0ebac 1394#define FUNC_NAME "dynwind-lock-port"
14dcb5cc 1395{
92c0ebac
AW
1396 scm_i_pthread_mutex_t *lock;
1397 SCM_VALIDATE_OPPORT (SCM_ARG1, port);
1398 scm_c_lock_port (port, &lock);
1399 if (lock)
1400 {
1401 scm_dynwind_unwind_handler (unlock_port, lock, SCM_F_WIND_EXPLICITLY);
1402 scm_dynwind_rewind_handler (lock_port, lock, 0);
1403 }
14dcb5cc 1404}
92c0ebac 1405#undef FUNC_NAME
14dcb5cc
AW
1406
1407
1408\f
1409
19b8d12b 1410/* Input. */
7be1705d 1411
0d959103
AW
1412int
1413scm_get_byte_or_eof (SCM port)
1414{
92c0ebac 1415 scm_i_pthread_mutex_t *lock;
0d959103
AW
1416 int ret;
1417
92c0ebac 1418 scm_c_lock_port (port, &lock);
0d959103 1419 ret = scm_get_byte_or_eof_unlocked (port);
92c0ebac
AW
1420 if (lock)
1421 scm_i_pthread_mutex_unlock (lock);
0d959103
AW
1422
1423 return ret;
1424}
1425
1426int
1427scm_peek_byte_or_eof (SCM port)
1428{
92c0ebac 1429 scm_i_pthread_mutex_t *lock;
0d959103
AW
1430 int ret;
1431
92c0ebac 1432 scm_c_lock_port (port, &lock);
0d959103 1433 ret = scm_peek_byte_or_eof_unlocked (port);
92c0ebac
AW
1434 if (lock)
1435 scm_i_pthread_mutex_unlock (lock);
0d959103
AW
1436
1437 return ret;
1438}
1439
19b8d12b
AW
1440/* scm_c_read
1441 *
1442 * Used by an application to read arbitrary number of bytes from an
1443 * SCM port. Same semantics as libc read, except that scm_c_read only
1444 * returns less than SIZE bytes if at end-of-file.
1445 *
1446 * Warning: Doesn't update port line and column counts! */
7b292a9d 1447
19b8d12b
AW
1448/* This structure, and the following swap_buffer function, are used
1449 for temporarily swapping a port's own read buffer, and the buffer
1450 that the caller of scm_c_read provides. */
1451struct port_and_swap_buffer
1452{
1453 scm_t_port *pt;
1454 unsigned char *buffer;
1455 size_t size;
1456};
7b292a9d 1457
19b8d12b
AW
1458static void
1459swap_buffer (void *data)
1460{
1461 struct port_and_swap_buffer *psb = (struct port_and_swap_buffer *) data;
1462 unsigned char *old_buf = psb->pt->read_buf;
1463 size_t old_size = psb->pt->read_buf_size;
7be1705d 1464
19b8d12b
AW
1465 /* Make the port use (buffer, size) from the struct. */
1466 psb->pt->read_pos = psb->pt->read_buf = psb->pt->read_end = psb->buffer;
1467 psb->pt->read_buf_size = psb->size;
7b292a9d 1468
19b8d12b
AW
1469 /* Save the port's old (buffer, size) in the struct. */
1470 psb->buffer = old_buf;
1471 psb->size = old_size;
7b292a9d
LC
1472}
1473
f6f4feb0
MW
1474static int scm_i_fill_input_unlocked (SCM port);
1475
19b8d12b 1476size_t
be632904 1477scm_c_read_unlocked (SCM port, void *buffer, size_t size)
19b8d12b 1478#define FUNC_NAME "scm_c_read"
7b292a9d
LC
1479{
1480 scm_t_port *pt;
f6f4feb0 1481 scm_t_port_internal *pti;
19b8d12b
AW
1482 size_t n_read = 0, n_available;
1483 struct port_and_swap_buffer psb;
889975e5 1484
19b8d12b 1485 SCM_VALIDATE_OPINPORT (1, port);
889975e5 1486
19b8d12b 1487 pt = SCM_PTAB_ENTRY (port);
f6f4feb0 1488 pti = SCM_PORT_GET_INTERNAL (port);
19b8d12b
AW
1489 if (pt->rw_active == SCM_PORT_WRITE)
1490 SCM_PORT_DESCRIPTOR (port)->flush (port);
889975e5 1491
19b8d12b
AW
1492 if (pt->rw_random)
1493 pt->rw_active = SCM_PORT_READ;
889975e5 1494
19b8d12b
AW
1495 /* Take bytes first from the port's read buffer. */
1496 if (pt->read_pos < pt->read_end)
1497 {
1498 n_available = min (size, pt->read_end - pt->read_pos);
1499 memcpy (buffer, pt->read_pos, n_available);
1500 buffer = (char *) buffer + n_available;
1501 pt->read_pos += n_available;
1502 n_read += n_available;
1503 size -= n_available;
1504 }
889975e5 1505
19b8d12b
AW
1506 /* Avoid the scm_dynwind_* costs if we now have enough data. */
1507 if (size == 0)
1508 return n_read;
b5cb4464 1509
f6f4feb0
MW
1510 /* Now we will call scm_i_fill_input_unlocked repeatedly until we have
1511 read the requested number of bytes. (Note that a single
1512 scm_i_fill_input_unlocked call does not guarantee to fill the whole
1513 of the port's read buffer.) */
d1b9f8ac 1514 if (pt->read_buf_size <= 1
f6f4feb0 1515 && pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
b5cb4464 1516 {
f6f4feb0
MW
1517 /* The port that we are reading from is unbuffered - i.e. does not
1518 have its own persistent buffer - but we have a buffer, provided
1519 by our caller, that is the right size for the data that is
1520 wanted. For the following scm_i_fill_input_unlocked calls,
1521 therefore, we use the buffer in hand as the port's read buffer.
1522
1523 We need to make sure that the port's normal (1 byte) buffer is
1524 reinstated in case one of the scm_i_fill_input_unlocked ()
1525 calls throws an exception; we use the scm_dynwind_* API to
1526 achieve that.
75192345
MG
1527
1528 A consequence of this optimization is that the fill_input
1529 functions can't unget characters. That'll push data to the
1530 pushback buffer instead of this psb buffer. */
1531#if SCM_DEBUG == 1
1532 unsigned char *pback = pt->putback_buf;
1533#endif
6d227556
NJ
1534 psb.pt = pt;
1535 psb.buffer = buffer;
1536 psb.size = size;
1537 scm_dynwind_begin (SCM_F_DYNWIND_REWINDABLE);
1538 scm_dynwind_rewind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1539 scm_dynwind_unwind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1540
f6f4feb0
MW
1541 /* Call scm_i_fill_input_unlocked until we have all the bytes that
1542 we need, or we hit EOF. */
1543 while (pt->read_buf_size && (scm_i_fill_input_unlocked (port) != EOF))
6d227556
NJ
1544 {
1545 pt->read_buf_size -= (pt->read_end - pt->read_pos);
1546 pt->read_pos = pt->read_buf = pt->read_end;
1547 }
75192345
MG
1548#if SCM_DEBUG == 1
1549 if (pback != pt->putback_buf
1550 || pt->read_buf - (unsigned char *) buffer < 0)
1551 scm_misc_error (FUNC_NAME,
1552 "scm_c_read must not call a fill function that pushes "
1553 "back characters onto an unbuffered port", SCM_EOL);
1554#endif
6d227556 1555 n_read += pt->read_buf - (unsigned char *) buffer;
75192345 1556
6d227556
NJ
1557 /* Reinstate the port's normal buffer. */
1558 scm_dynwind_end ();
1559 }
1560 else
1561 {
1562 /* The port has its own buffer. It is important that we use it,
1563 even if it happens to be smaller than our caller's buffer, so
1564 that a custom port implementation's entry points (in
1565 particular, fill_input) can rely on the buffer always being
1566 the same as they first set up. */
f6f4feb0 1567 while (size && (scm_i_fill_input_unlocked (port) != EOF))
6d227556
NJ
1568 {
1569 n_available = min (size, pt->read_end - pt->read_pos);
1570 memcpy (buffer, pt->read_pos, n_available);
1571 buffer = (char *) buffer + n_available;
1572 pt->read_pos += n_available;
1573 n_read += n_available;
1574 size -= n_available;
1575 }
1576 }
6fe692e9 1577
b5cb4464 1578 return n_read;
6fe692e9 1579}
693758d5 1580#undef FUNC_NAME
6fe692e9 1581
be632904
AW
1582size_t
1583scm_c_read (SCM port, void *buffer, size_t size)
1584{
92c0ebac 1585 scm_i_pthread_mutex_t *lock;
be632904
AW
1586 size_t ret;
1587
92c0ebac 1588 scm_c_lock_port (port, &lock);
be632904 1589 ret = scm_c_read_unlocked (port, buffer, size);
92c0ebac
AW
1590 if (lock)
1591 scm_i_pthread_mutex_unlock (lock);
1592
be632904
AW
1593
1594 return ret;
1595}
1596
19b8d12b
AW
1597/* Update the line and column number of PORT after consumption of C. */
1598static inline void
1599update_port_lf (scm_t_wchar c, SCM port)
6fe692e9 1600{
19b8d12b
AW
1601 switch (c)
1602 {
1603 case '\a':
1604 case EOF:
1605 break;
1606 case '\b':
1607 SCM_DECCOL (port);
1608 break;
1609 case '\n':
1610 SCM_INCLINE (port);
1611 break;
1612 case '\r':
1613 SCM_ZEROCOL (port);
1614 break;
1615 case '\t':
1616 SCM_TABCOL (port);
1617 break;
1618 default:
1619 SCM_INCCOL (port);
1620 break;
1621 }
1622}
6fe692e9 1623
19b8d12b 1624#define SCM_MBCHAR_BUF_SIZE (4)
6fe692e9 1625
19b8d12b
AW
1626/* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint.
1627 UTF8_BUF is assumed to contain a valid UTF-8 sequence. */
1628static scm_t_wchar
1629utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
283a1a0e 1630{
19b8d12b 1631 scm_t_wchar codepoint;
283a1a0e 1632
19b8d12b 1633 if (utf8_buf[0] <= 0x7f)
283a1a0e 1634 {
19b8d12b
AW
1635 assert (size == 1);
1636 codepoint = utf8_buf[0];
1637 }
1638 else if ((utf8_buf[0] & 0xe0) == 0xc0)
1639 {
1640 assert (size == 2);
1641 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL
1642 | (utf8_buf[1] & 0x3f);
1643 }
1644 else if ((utf8_buf[0] & 0xf0) == 0xe0)
1645 {
1646 assert (size == 3);
1647 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL
1648 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL
1649 | (utf8_buf[2] & 0x3f);
283a1a0e
GH
1650 }
1651 else
19b8d12b
AW
1652 {
1653 assert (size == 4);
1654 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL
1655 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL
1656 | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL
1657 | (utf8_buf[3] & 0x3f);
1658 }
283a1a0e 1659
19b8d12b 1660 return codepoint;
283a1a0e
GH
1661}
1662
19b8d12b
AW
1663/* Read a UTF-8 sequence from PORT. On success, return 0 and set
1664 *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8
1665 representation, and set *LEN to the length in bytes. Return
1666 `EILSEQ' on error. */
1667static int
1668get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
1669 scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1670{
1671#define ASSERT_NOT_EOF(b) \
1672 if (SCM_UNLIKELY ((b) == EOF)) \
1673 goto invalid_seq
1674#define CONSUME_PEEKED_BYTE() \
1675 pt->read_pos++
ee149d03 1676
19b8d12b
AW
1677 int byte;
1678 scm_t_port *pt;
ee149d03 1679
19b8d12b
AW
1680 *len = 0;
1681 pt = SCM_PTAB_ENTRY (port);
840ae05d 1682
0d959103 1683 byte = scm_get_byte_or_eof_unlocked (port);
19b8d12b 1684 if (byte == EOF)
6c951427 1685 {
19b8d12b
AW
1686 *codepoint = EOF;
1687 return 0;
1688 }
6c951427 1689
19b8d12b
AW
1690 buf[0] = (scm_t_uint8) byte;
1691 *len = 1;
6c951427 1692
19b8d12b
AW
1693 if (buf[0] <= 0x7f)
1694 /* 1-byte form. */
1695 *codepoint = buf[0];
1696 else if (buf[0] >= 0xc2 && buf[0] <= 0xdf)
1697 {
1698 /* 2-byte form. */
0d959103 1699 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b 1700 ASSERT_NOT_EOF (byte);
6c951427 1701
19b8d12b
AW
1702 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1703 goto invalid_seq;
6c951427 1704
19b8d12b
AW
1705 CONSUME_PEEKED_BYTE ();
1706 buf[1] = (scm_t_uint8) byte;
1707 *len = 2;
1708
1709 *codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL
1710 | (buf[1] & 0x3f);
6c951427 1711 }
19b8d12b 1712 else if ((buf[0] & 0xf0) == 0xe0)
6c951427 1713 {
19b8d12b 1714 /* 3-byte form. */
0d959103 1715 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b 1716 ASSERT_NOT_EOF (byte);
6c951427 1717
19b8d12b
AW
1718 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80
1719 || (buf[0] == 0xe0 && byte < 0xa0)
1720 || (buf[0] == 0xed && byte > 0x9f)))
1721 goto invalid_seq;
6c951427 1722
19b8d12b
AW
1723 CONSUME_PEEKED_BYTE ();
1724 buf[1] = (scm_t_uint8) byte;
1725 *len = 2;
1726
0d959103 1727 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b
AW
1728 ASSERT_NOT_EOF (byte);
1729
1730 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1731 goto invalid_seq;
1732
1733 CONSUME_PEEKED_BYTE ();
1734 buf[2] = (scm_t_uint8) byte;
1735 *len = 3;
1736
1737 *codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL
1738 | ((scm_t_wchar) buf[1] & 0x3f) << 6UL
1739 | (buf[2] & 0x3f);
6c951427 1740 }
19b8d12b
AW
1741 else if (buf[0] >= 0xf0 && buf[0] <= 0xf4)
1742 {
1743 /* 4-byte form. */
0d959103 1744 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b 1745 ASSERT_NOT_EOF (byte);
6c951427 1746
19b8d12b
AW
1747 if (SCM_UNLIKELY (((byte & 0xc0) != 0x80)
1748 || (buf[0] == 0xf0 && byte < 0x90)
1749 || (buf[0] == 0xf4 && byte > 0x8f)))
1750 goto invalid_seq;
ee149d03 1751
19b8d12b
AW
1752 CONSUME_PEEKED_BYTE ();
1753 buf[1] = (scm_t_uint8) byte;
1754 *len = 2;
889975e5 1755
0d959103 1756 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b 1757 ASSERT_NOT_EOF (byte);
889975e5 1758
19b8d12b
AW
1759 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1760 goto invalid_seq;
63479112 1761
19b8d12b
AW
1762 CONSUME_PEEKED_BYTE ();
1763 buf[2] = (scm_t_uint8) byte;
1764 *len = 3;
63479112 1765
0d959103 1766 byte = scm_peek_byte_or_eof_unlocked (port);
19b8d12b 1767 ASSERT_NOT_EOF (byte);
63479112 1768
19b8d12b
AW
1769 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1770 goto invalid_seq;
63479112 1771
19b8d12b
AW
1772 CONSUME_PEEKED_BYTE ();
1773 buf[3] = (scm_t_uint8) byte;
1774 *len = 4;
840ae05d 1775
19b8d12b
AW
1776 *codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL
1777 | ((scm_t_wchar) buf[1] & 0x3f) << 12UL
1778 | ((scm_t_wchar) buf[2] & 0x3f) << 6UL
1779 | (buf[3] & 0x3f);
ee149d03
JB
1780 }
1781 else
19b8d12b 1782 goto invalid_seq;
ee149d03 1783
19b8d12b 1784 return 0;
ee149d03 1785
19b8d12b
AW
1786 invalid_seq:
1787 /* Here we could choose the consume the faulty byte when it's not a
1788 valid starting byte, but it's not a requirement. What Section 3.9
1789 of Unicode 6.0.0 mandates, though, is to not consume a byte that
1790 would otherwise be a valid starting byte. */
ee149d03 1791
19b8d12b 1792 return EILSEQ;
ee149d03 1793
19b8d12b
AW
1794#undef CONSUME_PEEKED_BYTE
1795#undef ASSERT_NOT_EOF
1796}
1797
79eb47ea 1798/* Read an ISO-8859-1 codepoint (a byte) from PORT. On success, return
05b4d910
AW
1799 0 and set *CODEPOINT to the codepoint that was read, fill BUF with
1800 its UTF-8 representation, and set *LEN to the length in bytes.
1801 Return `EILSEQ' on error. */
79eb47ea
AW
1802static int
1803get_latin1_codepoint (SCM port, scm_t_wchar *codepoint,
1804 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1805{
1806 *codepoint = scm_get_byte_or_eof_unlocked (port);
1807
1808 if (*codepoint == EOF)
1809 *len = 0;
1810 else
1811 {
1812 *len = 1;
1813 buf[0] = *codepoint;
1814 }
1815 return 0;
1816}
1817
19b8d12b
AW
1818/* Likewise, read a byte sequence from PORT, passing it through its
1819 input conversion descriptor. */
1820static int
1821get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
1822 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1823{
6c98257f 1824 scm_t_iconv_descriptors *id;
19b8d12b 1825 scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
f6f4feb0 1826 size_t input_size = 0;
19b8d12b 1827
f6f4feb0 1828 id = scm_i_port_iconv_descriptors (port, SCM_PORT_READ);
19b8d12b 1829
f6f4feb0 1830 for (;;)
19b8d12b 1831 {
f6f4feb0
MW
1832 int byte_read;
1833 char *input, *output;
19b8d12b
AW
1834 size_t input_left, output_left, done;
1835
0d959103 1836 byte_read = scm_get_byte_or_eof_unlocked (port);
f6f4feb0 1837 if (SCM_UNLIKELY (byte_read == EOF))
19b8d12b 1838 {
f6f4feb0
MW
1839 if (SCM_LIKELY (input_size == 0))
1840 {
1841 *codepoint = (scm_t_wchar) EOF;
1842 *len = input_size;
1843 return 0;
1844 }
1845 else
1846 {
1847 /* EOF found in the middle of a multibyte character. */
1848 scm_i_set_pending_eof (port);
1849 return EILSEQ;
1850 }
19b8d12b
AW
1851 }
1852
f6f4feb0 1853 buf[input_size++] = byte_read;
19b8d12b
AW
1854
1855 input = buf;
f6f4feb0
MW
1856 input_left = input_size;
1857 output = (char *) utf8_buf;
19b8d12b
AW
1858 output_left = sizeof (utf8_buf);
1859
6c98257f 1860 done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
f6f4feb0 1861
19b8d12b
AW
1862 if (done == (size_t) -1)
1863 {
f6f4feb0
MW
1864 int err = errno;
1865 if (SCM_LIKELY (err == EINVAL))
1866 /* The input byte sequence did not form a complete
1867 character. Read another byte and try again. */
1868 continue;
1869 else
1870 return err;
19b8d12b
AW
1871 }
1872 else
f6f4feb0
MW
1873 {
1874 size_t output_size = sizeof (utf8_buf) - output_left;
1875 if (SCM_LIKELY (output_size > 0))
1876 {
1877 /* iconv generated output. Convert the UTF8_BUF sequence
1878 to a Unicode code point. */
1879 *codepoint = utf8_to_codepoint (utf8_buf, output_size);
1880 *len = input_size;
1881 return 0;
1882 }
1883 else
1884 {
1885 /* iconv consumed some bytes without producing any output.
1886 Most likely this means that a Unicode byte-order mark
1887 (BOM) was consumed, which should not be included in the
1888 returned buf. Shift any remaining bytes to the beginning
1889 of buf, and continue the loop. */
1890 memmove (buf, input, input_left);
1891 input_size = input_left;
1892 continue;
1893 }
1894 }
19b8d12b 1895 }
19b8d12b
AW
1896}
1897
1898/* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
1899 with the byte representation of the codepoint in PORT's encoding, and
1900 set *LEN to the length in bytes of that representation. Return 0 on
1901 success and an errno value on error. */
24ea9f9c 1902static SCM_C_INLINE int
19b8d12b
AW
1903get_codepoint (SCM port, scm_t_wchar *codepoint,
1904 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1905{
1906 int err;
1907 scm_t_port *pt = SCM_PTAB_ENTRY (port);
f6f4feb0 1908 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
19b8d12b 1909
f6f4feb0 1910 if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
19b8d12b 1911 err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
f6f4feb0 1912 else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
79eb47ea 1913 err = get_latin1_codepoint (port, codepoint, buf, len);
19b8d12b
AW
1914 else
1915 err = get_iconv_codepoint (port, codepoint, buf, len);
1916
1917 if (SCM_LIKELY (err == 0))
f6f4feb0
MW
1918 {
1919 if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read))
1920 {
1921 /* Record that we're no longer at stream start. */
1922 pti->at_stream_start_for_bom_read = 0;
1923 if (pt->rw_random)
1924 pti->at_stream_start_for_bom_write = 0;
1925
1926 /* If we just read a BOM in an encoding that recognizes them,
1927 then silently consume it and read another code point. */
1928 if (SCM_UNLIKELY
1929 (*codepoint == SCM_UNICODE_BOM
1930 && (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8
1931 || strcmp (pt->encoding, "UTF-16") == 0
1932 || strcmp (pt->encoding, "UTF-32") == 0)))
1933 return get_codepoint (port, codepoint, buf, len);
1934 }
1935 update_port_lf (*codepoint, port);
1936 }
19b8d12b
AW
1937 else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
1938 {
1939 *codepoint = '?';
1940 err = 0;
1941 update_port_lf (*codepoint, port);
1942 }
1943
1944 return err;
1945}
1946
1947/* Read a codepoint from PORT and return it. */
1948scm_t_wchar
be632904 1949scm_getc_unlocked (SCM port)
19b8d12b
AW
1950#define FUNC_NAME "scm_getc"
1951{
1952 int err;
1953 size_t len;
1954 scm_t_wchar codepoint;
1955 char buf[SCM_MBCHAR_BUF_SIZE];
1956
1957 err = get_codepoint (port, &codepoint, buf, &len);
1958 if (SCM_UNLIKELY (err != 0))
1959 /* At this point PORT should point past the invalid encoding, as per
1960 R6RS-lib Section 8.2.4. */
1961 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
1962
1963 return codepoint;
1964}
1965#undef FUNC_NAME
1966
be632904
AW
1967scm_t_wchar
1968scm_getc (SCM port)
1969{
92c0ebac 1970 scm_i_pthread_mutex_t *lock;
be632904
AW
1971 scm_t_wchar ret;
1972
92c0ebac 1973 scm_c_lock_port (port, &lock);
be632904 1974 ret = scm_getc_unlocked (port);
92c0ebac
AW
1975 if (lock)
1976 scm_i_pthread_mutex_unlock (lock);
1977
be632904
AW
1978
1979 return ret;
1980}
1981
19b8d12b
AW
1982SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
1983 (SCM port),
1984 "Return the next character available from @var{port}, updating\n"
1985 "@var{port} to point to the following character. If no more\n"
1986 "characters are available, the end-of-file object is returned.\n"
1987 "\n"
1988 "When @var{port}'s data cannot be decoded according to its\n"
1989 "character encoding, a @code{decoding-error} is raised and\n"
1990 "@var{port} points past the erroneous byte sequence.\n")
1991#define FUNC_NAME s_scm_read_char
1992{
1993 scm_t_wchar c;
1994 if (SCM_UNBNDP (port))
1995 port = scm_current_input_port ();
1996 SCM_VALIDATE_OPINPORT (1, port);
be632904 1997 c = scm_getc_unlocked (port);
19b8d12b
AW
1998 if (EOF == c)
1999 return SCM_EOF_VAL;
2000 return SCM_MAKE_CHAR (c);
2001}
2002#undef FUNC_NAME
2003
2004
2005\f
2006
2007/* Pushback. */
f6f4feb0 2008\f
19b8d12b 2009
f6f4feb0
MW
2010
2011static void
2012scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
2013#define FUNC_NAME "scm_unget_bytes"
19b8d12b
AW
2014{
2015 scm_t_port *pt = SCM_PTAB_ENTRY (port);
f6f4feb0 2016 size_t old_len, new_len;
19b8d12b 2017
f6f4feb0 2018 scm_i_clear_pending_eof (port);
19b8d12b 2019
f6f4feb0 2020 if (pt->read_buf != pt->putback_buf)
19b8d12b
AW
2021 /* switch to the put-back buffer. */
2022 {
2023 if (pt->putback_buf == NULL)
2024 {
f6f4feb0
MW
2025 pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
2026 ? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
19b8d12b
AW
2027 pt->putback_buf
2028 = (unsigned char *) scm_gc_malloc_pointerless
f6f4feb0 2029 (pt->putback_buf_size, "putback buffer");
19b8d12b
AW
2030 }
2031
2032 pt->saved_read_buf = pt->read_buf;
2033 pt->saved_read_pos = pt->read_pos;
2034 pt->saved_read_end = pt->read_end;
2035 pt->saved_read_buf_size = pt->read_buf_size;
2036
f6f4feb0
MW
2037 /* Put read_pos at the end of the buffer, so that ungets will not
2038 have to shift the buffer contents each time. */
2039 pt->read_buf = pt->putback_buf;
2040 pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
19b8d12b
AW
2041 pt->read_buf_size = pt->putback_buf_size;
2042 }
2043
f6f4feb0
MW
2044 old_len = pt->read_end - pt->read_pos;
2045 new_len = old_len + len;
2046
2047 if (new_len > pt->read_buf_size)
2048 /* The putback buffer needs to be enlarged. */
2049 {
2050 size_t new_buf_size;
2051 unsigned char *new_buf, *new_end, *new_pos;
2052
2053 new_buf_size = pt->read_buf_size * 2;
2054 if (new_buf_size < new_len)
2055 new_buf_size = new_len;
2056
2057 new_buf = (unsigned char *)
2058 scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
2059
2060 /* Put the bytes at the end of the buffer, so that future
2061 ungets won't need to shift the buffer. */
2062 new_end = new_buf + new_buf_size;
2063 new_pos = new_end - old_len;
2064 memcpy (new_pos, pt->read_pos, old_len);
2065
2066 pt->read_buf = pt->putback_buf = new_buf;
2067 pt->read_pos = new_pos;
2068 pt->read_end = new_end;
2069 pt->read_buf_size = pt->putback_buf_size = new_buf_size;
2070 }
2071 else if (pt->read_buf + len < pt->read_pos)
2072 /* If needed, shift the existing buffer contents up.
2073 This should not happen unless some external code
2074 manipulates the putback buffer pointers. */
2075 {
2076 unsigned char *new_end = pt->read_buf + pt->read_buf_size;
2077 unsigned char *new_pos = new_end - old_len;
2078
2079 memmove (new_pos, pt->read_pos, old_len);
2080 pt->read_pos = new_pos;
2081 pt->read_end = new_end;
2082 }
2083
2084 /* Move read_pos back and copy the bytes there. */
2085 pt->read_pos -= len;
2086 memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
2087
2088 if (pt->rw_active == SCM_PORT_WRITE)
2089 scm_flush (port);
19b8d12b
AW
2090
2091 if (pt->rw_random)
2092 pt->rw_active = SCM_PORT_READ;
2093}
2094#undef FUNC_NAME
2095
f6f4feb0
MW
2096void
2097scm_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
7f6c3f8f 2098{
f6f4feb0 2099 scm_i_unget_bytes_unlocked (buf, len, port);
7f6c3f8f
MW
2100}
2101
2102void
f6f4feb0
MW
2103scm_unget_byte_unlocked (int c, SCM port)
2104{
2105 unsigned char byte = c;
2106 scm_i_unget_bytes_unlocked (&byte, 1, port);
2107}
2108
2109void
2110scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
7f6c3f8f 2111{
f6f4feb0
MW
2112 scm_i_pthread_mutex_t *lock;
2113 scm_c_lock_port (port, &lock);
2114 scm_i_unget_bytes_unlocked (buf, len, port);
2115 if (lock)
2116 scm_i_pthread_mutex_unlock (lock);
2117}
7f6c3f8f 2118
c932ce0b
AW
2119void
2120scm_unget_byte (int c, SCM port)
2121{
f6f4feb0 2122 unsigned char byte = c;
92c0ebac
AW
2123 scm_i_pthread_mutex_t *lock;
2124 scm_c_lock_port (port, &lock);
f6f4feb0 2125 scm_i_unget_bytes_unlocked (&byte, 1, port);
92c0ebac
AW
2126 if (lock)
2127 scm_i_pthread_mutex_unlock (lock);
c932ce0b
AW
2128}
2129
19b8d12b 2130void
c932ce0b 2131scm_ungetc_unlocked (scm_t_wchar c, SCM port)
19b8d12b
AW
2132#define FUNC_NAME "scm_ungetc"
2133{
2134 scm_t_port *pt = SCM_PTAB_ENTRY (port);
be7ecef0 2135 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
19b8d12b
AW
2136 char *result;
2137 char result_buf[10];
19b8d12b 2138 size_t len;
19b8d12b 2139
19b8d12b 2140 len = sizeof (result_buf);
be7ecef0
AW
2141
2142 if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
2143 {
2144 if (c < 0xf0)
2145 {
2146 result_buf[0] = (char) c;
2147 result = result_buf;
2148 len = 1;
2149 }
2150 else
2151 result =
2152 (char *) u32_to_u8 ((uint32_t *) &c, 1, (uint8_t *) result_buf, &len);
2153 }
2154 else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1 && c <= 0xff)
2155 {
2156 result_buf[0] = (char) c;
2157 result = result_buf;
2158 len = 1;
2159 }
2160 else
2161 result = u32_conv_to_encoding (pt->encoding,
2162 (enum iconv_ilseq_handler) pt->ilseq_handler,
2163 (uint32_t *) &c, 1, NULL,
2164 result_buf, &len);
19b8d12b
AW
2165
2166 if (SCM_UNLIKELY (result == NULL || len == 0))
2167 scm_encoding_error (FUNC_NAME, errno,
2168 "conversion to port encoding failed",
2169 SCM_BOOL_F, SCM_MAKE_CHAR (c));
2170
f6f4feb0 2171 scm_i_unget_bytes_unlocked ((unsigned char *) result, len, port);
19b8d12b
AW
2172
2173 if (SCM_UNLIKELY (result != result_buf))
2174 free (result);
2175
2176 if (c == '\n')
2177 {
2178 /* What should col be in this case?
2179 * We'll leave it at -1.
2180 */
2181 SCM_LINUM (port) -= 1;
2182 }
2183 else
2184 SCM_COL(port) -= 1;
2185}
2186#undef FUNC_NAME
2187
c932ce0b
AW
2188void
2189scm_ungetc (scm_t_wchar c, SCM port)
2190{
92c0ebac
AW
2191 scm_i_pthread_mutex_t *lock;
2192 scm_c_lock_port (port, &lock);
c932ce0b 2193 scm_ungetc_unlocked (c, port);
92c0ebac
AW
2194 if (lock)
2195 scm_i_pthread_mutex_unlock (lock);
2196
c932ce0b 2197}
19b8d12b
AW
2198
2199void
c932ce0b 2200scm_ungets_unlocked (const char *s, int n, SCM port)
19b8d12b
AW
2201{
2202 /* This is simple minded and inefficient, but unreading strings is
2203 * probably not a common operation, and remember that line and
2204 * column numbers have to be handled...
2205 *
2206 * Please feel free to write an optimized version!
2207 */
2208 while (n--)
c932ce0b 2209 scm_ungetc_unlocked (s[n], port);
19b8d12b
AW
2210}
2211
c932ce0b
AW
2212void
2213scm_ungets (const char *s, int n, SCM port)
2214{
92c0ebac
AW
2215 scm_i_pthread_mutex_t *lock;
2216 scm_c_lock_port (port, &lock);
c932ce0b 2217 scm_ungets_unlocked (s, n, port);
92c0ebac
AW
2218 if (lock)
2219 scm_i_pthread_mutex_unlock (lock);
2220
c932ce0b 2221}
19b8d12b
AW
2222
2223SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
1bbd0b84 2224 (SCM port),
1e6808ea
MG
2225 "Return the next character available from @var{port},\n"
2226 "@emph{without} updating @var{port} to point to the following\n"
2227 "character. If no more characters are available, the\n"
c2dfff19
KR
2228 "end-of-file object is returned.\n"
2229 "\n"
2230 "The value returned by\n"
1e6808ea
MG
2231 "a call to @code{peek-char} is the same as the value that would\n"
2232 "have been returned by a call to @code{read-char} on the same\n"
2233 "port. The only difference is that the very next call to\n"
2234 "@code{read-char} or @code{peek-char} on that @var{port} will\n"
2235 "return the value returned by the preceding call to\n"
2236 "@code{peek-char}. In particular, a call to @code{peek-char} on\n"
2237 "an interactive port will hang waiting for input whenever a call\n"
c62da8f8
LC
2238 "to @code{read-char} would have hung.\n"
2239 "\n"
2240 "As for @code{read-char}, a @code{decoding-error} may be raised\n"
2241 "if such a situation occurs. However, unlike with @code{read-char},\n"
2242 "@var{port} still points at the beginning of the erroneous byte\n"
2243 "sequence when the error is raised.\n")
1bbd0b84 2244#define FUNC_NAME s_scm_peek_char
ee149d03 2245{
c62da8f8 2246 int err;
fd5eec2b
LC
2247 SCM result;
2248 scm_t_wchar c;
2249 char bytes[SCM_MBCHAR_BUF_SIZE];
7f6c3f8f 2250 long column, line;
fe8935d4 2251 size_t len = 0;
fd5eec2b 2252
ee149d03 2253 if (SCM_UNBNDP (port))
9de87eea 2254 port = scm_current_input_port ();
b2456dd4 2255 SCM_VALIDATE_OPINPORT (1, port);
fd5eec2b
LC
2256
2257 column = SCM_COL (port);
2258 line = SCM_LINUM (port);
2259
c62da8f8 2260 err = get_codepoint (port, &c, bytes, &len);
fd5eec2b 2261
f6f4feb0 2262 scm_i_unget_bytes_unlocked ((unsigned char *) bytes, len, port);
fd5eec2b 2263
c62da8f8
LC
2264 SCM_COL (port) = column;
2265 SCM_LINUM (port) = line;
fd5eec2b 2266
c62da8f8
LC
2267 if (SCM_UNLIKELY (err != 0))
2268 {
2269 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
2270
2271 /* Shouldn't happen since `catch' always aborts to prompt. */
2272 result = SCM_BOOL_F;
fd5eec2b 2273 }
c62da8f8 2274 else if (c == EOF)
69bc9ff3 2275 {
f6f4feb0
MW
2276 scm_i_set_pending_eof (port);
2277 result = SCM_EOF_VAL;
69bc9ff3 2278 }
c62da8f8
LC
2279 else
2280 result = SCM_MAKE_CHAR (c);
fd5eec2b
LC
2281
2282 return result;
3cb988bd 2283}
1bbd0b84 2284#undef FUNC_NAME
3cb988bd 2285
1be4270a 2286SCM_DEFINE (scm_unread_char, "unread-char", 1, 1, 0,
1bbd0b84 2287 (SCM cobj, SCM port),
b7e64f8b
BT
2288 "Place character @var{cobj} in @var{port} so that it will be\n"
2289 "read by the next read operation. If called multiple times, the\n"
2290 "unread characters will be read again in last-in first-out\n"
2291 "order. If @var{port} is not supplied, the current input port\n"
2292 "is used.")
1bbd0b84 2293#define FUNC_NAME s_scm_unread_char
0f2d19dd
JB
2294{
2295 int c;
2296
34d19ef6 2297 SCM_VALIDATE_CHAR (1, cobj);
0f2d19dd 2298 if (SCM_UNBNDP (port))
9de87eea 2299 port = scm_current_input_port ();
b2456dd4 2300 SCM_VALIDATE_OPINPORT (2, port);
0f2d19dd 2301
7866a09b 2302 c = SCM_CHAR (cobj);
0f2d19dd 2303
c932ce0b 2304 scm_ungetc_unlocked (c, port);
0f2d19dd
JB
2305 return cobj;
2306}
1bbd0b84 2307#undef FUNC_NAME
0f2d19dd 2308
a1ec6916 2309SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0,
1bbd0b84 2310 (SCM str, SCM port),
b380b885
MD
2311 "Place the string @var{str} in @var{port} so that its characters will be\n"
2312 "read in subsequent read operations. If called multiple times, the\n"
2313 "unread characters will be read again in last-in first-out order. If\n"
2314 "@var{port} is not supplied, the current-input-port is used.")
1bbd0b84 2315#define FUNC_NAME s_scm_unread_string
ee1e7e13 2316{
889975e5 2317 int n;
34d19ef6 2318 SCM_VALIDATE_STRING (1, str);
ee1e7e13 2319 if (SCM_UNBNDP (port))
9de87eea 2320 port = scm_current_input_port ();
b2456dd4 2321 SCM_VALIDATE_OPINPORT (2, port);
ee1e7e13 2322
889975e5
MG
2323 n = scm_i_string_length (str);
2324
2325 while (n--)
c932ce0b 2326 scm_ungetc_unlocked (scm_i_string_ref (str, n), port);
ee1e7e13
MD
2327
2328 return str;
2329}
1bbd0b84 2330#undef FUNC_NAME
ee1e7e13 2331
840ae05d 2332
19b8d12b 2333\f
23f2b9a3 2334
19b8d12b 2335/* Manipulating the buffers. */
840ae05d 2336
4251ae2e
AW
2337/* This routine does not take any locks, as it is usually called as part
2338 of a port implementation. */
19b8d12b
AW
2339void
2340scm_port_non_buffer (scm_t_port *pt)
2341{
2342 pt->read_pos = pt->read_buf = pt->read_end = &pt->shortbuf;
2343 pt->write_buf = pt->write_pos = &pt->shortbuf;
2344 pt->read_buf_size = pt->write_buf_size = 1;
2345 pt->write_end = pt->write_buf + pt->write_buf_size;
840ae05d 2346}
8ab3d8a0 2347
19b8d12b
AW
2348/* this should only be called when the read buffer is empty. it
2349 tries to refill the read buffer. it returns the first char from
2350 the port, which is either EOF or *(pt->read_pos). */
f6f4feb0
MW
2351static int
2352scm_i_fill_input_unlocked (SCM port)
82893676 2353{
19b8d12b 2354 scm_t_port *pt = SCM_PTAB_ENTRY (port);
f6f4feb0 2355 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
8ab3d8a0 2356
19b8d12b 2357 assert (pt->read_pos == pt->read_end);
8ab3d8a0 2358
f6f4feb0
MW
2359 if (pti->pending_eof)
2360 {
2361 pti->pending_eof = 0;
2362 return EOF;
2363 }
2364
19b8d12b 2365 if (pt->read_buf == pt->putback_buf)
82893676 2366 {
19b8d12b
AW
2367 /* finished reading put-back chars. */
2368 pt->read_buf = pt->saved_read_buf;
2369 pt->read_pos = pt->saved_read_pos;
2370 pt->read_end = pt->saved_read_end;
2371 pt->read_buf_size = pt->saved_read_buf_size;
2372 if (pt->read_pos < pt->read_end)
2373 return *(pt->read_pos);
82893676 2374 }
19b8d12b 2375 return SCM_PORT_DESCRIPTOR (port)->fill_input (port);
82893676 2376}
82893676 2377
4251ae2e
AW
2378int
2379scm_fill_input (SCM port)
2380{
92c0ebac 2381 scm_i_pthread_mutex_t *lock;
4251ae2e
AW
2382 int ret;
2383
92c0ebac 2384 scm_c_lock_port (port, &lock);
4251ae2e 2385 ret = scm_fill_input_unlocked (port);
92c0ebac
AW
2386 if (lock)
2387 scm_i_pthread_mutex_unlock (lock);
2388
4251ae2e
AW
2389
2390 return ret;
2391}
2392
f6f4feb0
MW
2393/* Slow-path fallback for 'scm_get_byte_or_eof_unlocked' */
2394int
2395scm_slow_get_byte_or_eof_unlocked (SCM port)
d14af9f2 2396{
f6f4feb0
MW
2397 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2398
2399 if (pt->rw_active == SCM_PORT_WRITE)
2400 scm_flush_unlocked (port);
2401
2402 if (pt->rw_random)
2403 pt->rw_active = SCM_PORT_READ;
2404
2405 if (pt->read_pos >= pt->read_end)
2406 {
2407 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2408 return EOF;
2409 }
2410
2411 return *pt->read_pos++;
d14af9f2
MD
2412}
2413
f6f4feb0
MW
2414/* Slow-path fallback for 'scm_peek_byte_or_eof_unlocked' */
2415int
2416scm_slow_peek_byte_or_eof_unlocked (SCM port)
2417{
2418 scm_t_port *pt = SCM_PTAB_ENTRY (port);
d6a6989e 2419
f6f4feb0
MW
2420 if (pt->rw_active == SCM_PORT_WRITE)
2421 scm_flush_unlocked (port);
889975e5 2422
f6f4feb0
MW
2423 if (pt->rw_random)
2424 pt->rw_active = SCM_PORT_READ;
2425
2426 if (pt->read_pos >= pt->read_end)
2427 {
2428 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2429 {
2430 scm_i_set_pending_eof (port);
2431 return EOF;
2432 }
2433 }
2434
2435 return *pt->read_pos;
2436}
2437
a3ded465
AW
2438/* Move up to READ_LEN bytes from PORT's putback and/or read buffers
2439 into memory starting at DEST. Return the number of bytes moved.
2440 PORT's line/column numbers are left unchanged. */
19b8d12b
AW
2441size_t
2442scm_take_from_input_buffers (SCM port, char *dest, size_t read_len)
840ae05d 2443{
19b8d12b 2444 scm_t_port *pt = SCM_PTAB_ENTRY (port);
a3ded465 2445 size_t bytes_read = 0;
19b8d12b 2446 size_t from_buf = min (pt->read_end - pt->read_pos, read_len);
840ae05d 2447
19b8d12b 2448 if (from_buf > 0)
840ae05d 2449 {
19b8d12b
AW
2450 memcpy (dest, pt->read_pos, from_buf);
2451 pt->read_pos += from_buf;
a3ded465 2452 bytes_read += from_buf;
19b8d12b
AW
2453 read_len -= from_buf;
2454 dest += from_buf;
840ae05d 2455 }
3fe6190f 2456
19b8d12b
AW
2457 /* if putback was active, try the real input buffer too. */
2458 if (pt->read_buf == pt->putback_buf)
69bc9ff3 2459 {
19b8d12b
AW
2460 from_buf = min (pt->saved_read_end - pt->saved_read_pos, read_len);
2461 if (from_buf > 0)
2462 {
2463 memcpy (dest, pt->saved_read_pos, from_buf);
2464 pt->saved_read_pos += from_buf;
a3ded465 2465 bytes_read += from_buf;
19b8d12b 2466 }
69bc9ff3 2467 }
8ab3d8a0 2468
a3ded465 2469 return bytes_read;
840ae05d
JB
2470}
2471
19b8d12b
AW
2472/* Clear a port's read buffers, returning the contents. */
2473SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0,
1bbd0b84 2474 (SCM port),
19b8d12b
AW
2475 "This procedure clears a port's input buffers, similar\n"
2476 "to the way that force-output clears the output buffer. The\n"
2477 "contents of the buffers are returned as a single string, e.g.,\n"
a150979d 2478 "\n"
19b8d12b
AW
2479 "@lisp\n"
2480 "(define p (open-input-file ...))\n"
2481 "(drain-input p) => empty string, nothing buffered yet.\n"
2482 "(unread-char (read-char p) p)\n"
2483 "(drain-input p) => initial chars from p, up to the buffer size.\n"
2484 "@end lisp\n\n"
2485 "Draining the buffers may be useful for cleanly finishing\n"
2486 "buffered I/O so that the file descriptor can be used directly\n"
2487 "for further input.")
2488#define FUNC_NAME s_scm_drain_input
0f2d19dd 2489{
19b8d12b
AW
2490 SCM result;
2491 char *data;
2492 scm_t_port *pt;
2493 long count;
0f2d19dd 2494
19b8d12b
AW
2495 SCM_VALIDATE_OPINPORT (1, port);
2496 pt = SCM_PTAB_ENTRY (port);
2497
2498 count = pt->read_end - pt->read_pos;
2499 if (pt->read_buf == pt->putback_buf)
2500 count += pt->saved_read_end - pt->saved_read_pos;
2501
2502 if (count)
2503 {
2504 result = scm_i_make_string (count, &data, 0);
2505 scm_take_from_input_buffers (port, data, count);
2506 }
2507 else
2508 result = scm_nullstr;
2509
2510 return result;
d043d8c2 2511}
1bbd0b84 2512#undef FUNC_NAME
d043d8c2 2513
19b8d12b 2514void
4251ae2e 2515scm_end_input_unlocked (SCM port)
0f2d19dd 2516{
19b8d12b
AW
2517 long offset;
2518 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2519
f6f4feb0 2520 scm_i_clear_pending_eof (port);
19b8d12b
AW
2521 if (pt->read_buf == pt->putback_buf)
2522 {
2523 offset = pt->read_end - pt->read_pos;
2524 pt->read_buf = pt->saved_read_buf;
2525 pt->read_pos = pt->saved_read_pos;
2526 pt->read_end = pt->saved_read_end;
2527 pt->read_buf_size = pt->saved_read_buf_size;
2528 }
2529 else
2530 offset = 0;
2531
2532 SCM_PORT_DESCRIPTOR (port)->end_input (port, offset);
0f2d19dd
JB
2533}
2534
4251ae2e
AW
2535void
2536scm_end_input (SCM port)
2537{
92c0ebac
AW
2538 scm_i_pthread_mutex_t *lock;
2539 scm_c_lock_port (port, &lock);
4251ae2e 2540 scm_end_input_unlocked (port);
92c0ebac
AW
2541 if (lock)
2542 scm_i_pthread_mutex_unlock (lock);
2543
4251ae2e
AW
2544}
2545
19b8d12b
AW
2546SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0,
2547 (SCM port),
2548 "Flush the specified output port, or the current output port if @var{port}\n"
2549 "is omitted. The current output buffer contents are passed to the\n"
2550 "underlying port implementation (e.g., in the case of fports, the\n"
2551 "data will be written to the file and the output buffer will be cleared.)\n"
2552 "It has no effect on an unbuffered port.\n\n"
2553 "The return value is unspecified.")
2554#define FUNC_NAME s_scm_force_output
d043d8c2 2555{
19b8d12b
AW
2556 if (SCM_UNBNDP (port))
2557 port = scm_current_output_port ();
2558 else
2559 {
2560 port = SCM_COERCE_OUTPORT (port);
2561 SCM_VALIDATE_OPOUTPORT (1, port);
2562 }
4251ae2e 2563 scm_flush_unlocked (port);
564478fd 2564 return SCM_UNSPECIFIED;
d043d8c2 2565}
1bbd0b84 2566#undef FUNC_NAME
d043d8c2 2567
19b8d12b 2568void
4251ae2e 2569scm_flush_unlocked (SCM port)
0f2d19dd 2570{
19b8d12b 2571 SCM_PORT_DESCRIPTOR (port)->flush (port);
0f2d19dd
JB
2572}
2573
4251ae2e
AW
2574void
2575scm_flush (SCM port)
2576{
92c0ebac
AW
2577 scm_i_pthread_mutex_t *lock;
2578 scm_c_lock_port (port, &lock);
4251ae2e 2579 scm_flush_unlocked (port);
92c0ebac
AW
2580 if (lock)
2581 scm_i_pthread_mutex_unlock (lock);
2582
4251ae2e
AW
2583}
2584
f6f4feb0
MW
2585int
2586scm_fill_input_unlocked (SCM port)
337edc59 2587{
f6f4feb0
MW
2588 return scm_i_fill_input_unlocked (port);
2589}
337edc59 2590
d14af9f2 2591
19b8d12b 2592\f
d6a6989e 2593
19b8d12b 2594/* Output. */
889975e5 2595
0607ebbf
AW
2596void
2597scm_putc (char c, SCM port)
2598{
92c0ebac
AW
2599 scm_i_pthread_mutex_t *lock;
2600 scm_c_lock_port (port, &lock);
0607ebbf 2601 scm_putc_unlocked (c, port);
92c0ebac
AW
2602 if (lock)
2603 scm_i_pthread_mutex_unlock (lock);
2604
0607ebbf
AW
2605}
2606
2607void
2608scm_puts (const char *s, SCM port)
2609{
92c0ebac
AW
2610 scm_i_pthread_mutex_t *lock;
2611 scm_c_lock_port (port, &lock);
0607ebbf 2612 scm_puts_unlocked (s, port);
92c0ebac
AW
2613 if (lock)
2614 scm_i_pthread_mutex_unlock (lock);
2615
0607ebbf
AW
2616}
2617
19b8d12b
AW
2618/* scm_c_write
2619 *
2620 * Used by an application to write arbitrary number of bytes to an SCM
2621 * port. Similar semantics as libc write. However, unlike libc
2622 * write, scm_c_write writes the requested number of bytes and has no
2623 * return value.
2624 *
2625 * Warning: Doesn't update port line and column counts!
2626 */
9d9c66ba 2627void
f209aeee 2628scm_c_write_unlocked (SCM port, const void *ptr, size_t size)
19b8d12b 2629#define FUNC_NAME "scm_c_write"
9d9c66ba 2630{
19b8d12b
AW
2631 scm_t_port *pt;
2632 scm_t_ptob_descriptor *ptob;
9d9c66ba 2633
19b8d12b 2634 SCM_VALIDATE_OPOUTPORT (1, port);
9d9c66ba 2635
19b8d12b
AW
2636 pt = SCM_PTAB_ENTRY (port);
2637 ptob = SCM_PORT_DESCRIPTOR (port);
9d9c66ba 2638
19b8d12b 2639 if (pt->rw_active == SCM_PORT_READ)
4251ae2e 2640 scm_end_input_unlocked (port);
19b8d12b
AW
2641
2642 ptob->write (port, ptr, size);
2643
2644 if (pt->rw_random)
2645 pt->rw_active = SCM_PORT_WRITE;
889975e5 2646}
19b8d12b 2647#undef FUNC_NAME
889975e5 2648
f209aeee
AW
2649void
2650scm_c_write (SCM port, const void *ptr, size_t size)
2651{
92c0ebac
AW
2652 scm_i_pthread_mutex_t *lock;
2653 scm_c_lock_port (port, &lock);
f209aeee 2654 scm_c_write_unlocked (port, ptr, size);
92c0ebac
AW
2655 if (lock)
2656 scm_i_pthread_mutex_unlock (lock);
2657
f209aeee
AW
2658}
2659
19b8d12b
AW
2660/* scm_lfwrite
2661 *
2662 * This function differs from scm_c_write; it updates port line and
2663 * column. */
889975e5 2664void
f209aeee 2665scm_lfwrite_unlocked (const char *ptr, size_t size, SCM port)
889975e5 2666{
19b8d12b
AW
2667 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2668 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
f4bc4e59 2669
19b8d12b 2670 if (pt->rw_active == SCM_PORT_READ)
4251ae2e 2671 scm_end_input_unlocked (port);
f4bc4e59 2672
19b8d12b 2673 ptob->write (port, ptr, size);
f4bc4e59 2674
19b8d12b
AW
2675 for (; size; ptr++, size--)
2676 update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port);
d9544bf0 2677
19b8d12b
AW
2678 if (pt->rw_random)
2679 pt->rw_active = SCM_PORT_WRITE;
2680}
f4bc4e59 2681
f209aeee
AW
2682void
2683scm_lfwrite (const char *ptr, size_t size, SCM port)
2684{
92c0ebac
AW
2685 scm_i_pthread_mutex_t *lock;
2686 scm_c_lock_port (port, &lock);
f209aeee 2687 scm_lfwrite_unlocked (ptr, size, port);
92c0ebac
AW
2688 if (lock)
2689 scm_i_pthread_mutex_unlock (lock);
2690
f209aeee
AW
2691}
2692
19b8d12b
AW
2693/* Write STR to PORT from START inclusive to END exclusive. */
2694void
2695scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
2696{
2697 scm_t_port *pt = SCM_PTAB_ENTRY (port);
f4bc4e59 2698
19b8d12b 2699 if (pt->rw_active == SCM_PORT_READ)
4251ae2e 2700 scm_end_input_unlocked (port);
f4bc4e59 2701
19b8d12b
AW
2702 if (end == (size_t) -1)
2703 end = scm_i_string_length (str);
f4bc4e59 2704
fa980bcc 2705 scm_i_display_substring (str, start, end, port);
f4bc4e59 2706
19b8d12b
AW
2707 if (pt->rw_random)
2708 pt->rw_active = SCM_PORT_WRITE;
889975e5
MG
2709}
2710
19b8d12b
AW
2711
2712\f
2713
2714/* Querying and setting positions, and character availability. */
2715
2716SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0,
889975e5 2717 (SCM port),
19b8d12b
AW
2718 "Return @code{#t} if a character is ready on input @var{port}\n"
2719 "and return @code{#f} otherwise. If @code{char-ready?} returns\n"
2720 "@code{#t} then the next @code{read-char} operation on\n"
2721 "@var{port} is guaranteed not to hang. If @var{port} is a file\n"
2722 "port at end of file then @code{char-ready?} returns @code{#t}.\n"
2723 "\n"
2724 "@code{char-ready?} exists to make it possible for a\n"
2725 "program to accept characters from interactive ports without\n"
2726 "getting stuck waiting for input. Any input editors associated\n"
2727 "with such ports must make sure that characters whose existence\n"
2728 "has been asserted by @code{char-ready?} cannot be rubbed out.\n"
2729 "If @code{char-ready?} were to return @code{#f} at end of file,\n"
2730 "a port at end of file would be indistinguishable from an\n"
2731 "interactive port that has no ready characters.")
2732#define FUNC_NAME s_scm_char_ready_p
889975e5
MG
2733{
2734 scm_t_port *pt;
889975e5 2735
19b8d12b
AW
2736 if (SCM_UNBNDP (port))
2737 port = scm_current_input_port ();
2738 /* It's possible to close the current input port, so validate even in
2739 this case. */
2740 SCM_VALIDATE_OPINPORT (1, port);
889975e5
MG
2741
2742 pt = SCM_PTAB_ENTRY (port);
19b8d12b
AW
2743
2744 /* if the current read buffer is filled, or the
2745 last pushed-back char has been read and the saved buffer is
2746 filled, result is true. */
2747 if (pt->read_pos < pt->read_end
2748 || (pt->read_buf == pt->putback_buf
2749 && pt->saved_read_pos < pt->saved_read_end))
2750 return SCM_BOOL_T;
889975e5 2751 else
19b8d12b
AW
2752 {
2753 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
2754
2755 if (ptob->input_waiting)
2756 return scm_from_bool(ptob->input_waiting (port));
2757 else
2758 return SCM_BOOL_T;
2759 }
889975e5
MG
2760}
2761#undef FUNC_NAME
d6a6989e 2762
19b8d12b
AW
2763SCM_DEFINE (scm_seek, "seek", 3, 0, 0,
2764 (SCM fd_port, SCM offset, SCM whence),
0858753e 2765 "Sets the current position of @var{fd_port} to the integer\n"
19b8d12b
AW
2766 "@var{offset}, which is interpreted according to the value of\n"
2767 "@var{whence}.\n"
2768 "\n"
2769 "One of the following variables should be supplied for\n"
2770 "@var{whence}:\n"
2771 "@defvar SEEK_SET\n"
2772 "Seek from the beginning of the file.\n"
2773 "@end defvar\n"
2774 "@defvar SEEK_CUR\n"
2775 "Seek from the current position.\n"
2776 "@end defvar\n"
2777 "@defvar SEEK_END\n"
2778 "Seek from the end of the file.\n"
2779 "@end defvar\n"
0858753e 2780 "If @var{fd_port} is a file descriptor, the underlying system\n"
19b8d12b
AW
2781 "call is @code{lseek}. @var{port} may be a string port.\n"
2782 "\n"
2783 "The value returned is the new position in the file. This means\n"
2784 "that the current position of a port can be obtained using:\n"
2785 "@lisp\n"
2786 "(seek port 0 SEEK_CUR)\n"
2787 "@end lisp")
2788#define FUNC_NAME s_scm_seek
889975e5 2789{
19b8d12b 2790 int how;
889975e5 2791
19b8d12b 2792 fd_port = SCM_COERCE_OUTPORT (fd_port);
889975e5 2793
19b8d12b
AW
2794 how = scm_to_int (whence);
2795 if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END)
2796 SCM_OUT_OF_RANGE (3, whence);
da288f50 2797
19b8d12b
AW
2798 if (SCM_OPPORTP (fd_port))
2799 {
f6f4feb0 2800 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (fd_port);
19b8d12b
AW
2801 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (fd_port);
2802 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2803 off_t_or_off64_t rv;
2804
2805 if (!ptob->seek)
2806 SCM_MISC_ERROR ("port is not seekable",
2807 scm_cons (fd_port, SCM_EOL));
2808 else
f6f4feb0 2809 rv = ptob->seek (fd_port, off, how);
889975e5 2810
f6f4feb0
MW
2811 /* Set stream-start flags according to new position. */
2812 pti->at_stream_start_for_bom_read = (rv == 0);
2813 pti->at_stream_start_for_bom_write = (rv == 0);
b22e94db 2814
f6f4feb0 2815 scm_i_clear_pending_eof (fd_port);
889975e5 2816
19b8d12b
AW
2817 return scm_from_off_t_or_off64_t (rv);
2818 }
2819 else /* file descriptor?. */
2820 {
2821 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2822 off_t_or_off64_t rv;
2823 rv = lseek_or_lseek64 (scm_to_int (fd_port), off, how);
2824 if (rv == -1)
2825 SCM_SYSERROR;
2826 return scm_from_off_t_or_off64_t (rv);
2827 }
889975e5
MG
2828}
2829#undef FUNC_NAME
2830
19b8d12b
AW
2831#ifndef O_BINARY
2832#define O_BINARY 0
2833#endif
889975e5 2834
19b8d12b
AW
2835/* Mingw has ftruncate(), perhaps implemented above using chsize, but
2836 doesn't have the filename version truncate(), hence this code. */
2837#if HAVE_FTRUNCATE && ! HAVE_TRUNCATE
2838static int
2839truncate (const char *file, off_t length)
889975e5 2840{
19b8d12b
AW
2841 int ret, fdes;
2842
2843 fdes = open (file, O_BINARY | O_WRONLY);
2844 if (fdes == -1)
2845 return -1;
2846
2847 ret = ftruncate (fdes, length);
2848 if (ret == -1)
889975e5 2849 {
19b8d12b
AW
2850 int save_errno = errno;
2851 close (fdes);
2852 errno = save_errno;
2853 return -1;
889975e5 2854 }
19b8d12b
AW
2855
2856 return close (fdes);
889975e5 2857}
19b8d12b 2858#endif /* HAVE_FTRUNCATE && ! HAVE_TRUNCATE */
889975e5 2859
19b8d12b
AW
2860SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0,
2861 (SCM object, SCM length),
0858753e
AW
2862 "Truncate file @var{object} to @var{length} bytes. @var{object}\n"
2863 "can be a filename string, a port object, or an integer file\n"
2864 "descriptor.\n"
19b8d12b
AW
2865 "The return value is unspecified.\n"
2866 "\n"
2867 "For a port or file descriptor @var{length} can be omitted, in\n"
2868 "which case the file is truncated at the current position (per\n"
2869 "@code{ftell} above).\n"
2870 "\n"
2871 "On most systems a file can be extended by giving a length\n"
2872 "greater than the current size, but this is not mandatory in the\n"
2873 "POSIX standard.")
2874#define FUNC_NAME s_scm_truncate_file
889975e5 2875{
19b8d12b
AW
2876 int rv;
2877
2878 /* "object" can be a port, fdes or filename.
2879
2880 Negative "length" makes no sense, but it's left to truncate() or
2881 ftruncate() to give back an error for that (normally EINVAL).
2882 */
2883
2884 if (SCM_UNBNDP (length))
889975e5 2885 {
19b8d12b
AW
2886 /* must supply length if object is a filename. */
2887 if (scm_is_string (object))
2888 SCM_MISC_ERROR("must supply length if OBJECT is a filename", SCM_EOL);
2889
2890 length = scm_seek (object, SCM_INUM0, scm_from_int (SEEK_CUR));
2891 }
2892
2893 object = SCM_COERCE_OUTPORT (object);
2894 if (scm_is_integer (object))
2895 {
2896 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2897 SCM_SYSCALL (rv = ftruncate_or_ftruncate64 (scm_to_int (object),
2898 c_length));
2899 }
2900 else if (SCM_OPOUTPORTP (object))
2901 {
2902 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2903 scm_t_port *pt = SCM_PTAB_ENTRY (object);
2904 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (object);
b22e94db 2905
19b8d12b
AW
2906 if (!ptob->truncate)
2907 SCM_MISC_ERROR ("port is not truncatable", SCM_EOL);
889975e5 2908
f6f4feb0 2909 scm_i_clear_pending_eof (object);
19b8d12b 2910 if (pt->rw_active == SCM_PORT_READ)
4251ae2e 2911 scm_end_input_unlocked (object);
19b8d12b
AW
2912 else if (pt->rw_active == SCM_PORT_WRITE)
2913 ptob->flush (object);
889975e5 2914
19b8d12b
AW
2915 ptob->truncate (object, c_length);
2916 rv = 0;
889975e5
MG
2917 }
2918 else
2919 {
19b8d12b
AW
2920 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2921 char *str = scm_to_locale_string (object);
2922 int eno;
2923 SCM_SYSCALL (rv = truncate_or_truncate64 (str, c_length));
2924 eno = errno;
2925 free (str);
2926 errno = eno;
889975e5 2927 }
19b8d12b
AW
2928 if (rv == -1)
2929 SCM_SYSERROR;
2930 return SCM_UNSPECIFIED;
889975e5 2931}
19b8d12b 2932#undef FUNC_NAME
889975e5 2933
19b8d12b
AW
2934SCM_DEFINE (scm_port_line, "port-line", 1, 0, 0,
2935 (SCM port),
2936 "Return the current line number for @var{port}.\n"
889975e5 2937 "\n"
19b8d12b
AW
2938 "The first line of a file is 0. But you might want to add 1\n"
2939 "when printing line numbers, since starting from 1 is\n"
2940 "traditional in error messages, and likely to be more natural to\n"
2941 "non-programmers.")
2942#define FUNC_NAME s_scm_port_line
889975e5 2943{
19b8d12b
AW
2944 port = SCM_COERCE_OUTPORT (port);
2945 SCM_VALIDATE_OPENPORT (1, port);
2946 return scm_from_long (SCM_LINUM (port));
889975e5
MG
2947}
2948#undef FUNC_NAME
2949
19b8d12b
AW
2950SCM_DEFINE (scm_set_port_line_x, "set-port-line!", 2, 0, 0,
2951 (SCM port, SCM line),
2952 "Set the current line number for @var{port} to @var{line}. The\n"
2953 "first line of a file is 0.")
2954#define FUNC_NAME s_scm_set_port_line_x
889975e5 2955{
19b8d12b
AW
2956 port = SCM_COERCE_OUTPORT (port);
2957 SCM_VALIDATE_OPENPORT (1, port);
2958 SCM_PTAB_ENTRY (port)->line_number = scm_to_long (line);
2959 return SCM_UNSPECIFIED;
2960}
2961#undef FUNC_NAME
889975e5 2962
19b8d12b
AW
2963SCM_DEFINE (scm_port_column, "port-column", 1, 0, 0,
2964 (SCM port),
2965 "Return the current column number of @var{port}.\n"
2966 "If the number is\n"
2967 "unknown, the result is #f. Otherwise, the result is a 0-origin integer\n"
2968 "- i.e. the first character of the first line is line 0, column 0.\n"
2969 "(However, when you display a file position, for example in an error\n"
2970 "message, we recommend you add 1 to get 1-origin integers. This is\n"
2971 "because lines and column numbers traditionally start with 1, and that is\n"
2972 "what non-programmers will find most natural.)")
2973#define FUNC_NAME s_scm_port_column
2974{
2975 port = SCM_COERCE_OUTPORT (port);
2976 SCM_VALIDATE_OPENPORT (1, port);
2977 return scm_from_int (SCM_COL (port));
2978}
2979#undef FUNC_NAME
889975e5 2980
19b8d12b
AW
2981SCM_DEFINE (scm_set_port_column_x, "set-port-column!", 2, 0, 0,
2982 (SCM port, SCM column),
2983 "Set the current column of @var{port}. Before reading the first\n"
2984 "character on a line the column should be 0.")
2985#define FUNC_NAME s_scm_set_port_column_x
2986{
2987 port = SCM_COERCE_OUTPORT (port);
2988 SCM_VALIDATE_OPENPORT (1, port);
2989 SCM_PTAB_ENTRY (port)->column_number = scm_to_int (column);
2990 return SCM_UNSPECIFIED;
2991}
2992#undef FUNC_NAME
889975e5 2993
19b8d12b
AW
2994SCM_DEFINE (scm_port_filename, "port-filename", 1, 0, 0,
2995 (SCM port),
2996 "Return the filename associated with @var{port}, or @code{#f}\n"
2997 "if no filename is associated with the port.")
2998#define FUNC_NAME s_scm_port_filename
2999{
3000 port = SCM_COERCE_OUTPORT (port);
3001 SCM_VALIDATE_OPENPORT (1, port);
3002 return SCM_FILENAME (port);
3003}
3004#undef FUNC_NAME
889975e5 3005
19b8d12b
AW
3006SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0,
3007 (SCM port, SCM filename),
3008 "Change the filename associated with @var{port}, using the current input\n"
3009 "port if none is specified. Note that this does not change the port's\n"
3010 "source of data, but only the value that is returned by\n"
3011 "@code{port-filename} and reported in diagnostic output.")
3012#define FUNC_NAME s_scm_set_port_filename_x
3013{
3014 port = SCM_COERCE_OUTPORT (port);
3015 SCM_VALIDATE_OPENPORT (1, port);
3016 /* We allow the user to set the filename to whatever he likes. */
3017 SCM_SET_FILENAME (port, filename);
889975e5
MG
3018 return SCM_UNSPECIFIED;
3019}
3020#undef FUNC_NAME
3021
3022
19b8d12b
AW
3023\f
3024
3025/* Implementation helpers for port printing functions. */
889975e5 3026
f12733c9
MD
3027void
3028scm_print_port_mode (SCM exp, SCM port)
3029{
0607ebbf 3030 scm_puts_unlocked (SCM_CLOSEDP (exp)
f12733c9 3031 ? "closed: "
f9a64404
DH
3032 : (SCM_RDNG & SCM_CELL_WORD_0 (exp)
3033 ? (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
f12733c9
MD
3034 ? "input-output: "
3035 : "input: ")
f9a64404 3036 : (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
f12733c9
MD
3037 ? "output: "
3038 : "bogus: ")),
3039 port);
3040}
1cc91f1b 3041
f12733c9 3042int
e81d98ec 3043scm_port_print (SCM exp, SCM port, scm_print_state *pstate SCM_UNUSED)
0f2d19dd 3044{
f12733c9
MD
3045 char *type = SCM_PTOBNAME (SCM_PTOBNUM (exp));
3046 if (!type)
3047 type = "port";
0607ebbf 3048 scm_puts_unlocked ("#<", port);
f12733c9 3049 scm_print_port_mode (exp, port);
0607ebbf
AW
3050 scm_puts_unlocked (type, port);
3051 scm_putc_unlocked (' ', port);
0345e278 3052 scm_uintprint (SCM_CELL_WORD_1 (exp), 16, port);
0607ebbf 3053 scm_putc_unlocked ('>', port);
f12733c9 3054 return 1;
0f2d19dd
JB
3055}
3056
19b8d12b
AW
3057
3058\f
3059
3060/* Iterating over all ports. */
3061
3062struct for_each_data
3063{
3064 void (*proc) (void *data, SCM p);
3065 void *data;
3066};
3067
3068static SCM
3069for_each_trampoline (void *data, SCM port, SCM result)
3070{
3071 struct for_each_data *d = data;
3072
3073 d->proc (d->data, port);
3074
3075 return result;
3076}
3077
3078void
3079scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data)
3080{
3081 struct for_each_data d;
3082
3083 d.proc = proc;
3084 d.data = data;
3085
3086 scm_c_weak_set_fold (for_each_trampoline, &d, SCM_EOL,
3087 scm_i_port_weak_set);
3088}
3089
3090static void
3091scm_for_each_trampoline (void *data, SCM port)
3092{
3093 scm_call_1 (SCM_PACK_POINTER (data), port);
3094}
3095
3096SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0,
3097 (SCM proc),
3098 "Apply @var{proc} to each port in the Guile port table\n"
3099 "in turn. The return value is unspecified. More specifically,\n"
3100 "@var{proc} is applied exactly once to every port that exists\n"
0858753e
AW
3101 "in the system at the time @code{port-for-each} is invoked.\n"
3102 "Changes to the port table while @code{port-for-each} is running\n"
3103 "have no effect as far as @code{port-for-each} is concerned.")
19b8d12b
AW
3104#define FUNC_NAME s_scm_port_for_each
3105{
3106 SCM_VALIDATE_PROC (1, proc);
3107
3108 scm_c_port_for_each (scm_for_each_trampoline, SCM_UNPACK_POINTER (proc));
3109
3110 return SCM_UNSPECIFIED;
3111}
3112#undef FUNC_NAME
3113
3114static void
3115flush_output_port (void *closure, SCM port)
3116{
3117 if (SCM_OPOUTPORTP (port))
4251ae2e 3118 scm_flush_unlocked (port);
19b8d12b
AW
3119}
3120
3121SCM_DEFINE (scm_flush_all_ports, "flush-all-ports", 0, 0, 0,
3122 (),
3123 "Equivalent to calling @code{force-output} on\n"
3124 "all open output ports. The return value is unspecified.")
3125#define FUNC_NAME s_scm_flush_all_ports
3126{
3127 scm_c_port_for_each (&flush_output_port, NULL);
3128 return SCM_UNSPECIFIED;
3129}
3130#undef FUNC_NAME
3131
3132
0f2d19dd 3133\f
ee149d03 3134
d68fee48 3135/* Void ports. */
0f2d19dd 3136
92c2555f 3137scm_t_bits scm_tc16_void_port = 0;
0f2d19dd 3138
e81d98ec 3139static int fill_input_void_port (SCM port SCM_UNUSED)
283a1a0e 3140{
70df8af6 3141 return EOF;
283a1a0e
GH
3142}
3143
31703ab8 3144static void
e81d98ec
DH
3145write_void_port (SCM port SCM_UNUSED,
3146 const void *data SCM_UNUSED,
3147 size_t size SCM_UNUSED)
31703ab8
GH
3148{
3149}
3150
d617ee18
MV
3151static SCM
3152scm_i_void_port (long mode_bits)
0f2d19dd 3153{
2721f918
AW
3154 SCM ret;
3155
3156 ret = scm_c_make_port (scm_tc16_void_port, mode_bits, 0);
da220f27 3157
2721f918 3158 scm_port_non_buffer (SCM_PTAB_ENTRY (ret));
402788a9 3159
2721f918 3160 return ret;
0f2d19dd
JB
3161}
3162
d617ee18
MV
3163SCM
3164scm_void_port (char *mode_str)
3165{
3166 return scm_i_void_port (scm_mode_bits (mode_str));
3167}
3168
a1ec6916 3169SCM_DEFINE (scm_sys_make_void_port, "%make-void-port", 1, 0, 0,
1bbd0b84 3170 (SCM mode),
70df8af6 3171 "Create and return a new void port. A void port acts like\n"
bb2c02f2 3172 "@file{/dev/null}. The @var{mode} argument\n"
70df8af6 3173 "specifies the input/output modes for this port: see the\n"
b380b885 3174 "documentation for @code{open-file} in @ref{File Ports}.")
1bbd0b84 3175#define FUNC_NAME s_scm_sys_make_void_port
0f2d19dd 3176{
d617ee18 3177 return scm_i_void_port (scm_i_mode_bits (mode));
0f2d19dd 3178}
1bbd0b84 3179#undef FUNC_NAME
0f2d19dd 3180
19b8d12b 3181
0f2d19dd 3182\f
19b8d12b 3183
89545eba 3184/* Initialization. */
1cc91f1b 3185
0f2d19dd
JB
3186void
3187scm_init_ports ()
0f2d19dd 3188{
840ae05d 3189 /* lseek() symbols. */
e11e83f3
MV
3190 scm_c_define ("SEEK_SET", scm_from_int (SEEK_SET));
3191 scm_c_define ("SEEK_CUR", scm_from_int (SEEK_CUR));
3192 scm_c_define ("SEEK_END", scm_from_int (SEEK_END));
840ae05d 3193
70df8af6
GH
3194 scm_tc16_void_port = scm_make_port_type ("void", fill_input_void_port,
3195 write_void_port);
9de87eea 3196
f39448c5
AW
3197 cur_inport_fluid = scm_make_fluid ();
3198 cur_outport_fluid = scm_make_fluid ();
3199 cur_errport_fluid = scm_make_fluid ();
3200 cur_loadport_fluid = scm_make_fluid ();
9de87eea 3201
2721f918 3202 scm_i_port_weak_set = scm_c_make_weak_set (31);
d6a6989e 3203
a0599745 3204#include "libguile/ports.x"
889975e5 3205
d6a6989e 3206 /* Use Latin-1 as the default port encoding. */
c81c2ad3
AW
3207 SCM_VARIABLE_SET (default_port_encoding_var,
3208 scm_make_fluid_with_default (SCM_BOOL_F));
889975e5 3209 scm_port_encoding_init = 1;
d6a6989e 3210
b22e94db
LC
3211 SCM_VARIABLE_SET (default_conversion_strategy_var,
3212 scm_make_fluid_with_default (sym_substitute));
889975e5 3213 scm_conversion_strategy_init = 1;
b22e94db 3214
9670f238
AW
3215 /* These bindings are used when boot-9 turns `current-input-port' et
3216 al into parameters. They are then removed from the guile module. */
3217 scm_c_define ("%current-input-port-fluid", cur_inport_fluid);
3218 scm_c_define ("%current-output-port-fluid", cur_outport_fluid);
3219 scm_c_define ("%current-error-port-fluid", cur_errport_fluid);
0f2d19dd 3220}
89e00824
ML
3221
3222/*
3223 Local Variables:
3224 c-file-style: "gnu"
3225 End:
3226*/