Merge branch 'stable-2.0'
[bpt/guile.git] / libguile / ports.c
1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004,
2 * 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
18 */
19
20
21 \f
22 /* Headers. */
23
24 #define _LARGEFILE64_SOURCE /* ask for stat64 etc */
25
26 #ifdef HAVE_CONFIG_H
27 # include <config.h>
28 #endif
29
30 #include <stdio.h>
31 #include <errno.h>
32 #include <fcntl.h> /* for chsize on mingw */
33 #include <assert.h>
34 #include <iconv.h>
35 #include <uniconv.h>
36 #include <unistr.h>
37 #include <striconveh.h>
38
39 #include <assert.h>
40
41 #include "libguile/_scm.h"
42 #include "libguile/async.h"
43 #include "libguile/deprecation.h"
44 #include "libguile/eval.h"
45 #include "libguile/fports.h" /* direct access for seek and truncate */
46 #include "libguile/goops.h"
47 #include "libguile/smob.h"
48 #include "libguile/chars.h"
49 #include "libguile/dynwind.h"
50
51 #include "libguile/keywords.h"
52 #include "libguile/hashtab.h"
53 #include "libguile/root.h"
54 #include "libguile/strings.h"
55 #include "libguile/mallocs.h"
56 #include "libguile/validate.h"
57 #include "libguile/ports.h"
58 #include "libguile/ports-internal.h"
59 #include "libguile/vectors.h"
60 #include "libguile/weak-set.h"
61 #include "libguile/fluids.h"
62 #include "libguile/eq.h"
63 #include "libguile/alist.h"
64
65 #ifdef HAVE_STRING_H
66 #include <string.h>
67 #endif
68
69 #ifdef HAVE_IO_H
70 #include <io.h>
71 #endif
72
73 #ifdef HAVE_UNISTD_H
74 #include <unistd.h>
75 #endif
76
77 #ifdef HAVE_SYS_IOCTL_H
78 #include <sys/ioctl.h>
79 #endif
80
81 /* Mingw (version 3.4.5, circa 2006) has ftruncate as an alias for chsize
82 already, but have this code here in case that wasn't so in past versions,
83 or perhaps to help other minimal DOS environments.
84
85 gnulib ftruncate.c has code using fcntl F_CHSIZE and F_FREESP, which
86 might be possibilities if we've got other systems without ftruncate. */
87
88 #if defined HAVE_CHSIZE && ! defined HAVE_FTRUNCATE
89 #define ftruncate(fd, size) chsize (fd, size)
90 #undef HAVE_FTRUNCATE
91 #define HAVE_FTRUNCATE 1
92 #endif
93
94 \f
/* Port encodings are case-insensitive ASCII strings, so only the
   ASCII letters 'a' through 'z' are folded to upper case; any other
   character is returned unchanged.  */
static char
ascii_toupper (char c)
{
  if (c >= 'a' && c <= 'z')
    return 'A' + (c - 'a');

  return c;
}
101
/* Return 1 if ENC, folded to upper case with ascii_toupper, is equal
   to UPPER, and 0 otherwise.  UPPER must already be in upper case.  A
   NULL ENC stands for "ISO-8859-1".

   It is only necessary to use this function on encodings that come
   from the user and have not been canonicalized yet.  Encodings that
   are set on ports or in the default encoding fluid are in upper-case,
   and can be compared with strcmp.  */
static int
encoding_matches (const char *enc, const char *upper)
{
  const char *p = enc ? enc : "ISO-8859-1";

  /* When UPPER runs out first, its terminating NUL differs from the
     non-NUL byte at *P, so the scan stops before reading past it.  */
  for (; *p != '\0'; p++, upper++)
    if (ascii_toupper (*p) != *upper)
      return 0;

  /* ENC is exhausted; it matches only if UPPER is exhausted too.  */
  return *upper == '\0';
}
118
/* Return the encoding name ENC converted to upper case.  A NULL ENC
   yields the constant string "ISO-8859-1"; otherwise the result is a
   copy of ENC obtained from scm_gc_strdup and upcased in place.
   Signal an error through scm_misc_error if ENC contains a byte
   outside the ASCII range.  */
static char*
canonicalize_encoding (const char *enc)
{
  char *ret;
  size_t i;

  if (!enc)
    return "ISO-8859-1";

  ret = scm_gc_strdup (enc, "port");

  for (i = 0; ret[i]; i++)
    {
      /* Compare as unsigned char: where plain `char' is signed, bytes
         0x80..0xFF are negative and a direct `> 127' test could never
         be true, letting non-ASCII names through.  */
      if ((unsigned char) ret[i] > 127)
        /* Restrict to ASCII. */
        scm_misc_error (NULL, "invalid character encoding ~s",
                        scm_list_1 (scm_from_latin1_string (enc)));
      else
        ret[i] = ascii_toupper (ret[i]);
    }

  return ret;
}
142
143
144 \f
145 /* The port kind table --- a dynamically resized array of port types. */
146
147
148 /* scm_ptobs scm_numptob
149 * implement a dynamically resized array of ptob records.
150 * Indexes into this table are used when generating type
151 * tags for smobjects (if you know a tag you can get an index and conversely).
152 */
153 static scm_t_ptob_descriptor **scm_ptobs = NULL;
154 static long scm_numptob = 0; /* Number of port types. */
155 static long scm_ptobs_size = 0; /* Number of slots in the port type
156 table. */
157 static scm_i_pthread_mutex_t scm_ptobs_lock = SCM_I_PTHREAD_MUTEX_INITIALIZER;
158
159 long
160 scm_c_num_port_types (void)
161 {
162 long ret;
163
164 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
165 ret = scm_numptob;
166 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
167
168 return ret;
169 }
170
171 scm_t_ptob_descriptor*
172 scm_c_port_type_ref (long ptobnum)
173 {
174 scm_t_ptob_descriptor *ret = NULL;
175
176 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
177
178 if (0 <= ptobnum && ptobnum < scm_numptob)
179 ret = scm_ptobs[ptobnum];
180
181 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
182
183 if (!ret)
184 scm_out_of_range ("scm_c_port_type_ref", scm_from_long (ptobnum));
185
186 return ret;
187 }
188
189 long
190 scm_c_port_type_add_x (scm_t_ptob_descriptor *desc)
191 {
192 long ret = -1;
193
194 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
195
196 if (scm_numptob + 1 < SCM_I_MAX_PORT_TYPE_COUNT)
197 {
198 if (scm_numptob == scm_ptobs_size)
199 {
200 unsigned long old_size = scm_ptobs_size;
201 scm_t_ptob_descriptor **old_ptobs = scm_ptobs;
202
203 /* Currently there are only 9 predefined port types, so one
204 resize will cover it. */
205 scm_ptobs_size = old_size + 10;
206
207 if (scm_ptobs_size >= SCM_I_MAX_PORT_TYPE_COUNT)
208 scm_ptobs_size = SCM_I_MAX_PORT_TYPE_COUNT;
209
210 scm_ptobs = scm_gc_malloc (sizeof (*scm_ptobs) * scm_ptobs_size,
211 "scm_ptobs");
212
213 memcpy (scm_ptobs, old_ptobs, sizeof (*scm_ptobs) * scm_numptob);
214 }
215
216 ret = scm_numptob++;
217 scm_ptobs[ret] = desc;
218 }
219
220 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
221
222 if (ret < 0)
223 scm_out_of_range ("scm_c_port_type_add_x", scm_from_long (scm_numptob));
224
225 return ret;
226 }
227
228 /*
229 * We choose to use an interface similar to the smob interface with
230 * fill_input and write as standard fields, passed to the port
231 * type constructor, and optional fields set by setters.
232 */
233
234 static void
235 flush_port_default (SCM port SCM_UNUSED)
236 {
237 }
238
239 static void
240 end_input_default (SCM port SCM_UNUSED, int offset SCM_UNUSED)
241 {
242 }
243
244 scm_t_bits
245 scm_make_port_type (char *name,
246 int (*fill_input) (SCM port),
247 void (*write) (SCM port, const void *data, size_t size))
248 {
249 scm_t_ptob_descriptor *desc;
250 long ptobnum;
251
252 desc = scm_gc_malloc_pointerless (sizeof (*desc), "port-type");
253 memset (desc, 0, sizeof (*desc));
254
255 desc->name = name;
256 desc->print = scm_port_print;
257 desc->write = write;
258 desc->flush = flush_port_default;
259 desc->end_input = end_input_default;
260 desc->fill_input = fill_input;
261
262 ptobnum = scm_c_port_type_add_x (desc);
263
264 /* Make a class object if GOOPS is present. */
265 if (SCM_UNPACK (scm_port_class[0]) != 0)
266 scm_make_port_classes (ptobnum, name);
267
268 return scm_tc7_port + ptobnum * 256;
269 }
270
271 void
272 scm_set_port_mark (scm_t_bits tc, SCM (*mark) (SCM))
273 {
274 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->mark = mark;
275 }
276
277 void
278 scm_set_port_free (scm_t_bits tc, size_t (*free) (SCM))
279 {
280 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->free = free;
281 }
282
283 void
284 scm_set_port_print (scm_t_bits tc, int (*print) (SCM exp, SCM port,
285 scm_print_state *pstate))
286 {
287 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->print = print;
288 }
289
290 void
291 scm_set_port_equalp (scm_t_bits tc, SCM (*equalp) (SCM, SCM))
292 {
293 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->equalp = equalp;
294 }
295
296 void
297 scm_set_port_close (scm_t_bits tc, int (*close) (SCM))
298 {
299 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->close = close;
300 }
301
302 void
303 scm_set_port_flush (scm_t_bits tc, void (*flush) (SCM port))
304 {
305 scm_t_ptob_descriptor *ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tc));
306 ptob->flush = flush;
307 ptob->flags |= SCM_PORT_TYPE_HAS_FLUSH;
308 }
309
310 void
311 scm_set_port_end_input (scm_t_bits tc, void (*end_input) (SCM port, int offset))
312 {
313 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->end_input = end_input;
314 }
315
316 void
317 scm_set_port_seek (scm_t_bits tc, scm_t_off (*seek) (SCM, scm_t_off, int))
318 {
319 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->seek = seek;
320 }
321
322 void
323 scm_set_port_truncate (scm_t_bits tc, void (*truncate) (SCM, scm_t_off))
324 {
325 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->truncate = truncate;
326 }
327
328 void
329 scm_set_port_input_waiting (scm_t_bits tc, int (*input_waiting) (SCM))
330 {
331 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->input_waiting = input_waiting;
332 }
333
334 static void
335 scm_i_set_pending_eof (SCM port)
336 {
337 SCM_PORT_GET_INTERNAL (port)->pending_eof = 1;
338 }
339
340 static void
341 scm_i_clear_pending_eof (SCM port)
342 {
343 SCM_PORT_GET_INTERNAL (port)->pending_eof = 0;
344 }
345
346 SCM_DEFINE (scm_i_port_property, "%port-property", 2, 0, 0,
347 (SCM port, SCM key),
348 "Return the property of @var{port} associated with @var{key}.")
349 #define FUNC_NAME s_scm_i_port_property
350 {
351 scm_i_pthread_mutex_t *lock;
352 SCM result;
353
354 SCM_VALIDATE_OPPORT (1, port);
355 scm_c_lock_port (port, &lock);
356 result = scm_assq_ref (SCM_PORT_GET_INTERNAL (port)->alist, key);
357 if (lock)
358 scm_i_pthread_mutex_unlock (lock);
359 return result;
360 }
361 #undef FUNC_NAME
362
363 SCM_DEFINE (scm_i_set_port_property_x, "%set-port-property!", 3, 0, 0,
364 (SCM port, SCM key, SCM value),
365 "Set the property of @var{port} associated with @var{key} to @var{value}.")
366 #define FUNC_NAME s_scm_i_set_port_property_x
367 {
368 scm_i_pthread_mutex_t *lock;
369 scm_t_port_internal *pti;
370
371 SCM_VALIDATE_OPPORT (1, port);
372 scm_c_lock_port (port, &lock);
373 pti = SCM_PORT_GET_INTERNAL (port);
374 pti->alist = scm_assq_set_x (pti->alist, key, value);
375 if (lock)
376 scm_i_pthread_mutex_unlock (lock);
377 return SCM_UNSPECIFIED;
378 }
379 #undef FUNC_NAME
380
381 \f
382
/* Standard ports --- current input, output, error, and more(!).

   Each variable holds the fluid whose value is the corresponding
   current port.  They start out as #f; `current-input-port',
   `current-output-port' and `current-error-port' return #f while
   their variable is still #f.  */

static SCM cur_inport_fluid = SCM_BOOL_F;
static SCM cur_outport_fluid = SCM_BOOL_F;
static SCM cur_errport_fluid = SCM_BOOL_F;
static SCM cur_loadport_fluid = SCM_BOOL_F;
389
390 SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
391 (),
392 "Return the current input port. This is the default port used\n"
393 "by many input procedures. Initially, @code{current-input-port}\n"
394 "returns the @dfn{standard input} in Unix and C terminology.")
395 #define FUNC_NAME s_scm_current_input_port
396 {
397 if (scm_is_true (cur_inport_fluid))
398 return scm_fluid_ref (cur_inport_fluid);
399 else
400 return SCM_BOOL_F;
401 }
402 #undef FUNC_NAME
403
404 SCM_DEFINE (scm_current_output_port, "current-output-port", 0, 0, 0,
405 (),
406 "Return the current output port. This is the default port used\n"
407 "by many output procedures. Initially,\n"
408 "@code{current-output-port} returns the @dfn{standard output} in\n"
409 "Unix and C terminology.")
410 #define FUNC_NAME s_scm_current_output_port
411 {
412 if (scm_is_true (cur_outport_fluid))
413 return scm_fluid_ref (cur_outport_fluid);
414 else
415 return SCM_BOOL_F;
416 }
417 #undef FUNC_NAME
418
419 SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0,
420 (),
421 "Return the port to which errors and warnings should be sent (the\n"
422 "@dfn{standard error} in Unix and C terminology).")
423 #define FUNC_NAME s_scm_current_error_port
424 {
425 if (scm_is_true (cur_errport_fluid))
426 return scm_fluid_ref (cur_errport_fluid);
427 else
428 return SCM_BOOL_F;
429 }
430 #undef FUNC_NAME
431
432 SCM
433 scm_current_warning_port (void)
434 {
435 static SCM cwp_var = SCM_UNDEFINED;
436 static scm_i_pthread_mutex_t cwp_var_mutex
437 = SCM_I_PTHREAD_MUTEX_INITIALIZER;
438
439 scm_i_scm_pthread_mutex_lock (&cwp_var_mutex);
440 if (SCM_UNBNDP (cwp_var))
441 cwp_var = scm_c_private_variable ("guile", "current-warning-port");
442 scm_i_pthread_mutex_unlock (&cwp_var_mutex);
443
444 return scm_call_0 (scm_variable_ref (cwp_var));
445 }
446
447 SCM_DEFINE (scm_current_load_port, "current-load-port", 0, 0, 0,
448 (),
449 "Return the current-load-port.\n"
450 "The load port is used internally by @code{primitive-load}.")
451 #define FUNC_NAME s_scm_current_load_port
452 {
453 return scm_fluid_ref (cur_loadport_fluid);
454 }
455 #undef FUNC_NAME
456
457 SCM_DEFINE (scm_set_current_input_port, "set-current-input-port", 1, 0, 0,
458 (SCM port),
459 "@deffnx {Scheme Procedure} set-current-output-port port\n"
460 "@deffnx {Scheme Procedure} set-current-error-port port\n"
461 "Change the ports returned by @code{current-input-port},\n"
462 "@code{current-output-port} and @code{current-error-port}, respectively,\n"
463 "so that they use the supplied @var{port} for input or output.")
464 #define FUNC_NAME s_scm_set_current_input_port
465 {
466 SCM oinp = scm_fluid_ref (cur_inport_fluid);
467 SCM_VALIDATE_OPINPORT (1, port);
468 scm_fluid_set_x (cur_inport_fluid, port);
469 return oinp;
470 }
471 #undef FUNC_NAME
472
473
474 SCM_DEFINE (scm_set_current_output_port, "set-current-output-port", 1, 0, 0,
475 (SCM port),
476 "Set the current default output port to @var{port}.")
477 #define FUNC_NAME s_scm_set_current_output_port
478 {
479 SCM ooutp = scm_fluid_ref (cur_outport_fluid);
480 port = SCM_COERCE_OUTPORT (port);
481 SCM_VALIDATE_OPOUTPORT (1, port);
482 scm_fluid_set_x (cur_outport_fluid, port);
483 return ooutp;
484 }
485 #undef FUNC_NAME
486
487
488 SCM_DEFINE (scm_set_current_error_port, "set-current-error-port", 1, 0, 0,
489 (SCM port),
490 "Set the current default error port to @var{port}.")
491 #define FUNC_NAME s_scm_set_current_error_port
492 {
493 SCM oerrp = scm_fluid_ref (cur_errport_fluid);
494 port = SCM_COERCE_OUTPORT (port);
495 SCM_VALIDATE_OPOUTPORT (1, port);
496 scm_fluid_set_x (cur_errport_fluid, port);
497 return oerrp;
498 }
499 #undef FUNC_NAME
500
501
502 SCM
503 scm_set_current_warning_port (SCM port)
504 {
505 static SCM cwp_var = SCM_BOOL_F;
506
507 if (scm_is_false (cwp_var))
508 cwp_var = scm_c_private_lookup ("guile", "current-warning-port");
509
510 return scm_call_1 (scm_variable_ref (cwp_var), port);
511 }
512
513
514 void
515 scm_dynwind_current_input_port (SCM port)
516 #define FUNC_NAME NULL
517 {
518 SCM_VALIDATE_OPINPORT (1, port);
519 scm_dynwind_fluid (cur_inport_fluid, port);
520 }
521 #undef FUNC_NAME
522
523 void
524 scm_dynwind_current_output_port (SCM port)
525 #define FUNC_NAME NULL
526 {
527 port = SCM_COERCE_OUTPORT (port);
528 SCM_VALIDATE_OPOUTPORT (1, port);
529 scm_dynwind_fluid (cur_outport_fluid, port);
530 }
531 #undef FUNC_NAME
532
533 void
534 scm_dynwind_current_error_port (SCM port)
535 #define FUNC_NAME NULL
536 {
537 port = SCM_COERCE_OUTPORT (port);
538 SCM_VALIDATE_OPOUTPORT (1, port);
539 scm_dynwind_fluid (cur_errport_fluid, port);
540 }
541 #undef FUNC_NAME
542
543 void
544 scm_i_dynwind_current_load_port (SCM port)
545 {
546 scm_dynwind_fluid (cur_loadport_fluid, port);
547 }
548
549
550 \f
551
552 /* Retrieving a port's mode. */
553
554 /* Return the flags that characterize a port based on the mode
555 * string used to open a file for that port.
556 *
557 * See PORT FLAGS in scm.h
558 */
559
560 static long
561 scm_i_mode_bits_n (SCM modes)
562 {
563 return (SCM_OPN
564 | (scm_i_string_contains_char (modes, 'r')
565 || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0)
566 | (scm_i_string_contains_char (modes, 'w')
567 || scm_i_string_contains_char (modes, 'a')
568 || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0)
569 | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0)
570 | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0));
571 }
572
573 long
574 scm_mode_bits (char *modes)
575 {
576 /* Valid characters are rw+a0l. So, use latin1. */
577 return scm_i_mode_bits (scm_from_latin1_string (modes));
578 }
579
580 long
581 scm_i_mode_bits (SCM modes)
582 {
583 long bits;
584
585 if (!scm_is_string (modes))
586 scm_wrong_type_arg_msg (NULL, 0, modes, "string");
587
588 bits = scm_i_mode_bits_n (modes);
589 scm_remember_upto_here_1 (modes);
590 return bits;
591 }
592
593 /* Return the mode flags from an open port.
594 * Some modes such as "append" are only used when opening
595 * a file and are not returned here. */
596
597 SCM_DEFINE (scm_port_mode, "port-mode", 1, 0, 0,
598 (SCM port),
599 "Return the port modes associated with the open port @var{port}.\n"
600 "These will not necessarily be identical to the modes used when\n"
601 "the port was opened, since modes such as \"append\" which are\n"
602 "used only during port creation are not retained.")
603 #define FUNC_NAME s_scm_port_mode
604 {
605 char modes[4];
606 modes[0] = '\0';
607
608 port = SCM_COERCE_OUTPORT (port);
609 SCM_VALIDATE_OPPORT (1, port);
610 if (SCM_CELL_WORD_0 (port) & SCM_RDNG) {
611 if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
612 strcpy (modes, "r+");
613 else
614 strcpy (modes, "r");
615 }
616 else if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
617 strcpy (modes, "w");
618 if (SCM_CELL_WORD_0 (port) & SCM_BUF0)
619 strcat (modes, "0");
620
621 return scm_from_latin1_string (modes);
622 }
623 #undef FUNC_NAME
624
625
626 \f
627
/* The port table --- a weak set of ports.

   We need a global registry of ports to flush them all at exit, and to
   get all the ports matching a file descriptor.

   Note that scm_c_make_port_with_encoding only adds a port here when
   its port type has the SCM_PORT_TYPE_HAS_FLUSH flag, and that
   `close-port' removes such ports again.  */
SCM scm_i_port_weak_set;
633
634
635 \f
636
637 /* Port finalization. */
638
639 struct do_free_data
640 {
641 scm_t_ptob_descriptor *ptob;
642 SCM port;
643 };
644
645 static SCM
646 do_free (void *body_data)
647 {
648 struct do_free_data *data = body_data;
649
650 /* `close' is for explicit `close-port' by user. `free' is for this
651 purpose: ports collected by the GC. */
652 data->ptob->free (data->port);
653
654 return SCM_BOOL_T;
655 }
656
657 /* Finalize the object (a port) pointed to by PTR. */
658 static void
659 finalize_port (void *ptr, void *data)
660 {
661 SCM port = SCM_PACK_POINTER (ptr);
662
663 if (!SCM_PORTP (port))
664 abort ();
665
666 if (SCM_OPENP (port))
667 {
668 struct do_free_data data;
669
670 SCM_CLR_PORT_OPEN_FLAG (port);
671
672 data.ptob = SCM_PORT_DESCRIPTOR (port);
673 data.port = port;
674
675 scm_internal_catch (SCM_BOOL_T, do_free, &data,
676 scm_handle_by_message_noexit, NULL);
677
678 scm_gc_ports_collected++;
679 }
680 }
681
682
683 \f
684
685 SCM
686 scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits,
687 const char *encoding,
688 scm_t_string_failed_conversion_handler handler,
689 scm_t_bits stream)
690 {
691 SCM ret;
692 scm_t_port *entry;
693 scm_t_port_internal *pti;
694 scm_t_ptob_descriptor *ptob;
695
696 entry = scm_gc_typed_calloc (scm_t_port);
697 pti = scm_gc_typed_calloc (scm_t_port_internal);
698 ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tag));
699
700 ret = scm_words (tag | mode_bits, 3);
701 SCM_SET_CELL_WORD_1 (ret, (scm_t_bits) entry);
702 SCM_SET_CELL_WORD_2 (ret, (scm_t_bits) ptob);
703
704 entry->lock = scm_gc_malloc_pointerless (sizeof (*entry->lock), "port lock");
705 scm_i_pthread_mutex_init (entry->lock, scm_i_pthread_mutexattr_recursive);
706
707 entry->internal = pti;
708 entry->file_name = SCM_BOOL_F;
709 entry->rw_active = SCM_PORT_NEITHER;
710 entry->port = ret;
711 entry->stream = stream;
712
713 if (encoding_matches (encoding, "UTF-8"))
714 {
715 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
716 entry->encoding = "UTF-8";
717 }
718 else if (encoding_matches (encoding, "ISO-8859-1"))
719 {
720 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
721 entry->encoding = "ISO-8859-1";
722 }
723 else
724 {
725 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
726 entry->encoding = canonicalize_encoding (encoding);
727 }
728
729 entry->ilseq_handler = handler;
730 pti->iconv_descriptors = NULL;
731
732 pti->at_stream_start_for_bom_read = 1;
733 pti->at_stream_start_for_bom_write = 1;
734
735 pti->pending_eof = 0;
736 pti->alist = SCM_EOL;
737
738 if (SCM_PORT_DESCRIPTOR (ret)->free)
739 scm_i_set_finalizer (SCM2PTR (ret), finalize_port, NULL);
740
741 if (SCM_PORT_DESCRIPTOR (ret)->flags & SCM_PORT_TYPE_HAS_FLUSH)
742 scm_weak_set_add_x (scm_i_port_weak_set, ret);
743
744 return ret;
745 }
746
747 SCM
748 scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream)
749 {
750 return scm_c_make_port_with_encoding (tag, mode_bits,
751 scm_i_default_port_encoding (),
752 scm_i_default_port_conversion_handler (),
753 stream);
754 }
755
756 SCM
757 scm_new_port_table_entry (scm_t_bits tag)
758 {
759 return scm_c_make_port (tag, 0, 0);
760 }
761
762 \f
763
764 /* Predicates. */
765
766 SCM_DEFINE (scm_port_p, "port?", 1, 0, 0,
767 (SCM x),
768 "Return a boolean indicating whether @var{x} is a port.\n"
769 "Equivalent to @code{(or (input-port? @var{x}) (output-port?\n"
770 "@var{x}))}.")
771 #define FUNC_NAME s_scm_port_p
772 {
773 return scm_from_bool (SCM_PORTP (x));
774 }
775 #undef FUNC_NAME
776
777 SCM_DEFINE (scm_input_port_p, "input-port?", 1, 0, 0,
778 (SCM x),
779 "Return @code{#t} if @var{x} is an input port, otherwise return\n"
780 "@code{#f}. Any object satisfying this predicate also satisfies\n"
781 "@code{port?}.")
782 #define FUNC_NAME s_scm_input_port_p
783 {
784 return scm_from_bool (SCM_INPUT_PORT_P (x));
785 }
786 #undef FUNC_NAME
787
788 SCM_DEFINE (scm_output_port_p, "output-port?", 1, 0, 0,
789 (SCM x),
790 "Return @code{#t} if @var{x} is an output port, otherwise return\n"
791 "@code{#f}. Any object satisfying this predicate also satisfies\n"
792 "@code{port?}.")
793 #define FUNC_NAME s_scm_output_port_p
794 {
795 x = SCM_COERCE_OUTPORT (x);
796 return scm_from_bool (SCM_OUTPUT_PORT_P (x));
797 }
798 #undef FUNC_NAME
799
800 SCM_DEFINE (scm_port_closed_p, "port-closed?", 1, 0, 0,
801 (SCM port),
802 "Return @code{#t} if @var{port} is closed or @code{#f} if it is\n"
803 "open.")
804 #define FUNC_NAME s_scm_port_closed_p
805 {
806 SCM_VALIDATE_PORT (1, port);
807 return scm_from_bool (!SCM_OPPORTP (port));
808 }
809 #undef FUNC_NAME
810
811 SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0,
812 (SCM x),
813 "Return @code{#t} if @var{x} is an end-of-file object; otherwise\n"
814 "return @code{#f}.")
815 #define FUNC_NAME s_scm_eof_object_p
816 {
817 return scm_from_bool (SCM_EOF_OBJECT_P (x));
818 }
819 #undef FUNC_NAME
820
821
822 \f
823
824 /* Closing ports. */
825
826 static void close_iconv_descriptors (scm_t_iconv_descriptors *id);
827
828 /* scm_close_port
829 * Call the close operation on a port object.
830 * see also scm_close.
831 */
832 SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0,
833 (SCM port),
834 "Close the specified port object. Return @code{#t} if it\n"
835 "successfully closes a port or @code{#f} if it was already\n"
836 "closed. An exception may be raised if an error occurs, for\n"
837 "example when flushing buffered output. See also @ref{Ports and\n"
838 "File Descriptors, close}, for a procedure which can close file\n"
839 "descriptors.")
840 #define FUNC_NAME s_scm_close_port
841 {
842 scm_t_port_internal *pti;
843 int rv;
844
845 port = SCM_COERCE_OUTPORT (port);
846
847 SCM_VALIDATE_PORT (1, port);
848 if (SCM_CLOSEDP (port))
849 return SCM_BOOL_F;
850
851 pti = SCM_PORT_GET_INTERNAL (port);
852 SCM_CLR_PORT_OPEN_FLAG (port);
853
854 if (SCM_PORT_DESCRIPTOR (port)->flags & SCM_PORT_TYPE_HAS_FLUSH)
855 scm_weak_set_remove_x (scm_i_port_weak_set, port);
856
857 if (SCM_PORT_DESCRIPTOR (port)->close)
858 /* Note! This may throw an exception. Anything after this point
859 should be resilient to non-local exits. */
860 rv = SCM_PORT_DESCRIPTOR (port)->close (port);
861 else
862 rv = 0;
863
864 if (pti->iconv_descriptors)
865 {
866 /* If we don't get here, the iconv_descriptors finalizer will
867 clean up. */
868 close_iconv_descriptors (pti->iconv_descriptors);
869 pti->iconv_descriptors = NULL;
870 }
871
872 return scm_from_bool (rv >= 0);
873 }
874 #undef FUNC_NAME
875
876 SCM_DEFINE (scm_close_input_port, "close-input-port", 1, 0, 0,
877 (SCM port),
878 "Close the specified input port object. The routine has no effect if\n"
879 "the file has already been closed. An exception may be raised if an\n"
880 "error occurs. The value returned is unspecified.\n\n"
881 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
882 "which can close file descriptors.")
883 #define FUNC_NAME s_scm_close_input_port
884 {
885 SCM_VALIDATE_INPUT_PORT (1, port);
886 scm_close_port (port);
887 return SCM_UNSPECIFIED;
888 }
889 #undef FUNC_NAME
890
891 SCM_DEFINE (scm_close_output_port, "close-output-port", 1, 0, 0,
892 (SCM port),
893 "Close the specified output port object. The routine has no effect if\n"
894 "the file has already been closed. An exception may be raised if an\n"
895 "error occurs. The value returned is unspecified.\n\n"
896 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
897 "which can close file descriptors.")
898 #define FUNC_NAME s_scm_close_output_port
899 {
900 port = SCM_COERCE_OUTPORT (port);
901 SCM_VALIDATE_OUTPUT_PORT (1, port);
902 scm_close_port (port);
903 return SCM_UNSPECIFIED;
904 }
905 #undef FUNC_NAME
906
907
908 \f
909
910 /* Encoding characters to byte streams, and decoding byte streams to
911 characters. */
912
/* A fluid specifying the default encoding for newly created ports. If it is
   a string, that is the encoding. If it is #f, it is in the "native"
   (Latin-1) encoding. */
SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding");

/* Zero until the fluid above is considered initialized.  While it is
   zero, scm_i_default_port_encoding answers "ISO-8859-1" and
   scm_i_set_default_port_encoding signals an error.  */
static int scm_port_encoding_init = 0;
919
920 /* Use ENCODING as the default encoding for future ports. */
921 void
922 scm_i_set_default_port_encoding (const char *encoding)
923 {
924 if (!scm_port_encoding_init
925 || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
926 scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
927 SCM_EOL);
928
929 if (encoding_matches (encoding, "ISO-8859-1"))
930 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
931 else
932 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
933 scm_from_latin1_string (canonicalize_encoding (encoding)));
934 }
935
936 /* Return the name of the default encoding for newly created ports. */
937 const char *
938 scm_i_default_port_encoding (void)
939 {
940 if (!scm_port_encoding_init)
941 return "ISO-8859-1";
942 else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
943 return "ISO-8859-1";
944 else
945 {
946 SCM encoding;
947
948 encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
949 if (!scm_is_string (encoding))
950 return "ISO-8859-1";
951 else
952 return scm_i_string_chars (encoding);
953 }
954 }
955
/* A fluid specifying the default conversion handler for newly created
   ports. Its value should be one of the symbols below. */
SCM_VARIABLE (default_conversion_strategy_var,
              "%default-port-conversion-strategy");

/* Whether the above fluid is initialized.  While this is zero,
   scm_i_default_port_conversion_handler answers
   SCM_FAILED_CONVERSION_QUESTION_MARK and
   scm_i_set_default_port_conversion_handler signals an error.  */
static int scm_conversion_strategy_init = 0;

/* The possible conversion strategies: `error' corresponds to
   SCM_FAILED_CONVERSION_ERROR, `substitute' to
   SCM_FAILED_CONVERSION_QUESTION_MARK and `escape' to
   SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE.  */
SCM_SYMBOL (sym_error, "error");
SCM_SYMBOL (sym_substitute, "substitute");
SCM_SYMBOL (sym_escape, "escape");
968
969 /* Return the default failed encoding conversion policy for new created
970 ports. */
971 scm_t_string_failed_conversion_handler
972 scm_i_default_port_conversion_handler (void)
973 {
974 scm_t_string_failed_conversion_handler handler;
975
976 if (!scm_conversion_strategy_init
977 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
978 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
979 else
980 {
981 SCM fluid, value;
982
983 fluid = SCM_VARIABLE_REF (default_conversion_strategy_var);
984 value = scm_fluid_ref (fluid);
985
986 if (scm_is_eq (sym_substitute, value))
987 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
988 else if (scm_is_eq (sym_escape, value))
989 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
990 else
991 /* Default to 'error also when the fluid's value is not one of
992 the valid symbols. */
993 handler = SCM_FAILED_CONVERSION_ERROR;
994 }
995
996 return handler;
997 }
998
999 /* Use HANDLER as the default conversion strategy for future ports. */
1000 void
1001 scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler
1002 handler)
1003 {
1004 SCM strategy;
1005
1006 if (!scm_conversion_strategy_init
1007 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
1008 scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
1009 SCM_EOL);
1010
1011 switch (handler)
1012 {
1013 case SCM_FAILED_CONVERSION_ERROR:
1014 strategy = sym_error;
1015 break;
1016
1017 case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE:
1018 strategy = sym_escape;
1019 break;
1020
1021 case SCM_FAILED_CONVERSION_QUESTION_MARK:
1022 strategy = sym_substitute;
1023 break;
1024
1025 default:
1026 abort ();
1027 }
1028
1029 scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var),
1030 strategy);
1031 }
1032
1033 static void
1034 scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port);
1035
1036 /* If the next LEN bytes from PORT are equal to those in BYTES, then
1037 return 1, else return 0. Leave the port position unchanged. */
1038 static int
1039 looking_at_bytes (SCM port, const unsigned char *bytes, int len)
1040 {
1041 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1042 int i = 0;
1043
1044 while (i < len && scm_peek_byte_or_eof_unlocked (port) == bytes[i])
1045 {
1046 pt->read_pos++;
1047 i++;
1048 }
1049 scm_i_unget_bytes_unlocked (bytes, i, port);
1050 return (i == len);
1051 }
1052
/* Byte order marks for the Unicode encoding schemes.  */
static const unsigned char scm_utf8_bom[3]    = { 0xEF, 0xBB, 0xBF };
static const unsigned char scm_utf16be_bom[2] = { 0xFE, 0xFF };
static const unsigned char scm_utf16le_bom[2] = { 0xFF, 0xFE };
static const unsigned char scm_utf32be_bom[4] = { 0x00, 0x00, 0xFE, 0xFF };
static const unsigned char scm_utf32le_bom[4] = { 0xFF, 0xFE, 0x00, 0x00 };
1058
1059 /* Decide what byte order to use for a UTF-16 port. Return "UTF-16BE"
1060 or "UTF-16LE". MODE must be either SCM_PORT_READ or SCM_PORT_WRITE,
1061 and specifies which operation is about to be done. The MODE
1062 determines how we will decide the byte order. We deliberately avoid
1063 reading from the port unless the user is about to do so. If the user
1064 is about to read, then we look for a BOM, and if present, we use it
1065 to determine the byte order. Otherwise we choose big endian, as
1066 recommended by the Unicode Standard. Note that the BOM (if any) is
1067 not consumed here. */
1068 static const char *
1069 decide_utf16_encoding (SCM port, scm_t_port_rw_active mode)
1070 {
1071 if (mode == SCM_PORT_READ
1072 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1073 && looking_at_bytes (port, scm_utf16le_bom, sizeof scm_utf16le_bom))
1074 return "UTF-16LE";
1075 else
1076 return "UTF-16BE";
1077 }
1078
1079 /* Decide what byte order to use for a UTF-32 port. Return "UTF-32BE"
1080 or "UTF-32LE". See the comment above 'decide_utf16_encoding' for
1081 details. */
1082 static const char *
1083 decide_utf32_encoding (SCM port, scm_t_port_rw_active mode)
1084 {
1085 if (mode == SCM_PORT_READ
1086 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1087 && looking_at_bytes (port, scm_utf32le_bom, sizeof scm_utf32le_bom))
1088 return "UTF-32LE";
1089 else
1090 return "UTF-32BE";
1091 }
1092
1093 static void
1094 finalize_iconv_descriptors (void *ptr, void *data)
1095 {
1096 close_iconv_descriptors (ptr);
1097 }
1098
1099 static scm_t_iconv_descriptors *
1100 open_iconv_descriptors (const char *encoding, int reading, int writing)
1101 {
1102 scm_t_iconv_descriptors *id;
1103 iconv_t input_cd, output_cd;
1104 size_t i;
1105
1106 input_cd = (iconv_t) -1;
1107 output_cd = (iconv_t) -1;
1108
1109 for (i = 0; encoding[i]; i++)
1110 if (encoding[i] > 127)
1111 goto invalid_encoding;
1112
1113 if (reading)
1114 {
1115 /* Open an input iconv conversion descriptor, from ENCODING
1116 to UTF-8. We choose UTF-8, not UTF-32, because iconv
1117 implementations can typically convert from anything to
1118 UTF-8, but not to UTF-32 (see
1119 <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
1120
1121 /* Assume opening an iconv descriptor causes about 16 KB of
1122 allocation. */
1123 scm_gc_register_allocation (16 * 1024);
1124
1125 input_cd = iconv_open ("UTF-8", encoding);
1126 if (input_cd == (iconv_t) -1)
1127 goto invalid_encoding;
1128 }
1129
1130 if (writing)
1131 {
1132 /* Assume opening an iconv descriptor causes about 16 KB of
1133 allocation. */
1134 scm_gc_register_allocation (16 * 1024);
1135
1136 output_cd = iconv_open (encoding, "UTF-8");
1137 if (output_cd == (iconv_t) -1)
1138 {
1139 if (input_cd != (iconv_t) -1)
1140 iconv_close (input_cd);
1141 goto invalid_encoding;
1142 }
1143 }
1144
1145 id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors");
1146 id->input_cd = input_cd;
1147 id->output_cd = output_cd;
1148
1149 /* Register a finalizer to close the descriptors. */
1150 scm_i_set_finalizer (id, finalize_iconv_descriptors, NULL);
1151
1152 return id;
1153
1154 invalid_encoding:
1155 {
1156 SCM err;
1157 err = scm_from_latin1_string (encoding);
1158 scm_misc_error ("open_iconv_descriptors",
1159 "invalid or unknown character encoding ~s",
1160 scm_list_1 (err));
1161 }
1162 }
1163
1164 static void
1165 close_iconv_descriptors (scm_t_iconv_descriptors *id)
1166 {
1167 if (id->input_cd != (iconv_t) -1)
1168 iconv_close (id->input_cd);
1169 if (id->output_cd != (iconv_t) -1)
1170 iconv_close (id->output_cd);
1171 id->input_cd = (void *) -1;
1172 id->output_cd = (void *) -1;
1173 }
1174
1175 scm_t_iconv_descriptors *
1176 scm_i_port_iconv_descriptors (SCM port, scm_t_port_rw_active mode)
1177 {
1178 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
1179
1180 assert (pti->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV);
1181
1182 if (!pti->iconv_descriptors)
1183 {
1184 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1185 const char *precise_encoding;
1186
1187 if (!pt->encoding)
1188 pt->encoding = "ISO-8859-1";
1189
1190 /* If the specified encoding is UTF-16 or UTF-32, then make
1191 that more precise by deciding what byte order to use. */
1192 if (strcmp (pt->encoding, "UTF-16") == 0)
1193 precise_encoding = decide_utf16_encoding (port, mode);
1194 else if (strcmp (pt->encoding, "UTF-32") == 0)
1195 precise_encoding = decide_utf32_encoding (port, mode);
1196 else
1197 precise_encoding = pt->encoding;
1198
1199 pti->iconv_descriptors =
1200 open_iconv_descriptors (precise_encoding,
1201 SCM_INPUT_PORT_P (port),
1202 SCM_OUTPUT_PORT_P (port));
1203 }
1204
1205 return pti->iconv_descriptors;
1206 }
1207
1208 /* The name of the encoding is itself encoded in ASCII. */
1209 void
1210 scm_i_set_port_encoding_x (SCM port, const char *encoding)
1211 {
1212 scm_t_port *pt;
1213 scm_t_port_internal *pti;
1214 scm_t_iconv_descriptors *prev;
1215
1216 /* Set the character encoding for this port. */
1217 pt = SCM_PTAB_ENTRY (port);
1218 pti = SCM_PORT_GET_INTERNAL (port);
1219 prev = pti->iconv_descriptors;
1220
1221 /* In order to handle cases where the encoding changes mid-stream
1222 (e.g. within an HTTP stream, or within a file that is composed of
1223 segments with different encodings), we consider this to be "stream
1224 start" for purposes of BOM handling, regardless of our actual file
1225 position. */
1226 pti->at_stream_start_for_bom_read = 1;
1227 pti->at_stream_start_for_bom_write = 1;
1228
1229 if (encoding_matches (encoding, "UTF-8"))
1230 {
1231 pt->encoding = "UTF-8";
1232 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
1233 }
1234 else if (encoding_matches (encoding, "ISO-8859-1"))
1235 {
1236 pt->encoding = "ISO-8859-1";
1237 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
1238 }
1239 else
1240 {
1241 pt->encoding = canonicalize_encoding (encoding);
1242 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
1243 }
1244
1245 pti->iconv_descriptors = NULL;
1246 if (prev)
1247 close_iconv_descriptors (prev);
1248 }
1249
1250 SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
1251 (SCM port),
1252 "Returns, as a string, the character encoding that @var{port}\n"
1253 "uses to interpret its input and output.\n")
1254 #define FUNC_NAME s_scm_port_encoding
1255 {
1256 SCM_VALIDATE_PORT (1, port);
1257
1258 return scm_from_latin1_string (SCM_PTAB_ENTRY (port)->encoding);
1259 }
1260 #undef FUNC_NAME
1261
1262 SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
1263 (SCM port, SCM enc),
1264 "Sets the character encoding that will be used to interpret all\n"
1265 "port I/O. New ports are created with the encoding\n"
1266 "appropriate for the current locale if @code{setlocale} has \n"
1267 "been called or ISO-8859-1 otherwise\n"
1268 "and this procedure can be used to modify that encoding.\n")
1269 #define FUNC_NAME s_scm_set_port_encoding_x
1270 {
1271 char *enc_str;
1272
1273 SCM_VALIDATE_PORT (1, port);
1274 SCM_VALIDATE_STRING (2, enc);
1275
1276 enc_str = scm_to_latin1_string (enc);
1277 scm_i_set_port_encoding_x (port, enc_str);
1278 free (enc_str);
1279
1280 return SCM_UNSPECIFIED;
1281 }
1282 #undef FUNC_NAME
1283
1284 SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
1285 1, 0, 0, (SCM port),
1286 "Returns the behavior of the port when handling a character that\n"
1287 "is not representable in the port's current encoding.\n"
1288 "It returns the symbol @code{error} if unrepresentable characters\n"
1289 "should cause exceptions, @code{substitute} if the port should\n"
1290 "try to replace unrepresentable characters with question marks or\n"
1291 "approximate characters, or @code{escape} if unrepresentable\n"
1292 "characters should be converted to string escapes.\n"
1293 "\n"
1294 "If @var{port} is @code{#f}, then the current default behavior\n"
1295 "will be returned. New ports will have this default behavior\n"
1296 "when they are created.\n")
1297 #define FUNC_NAME s_scm_port_conversion_strategy
1298 {
1299 scm_t_string_failed_conversion_handler h;
1300
1301 SCM_VALIDATE_OPPORT (1, port);
1302
1303 if (scm_is_false (port))
1304 h = scm_i_default_port_conversion_handler ();
1305 else
1306 {
1307 scm_t_port *pt;
1308
1309 SCM_VALIDATE_OPPORT (1, port);
1310 pt = SCM_PTAB_ENTRY (port);
1311
1312 h = pt->ilseq_handler;
1313 }
1314
1315 if (h == SCM_FAILED_CONVERSION_ERROR)
1316 return scm_from_latin1_symbol ("error");
1317 else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
1318 return scm_from_latin1_symbol ("substitute");
1319 else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
1320 return scm_from_latin1_symbol ("escape");
1321 else
1322 abort ();
1323
1324 /* Never gets here. */
1325 return SCM_UNDEFINED;
1326 }
1327 #undef FUNC_NAME
1328
1329 SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!",
1330 2, 0, 0,
1331 (SCM port, SCM sym),
1332 "Sets the behavior of the interpreter when outputting a character\n"
1333 "that is not representable in the port's current encoding.\n"
1334 "@var{sym} can be either @code{'error}, @code{'substitute}, or\n"
1335 "@code{'escape}. If it is @code{'error}, an error will be thrown\n"
1336 "when an unconvertible character is encountered. If it is\n"
1337 "@code{'substitute}, then unconvertible characters will \n"
1338 "be replaced with approximate characters, or with question marks\n"
1339 "if no approximately correct character is available.\n"
1340 "If it is @code{'escape},\n"
1341 "it will appear as a hex escape when output.\n"
1342 "\n"
1343 "If @var{port} is an open port, the conversion error behavior\n"
1344 "is set for that port. If it is @code{#f}, it is set as the\n"
1345 "default behavior for any future ports that get created in\n"
1346 "this thread.\n")
1347 #define FUNC_NAME s_scm_set_port_conversion_strategy_x
1348 {
1349 scm_t_string_failed_conversion_handler handler;
1350
1351 if (scm_is_eq (sym, sym_error))
1352 handler = SCM_FAILED_CONVERSION_ERROR;
1353 else if (scm_is_eq (sym, sym_substitute))
1354 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
1355 else if (scm_is_eq (sym, sym_escape))
1356 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
1357 else
1358 SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym));
1359
1360 if (scm_is_false (port))
1361 scm_i_set_default_port_conversion_handler (handler);
1362 else
1363 {
1364 SCM_VALIDATE_OPPORT (1, port);
1365 SCM_PTAB_ENTRY (port)->ilseq_handler = handler;
1366 }
1367
1368 return SCM_UNSPECIFIED;
1369 }
1370 #undef FUNC_NAME
1371
1372
1373 \f
1374
1375 /* The port lock. */
1376
1377 static void
1378 lock_port (void *mutex)
1379 {
1380 scm_i_pthread_mutex_lock ((scm_i_pthread_mutex_t *) mutex);
1381 }
1382
1383 static void
1384 unlock_port (void *mutex)
1385 {
1386 scm_i_pthread_mutex_unlock ((scm_i_pthread_mutex_t *) mutex);
1387 }
1388
1389 void
1390 scm_dynwind_lock_port (SCM port)
1391 #define FUNC_NAME "dynwind-lock-port"
1392 {
1393 scm_i_pthread_mutex_t *lock;
1394 SCM_VALIDATE_OPPORT (SCM_ARG1, port);
1395 scm_c_lock_port (port, &lock);
1396 if (lock)
1397 {
1398 scm_dynwind_unwind_handler (unlock_port, lock, SCM_F_WIND_EXPLICITLY);
1399 scm_dynwind_rewind_handler (lock_port, lock, 0);
1400 }
1401 }
1402 #undef FUNC_NAME
1403
1404
1405 \f
1406
1407 /* Input. */
1408
1409 int
1410 scm_get_byte_or_eof (SCM port)
1411 {
1412 scm_i_pthread_mutex_t *lock;
1413 int ret;
1414
1415 scm_c_lock_port (port, &lock);
1416 ret = scm_get_byte_or_eof_unlocked (port);
1417 if (lock)
1418 scm_i_pthread_mutex_unlock (lock);
1419
1420 return ret;
1421 }
1422
1423 int
1424 scm_peek_byte_or_eof (SCM port)
1425 {
1426 scm_i_pthread_mutex_t *lock;
1427 int ret;
1428
1429 scm_c_lock_port (port, &lock);
1430 ret = scm_peek_byte_or_eof_unlocked (port);
1431 if (lock)
1432 scm_i_pthread_mutex_unlock (lock);
1433
1434 return ret;
1435 }
1436
/* scm_c_read
 *
 * Used by an application to read an arbitrary number of bytes from an
 * SCM port.  Same semantics as libc read, except that scm_c_read only
 * returns fewer than SIZE bytes if at end-of-file.
 *
 * Warning: Doesn't update port line and column counts!  */
1444
1445 /* This structure, and the following swap_buffer function, are used
1446 for temporarily swapping a port's own read buffer, and the buffer
1447 that the caller of scm_c_read provides. */
1448 struct port_and_swap_buffer
1449 {
1450 scm_t_port *pt;
1451 unsigned char *buffer;
1452 size_t size;
1453 };
1454
1455 static void
1456 swap_buffer (void *data)
1457 {
1458 struct port_and_swap_buffer *psb = (struct port_and_swap_buffer *) data;
1459 unsigned char *old_buf = psb->pt->read_buf;
1460 size_t old_size = psb->pt->read_buf_size;
1461
1462 /* Make the port use (buffer, size) from the struct. */
1463 psb->pt->read_pos = psb->pt->read_buf = psb->pt->read_end = psb->buffer;
1464 psb->pt->read_buf_size = psb->size;
1465
1466 /* Save the port's old (buffer, size) in the struct. */
1467 psb->buffer = old_buf;
1468 psb->size = old_size;
1469 }
1470
1471 static int scm_i_fill_input_unlocked (SCM port);
1472
1473 size_t
1474 scm_c_read_unlocked (SCM port, void *buffer, size_t size)
1475 #define FUNC_NAME "scm_c_read"
1476 {
1477 scm_t_port *pt;
1478 scm_t_port_internal *pti;
1479 size_t n_read = 0, n_available;
1480 struct port_and_swap_buffer psb;
1481
1482 SCM_VALIDATE_OPINPORT (1, port);
1483
1484 pt = SCM_PTAB_ENTRY (port);
1485 pti = SCM_PORT_GET_INTERNAL (port);
1486 if (pt->rw_active == SCM_PORT_WRITE)
1487 SCM_PORT_DESCRIPTOR (port)->flush (port);
1488
1489 if (pt->rw_random)
1490 pt->rw_active = SCM_PORT_READ;
1491
1492 /* Take bytes first from the port's read buffer. */
1493 if (pt->read_pos < pt->read_end)
1494 {
1495 n_available = min (size, pt->read_end - pt->read_pos);
1496 memcpy (buffer, pt->read_pos, n_available);
1497 buffer = (char *) buffer + n_available;
1498 pt->read_pos += n_available;
1499 n_read += n_available;
1500 size -= n_available;
1501 }
1502
1503 /* Avoid the scm_dynwind_* costs if we now have enough data. */
1504 if (size == 0)
1505 return n_read;
1506
1507 /* Now we will call scm_i_fill_input_unlocked repeatedly until we have
1508 read the requested number of bytes. (Note that a single
1509 scm_i_fill_input_unlocked call does not guarantee to fill the whole
1510 of the port's read buffer.) */
1511 if (pt->read_buf_size <= 1
1512 && pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
1513 {
1514 /* The port that we are reading from is unbuffered - i.e. does not
1515 have its own persistent buffer - but we have a buffer, provided
1516 by our caller, that is the right size for the data that is
1517 wanted. For the following scm_i_fill_input_unlocked calls,
1518 therefore, we use the buffer in hand as the port's read buffer.
1519
1520 We need to make sure that the port's normal (1 byte) buffer is
1521 reinstated in case one of the scm_i_fill_input_unlocked ()
1522 calls throws an exception; we use the scm_dynwind_* API to
1523 achieve that.
1524
1525 A consequence of this optimization is that the fill_input
1526 functions can't unget characters. That'll push data to the
1527 pushback buffer instead of this psb buffer. */
1528 #if SCM_DEBUG == 1
1529 unsigned char *pback = pt->putback_buf;
1530 #endif
1531 psb.pt = pt;
1532 psb.buffer = buffer;
1533 psb.size = size;
1534 scm_dynwind_begin (SCM_F_DYNWIND_REWINDABLE);
1535 scm_dynwind_rewind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1536 scm_dynwind_unwind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1537
1538 /* Call scm_i_fill_input_unlocked until we have all the bytes that
1539 we need, or we hit EOF. */
1540 while (pt->read_buf_size && (scm_i_fill_input_unlocked (port) != EOF))
1541 {
1542 pt->read_buf_size -= (pt->read_end - pt->read_pos);
1543 pt->read_pos = pt->read_buf = pt->read_end;
1544 }
1545 #if SCM_DEBUG == 1
1546 if (pback != pt->putback_buf
1547 || pt->read_buf - (unsigned char *) buffer < 0)
1548 scm_misc_error (FUNC_NAME,
1549 "scm_c_read must not call a fill function that pushes "
1550 "back characters onto an unbuffered port", SCM_EOL);
1551 #endif
1552 n_read += pt->read_buf - (unsigned char *) buffer;
1553
1554 /* Reinstate the port's normal buffer. */
1555 scm_dynwind_end ();
1556 }
1557 else
1558 {
1559 /* The port has its own buffer. It is important that we use it,
1560 even if it happens to be smaller than our caller's buffer, so
1561 that a custom port implementation's entry points (in
1562 particular, fill_input) can rely on the buffer always being
1563 the same as they first set up. */
1564 while (size && (scm_i_fill_input_unlocked (port) != EOF))
1565 {
1566 n_available = min (size, pt->read_end - pt->read_pos);
1567 memcpy (buffer, pt->read_pos, n_available);
1568 buffer = (char *) buffer + n_available;
1569 pt->read_pos += n_available;
1570 n_read += n_available;
1571 size -= n_available;
1572 }
1573 }
1574
1575 return n_read;
1576 }
1577 #undef FUNC_NAME
1578
1579 size_t
1580 scm_c_read (SCM port, void *buffer, size_t size)
1581 {
1582 scm_i_pthread_mutex_t *lock;
1583 size_t ret;
1584
1585 scm_c_lock_port (port, &lock);
1586 ret = scm_c_read_unlocked (port, buffer, size);
1587 if (lock)
1588 scm_i_pthread_mutex_unlock (lock);
1589
1590
1591 return ret;
1592 }
1593
1594 /* Update the line and column number of PORT after consumption of C. */
1595 static inline void
1596 update_port_lf (scm_t_wchar c, SCM port)
1597 {
1598 switch (c)
1599 {
1600 case '\a':
1601 case EOF:
1602 break;
1603 case '\b':
1604 SCM_DECCOL (port);
1605 break;
1606 case '\n':
1607 SCM_INCLINE (port);
1608 break;
1609 case '\r':
1610 SCM_ZEROCOL (port);
1611 break;
1612 case '\t':
1613 SCM_TABCOL (port);
1614 break;
1615 default:
1616 SCM_INCCOL (port);
1617 break;
1618 }
1619 }
1620
1621 #define SCM_MBCHAR_BUF_SIZE (4)
1622
1623 /* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint.
1624 UTF8_BUF is assumed to contain a valid UTF-8 sequence. */
1625 static scm_t_wchar
1626 utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
1627 {
1628 scm_t_wchar codepoint;
1629
1630 if (utf8_buf[0] <= 0x7f)
1631 {
1632 assert (size == 1);
1633 codepoint = utf8_buf[0];
1634 }
1635 else if ((utf8_buf[0] & 0xe0) == 0xc0)
1636 {
1637 assert (size == 2);
1638 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL
1639 | (utf8_buf[1] & 0x3f);
1640 }
1641 else if ((utf8_buf[0] & 0xf0) == 0xe0)
1642 {
1643 assert (size == 3);
1644 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL
1645 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL
1646 | (utf8_buf[2] & 0x3f);
1647 }
1648 else
1649 {
1650 assert (size == 4);
1651 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL
1652 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL
1653 | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL
1654 | (utf8_buf[3] & 0x3f);
1655 }
1656
1657 return codepoint;
1658 }
1659
1660 /* Read a UTF-8 sequence from PORT. On success, return 0 and set
1661 *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8
1662 representation, and set *LEN to the length in bytes. Return
1663 `EILSEQ' on error. */
1664 static int
1665 get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
1666 scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1667 {
1668 #define ASSERT_NOT_EOF(b) \
1669 if (SCM_UNLIKELY ((b) == EOF)) \
1670 goto invalid_seq
1671 #define CONSUME_PEEKED_BYTE() \
1672 pt->read_pos++
1673
1674 int byte;
1675 scm_t_port *pt;
1676
1677 *len = 0;
1678 pt = SCM_PTAB_ENTRY (port);
1679
1680 byte = scm_get_byte_or_eof_unlocked (port);
1681 if (byte == EOF)
1682 {
1683 *codepoint = EOF;
1684 return 0;
1685 }
1686
1687 buf[0] = (scm_t_uint8) byte;
1688 *len = 1;
1689
1690 if (buf[0] <= 0x7f)
1691 /* 1-byte form. */
1692 *codepoint = buf[0];
1693 else if (buf[0] >= 0xc2 && buf[0] <= 0xdf)
1694 {
1695 /* 2-byte form. */
1696 byte = scm_peek_byte_or_eof_unlocked (port);
1697 ASSERT_NOT_EOF (byte);
1698
1699 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1700 goto invalid_seq;
1701
1702 CONSUME_PEEKED_BYTE ();
1703 buf[1] = (scm_t_uint8) byte;
1704 *len = 2;
1705
1706 *codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL
1707 | (buf[1] & 0x3f);
1708 }
1709 else if ((buf[0] & 0xf0) == 0xe0)
1710 {
1711 /* 3-byte form. */
1712 byte = scm_peek_byte_or_eof_unlocked (port);
1713 ASSERT_NOT_EOF (byte);
1714
1715 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80
1716 || (buf[0] == 0xe0 && byte < 0xa0)
1717 || (buf[0] == 0xed && byte > 0x9f)))
1718 goto invalid_seq;
1719
1720 CONSUME_PEEKED_BYTE ();
1721 buf[1] = (scm_t_uint8) byte;
1722 *len = 2;
1723
1724 byte = scm_peek_byte_or_eof_unlocked (port);
1725 ASSERT_NOT_EOF (byte);
1726
1727 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1728 goto invalid_seq;
1729
1730 CONSUME_PEEKED_BYTE ();
1731 buf[2] = (scm_t_uint8) byte;
1732 *len = 3;
1733
1734 *codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL
1735 | ((scm_t_wchar) buf[1] & 0x3f) << 6UL
1736 | (buf[2] & 0x3f);
1737 }
1738 else if (buf[0] >= 0xf0 && buf[0] <= 0xf4)
1739 {
1740 /* 4-byte form. */
1741 byte = scm_peek_byte_or_eof_unlocked (port);
1742 ASSERT_NOT_EOF (byte);
1743
1744 if (SCM_UNLIKELY (((byte & 0xc0) != 0x80)
1745 || (buf[0] == 0xf0 && byte < 0x90)
1746 || (buf[0] == 0xf4 && byte > 0x8f)))
1747 goto invalid_seq;
1748
1749 CONSUME_PEEKED_BYTE ();
1750 buf[1] = (scm_t_uint8) byte;
1751 *len = 2;
1752
1753 byte = scm_peek_byte_or_eof_unlocked (port);
1754 ASSERT_NOT_EOF (byte);
1755
1756 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1757 goto invalid_seq;
1758
1759 CONSUME_PEEKED_BYTE ();
1760 buf[2] = (scm_t_uint8) byte;
1761 *len = 3;
1762
1763 byte = scm_peek_byte_or_eof_unlocked (port);
1764 ASSERT_NOT_EOF (byte);
1765
1766 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1767 goto invalid_seq;
1768
1769 CONSUME_PEEKED_BYTE ();
1770 buf[3] = (scm_t_uint8) byte;
1771 *len = 4;
1772
1773 *codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL
1774 | ((scm_t_wchar) buf[1] & 0x3f) << 12UL
1775 | ((scm_t_wchar) buf[2] & 0x3f) << 6UL
1776 | (buf[3] & 0x3f);
1777 }
1778 else
1779 goto invalid_seq;
1780
1781 return 0;
1782
1783 invalid_seq:
1784 /* Here we could choose the consume the faulty byte when it's not a
1785 valid starting byte, but it's not a requirement. What Section 3.9
1786 of Unicode 6.0.0 mandates, though, is to not consume a byte that
1787 would otherwise be a valid starting byte. */
1788
1789 return EILSEQ;
1790
1791 #undef CONSUME_PEEKED_BYTE
1792 #undef ASSERT_NOT_EOF
1793 }
1794
1795 /* Read an ISO-8859-1 codepoint (a byte) from PORT. On success, return
1796 0 and set *CODEPOINT to the codepoint that was read, fill BUF with
1797 its UTF-8 representation, and set *LEN to the length in bytes.
1798 Return `EILSEQ' on error. */
1799 static int
1800 get_latin1_codepoint (SCM port, scm_t_wchar *codepoint,
1801 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1802 {
1803 *codepoint = scm_get_byte_or_eof_unlocked (port);
1804
1805 if (*codepoint == EOF)
1806 *len = 0;
1807 else
1808 {
1809 *len = 1;
1810 buf[0] = *codepoint;
1811 }
1812 return 0;
1813 }
1814
1815 /* Likewise, read a byte sequence from PORT, passing it through its
1816 input conversion descriptor. */
1817 static int
1818 get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
1819 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1820 {
1821 scm_t_iconv_descriptors *id;
1822 scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
1823 size_t input_size = 0;
1824
1825 id = scm_i_port_iconv_descriptors (port, SCM_PORT_READ);
1826
1827 for (;;)
1828 {
1829 int byte_read;
1830 char *input, *output;
1831 size_t input_left, output_left, done;
1832
1833 byte_read = scm_get_byte_or_eof_unlocked (port);
1834 if (SCM_UNLIKELY (byte_read == EOF))
1835 {
1836 if (SCM_LIKELY (input_size == 0))
1837 {
1838 *codepoint = (scm_t_wchar) EOF;
1839 *len = input_size;
1840 return 0;
1841 }
1842 else
1843 {
1844 /* EOF found in the middle of a multibyte character. */
1845 scm_i_set_pending_eof (port);
1846 return EILSEQ;
1847 }
1848 }
1849
1850 buf[input_size++] = byte_read;
1851
1852 input = buf;
1853 input_left = input_size;
1854 output = (char *) utf8_buf;
1855 output_left = sizeof (utf8_buf);
1856
1857 done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
1858
1859 if (done == (size_t) -1)
1860 {
1861 int err = errno;
1862 if (SCM_LIKELY (err == EINVAL))
1863 /* The input byte sequence did not form a complete
1864 character. Read another byte and try again. */
1865 continue;
1866 else
1867 return err;
1868 }
1869 else
1870 {
1871 size_t output_size = sizeof (utf8_buf) - output_left;
1872 if (SCM_LIKELY (output_size > 0))
1873 {
1874 /* iconv generated output. Convert the UTF8_BUF sequence
1875 to a Unicode code point. */
1876 *codepoint = utf8_to_codepoint (utf8_buf, output_size);
1877 *len = input_size;
1878 return 0;
1879 }
1880 else
1881 {
1882 /* iconv consumed some bytes without producing any output.
1883 Most likely this means that a Unicode byte-order mark
1884 (BOM) was consumed, which should not be included in the
1885 returned buf. Shift any remaining bytes to the beginning
1886 of buf, and continue the loop. */
1887 memmove (buf, input, input_left);
1888 input_size = input_left;
1889 continue;
1890 }
1891 }
1892 }
1893 }
1894
1895 /* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
1896 with the byte representation of the codepoint in PORT's encoding, and
1897 set *LEN to the length in bytes of that representation. Return 0 on
1898 success and an errno value on error. */
1899 static SCM_C_INLINE int
1900 get_codepoint (SCM port, scm_t_wchar *codepoint,
1901 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1902 {
1903 int err;
1904 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1905 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
1906
1907 if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
1908 err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
1909 else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
1910 err = get_latin1_codepoint (port, codepoint, buf, len);
1911 else
1912 err = get_iconv_codepoint (port, codepoint, buf, len);
1913
1914 if (SCM_LIKELY (err == 0))
1915 {
1916 if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read))
1917 {
1918 /* Record that we're no longer at stream start. */
1919 pti->at_stream_start_for_bom_read = 0;
1920 if (pt->rw_random)
1921 pti->at_stream_start_for_bom_write = 0;
1922
1923 /* If we just read a BOM in an encoding that recognizes them,
1924 then silently consume it and read another code point. */
1925 if (SCM_UNLIKELY
1926 (*codepoint == SCM_UNICODE_BOM
1927 && (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8
1928 || strcmp (pt->encoding, "UTF-16") == 0
1929 || strcmp (pt->encoding, "UTF-32") == 0)))
1930 return get_codepoint (port, codepoint, buf, len);
1931 }
1932 update_port_lf (*codepoint, port);
1933 }
1934 else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
1935 {
1936 *codepoint = '?';
1937 err = 0;
1938 update_port_lf (*codepoint, port);
1939 }
1940
1941 return err;
1942 }
1943
1944 /* Read a codepoint from PORT and return it. */
1945 scm_t_wchar
1946 scm_getc_unlocked (SCM port)
1947 #define FUNC_NAME "scm_getc"
1948 {
1949 int err;
1950 size_t len;
1951 scm_t_wchar codepoint;
1952 char buf[SCM_MBCHAR_BUF_SIZE];
1953
1954 err = get_codepoint (port, &codepoint, buf, &len);
1955 if (SCM_UNLIKELY (err != 0))
1956 /* At this point PORT should point past the invalid encoding, as per
1957 R6RS-lib Section 8.2.4. */
1958 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
1959
1960 return codepoint;
1961 }
1962 #undef FUNC_NAME
1963
1964 scm_t_wchar
1965 scm_getc (SCM port)
1966 {
1967 scm_i_pthread_mutex_t *lock;
1968 scm_t_wchar ret;
1969
1970 scm_c_lock_port (port, &lock);
1971 ret = scm_getc_unlocked (port);
1972 if (lock)
1973 scm_i_pthread_mutex_unlock (lock);
1974
1975
1976 return ret;
1977 }
1978
1979 SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
1980 (SCM port),
1981 "Return the next character available from @var{port}, updating\n"
1982 "@var{port} to point to the following character. If no more\n"
1983 "characters are available, the end-of-file object is returned.\n"
1984 "\n"
1985 "When @var{port}'s data cannot be decoded according to its\n"
1986 "character encoding, a @code{decoding-error} is raised and\n"
1987 "@var{port} points past the erroneous byte sequence.\n")
1988 #define FUNC_NAME s_scm_read_char
1989 {
1990 scm_t_wchar c;
1991 if (SCM_UNBNDP (port))
1992 port = scm_current_input_port ();
1993 SCM_VALIDATE_OPINPORT (1, port);
1994 c = scm_getc_unlocked (port);
1995 if (EOF == c)
1996 return SCM_EOF_VAL;
1997 return SCM_MAKE_CHAR (c);
1998 }
1999 #undef FUNC_NAME
2000
2001
2002 \f
2003
2004 /* Pushback. */
2005 \f
2006
2007
2008 static void
2009 scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
2010 #define FUNC_NAME "scm_unget_bytes"
2011 {
2012 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2013 size_t old_len, new_len;
2014
2015 scm_i_clear_pending_eof (port);
2016
2017 if (pt->read_buf != pt->putback_buf)
2018 /* switch to the put-back buffer. */
2019 {
2020 if (pt->putback_buf == NULL)
2021 {
2022 pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
2023 ? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
2024 pt->putback_buf
2025 = (unsigned char *) scm_gc_malloc_pointerless
2026 (pt->putback_buf_size, "putback buffer");
2027 }
2028
2029 pt->saved_read_buf = pt->read_buf;
2030 pt->saved_read_pos = pt->read_pos;
2031 pt->saved_read_end = pt->read_end;
2032 pt->saved_read_buf_size = pt->read_buf_size;
2033
2034 /* Put read_pos at the end of the buffer, so that ungets will not
2035 have to shift the buffer contents each time. */
2036 pt->read_buf = pt->putback_buf;
2037 pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
2038 pt->read_buf_size = pt->putback_buf_size;
2039 }
2040
2041 old_len = pt->read_end - pt->read_pos;
2042 new_len = old_len + len;
2043
2044 if (new_len > pt->read_buf_size)
2045 /* The putback buffer needs to be enlarged. */
2046 {
2047 size_t new_buf_size;
2048 unsigned char *new_buf, *new_end, *new_pos;
2049
2050 new_buf_size = pt->read_buf_size * 2;
2051 if (new_buf_size < new_len)
2052 new_buf_size = new_len;
2053
2054 new_buf = (unsigned char *)
2055 scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
2056
2057 /* Put the bytes at the end of the buffer, so that future
2058 ungets won't need to shift the buffer. */
2059 new_end = new_buf + new_buf_size;
2060 new_pos = new_end - old_len;
2061 memcpy (new_pos, pt->read_pos, old_len);
2062
2063 pt->read_buf = pt->putback_buf = new_buf;
2064 pt->read_pos = new_pos;
2065 pt->read_end = new_end;
2066 pt->read_buf_size = pt->putback_buf_size = new_buf_size;
2067 }
2068 else if (pt->read_buf + len < pt->read_pos)
2069 /* If needed, shift the existing buffer contents up.
2070 This should not happen unless some external code
2071 manipulates the putback buffer pointers. */
2072 {
2073 unsigned char *new_end = pt->read_buf + pt->read_buf_size;
2074 unsigned char *new_pos = new_end - old_len;
2075
2076 memmove (new_pos, pt->read_pos, old_len);
2077 pt->read_pos = new_pos;
2078 pt->read_end = new_end;
2079 }
2080
2081 /* Move read_pos back and copy the bytes there. */
2082 pt->read_pos -= len;
2083 memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
2084
2085 if (pt->rw_active == SCM_PORT_WRITE)
2086 scm_flush (port);
2087
2088 if (pt->rw_random)
2089 pt->rw_active = SCM_PORT_READ;
2090 }
2091 #undef FUNC_NAME
2092
2093 void
2094 scm_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
2095 {
2096 scm_i_unget_bytes_unlocked (buf, len, port);
2097 }
2098
2099 void
2100 scm_unget_byte_unlocked (int c, SCM port)
2101 {
2102 unsigned char byte = c;
2103 scm_i_unget_bytes_unlocked (&byte, 1, port);
2104 }
2105
2106 void
2107 scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
2108 {
2109 scm_i_pthread_mutex_t *lock;
2110 scm_c_lock_port (port, &lock);
2111 scm_i_unget_bytes_unlocked (buf, len, port);
2112 if (lock)
2113 scm_i_pthread_mutex_unlock (lock);
2114 }
2115
2116 void
2117 scm_unget_byte (int c, SCM port)
2118 {
2119 unsigned char byte = c;
2120 scm_i_pthread_mutex_t *lock;
2121 scm_c_lock_port (port, &lock);
2122 scm_i_unget_bytes_unlocked (&byte, 1, port);
2123 if (lock)
2124 scm_i_pthread_mutex_unlock (lock);
2125 }
2126
2127 void
2128 scm_ungetc_unlocked (scm_t_wchar c, SCM port)
2129 #define FUNC_NAME "scm_ungetc"
2130 {
2131 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2132 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
2133 char *result;
2134 char result_buf[10];
2135 size_t len;
2136
2137 len = sizeof (result_buf);
2138
2139 if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
2140 {
2141 if (c < 0xf0)
2142 {
2143 result_buf[0] = (char) c;
2144 result = result_buf;
2145 len = 1;
2146 }
2147 else
2148 result =
2149 (char *) u32_to_u8 ((uint32_t *) &c, 1, (uint8_t *) result_buf, &len);
2150 }
2151 else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1 && c <= 0xff)
2152 {
2153 result_buf[0] = (char) c;
2154 result = result_buf;
2155 len = 1;
2156 }
2157 else
2158 result = u32_conv_to_encoding (pt->encoding,
2159 (enum iconv_ilseq_handler) pt->ilseq_handler,
2160 (uint32_t *) &c, 1, NULL,
2161 result_buf, &len);
2162
2163 if (SCM_UNLIKELY (result == NULL || len == 0))
2164 scm_encoding_error (FUNC_NAME, errno,
2165 "conversion to port encoding failed",
2166 SCM_BOOL_F, SCM_MAKE_CHAR (c));
2167
2168 scm_i_unget_bytes_unlocked ((unsigned char *) result, len, port);
2169
2170 if (SCM_UNLIKELY (result != result_buf))
2171 free (result);
2172
2173 if (c == '\n')
2174 {
2175 /* What should col be in this case?
2176 * We'll leave it at -1.
2177 */
2178 SCM_LINUM (port) -= 1;
2179 }
2180 else
2181 SCM_COL(port) -= 1;
2182 }
2183 #undef FUNC_NAME
2184
2185 void
2186 scm_ungetc (scm_t_wchar c, SCM port)
2187 {
2188 scm_i_pthread_mutex_t *lock;
2189 scm_c_lock_port (port, &lock);
2190 scm_ungetc_unlocked (c, port);
2191 if (lock)
2192 scm_i_pthread_mutex_unlock (lock);
2193
2194 }
2195
2196 void
2197 scm_ungets_unlocked (const char *s, int n, SCM port)
2198 {
2199 /* This is simple minded and inefficient, but unreading strings is
2200 * probably not a common operation, and remember that line and
2201 * column numbers have to be handled...
2202 *
2203 * Please feel free to write an optimized version!
2204 */
2205 while (n--)
2206 scm_ungetc_unlocked (s[n], port);
2207 }
2208
2209 void
2210 scm_ungets (const char *s, int n, SCM port)
2211 {
2212 scm_i_pthread_mutex_t *lock;
2213 scm_c_lock_port (port, &lock);
2214 scm_ungets_unlocked (s, n, port);
2215 if (lock)
2216 scm_i_pthread_mutex_unlock (lock);
2217
2218 }
2219
2220 SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
2221 (SCM port),
2222 "Return the next character available from @var{port},\n"
2223 "@emph{without} updating @var{port} to point to the following\n"
2224 "character. If no more characters are available, the\n"
2225 "end-of-file object is returned.\n"
2226 "\n"
2227 "The value returned by\n"
2228 "a call to @code{peek-char} is the same as the value that would\n"
2229 "have been returned by a call to @code{read-char} on the same\n"
2230 "port. The only difference is that the very next call to\n"
2231 "@code{read-char} or @code{peek-char} on that @var{port} will\n"
2232 "return the value returned by the preceding call to\n"
2233 "@code{peek-char}. In particular, a call to @code{peek-char} on\n"
2234 "an interactive port will hang waiting for input whenever a call\n"
2235 "to @code{read-char} would have hung.\n"
2236 "\n"
2237 "As for @code{read-char}, a @code{decoding-error} may be raised\n"
2238 "if such a situation occurs. However, unlike with @code{read-char},\n"
2239 "@var{port} still points at the beginning of the erroneous byte\n"
2240 "sequence when the error is raised.\n")
2241 #define FUNC_NAME s_scm_peek_char
2242 {
2243 int err;
2244 SCM result;
2245 scm_t_wchar c;
2246 char bytes[SCM_MBCHAR_BUF_SIZE];
2247 long column, line;
2248 size_t len = 0;
2249
2250 if (SCM_UNBNDP (port))
2251 port = scm_current_input_port ();
2252 SCM_VALIDATE_OPINPORT (1, port);
2253
2254 column = SCM_COL (port);
2255 line = SCM_LINUM (port);
2256
2257 err = get_codepoint (port, &c, bytes, &len);
2258
2259 scm_i_unget_bytes_unlocked ((unsigned char *) bytes, len, port);
2260
2261 SCM_COL (port) = column;
2262 SCM_LINUM (port) = line;
2263
2264 if (SCM_UNLIKELY (err != 0))
2265 {
2266 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
2267
2268 /* Shouldn't happen since `catch' always aborts to prompt. */
2269 result = SCM_BOOL_F;
2270 }
2271 else if (c == EOF)
2272 {
2273 scm_i_set_pending_eof (port);
2274 result = SCM_EOF_VAL;
2275 }
2276 else
2277 result = SCM_MAKE_CHAR (c);
2278
2279 return result;
2280 }
2281 #undef FUNC_NAME
2282
2283 SCM_DEFINE (scm_unread_char, "unread-char", 1, 1, 0,
2284 (SCM cobj, SCM port),
2285 "Place character @var{cobj} in @var{port} so that it will be\n"
2286 "read by the next read operation. If called multiple times, the\n"
2287 "unread characters will be read again in last-in first-out\n"
2288 "order. If @var{port} is not supplied, the current input port\n"
2289 "is used.")
2290 #define FUNC_NAME s_scm_unread_char
2291 {
2292 int c;
2293
2294 SCM_VALIDATE_CHAR (1, cobj);
2295 if (SCM_UNBNDP (port))
2296 port = scm_current_input_port ();
2297 SCM_VALIDATE_OPINPORT (2, port);
2298
2299 c = SCM_CHAR (cobj);
2300
2301 scm_ungetc_unlocked (c, port);
2302 return cobj;
2303 }
2304 #undef FUNC_NAME
2305
2306 SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0,
2307 (SCM str, SCM port),
2308 "Place the string @var{str} in @var{port} so that its characters will be\n"
2309 "read in subsequent read operations. If called multiple times, the\n"
2310 "unread characters will be read again in last-in first-out order. If\n"
2311 "@var{port} is not supplied, the current-input-port is used.")
2312 #define FUNC_NAME s_scm_unread_string
2313 {
2314 int n;
2315 SCM_VALIDATE_STRING (1, str);
2316 if (SCM_UNBNDP (port))
2317 port = scm_current_input_port ();
2318 SCM_VALIDATE_OPINPORT (2, port);
2319
2320 n = scm_i_string_length (str);
2321
2322 while (n--)
2323 scm_ungetc_unlocked (scm_i_string_ref (str, n), port);
2324
2325 return str;
2326 }
2327 #undef FUNC_NAME
2328
2329
2330 \f
2331
2332 /* Manipulating the buffers. */
2333
2334 /* This routine does not take any locks, as it is usually called as part
2335 of a port implementation. */
2336 void
2337 scm_port_non_buffer (scm_t_port *pt)
2338 {
2339 pt->read_pos = pt->read_buf = pt->read_end = &pt->shortbuf;
2340 pt->write_buf = pt->write_pos = &pt->shortbuf;
2341 pt->read_buf_size = pt->write_buf_size = 1;
2342 pt->write_end = pt->write_buf + pt->write_buf_size;
2343 }
2344
2345 /* this should only be called when the read buffer is empty. it
2346 tries to refill the read buffer. it returns the first char from
2347 the port, which is either EOF or *(pt->read_pos). */
2348 static int
2349 scm_i_fill_input_unlocked (SCM port)
2350 {
2351 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2352 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
2353
2354 assert (pt->read_pos == pt->read_end);
2355
2356 if (pti->pending_eof)
2357 {
2358 pti->pending_eof = 0;
2359 return EOF;
2360 }
2361
2362 if (pt->read_buf == pt->putback_buf)
2363 {
2364 /* finished reading put-back chars. */
2365 pt->read_buf = pt->saved_read_buf;
2366 pt->read_pos = pt->saved_read_pos;
2367 pt->read_end = pt->saved_read_end;
2368 pt->read_buf_size = pt->saved_read_buf_size;
2369 if (pt->read_pos < pt->read_end)
2370 return *(pt->read_pos);
2371 }
2372 return SCM_PORT_DESCRIPTOR (port)->fill_input (port);
2373 }
2374
2375 int
2376 scm_fill_input (SCM port)
2377 {
2378 scm_i_pthread_mutex_t *lock;
2379 int ret;
2380
2381 scm_c_lock_port (port, &lock);
2382 ret = scm_fill_input_unlocked (port);
2383 if (lock)
2384 scm_i_pthread_mutex_unlock (lock);
2385
2386
2387 return ret;
2388 }
2389
2390 /* Slow-path fallback for 'scm_get_byte_or_eof_unlocked' */
2391 int
2392 scm_slow_get_byte_or_eof_unlocked (SCM port)
2393 {
2394 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2395
2396 if (pt->rw_active == SCM_PORT_WRITE)
2397 scm_flush_unlocked (port);
2398
2399 if (pt->rw_random)
2400 pt->rw_active = SCM_PORT_READ;
2401
2402 if (pt->read_pos >= pt->read_end)
2403 {
2404 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2405 return EOF;
2406 }
2407
2408 return *pt->read_pos++;
2409 }
2410
2411 /* Slow-path fallback for 'scm_peek_byte_or_eof_unlocked' */
2412 int
2413 scm_slow_peek_byte_or_eof_unlocked (SCM port)
2414 {
2415 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2416
2417 if (pt->rw_active == SCM_PORT_WRITE)
2418 scm_flush_unlocked (port);
2419
2420 if (pt->rw_random)
2421 pt->rw_active = SCM_PORT_READ;
2422
2423 if (pt->read_pos >= pt->read_end)
2424 {
2425 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2426 {
2427 scm_i_set_pending_eof (port);
2428 return EOF;
2429 }
2430 }
2431
2432 return *pt->read_pos;
2433 }
2434
2435 /* Move up to READ_LEN bytes from PORT's putback and/or read buffers
2436 into memory starting at DEST. Return the number of bytes moved.
2437 PORT's line/column numbers are left unchanged. */
2438 size_t
2439 scm_take_from_input_buffers (SCM port, char *dest, size_t read_len)
2440 {
2441 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2442 size_t bytes_read = 0;
2443 size_t from_buf = min (pt->read_end - pt->read_pos, read_len);
2444
2445 if (from_buf > 0)
2446 {
2447 memcpy (dest, pt->read_pos, from_buf);
2448 pt->read_pos += from_buf;
2449 bytes_read += from_buf;
2450 read_len -= from_buf;
2451 dest += from_buf;
2452 }
2453
2454 /* if putback was active, try the real input buffer too. */
2455 if (pt->read_buf == pt->putback_buf)
2456 {
2457 from_buf = min (pt->saved_read_end - pt->saved_read_pos, read_len);
2458 if (from_buf > 0)
2459 {
2460 memcpy (dest, pt->saved_read_pos, from_buf);
2461 pt->saved_read_pos += from_buf;
2462 bytes_read += from_buf;
2463 }
2464 }
2465
2466 return bytes_read;
2467 }
2468
2469 /* Clear a port's read buffers, returning the contents. */
2470 SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0,
2471 (SCM port),
2472 "This procedure clears a port's input buffers, similar\n"
2473 "to the way that force-output clears the output buffer. The\n"
2474 "contents of the buffers are returned as a single string, e.g.,\n"
2475 "\n"
2476 "@lisp\n"
2477 "(define p (open-input-file ...))\n"
2478 "(drain-input p) => empty string, nothing buffered yet.\n"
2479 "(unread-char (read-char p) p)\n"
2480 "(drain-input p) => initial chars from p, up to the buffer size.\n"
2481 "@end lisp\n\n"
2482 "Draining the buffers may be useful for cleanly finishing\n"
2483 "buffered I/O so that the file descriptor can be used directly\n"
2484 "for further input.")
2485 #define FUNC_NAME s_scm_drain_input
2486 {
2487 SCM result;
2488 char *data;
2489 scm_t_port *pt;
2490 long count;
2491
2492 SCM_VALIDATE_OPINPORT (1, port);
2493 pt = SCM_PTAB_ENTRY (port);
2494
2495 count = pt->read_end - pt->read_pos;
2496 if (pt->read_buf == pt->putback_buf)
2497 count += pt->saved_read_end - pt->saved_read_pos;
2498
2499 if (count)
2500 {
2501 result = scm_i_make_string (count, &data, 0);
2502 scm_take_from_input_buffers (port, data, count);
2503 }
2504 else
2505 result = scm_nullstr;
2506
2507 return result;
2508 }
2509 #undef FUNC_NAME
2510
2511 void
2512 scm_end_input_unlocked (SCM port)
2513 {
2514 long offset;
2515 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2516
2517 scm_i_clear_pending_eof (port);
2518 if (pt->read_buf == pt->putback_buf)
2519 {
2520 offset = pt->read_end - pt->read_pos;
2521 pt->read_buf = pt->saved_read_buf;
2522 pt->read_pos = pt->saved_read_pos;
2523 pt->read_end = pt->saved_read_end;
2524 pt->read_buf_size = pt->saved_read_buf_size;
2525 }
2526 else
2527 offset = 0;
2528
2529 SCM_PORT_DESCRIPTOR (port)->end_input (port, offset);
2530 }
2531
2532 void
2533 scm_end_input (SCM port)
2534 {
2535 scm_i_pthread_mutex_t *lock;
2536 scm_c_lock_port (port, &lock);
2537 scm_end_input_unlocked (port);
2538 if (lock)
2539 scm_i_pthread_mutex_unlock (lock);
2540
2541 }
2542
2543 SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0,
2544 (SCM port),
2545 "Flush the specified output port, or the current output port if @var{port}\n"
2546 "is omitted. The current output buffer contents are passed to the\n"
2547 "underlying port implementation (e.g., in the case of fports, the\n"
2548 "data will be written to the file and the output buffer will be cleared.)\n"
2549 "It has no effect on an unbuffered port.\n\n"
2550 "The return value is unspecified.")
2551 #define FUNC_NAME s_scm_force_output
2552 {
2553 if (SCM_UNBNDP (port))
2554 port = scm_current_output_port ();
2555 else
2556 {
2557 port = SCM_COERCE_OUTPORT (port);
2558 SCM_VALIDATE_OPOUTPORT (1, port);
2559 }
2560 scm_flush_unlocked (port);
2561 return SCM_UNSPECIFIED;
2562 }
2563 #undef FUNC_NAME
2564
2565 void
2566 scm_flush_unlocked (SCM port)
2567 {
2568 SCM_PORT_DESCRIPTOR (port)->flush (port);
2569 }
2570
2571 void
2572 scm_flush (SCM port)
2573 {
2574 scm_i_pthread_mutex_t *lock;
2575 scm_c_lock_port (port, &lock);
2576 scm_flush_unlocked (port);
2577 if (lock)
2578 scm_i_pthread_mutex_unlock (lock);
2579
2580 }
2581
2582 int
2583 scm_fill_input_unlocked (SCM port)
2584 {
2585 return scm_i_fill_input_unlocked (port);
2586 }
2587
2588
2589 \f
2590
2591 /* Output. */
2592
2593 void
2594 scm_putc (char c, SCM port)
2595 {
2596 scm_i_pthread_mutex_t *lock;
2597 scm_c_lock_port (port, &lock);
2598 scm_putc_unlocked (c, port);
2599 if (lock)
2600 scm_i_pthread_mutex_unlock (lock);
2601
2602 }
2603
2604 void
2605 scm_puts (const char *s, SCM port)
2606 {
2607 scm_i_pthread_mutex_t *lock;
2608 scm_c_lock_port (port, &lock);
2609 scm_puts_unlocked (s, port);
2610 if (lock)
2611 scm_i_pthread_mutex_unlock (lock);
2612
2613 }
2614
2615 /* scm_c_write
2616 *
2617 * Used by an application to write arbitrary number of bytes to an SCM
2618 * port. Similar semantics as libc write. However, unlike libc
2619 * write, scm_c_write writes the requested number of bytes and has no
2620 * return value.
2621 *
2622 * Warning: Doesn't update port line and column counts!
2623 */
2624 void
2625 scm_c_write_unlocked (SCM port, const void *ptr, size_t size)
2626 #define FUNC_NAME "scm_c_write"
2627 {
2628 scm_t_port *pt;
2629 scm_t_ptob_descriptor *ptob;
2630
2631 SCM_VALIDATE_OPOUTPORT (1, port);
2632
2633 pt = SCM_PTAB_ENTRY (port);
2634 ptob = SCM_PORT_DESCRIPTOR (port);
2635
2636 if (pt->rw_active == SCM_PORT_READ)
2637 scm_end_input_unlocked (port);
2638
2639 ptob->write (port, ptr, size);
2640
2641 if (pt->rw_random)
2642 pt->rw_active = SCM_PORT_WRITE;
2643 }
2644 #undef FUNC_NAME
2645
2646 void
2647 scm_c_write (SCM port, const void *ptr, size_t size)
2648 {
2649 scm_i_pthread_mutex_t *lock;
2650 scm_c_lock_port (port, &lock);
2651 scm_c_write_unlocked (port, ptr, size);
2652 if (lock)
2653 scm_i_pthread_mutex_unlock (lock);
2654
2655 }
2656
2657 /* scm_lfwrite
2658 *
2659 * This function differs from scm_c_write; it updates port line and
2660 * column. */
2661 void
2662 scm_lfwrite_unlocked (const char *ptr, size_t size, SCM port)
2663 {
2664 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2665 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
2666
2667 if (pt->rw_active == SCM_PORT_READ)
2668 scm_end_input_unlocked (port);
2669
2670 ptob->write (port, ptr, size);
2671
2672 for (; size; ptr++, size--)
2673 update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port);
2674
2675 if (pt->rw_random)
2676 pt->rw_active = SCM_PORT_WRITE;
2677 }
2678
2679 void
2680 scm_lfwrite (const char *ptr, size_t size, SCM port)
2681 {
2682 scm_i_pthread_mutex_t *lock;
2683 scm_c_lock_port (port, &lock);
2684 scm_lfwrite_unlocked (ptr, size, port);
2685 if (lock)
2686 scm_i_pthread_mutex_unlock (lock);
2687
2688 }
2689
2690 /* Write STR to PORT from START inclusive to END exclusive. */
2691 void
2692 scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
2693 {
2694 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2695
2696 if (pt->rw_active == SCM_PORT_READ)
2697 scm_end_input_unlocked (port);
2698
2699 if (end == (size_t) -1)
2700 end = scm_i_string_length (str);
2701
2702 scm_i_display_substring (str, start, end, port);
2703
2704 if (pt->rw_random)
2705 pt->rw_active = SCM_PORT_WRITE;
2706 }
2707
2708
2709 \f
2710
2711 /* Querying and setting positions, and character availability. */
2712
2713 SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0,
2714 (SCM port),
2715 "Return @code{#t} if a character is ready on input @var{port}\n"
2716 "and return @code{#f} otherwise. If @code{char-ready?} returns\n"
2717 "@code{#t} then the next @code{read-char} operation on\n"
2718 "@var{port} is guaranteed not to hang. If @var{port} is a file\n"
2719 "port at end of file then @code{char-ready?} returns @code{#t}.\n"
2720 "\n"
2721 "@code{char-ready?} exists to make it possible for a\n"
2722 "program to accept characters from interactive ports without\n"
2723 "getting stuck waiting for input. Any input editors associated\n"
2724 "with such ports must make sure that characters whose existence\n"
2725 "has been asserted by @code{char-ready?} cannot be rubbed out.\n"
2726 "If @code{char-ready?} were to return @code{#f} at end of file,\n"
2727 "a port at end of file would be indistinguishable from an\n"
2728 "interactive port that has no ready characters.")
2729 #define FUNC_NAME s_scm_char_ready_p
2730 {
2731 scm_t_port *pt;
2732
2733 if (SCM_UNBNDP (port))
2734 port = scm_current_input_port ();
2735 /* It's possible to close the current input port, so validate even in
2736 this case. */
2737 SCM_VALIDATE_OPINPORT (1, port);
2738
2739 pt = SCM_PTAB_ENTRY (port);
2740
2741 /* if the current read buffer is filled, or the
2742 last pushed-back char has been read and the saved buffer is
2743 filled, result is true. */
2744 if (pt->read_pos < pt->read_end
2745 || (pt->read_buf == pt->putback_buf
2746 && pt->saved_read_pos < pt->saved_read_end))
2747 return SCM_BOOL_T;
2748 else
2749 {
2750 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
2751
2752 if (ptob->input_waiting)
2753 return scm_from_bool(ptob->input_waiting (port));
2754 else
2755 return SCM_BOOL_T;
2756 }
2757 }
2758 #undef FUNC_NAME
2759
2760 SCM_DEFINE (scm_seek, "seek", 3, 0, 0,
2761 (SCM fd_port, SCM offset, SCM whence),
2762 "Sets the current position of @var{fd_port} to the integer\n"
2763 "@var{offset}, which is interpreted according to the value of\n"
2764 "@var{whence}.\n"
2765 "\n"
2766 "One of the following variables should be supplied for\n"
2767 "@var{whence}:\n"
2768 "@defvar SEEK_SET\n"
2769 "Seek from the beginning of the file.\n"
2770 "@end defvar\n"
2771 "@defvar SEEK_CUR\n"
2772 "Seek from the current position.\n"
2773 "@end defvar\n"
2774 "@defvar SEEK_END\n"
2775 "Seek from the end of the file.\n"
2776 "@end defvar\n"
2777 "If @var{fd_port} is a file descriptor, the underlying system\n"
2778 "call is @code{lseek}. @var{port} may be a string port.\n"
2779 "\n"
2780 "The value returned is the new position in the file. This means\n"
2781 "that the current position of a port can be obtained using:\n"
2782 "@lisp\n"
2783 "(seek port 0 SEEK_CUR)\n"
2784 "@end lisp")
2785 #define FUNC_NAME s_scm_seek
2786 {
2787 int how;
2788
2789 fd_port = SCM_COERCE_OUTPORT (fd_port);
2790
2791 how = scm_to_int (whence);
2792 if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END)
2793 SCM_OUT_OF_RANGE (3, whence);
2794
2795 if (SCM_OPPORTP (fd_port))
2796 {
2797 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (fd_port);
2798 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (fd_port);
2799 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2800 off_t_or_off64_t rv;
2801
2802 if (!ptob->seek)
2803 SCM_MISC_ERROR ("port is not seekable",
2804 scm_cons (fd_port, SCM_EOL));
2805 else
2806 rv = ptob->seek (fd_port, off, how);
2807
2808 /* Set stream-start flags according to new position. */
2809 pti->at_stream_start_for_bom_read = (rv == 0);
2810 pti->at_stream_start_for_bom_write = (rv == 0);
2811
2812 scm_i_clear_pending_eof (fd_port);
2813
2814 return scm_from_off_t_or_off64_t (rv);
2815 }
2816 else /* file descriptor?. */
2817 {
2818 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2819 off_t_or_off64_t rv;
2820 rv = lseek_or_lseek64 (scm_to_int (fd_port), off, how);
2821 if (rv == -1)
2822 SCM_SYSERROR;
2823 return scm_from_off_t_or_off64_t (rv);
2824 }
2825 }
2826 #undef FUNC_NAME
2827
/* Where the platform headers provide no O_BINARY, define it as 0 so
   that OR-ing it into open() flags changes nothing.  */
#if !defined (O_BINARY)
# define O_BINARY 0
#endif
2831
/* Fallback truncate() for systems that have ftruncate() but not the
   filename variant (Mingw, where ftruncate may itself sit on top of
   chsize).  Opens FILE write-only, truncates it to LENGTH through the
   descriptor and closes it.  Returns -1 with errno set on failure,
   otherwise the result of close().  */
#if HAVE_FTRUNCATE && ! HAVE_TRUNCATE
static int
truncate (const char *file, off_t length)
{
  int fd;
  int status;
  int saved_errno;

  fd = open (file, O_BINARY | O_WRONLY);
  if (fd == -1)
    return -1;

  status = ftruncate (fd, length);
  if (status != -1)
    return close (fd);

  /* Closing may clobber errno; report the ftruncate failure.  */
  saved_errno = errno;
  close (fd);
  errno = saved_errno;

  return -1;
}
#endif /* HAVE_FTRUNCATE && ! HAVE_TRUNCATE */
2856
2857 SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0,
2858 (SCM object, SCM length),
2859 "Truncate file @var{object} to @var{length} bytes. @var{object}\n"
2860 "can be a filename string, a port object, or an integer file\n"
2861 "descriptor.\n"
2862 "The return value is unspecified.\n"
2863 "\n"
2864 "For a port or file descriptor @var{length} can be omitted, in\n"
2865 "which case the file is truncated at the current position (per\n"
2866 "@code{ftell} above).\n"
2867 "\n"
2868 "On most systems a file can be extended by giving a length\n"
2869 "greater than the current size, but this is not mandatory in the\n"
2870 "POSIX standard.")
2871 #define FUNC_NAME s_scm_truncate_file
2872 {
2873 int rv;
2874
2875 /* "object" can be a port, fdes or filename.
2876
2877 Negative "length" makes no sense, but it's left to truncate() or
2878 ftruncate() to give back an error for that (normally EINVAL).
2879 */
2880
2881 if (SCM_UNBNDP (length))
2882 {
2883 /* must supply length if object is a filename. */
2884 if (scm_is_string (object))
2885 SCM_MISC_ERROR("must supply length if OBJECT is a filename", SCM_EOL);
2886
2887 length = scm_seek (object, SCM_INUM0, scm_from_int (SEEK_CUR));
2888 }
2889
2890 object = SCM_COERCE_OUTPORT (object);
2891 if (scm_is_integer (object))
2892 {
2893 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2894 SCM_SYSCALL (rv = ftruncate_or_ftruncate64 (scm_to_int (object),
2895 c_length));
2896 }
2897 else if (SCM_OPOUTPORTP (object))
2898 {
2899 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2900 scm_t_port *pt = SCM_PTAB_ENTRY (object);
2901 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (object);
2902
2903 if (!ptob->truncate)
2904 SCM_MISC_ERROR ("port is not truncatable", SCM_EOL);
2905
2906 scm_i_clear_pending_eof (object);
2907 if (pt->rw_active == SCM_PORT_READ)
2908 scm_end_input_unlocked (object);
2909 else if (pt->rw_active == SCM_PORT_WRITE)
2910 ptob->flush (object);
2911
2912 ptob->truncate (object, c_length);
2913 rv = 0;
2914 }
2915 else
2916 {
2917 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2918 char *str = scm_to_locale_string (object);
2919 int eno;
2920 SCM_SYSCALL (rv = truncate_or_truncate64 (str, c_length));
2921 eno = errno;
2922 free (str);
2923 errno = eno;
2924 }
2925 if (rv == -1)
2926 SCM_SYSERROR;
2927 return SCM_UNSPECIFIED;
2928 }
2929 #undef FUNC_NAME
2930
2931 SCM_DEFINE (scm_port_line, "port-line", 1, 0, 0,
2932 (SCM port),
2933 "Return the current line number for @var{port}.\n"
2934 "\n"
2935 "The first line of a file is 0. But you might want to add 1\n"
2936 "when printing line numbers, since starting from 1 is\n"
2937 "traditional in error messages, and likely to be more natural to\n"
2938 "non-programmers.")
2939 #define FUNC_NAME s_scm_port_line
2940 {
2941 port = SCM_COERCE_OUTPORT (port);
2942 SCM_VALIDATE_OPENPORT (1, port);
2943 return scm_from_long (SCM_LINUM (port));
2944 }
2945 #undef FUNC_NAME
2946
2947 SCM_DEFINE (scm_set_port_line_x, "set-port-line!", 2, 0, 0,
2948 (SCM port, SCM line),
2949 "Set the current line number for @var{port} to @var{line}. The\n"
2950 "first line of a file is 0.")
2951 #define FUNC_NAME s_scm_set_port_line_x
2952 {
2953 port = SCM_COERCE_OUTPORT (port);
2954 SCM_VALIDATE_OPENPORT (1, port);
2955 SCM_PTAB_ENTRY (port)->line_number = scm_to_long (line);
2956 return SCM_UNSPECIFIED;
2957 }
2958 #undef FUNC_NAME
2959
2960 SCM_DEFINE (scm_port_column, "port-column", 1, 0, 0,
2961 (SCM port),
2962 "Return the current column number of @var{port}.\n"
2963 "If the number is\n"
2964 "unknown, the result is #f. Otherwise, the result is a 0-origin integer\n"
2965 "- i.e. the first character of the first line is line 0, column 0.\n"
2966 "(However, when you display a file position, for example in an error\n"
2967 "message, we recommend you add 1 to get 1-origin integers. This is\n"
2968 "because lines and column numbers traditionally start with 1, and that is\n"
2969 "what non-programmers will find most natural.)")
2970 #define FUNC_NAME s_scm_port_column
2971 {
2972 port = SCM_COERCE_OUTPORT (port);
2973 SCM_VALIDATE_OPENPORT (1, port);
2974 return scm_from_int (SCM_COL (port));
2975 }
2976 #undef FUNC_NAME
2977
2978 SCM_DEFINE (scm_set_port_column_x, "set-port-column!", 2, 0, 0,
2979 (SCM port, SCM column),
2980 "Set the current column of @var{port}. Before reading the first\n"
2981 "character on a line the column should be 0.")
2982 #define FUNC_NAME s_scm_set_port_column_x
2983 {
2984 port = SCM_COERCE_OUTPORT (port);
2985 SCM_VALIDATE_OPENPORT (1, port);
2986 SCM_PTAB_ENTRY (port)->column_number = scm_to_int (column);
2987 return SCM_UNSPECIFIED;
2988 }
2989 #undef FUNC_NAME
2990
2991 SCM_DEFINE (scm_port_filename, "port-filename", 1, 0, 0,
2992 (SCM port),
2993 "Return the filename associated with @var{port}, or @code{#f}\n"
2994 "if no filename is associated with the port.")
2995 #define FUNC_NAME s_scm_port_filename
2996 {
2997 port = SCM_COERCE_OUTPORT (port);
2998 SCM_VALIDATE_OPENPORT (1, port);
2999 return SCM_FILENAME (port);
3000 }
3001 #undef FUNC_NAME
3002
3003 SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0,
3004 (SCM port, SCM filename),
3005 "Change the filename associated with @var{port}, using the current input\n"
3006 "port if none is specified. Note that this does not change the port's\n"
3007 "source of data, but only the value that is returned by\n"
3008 "@code{port-filename} and reported in diagnostic output.")
3009 #define FUNC_NAME s_scm_set_port_filename_x
3010 {
3011 port = SCM_COERCE_OUTPORT (port);
3012 SCM_VALIDATE_OPENPORT (1, port);
3013 /* We allow the user to set the filename to whatever he likes. */
3014 SCM_SET_FILENAME (port, filename);
3015 return SCM_UNSPECIFIED;
3016 }
3017 #undef FUNC_NAME
3018
3019
3020 \f
3021
3022 /* Implementation helpers for port printing functions. */
3023
3024 void
3025 scm_print_port_mode (SCM exp, SCM port)
3026 {
3027 scm_puts_unlocked (SCM_CLOSEDP (exp)
3028 ? "closed: "
3029 : (SCM_RDNG & SCM_CELL_WORD_0 (exp)
3030 ? (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
3031 ? "input-output: "
3032 : "input: ")
3033 : (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
3034 ? "output: "
3035 : "bogus: ")),
3036 port);
3037 }
3038
3039 int
3040 scm_port_print (SCM exp, SCM port, scm_print_state *pstate SCM_UNUSED)
3041 {
3042 char *type = SCM_PTOBNAME (SCM_PTOBNUM (exp));
3043 if (!type)
3044 type = "port";
3045 scm_puts_unlocked ("#<", port);
3046 scm_print_port_mode (exp, port);
3047 scm_puts_unlocked (type, port);
3048 scm_putc_unlocked (' ', port);
3049 scm_uintprint (SCM_CELL_WORD_1 (exp), 16, port);
3050 scm_putc_unlocked ('>', port);
3051 return 1;
3052 }
3053
3054
3055 \f
3056
3057 /* Iterating over all ports. */
3058
3059 struct for_each_data
3060 {
3061 void (*proc) (void *data, SCM p);
3062 void *data;
3063 };
3064
3065 static SCM
3066 for_each_trampoline (void *data, SCM port, SCM result)
3067 {
3068 struct for_each_data *d = data;
3069
3070 d->proc (d->data, port);
3071
3072 return result;
3073 }
3074
3075 void
3076 scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data)
3077 {
3078 struct for_each_data d;
3079
3080 d.proc = proc;
3081 d.data = data;
3082
3083 scm_c_weak_set_fold (for_each_trampoline, &d, SCM_EOL,
3084 scm_i_port_weak_set);
3085 }
3086
3087 static void
3088 scm_for_each_trampoline (void *data, SCM port)
3089 {
3090 scm_call_1 (SCM_PACK_POINTER (data), port);
3091 }
3092
3093 SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0,
3094 (SCM proc),
3095 "Apply @var{proc} to each port in the Guile port table\n"
3096 "in turn. The return value is unspecified. More specifically,\n"
3097 "@var{proc} is applied exactly once to every port that exists\n"
3098 "in the system at the time @code{port-for-each} is invoked.\n"
3099 "Changes to the port table while @code{port-for-each} is running\n"
3100 "have no effect as far as @code{port-for-each} is concerned.")
3101 #define FUNC_NAME s_scm_port_for_each
3102 {
3103 SCM_VALIDATE_PROC (1, proc);
3104
3105 scm_c_port_for_each (scm_for_each_trampoline, SCM_UNPACK_POINTER (proc));
3106
3107 return SCM_UNSPECIFIED;
3108 }
3109 #undef FUNC_NAME
3110
3111 static void
3112 flush_output_port (void *closure, SCM port)
3113 {
3114 if (SCM_OPOUTPORTP (port))
3115 scm_flush_unlocked (port);
3116 }
3117
3118 SCM_DEFINE (scm_flush_all_ports, "flush-all-ports", 0, 0, 0,
3119 (),
3120 "Equivalent to calling @code{force-output} on\n"
3121 "all open output ports. The return value is unspecified.")
3122 #define FUNC_NAME s_scm_flush_all_ports
3123 {
3124 scm_c_port_for_each (&flush_output_port, NULL);
3125 return SCM_UNSPECIFIED;
3126 }
3127 #undef FUNC_NAME
3128
3129
3130 \f
3131
3132 /* Void ports. */
3133
3134 scm_t_bits scm_tc16_void_port = 0;
3135
3136 static int fill_input_void_port (SCM port SCM_UNUSED)
3137 {
3138 return EOF;
3139 }
3140
3141 static void
3142 write_void_port (SCM port SCM_UNUSED,
3143 const void *data SCM_UNUSED,
3144 size_t size SCM_UNUSED)
3145 {
3146 }
3147
3148 static SCM
3149 scm_i_void_port (long mode_bits)
3150 {
3151 SCM ret;
3152
3153 ret = scm_c_make_port (scm_tc16_void_port, mode_bits, 0);
3154
3155 scm_port_non_buffer (SCM_PTAB_ENTRY (ret));
3156
3157 return ret;
3158 }
3159
3160 SCM
3161 scm_void_port (char *mode_str)
3162 {
3163 return scm_i_void_port (scm_mode_bits (mode_str));
3164 }
3165
3166 SCM_DEFINE (scm_sys_make_void_port, "%make-void-port", 1, 0, 0,
3167 (SCM mode),
3168 "Create and return a new void port. A void port acts like\n"
3169 "@file{/dev/null}. The @var{mode} argument\n"
3170 "specifies the input/output modes for this port: see the\n"
3171 "documentation for @code{open-file} in @ref{File Ports}.")
3172 #define FUNC_NAME s_scm_sys_make_void_port
3173 {
3174 return scm_i_void_port (scm_i_mode_bits (mode));
3175 }
3176 #undef FUNC_NAME
3177
3178
3179 \f
3180
3181 /* Initialization. */
3182
3183 void
3184 scm_init_ports ()
3185 {
3186 /* lseek() symbols. */
3187 scm_c_define ("SEEK_SET", scm_from_int (SEEK_SET));
3188 scm_c_define ("SEEK_CUR", scm_from_int (SEEK_CUR));
3189 scm_c_define ("SEEK_END", scm_from_int (SEEK_END));
3190
3191 scm_tc16_void_port = scm_make_port_type ("void", fill_input_void_port,
3192 write_void_port);
3193
3194 cur_inport_fluid = scm_make_fluid ();
3195 cur_outport_fluid = scm_make_fluid ();
3196 cur_errport_fluid = scm_make_fluid ();
3197 cur_loadport_fluid = scm_make_fluid ();
3198
3199 scm_i_port_weak_set = scm_c_make_weak_set (31);
3200
3201 #include "libguile/ports.x"
3202
3203 /* Use Latin-1 as the default port encoding. */
3204 SCM_VARIABLE_SET (default_port_encoding_var,
3205 scm_make_fluid_with_default (SCM_BOOL_F));
3206 scm_port_encoding_init = 1;
3207
3208 SCM_VARIABLE_SET (default_conversion_strategy_var,
3209 scm_make_fluid_with_default (sym_substitute));
3210 scm_conversion_strategy_init = 1;
3211
3212 /* These bindings are used when boot-9 turns `current-input-port' et
3213 al into parameters. They are then removed from the guile module. */
3214 scm_c_define ("%current-input-port-fluid", cur_inport_fluid);
3215 scm_c_define ("%current-output-port-fluid", cur_outport_fluid);
3216 scm_c_define ("%current-error-port-fluid", cur_errport_fluid);
3217 }
3218
3219 /*
3220 Local Variables:
3221 c-file-style: "gnu"
3222 End:
3223 */