Add weak-vector-length, weak-vector-ref, weak-vector-set!
[bpt/guile.git] / libguile / ports.c
1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004,
2 * 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
18 */
19
20
21 \f
22 /* Headers. */
23
24 #define _LARGEFILE64_SOURCE /* ask for stat64 etc */
25
26 #ifdef HAVE_CONFIG_H
27 # include <config.h>
28 #endif
29
30 #include <stdio.h>
31 #include <errno.h>
32 #include <fcntl.h> /* for chsize on mingw */
33 #include <assert.h>
34 #include <iconv.h>
35 #include <uniconv.h>
36 #include <unistr.h>
37 #include <striconveh.h>
38
39 #include <assert.h>
40
41 #include "libguile/_scm.h"
42 #include "libguile/async.h"
43 #include "libguile/deprecation.h"
44 #include "libguile/eval.h"
45 #include "libguile/fports.h" /* direct access for seek and truncate */
46 #include "libguile/goops.h"
47 #include "libguile/smob.h"
48 #include "libguile/chars.h"
49 #include "libguile/dynwind.h"
50
51 #include "libguile/keywords.h"
52 #include "libguile/hashtab.h"
53 #include "libguile/root.h"
54 #include "libguile/strings.h"
55 #include "libguile/mallocs.h"
56 #include "libguile/validate.h"
57 #include "libguile/ports.h"
58 #include "libguile/ports-internal.h"
59 #include "libguile/vectors.h"
60 #include "libguile/weak-set.h"
61 #include "libguile/fluids.h"
62 #include "libguile/eq.h"
63 #include "libguile/alist.h"
64
65 #ifdef HAVE_STRING_H
66 #include <string.h>
67 #endif
68
69 #ifdef HAVE_IO_H
70 #include <io.h>
71 #endif
72
73 #ifdef HAVE_UNISTD_H
74 #include <unistd.h>
75 #endif
76
77 #ifdef HAVE_SYS_IOCTL_H
78 #include <sys/ioctl.h>
79 #endif
80
81 /* Mingw (version 3.4.5, circa 2006) has ftruncate as an alias for chsize
82 already, but have this code here in case that wasn't so in past versions,
83 or perhaps to help other minimal DOS environments.
84
85 gnulib ftruncate.c has code using fcntl F_CHSIZE and F_FREESP, which
86 might be possibilities if we've got other systems without ftruncate. */
87
/* Fall back to chsize when it is available and ftruncate is not, and
   record that an ftruncate is now usable.  */
#if defined (HAVE_CHSIZE) && !defined (HAVE_FTRUNCATE)
# define ftruncate(fd, size) chsize ((fd), (size))
# undef HAVE_FTRUNCATE
# define HAVE_FTRUNCATE 1
#endif
93
94 \f
/* Port encodings are case-insensitive ASCII strings, so case folding is
   done by hand here: 'a'..'z' map to 'A'..'Z' and every other character
   is returned unchanged.  */
static char
ascii_toupper (char c)
{
  if (c >= 'a' && c <= 'z')
    return 'A' + (c - 'a');

  return c;
}
101
/* Return non-zero if ENC, compared without regard to ASCII case, equals
   UPPER, which must already be in upper case.  A NULL ENC stands for
   "ISO-8859-1".

   This is only needed for encodings that come from the user and have
   not been canonicalized yet.  Encodings that are set on ports or in
   the default encoding fluid are in upper-case, and can be compared
   with strcmp.  */
static int
encoding_matches (const char *enc, const char *upper)
{
  const char *p = enc ? enc : "ISO-8859-1";

  for (; *p != '\0'; p++, upper++)
    if (ascii_toupper (*p) != *upper)
      return 0;

  /* ENC is exhausted; it matches only if UPPER is exhausted too.  */
  return *upper == '\0';
}
118
119 static char*
120 canonicalize_encoding (const char *enc)
121 {
122 char *ret;
123 int i;
124
125 if (!enc)
126 return "ISO-8859-1";
127
128 ret = scm_gc_strdup (enc, "port");
129
130 for (i = 0; ret[i]; i++)
131 {
132 if (ret[i] > 127)
133 /* Restrict to ASCII. */
134 scm_misc_error (NULL, "invalid character encoding ~s",
135 scm_list_1 (scm_from_latin1_string (enc)));
136 else
137 ret[i] = ascii_toupper (ret[i]);
138 }
139
140 return ret;
141 }
142
143
144 \f
145 /* The port kind table --- a dynamically resized array of port types. */
146
147
148 /* scm_ptobs scm_numptob
149 * implement a dynamically resized array of ptob records.
150 * Indexes into this table are used when generating type
151 * tags for smobjects (if you know a tag you can get an index and conversely).
152 */
153 static scm_t_ptob_descriptor **scm_ptobs = NULL;
154 static long scm_numptob = 0; /* Number of port types. */
155 static long scm_ptobs_size = 0; /* Number of slots in the port type
156 table. */
157 static scm_i_pthread_mutex_t scm_ptobs_lock = SCM_I_PTHREAD_MUTEX_INITIALIZER;
158
159 long
160 scm_c_num_port_types (void)
161 {
162 long ret;
163
164 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
165 ret = scm_numptob;
166 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
167
168 return ret;
169 }
170
171 scm_t_ptob_descriptor*
172 scm_c_port_type_ref (long ptobnum)
173 {
174 scm_t_ptob_descriptor *ret = NULL;
175
176 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
177
178 if (0 <= ptobnum && ptobnum < scm_numptob)
179 ret = scm_ptobs[ptobnum];
180
181 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
182
183 if (!ret)
184 scm_out_of_range ("scm_c_port_type_ref", scm_from_long (ptobnum));
185
186 return ret;
187 }
188
189 long
190 scm_c_port_type_add_x (scm_t_ptob_descriptor *desc)
191 {
192 long ret = -1;
193
194 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
195
196 if (scm_numptob + 1 < SCM_I_MAX_PORT_TYPE_COUNT)
197 {
198 if (scm_numptob == scm_ptobs_size)
199 {
200 unsigned long old_size = scm_ptobs_size;
201 scm_t_ptob_descriptor **old_ptobs = scm_ptobs;
202
203 /* Currently there are only 9 predefined port types, so one
204 resize will cover it. */
205 scm_ptobs_size = old_size + 10;
206
207 if (scm_ptobs_size >= SCM_I_MAX_PORT_TYPE_COUNT)
208 scm_ptobs_size = SCM_I_MAX_PORT_TYPE_COUNT;
209
210 scm_ptobs = scm_gc_malloc (sizeof (*scm_ptobs) * scm_ptobs_size,
211 "scm_ptobs");
212
213 memcpy (scm_ptobs, old_ptobs, sizeof (*scm_ptobs) * scm_numptob);
214 }
215
216 ret = scm_numptob++;
217 scm_ptobs[ret] = desc;
218 }
219
220 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
221
222 if (ret < 0)
223 scm_out_of_range ("scm_c_port_type_add_x", scm_from_long (scm_numptob));
224
225 return ret;
226 }
227
228 /*
229 * We choose to use an interface similar to the smob interface with
230 * fill_input and write as standard fields, passed to the port
231 * type constructor, and optional fields set by setters.
232 */
233
234 static void
235 flush_port_default (SCM port SCM_UNUSED)
236 {
237 }
238
239 static void
240 end_input_default (SCM port SCM_UNUSED, int offset SCM_UNUSED)
241 {
242 }
243
244 scm_t_bits
245 scm_make_port_type (char *name,
246 int (*fill_input) (SCM port),
247 void (*write) (SCM port, const void *data, size_t size))
248 {
249 scm_t_ptob_descriptor *desc;
250 long ptobnum;
251
252 desc = scm_gc_malloc_pointerless (sizeof (*desc), "port-type");
253 memset (desc, 0, sizeof (*desc));
254
255 desc->name = name;
256 desc->print = scm_port_print;
257 desc->write = write;
258 desc->flush = flush_port_default;
259 desc->end_input = end_input_default;
260 desc->fill_input = fill_input;
261
262 ptobnum = scm_c_port_type_add_x (desc);
263
264 /* Make a class object if GOOPS is present. */
265 if (SCM_UNPACK (scm_port_class[0]) != 0)
266 scm_make_port_classes (ptobnum, name);
267
268 return scm_tc7_port + ptobnum * 256;
269 }
270
271 void
272 scm_set_port_mark (scm_t_bits tc, SCM (*mark) (SCM))
273 {
274 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->mark = mark;
275 }
276
277 void
278 scm_set_port_free (scm_t_bits tc, size_t (*free) (SCM))
279 {
280 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->free = free;
281 }
282
283 void
284 scm_set_port_print (scm_t_bits tc, int (*print) (SCM exp, SCM port,
285 scm_print_state *pstate))
286 {
287 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->print = print;
288 }
289
290 void
291 scm_set_port_equalp (scm_t_bits tc, SCM (*equalp) (SCM, SCM))
292 {
293 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->equalp = equalp;
294 }
295
296 void
297 scm_set_port_close (scm_t_bits tc, int (*close) (SCM))
298 {
299 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->close = close;
300 }
301
302 void
303 scm_set_port_flush (scm_t_bits tc, void (*flush) (SCM port))
304 {
305 scm_t_ptob_descriptor *ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tc));
306 ptob->flush = flush;
307 ptob->flags |= SCM_PORT_TYPE_HAS_FLUSH;
308 }
309
310 void
311 scm_set_port_end_input (scm_t_bits tc, void (*end_input) (SCM port, int offset))
312 {
313 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->end_input = end_input;
314 }
315
316 void
317 scm_set_port_seek (scm_t_bits tc, scm_t_off (*seek) (SCM, scm_t_off, int))
318 {
319 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->seek = seek;
320 }
321
322 void
323 scm_set_port_truncate (scm_t_bits tc, void (*truncate) (SCM, scm_t_off))
324 {
325 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->truncate = truncate;
326 }
327
328 void
329 scm_set_port_input_waiting (scm_t_bits tc, int (*input_waiting) (SCM))
330 {
331 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->input_waiting = input_waiting;
332 }
333
334 static void
335 scm_i_set_pending_eof (SCM port)
336 {
337 SCM_PORT_GET_INTERNAL (port)->pending_eof = 1;
338 }
339
340 static void
341 scm_i_clear_pending_eof (SCM port)
342 {
343 SCM_PORT_GET_INTERNAL (port)->pending_eof = 0;
344 }
345
346 SCM_DEFINE (scm_i_port_property, "%port-property", 2, 0, 0,
347 (SCM port, SCM key),
348 "Return the property of @var{port} associated with @var{key}.")
349 #define FUNC_NAME s_scm_i_port_property
350 {
351 scm_i_pthread_mutex_t *lock;
352 SCM result;
353
354 SCM_VALIDATE_OPPORT (1, port);
355 scm_c_lock_port (port, &lock);
356 result = scm_assq_ref (SCM_PORT_GET_INTERNAL (port)->alist, key);
357 if (lock)
358 scm_i_pthread_mutex_unlock (lock);
359 return result;
360 }
361 #undef FUNC_NAME
362
363 SCM_DEFINE (scm_i_set_port_property_x, "%set-port-property!", 3, 0, 0,
364 (SCM port, SCM key, SCM value),
365 "Set the property of @var{port} associated with @var{key} to @var{value}.")
366 #define FUNC_NAME s_scm_i_set_port_property_x
367 {
368 scm_i_pthread_mutex_t *lock;
369 scm_t_port_internal *pti;
370
371 SCM_VALIDATE_OPPORT (1, port);
372 scm_c_lock_port (port, &lock);
373 pti = SCM_PORT_GET_INTERNAL (port);
374 pti->alist = scm_assq_set_x (pti->alist, key, value);
375 if (lock)
376 scm_i_pthread_mutex_unlock (lock);
377 return SCM_UNSPECIFIED;
378 }
379 #undef FUNC_NAME
380
381 \f
382
383 /* Standard ports --- current input, output, error, and more(!). */
384
385 static SCM cur_inport_fluid = SCM_BOOL_F;
386 static SCM cur_outport_fluid = SCM_BOOL_F;
387 static SCM cur_errport_fluid = SCM_BOOL_F;
388 static SCM cur_loadport_fluid = SCM_BOOL_F;
389
390 SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
391 (),
392 "Return the current input port. This is the default port used\n"
393 "by many input procedures. Initially, @code{current-input-port}\n"
394 "returns the @dfn{standard input} in Unix and C terminology.")
395 #define FUNC_NAME s_scm_current_input_port
396 {
397 if (scm_is_true (cur_inport_fluid))
398 return scm_fluid_ref (cur_inport_fluid);
399 else
400 return SCM_BOOL_F;
401 }
402 #undef FUNC_NAME
403
404 SCM_DEFINE (scm_current_output_port, "current-output-port", 0, 0, 0,
405 (),
406 "Return the current output port. This is the default port used\n"
407 "by many output procedures. Initially,\n"
408 "@code{current-output-port} returns the @dfn{standard output} in\n"
409 "Unix and C terminology.")
410 #define FUNC_NAME s_scm_current_output_port
411 {
412 if (scm_is_true (cur_outport_fluid))
413 return scm_fluid_ref (cur_outport_fluid);
414 else
415 return SCM_BOOL_F;
416 }
417 #undef FUNC_NAME
418
419 SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0,
420 (),
421 "Return the port to which errors and warnings should be sent (the\n"
422 "@dfn{standard error} in Unix and C terminology).")
423 #define FUNC_NAME s_scm_current_error_port
424 {
425 if (scm_is_true (cur_errport_fluid))
426 return scm_fluid_ref (cur_errport_fluid);
427 else
428 return SCM_BOOL_F;
429 }
430 #undef FUNC_NAME
431
432 SCM
433 scm_current_warning_port (void)
434 {
435 static SCM cwp_var = SCM_UNDEFINED;
436 static scm_i_pthread_mutex_t cwp_var_mutex
437 = SCM_I_PTHREAD_MUTEX_INITIALIZER;
438
439 scm_i_scm_pthread_mutex_lock (&cwp_var_mutex);
440 if (SCM_UNBNDP (cwp_var))
441 cwp_var = scm_c_private_variable ("guile", "current-warning-port");
442 scm_i_pthread_mutex_unlock (&cwp_var_mutex);
443
444 return scm_call_0 (scm_variable_ref (cwp_var));
445 }
446
447 SCM_DEFINE (scm_current_load_port, "current-load-port", 0, 0, 0,
448 (),
449 "Return the current-load-port.\n"
450 "The load port is used internally by @code{primitive-load}.")
451 #define FUNC_NAME s_scm_current_load_port
452 {
453 return scm_fluid_ref (cur_loadport_fluid);
454 }
455 #undef FUNC_NAME
456
457 SCM_DEFINE (scm_set_current_input_port, "set-current-input-port", 1, 0, 0,
458 (SCM port),
459 "@deffnx {Scheme Procedure} set-current-output-port port\n"
460 "@deffnx {Scheme Procedure} set-current-error-port port\n"
461 "Change the ports returned by @code{current-input-port},\n"
462 "@code{current-output-port} and @code{current-error-port}, respectively,\n"
463 "so that they use the supplied @var{port} for input or output.")
464 #define FUNC_NAME s_scm_set_current_input_port
465 {
466 SCM oinp = scm_fluid_ref (cur_inport_fluid);
467 SCM_VALIDATE_OPINPORT (1, port);
468 scm_fluid_set_x (cur_inport_fluid, port);
469 return oinp;
470 }
471 #undef FUNC_NAME
472
473
474 SCM_DEFINE (scm_set_current_output_port, "set-current-output-port", 1, 0, 0,
475 (SCM port),
476 "Set the current default output port to @var{port}.")
477 #define FUNC_NAME s_scm_set_current_output_port
478 {
479 SCM ooutp = scm_fluid_ref (cur_outport_fluid);
480 port = SCM_COERCE_OUTPORT (port);
481 SCM_VALIDATE_OPOUTPORT (1, port);
482 scm_fluid_set_x (cur_outport_fluid, port);
483 return ooutp;
484 }
485 #undef FUNC_NAME
486
487
488 SCM_DEFINE (scm_set_current_error_port, "set-current-error-port", 1, 0, 0,
489 (SCM port),
490 "Set the current default error port to @var{port}.")
491 #define FUNC_NAME s_scm_set_current_error_port
492 {
493 SCM oerrp = scm_fluid_ref (cur_errport_fluid);
494 port = SCM_COERCE_OUTPORT (port);
495 SCM_VALIDATE_OPOUTPORT (1, port);
496 scm_fluid_set_x (cur_errport_fluid, port);
497 return oerrp;
498 }
499 #undef FUNC_NAME
500
501
502 SCM
503 scm_set_current_warning_port (SCM port)
504 {
505 static SCM cwp_var = SCM_BOOL_F;
506
507 if (scm_is_false (cwp_var))
508 cwp_var = scm_c_private_lookup ("guile", "current-warning-port");
509
510 return scm_call_1 (scm_variable_ref (cwp_var), port);
511 }
512
513
514 void
515 scm_dynwind_current_input_port (SCM port)
516 #define FUNC_NAME NULL
517 {
518 SCM_VALIDATE_OPINPORT (1, port);
519 scm_dynwind_fluid (cur_inport_fluid, port);
520 }
521 #undef FUNC_NAME
522
523 void
524 scm_dynwind_current_output_port (SCM port)
525 #define FUNC_NAME NULL
526 {
527 port = SCM_COERCE_OUTPORT (port);
528 SCM_VALIDATE_OPOUTPORT (1, port);
529 scm_dynwind_fluid (cur_outport_fluid, port);
530 }
531 #undef FUNC_NAME
532
533 void
534 scm_dynwind_current_error_port (SCM port)
535 #define FUNC_NAME NULL
536 {
537 port = SCM_COERCE_OUTPORT (port);
538 SCM_VALIDATE_OPOUTPORT (1, port);
539 scm_dynwind_fluid (cur_errport_fluid, port);
540 }
541 #undef FUNC_NAME
542
543 void
544 scm_i_dynwind_current_load_port (SCM port)
545 {
546 scm_dynwind_fluid (cur_loadport_fluid, port);
547 }
548
549
550 \f
551
552 /* Retrieving a port's mode. */
553
554 /* Return the flags that characterize a port based on the mode
555 * string used to open a file for that port.
556 *
557 * See PORT FLAGS in scm.h
558 */
559
560 static long
561 scm_i_mode_bits_n (SCM modes)
562 {
563 return (SCM_OPN
564 | (scm_i_string_contains_char (modes, 'r')
565 || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0)
566 | (scm_i_string_contains_char (modes, 'w')
567 || scm_i_string_contains_char (modes, 'a')
568 || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0)
569 | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0)
570 | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0));
571 }
572
573 long
574 scm_mode_bits (char *modes)
575 {
576 /* Valid characters are rw+a0l. So, use latin1. */
577 return scm_i_mode_bits (scm_from_latin1_string (modes));
578 }
579
580 long
581 scm_i_mode_bits (SCM modes)
582 {
583 long bits;
584
585 if (!scm_is_string (modes))
586 scm_wrong_type_arg_msg (NULL, 0, modes, "string");
587
588 bits = scm_i_mode_bits_n (modes);
589 scm_remember_upto_here_1 (modes);
590 return bits;
591 }
592
593 /* Return the mode flags from an open port.
594 * Some modes such as "append" are only used when opening
595 * a file and are not returned here. */
596
597 SCM_DEFINE (scm_port_mode, "port-mode", 1, 0, 0,
598 (SCM port),
599 "Return the port modes associated with the open port @var{port}.\n"
600 "These will not necessarily be identical to the modes used when\n"
601 "the port was opened, since modes such as \"append\" which are\n"
602 "used only during port creation are not retained.")
603 #define FUNC_NAME s_scm_port_mode
604 {
605 char modes[4];
606 modes[0] = '\0';
607
608 port = SCM_COERCE_OUTPORT (port);
609 SCM_VALIDATE_OPPORT (1, port);
610 if (SCM_CELL_WORD_0 (port) & SCM_RDNG) {
611 if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
612 strcpy (modes, "r+");
613 else
614 strcpy (modes, "r");
615 }
616 else if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
617 strcpy (modes, "w");
618 if (SCM_CELL_WORD_0 (port) & SCM_BUF0)
619 strcat (modes, "0");
620
621 return scm_from_latin1_string (modes);
622 }
623 #undef FUNC_NAME
624
625
626 \f
627
628 /* The port table --- a weak set of all ports.
629
630 We need a global registry of ports to flush them all at exit, and to
631 get all the ports matching a file descriptor. */
632 SCM scm_i_port_weak_set;
633
634
635 \f
636
637 /* Port finalization. */
638
639 struct do_free_data
640 {
641 scm_t_ptob_descriptor *ptob;
642 SCM port;
643 };
644
645 static SCM
646 do_free (void *body_data)
647 {
648 struct do_free_data *data = body_data;
649
650 /* `close' is for explicit `close-port' by user. `free' is for this
651 purpose: ports collected by the GC. */
652 data->ptob->free (data->port);
653
654 return SCM_BOOL_T;
655 }
656
657 /* Finalize the object (a port) pointed to by PTR. */
658 static void
659 finalize_port (void *ptr, void *data)
660 {
661 SCM port = SCM_PACK_POINTER (ptr);
662
663 if (!SCM_PORTP (port))
664 abort ();
665
666 if (SCM_OPENP (port))
667 {
668 struct do_free_data data;
669
670 SCM_CLR_PORT_OPEN_FLAG (port);
671
672 data.ptob = SCM_PORT_DESCRIPTOR (port);
673 data.port = port;
674
675 scm_internal_catch (SCM_BOOL_T, do_free, &data,
676 scm_handle_by_message_noexit, NULL);
677
678 scm_gc_ports_collected++;
679 }
680 }
681
682
683 \f
684
685 SCM
686 scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits,
687 const char *encoding,
688 scm_t_string_failed_conversion_handler handler,
689 scm_t_bits stream)
690 {
691 SCM ret;
692 scm_t_port *entry;
693 scm_t_port_internal *pti;
694 scm_t_ptob_descriptor *ptob;
695
696 entry = scm_gc_typed_calloc (scm_t_port);
697 pti = scm_gc_typed_calloc (scm_t_port_internal);
698 ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tag));
699
700 ret = scm_words (tag | mode_bits, 3);
701 SCM_SET_CELL_WORD_1 (ret, (scm_t_bits) entry);
702 SCM_SET_CELL_WORD_2 (ret, (scm_t_bits) ptob);
703
704 entry->lock = scm_gc_malloc_pointerless (sizeof (*entry->lock), "port lock");
705 scm_i_pthread_mutex_init (entry->lock, scm_i_pthread_mutexattr_recursive);
706
707 entry->internal = pti;
708 entry->file_name = SCM_BOOL_F;
709 entry->rw_active = SCM_PORT_NEITHER;
710 entry->port = ret;
711 entry->stream = stream;
712
713 if (encoding_matches (encoding, "UTF-8"))
714 {
715 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
716 entry->encoding = "UTF-8";
717 }
718 else if (encoding_matches (encoding, "ISO-8859-1"))
719 {
720 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
721 entry->encoding = "ISO-8859-1";
722 }
723 else
724 {
725 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
726 entry->encoding = canonicalize_encoding (encoding);
727 }
728
729 entry->ilseq_handler = handler;
730 pti->iconv_descriptors = NULL;
731
732 pti->at_stream_start_for_bom_read = 1;
733 pti->at_stream_start_for_bom_write = 1;
734
735 pti->pending_eof = 0;
736 pti->alist = SCM_EOL;
737
738 if (SCM_PORT_DESCRIPTOR (ret)->free)
739 scm_i_set_finalizer (SCM2PTR (ret), finalize_port, NULL);
740
741 if (SCM_PORT_DESCRIPTOR (ret)->flags & SCM_PORT_TYPE_HAS_FLUSH)
742 scm_weak_set_add_x (scm_i_port_weak_set, ret);
743
744 return ret;
745 }
746
747 SCM
748 scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream)
749 {
750 return scm_c_make_port_with_encoding (tag, mode_bits,
751 scm_i_default_port_encoding (),
752 scm_i_default_port_conversion_handler (),
753 stream);
754 }
755
756 SCM
757 scm_new_port_table_entry (scm_t_bits tag)
758 {
759 return scm_c_make_port (tag, 0, 0);
760 }
761
762 \f
763
764 /* Predicates. */
765
766 SCM_DEFINE (scm_port_p, "port?", 1, 0, 0,
767 (SCM x),
768 "Return a boolean indicating whether @var{x} is a port.\n"
769 "Equivalent to @code{(or (input-port? @var{x}) (output-port?\n"
770 "@var{x}))}.")
771 #define FUNC_NAME s_scm_port_p
772 {
773 return scm_from_bool (SCM_PORTP (x));
774 }
775 #undef FUNC_NAME
776
777 SCM_DEFINE (scm_input_port_p, "input-port?", 1, 0, 0,
778 (SCM x),
779 "Return @code{#t} if @var{x} is an input port, otherwise return\n"
780 "@code{#f}. Any object satisfying this predicate also satisfies\n"
781 "@code{port?}.")
782 #define FUNC_NAME s_scm_input_port_p
783 {
784 return scm_from_bool (SCM_INPUT_PORT_P (x));
785 }
786 #undef FUNC_NAME
787
788 SCM_DEFINE (scm_output_port_p, "output-port?", 1, 0, 0,
789 (SCM x),
790 "Return @code{#t} if @var{x} is an output port, otherwise return\n"
791 "@code{#f}. Any object satisfying this predicate also satisfies\n"
792 "@code{port?}.")
793 #define FUNC_NAME s_scm_output_port_p
794 {
795 x = SCM_COERCE_OUTPORT (x);
796 return scm_from_bool (SCM_OUTPUT_PORT_P (x));
797 }
798 #undef FUNC_NAME
799
800 SCM_DEFINE (scm_port_closed_p, "port-closed?", 1, 0, 0,
801 (SCM port),
802 "Return @code{#t} if @var{port} is closed or @code{#f} if it is\n"
803 "open.")
804 #define FUNC_NAME s_scm_port_closed_p
805 {
806 SCM_VALIDATE_PORT (1, port);
807 return scm_from_bool (!SCM_OPPORTP (port));
808 }
809 #undef FUNC_NAME
810
811 SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0,
812 (SCM x),
813 "Return @code{#t} if @var{x} is an end-of-file object; otherwise\n"
814 "return @code{#f}.")
815 #define FUNC_NAME s_scm_eof_object_p
816 {
817 return scm_from_bool (SCM_EOF_OBJECT_P (x));
818 }
819 #undef FUNC_NAME
820
821
822 \f
823
824 /* Closing ports. */
825
826 static void close_iconv_descriptors (scm_t_iconv_descriptors *id);
827
828 /* scm_close_port
829 * Call the close operation on a port object.
830 * see also scm_close.
831 */
832 SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0,
833 (SCM port),
834 "Close the specified port object. Return @code{#t} if it\n"
835 "successfully closes a port or @code{#f} if it was already\n"
836 "closed. An exception may be raised if an error occurs, for\n"
837 "example when flushing buffered output. See also @ref{Ports and\n"
838 "File Descriptors, close}, for a procedure which can close file\n"
839 "descriptors.")
840 #define FUNC_NAME s_scm_close_port
841 {
842 scm_t_port_internal *pti;
843 int rv;
844
845 port = SCM_COERCE_OUTPORT (port);
846
847 SCM_VALIDATE_PORT (1, port);
848 if (SCM_CLOSEDP (port))
849 return SCM_BOOL_F;
850
851 pti = SCM_PORT_GET_INTERNAL (port);
852 SCM_CLR_PORT_OPEN_FLAG (port);
853
854 if (SCM_PORT_DESCRIPTOR (port)->flags & SCM_PORT_TYPE_HAS_FLUSH)
855 scm_weak_set_remove_x (scm_i_port_weak_set, port);
856
857 if (SCM_PORT_DESCRIPTOR (port)->close)
858 /* Note! This may throw an exception. Anything after this point
859 should be resilient to non-local exits. */
860 rv = SCM_PORT_DESCRIPTOR (port)->close (port);
861 else
862 rv = 0;
863
864 if (pti->iconv_descriptors)
865 {
866 /* If we don't get here, the iconv_descriptors finalizer will
867 clean up. */
868 close_iconv_descriptors (pti->iconv_descriptors);
869 pti->iconv_descriptors = NULL;
870 }
871
872 return scm_from_bool (rv >= 0);
873 }
874 #undef FUNC_NAME
875
876 SCM_DEFINE (scm_close_input_port, "close-input-port", 1, 0, 0,
877 (SCM port),
878 "Close the specified input port object. The routine has no effect if\n"
879 "the file has already been closed. An exception may be raised if an\n"
880 "error occurs. The value returned is unspecified.\n\n"
881 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
882 "which can close file descriptors.")
883 #define FUNC_NAME s_scm_close_input_port
884 {
885 SCM_VALIDATE_INPUT_PORT (1, port);
886 scm_close_port (port);
887 return SCM_UNSPECIFIED;
888 }
889 #undef FUNC_NAME
890
891 SCM_DEFINE (scm_close_output_port, "close-output-port", 1, 0, 0,
892 (SCM port),
893 "Close the specified output port object. The routine has no effect if\n"
894 "the file has already been closed. An exception may be raised if an\n"
895 "error occurs. The value returned is unspecified.\n\n"
896 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
897 "which can close file descriptors.")
898 #define FUNC_NAME s_scm_close_output_port
899 {
900 port = SCM_COERCE_OUTPORT (port);
901 SCM_VALIDATE_OUTPUT_PORT (1, port);
902 scm_close_port (port);
903 return SCM_UNSPECIFIED;
904 }
905 #undef FUNC_NAME
906
907
908 \f
909
910 /* Encoding characters to byte streams, and decoding byte streams to
911 characters. */
912
913 /* A fluid specifying the default encoding for newly created ports. If it is
914 a string, that is the encoding. If it is #f, it is in the "native"
915 (Latin-1) encoding. */
916 SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding");
917
918 static int scm_port_encoding_init = 0;
919
920 /* Use ENCODING as the default encoding for future ports. */
921 void
922 scm_i_set_default_port_encoding (const char *encoding)
923 {
924 if (!scm_port_encoding_init
925 || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
926 scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
927 SCM_EOL);
928
929 if (encoding_matches (encoding, "ISO-8859-1"))
930 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
931 else
932 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
933 scm_from_latin1_string (canonicalize_encoding (encoding)));
934 }
935
936 /* Return the name of the default encoding for newly created ports. */
937 const char *
938 scm_i_default_port_encoding (void)
939 {
940 if (!scm_port_encoding_init)
941 return "ISO-8859-1";
942 else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
943 return "ISO-8859-1";
944 else
945 {
946 SCM encoding;
947
948 encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
949 if (!scm_is_string (encoding))
950 return "ISO-8859-1";
951 else
952 return scm_i_string_chars (encoding);
953 }
954 }
955
956 /* A fluid specifying the default conversion handler for newly created
957 ports. Its value should be one of the symbols below. */
958 SCM_VARIABLE (default_conversion_strategy_var,
959 "%default-port-conversion-strategy");
960
961 /* Whether the above fluid is initialized. */
962 static int scm_conversion_strategy_init = 0;
963
964 /* The possible conversion strategies. */
965 SCM_SYMBOL (sym_error, "error");
966 SCM_SYMBOL (sym_substitute, "substitute");
967 SCM_SYMBOL (sym_escape, "escape");
968
969 /* Return the default failed encoding conversion policy for new created
970 ports. */
971 scm_t_string_failed_conversion_handler
972 scm_i_default_port_conversion_handler (void)
973 {
974 scm_t_string_failed_conversion_handler handler;
975
976 if (!scm_conversion_strategy_init
977 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
978 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
979 else
980 {
981 SCM fluid, value;
982
983 fluid = SCM_VARIABLE_REF (default_conversion_strategy_var);
984 value = scm_fluid_ref (fluid);
985
986 if (scm_is_eq (sym_substitute, value))
987 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
988 else if (scm_is_eq (sym_escape, value))
989 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
990 else
991 /* Default to 'error also when the fluid's value is not one of
992 the valid symbols. */
993 handler = SCM_FAILED_CONVERSION_ERROR;
994 }
995
996 return handler;
997 }
998
999 /* Use HANDLER as the default conversion strategy for future ports. */
1000 void
1001 scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler
1002 handler)
1003 {
1004 SCM strategy;
1005
1006 if (!scm_conversion_strategy_init
1007 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
1008 scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
1009 SCM_EOL);
1010
1011 switch (handler)
1012 {
1013 case SCM_FAILED_CONVERSION_ERROR:
1014 strategy = sym_error;
1015 break;
1016
1017 case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE:
1018 strategy = sym_escape;
1019 break;
1020
1021 case SCM_FAILED_CONVERSION_QUESTION_MARK:
1022 strategy = sym_substitute;
1023 break;
1024
1025 default:
1026 abort ();
1027 }
1028
1029 scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var),
1030 strategy);
1031 }
1032
1033 static void
1034 scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port);
1035
1036 /* If the next LEN bytes from PORT are equal to those in BYTES, then
1037 return 1, else return 0. Leave the port position unchanged. */
1038 static int
1039 looking_at_bytes (SCM port, const unsigned char *bytes, int len)
1040 {
1041 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1042 int i = 0;
1043
1044 while (i < len && scm_peek_byte_or_eof_unlocked (port) == bytes[i])
1045 {
1046 pt->read_pos++;
1047 i++;
1048 }
1049 scm_i_unget_bytes_unlocked (bytes, i, port);
1050 return (i == len);
1051 }
1052
/* Byte order marks for the UTF-8, UTF-16 and UTF-32 encodings.  */
static const unsigned char scm_utf8_bom[3]    = { 0xef, 0xbb, 0xbf };
static const unsigned char scm_utf16be_bom[2] = { 0xfe, 0xff };
static const unsigned char scm_utf16le_bom[2] = { 0xff, 0xfe };
static const unsigned char scm_utf32be_bom[4] = { 0x00, 0x00, 0xfe, 0xff };
static const unsigned char scm_utf32le_bom[4] = { 0xff, 0xfe, 0x00, 0x00 };
1058
1059 /* Decide what byte order to use for a UTF-16 port. Return "UTF-16BE"
1060 or "UTF-16LE". MODE must be either SCM_PORT_READ or SCM_PORT_WRITE,
1061 and specifies which operation is about to be done. The MODE
1062 determines how we will decide the byte order. We deliberately avoid
1063 reading from the port unless the user is about to do so. If the user
1064 is about to read, then we look for a BOM, and if present, we use it
1065 to determine the byte order. Otherwise we choose big endian, as
1066 recommended by the Unicode Standard. Note that the BOM (if any) is
1067 not consumed here. */
1068 static const char *
1069 decide_utf16_encoding (SCM port, scm_t_port_rw_active mode)
1070 {
1071 if (mode == SCM_PORT_READ
1072 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1073 && looking_at_bytes (port, scm_utf16le_bom, sizeof scm_utf16le_bom))
1074 return "UTF-16LE";
1075 else
1076 return "UTF-16BE";
1077 }
1078
1079 /* Decide what byte order to use for a UTF-32 port. Return "UTF-32BE"
1080 or "UTF-32LE". See the comment above 'decide_utf16_encoding' for
1081 details. */
1082 static const char *
1083 decide_utf32_encoding (SCM port, scm_t_port_rw_active mode)
1084 {
1085 if (mode == SCM_PORT_READ
1086 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1087 && looking_at_bytes (port, scm_utf32le_bom, sizeof scm_utf32le_bom))
1088 return "UTF-32LE";
1089 else
1090 return "UTF-32BE";
1091 }
1092
1093 static void
1094 finalize_iconv_descriptors (void *ptr, void *data)
1095 {
1096 close_iconv_descriptors (ptr);
1097 }
1098
1099 static scm_t_iconv_descriptors *
1100 open_iconv_descriptors (const char *encoding, int reading, int writing)
1101 {
1102 scm_t_iconv_descriptors *id;
1103 iconv_t input_cd, output_cd;
1104 size_t i;
1105
1106 input_cd = (iconv_t) -1;
1107 output_cd = (iconv_t) -1;
1108
1109 for (i = 0; encoding[i]; i++)
1110 if (encoding[i] > 127)
1111 goto invalid_encoding;
1112
1113 if (reading)
1114 {
1115 /* Open an input iconv conversion descriptor, from ENCODING
1116 to UTF-8. We choose UTF-8, not UTF-32, because iconv
1117 implementations can typically convert from anything to
1118 UTF-8, but not to UTF-32 (see
1119 <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
1120
1121 /* Assume opening an iconv descriptor causes about 16 KB of
1122 allocation. */
1123 scm_gc_register_allocation (16 * 1024);
1124
1125 input_cd = iconv_open ("UTF-8", encoding);
1126 if (input_cd == (iconv_t) -1)
1127 goto invalid_encoding;
1128 }
1129
1130 if (writing)
1131 {
1132 /* Assume opening an iconv descriptor causes about 16 KB of
1133 allocation. */
1134 scm_gc_register_allocation (16 * 1024);
1135
1136 output_cd = iconv_open (encoding, "UTF-8");
1137 if (output_cd == (iconv_t) -1)
1138 {
1139 if (input_cd != (iconv_t) -1)
1140 iconv_close (input_cd);
1141 goto invalid_encoding;
1142 }
1143 }
1144
1145 id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors");
1146 id->input_cd = input_cd;
1147 id->output_cd = output_cd;
1148
1149 /* Register a finalizer to close the descriptors. */
1150 scm_i_set_finalizer (id, finalize_iconv_descriptors, NULL);
1151
1152 return id;
1153
1154 invalid_encoding:
1155 {
1156 SCM err;
1157 err = scm_from_latin1_string (encoding);
1158 scm_misc_error ("open_iconv_descriptors",
1159 "invalid or unknown character encoding ~s",
1160 scm_list_1 (err));
1161 }
1162 }
1163
1164 static void
1165 close_iconv_descriptors (scm_t_iconv_descriptors *id)
1166 {
1167 if (id->input_cd != (iconv_t) -1)
1168 iconv_close (id->input_cd);
1169 if (id->output_cd != (iconv_t) -1)
1170 iconv_close (id->output_cd);
1171 id->input_cd = (void *) -1;
1172 id->output_cd = (void *) -1;
1173 }
1174
1175 scm_t_iconv_descriptors *
1176 scm_i_port_iconv_descriptors (SCM port, scm_t_port_rw_active mode)
1177 {
1178 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
1179
1180 assert (pti->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV);
1181
1182 if (!pti->iconv_descriptors)
1183 {
1184 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1185 const char *precise_encoding;
1186
1187 if (!pt->encoding)
1188 pt->encoding = "ISO-8859-1";
1189
1190 /* If the specified encoding is UTF-16 or UTF-32, then make
1191 that more precise by deciding what byte order to use. */
1192 if (strcmp (pt->encoding, "UTF-16") == 0)
1193 precise_encoding = decide_utf16_encoding (port, mode);
1194 else if (strcmp (pt->encoding, "UTF-32") == 0)
1195 precise_encoding = decide_utf32_encoding (port, mode);
1196 else
1197 precise_encoding = pt->encoding;
1198
1199 pti->iconv_descriptors =
1200 open_iconv_descriptors (precise_encoding,
1201 SCM_INPUT_PORT_P (port),
1202 SCM_OUTPUT_PORT_P (port));
1203 }
1204
1205 return pti->iconv_descriptors;
1206 }
1207
1208 /* The name of the encoding is itself encoded in ASCII. */
1209 void
1210 scm_i_set_port_encoding_x (SCM port, const char *encoding)
1211 {
1212 scm_t_port *pt;
1213 scm_t_port_internal *pti;
1214 scm_t_iconv_descriptors *prev;
1215
1216 /* Set the character encoding for this port. */
1217 pt = SCM_PTAB_ENTRY (port);
1218 pti = SCM_PORT_GET_INTERNAL (port);
1219 prev = pti->iconv_descriptors;
1220
1221 /* In order to handle cases where the encoding changes mid-stream
1222 (e.g. within an HTTP stream, or within a file that is composed of
1223 segments with different encodings), we consider this to be "stream
1224 start" for purposes of BOM handling, regardless of our actual file
1225 position. */
1226 pti->at_stream_start_for_bom_read = 1;
1227 pti->at_stream_start_for_bom_write = 1;
1228
1229 if (encoding_matches (encoding, "UTF-8"))
1230 {
1231 pt->encoding = "UTF-8";
1232 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
1233 }
1234 else if (encoding_matches (encoding, "ISO-8859-1"))
1235 {
1236 pt->encoding = "ISO-8859-1";
1237 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
1238 }
1239 else
1240 {
1241 pt->encoding = canonicalize_encoding (encoding);
1242 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
1243 }
1244
1245 pti->iconv_descriptors = NULL;
1246 if (prev)
1247 close_iconv_descriptors (prev);
1248 }
1249
1250 SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
1251 (SCM port),
1252 "Returns, as a string, the character encoding that @var{port}\n"
1253 "uses to interpret its input and output.\n")
1254 #define FUNC_NAME s_scm_port_encoding
1255 {
1256 SCM_VALIDATE_PORT (1, port);
1257
1258 return scm_from_latin1_string (SCM_PTAB_ENTRY (port)->encoding);
1259 }
1260 #undef FUNC_NAME
1261
1262 SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
1263 (SCM port, SCM enc),
1264 "Sets the character encoding that will be used to interpret all\n"
1265 "port I/O. New ports are created with the encoding\n"
1266 "appropriate for the current locale if @code{setlocale} has \n"
1267 "been called or ISO-8859-1 otherwise\n"
1268 "and this procedure can be used to modify that encoding.\n")
1269 #define FUNC_NAME s_scm_set_port_encoding_x
1270 {
1271 char *enc_str;
1272
1273 SCM_VALIDATE_PORT (1, port);
1274 SCM_VALIDATE_STRING (2, enc);
1275
1276 enc_str = scm_to_latin1_string (enc);
1277 scm_i_set_port_encoding_x (port, enc_str);
1278 free (enc_str);
1279
1280 return SCM_UNSPECIFIED;
1281 }
1282 #undef FUNC_NAME
1283
1284 SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
1285 1, 0, 0, (SCM port),
1286 "Returns the behavior of the port when handling a character that\n"
1287 "is not representable in the port's current encoding.\n"
1288 "It returns the symbol @code{error} if unrepresentable characters\n"
1289 "should cause exceptions, @code{substitute} if the port should\n"
1290 "try to replace unrepresentable characters with question marks or\n"
1291 "approximate characters, or @code{escape} if unrepresentable\n"
1292 "characters should be converted to string escapes.\n"
1293 "\n"
1294 "If @var{port} is @code{#f}, then the current default behavior\n"
1295 "will be returned. New ports will have this default behavior\n"
1296 "when they are created.\n")
1297 #define FUNC_NAME s_scm_port_conversion_strategy
1298 {
1299 scm_t_string_failed_conversion_handler h;
1300
1301 if (scm_is_false (port))
1302 h = scm_i_default_port_conversion_handler ();
1303 else
1304 {
1305 scm_t_port *pt;
1306
1307 SCM_VALIDATE_OPPORT (1, port);
1308 pt = SCM_PTAB_ENTRY (port);
1309
1310 h = pt->ilseq_handler;
1311 }
1312
1313 if (h == SCM_FAILED_CONVERSION_ERROR)
1314 return scm_from_latin1_symbol ("error");
1315 else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
1316 return scm_from_latin1_symbol ("substitute");
1317 else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
1318 return scm_from_latin1_symbol ("escape");
1319 else
1320 abort ();
1321
1322 /* Never gets here. */
1323 return SCM_UNDEFINED;
1324 }
1325 #undef FUNC_NAME
1326
1327 SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!",
1328 2, 0, 0,
1329 (SCM port, SCM sym),
1330 "Sets the behavior of the interpreter when outputting a character\n"
1331 "that is not representable in the port's current encoding.\n"
1332 "@var{sym} can be either @code{'error}, @code{'substitute}, or\n"
1333 "@code{'escape}. If it is @code{'error}, an error will be thrown\n"
1334 "when an unconvertible character is encountered. If it is\n"
1335 "@code{'substitute}, then unconvertible characters will \n"
1336 "be replaced with approximate characters, or with question marks\n"
1337 "if no approximately correct character is available.\n"
1338 "If it is @code{'escape},\n"
1339 "it will appear as a hex escape when output.\n"
1340 "\n"
1341 "If @var{port} is an open port, the conversion error behavior\n"
1342 "is set for that port. If it is @code{#f}, it is set as the\n"
1343 "default behavior for any future ports that get created in\n"
1344 "this thread.\n")
1345 #define FUNC_NAME s_scm_set_port_conversion_strategy_x
1346 {
1347 scm_t_string_failed_conversion_handler handler;
1348
1349 if (scm_is_eq (sym, sym_error))
1350 handler = SCM_FAILED_CONVERSION_ERROR;
1351 else if (scm_is_eq (sym, sym_substitute))
1352 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
1353 else if (scm_is_eq (sym, sym_escape))
1354 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
1355 else
1356 SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym));
1357
1358 if (scm_is_false (port))
1359 scm_i_set_default_port_conversion_handler (handler);
1360 else
1361 {
1362 SCM_VALIDATE_OPPORT (1, port);
1363 SCM_PTAB_ENTRY (port)->ilseq_handler = handler;
1364 }
1365
1366 return SCM_UNSPECIFIED;
1367 }
1368 #undef FUNC_NAME
1369
1370
1371 \f
1372
1373 /* The port lock. */
1374
1375 static void
1376 lock_port (void *mutex)
1377 {
1378 scm_i_pthread_mutex_lock ((scm_i_pthread_mutex_t *) mutex);
1379 }
1380
1381 static void
1382 unlock_port (void *mutex)
1383 {
1384 scm_i_pthread_mutex_unlock ((scm_i_pthread_mutex_t *) mutex);
1385 }
1386
1387 void
1388 scm_dynwind_lock_port (SCM port)
1389 #define FUNC_NAME "dynwind-lock-port"
1390 {
1391 scm_i_pthread_mutex_t *lock;
1392 SCM_VALIDATE_OPPORT (SCM_ARG1, port);
1393 scm_c_lock_port (port, &lock);
1394 if (lock)
1395 {
1396 scm_dynwind_unwind_handler (unlock_port, lock, SCM_F_WIND_EXPLICITLY);
1397 scm_dynwind_rewind_handler (lock_port, lock, 0);
1398 }
1399 }
1400 #undef FUNC_NAME
1401
1402
1403 \f
1404
1405 /* Input. */
1406
1407 int
1408 scm_get_byte_or_eof (SCM port)
1409 {
1410 scm_i_pthread_mutex_t *lock;
1411 int ret;
1412
1413 scm_c_lock_port (port, &lock);
1414 ret = scm_get_byte_or_eof_unlocked (port);
1415 if (lock)
1416 scm_i_pthread_mutex_unlock (lock);
1417
1418 return ret;
1419 }
1420
1421 int
1422 scm_peek_byte_or_eof (SCM port)
1423 {
1424 scm_i_pthread_mutex_t *lock;
1425 int ret;
1426
1427 scm_c_lock_port (port, &lock);
1428 ret = scm_peek_byte_or_eof_unlocked (port);
1429 if (lock)
1430 scm_i_pthread_mutex_unlock (lock);
1431
1432 return ret;
1433 }
1434
1435 /* scm_c_read
1436 *
1437 * Used by an application to read arbitrary number of bytes from an
1438 * SCM port. Same semantics as libc read, except that scm_c_read only
1439 * returns less than SIZE bytes if at end-of-file.
1440 *
1441 * Warning: Doesn't update port line and column counts! */
1442
1443 /* This structure, and the following swap_buffer function, are used
1444 for temporarily swapping a port's own read buffer, and the buffer
1445 that the caller of scm_c_read provides. */
1446 struct port_and_swap_buffer
1447 {
1448 scm_t_port *pt;
1449 unsigned char *buffer;
1450 size_t size;
1451 };
1452
1453 static void
1454 swap_buffer (void *data)
1455 {
1456 struct port_and_swap_buffer *psb = (struct port_and_swap_buffer *) data;
1457 unsigned char *old_buf = psb->pt->read_buf;
1458 size_t old_size = psb->pt->read_buf_size;
1459
1460 /* Make the port use (buffer, size) from the struct. */
1461 psb->pt->read_pos = psb->pt->read_buf = psb->pt->read_end = psb->buffer;
1462 psb->pt->read_buf_size = psb->size;
1463
1464 /* Save the port's old (buffer, size) in the struct. */
1465 psb->buffer = old_buf;
1466 psb->size = old_size;
1467 }
1468
1469 static int scm_i_fill_input_unlocked (SCM port);
1470
1471 size_t
1472 scm_c_read_unlocked (SCM port, void *buffer, size_t size)
1473 #define FUNC_NAME "scm_c_read"
1474 {
1475 scm_t_port *pt;
1476 scm_t_port_internal *pti;
1477 size_t n_read = 0, n_available;
1478 struct port_and_swap_buffer psb;
1479
1480 SCM_VALIDATE_OPINPORT (1, port);
1481
1482 pt = SCM_PTAB_ENTRY (port);
1483 pti = SCM_PORT_GET_INTERNAL (port);
1484 if (pt->rw_active == SCM_PORT_WRITE)
1485 SCM_PORT_DESCRIPTOR (port)->flush (port);
1486
1487 if (pt->rw_random)
1488 pt->rw_active = SCM_PORT_READ;
1489
1490 /* Take bytes first from the port's read buffer. */
1491 if (pt->read_pos < pt->read_end)
1492 {
1493 n_available = min (size, pt->read_end - pt->read_pos);
1494 memcpy (buffer, pt->read_pos, n_available);
1495 buffer = (char *) buffer + n_available;
1496 pt->read_pos += n_available;
1497 n_read += n_available;
1498 size -= n_available;
1499 }
1500
1501 /* Avoid the scm_dynwind_* costs if we now have enough data. */
1502 if (size == 0)
1503 return n_read;
1504
1505 /* Now we will call scm_i_fill_input_unlocked repeatedly until we have
1506 read the requested number of bytes. (Note that a single
1507 scm_i_fill_input_unlocked call does not guarantee to fill the whole
1508 of the port's read buffer.) */
1509 if (pt->read_buf_size <= 1
1510 && pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
1511 {
1512 /* The port that we are reading from is unbuffered - i.e. does not
1513 have its own persistent buffer - but we have a buffer, provided
1514 by our caller, that is the right size for the data that is
1515 wanted. For the following scm_i_fill_input_unlocked calls,
1516 therefore, we use the buffer in hand as the port's read buffer.
1517
1518 We need to make sure that the port's normal (1 byte) buffer is
1519 reinstated in case one of the scm_i_fill_input_unlocked ()
1520 calls throws an exception; we use the scm_dynwind_* API to
1521 achieve that.
1522
1523 A consequence of this optimization is that the fill_input
1524 functions can't unget characters. That'll push data to the
1525 pushback buffer instead of this psb buffer. */
1526 #if SCM_DEBUG == 1
1527 unsigned char *pback = pt->putback_buf;
1528 #endif
1529 psb.pt = pt;
1530 psb.buffer = buffer;
1531 psb.size = size;
1532 scm_dynwind_begin (SCM_F_DYNWIND_REWINDABLE);
1533 scm_dynwind_rewind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1534 scm_dynwind_unwind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1535
1536 /* Call scm_i_fill_input_unlocked until we have all the bytes that
1537 we need, or we hit EOF. */
1538 while (pt->read_buf_size && (scm_i_fill_input_unlocked (port) != EOF))
1539 {
1540 pt->read_buf_size -= (pt->read_end - pt->read_pos);
1541 pt->read_pos = pt->read_buf = pt->read_end;
1542 }
1543 #if SCM_DEBUG == 1
1544 if (pback != pt->putback_buf
1545 || pt->read_buf - (unsigned char *) buffer < 0)
1546 scm_misc_error (FUNC_NAME,
1547 "scm_c_read must not call a fill function that pushes "
1548 "back characters onto an unbuffered port", SCM_EOL);
1549 #endif
1550 n_read += pt->read_buf - (unsigned char *) buffer;
1551
1552 /* Reinstate the port's normal buffer. */
1553 scm_dynwind_end ();
1554 }
1555 else
1556 {
1557 /* The port has its own buffer. It is important that we use it,
1558 even if it happens to be smaller than our caller's buffer, so
1559 that a custom port implementation's entry points (in
1560 particular, fill_input) can rely on the buffer always being
1561 the same as they first set up. */
1562 while (size && (scm_i_fill_input_unlocked (port) != EOF))
1563 {
1564 n_available = min (size, pt->read_end - pt->read_pos);
1565 memcpy (buffer, pt->read_pos, n_available);
1566 buffer = (char *) buffer + n_available;
1567 pt->read_pos += n_available;
1568 n_read += n_available;
1569 size -= n_available;
1570 }
1571 }
1572
1573 return n_read;
1574 }
1575 #undef FUNC_NAME
1576
1577 size_t
1578 scm_c_read (SCM port, void *buffer, size_t size)
1579 {
1580 scm_i_pthread_mutex_t *lock;
1581 size_t ret;
1582
1583 scm_c_lock_port (port, &lock);
1584 ret = scm_c_read_unlocked (port, buffer, size);
1585 if (lock)
1586 scm_i_pthread_mutex_unlock (lock);
1587
1588
1589 return ret;
1590 }
1591
1592 /* Update the line and column number of PORT after consumption of C. */
1593 static inline void
1594 update_port_lf (scm_t_wchar c, SCM port)
1595 {
1596 switch (c)
1597 {
1598 case '\a':
1599 case EOF:
1600 break;
1601 case '\b':
1602 SCM_DECCOL (port);
1603 break;
1604 case '\n':
1605 SCM_INCLINE (port);
1606 break;
1607 case '\r':
1608 SCM_ZEROCOL (port);
1609 break;
1610 case '\t':
1611 SCM_TABCOL (port);
1612 break;
1613 default:
1614 SCM_INCCOL (port);
1615 break;
1616 }
1617 }
1618
1619 #define SCM_MBCHAR_BUF_SIZE (4)
1620
1621 /* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint.
1622 UTF8_BUF is assumed to contain a valid UTF-8 sequence. */
1623 static scm_t_wchar
1624 utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
1625 {
1626 scm_t_wchar codepoint;
1627
1628 if (utf8_buf[0] <= 0x7f)
1629 {
1630 assert (size == 1);
1631 codepoint = utf8_buf[0];
1632 }
1633 else if ((utf8_buf[0] & 0xe0) == 0xc0)
1634 {
1635 assert (size == 2);
1636 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL
1637 | (utf8_buf[1] & 0x3f);
1638 }
1639 else if ((utf8_buf[0] & 0xf0) == 0xe0)
1640 {
1641 assert (size == 3);
1642 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL
1643 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL
1644 | (utf8_buf[2] & 0x3f);
1645 }
1646 else
1647 {
1648 assert (size == 4);
1649 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL
1650 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL
1651 | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL
1652 | (utf8_buf[3] & 0x3f);
1653 }
1654
1655 return codepoint;
1656 }
1657
1658 /* Read a UTF-8 sequence from PORT. On success, return 0 and set
1659 *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8
1660 representation, and set *LEN to the length in bytes. Return
1661 `EILSEQ' on error. */
1662 static int
1663 get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
1664 scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1665 {
1666 #define ASSERT_NOT_EOF(b) \
1667 if (SCM_UNLIKELY ((b) == EOF)) \
1668 goto invalid_seq
1669 #define CONSUME_PEEKED_BYTE() \
1670 pt->read_pos++
1671
1672 int byte;
1673 scm_t_port *pt;
1674
1675 *len = 0;
1676 pt = SCM_PTAB_ENTRY (port);
1677
1678 byte = scm_get_byte_or_eof_unlocked (port);
1679 if (byte == EOF)
1680 {
1681 *codepoint = EOF;
1682 return 0;
1683 }
1684
1685 buf[0] = (scm_t_uint8) byte;
1686 *len = 1;
1687
1688 if (buf[0] <= 0x7f)
1689 /* 1-byte form. */
1690 *codepoint = buf[0];
1691 else if (buf[0] >= 0xc2 && buf[0] <= 0xdf)
1692 {
1693 /* 2-byte form. */
1694 byte = scm_peek_byte_or_eof_unlocked (port);
1695 ASSERT_NOT_EOF (byte);
1696
1697 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1698 goto invalid_seq;
1699
1700 CONSUME_PEEKED_BYTE ();
1701 buf[1] = (scm_t_uint8) byte;
1702 *len = 2;
1703
1704 *codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL
1705 | (buf[1] & 0x3f);
1706 }
1707 else if ((buf[0] & 0xf0) == 0xe0)
1708 {
1709 /* 3-byte form. */
1710 byte = scm_peek_byte_or_eof_unlocked (port);
1711 ASSERT_NOT_EOF (byte);
1712
1713 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80
1714 || (buf[0] == 0xe0 && byte < 0xa0)
1715 || (buf[0] == 0xed && byte > 0x9f)))
1716 goto invalid_seq;
1717
1718 CONSUME_PEEKED_BYTE ();
1719 buf[1] = (scm_t_uint8) byte;
1720 *len = 2;
1721
1722 byte = scm_peek_byte_or_eof_unlocked (port);
1723 ASSERT_NOT_EOF (byte);
1724
1725 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1726 goto invalid_seq;
1727
1728 CONSUME_PEEKED_BYTE ();
1729 buf[2] = (scm_t_uint8) byte;
1730 *len = 3;
1731
1732 *codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL
1733 | ((scm_t_wchar) buf[1] & 0x3f) << 6UL
1734 | (buf[2] & 0x3f);
1735 }
1736 else if (buf[0] >= 0xf0 && buf[0] <= 0xf4)
1737 {
1738 /* 4-byte form. */
1739 byte = scm_peek_byte_or_eof_unlocked (port);
1740 ASSERT_NOT_EOF (byte);
1741
1742 if (SCM_UNLIKELY (((byte & 0xc0) != 0x80)
1743 || (buf[0] == 0xf0 && byte < 0x90)
1744 || (buf[0] == 0xf4 && byte > 0x8f)))
1745 goto invalid_seq;
1746
1747 CONSUME_PEEKED_BYTE ();
1748 buf[1] = (scm_t_uint8) byte;
1749 *len = 2;
1750
1751 byte = scm_peek_byte_or_eof_unlocked (port);
1752 ASSERT_NOT_EOF (byte);
1753
1754 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1755 goto invalid_seq;
1756
1757 CONSUME_PEEKED_BYTE ();
1758 buf[2] = (scm_t_uint8) byte;
1759 *len = 3;
1760
1761 byte = scm_peek_byte_or_eof_unlocked (port);
1762 ASSERT_NOT_EOF (byte);
1763
1764 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1765 goto invalid_seq;
1766
1767 CONSUME_PEEKED_BYTE ();
1768 buf[3] = (scm_t_uint8) byte;
1769 *len = 4;
1770
1771 *codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL
1772 | ((scm_t_wchar) buf[1] & 0x3f) << 12UL
1773 | ((scm_t_wchar) buf[2] & 0x3f) << 6UL
1774 | (buf[3] & 0x3f);
1775 }
1776 else
1777 goto invalid_seq;
1778
1779 return 0;
1780
1781 invalid_seq:
1782 /* Here we could choose the consume the faulty byte when it's not a
1783 valid starting byte, but it's not a requirement. What Section 3.9
1784 of Unicode 6.0.0 mandates, though, is to not consume a byte that
1785 would otherwise be a valid starting byte. */
1786
1787 return EILSEQ;
1788
1789 #undef CONSUME_PEEKED_BYTE
1790 #undef ASSERT_NOT_EOF
1791 }
1792
1793 /* Read an ISO-8859-1 codepoint (a byte) from PORT. On success, return
1794 0 and set *CODEPOINT to the codepoint that was read, fill BUF with
1795 its UTF-8 representation, and set *LEN to the length in bytes.
1796 Return `EILSEQ' on error. */
1797 static int
1798 get_latin1_codepoint (SCM port, scm_t_wchar *codepoint,
1799 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1800 {
1801 *codepoint = scm_get_byte_or_eof_unlocked (port);
1802
1803 if (*codepoint == EOF)
1804 *len = 0;
1805 else
1806 {
1807 *len = 1;
1808 buf[0] = *codepoint;
1809 }
1810 return 0;
1811 }
1812
1813 /* Likewise, read a byte sequence from PORT, passing it through its
1814 input conversion descriptor. */
1815 static int
1816 get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
1817 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1818 {
1819 scm_t_iconv_descriptors *id;
1820 scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
1821 size_t input_size = 0;
1822
1823 id = scm_i_port_iconv_descriptors (port, SCM_PORT_READ);
1824
1825 for (;;)
1826 {
1827 int byte_read;
1828 char *input, *output;
1829 size_t input_left, output_left, done;
1830
1831 byte_read = scm_get_byte_or_eof_unlocked (port);
1832 if (SCM_UNLIKELY (byte_read == EOF))
1833 {
1834 if (SCM_LIKELY (input_size == 0))
1835 {
1836 *codepoint = (scm_t_wchar) EOF;
1837 *len = input_size;
1838 return 0;
1839 }
1840 else
1841 {
1842 /* EOF found in the middle of a multibyte character. */
1843 scm_i_set_pending_eof (port);
1844 return EILSEQ;
1845 }
1846 }
1847
1848 buf[input_size++] = byte_read;
1849
1850 input = buf;
1851 input_left = input_size;
1852 output = (char *) utf8_buf;
1853 output_left = sizeof (utf8_buf);
1854
1855 done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
1856
1857 if (done == (size_t) -1)
1858 {
1859 int err = errno;
1860 if (SCM_LIKELY (err == EINVAL))
1861 /* The input byte sequence did not form a complete
1862 character. Read another byte and try again. */
1863 continue;
1864 else
1865 return err;
1866 }
1867 else
1868 {
1869 size_t output_size = sizeof (utf8_buf) - output_left;
1870 if (SCM_LIKELY (output_size > 0))
1871 {
1872 /* iconv generated output. Convert the UTF8_BUF sequence
1873 to a Unicode code point. */
1874 *codepoint = utf8_to_codepoint (utf8_buf, output_size);
1875 *len = input_size;
1876 return 0;
1877 }
1878 else
1879 {
1880 /* iconv consumed some bytes without producing any output.
1881 Most likely this means that a Unicode byte-order mark
1882 (BOM) was consumed, which should not be included in the
1883 returned buf. Shift any remaining bytes to the beginning
1884 of buf, and continue the loop. */
1885 memmove (buf, input, input_left);
1886 input_size = input_left;
1887 continue;
1888 }
1889 }
1890 }
1891 }
1892
1893 /* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
1894 with the byte representation of the codepoint in PORT's encoding, and
1895 set *LEN to the length in bytes of that representation. Return 0 on
1896 success and an errno value on error. */
1897 static SCM_C_INLINE int
1898 get_codepoint (SCM port, scm_t_wchar *codepoint,
1899 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1900 {
1901 int err;
1902 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1903 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
1904
1905 if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
1906 err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
1907 else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
1908 err = get_latin1_codepoint (port, codepoint, buf, len);
1909 else
1910 err = get_iconv_codepoint (port, codepoint, buf, len);
1911
1912 if (SCM_LIKELY (err == 0))
1913 {
1914 if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read))
1915 {
1916 /* Record that we're no longer at stream start. */
1917 pti->at_stream_start_for_bom_read = 0;
1918 if (pt->rw_random)
1919 pti->at_stream_start_for_bom_write = 0;
1920
1921 /* If we just read a BOM in an encoding that recognizes them,
1922 then silently consume it and read another code point. */
1923 if (SCM_UNLIKELY
1924 (*codepoint == SCM_UNICODE_BOM
1925 && (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8
1926 || strcmp (pt->encoding, "UTF-16") == 0
1927 || strcmp (pt->encoding, "UTF-32") == 0)))
1928 return get_codepoint (port, codepoint, buf, len);
1929 }
1930 update_port_lf (*codepoint, port);
1931 }
1932 else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
1933 {
1934 *codepoint = '?';
1935 err = 0;
1936 update_port_lf (*codepoint, port);
1937 }
1938
1939 return err;
1940 }
1941
1942 /* Read a codepoint from PORT and return it. */
1943 scm_t_wchar
1944 scm_getc_unlocked (SCM port)
1945 #define FUNC_NAME "scm_getc"
1946 {
1947 int err;
1948 size_t len;
1949 scm_t_wchar codepoint;
1950 char buf[SCM_MBCHAR_BUF_SIZE];
1951
1952 err = get_codepoint (port, &codepoint, buf, &len);
1953 if (SCM_UNLIKELY (err != 0))
1954 /* At this point PORT should point past the invalid encoding, as per
1955 R6RS-lib Section 8.2.4. */
1956 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
1957
1958 return codepoint;
1959 }
1960 #undef FUNC_NAME
1961
1962 scm_t_wchar
1963 scm_getc (SCM port)
1964 {
1965 scm_i_pthread_mutex_t *lock;
1966 scm_t_wchar ret;
1967
1968 scm_c_lock_port (port, &lock);
1969 ret = scm_getc_unlocked (port);
1970 if (lock)
1971 scm_i_pthread_mutex_unlock (lock);
1972
1973
1974 return ret;
1975 }
1976
1977 SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
1978 (SCM port),
1979 "Return the next character available from @var{port}, updating\n"
1980 "@var{port} to point to the following character. If no more\n"
1981 "characters are available, the end-of-file object is returned.\n"
1982 "\n"
1983 "When @var{port}'s data cannot be decoded according to its\n"
1984 "character encoding, a @code{decoding-error} is raised and\n"
1985 "@var{port} points past the erroneous byte sequence.\n")
1986 #define FUNC_NAME s_scm_read_char
1987 {
1988 scm_t_wchar c;
1989 if (SCM_UNBNDP (port))
1990 port = scm_current_input_port ();
1991 SCM_VALIDATE_OPINPORT (1, port);
1992 c = scm_getc_unlocked (port);
1993 if (EOF == c)
1994 return SCM_EOF_VAL;
1995 return SCM_MAKE_CHAR (c);
1996 }
1997 #undef FUNC_NAME
1998
1999
2000 \f
2001
2002 /* Pushback. */
2003 \f
2004
2005
2006 static void
2007 scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
2008 #define FUNC_NAME "scm_unget_bytes"
2009 {
2010 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2011 size_t old_len, new_len;
2012
2013 scm_i_clear_pending_eof (port);
2014
2015 if (pt->read_buf != pt->putback_buf)
2016 /* switch to the put-back buffer. */
2017 {
2018 if (pt->putback_buf == NULL)
2019 {
2020 pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
2021 ? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
2022 pt->putback_buf
2023 = (unsigned char *) scm_gc_malloc_pointerless
2024 (pt->putback_buf_size, "putback buffer");
2025 }
2026
2027 pt->saved_read_buf = pt->read_buf;
2028 pt->saved_read_pos = pt->read_pos;
2029 pt->saved_read_end = pt->read_end;
2030 pt->saved_read_buf_size = pt->read_buf_size;
2031
2032 /* Put read_pos at the end of the buffer, so that ungets will not
2033 have to shift the buffer contents each time. */
2034 pt->read_buf = pt->putback_buf;
2035 pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
2036 pt->read_buf_size = pt->putback_buf_size;
2037 }
2038
2039 old_len = pt->read_end - pt->read_pos;
2040 new_len = old_len + len;
2041
2042 if (new_len > pt->read_buf_size)
2043 /* The putback buffer needs to be enlarged. */
2044 {
2045 size_t new_buf_size;
2046 unsigned char *new_buf, *new_end, *new_pos;
2047
2048 new_buf_size = pt->read_buf_size * 2;
2049 if (new_buf_size < new_len)
2050 new_buf_size = new_len;
2051
2052 new_buf = (unsigned char *)
2053 scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
2054
2055 /* Put the bytes at the end of the buffer, so that future
2056 ungets won't need to shift the buffer. */
2057 new_end = new_buf + new_buf_size;
2058 new_pos = new_end - old_len;
2059 memcpy (new_pos, pt->read_pos, old_len);
2060
2061 pt->read_buf = pt->putback_buf = new_buf;
2062 pt->read_pos = new_pos;
2063 pt->read_end = new_end;
2064 pt->read_buf_size = pt->putback_buf_size = new_buf_size;
2065 }
2066 else if (pt->read_buf + len < pt->read_pos)
2067 /* If needed, shift the existing buffer contents up.
2068 This should not happen unless some external code
2069 manipulates the putback buffer pointers. */
2070 {
2071 unsigned char *new_end = pt->read_buf + pt->read_buf_size;
2072 unsigned char *new_pos = new_end - old_len;
2073
2074 memmove (new_pos, pt->read_pos, old_len);
2075 pt->read_pos = new_pos;
2076 pt->read_end = new_end;
2077 }
2078
2079 /* Move read_pos back and copy the bytes there. */
2080 pt->read_pos -= len;
2081 memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
2082
2083 if (pt->rw_active == SCM_PORT_WRITE)
2084 scm_flush (port);
2085
2086 if (pt->rw_random)
2087 pt->rw_active = SCM_PORT_READ;
2088 }
2089 #undef FUNC_NAME
2090
2091 void
2092 scm_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
2093 {
2094 scm_i_unget_bytes_unlocked (buf, len, port);
2095 }
2096
2097 void
2098 scm_unget_byte_unlocked (int c, SCM port)
2099 {
2100 unsigned char byte = c;
2101 scm_i_unget_bytes_unlocked (&byte, 1, port);
2102 }
2103
2104 void
2105 scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
2106 {
2107 scm_i_pthread_mutex_t *lock;
2108 scm_c_lock_port (port, &lock);
2109 scm_i_unget_bytes_unlocked (buf, len, port);
2110 if (lock)
2111 scm_i_pthread_mutex_unlock (lock);
2112 }
2113
2114 void
2115 scm_unget_byte (int c, SCM port)
2116 {
2117 unsigned char byte = c;
2118 scm_i_pthread_mutex_t *lock;
2119 scm_c_lock_port (port, &lock);
2120 scm_i_unget_bytes_unlocked (&byte, 1, port);
2121 if (lock)
2122 scm_i_pthread_mutex_unlock (lock);
2123 }
2124
2125 void
2126 scm_ungetc_unlocked (scm_t_wchar c, SCM port)
2127 #define FUNC_NAME "scm_ungetc"
2128 {
2129 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2130 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
2131 char *result;
2132 char result_buf[10];
2133 size_t len;
2134
2135 len = sizeof (result_buf);
2136
2137 if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
2138 {
2139 if (c < 0xf0)
2140 {
2141 result_buf[0] = (char) c;
2142 result = result_buf;
2143 len = 1;
2144 }
2145 else
2146 result =
2147 (char *) u32_to_u8 ((uint32_t *) &c, 1, (uint8_t *) result_buf, &len);
2148 }
2149 else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1 && c <= 0xff)
2150 {
2151 result_buf[0] = (char) c;
2152 result = result_buf;
2153 len = 1;
2154 }
2155 else
2156 result = u32_conv_to_encoding (pt->encoding,
2157 (enum iconv_ilseq_handler) pt->ilseq_handler,
2158 (uint32_t *) &c, 1, NULL,
2159 result_buf, &len);
2160
2161 if (SCM_UNLIKELY (result == NULL || len == 0))
2162 scm_encoding_error (FUNC_NAME, errno,
2163 "conversion to port encoding failed",
2164 SCM_BOOL_F, SCM_MAKE_CHAR (c));
2165
2166 scm_i_unget_bytes_unlocked ((unsigned char *) result, len, port);
2167
2168 if (SCM_UNLIKELY (result != result_buf))
2169 free (result);
2170
2171 if (c == '\n')
2172 {
2173 /* What should col be in this case?
2174 * We'll leave it at -1.
2175 */
2176 SCM_LINUM (port) -= 1;
2177 }
2178 else
2179 SCM_COL(port) -= 1;
2180 }
2181 #undef FUNC_NAME
2182
2183 void
2184 scm_ungetc (scm_t_wchar c, SCM port)
2185 {
2186 scm_i_pthread_mutex_t *lock;
2187 scm_c_lock_port (port, &lock);
2188 scm_ungetc_unlocked (c, port);
2189 if (lock)
2190 scm_i_pthread_mutex_unlock (lock);
2191
2192 }
2193
2194 void
2195 scm_ungets_unlocked (const char *s, int n, SCM port)
2196 {
2197 /* This is simple minded and inefficient, but unreading strings is
2198 * probably not a common operation, and remember that line and
2199 * column numbers have to be handled...
2200 *
2201 * Please feel free to write an optimized version!
2202 */
2203 while (n--)
2204 scm_ungetc_unlocked (s[n], port);
2205 }
2206
2207 void
2208 scm_ungets (const char *s, int n, SCM port)
2209 {
2210 scm_i_pthread_mutex_t *lock;
2211 scm_c_lock_port (port, &lock);
2212 scm_ungets_unlocked (s, n, port);
2213 if (lock)
2214 scm_i_pthread_mutex_unlock (lock);
2215
2216 }
2217
2218 SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
2219 (SCM port),
2220 "Return the next character available from @var{port},\n"
2221 "@emph{without} updating @var{port} to point to the following\n"
2222 "character. If no more characters are available, the\n"
2223 "end-of-file object is returned.\n"
2224 "\n"
2225 "The value returned by\n"
2226 "a call to @code{peek-char} is the same as the value that would\n"
2227 "have been returned by a call to @code{read-char} on the same\n"
2228 "port. The only difference is that the very next call to\n"
2229 "@code{read-char} or @code{peek-char} on that @var{port} will\n"
2230 "return the value returned by the preceding call to\n"
2231 "@code{peek-char}. In particular, a call to @code{peek-char} on\n"
2232 "an interactive port will hang waiting for input whenever a call\n"
2233 "to @code{read-char} would have hung.\n"
2234 "\n"
2235 "As for @code{read-char}, a @code{decoding-error} may be raised\n"
2236 "if such a situation occurs. However, unlike with @code{read-char},\n"
2237 "@var{port} still points at the beginning of the erroneous byte\n"
2238 "sequence when the error is raised.\n")
2239 #define FUNC_NAME s_scm_peek_char
2240 {
2241 int err;
2242 SCM result;
2243 scm_t_wchar c;
2244 char bytes[SCM_MBCHAR_BUF_SIZE];
2245 long column, line;
2246 size_t len = 0;
2247
2248 if (SCM_UNBNDP (port))
2249 port = scm_current_input_port ();
2250 SCM_VALIDATE_OPINPORT (1, port);
2251
2252 column = SCM_COL (port);
2253 line = SCM_LINUM (port);
2254
2255 err = get_codepoint (port, &c, bytes, &len);
2256
2257 scm_i_unget_bytes_unlocked ((unsigned char *) bytes, len, port);
2258
2259 SCM_COL (port) = column;
2260 SCM_LINUM (port) = line;
2261
2262 if (SCM_UNLIKELY (err != 0))
2263 {
2264 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
2265
2266 /* Shouldn't happen since `catch' always aborts to prompt. */
2267 result = SCM_BOOL_F;
2268 }
2269 else if (c == EOF)
2270 {
2271 scm_i_set_pending_eof (port);
2272 result = SCM_EOF_VAL;
2273 }
2274 else
2275 result = SCM_MAKE_CHAR (c);
2276
2277 return result;
2278 }
2279 #undef FUNC_NAME
2280
2281 SCM_DEFINE (scm_unread_char, "unread-char", 1, 1, 0,
2282 (SCM cobj, SCM port),
2283 "Place character @var{cobj} in @var{port} so that it will be\n"
2284 "read by the next read operation. If called multiple times, the\n"
2285 "unread characters will be read again in last-in first-out\n"
2286 "order. If @var{port} is not supplied, the current input port\n"
2287 "is used.")
2288 #define FUNC_NAME s_scm_unread_char
2289 {
2290 int c;
2291
2292 SCM_VALIDATE_CHAR (1, cobj);
2293 if (SCM_UNBNDP (port))
2294 port = scm_current_input_port ();
2295 SCM_VALIDATE_OPINPORT (2, port);
2296
2297 c = SCM_CHAR (cobj);
2298
2299 scm_ungetc_unlocked (c, port);
2300 return cobj;
2301 }
2302 #undef FUNC_NAME
2303
2304 SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0,
2305 (SCM str, SCM port),
2306 "Place the string @var{str} in @var{port} so that its characters will be\n"
2307 "read in subsequent read operations. If called multiple times, the\n"
2308 "unread characters will be read again in last-in first-out order. If\n"
2309 "@var{port} is not supplied, the current-input-port is used.")
2310 #define FUNC_NAME s_scm_unread_string
2311 {
2312 int n;
2313 SCM_VALIDATE_STRING (1, str);
2314 if (SCM_UNBNDP (port))
2315 port = scm_current_input_port ();
2316 SCM_VALIDATE_OPINPORT (2, port);
2317
2318 n = scm_i_string_length (str);
2319
2320 while (n--)
2321 scm_ungetc_unlocked (scm_i_string_ref (str, n), port);
2322
2323 return str;
2324 }
2325 #undef FUNC_NAME
2326
2327
2328 \f
2329
2330 /* Manipulating the buffers. */
2331
2332 /* This routine does not take any locks, as it is usually called as part
2333 of a port implementation. */
2334 void
2335 scm_port_non_buffer (scm_t_port *pt)
2336 {
2337 pt->read_pos = pt->read_buf = pt->read_end = &pt->shortbuf;
2338 pt->write_buf = pt->write_pos = &pt->shortbuf;
2339 pt->read_buf_size = pt->write_buf_size = 1;
2340 pt->write_end = pt->write_buf + pt->write_buf_size;
2341 }
2342
2343 /* this should only be called when the read buffer is empty. it
2344 tries to refill the read buffer. it returns the first char from
2345 the port, which is either EOF or *(pt->read_pos). */
2346 static int
2347 scm_i_fill_input_unlocked (SCM port)
2348 {
2349 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2350 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
2351
2352 assert (pt->read_pos == pt->read_end);
2353
2354 if (pti->pending_eof)
2355 {
2356 pti->pending_eof = 0;
2357 return EOF;
2358 }
2359
2360 if (pt->read_buf == pt->putback_buf)
2361 {
2362 /* finished reading put-back chars. */
2363 pt->read_buf = pt->saved_read_buf;
2364 pt->read_pos = pt->saved_read_pos;
2365 pt->read_end = pt->saved_read_end;
2366 pt->read_buf_size = pt->saved_read_buf_size;
2367 if (pt->read_pos < pt->read_end)
2368 return *(pt->read_pos);
2369 }
2370 return SCM_PORT_DESCRIPTOR (port)->fill_input (port);
2371 }
2372
2373 int
2374 scm_fill_input (SCM port)
2375 {
2376 scm_i_pthread_mutex_t *lock;
2377 int ret;
2378
2379 scm_c_lock_port (port, &lock);
2380 ret = scm_fill_input_unlocked (port);
2381 if (lock)
2382 scm_i_pthread_mutex_unlock (lock);
2383
2384
2385 return ret;
2386 }
2387
2388 /* Slow-path fallback for 'scm_get_byte_or_eof_unlocked' */
2389 int
2390 scm_slow_get_byte_or_eof_unlocked (SCM port)
2391 {
2392 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2393
2394 if (pt->rw_active == SCM_PORT_WRITE)
2395 scm_flush_unlocked (port);
2396
2397 if (pt->rw_random)
2398 pt->rw_active = SCM_PORT_READ;
2399
2400 if (pt->read_pos >= pt->read_end)
2401 {
2402 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2403 return EOF;
2404 }
2405
2406 return *pt->read_pos++;
2407 }
2408
2409 /* Slow-path fallback for 'scm_peek_byte_or_eof_unlocked' */
2410 int
2411 scm_slow_peek_byte_or_eof_unlocked (SCM port)
2412 {
2413 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2414
2415 if (pt->rw_active == SCM_PORT_WRITE)
2416 scm_flush_unlocked (port);
2417
2418 if (pt->rw_random)
2419 pt->rw_active = SCM_PORT_READ;
2420
2421 if (pt->read_pos >= pt->read_end)
2422 {
2423 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2424 {
2425 scm_i_set_pending_eof (port);
2426 return EOF;
2427 }
2428 }
2429
2430 return *pt->read_pos;
2431 }
2432
2433 /* Move up to READ_LEN bytes from PORT's putback and/or read buffers
2434 into memory starting at DEST. Return the number of bytes moved.
2435 PORT's line/column numbers are left unchanged. */
2436 size_t
2437 scm_take_from_input_buffers (SCM port, char *dest, size_t read_len)
2438 {
2439 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2440 size_t bytes_read = 0;
2441 size_t from_buf = min (pt->read_end - pt->read_pos, read_len);
2442
2443 if (from_buf > 0)
2444 {
2445 memcpy (dest, pt->read_pos, from_buf);
2446 pt->read_pos += from_buf;
2447 bytes_read += from_buf;
2448 read_len -= from_buf;
2449 dest += from_buf;
2450 }
2451
2452 /* if putback was active, try the real input buffer too. */
2453 if (pt->read_buf == pt->putback_buf)
2454 {
2455 from_buf = min (pt->saved_read_end - pt->saved_read_pos, read_len);
2456 if (from_buf > 0)
2457 {
2458 memcpy (dest, pt->saved_read_pos, from_buf);
2459 pt->saved_read_pos += from_buf;
2460 bytes_read += from_buf;
2461 }
2462 }
2463
2464 return bytes_read;
2465 }
2466
2467 /* Clear a port's read buffers, returning the contents. */
2468 SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0,
2469 (SCM port),
2470 "This procedure clears a port's input buffers, similar\n"
2471 "to the way that force-output clears the output buffer. The\n"
2472 "contents of the buffers are returned as a single string, e.g.,\n"
2473 "\n"
2474 "@lisp\n"
2475 "(define p (open-input-file ...))\n"
2476 "(drain-input p) => empty string, nothing buffered yet.\n"
2477 "(unread-char (read-char p) p)\n"
2478 "(drain-input p) => initial chars from p, up to the buffer size.\n"
2479 "@end lisp\n\n"
2480 "Draining the buffers may be useful for cleanly finishing\n"
2481 "buffered I/O so that the file descriptor can be used directly\n"
2482 "for further input.")
2483 #define FUNC_NAME s_scm_drain_input
2484 {
2485 SCM result;
2486 char *data;
2487 scm_t_port *pt;
2488 long count;
2489
2490 SCM_VALIDATE_OPINPORT (1, port);
2491 pt = SCM_PTAB_ENTRY (port);
2492
2493 count = pt->read_end - pt->read_pos;
2494 if (pt->read_buf == pt->putback_buf)
2495 count += pt->saved_read_end - pt->saved_read_pos;
2496
2497 if (count)
2498 {
2499 result = scm_i_make_string (count, &data, 0);
2500 scm_take_from_input_buffers (port, data, count);
2501 }
2502 else
2503 result = scm_nullstr;
2504
2505 return result;
2506 }
2507 #undef FUNC_NAME
2508
2509 void
2510 scm_end_input_unlocked (SCM port)
2511 {
2512 long offset;
2513 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2514
2515 scm_i_clear_pending_eof (port);
2516 if (pt->read_buf == pt->putback_buf)
2517 {
2518 offset = pt->read_end - pt->read_pos;
2519 pt->read_buf = pt->saved_read_buf;
2520 pt->read_pos = pt->saved_read_pos;
2521 pt->read_end = pt->saved_read_end;
2522 pt->read_buf_size = pt->saved_read_buf_size;
2523 }
2524 else
2525 offset = 0;
2526
2527 SCM_PORT_DESCRIPTOR (port)->end_input (port, offset);
2528 }
2529
2530 void
2531 scm_end_input (SCM port)
2532 {
2533 scm_i_pthread_mutex_t *lock;
2534 scm_c_lock_port (port, &lock);
2535 scm_end_input_unlocked (port);
2536 if (lock)
2537 scm_i_pthread_mutex_unlock (lock);
2538
2539 }
2540
2541 SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0,
2542 (SCM port),
2543 "Flush the specified output port, or the current output port if @var{port}\n"
2544 "is omitted. The current output buffer contents are passed to the\n"
2545 "underlying port implementation (e.g., in the case of fports, the\n"
2546 "data will be written to the file and the output buffer will be cleared.)\n"
2547 "It has no effect on an unbuffered port.\n\n"
2548 "The return value is unspecified.")
2549 #define FUNC_NAME s_scm_force_output
2550 {
2551 if (SCM_UNBNDP (port))
2552 port = scm_current_output_port ();
2553 else
2554 {
2555 port = SCM_COERCE_OUTPORT (port);
2556 SCM_VALIDATE_OPOUTPORT (1, port);
2557 }
2558 scm_flush_unlocked (port);
2559 return SCM_UNSPECIFIED;
2560 }
2561 #undef FUNC_NAME
2562
2563 void
2564 scm_flush_unlocked (SCM port)
2565 {
2566 SCM_PORT_DESCRIPTOR (port)->flush (port);
2567 }
2568
2569 void
2570 scm_flush (SCM port)
2571 {
2572 scm_i_pthread_mutex_t *lock;
2573 scm_c_lock_port (port, &lock);
2574 scm_flush_unlocked (port);
2575 if (lock)
2576 scm_i_pthread_mutex_unlock (lock);
2577
2578 }
2579
2580 int
2581 scm_fill_input_unlocked (SCM port)
2582 {
2583 return scm_i_fill_input_unlocked (port);
2584 }
2585
2586
2587 \f
2588
2589 /* Output. */
2590
2591 void
2592 scm_putc (char c, SCM port)
2593 {
2594 scm_i_pthread_mutex_t *lock;
2595 scm_c_lock_port (port, &lock);
2596 scm_putc_unlocked (c, port);
2597 if (lock)
2598 scm_i_pthread_mutex_unlock (lock);
2599
2600 }
2601
2602 void
2603 scm_puts (const char *s, SCM port)
2604 {
2605 scm_i_pthread_mutex_t *lock;
2606 scm_c_lock_port (port, &lock);
2607 scm_puts_unlocked (s, port);
2608 if (lock)
2609 scm_i_pthread_mutex_unlock (lock);
2610
2611 }
2612
2613 /* scm_c_write
2614 *
2615 * Used by an application to write arbitrary number of bytes to an SCM
2616 * port. Similar semantics as libc write. However, unlike libc
2617 * write, scm_c_write writes the requested number of bytes and has no
2618 * return value.
2619 *
2620 * Warning: Doesn't update port line and column counts!
2621 */
2622 void
2623 scm_c_write_unlocked (SCM port, const void *ptr, size_t size)
2624 #define FUNC_NAME "scm_c_write"
2625 {
2626 scm_t_port *pt;
2627 scm_t_ptob_descriptor *ptob;
2628
2629 SCM_VALIDATE_OPOUTPORT (1, port);
2630
2631 pt = SCM_PTAB_ENTRY (port);
2632 ptob = SCM_PORT_DESCRIPTOR (port);
2633
2634 if (pt->rw_active == SCM_PORT_READ)
2635 scm_end_input_unlocked (port);
2636
2637 ptob->write (port, ptr, size);
2638
2639 if (pt->rw_random)
2640 pt->rw_active = SCM_PORT_WRITE;
2641 }
2642 #undef FUNC_NAME
2643
2644 void
2645 scm_c_write (SCM port, const void *ptr, size_t size)
2646 {
2647 scm_i_pthread_mutex_t *lock;
2648 scm_c_lock_port (port, &lock);
2649 scm_c_write_unlocked (port, ptr, size);
2650 if (lock)
2651 scm_i_pthread_mutex_unlock (lock);
2652
2653 }
2654
2655 /* scm_lfwrite
2656 *
2657 * This function differs from scm_c_write; it updates port line and
2658 * column. */
2659 void
2660 scm_lfwrite_unlocked (const char *ptr, size_t size, SCM port)
2661 {
2662 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2663 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
2664
2665 if (pt->rw_active == SCM_PORT_READ)
2666 scm_end_input_unlocked (port);
2667
2668 ptob->write (port, ptr, size);
2669
2670 for (; size; ptr++, size--)
2671 update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port);
2672
2673 if (pt->rw_random)
2674 pt->rw_active = SCM_PORT_WRITE;
2675 }
2676
2677 void
2678 scm_lfwrite (const char *ptr, size_t size, SCM port)
2679 {
2680 scm_i_pthread_mutex_t *lock;
2681 scm_c_lock_port (port, &lock);
2682 scm_lfwrite_unlocked (ptr, size, port);
2683 if (lock)
2684 scm_i_pthread_mutex_unlock (lock);
2685
2686 }
2687
2688 /* Write STR to PORT from START inclusive to END exclusive. */
2689 void
2690 scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
2691 {
2692 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2693
2694 if (pt->rw_active == SCM_PORT_READ)
2695 scm_end_input_unlocked (port);
2696
2697 if (end == (size_t) -1)
2698 end = scm_i_string_length (str);
2699
2700 scm_i_display_substring (str, start, end, port);
2701
2702 if (pt->rw_random)
2703 pt->rw_active = SCM_PORT_WRITE;
2704 }
2705
2706
2707 \f
2708
2709 /* Querying and setting positions, and character availability. */
2710
2711 SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0,
2712 (SCM port),
2713 "Return @code{#t} if a character is ready on input @var{port}\n"
2714 "and return @code{#f} otherwise. If @code{char-ready?} returns\n"
2715 "@code{#t} then the next @code{read-char} operation on\n"
2716 "@var{port} is guaranteed not to hang. If @var{port} is a file\n"
2717 "port at end of file then @code{char-ready?} returns @code{#t}.\n"
2718 "\n"
2719 "@code{char-ready?} exists to make it possible for a\n"
2720 "program to accept characters from interactive ports without\n"
2721 "getting stuck waiting for input. Any input editors associated\n"
2722 "with such ports must make sure that characters whose existence\n"
2723 "has been asserted by @code{char-ready?} cannot be rubbed out.\n"
2724 "If @code{char-ready?} were to return @code{#f} at end of file,\n"
2725 "a port at end of file would be indistinguishable from an\n"
2726 "interactive port that has no ready characters.")
2727 #define FUNC_NAME s_scm_char_ready_p
2728 {
2729 scm_t_port *pt;
2730
2731 if (SCM_UNBNDP (port))
2732 port = scm_current_input_port ();
2733 /* It's possible to close the current input port, so validate even in
2734 this case. */
2735 SCM_VALIDATE_OPINPORT (1, port);
2736
2737 pt = SCM_PTAB_ENTRY (port);
2738
2739 /* if the current read buffer is filled, or the
2740 last pushed-back char has been read and the saved buffer is
2741 filled, result is true. */
2742 if (pt->read_pos < pt->read_end
2743 || (pt->read_buf == pt->putback_buf
2744 && pt->saved_read_pos < pt->saved_read_end))
2745 return SCM_BOOL_T;
2746 else
2747 {
2748 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
2749
2750 if (ptob->input_waiting)
2751 return scm_from_bool(ptob->input_waiting (port));
2752 else
2753 return SCM_BOOL_T;
2754 }
2755 }
2756 #undef FUNC_NAME
2757
2758 SCM_DEFINE (scm_seek, "seek", 3, 0, 0,
2759 (SCM fd_port, SCM offset, SCM whence),
2760 "Sets the current position of @var{fd_port} to the integer\n"
2761 "@var{offset}, which is interpreted according to the value of\n"
2762 "@var{whence}.\n"
2763 "\n"
2764 "One of the following variables should be supplied for\n"
2765 "@var{whence}:\n"
2766 "@defvar SEEK_SET\n"
2767 "Seek from the beginning of the file.\n"
2768 "@end defvar\n"
2769 "@defvar SEEK_CUR\n"
2770 "Seek from the current position.\n"
2771 "@end defvar\n"
2772 "@defvar SEEK_END\n"
2773 "Seek from the end of the file.\n"
2774 "@end defvar\n"
2775 "If @var{fd_port} is a file descriptor, the underlying system\n"
2776 "call is @code{lseek}. @var{port} may be a string port.\n"
2777 "\n"
2778 "The value returned is the new position in the file. This means\n"
2779 "that the current position of a port can be obtained using:\n"
2780 "@lisp\n"
2781 "(seek port 0 SEEK_CUR)\n"
2782 "@end lisp")
2783 #define FUNC_NAME s_scm_seek
2784 {
2785 int how;
2786
2787 fd_port = SCM_COERCE_OUTPORT (fd_port);
2788
2789 how = scm_to_int (whence);
2790 if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END)
2791 SCM_OUT_OF_RANGE (3, whence);
2792
2793 if (SCM_OPPORTP (fd_port))
2794 {
2795 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (fd_port);
2796 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (fd_port);
2797 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2798 off_t_or_off64_t rv;
2799
2800 if (!ptob->seek)
2801 SCM_MISC_ERROR ("port is not seekable",
2802 scm_cons (fd_port, SCM_EOL));
2803 else
2804 rv = ptob->seek (fd_port, off, how);
2805
2806 /* Set stream-start flags according to new position. */
2807 pti->at_stream_start_for_bom_read = (rv == 0);
2808 pti->at_stream_start_for_bom_write = (rv == 0);
2809
2810 scm_i_clear_pending_eof (fd_port);
2811
2812 return scm_from_off_t_or_off64_t (rv);
2813 }
2814 else /* file descriptor?. */
2815 {
2816 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2817 off_t_or_off64_t rv;
2818 rv = lseek_or_lseek64 (scm_to_int (fd_port), off, how);
2819 if (rv == -1)
2820 SCM_SYSERROR;
2821 return scm_from_off_t_or_off64_t (rv);
2822 }
2823 }
2824 #undef FUNC_NAME
2825
/* O_BINARY is used below; give it a neutral value where the platform
   does not define it.  */
#ifndef O_BINARY
#define O_BINARY 0
#endif

/* Mingw has ftruncate(), perhaps implemented above using chsize, but
   lacks the filename variant truncate().  This replacement opens FILE
   write-only, truncates it through the descriptor, and closes it
   again.  Returns -1 with errno from the failing call on error,
   otherwise the result of close().  */
#if HAVE_FTRUNCATE && ! HAVE_TRUNCATE
static int
truncate (const char *file, off_t length)
{
  int fd = open (file, O_BINARY | O_WRONLY);
  int saved_errno;

  if (fd == -1)
    return -1;

  if (ftruncate (fd, length) != -1)
    return close (fd);

  /* Keep ftruncate's errno from being overwritten by close().  */
  saved_errno = errno;
  close (fd);
  errno = saved_errno;
  return -1;
}
#endif /* HAVE_FTRUNCATE && ! HAVE_TRUNCATE */
2854
2855 SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0,
2856 (SCM object, SCM length),
2857 "Truncate file @var{object} to @var{length} bytes. @var{object}\n"
2858 "can be a filename string, a port object, or an integer file\n"
2859 "descriptor.\n"
2860 "The return value is unspecified.\n"
2861 "\n"
2862 "For a port or file descriptor @var{length} can be omitted, in\n"
2863 "which case the file is truncated at the current position (per\n"
2864 "@code{ftell} above).\n"
2865 "\n"
2866 "On most systems a file can be extended by giving a length\n"
2867 "greater than the current size, but this is not mandatory in the\n"
2868 "POSIX standard.")
2869 #define FUNC_NAME s_scm_truncate_file
2870 {
2871 int rv;
2872
2873 /* "object" can be a port, fdes or filename.
2874
2875 Negative "length" makes no sense, but it's left to truncate() or
2876 ftruncate() to give back an error for that (normally EINVAL).
2877 */
2878
2879 if (SCM_UNBNDP (length))
2880 {
2881 /* must supply length if object is a filename. */
2882 if (scm_is_string (object))
2883 SCM_MISC_ERROR("must supply length if OBJECT is a filename", SCM_EOL);
2884
2885 length = scm_seek (object, SCM_INUM0, scm_from_int (SEEK_CUR));
2886 }
2887
2888 object = SCM_COERCE_OUTPORT (object);
2889 if (scm_is_integer (object))
2890 {
2891 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2892 SCM_SYSCALL (rv = ftruncate_or_ftruncate64 (scm_to_int (object),
2893 c_length));
2894 }
2895 else if (SCM_OPOUTPORTP (object))
2896 {
2897 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2898 scm_t_port *pt = SCM_PTAB_ENTRY (object);
2899 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (object);
2900
2901 if (!ptob->truncate)
2902 SCM_MISC_ERROR ("port is not truncatable", SCM_EOL);
2903
2904 scm_i_clear_pending_eof (object);
2905 if (pt->rw_active == SCM_PORT_READ)
2906 scm_end_input_unlocked (object);
2907 else if (pt->rw_active == SCM_PORT_WRITE)
2908 ptob->flush (object);
2909
2910 ptob->truncate (object, c_length);
2911 rv = 0;
2912 }
2913 else
2914 {
2915 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2916 char *str = scm_to_locale_string (object);
2917 int eno;
2918 SCM_SYSCALL (rv = truncate_or_truncate64 (str, c_length));
2919 eno = errno;
2920 free (str);
2921 errno = eno;
2922 }
2923 if (rv == -1)
2924 SCM_SYSERROR;
2925 return SCM_UNSPECIFIED;
2926 }
2927 #undef FUNC_NAME
2928
2929 SCM_DEFINE (scm_port_line, "port-line", 1, 0, 0,
2930 (SCM port),
2931 "Return the current line number for @var{port}.\n"
2932 "\n"
2933 "The first line of a file is 0. But you might want to add 1\n"
2934 "when printing line numbers, since starting from 1 is\n"
2935 "traditional in error messages, and likely to be more natural to\n"
2936 "non-programmers.")
2937 #define FUNC_NAME s_scm_port_line
2938 {
2939 port = SCM_COERCE_OUTPORT (port);
2940 SCM_VALIDATE_OPENPORT (1, port);
2941 return scm_from_long (SCM_LINUM (port));
2942 }
2943 #undef FUNC_NAME
2944
2945 SCM_DEFINE (scm_set_port_line_x, "set-port-line!", 2, 0, 0,
2946 (SCM port, SCM line),
2947 "Set the current line number for @var{port} to @var{line}. The\n"
2948 "first line of a file is 0.")
2949 #define FUNC_NAME s_scm_set_port_line_x
2950 {
2951 port = SCM_COERCE_OUTPORT (port);
2952 SCM_VALIDATE_OPENPORT (1, port);
2953 SCM_PTAB_ENTRY (port)->line_number = scm_to_long (line);
2954 return SCM_UNSPECIFIED;
2955 }
2956 #undef FUNC_NAME
2957
2958 SCM_DEFINE (scm_port_column, "port-column", 1, 0, 0,
2959 (SCM port),
2960 "Return the current column number of @var{port}.\n"
2961 "If the number is\n"
2962 "unknown, the result is #f. Otherwise, the result is a 0-origin integer\n"
2963 "- i.e. the first character of the first line is line 0, column 0.\n"
2964 "(However, when you display a file position, for example in an error\n"
2965 "message, we recommend you add 1 to get 1-origin integers. This is\n"
2966 "because lines and column numbers traditionally start with 1, and that is\n"
2967 "what non-programmers will find most natural.)")
2968 #define FUNC_NAME s_scm_port_column
2969 {
2970 port = SCM_COERCE_OUTPORT (port);
2971 SCM_VALIDATE_OPENPORT (1, port);
2972 return scm_from_int (SCM_COL (port));
2973 }
2974 #undef FUNC_NAME
2975
2976 SCM_DEFINE (scm_set_port_column_x, "set-port-column!", 2, 0, 0,
2977 (SCM port, SCM column),
2978 "Set the current column of @var{port}. Before reading the first\n"
2979 "character on a line the column should be 0.")
2980 #define FUNC_NAME s_scm_set_port_column_x
2981 {
2982 port = SCM_COERCE_OUTPORT (port);
2983 SCM_VALIDATE_OPENPORT (1, port);
2984 SCM_PTAB_ENTRY (port)->column_number = scm_to_int (column);
2985 return SCM_UNSPECIFIED;
2986 }
2987 #undef FUNC_NAME
2988
2989 SCM_DEFINE (scm_port_filename, "port-filename", 1, 0, 0,
2990 (SCM port),
2991 "Return the filename associated with @var{port}, or @code{#f}\n"
2992 "if no filename is associated with the port.")
2993 #define FUNC_NAME s_scm_port_filename
2994 {
2995 port = SCM_COERCE_OUTPORT (port);
2996 SCM_VALIDATE_OPENPORT (1, port);
2997 return SCM_FILENAME (port);
2998 }
2999 #undef FUNC_NAME
3000
3001 SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0,
3002 (SCM port, SCM filename),
3003 "Change the filename associated with @var{port}, using the current input\n"
3004 "port if none is specified. Note that this does not change the port's\n"
3005 "source of data, but only the value that is returned by\n"
3006 "@code{port-filename} and reported in diagnostic output.")
3007 #define FUNC_NAME s_scm_set_port_filename_x
3008 {
3009 port = SCM_COERCE_OUTPORT (port);
3010 SCM_VALIDATE_OPENPORT (1, port);
3011 /* We allow the user to set the filename to whatever he likes. */
3012 SCM_SET_FILENAME (port, filename);
3013 return SCM_UNSPECIFIED;
3014 }
3015 #undef FUNC_NAME
3016
3017
3018 \f
3019
3020 /* Implementation helpers for port printing functions. */
3021
3022 void
3023 scm_print_port_mode (SCM exp, SCM port)
3024 {
3025 scm_puts_unlocked (SCM_CLOSEDP (exp)
3026 ? "closed: "
3027 : (SCM_RDNG & SCM_CELL_WORD_0 (exp)
3028 ? (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
3029 ? "input-output: "
3030 : "input: ")
3031 : (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
3032 ? "output: "
3033 : "bogus: ")),
3034 port);
3035 }
3036
3037 int
3038 scm_port_print (SCM exp, SCM port, scm_print_state *pstate SCM_UNUSED)
3039 {
3040 char *type = SCM_PTOBNAME (SCM_PTOBNUM (exp));
3041 if (!type)
3042 type = "port";
3043 scm_puts_unlocked ("#<", port);
3044 scm_print_port_mode (exp, port);
3045 scm_puts_unlocked (type, port);
3046 scm_putc_unlocked (' ', port);
3047 scm_uintprint (SCM_CELL_WORD_1 (exp), 16, port);
3048 scm_putc_unlocked ('>', port);
3049 return 1;
3050 }
3051
3052
3053 \f
3054
3055 /* Iterating over all ports. */
3056
/* Closure handed through scm_c_weak_set_fold to for_each_trampoline
   by scm_c_port_for_each.  */
struct for_each_data
{
  /* Callback to invoke once per port.  */
  void (*proc) (void *data, SCM p);
  /* Opaque pointer passed back as the first argument of PROC.  */
  void *data;
};
3062
3063 static SCM
3064 for_each_trampoline (void *data, SCM port, SCM result)
3065 {
3066 struct for_each_data *d = data;
3067
3068 d->proc (d->data, port);
3069
3070 return result;
3071 }
3072
3073 void
3074 scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data)
3075 {
3076 struct for_each_data d;
3077
3078 d.proc = proc;
3079 d.data = data;
3080
3081 scm_c_weak_set_fold (for_each_trampoline, &d, SCM_EOL,
3082 scm_i_port_weak_set);
3083 }
3084
3085 static void
3086 scm_for_each_trampoline (void *data, SCM port)
3087 {
3088 scm_call_1 (SCM_PACK_POINTER (data), port);
3089 }
3090
3091 SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0,
3092 (SCM proc),
3093 "Apply @var{proc} to each port in the Guile port table\n"
3094 "in turn. The return value is unspecified. More specifically,\n"
3095 "@var{proc} is applied exactly once to every port that exists\n"
3096 "in the system at the time @code{port-for-each} is invoked.\n"
3097 "Changes to the port table while @code{port-for-each} is running\n"
3098 "have no effect as far as @code{port-for-each} is concerned.")
3099 #define FUNC_NAME s_scm_port_for_each
3100 {
3101 SCM_VALIDATE_PROC (1, proc);
3102
3103 scm_c_port_for_each (scm_for_each_trampoline, SCM_UNPACK_POINTER (proc));
3104
3105 return SCM_UNSPECIFIED;
3106 }
3107 #undef FUNC_NAME
3108
3109 static void
3110 flush_output_port (void *closure, SCM port)
3111 {
3112 if (SCM_OPOUTPORTP (port))
3113 scm_flush_unlocked (port);
3114 }
3115
3116 SCM_DEFINE (scm_flush_all_ports, "flush-all-ports", 0, 0, 0,
3117 (),
3118 "Equivalent to calling @code{force-output} on\n"
3119 "all open output ports. The return value is unspecified.")
3120 #define FUNC_NAME s_scm_flush_all_ports
3121 {
3122 scm_c_port_for_each (&flush_output_port, NULL);
3123 return SCM_UNSPECIFIED;
3124 }
3125 #undef FUNC_NAME
3126
3127
3128 \f
3129
3130 /* Void ports. */
3131
/* Port type code for void ports.  Zero until scm_init_ports stores
   the value returned by scm_make_port_type.  */
scm_t_bits scm_tc16_void_port = 0;
3133
3134 static int fill_input_void_port (SCM port SCM_UNUSED)
3135 {
3136 return EOF;
3137 }
3138
3139 static void
3140 write_void_port (SCM port SCM_UNUSED,
3141 const void *data SCM_UNUSED,
3142 size_t size SCM_UNUSED)
3143 {
3144 }
3145
3146 static SCM
3147 scm_i_void_port (long mode_bits)
3148 {
3149 SCM ret;
3150
3151 ret = scm_c_make_port (scm_tc16_void_port, mode_bits, 0);
3152
3153 scm_port_non_buffer (SCM_PTAB_ENTRY (ret));
3154
3155 return ret;
3156 }
3157
3158 SCM
3159 scm_void_port (char *mode_str)
3160 {
3161 return scm_i_void_port (scm_mode_bits (mode_str));
3162 }
3163
3164 SCM_DEFINE (scm_sys_make_void_port, "%make-void-port", 1, 0, 0,
3165 (SCM mode),
3166 "Create and return a new void port. A void port acts like\n"
3167 "@file{/dev/null}. The @var{mode} argument\n"
3168 "specifies the input/output modes for this port: see the\n"
3169 "documentation for @code{open-file} in @ref{File Ports}.")
3170 #define FUNC_NAME s_scm_sys_make_void_port
3171 {
3172 return scm_i_void_port (scm_i_mode_bits (mode));
3173 }
3174 #undef FUNC_NAME
3175
3176
3177 \f
3178
3179 /* Initialization. */
3180
/* Initialize the port subsystem: define the SEEK_* constants, register
   the void port type, create the fluids for the current ports and the
   weak set of ports, pull in the generated "ports.x", and set up the
   default encoding and conversion-strategy fluids.  The statements
   depend on each other's order; do not rearrange them.  */
void
scm_init_ports ()
{
  /* lseek() symbols. */
  scm_c_define ("SEEK_SET", scm_from_int (SEEK_SET));
  scm_c_define ("SEEK_CUR", scm_from_int (SEEK_CUR));
  scm_c_define ("SEEK_END", scm_from_int (SEEK_END));

  /* Void ports only need the two mandatory hooks.  */
  scm_tc16_void_port = scm_make_port_type ("void", fill_input_void_port,
                                           write_void_port);

  /* Fluids behind the current input, output, error and load ports.  */
  cur_inport_fluid = scm_make_fluid ();
  cur_outport_fluid = scm_make_fluid ();
  cur_errport_fluid = scm_make_fluid ();
  cur_loadport_fluid = scm_make_fluid ();

  /* The set that scm_c_port_for_each iterates over.  */
  scm_i_port_weak_set = scm_c_make_weak_set (31);

  /* NOTE(review): generated file; presumably registers the procedures
     and variables declared with SCM_DEFINE etc. in this file --
     confirm against the snarfer output.  */
#include "libguile/ports.x"

  /* Use Latin-1 as the default port encoding.
     NOTE(review): the stored default is #f; presumably #f denotes
     Latin-1 here -- confirm.  */
  SCM_VARIABLE_SET (default_port_encoding_var,
                    scm_make_fluid_with_default (SCM_BOOL_F));
  scm_port_encoding_init = 1;

  SCM_VARIABLE_SET (default_conversion_strategy_var,
                    scm_make_fluid_with_default (sym_substitute));
  scm_conversion_strategy_init = 1;

  /* These bindings are used when boot-9 turns `current-input-port' et
     al into parameters. They are then removed from the guile module. */
  scm_c_define ("%current-input-port-fluid", cur_inport_fluid);
  scm_c_define ("%current-output-port-fluid", cur_outport_fluid);
  scm_c_define ("%current-error-port-fluid", cur_errport_fluid);
}
3216
3217 /*
3218 Local Variables:
3219 c-file-style: "gnu"
3220 End:
3221 */