Merge commit '60617d819d77a1b92ed6c557a0b49b8e9a8e97b9'
[bpt/guile.git] / libguile / ports.c
1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2006,
2 * 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
18 */
19
20
21 \f
22 /* Headers. */
23
24 #define _LARGEFILE64_SOURCE /* ask for stat64 etc */
25
26 #ifdef HAVE_CONFIG_H
27 # include <config.h>
28 #endif
29
30 #include <stdio.h>
31 #include <errno.h>
32 #include <fcntl.h> /* for chsize on mingw */
33 #include <assert.h>
34 #include <iconv.h>
35 #include <uniconv.h>
36 #include <unistr.h>
37 #include <striconveh.h>
38
39 #include <assert.h>
40
41 #include "libguile/_scm.h"
42 #include "libguile/async.h"
43 #include "libguile/deprecation.h"
44 #include "libguile/eval.h"
45 #include "libguile/fports.h" /* direct access for seek and truncate */
46 #include "libguile/goops.h"
47 #include "libguile/smob.h"
48 #include "libguile/chars.h"
49 #include "libguile/dynwind.h"
50
51 #include "libguile/keywords.h"
52 #include "libguile/hashtab.h"
53 #include "libguile/root.h"
54 #include "libguile/strings.h"
55 #include "libguile/mallocs.h"
56 #include "libguile/validate.h"
57 #include "libguile/ports.h"
58 #include "libguile/ports-internal.h"
59 #include "libguile/vectors.h"
60 #include "libguile/weak-set.h"
61 #include "libguile/fluids.h"
62 #include "libguile/eq.h"
63 #include "libguile/alist.h"
64
65 #ifdef HAVE_STRING_H
66 #include <string.h>
67 #endif
68
69 #ifdef HAVE_IO_H
70 #include <io.h>
71 #endif
72
73 #ifdef HAVE_UNISTD_H
74 #include <unistd.h>
75 #endif
76
77 #ifdef HAVE_SYS_IOCTL_H
78 #include <sys/ioctl.h>
79 #endif
80
81 /* Mingw (version 3.4.5, circa 2006) has ftruncate as an alias for chsize
82 already, but have this code here in case that wasn't so in past versions,
83 or perhaps to help other minimal DOS environments.
84
85 gnulib ftruncate.c has code using fcntl F_CHSIZE and F_FREESP, which
86 might be possibilities if we've got other systems without ftruncate. */
87
88 #if defined HAVE_CHSIZE && ! defined HAVE_FTRUNCATE
89 #define ftruncate(fd, size) chsize (fd, size)
90 #undef HAVE_FTRUNCATE
91 #define HAVE_FTRUNCATE 1
92 #endif
93
94 \f
95 /* Port encodings are case-insensitive ASCII strings. */
96 static char
97 ascii_toupper (char c)
98 {
99 return (c < 'a' || c > 'z') ? c : ('A' + (c - 'a'));
100 }
101
102 /* It is only necessary to use this function on encodings that come from
103 the user and have not been canonicalized yet. Encodings that are set
104 on ports or in the default encoding fluid are in upper-case, and can
105 be compared with strcmp. */
106 static int
107 encoding_matches (const char *enc, const char *upper)
108 {
109 if (!enc)
110 enc = "ISO-8859-1";
111
112 while (*enc)
113 if (ascii_toupper (*enc++) != *upper++)
114 return 0;
115
116 return !*upper;
117 }
118
119 static char*
120 canonicalize_encoding (const char *enc)
121 {
122 char *ret;
123 int i;
124
125 if (!enc)
126 return "ISO-8859-1";
127
128 ret = scm_gc_strdup (enc, "port");
129
130 for (i = 0; ret[i]; i++)
131 {
132 if (ret[i] > 127)
133 /* Restrict to ASCII. */
134 scm_misc_error (NULL, "invalid character encoding ~s",
135 scm_list_1 (scm_from_latin1_string (enc)));
136 else
137 ret[i] = ascii_toupper (ret[i]);
138 }
139
140 return ret;
141 }
142
143
144 \f
145 /* The port kind table --- a dynamically resized array of port types. */
146
147
148 /* scm_ptobs scm_numptob
149 * implement a dynamically resized array of ptob records.
150 * Indexes into this table are used when generating type
151 * tags for smobjects (if you know a tag you can get an index and conversely).
152 */
153 static scm_t_ptob_descriptor **scm_ptobs = NULL;
154 static long scm_numptob = 0; /* Number of port types. */
155 static long scm_ptobs_size = 0; /* Number of slots in the port type
156 table. */
157 static scm_i_pthread_mutex_t scm_ptobs_lock = SCM_I_PTHREAD_MUTEX_INITIALIZER;
158
159 long
160 scm_c_num_port_types (void)
161 {
162 long ret;
163
164 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
165 ret = scm_numptob;
166 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
167
168 return ret;
169 }
170
171 scm_t_ptob_descriptor*
172 scm_c_port_type_ref (long ptobnum)
173 {
174 scm_t_ptob_descriptor *ret = NULL;
175
176 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
177
178 if (0 <= ptobnum && ptobnum < scm_numptob)
179 ret = scm_ptobs[ptobnum];
180
181 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
182
183 if (!ret)
184 scm_out_of_range ("scm_c_port_type_ref", scm_from_long (ptobnum));
185
186 return ret;
187 }
188
189 long
190 scm_c_port_type_add_x (scm_t_ptob_descriptor *desc)
191 {
192 long ret = -1;
193
194 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
195
196 if (scm_numptob + 1 < SCM_I_MAX_PORT_TYPE_COUNT)
197 {
198 if (scm_numptob == scm_ptobs_size)
199 {
200 unsigned long old_size = scm_ptobs_size;
201 scm_t_ptob_descriptor **old_ptobs = scm_ptobs;
202
203 /* Currently there are only 9 predefined port types, so one
204 resize will cover it. */
205 scm_ptobs_size = old_size + 10;
206
207 if (scm_ptobs_size >= SCM_I_MAX_PORT_TYPE_COUNT)
208 scm_ptobs_size = SCM_I_MAX_PORT_TYPE_COUNT;
209
210 scm_ptobs = scm_gc_malloc (sizeof (*scm_ptobs) * scm_ptobs_size,
211 "scm_ptobs");
212
213 memcpy (scm_ptobs, old_ptobs, sizeof (*scm_ptobs) * scm_numptob);
214 }
215
216 ret = scm_numptob++;
217 scm_ptobs[ret] = desc;
218 }
219
220 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
221
222 if (ret < 0)
223 scm_out_of_range ("scm_c_port_type_add_x", scm_from_long (scm_numptob));
224
225 return ret;
226 }
227
228 /*
229 * We choose to use an interface similar to the smob interface with
230 * fill_input and write as standard fields, passed to the port
231 * type constructor, and optional fields set by setters.
232 */
233
234 static void
235 flush_port_default (SCM port SCM_UNUSED)
236 {
237 }
238
239 static void
240 end_input_default (SCM port SCM_UNUSED, int offset SCM_UNUSED)
241 {
242 }
243
244 scm_t_bits
245 scm_make_port_type (char *name,
246 int (*fill_input) (SCM port),
247 void (*write) (SCM port, const void *data, size_t size))
248 {
249 scm_t_ptob_descriptor *desc;
250 long ptobnum;
251
252 desc = scm_gc_malloc_pointerless (sizeof (*desc), "port-type");
253 memset (desc, 0, sizeof (*desc));
254
255 desc->name = name;
256 desc->print = scm_port_print;
257 desc->write = write;
258 desc->flush = flush_port_default;
259 desc->end_input = end_input_default;
260 desc->fill_input = fill_input;
261
262 ptobnum = scm_c_port_type_add_x (desc);
263
264 /* Make a class object if GOOPS is present. */
265 if (SCM_UNPACK (scm_port_class[0]) != 0)
266 scm_make_port_classes (ptobnum, name);
267
268 return scm_tc7_port + ptobnum * 256;
269 }
270
271 void
272 scm_set_port_mark (scm_t_bits tc, SCM (*mark) (SCM))
273 {
274 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->mark = mark;
275 }
276
277 void
278 scm_set_port_free (scm_t_bits tc, size_t (*free) (SCM))
279 {
280 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->free = free;
281 }
282
283 void
284 scm_set_port_print (scm_t_bits tc, int (*print) (SCM exp, SCM port,
285 scm_print_state *pstate))
286 {
287 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->print = print;
288 }
289
290 void
291 scm_set_port_equalp (scm_t_bits tc, SCM (*equalp) (SCM, SCM))
292 {
293 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->equalp = equalp;
294 }
295
296 void
297 scm_set_port_close (scm_t_bits tc, int (*close) (SCM))
298 {
299 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->close = close;
300 }
301
302 void
303 scm_set_port_flush (scm_t_bits tc, void (*flush) (SCM port))
304 {
305 scm_t_ptob_descriptor *ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tc));
306 ptob->flush = flush;
307 ptob->flags |= SCM_PORT_TYPE_HAS_FLUSH;
308 }
309
310 void
311 scm_set_port_end_input (scm_t_bits tc, void (*end_input) (SCM port, int offset))
312 {
313 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->end_input = end_input;
314 }
315
316 void
317 scm_set_port_seek (scm_t_bits tc, scm_t_off (*seek) (SCM, scm_t_off, int))
318 {
319 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->seek = seek;
320 }
321
322 void
323 scm_set_port_truncate (scm_t_bits tc, void (*truncate) (SCM, scm_t_off))
324 {
325 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->truncate = truncate;
326 }
327
328 void
329 scm_set_port_input_waiting (scm_t_bits tc, int (*input_waiting) (SCM))
330 {
331 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->input_waiting = input_waiting;
332 }
333
334 void
335 scm_set_port_setvbuf (scm_t_bits tc, void (*setvbuf) (SCM, long, long))
336 {
337 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->setvbuf = setvbuf;
338 }
339
340 static void
341 scm_i_set_pending_eof (SCM port)
342 {
343 SCM_PORT_GET_INTERNAL (port)->pending_eof = 1;
344 }
345
346 static void
347 scm_i_clear_pending_eof (SCM port)
348 {
349 SCM_PORT_GET_INTERNAL (port)->pending_eof = 0;
350 }
351
352 SCM_DEFINE (scm_i_port_property, "%port-property", 2, 0, 0,
353 (SCM port, SCM key),
354 "Return the property of @var{port} associated with @var{key}.")
355 #define FUNC_NAME s_scm_i_port_property
356 {
357 scm_i_pthread_mutex_t *lock;
358 SCM result;
359
360 SCM_VALIDATE_OPPORT (1, port);
361 scm_c_lock_port (port, &lock);
362 result = scm_assq_ref (SCM_PORT_GET_INTERNAL (port)->alist, key);
363 if (lock)
364 scm_i_pthread_mutex_unlock (lock);
365 return result;
366 }
367 #undef FUNC_NAME
368
369 SCM_DEFINE (scm_i_set_port_property_x, "%set-port-property!", 3, 0, 0,
370 (SCM port, SCM key, SCM value),
371 "Set the property of @var{port} associated with @var{key} to @var{value}.")
372 #define FUNC_NAME s_scm_i_set_port_property_x
373 {
374 scm_i_pthread_mutex_t *lock;
375 scm_t_port_internal *pti;
376
377 SCM_VALIDATE_OPPORT (1, port);
378 scm_c_lock_port (port, &lock);
379 pti = SCM_PORT_GET_INTERNAL (port);
380 pti->alist = scm_assq_set_x (pti->alist, key, value);
381 if (lock)
382 scm_i_pthread_mutex_unlock (lock);
383 return SCM_UNSPECIFIED;
384 }
385 #undef FUNC_NAME
386
387 \f
388
389 /* Standard ports --- current input, output, error, and more(!). */
390
391 static SCM cur_inport_fluid = SCM_BOOL_F;
392 static SCM cur_outport_fluid = SCM_BOOL_F;
393 static SCM cur_errport_fluid = SCM_BOOL_F;
394 static SCM cur_loadport_fluid = SCM_BOOL_F;
395
396 SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
397 (),
398 "Return the current input port. This is the default port used\n"
399 "by many input procedures. Initially, @code{current-input-port}\n"
400 "returns the @dfn{standard input} in Unix and C terminology.")
401 #define FUNC_NAME s_scm_current_input_port
402 {
403 if (scm_is_true (cur_inport_fluid))
404 return scm_fluid_ref (cur_inport_fluid);
405 else
406 return SCM_BOOL_F;
407 }
408 #undef FUNC_NAME
409
410 SCM_DEFINE (scm_current_output_port, "current-output-port", 0, 0, 0,
411 (),
412 "Return the current output port. This is the default port used\n"
413 "by many output procedures. Initially,\n"
414 "@code{current-output-port} returns the @dfn{standard output} in\n"
415 "Unix and C terminology.")
416 #define FUNC_NAME s_scm_current_output_port
417 {
418 if (scm_is_true (cur_outport_fluid))
419 return scm_fluid_ref (cur_outport_fluid);
420 else
421 return SCM_BOOL_F;
422 }
423 #undef FUNC_NAME
424
425 SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0,
426 (),
427 "Return the port to which errors and warnings should be sent (the\n"
428 "@dfn{standard error} in Unix and C terminology).")
429 #define FUNC_NAME s_scm_current_error_port
430 {
431 if (scm_is_true (cur_errport_fluid))
432 return scm_fluid_ref (cur_errport_fluid);
433 else
434 return SCM_BOOL_F;
435 }
436 #undef FUNC_NAME
437
438 static SCM current_warning_port_var;
439 static scm_i_pthread_once_t current_warning_port_once = SCM_I_PTHREAD_ONCE_INIT;
440
441 static void
442 init_current_warning_port_var (void)
443 {
444 current_warning_port_var
445 = scm_c_private_variable ("guile", "current-warning-port");
446 }
447
448 SCM
449 scm_current_warning_port (void)
450 {
451 scm_i_pthread_once (&current_warning_port_once,
452 init_current_warning_port_var);
453 return scm_call_0 (scm_variable_ref (current_warning_port_var));
454 }
455
456 SCM_DEFINE (scm_current_load_port, "current-load-port", 0, 0, 0,
457 (),
458 "Return the current-load-port.\n"
459 "The load port is used internally by @code{primitive-load}.")
460 #define FUNC_NAME s_scm_current_load_port
461 {
462 return scm_fluid_ref (cur_loadport_fluid);
463 }
464 #undef FUNC_NAME
465
466 SCM_DEFINE (scm_set_current_input_port, "set-current-input-port", 1, 0, 0,
467 (SCM port),
468 "@deffnx {Scheme Procedure} set-current-output-port port\n"
469 "@deffnx {Scheme Procedure} set-current-error-port port\n"
470 "Change the ports returned by @code{current-input-port},\n"
471 "@code{current-output-port} and @code{current-error-port}, respectively,\n"
472 "so that they use the supplied @var{port} for input or output.")
473 #define FUNC_NAME s_scm_set_current_input_port
474 {
475 SCM oinp = scm_fluid_ref (cur_inport_fluid);
476 SCM_VALIDATE_OPINPORT (1, port);
477 scm_fluid_set_x (cur_inport_fluid, port);
478 return oinp;
479 }
480 #undef FUNC_NAME
481
482
483 SCM_DEFINE (scm_set_current_output_port, "set-current-output-port", 1, 0, 0,
484 (SCM port),
485 "Set the current default output port to @var{port}.")
486 #define FUNC_NAME s_scm_set_current_output_port
487 {
488 SCM ooutp = scm_fluid_ref (cur_outport_fluid);
489 port = SCM_COERCE_OUTPORT (port);
490 SCM_VALIDATE_OPOUTPORT (1, port);
491 scm_fluid_set_x (cur_outport_fluid, port);
492 return ooutp;
493 }
494 #undef FUNC_NAME
495
496
497 SCM_DEFINE (scm_set_current_error_port, "set-current-error-port", 1, 0, 0,
498 (SCM port),
499 "Set the current default error port to @var{port}.")
500 #define FUNC_NAME s_scm_set_current_error_port
501 {
502 SCM oerrp = scm_fluid_ref (cur_errport_fluid);
503 port = SCM_COERCE_OUTPORT (port);
504 SCM_VALIDATE_OPOUTPORT (1, port);
505 scm_fluid_set_x (cur_errport_fluid, port);
506 return oerrp;
507 }
508 #undef FUNC_NAME
509
510
511 SCM
512 scm_set_current_warning_port (SCM port)
513 {
514 scm_i_pthread_once (&current_warning_port_once,
515 init_current_warning_port_var);
516 return scm_call_1 (scm_variable_ref (current_warning_port_var), port);
517 }
518
519
520 void
521 scm_dynwind_current_input_port (SCM port)
522 #define FUNC_NAME NULL
523 {
524 SCM_VALIDATE_OPINPORT (1, port);
525 scm_dynwind_fluid (cur_inport_fluid, port);
526 }
527 #undef FUNC_NAME
528
529 void
530 scm_dynwind_current_output_port (SCM port)
531 #define FUNC_NAME NULL
532 {
533 port = SCM_COERCE_OUTPORT (port);
534 SCM_VALIDATE_OPOUTPORT (1, port);
535 scm_dynwind_fluid (cur_outport_fluid, port);
536 }
537 #undef FUNC_NAME
538
539 void
540 scm_dynwind_current_error_port (SCM port)
541 #define FUNC_NAME NULL
542 {
543 port = SCM_COERCE_OUTPORT (port);
544 SCM_VALIDATE_OPOUTPORT (1, port);
545 scm_dynwind_fluid (cur_errport_fluid, port);
546 }
547 #undef FUNC_NAME
548
549 void
550 scm_i_dynwind_current_load_port (SCM port)
551 {
552 scm_dynwind_fluid (cur_loadport_fluid, port);
553 }
554
555
556 \f
557
558 /* Retrieving a port's mode. */
559
560 /* Return the flags that characterize a port based on the mode
561 * string used to open a file for that port.
562 *
563 * See PORT FLAGS in scm.h
564 */
565
566 static long
567 scm_i_mode_bits_n (SCM modes)
568 {
569 return (SCM_OPN
570 | (scm_i_string_contains_char (modes, 'r')
571 || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0)
572 | (scm_i_string_contains_char (modes, 'w')
573 || scm_i_string_contains_char (modes, 'a')
574 || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0)
575 | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0)
576 | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0));
577 }
578
579 long
580 scm_mode_bits (char *modes)
581 {
582 /* Valid characters are rw+a0l. So, use latin1. */
583 return scm_i_mode_bits (scm_from_latin1_string (modes));
584 }
585
586 long
587 scm_i_mode_bits (SCM modes)
588 {
589 long bits;
590
591 if (!scm_is_string (modes))
592 scm_wrong_type_arg_msg (NULL, 0, modes, "string");
593
594 bits = scm_i_mode_bits_n (modes);
595 scm_remember_upto_here_1 (modes);
596 return bits;
597 }
598
599 /* Return the mode flags from an open port.
600 * Some modes such as "append" are only used when opening
601 * a file and are not returned here. */
602
603 SCM_DEFINE (scm_port_mode, "port-mode", 1, 0, 0,
604 (SCM port),
605 "Return the port modes associated with the open port @var{port}.\n"
606 "These will not necessarily be identical to the modes used when\n"
607 "the port was opened, since modes such as \"append\" which are\n"
608 "used only during port creation are not retained.")
609 #define FUNC_NAME s_scm_port_mode
610 {
611 char modes[4];
612 modes[0] = '\0';
613
614 port = SCM_COERCE_OUTPORT (port);
615 SCM_VALIDATE_OPPORT (1, port);
616 if (SCM_CELL_WORD_0 (port) & SCM_RDNG) {
617 if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
618 strcpy (modes, "r+");
619 else
620 strcpy (modes, "r");
621 }
622 else if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
623 strcpy (modes, "w");
624 if (SCM_CELL_WORD_0 (port) & SCM_BUF0)
625 strcat (modes, "0");
626
627 return scm_from_latin1_string (modes);
628 }
629 #undef FUNC_NAME
630
631 \f
632
633 /* The port table --- a weak set of all ports.
634
635 We need a global registry of ports to flush them all at exit, and to
636 get all the ports matching a file descriptor. */
637 SCM scm_i_port_weak_set;
638
639
640 \f
641
642 /* Port finalization. */
643
644 struct do_free_data
645 {
646 scm_t_ptob_descriptor *ptob;
647 SCM port;
648 };
649
650 static SCM
651 do_free (void *body_data)
652 {
653 struct do_free_data *data = body_data;
654
655 /* `close' is for explicit `close-port' by user. `free' is for this
656 purpose: ports collected by the GC. */
657 data->ptob->free (data->port);
658
659 return SCM_BOOL_T;
660 }
661
662 /* Finalize the object (a port) pointed to by PTR. */
663 static void
664 finalize_port (void *ptr, void *data)
665 {
666 SCM port = SCM_PACK_POINTER (ptr);
667
668 if (!SCM_PORTP (port))
669 abort ();
670
671 if (SCM_OPENP (port))
672 {
673 struct do_free_data data;
674
675 SCM_CLR_PORT_OPEN_FLAG (port);
676
677 data.ptob = SCM_PORT_DESCRIPTOR (port);
678 data.port = port;
679
680 scm_internal_catch (SCM_BOOL_T, do_free, &data,
681 scm_handle_by_message_noexit, NULL);
682
683 scm_gc_ports_collected++;
684 }
685 }
686
687
688 \f
689
690 SCM
691 scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits,
692 const char *encoding,
693 scm_t_string_failed_conversion_handler handler,
694 scm_t_bits stream)
695 {
696 SCM ret;
697 scm_t_port *entry;
698 scm_t_port_internal *pti;
699 scm_t_ptob_descriptor *ptob;
700
701 entry = scm_gc_typed_calloc (scm_t_port);
702 pti = scm_gc_typed_calloc (scm_t_port_internal);
703 ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tag));
704
705 ret = scm_words (tag | mode_bits, 3);
706 SCM_SET_CELL_WORD_1 (ret, (scm_t_bits) entry);
707 SCM_SET_CELL_WORD_2 (ret, (scm_t_bits) ptob);
708
709 entry->lock = scm_gc_malloc_pointerless (sizeof (*entry->lock), "port lock");
710 scm_i_pthread_mutex_init (entry->lock, scm_i_pthread_mutexattr_recursive);
711
712 entry->internal = pti;
713 entry->file_name = SCM_BOOL_F;
714 entry->rw_active = SCM_PORT_NEITHER;
715 entry->port = ret;
716 entry->stream = stream;
717
718 if (encoding_matches (encoding, "UTF-8"))
719 {
720 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
721 entry->encoding = "UTF-8";
722 }
723 else if (encoding_matches (encoding, "ISO-8859-1"))
724 {
725 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
726 entry->encoding = "ISO-8859-1";
727 }
728 else
729 {
730 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
731 entry->encoding = canonicalize_encoding (encoding);
732 }
733
734 entry->ilseq_handler = handler;
735 pti->iconv_descriptors = NULL;
736
737 pti->at_stream_start_for_bom_read = 1;
738 pti->at_stream_start_for_bom_write = 1;
739
740 pti->pending_eof = 0;
741 pti->alist = SCM_EOL;
742
743 if (SCM_PORT_DESCRIPTOR (ret)->free)
744 scm_i_set_finalizer (SCM2PTR (ret), finalize_port, NULL);
745
746 if (SCM_PORT_DESCRIPTOR (ret)->flags & SCM_PORT_TYPE_HAS_FLUSH)
747 scm_weak_set_add_x (scm_i_port_weak_set, ret);
748
749 return ret;
750 }
751
752 SCM
753 scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream)
754 {
755 return scm_c_make_port_with_encoding (tag, mode_bits,
756 scm_i_default_port_encoding (),
757 scm_i_default_port_conversion_handler (),
758 stream);
759 }
760
761 SCM
762 scm_new_port_table_entry (scm_t_bits tag)
763 {
764 return scm_c_make_port (tag, 0, 0);
765 }
766
767 \f
768
769 /* Predicates. */
770
771 SCM_DEFINE (scm_port_p, "port?", 1, 0, 0,
772 (SCM x),
773 "Return a boolean indicating whether @var{x} is a port.\n"
774 "Equivalent to @code{(or (input-port? @var{x}) (output-port?\n"
775 "@var{x}))}.")
776 #define FUNC_NAME s_scm_port_p
777 {
778 return scm_from_bool (SCM_PORTP (x));
779 }
780 #undef FUNC_NAME
781
782 SCM_DEFINE (scm_input_port_p, "input-port?", 1, 0, 0,
783 (SCM x),
784 "Return @code{#t} if @var{x} is an input port, otherwise return\n"
785 "@code{#f}. Any object satisfying this predicate also satisfies\n"
786 "@code{port?}.")
787 #define FUNC_NAME s_scm_input_port_p
788 {
789 return scm_from_bool (SCM_INPUT_PORT_P (x));
790 }
791 #undef FUNC_NAME
792
793 SCM_DEFINE (scm_output_port_p, "output-port?", 1, 0, 0,
794 (SCM x),
795 "Return @code{#t} if @var{x} is an output port, otherwise return\n"
796 "@code{#f}. Any object satisfying this predicate also satisfies\n"
797 "@code{port?}.")
798 #define FUNC_NAME s_scm_output_port_p
799 {
800 x = SCM_COERCE_OUTPORT (x);
801 return scm_from_bool (SCM_OUTPUT_PORT_P (x));
802 }
803 #undef FUNC_NAME
804
805 SCM_DEFINE (scm_port_closed_p, "port-closed?", 1, 0, 0,
806 (SCM port),
807 "Return @code{#t} if @var{port} is closed or @code{#f} if it is\n"
808 "open.")
809 #define FUNC_NAME s_scm_port_closed_p
810 {
811 SCM_VALIDATE_PORT (1, port);
812 return scm_from_bool (!SCM_OPPORTP (port));
813 }
814 #undef FUNC_NAME
815
816 SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0,
817 (SCM x),
818 "Return @code{#t} if @var{x} is an end-of-file object; otherwise\n"
819 "return @code{#f}.")
820 #define FUNC_NAME s_scm_eof_object_p
821 {
822 return scm_from_bool (SCM_EOF_OBJECT_P (x));
823 }
824 #undef FUNC_NAME
825
826
827 \f
828
829 /* Closing ports. */
830
831 static void close_iconv_descriptors (scm_t_iconv_descriptors *id);
832
833 /* scm_close_port
834 * Call the close operation on a port object.
835 * see also scm_close.
836 */
837 SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0,
838 (SCM port),
839 "Close the specified port object. Return @code{#t} if it\n"
840 "successfully closes a port or @code{#f} if it was already\n"
841 "closed. An exception may be raised if an error occurs, for\n"
842 "example when flushing buffered output. See also @ref{Ports and\n"
843 "File Descriptors, close}, for a procedure which can close file\n"
844 "descriptors.")
845 #define FUNC_NAME s_scm_close_port
846 {
847 scm_t_port_internal *pti;
848 int rv;
849
850 port = SCM_COERCE_OUTPORT (port);
851
852 SCM_VALIDATE_PORT (1, port);
853 if (SCM_CLOSEDP (port))
854 return SCM_BOOL_F;
855
856 pti = SCM_PORT_GET_INTERNAL (port);
857 SCM_CLR_PORT_OPEN_FLAG (port);
858
859 if (SCM_PORT_DESCRIPTOR (port)->flags & SCM_PORT_TYPE_HAS_FLUSH)
860 scm_weak_set_remove_x (scm_i_port_weak_set, port);
861
862 if (SCM_PORT_DESCRIPTOR (port)->close)
863 /* Note! This may throw an exception. Anything after this point
864 should be resilient to non-local exits. */
865 rv = SCM_PORT_DESCRIPTOR (port)->close (port);
866 else
867 rv = 0;
868
869 if (pti->iconv_descriptors)
870 {
871 /* If we don't get here, the iconv_descriptors finalizer will
872 clean up. */
873 close_iconv_descriptors (pti->iconv_descriptors);
874 pti->iconv_descriptors = NULL;
875 }
876
877 return scm_from_bool (rv >= 0);
878 }
879 #undef FUNC_NAME
880
881 SCM_DEFINE (scm_close_input_port, "close-input-port", 1, 0, 0,
882 (SCM port),
883 "Close the specified input port object. The routine has no effect if\n"
884 "the file has already been closed. An exception may be raised if an\n"
885 "error occurs. The value returned is unspecified.\n\n"
886 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
887 "which can close file descriptors.")
888 #define FUNC_NAME s_scm_close_input_port
889 {
890 SCM_VALIDATE_INPUT_PORT (1, port);
891 scm_close_port (port);
892 return SCM_UNSPECIFIED;
893 }
894 #undef FUNC_NAME
895
896 SCM_DEFINE (scm_close_output_port, "close-output-port", 1, 0, 0,
897 (SCM port),
898 "Close the specified output port object. The routine has no effect if\n"
899 "the file has already been closed. An exception may be raised if an\n"
900 "error occurs. The value returned is unspecified.\n\n"
901 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
902 "which can close file descriptors.")
903 #define FUNC_NAME s_scm_close_output_port
904 {
905 port = SCM_COERCE_OUTPORT (port);
906 SCM_VALIDATE_OUTPUT_PORT (1, port);
907 scm_close_port (port);
908 return SCM_UNSPECIFIED;
909 }
910 #undef FUNC_NAME
911
912
913 \f
914
915 /* Encoding characters to byte streams, and decoding byte streams to
916 characters. */
917
918 /* A fluid specifying the default encoding for newly created ports. If it is
919 a string, that is the encoding. If it is #f, it is in the "native"
920 (Latin-1) encoding. */
921 SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding");
922
923 static int scm_port_encoding_init = 0;
924
925 /* Use ENCODING as the default encoding for future ports. */
926 void
927 scm_i_set_default_port_encoding (const char *encoding)
928 {
929 if (!scm_port_encoding_init
930 || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
931 scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
932 SCM_EOL);
933
934 if (encoding_matches (encoding, "ISO-8859-1"))
935 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
936 else
937 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
938 scm_from_latin1_string (canonicalize_encoding (encoding)));
939 }
940
941 /* Return the name of the default encoding for newly created ports. */
942 const char *
943 scm_i_default_port_encoding (void)
944 {
945 if (!scm_port_encoding_init)
946 return "ISO-8859-1";
947 else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
948 return "ISO-8859-1";
949 else
950 {
951 SCM encoding;
952
953 encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
954 if (!scm_is_string (encoding))
955 return "ISO-8859-1";
956 else
957 return scm_i_string_chars (encoding);
958 }
959 }
960
961 /* A fluid specifying the default conversion handler for newly created
962 ports. Its value should be one of the symbols below. */
963 SCM_VARIABLE (default_conversion_strategy_var,
964 "%default-port-conversion-strategy");
965
966 /* Whether the above fluid is initialized. */
967 static int scm_conversion_strategy_init = 0;
968
969 /* The possible conversion strategies. */
970 SCM_SYMBOL (sym_error, "error");
971 SCM_SYMBOL (sym_substitute, "substitute");
972 SCM_SYMBOL (sym_escape, "escape");
973
974 /* Return the default failed encoding conversion policy for new created
975 ports. */
976 scm_t_string_failed_conversion_handler
977 scm_i_default_port_conversion_handler (void)
978 {
979 scm_t_string_failed_conversion_handler handler;
980
981 if (!scm_conversion_strategy_init
982 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
983 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
984 else
985 {
986 SCM fluid, value;
987
988 fluid = SCM_VARIABLE_REF (default_conversion_strategy_var);
989 value = scm_fluid_ref (fluid);
990
991 if (scm_is_eq (sym_substitute, value))
992 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
993 else if (scm_is_eq (sym_escape, value))
994 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
995 else
996 /* Default to 'error also when the fluid's value is not one of
997 the valid symbols. */
998 handler = SCM_FAILED_CONVERSION_ERROR;
999 }
1000
1001 return handler;
1002 }
1003
1004 /* Use HANDLER as the default conversion strategy for future ports. */
1005 void
1006 scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler
1007 handler)
1008 {
1009 SCM strategy;
1010
1011 if (!scm_conversion_strategy_init
1012 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
1013 scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
1014 SCM_EOL);
1015
1016 switch (handler)
1017 {
1018 case SCM_FAILED_CONVERSION_ERROR:
1019 strategy = sym_error;
1020 break;
1021
1022 case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE:
1023 strategy = sym_escape;
1024 break;
1025
1026 case SCM_FAILED_CONVERSION_QUESTION_MARK:
1027 strategy = sym_substitute;
1028 break;
1029
1030 default:
1031 abort ();
1032 }
1033
1034 scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var),
1035 strategy);
1036 }
1037
1038 static void
1039 scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port);
1040
1041 /* If the next LEN bytes from PORT are equal to those in BYTES, then
1042 return 1, else return 0. Leave the port position unchanged. */
1043 static int
1044 looking_at_bytes (SCM port, const unsigned char *bytes, int len)
1045 {
1046 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1047 int i = 0;
1048
1049 while (i < len && scm_peek_byte_or_eof_unlocked (port) == bytes[i])
1050 {
1051 pt->read_pos++;
1052 i++;
1053 }
1054 scm_i_unget_bytes_unlocked (bytes, i, port);
1055 return (i == len);
1056 }
1057
1058 static const unsigned char scm_utf8_bom[3] = {0xEF, 0xBB, 0xBF};
1059 static const unsigned char scm_utf16be_bom[2] = {0xFE, 0xFF};
1060 static const unsigned char scm_utf16le_bom[2] = {0xFF, 0xFE};
1061 static const unsigned char scm_utf32be_bom[4] = {0x00, 0x00, 0xFE, 0xFF};
1062 static const unsigned char scm_utf32le_bom[4] = {0xFF, 0xFE, 0x00, 0x00};
1063
1064 /* Decide what byte order to use for a UTF-16 port. Return "UTF-16BE"
1065 or "UTF-16LE". MODE must be either SCM_PORT_READ or SCM_PORT_WRITE,
1066 and specifies which operation is about to be done. The MODE
1067 determines how we will decide the byte order. We deliberately avoid
1068 reading from the port unless the user is about to do so. If the user
1069 is about to read, then we look for a BOM, and if present, we use it
1070 to determine the byte order. Otherwise we choose big endian, as
1071 recommended by the Unicode Standard. Note that the BOM (if any) is
1072 not consumed here. */
1073 static const char *
1074 decide_utf16_encoding (SCM port, scm_t_port_rw_active mode)
1075 {
1076 if (mode == SCM_PORT_READ
1077 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1078 && looking_at_bytes (port, scm_utf16le_bom, sizeof scm_utf16le_bom))
1079 return "UTF-16LE";
1080 else
1081 return "UTF-16BE";
1082 }
1083
1084 /* Decide what byte order to use for a UTF-32 port. Return "UTF-32BE"
1085 or "UTF-32LE". See the comment above 'decide_utf16_encoding' for
1086 details. */
1087 static const char *
1088 decide_utf32_encoding (SCM port, scm_t_port_rw_active mode)
1089 {
1090 if (mode == SCM_PORT_READ
1091 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1092 && looking_at_bytes (port, scm_utf32le_bom, sizeof scm_utf32le_bom))
1093 return "UTF-32LE";
1094 else
1095 return "UTF-32BE";
1096 }
1097
1098 static void
1099 finalize_iconv_descriptors (void *ptr, void *data)
1100 {
1101 close_iconv_descriptors (ptr);
1102 }
1103
1104 static scm_t_iconv_descriptors *
1105 open_iconv_descriptors (const char *encoding, int reading, int writing)
1106 {
1107 scm_t_iconv_descriptors *id;
1108 iconv_t input_cd, output_cd;
1109 size_t i;
1110
1111 input_cd = (iconv_t) -1;
1112 output_cd = (iconv_t) -1;
1113
1114 for (i = 0; encoding[i]; i++)
1115 if (encoding[i] > 127)
1116 goto invalid_encoding;
1117
1118 if (reading)
1119 {
1120 /* Open an input iconv conversion descriptor, from ENCODING
1121 to UTF-8. We choose UTF-8, not UTF-32, because iconv
1122 implementations can typically convert from anything to
1123 UTF-8, but not to UTF-32 (see
1124 <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
1125
1126 /* Assume opening an iconv descriptor causes about 16 KB of
1127 allocation. */
1128 scm_gc_register_allocation (16 * 1024);
1129
1130 input_cd = iconv_open ("UTF-8", encoding);
1131 if (input_cd == (iconv_t) -1)
1132 goto invalid_encoding;
1133 }
1134
1135 if (writing)
1136 {
1137 /* Assume opening an iconv descriptor causes about 16 KB of
1138 allocation. */
1139 scm_gc_register_allocation (16 * 1024);
1140
1141 output_cd = iconv_open (encoding, "UTF-8");
1142 if (output_cd == (iconv_t) -1)
1143 {
1144 if (input_cd != (iconv_t) -1)
1145 iconv_close (input_cd);
1146 goto invalid_encoding;
1147 }
1148 }
1149
1150 id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors");
1151 id->input_cd = input_cd;
1152 id->output_cd = output_cd;
1153
1154 /* Register a finalizer to close the descriptors. */
1155 scm_i_set_finalizer (id, finalize_iconv_descriptors, NULL);
1156
1157 return id;
1158
1159 invalid_encoding:
1160 {
1161 SCM err;
1162 err = scm_from_latin1_string (encoding);
1163 scm_misc_error ("open_iconv_descriptors",
1164 "invalid or unknown character encoding ~s",
1165 scm_list_1 (err));
1166 }
1167 }
1168
1169 static void
1170 close_iconv_descriptors (scm_t_iconv_descriptors *id)
1171 {
1172 if (id->input_cd != (iconv_t) -1)
1173 iconv_close (id->input_cd);
1174 if (id->output_cd != (iconv_t) -1)
1175 iconv_close (id->output_cd);
1176 id->input_cd = (void *) -1;
1177 id->output_cd = (void *) -1;
1178 }
1179
1180 scm_t_iconv_descriptors *
1181 scm_i_port_iconv_descriptors (SCM port, scm_t_port_rw_active mode)
1182 {
1183 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
1184
1185 assert (pti->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV);
1186
1187 if (!pti->iconv_descriptors)
1188 {
1189 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1190 const char *precise_encoding;
1191
1192 if (!pt->encoding)
1193 pt->encoding = "ISO-8859-1";
1194
1195 /* If the specified encoding is UTF-16 or UTF-32, then make
1196 that more precise by deciding what byte order to use. */
1197 if (strcmp (pt->encoding, "UTF-16") == 0)
1198 precise_encoding = decide_utf16_encoding (port, mode);
1199 else if (strcmp (pt->encoding, "UTF-32") == 0)
1200 precise_encoding = decide_utf32_encoding (port, mode);
1201 else
1202 precise_encoding = pt->encoding;
1203
1204 pti->iconv_descriptors =
1205 open_iconv_descriptors (precise_encoding,
1206 SCM_INPUT_PORT_P (port),
1207 SCM_OUTPUT_PORT_P (port));
1208 }
1209
1210 return pti->iconv_descriptors;
1211 }
1212
1213 /* The name of the encoding is itself encoded in ASCII. */
1214 void
1215 scm_i_set_port_encoding_x (SCM port, const char *encoding)
1216 {
1217 scm_t_port *pt;
1218 scm_t_port_internal *pti;
1219 scm_t_iconv_descriptors *prev;
1220
1221 /* Set the character encoding for this port. */
1222 pt = SCM_PTAB_ENTRY (port);
1223 pti = SCM_PORT_GET_INTERNAL (port);
1224 prev = pti->iconv_descriptors;
1225
1226 /* In order to handle cases where the encoding changes mid-stream
1227 (e.g. within an HTTP stream, or within a file that is composed of
1228 segments with different encodings), we consider this to be "stream
1229 start" for purposes of BOM handling, regardless of our actual file
1230 position. */
1231 pti->at_stream_start_for_bom_read = 1;
1232 pti->at_stream_start_for_bom_write = 1;
1233
1234 if (encoding_matches (encoding, "UTF-8"))
1235 {
1236 pt->encoding = "UTF-8";
1237 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
1238 }
1239 else if (encoding_matches (encoding, "ISO-8859-1"))
1240 {
1241 pt->encoding = "ISO-8859-1";
1242 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
1243 }
1244 else
1245 {
1246 pt->encoding = canonicalize_encoding (encoding);
1247 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
1248 }
1249
1250 pti->iconv_descriptors = NULL;
1251 if (prev)
1252 close_iconv_descriptors (prev);
1253 }
1254
1255 SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
1256 (SCM port),
1257 "Returns, as a string, the character encoding that @var{port}\n"
1258 "uses to interpret its input and output.\n")
1259 #define FUNC_NAME s_scm_port_encoding
1260 {
1261 SCM_VALIDATE_PORT (1, port);
1262
1263 return scm_from_latin1_string (SCM_PTAB_ENTRY (port)->encoding);
1264 }
1265 #undef FUNC_NAME
1266
1267 SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
1268 (SCM port, SCM enc),
1269 "Sets the character encoding that will be used to interpret all\n"
1270 "port I/O. New ports are created with the encoding\n"
1271 "appropriate for the current locale if @code{setlocale} has \n"
1272 "been called or ISO-8859-1 otherwise\n"
1273 "and this procedure can be used to modify that encoding.\n")
1274 #define FUNC_NAME s_scm_set_port_encoding_x
1275 {
1276 char *enc_str;
1277
1278 SCM_VALIDATE_PORT (1, port);
1279 SCM_VALIDATE_STRING (2, enc);
1280
1281 enc_str = scm_to_latin1_string (enc);
1282 scm_i_set_port_encoding_x (port, enc_str);
1283 free (enc_str);
1284
1285 return SCM_UNSPECIFIED;
1286 }
1287 #undef FUNC_NAME
1288
1289 SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
1290 1, 0, 0, (SCM port),
1291 "Returns the behavior of the port when handling a character that\n"
1292 "is not representable in the port's current encoding.\n"
1293 "It returns the symbol @code{error} if unrepresentable characters\n"
1294 "should cause exceptions, @code{substitute} if the port should\n"
1295 "try to replace unrepresentable characters with question marks or\n"
1296 "approximate characters, or @code{escape} if unrepresentable\n"
1297 "characters should be converted to string escapes.\n"
1298 "\n"
1299 "If @var{port} is @code{#f}, then the current default behavior\n"
1300 "will be returned. New ports will have this default behavior\n"
1301 "when they are created.\n")
1302 #define FUNC_NAME s_scm_port_conversion_strategy
1303 {
1304 scm_t_string_failed_conversion_handler h;
1305
1306 if (scm_is_false (port))
1307 h = scm_i_default_port_conversion_handler ();
1308 else
1309 {
1310 scm_t_port *pt;
1311
1312 SCM_VALIDATE_OPPORT (1, port);
1313 pt = SCM_PTAB_ENTRY (port);
1314
1315 h = pt->ilseq_handler;
1316 }
1317
1318 if (h == SCM_FAILED_CONVERSION_ERROR)
1319 return scm_from_latin1_symbol ("error");
1320 else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
1321 return scm_from_latin1_symbol ("substitute");
1322 else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
1323 return scm_from_latin1_symbol ("escape");
1324 else
1325 abort ();
1326
1327 /* Never gets here. */
1328 return SCM_UNDEFINED;
1329 }
1330 #undef FUNC_NAME
1331
1332 SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!",
1333 2, 0, 0,
1334 (SCM port, SCM sym),
1335 "Sets the behavior of the interpreter when outputting a character\n"
1336 "that is not representable in the port's current encoding.\n"
1337 "@var{sym} can be either @code{'error}, @code{'substitute}, or\n"
1338 "@code{'escape}. If it is @code{'error}, an error will be thrown\n"
1339 "when an unconvertible character is encountered. If it is\n"
1340 "@code{'substitute}, then unconvertible characters will \n"
1341 "be replaced with approximate characters, or with question marks\n"
1342 "if no approximately correct character is available.\n"
1343 "If it is @code{'escape},\n"
1344 "it will appear as a hex escape when output.\n"
1345 "\n"
1346 "If @var{port} is an open port, the conversion error behavior\n"
1347 "is set for that port. If it is @code{#f}, it is set as the\n"
1348 "default behavior for any future ports that get created in\n"
1349 "this thread.\n")
1350 #define FUNC_NAME s_scm_set_port_conversion_strategy_x
1351 {
1352 scm_t_string_failed_conversion_handler handler;
1353
1354 if (scm_is_eq (sym, sym_error))
1355 handler = SCM_FAILED_CONVERSION_ERROR;
1356 else if (scm_is_eq (sym, sym_substitute))
1357 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
1358 else if (scm_is_eq (sym, sym_escape))
1359 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
1360 else
1361 SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym));
1362
1363 if (scm_is_false (port))
1364 scm_i_set_default_port_conversion_handler (handler);
1365 else
1366 {
1367 SCM_VALIDATE_OPPORT (1, port);
1368 SCM_PTAB_ENTRY (port)->ilseq_handler = handler;
1369 }
1370
1371 return SCM_UNSPECIFIED;
1372 }
1373 #undef FUNC_NAME
1374
1375
1376 \f
1377
1378 /* The port lock. */
1379
1380 static void
1381 lock_port (void *mutex)
1382 {
1383 scm_i_pthread_mutex_lock ((scm_i_pthread_mutex_t *) mutex);
1384 }
1385
1386 static void
1387 unlock_port (void *mutex)
1388 {
1389 scm_i_pthread_mutex_unlock ((scm_i_pthread_mutex_t *) mutex);
1390 }
1391
1392 void
1393 scm_dynwind_lock_port (SCM port)
1394 #define FUNC_NAME "dynwind-lock-port"
1395 {
1396 scm_i_pthread_mutex_t *lock;
1397 SCM_VALIDATE_OPPORT (SCM_ARG1, port);
1398 scm_c_lock_port (port, &lock);
1399 if (lock)
1400 {
1401 scm_dynwind_unwind_handler (unlock_port, lock, SCM_F_WIND_EXPLICITLY);
1402 scm_dynwind_rewind_handler (lock_port, lock, 0);
1403 }
1404 }
1405 #undef FUNC_NAME
1406
1407
1408 \f
1409
1410 /* Input. */
1411
1412 int
1413 scm_get_byte_or_eof (SCM port)
1414 {
1415 scm_i_pthread_mutex_t *lock;
1416 int ret;
1417
1418 scm_c_lock_port (port, &lock);
1419 ret = scm_get_byte_or_eof_unlocked (port);
1420 if (lock)
1421 scm_i_pthread_mutex_unlock (lock);
1422
1423 return ret;
1424 }
1425
1426 int
1427 scm_peek_byte_or_eof (SCM port)
1428 {
1429 scm_i_pthread_mutex_t *lock;
1430 int ret;
1431
1432 scm_c_lock_port (port, &lock);
1433 ret = scm_peek_byte_or_eof_unlocked (port);
1434 if (lock)
1435 scm_i_pthread_mutex_unlock (lock);
1436
1437 return ret;
1438 }
1439
1440 /* scm_c_read
1441 *
1442 * Used by an application to read arbitrary number of bytes from an
1443 * SCM port. Same semantics as libc read, except that scm_c_read only
1444 * returns less than SIZE bytes if at end-of-file.
1445 *
1446 * Warning: Doesn't update port line and column counts! */
1447
1448 /* This structure, and the following swap_buffer function, are used
1449 for temporarily swapping a port's own read buffer, and the buffer
1450 that the caller of scm_c_read provides. */
1451 struct port_and_swap_buffer
1452 {
1453 scm_t_port *pt;
1454 unsigned char *buffer;
1455 size_t size;
1456 };
1457
1458 static void
1459 swap_buffer (void *data)
1460 {
1461 struct port_and_swap_buffer *psb = (struct port_and_swap_buffer *) data;
1462 unsigned char *old_buf = psb->pt->read_buf;
1463 size_t old_size = psb->pt->read_buf_size;
1464
1465 /* Make the port use (buffer, size) from the struct. */
1466 psb->pt->read_pos = psb->pt->read_buf = psb->pt->read_end = psb->buffer;
1467 psb->pt->read_buf_size = psb->size;
1468
1469 /* Save the port's old (buffer, size) in the struct. */
1470 psb->buffer = old_buf;
1471 psb->size = old_size;
1472 }
1473
1474 static int scm_i_fill_input_unlocked (SCM port);
1475
1476 size_t
1477 scm_c_read_unlocked (SCM port, void *buffer, size_t size)
1478 #define FUNC_NAME "scm_c_read"
1479 {
1480 scm_t_port *pt;
1481 scm_t_port_internal *pti;
1482 size_t n_read = 0, n_available;
1483 struct port_and_swap_buffer psb;
1484
1485 SCM_VALIDATE_OPINPORT (1, port);
1486
1487 pt = SCM_PTAB_ENTRY (port);
1488 pti = SCM_PORT_GET_INTERNAL (port);
1489 if (pt->rw_active == SCM_PORT_WRITE)
1490 SCM_PORT_DESCRIPTOR (port)->flush (port);
1491
1492 if (pt->rw_random)
1493 pt->rw_active = SCM_PORT_READ;
1494
1495 /* Take bytes first from the port's read buffer. */
1496 if (pt->read_pos < pt->read_end)
1497 {
1498 n_available = min (size, pt->read_end - pt->read_pos);
1499 memcpy (buffer, pt->read_pos, n_available);
1500 buffer = (char *) buffer + n_available;
1501 pt->read_pos += n_available;
1502 n_read += n_available;
1503 size -= n_available;
1504 }
1505
1506 /* Avoid the scm_dynwind_* costs if we now have enough data. */
1507 if (size == 0)
1508 return n_read;
1509
1510 /* Now we will call scm_i_fill_input_unlocked repeatedly until we have
1511 read the requested number of bytes. (Note that a single
1512 scm_i_fill_input_unlocked call does not guarantee to fill the whole
1513 of the port's read buffer.) */
1514 if (pt->read_buf_size <= 1
1515 && pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
1516 {
1517 /* The port that we are reading from is unbuffered - i.e. does not
1518 have its own persistent buffer - but we have a buffer, provided
1519 by our caller, that is the right size for the data that is
1520 wanted. For the following scm_i_fill_input_unlocked calls,
1521 therefore, we use the buffer in hand as the port's read buffer.
1522
1523 We need to make sure that the port's normal (1 byte) buffer is
1524 reinstated in case one of the scm_i_fill_input_unlocked ()
1525 calls throws an exception; we use the scm_dynwind_* API to
1526 achieve that.
1527
1528 A consequence of this optimization is that the fill_input
1529 functions can't unget characters. That'll push data to the
1530 pushback buffer instead of this psb buffer. */
1531 #if SCM_DEBUG == 1
1532 unsigned char *pback = pt->putback_buf;
1533 #endif
1534 psb.pt = pt;
1535 psb.buffer = buffer;
1536 psb.size = size;
1537 scm_dynwind_begin (SCM_F_DYNWIND_REWINDABLE);
1538 scm_dynwind_rewind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1539 scm_dynwind_unwind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1540
1541 /* Call scm_i_fill_input_unlocked until we have all the bytes that
1542 we need, or we hit EOF. */
1543 while (pt->read_buf_size && (scm_i_fill_input_unlocked (port) != EOF))
1544 {
1545 pt->read_buf_size -= (pt->read_end - pt->read_pos);
1546 pt->read_pos = pt->read_buf = pt->read_end;
1547 }
1548 #if SCM_DEBUG == 1
1549 if (pback != pt->putback_buf
1550 || pt->read_buf - (unsigned char *) buffer < 0)
1551 scm_misc_error (FUNC_NAME,
1552 "scm_c_read must not call a fill function that pushes "
1553 "back characters onto an unbuffered port", SCM_EOL);
1554 #endif
1555 n_read += pt->read_buf - (unsigned char *) buffer;
1556
1557 /* Reinstate the port's normal buffer. */
1558 scm_dynwind_end ();
1559 }
1560 else
1561 {
1562 /* The port has its own buffer. It is important that we use it,
1563 even if it happens to be smaller than our caller's buffer, so
1564 that a custom port implementation's entry points (in
1565 particular, fill_input) can rely on the buffer always being
1566 the same as they first set up. */
1567 while (size && (scm_i_fill_input_unlocked (port) != EOF))
1568 {
1569 n_available = min (size, pt->read_end - pt->read_pos);
1570 memcpy (buffer, pt->read_pos, n_available);
1571 buffer = (char *) buffer + n_available;
1572 pt->read_pos += n_available;
1573 n_read += n_available;
1574 size -= n_available;
1575 }
1576 }
1577
1578 return n_read;
1579 }
1580 #undef FUNC_NAME
1581
1582 size_t
1583 scm_c_read (SCM port, void *buffer, size_t size)
1584 {
1585 scm_i_pthread_mutex_t *lock;
1586 size_t ret;
1587
1588 scm_c_lock_port (port, &lock);
1589 ret = scm_c_read_unlocked (port, buffer, size);
1590 if (lock)
1591 scm_i_pthread_mutex_unlock (lock);
1592
1593
1594 return ret;
1595 }
1596
1597 /* Update the line and column number of PORT after consumption of C. */
1598 static inline void
1599 update_port_lf (scm_t_wchar c, SCM port)
1600 {
1601 switch (c)
1602 {
1603 case '\a':
1604 case EOF:
1605 break;
1606 case '\b':
1607 SCM_DECCOL (port);
1608 break;
1609 case '\n':
1610 SCM_INCLINE (port);
1611 break;
1612 case '\r':
1613 SCM_ZEROCOL (port);
1614 break;
1615 case '\t':
1616 SCM_TABCOL (port);
1617 break;
1618 default:
1619 SCM_INCCOL (port);
1620 break;
1621 }
1622 }
1623
1624 #define SCM_MBCHAR_BUF_SIZE (4)
1625
1626 /* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint.
1627 UTF8_BUF is assumed to contain a valid UTF-8 sequence. */
1628 static scm_t_wchar
1629 utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
1630 {
1631 scm_t_wchar codepoint;
1632
1633 if (utf8_buf[0] <= 0x7f)
1634 {
1635 assert (size == 1);
1636 codepoint = utf8_buf[0];
1637 }
1638 else if ((utf8_buf[0] & 0xe0) == 0xc0)
1639 {
1640 assert (size == 2);
1641 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL
1642 | (utf8_buf[1] & 0x3f);
1643 }
1644 else if ((utf8_buf[0] & 0xf0) == 0xe0)
1645 {
1646 assert (size == 3);
1647 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL
1648 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL
1649 | (utf8_buf[2] & 0x3f);
1650 }
1651 else
1652 {
1653 assert (size == 4);
1654 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL
1655 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL
1656 | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL
1657 | (utf8_buf[3] & 0x3f);
1658 }
1659
1660 return codepoint;
1661 }
1662
1663 /* Read a UTF-8 sequence from PORT. On success, return 0 and set
1664 *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8
1665 representation, and set *LEN to the length in bytes. Return
1666 `EILSEQ' on error. */
1667 static int
1668 get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
1669 scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1670 {
1671 #define ASSERT_NOT_EOF(b) \
1672 if (SCM_UNLIKELY ((b) == EOF)) \
1673 goto invalid_seq
1674 #define CONSUME_PEEKED_BYTE() \
1675 pt->read_pos++
1676
1677 int byte;
1678 scm_t_port *pt;
1679
1680 *len = 0;
1681 pt = SCM_PTAB_ENTRY (port);
1682
1683 byte = scm_get_byte_or_eof_unlocked (port);
1684 if (byte == EOF)
1685 {
1686 *codepoint = EOF;
1687 return 0;
1688 }
1689
1690 buf[0] = (scm_t_uint8) byte;
1691 *len = 1;
1692
1693 if (buf[0] <= 0x7f)
1694 /* 1-byte form. */
1695 *codepoint = buf[0];
1696 else if (buf[0] >= 0xc2 && buf[0] <= 0xdf)
1697 {
1698 /* 2-byte form. */
1699 byte = scm_peek_byte_or_eof_unlocked (port);
1700 ASSERT_NOT_EOF (byte);
1701
1702 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1703 goto invalid_seq;
1704
1705 CONSUME_PEEKED_BYTE ();
1706 buf[1] = (scm_t_uint8) byte;
1707 *len = 2;
1708
1709 *codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL
1710 | (buf[1] & 0x3f);
1711 }
1712 else if ((buf[0] & 0xf0) == 0xe0)
1713 {
1714 /* 3-byte form. */
1715 byte = scm_peek_byte_or_eof_unlocked (port);
1716 ASSERT_NOT_EOF (byte);
1717
1718 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80
1719 || (buf[0] == 0xe0 && byte < 0xa0)
1720 || (buf[0] == 0xed && byte > 0x9f)))
1721 goto invalid_seq;
1722
1723 CONSUME_PEEKED_BYTE ();
1724 buf[1] = (scm_t_uint8) byte;
1725 *len = 2;
1726
1727 byte = scm_peek_byte_or_eof_unlocked (port);
1728 ASSERT_NOT_EOF (byte);
1729
1730 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1731 goto invalid_seq;
1732
1733 CONSUME_PEEKED_BYTE ();
1734 buf[2] = (scm_t_uint8) byte;
1735 *len = 3;
1736
1737 *codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL
1738 | ((scm_t_wchar) buf[1] & 0x3f) << 6UL
1739 | (buf[2] & 0x3f);
1740 }
1741 else if (buf[0] >= 0xf0 && buf[0] <= 0xf4)
1742 {
1743 /* 4-byte form. */
1744 byte = scm_peek_byte_or_eof_unlocked (port);
1745 ASSERT_NOT_EOF (byte);
1746
1747 if (SCM_UNLIKELY (((byte & 0xc0) != 0x80)
1748 || (buf[0] == 0xf0 && byte < 0x90)
1749 || (buf[0] == 0xf4 && byte > 0x8f)))
1750 goto invalid_seq;
1751
1752 CONSUME_PEEKED_BYTE ();
1753 buf[1] = (scm_t_uint8) byte;
1754 *len = 2;
1755
1756 byte = scm_peek_byte_or_eof_unlocked (port);
1757 ASSERT_NOT_EOF (byte);
1758
1759 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1760 goto invalid_seq;
1761
1762 CONSUME_PEEKED_BYTE ();
1763 buf[2] = (scm_t_uint8) byte;
1764 *len = 3;
1765
1766 byte = scm_peek_byte_or_eof_unlocked (port);
1767 ASSERT_NOT_EOF (byte);
1768
1769 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1770 goto invalid_seq;
1771
1772 CONSUME_PEEKED_BYTE ();
1773 buf[3] = (scm_t_uint8) byte;
1774 *len = 4;
1775
1776 *codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL
1777 | ((scm_t_wchar) buf[1] & 0x3f) << 12UL
1778 | ((scm_t_wchar) buf[2] & 0x3f) << 6UL
1779 | (buf[3] & 0x3f);
1780 }
1781 else
1782 goto invalid_seq;
1783
1784 return 0;
1785
1786 invalid_seq:
1787 /* Here we could choose the consume the faulty byte when it's not a
1788 valid starting byte, but it's not a requirement. What Section 3.9
1789 of Unicode 6.0.0 mandates, though, is to not consume a byte that
1790 would otherwise be a valid starting byte. */
1791
1792 return EILSEQ;
1793
1794 #undef CONSUME_PEEKED_BYTE
1795 #undef ASSERT_NOT_EOF
1796 }
1797
1798 /* Read an ISO-8859-1 codepoint (a byte) from PORT. On success, return
1799 0 and set *CODEPOINT to the codepoint that was read, fill BUF with
1800 its UTF-8 representation, and set *LEN to the length in bytes.
1801 Return `EILSEQ' on error. */
1802 static int
1803 get_latin1_codepoint (SCM port, scm_t_wchar *codepoint,
1804 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1805 {
1806 *codepoint = scm_get_byte_or_eof_unlocked (port);
1807
1808 if (*codepoint == EOF)
1809 *len = 0;
1810 else
1811 {
1812 *len = 1;
1813 buf[0] = *codepoint;
1814 }
1815 return 0;
1816 }
1817
1818 /* Likewise, read a byte sequence from PORT, passing it through its
1819 input conversion descriptor. */
1820 static int
1821 get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
1822 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1823 {
1824 scm_t_iconv_descriptors *id;
1825 scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
1826 size_t input_size = 0;
1827
1828 id = scm_i_port_iconv_descriptors (port, SCM_PORT_READ);
1829
1830 for (;;)
1831 {
1832 int byte_read;
1833 char *input, *output;
1834 size_t input_left, output_left, done;
1835
1836 byte_read = scm_get_byte_or_eof_unlocked (port);
1837 if (SCM_UNLIKELY (byte_read == EOF))
1838 {
1839 if (SCM_LIKELY (input_size == 0))
1840 {
1841 *codepoint = (scm_t_wchar) EOF;
1842 *len = input_size;
1843 return 0;
1844 }
1845 else
1846 {
1847 /* EOF found in the middle of a multibyte character. */
1848 scm_i_set_pending_eof (port);
1849 return EILSEQ;
1850 }
1851 }
1852
1853 buf[input_size++] = byte_read;
1854
1855 input = buf;
1856 input_left = input_size;
1857 output = (char *) utf8_buf;
1858 output_left = sizeof (utf8_buf);
1859
1860 done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
1861
1862 if (done == (size_t) -1)
1863 {
1864 int err = errno;
1865 if (SCM_LIKELY (err == EINVAL))
1866 /* The input byte sequence did not form a complete
1867 character. Read another byte and try again. */
1868 continue;
1869 else
1870 return err;
1871 }
1872 else
1873 {
1874 size_t output_size = sizeof (utf8_buf) - output_left;
1875 if (SCM_LIKELY (output_size > 0))
1876 {
1877 /* iconv generated output. Convert the UTF8_BUF sequence
1878 to a Unicode code point. */
1879 *codepoint = utf8_to_codepoint (utf8_buf, output_size);
1880 *len = input_size;
1881 return 0;
1882 }
1883 else
1884 {
1885 /* iconv consumed some bytes without producing any output.
1886 Most likely this means that a Unicode byte-order mark
1887 (BOM) was consumed, which should not be included in the
1888 returned buf. Shift any remaining bytes to the beginning
1889 of buf, and continue the loop. */
1890 memmove (buf, input, input_left);
1891 input_size = input_left;
1892 continue;
1893 }
1894 }
1895 }
1896 }
1897
1898 /* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
1899 with the byte representation of the codepoint in PORT's encoding, and
1900 set *LEN to the length in bytes of that representation. Return 0 on
1901 success and an errno value on error. */
1902 static SCM_C_INLINE int
1903 get_codepoint (SCM port, scm_t_wchar *codepoint,
1904 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1905 {
1906 int err;
1907 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1908 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
1909
1910 if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
1911 err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
1912 else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
1913 err = get_latin1_codepoint (port, codepoint, buf, len);
1914 else
1915 err = get_iconv_codepoint (port, codepoint, buf, len);
1916
1917 if (SCM_LIKELY (err == 0))
1918 {
1919 if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read))
1920 {
1921 /* Record that we're no longer at stream start. */
1922 pti->at_stream_start_for_bom_read = 0;
1923 if (pt->rw_random)
1924 pti->at_stream_start_for_bom_write = 0;
1925
1926 /* If we just read a BOM in an encoding that recognizes them,
1927 then silently consume it and read another code point. */
1928 if (SCM_UNLIKELY
1929 (*codepoint == SCM_UNICODE_BOM
1930 && (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8
1931 || strcmp (pt->encoding, "UTF-16") == 0
1932 || strcmp (pt->encoding, "UTF-32") == 0)))
1933 return get_codepoint (port, codepoint, buf, len);
1934 }
1935 update_port_lf (*codepoint, port);
1936 }
1937 else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
1938 {
1939 *codepoint = '?';
1940 err = 0;
1941 update_port_lf (*codepoint, port);
1942 }
1943
1944 return err;
1945 }
1946
1947 /* Read a codepoint from PORT and return it. */
1948 scm_t_wchar
1949 scm_getc_unlocked (SCM port)
1950 #define FUNC_NAME "scm_getc"
1951 {
1952 int err;
1953 size_t len;
1954 scm_t_wchar codepoint;
1955 char buf[SCM_MBCHAR_BUF_SIZE];
1956
1957 err = get_codepoint (port, &codepoint, buf, &len);
1958 if (SCM_UNLIKELY (err != 0))
1959 /* At this point PORT should point past the invalid encoding, as per
1960 R6RS-lib Section 8.2.4. */
1961 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
1962
1963 return codepoint;
1964 }
1965 #undef FUNC_NAME
1966
1967 scm_t_wchar
1968 scm_getc (SCM port)
1969 {
1970 scm_i_pthread_mutex_t *lock;
1971 scm_t_wchar ret;
1972
1973 scm_c_lock_port (port, &lock);
1974 ret = scm_getc_unlocked (port);
1975 if (lock)
1976 scm_i_pthread_mutex_unlock (lock);
1977
1978
1979 return ret;
1980 }
1981
1982 SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
1983 (SCM port),
1984 "Return the next character available from @var{port}, updating\n"
1985 "@var{port} to point to the following character. If no more\n"
1986 "characters are available, the end-of-file object is returned.\n"
1987 "\n"
1988 "When @var{port}'s data cannot be decoded according to its\n"
1989 "character encoding, a @code{decoding-error} is raised and\n"
1990 "@var{port} points past the erroneous byte sequence.\n")
1991 #define FUNC_NAME s_scm_read_char
1992 {
1993 scm_t_wchar c;
1994 if (SCM_UNBNDP (port))
1995 port = scm_current_input_port ();
1996 SCM_VALIDATE_OPINPORT (1, port);
1997 c = scm_getc_unlocked (port);
1998 if (EOF == c)
1999 return SCM_EOF_VAL;
2000 return SCM_MAKE_CHAR (c);
2001 }
2002 #undef FUNC_NAME
2003
2004
2005 \f
2006
2007 /* Pushback. */
2008 \f
2009
2010
2011 static void
2012 scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
2013 #define FUNC_NAME "scm_unget_bytes"
2014 {
2015 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2016 size_t old_len, new_len;
2017
2018 scm_i_clear_pending_eof (port);
2019
2020 if (pt->read_buf != pt->putback_buf)
2021 /* switch to the put-back buffer. */
2022 {
2023 if (pt->putback_buf == NULL)
2024 {
2025 pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
2026 ? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
2027 pt->putback_buf
2028 = (unsigned char *) scm_gc_malloc_pointerless
2029 (pt->putback_buf_size, "putback buffer");
2030 }
2031
2032 pt->saved_read_buf = pt->read_buf;
2033 pt->saved_read_pos = pt->read_pos;
2034 pt->saved_read_end = pt->read_end;
2035 pt->saved_read_buf_size = pt->read_buf_size;
2036
2037 /* Put read_pos at the end of the buffer, so that ungets will not
2038 have to shift the buffer contents each time. */
2039 pt->read_buf = pt->putback_buf;
2040 pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
2041 pt->read_buf_size = pt->putback_buf_size;
2042 }
2043
2044 old_len = pt->read_end - pt->read_pos;
2045 new_len = old_len + len;
2046
2047 if (new_len > pt->read_buf_size)
2048 /* The putback buffer needs to be enlarged. */
2049 {
2050 size_t new_buf_size;
2051 unsigned char *new_buf, *new_end, *new_pos;
2052
2053 new_buf_size = pt->read_buf_size * 2;
2054 if (new_buf_size < new_len)
2055 new_buf_size = new_len;
2056
2057 new_buf = (unsigned char *)
2058 scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
2059
2060 /* Put the bytes at the end of the buffer, so that future
2061 ungets won't need to shift the buffer. */
2062 new_end = new_buf + new_buf_size;
2063 new_pos = new_end - old_len;
2064 memcpy (new_pos, pt->read_pos, old_len);
2065
2066 pt->read_buf = pt->putback_buf = new_buf;
2067 pt->read_pos = new_pos;
2068 pt->read_end = new_end;
2069 pt->read_buf_size = pt->putback_buf_size = new_buf_size;
2070 }
2071 else if (pt->read_buf + len < pt->read_pos)
2072 /* If needed, shift the existing buffer contents up.
2073 This should not happen unless some external code
2074 manipulates the putback buffer pointers. */
2075 {
2076 unsigned char *new_end = pt->read_buf + pt->read_buf_size;
2077 unsigned char *new_pos = new_end - old_len;
2078
2079 memmove (new_pos, pt->read_pos, old_len);
2080 pt->read_pos = new_pos;
2081 pt->read_end = new_end;
2082 }
2083
2084 /* Move read_pos back and copy the bytes there. */
2085 pt->read_pos -= len;
2086 memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
2087
2088 if (pt->rw_active == SCM_PORT_WRITE)
2089 scm_flush (port);
2090
2091 if (pt->rw_random)
2092 pt->rw_active = SCM_PORT_READ;
2093 }
2094 #undef FUNC_NAME
2095
2096 void
2097 scm_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
2098 {
2099 scm_i_unget_bytes_unlocked (buf, len, port);
2100 }
2101
2102 void
2103 scm_unget_byte_unlocked (int c, SCM port)
2104 {
2105 unsigned char byte = c;
2106 scm_i_unget_bytes_unlocked (&byte, 1, port);
2107 }
2108
2109 void
2110 scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
2111 {
2112 scm_i_pthread_mutex_t *lock;
2113 scm_c_lock_port (port, &lock);
2114 scm_i_unget_bytes_unlocked (buf, len, port);
2115 if (lock)
2116 scm_i_pthread_mutex_unlock (lock);
2117 }
2118
2119 void
2120 scm_unget_byte (int c, SCM port)
2121 {
2122 unsigned char byte = c;
2123 scm_i_pthread_mutex_t *lock;
2124 scm_c_lock_port (port, &lock);
2125 scm_i_unget_bytes_unlocked (&byte, 1, port);
2126 if (lock)
2127 scm_i_pthread_mutex_unlock (lock);
2128 }
2129
2130 void
2131 scm_ungetc_unlocked (scm_t_wchar c, SCM port)
2132 #define FUNC_NAME "scm_ungetc"
2133 {
2134 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2135 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
2136 char *result;
2137 char result_buf[10];
2138 size_t len;
2139
2140 len = sizeof (result_buf);
2141
2142 if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
2143 {
2144 if (c < 0xf0)
2145 {
2146 result_buf[0] = (char) c;
2147 result = result_buf;
2148 len = 1;
2149 }
2150 else
2151 result =
2152 (char *) u32_to_u8 ((uint32_t *) &c, 1, (uint8_t *) result_buf, &len);
2153 }
2154 else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1 && c <= 0xff)
2155 {
2156 result_buf[0] = (char) c;
2157 result = result_buf;
2158 len = 1;
2159 }
2160 else
2161 result = u32_conv_to_encoding (pt->encoding,
2162 (enum iconv_ilseq_handler) pt->ilseq_handler,
2163 (uint32_t *) &c, 1, NULL,
2164 result_buf, &len);
2165
2166 if (SCM_UNLIKELY (result == NULL || len == 0))
2167 scm_encoding_error (FUNC_NAME, errno,
2168 "conversion to port encoding failed",
2169 SCM_BOOL_F, SCM_MAKE_CHAR (c));
2170
2171 scm_i_unget_bytes_unlocked ((unsigned char *) result, len, port);
2172
2173 if (SCM_UNLIKELY (result != result_buf))
2174 free (result);
2175
2176 if (c == '\n')
2177 {
2178 /* What should col be in this case?
2179 * We'll leave it at -1.
2180 */
2181 SCM_LINUM (port) -= 1;
2182 }
2183 else
2184 SCM_COL(port) -= 1;
2185 }
2186 #undef FUNC_NAME
2187
2188 void
2189 scm_ungetc (scm_t_wchar c, SCM port)
2190 {
2191 scm_i_pthread_mutex_t *lock;
2192 scm_c_lock_port (port, &lock);
2193 scm_ungetc_unlocked (c, port);
2194 if (lock)
2195 scm_i_pthread_mutex_unlock (lock);
2196
2197 }
2198
2199 void
2200 scm_ungets_unlocked (const char *s, int n, SCM port)
2201 {
2202 /* This is simple minded and inefficient, but unreading strings is
2203 * probably not a common operation, and remember that line and
2204 * column numbers have to be handled...
2205 *
2206 * Please feel free to write an optimized version!
2207 */
2208 while (n--)
2209 scm_ungetc_unlocked (s[n], port);
2210 }
2211
2212 void
2213 scm_ungets (const char *s, int n, SCM port)
2214 {
2215 scm_i_pthread_mutex_t *lock;
2216 scm_c_lock_port (port, &lock);
2217 scm_ungets_unlocked (s, n, port);
2218 if (lock)
2219 scm_i_pthread_mutex_unlock (lock);
2220
2221 }
2222
2223 SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
2224 (SCM port),
2225 "Return the next character available from @var{port},\n"
2226 "@emph{without} updating @var{port} to point to the following\n"
2227 "character. If no more characters are available, the\n"
2228 "end-of-file object is returned.\n"
2229 "\n"
2230 "The value returned by\n"
2231 "a call to @code{peek-char} is the same as the value that would\n"
2232 "have been returned by a call to @code{read-char} on the same\n"
2233 "port. The only difference is that the very next call to\n"
2234 "@code{read-char} or @code{peek-char} on that @var{port} will\n"
2235 "return the value returned by the preceding call to\n"
2236 "@code{peek-char}. In particular, a call to @code{peek-char} on\n"
2237 "an interactive port will hang waiting for input whenever a call\n"
2238 "to @code{read-char} would have hung.\n"
2239 "\n"
2240 "As for @code{read-char}, a @code{decoding-error} may be raised\n"
2241 "if such a situation occurs. However, unlike with @code{read-char},\n"
2242 "@var{port} still points at the beginning of the erroneous byte\n"
2243 "sequence when the error is raised.\n")
2244 #define FUNC_NAME s_scm_peek_char
2245 {
2246 int err;
2247 SCM result;
2248 scm_t_wchar c;
2249 char bytes[SCM_MBCHAR_BUF_SIZE];
2250 long column, line;
2251 size_t len = 0;
2252
2253 if (SCM_UNBNDP (port))
2254 port = scm_current_input_port ();
2255 SCM_VALIDATE_OPINPORT (1, port);
2256
2257 column = SCM_COL (port);
2258 line = SCM_LINUM (port);
2259
2260 err = get_codepoint (port, &c, bytes, &len);
2261
2262 scm_i_unget_bytes_unlocked ((unsigned char *) bytes, len, port);
2263
2264 SCM_COL (port) = column;
2265 SCM_LINUM (port) = line;
2266
2267 if (SCM_UNLIKELY (err != 0))
2268 {
2269 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
2270
2271 /* Shouldn't happen since `catch' always aborts to prompt. */
2272 result = SCM_BOOL_F;
2273 }
2274 else if (c == EOF)
2275 {
2276 scm_i_set_pending_eof (port);
2277 result = SCM_EOF_VAL;
2278 }
2279 else
2280 result = SCM_MAKE_CHAR (c);
2281
2282 return result;
2283 }
2284 #undef FUNC_NAME
2285
2286 SCM_DEFINE (scm_unread_char, "unread-char", 1, 1, 0,
2287 (SCM cobj, SCM port),
2288 "Place character @var{cobj} in @var{port} so that it will be\n"
2289 "read by the next read operation. If called multiple times, the\n"
2290 "unread characters will be read again in last-in first-out\n"
2291 "order. If @var{port} is not supplied, the current input port\n"
2292 "is used.")
2293 #define FUNC_NAME s_scm_unread_char
2294 {
2295 int c;
2296
2297 SCM_VALIDATE_CHAR (1, cobj);
2298 if (SCM_UNBNDP (port))
2299 port = scm_current_input_port ();
2300 SCM_VALIDATE_OPINPORT (2, port);
2301
2302 c = SCM_CHAR (cobj);
2303
2304 scm_ungetc_unlocked (c, port);
2305 return cobj;
2306 }
2307 #undef FUNC_NAME
2308
2309 SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0,
2310 (SCM str, SCM port),
2311 "Place the string @var{str} in @var{port} so that its characters will be\n"
2312 "read in subsequent read operations. If called multiple times, the\n"
2313 "unread characters will be read again in last-in first-out order. If\n"
2314 "@var{port} is not supplied, the current-input-port is used.")
2315 #define FUNC_NAME s_scm_unread_string
2316 {
2317 int n;
2318 SCM_VALIDATE_STRING (1, str);
2319 if (SCM_UNBNDP (port))
2320 port = scm_current_input_port ();
2321 SCM_VALIDATE_OPINPORT (2, port);
2322
2323 n = scm_i_string_length (str);
2324
2325 while (n--)
2326 scm_ungetc_unlocked (scm_i_string_ref (str, n), port);
2327
2328 return str;
2329 }
2330 #undef FUNC_NAME
2331
2332
2333 \f
2334
2335 /* Manipulating the buffers. */
2336
2337 /* This routine does not take any locks, as it is usually called as part
2338 of a port implementation. */
2339 void
2340 scm_port_non_buffer (scm_t_port *pt)
2341 {
2342 pt->read_pos = pt->read_buf = pt->read_end = &pt->shortbuf;
2343 pt->write_buf = pt->write_pos = &pt->shortbuf;
2344 pt->read_buf_size = pt->write_buf_size = 1;
2345 pt->write_end = pt->write_buf + pt->write_buf_size;
2346 }
2347
2348 /* this should only be called when the read buffer is empty. it
2349 tries to refill the read buffer. it returns the first char from
2350 the port, which is either EOF or *(pt->read_pos). */
2351 static int
2352 scm_i_fill_input_unlocked (SCM port)
2353 {
2354 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2355 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
2356
2357 assert (pt->read_pos == pt->read_end);
2358
2359 if (pti->pending_eof)
2360 {
2361 pti->pending_eof = 0;
2362 return EOF;
2363 }
2364
2365 if (pt->read_buf == pt->putback_buf)
2366 {
2367 /* finished reading put-back chars. */
2368 pt->read_buf = pt->saved_read_buf;
2369 pt->read_pos = pt->saved_read_pos;
2370 pt->read_end = pt->saved_read_end;
2371 pt->read_buf_size = pt->saved_read_buf_size;
2372 if (pt->read_pos < pt->read_end)
2373 return *(pt->read_pos);
2374 }
2375 return SCM_PORT_DESCRIPTOR (port)->fill_input (port);
2376 }
2377
2378 int
2379 scm_fill_input (SCM port)
2380 {
2381 scm_i_pthread_mutex_t *lock;
2382 int ret;
2383
2384 scm_c_lock_port (port, &lock);
2385 ret = scm_fill_input_unlocked (port);
2386 if (lock)
2387 scm_i_pthread_mutex_unlock (lock);
2388
2389
2390 return ret;
2391 }
2392
2393 /* Slow-path fallback for 'scm_get_byte_or_eof_unlocked' */
2394 int
2395 scm_slow_get_byte_or_eof_unlocked (SCM port)
2396 {
2397 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2398
2399 if (pt->rw_active == SCM_PORT_WRITE)
2400 scm_flush_unlocked (port);
2401
2402 if (pt->rw_random)
2403 pt->rw_active = SCM_PORT_READ;
2404
2405 if (pt->read_pos >= pt->read_end)
2406 {
2407 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2408 return EOF;
2409 }
2410
2411 return *pt->read_pos++;
2412 }
2413
2414 /* Slow-path fallback for 'scm_peek_byte_or_eof_unlocked' */
2415 int
2416 scm_slow_peek_byte_or_eof_unlocked (SCM port)
2417 {
2418 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2419
2420 if (pt->rw_active == SCM_PORT_WRITE)
2421 scm_flush_unlocked (port);
2422
2423 if (pt->rw_random)
2424 pt->rw_active = SCM_PORT_READ;
2425
2426 if (pt->read_pos >= pt->read_end)
2427 {
2428 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2429 {
2430 scm_i_set_pending_eof (port);
2431 return EOF;
2432 }
2433 }
2434
2435 return *pt->read_pos;
2436 }
2437
2438 /* Move up to READ_LEN bytes from PORT's putback and/or read buffers
2439 into memory starting at DEST. Return the number of bytes moved.
2440 PORT's line/column numbers are left unchanged. */
2441 size_t
2442 scm_take_from_input_buffers (SCM port, char *dest, size_t read_len)
2443 {
2444 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2445 size_t bytes_read = 0;
2446 size_t from_buf = min (pt->read_end - pt->read_pos, read_len);
2447
2448 if (from_buf > 0)
2449 {
2450 memcpy (dest, pt->read_pos, from_buf);
2451 pt->read_pos += from_buf;
2452 bytes_read += from_buf;
2453 read_len -= from_buf;
2454 dest += from_buf;
2455 }
2456
2457 /* if putback was active, try the real input buffer too. */
2458 if (pt->read_buf == pt->putback_buf)
2459 {
2460 from_buf = min (pt->saved_read_end - pt->saved_read_pos, read_len);
2461 if (from_buf > 0)
2462 {
2463 memcpy (dest, pt->saved_read_pos, from_buf);
2464 pt->saved_read_pos += from_buf;
2465 bytes_read += from_buf;
2466 }
2467 }
2468
2469 return bytes_read;
2470 }
2471
2472 /* Clear a port's read buffers, returning the contents. */
2473 SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0,
2474 (SCM port),
2475 "This procedure clears a port's input buffers, similar\n"
2476 "to the way that force-output clears the output buffer. The\n"
2477 "contents of the buffers are returned as a single string, e.g.,\n"
2478 "\n"
2479 "@lisp\n"
2480 "(define p (open-input-file ...))\n"
2481 "(drain-input p) => empty string, nothing buffered yet.\n"
2482 "(unread-char (read-char p) p)\n"
2483 "(drain-input p) => initial chars from p, up to the buffer size.\n"
2484 "@end lisp\n\n"
2485 "Draining the buffers may be useful for cleanly finishing\n"
2486 "buffered I/O so that the file descriptor can be used directly\n"
2487 "for further input.")
2488 #define FUNC_NAME s_scm_drain_input
2489 {
2490 SCM result;
2491 char *data;
2492 scm_t_port *pt;
2493 long count;
2494
2495 SCM_VALIDATE_OPINPORT (1, port);
2496 pt = SCM_PTAB_ENTRY (port);
2497
2498 count = pt->read_end - pt->read_pos;
2499 if (pt->read_buf == pt->putback_buf)
2500 count += pt->saved_read_end - pt->saved_read_pos;
2501
2502 if (count)
2503 {
2504 result = scm_i_make_string (count, &data, 0);
2505 scm_take_from_input_buffers (port, data, count);
2506 }
2507 else
2508 result = scm_nullstr;
2509
2510 return result;
2511 }
2512 #undef FUNC_NAME
2513
2514 void
2515 scm_end_input_unlocked (SCM port)
2516 {
2517 long offset;
2518 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2519
2520 scm_i_clear_pending_eof (port);
2521 if (pt->read_buf == pt->putback_buf)
2522 {
2523 offset = pt->read_end - pt->read_pos;
2524 pt->read_buf = pt->saved_read_buf;
2525 pt->read_pos = pt->saved_read_pos;
2526 pt->read_end = pt->saved_read_end;
2527 pt->read_buf_size = pt->saved_read_buf_size;
2528 }
2529 else
2530 offset = 0;
2531
2532 SCM_PORT_DESCRIPTOR (port)->end_input (port, offset);
2533 }
2534
2535 void
2536 scm_end_input (SCM port)
2537 {
2538 scm_i_pthread_mutex_t *lock;
2539 scm_c_lock_port (port, &lock);
2540 scm_end_input_unlocked (port);
2541 if (lock)
2542 scm_i_pthread_mutex_unlock (lock);
2543
2544 }
2545
2546 SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0,
2547 (SCM port),
2548 "Flush the specified output port, or the current output port if @var{port}\n"
2549 "is omitted. The current output buffer contents are passed to the\n"
2550 "underlying port implementation (e.g., in the case of fports, the\n"
2551 "data will be written to the file and the output buffer will be cleared.)\n"
2552 "It has no effect on an unbuffered port.\n\n"
2553 "The return value is unspecified.")
2554 #define FUNC_NAME s_scm_force_output
2555 {
2556 if (SCM_UNBNDP (port))
2557 port = scm_current_output_port ();
2558 else
2559 {
2560 port = SCM_COERCE_OUTPORT (port);
2561 SCM_VALIDATE_OPOUTPORT (1, port);
2562 }
2563 scm_flush_unlocked (port);
2564 return SCM_UNSPECIFIED;
2565 }
2566 #undef FUNC_NAME
2567
2568 void
2569 scm_flush_unlocked (SCM port)
2570 {
2571 SCM_PORT_DESCRIPTOR (port)->flush (port);
2572 }
2573
2574 void
2575 scm_flush (SCM port)
2576 {
2577 scm_i_pthread_mutex_t *lock;
2578 scm_c_lock_port (port, &lock);
2579 scm_flush_unlocked (port);
2580 if (lock)
2581 scm_i_pthread_mutex_unlock (lock);
2582
2583 }
2584
2585 int
2586 scm_fill_input_unlocked (SCM port)
2587 {
2588 return scm_i_fill_input_unlocked (port);
2589 }
2590
2591
2592 \f
2593
2594 /* Output. */
2595
2596 void
2597 scm_putc (char c, SCM port)
2598 {
2599 scm_i_pthread_mutex_t *lock;
2600 scm_c_lock_port (port, &lock);
2601 scm_putc_unlocked (c, port);
2602 if (lock)
2603 scm_i_pthread_mutex_unlock (lock);
2604
2605 }
2606
2607 void
2608 scm_puts (const char *s, SCM port)
2609 {
2610 scm_i_pthread_mutex_t *lock;
2611 scm_c_lock_port (port, &lock);
2612 scm_puts_unlocked (s, port);
2613 if (lock)
2614 scm_i_pthread_mutex_unlock (lock);
2615
2616 }
2617
2618 /* scm_c_write
2619 *
2620 * Used by an application to write arbitrary number of bytes to an SCM
2621 * port. Similar semantics as libc write. However, unlike libc
2622 * write, scm_c_write writes the requested number of bytes and has no
2623 * return value.
2624 *
2625 * Warning: Doesn't update port line and column counts!
2626 */
2627 void
2628 scm_c_write_unlocked (SCM port, const void *ptr, size_t size)
2629 #define FUNC_NAME "scm_c_write"
2630 {
2631 scm_t_port *pt;
2632 scm_t_ptob_descriptor *ptob;
2633
2634 SCM_VALIDATE_OPOUTPORT (1, port);
2635
2636 pt = SCM_PTAB_ENTRY (port);
2637 ptob = SCM_PORT_DESCRIPTOR (port);
2638
2639 if (pt->rw_active == SCM_PORT_READ)
2640 scm_end_input_unlocked (port);
2641
2642 ptob->write (port, ptr, size);
2643
2644 if (pt->rw_random)
2645 pt->rw_active = SCM_PORT_WRITE;
2646 }
2647 #undef FUNC_NAME
2648
2649 void
2650 scm_c_write (SCM port, const void *ptr, size_t size)
2651 {
2652 scm_i_pthread_mutex_t *lock;
2653 scm_c_lock_port (port, &lock);
2654 scm_c_write_unlocked (port, ptr, size);
2655 if (lock)
2656 scm_i_pthread_mutex_unlock (lock);
2657
2658 }
2659
2660 /* scm_lfwrite
2661 *
2662 * This function differs from scm_c_write; it updates port line and
2663 * column. */
2664 void
2665 scm_lfwrite_unlocked (const char *ptr, size_t size, SCM port)
2666 {
2667 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2668 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
2669
2670 if (pt->rw_active == SCM_PORT_READ)
2671 scm_end_input_unlocked (port);
2672
2673 ptob->write (port, ptr, size);
2674
2675 for (; size; ptr++, size--)
2676 update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port);
2677
2678 if (pt->rw_random)
2679 pt->rw_active = SCM_PORT_WRITE;
2680 }
2681
2682 void
2683 scm_lfwrite (const char *ptr, size_t size, SCM port)
2684 {
2685 scm_i_pthread_mutex_t *lock;
2686 scm_c_lock_port (port, &lock);
2687 scm_lfwrite_unlocked (ptr, size, port);
2688 if (lock)
2689 scm_i_pthread_mutex_unlock (lock);
2690
2691 }
2692
2693 /* Write STR to PORT from START inclusive to END exclusive. */
2694 void
2695 scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
2696 {
2697 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2698
2699 if (pt->rw_active == SCM_PORT_READ)
2700 scm_end_input_unlocked (port);
2701
2702 if (end == (size_t) -1)
2703 end = scm_i_string_length (str);
2704
2705 scm_i_display_substring (str, start, end, port);
2706
2707 if (pt->rw_random)
2708 pt->rw_active = SCM_PORT_WRITE;
2709 }
2710
2711
2712 \f
2713
2714 /* Querying and setting positions, and character availability. */
2715
2716 SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0,
2717 (SCM port),
2718 "Return @code{#t} if a character is ready on input @var{port}\n"
2719 "and return @code{#f} otherwise. If @code{char-ready?} returns\n"
2720 "@code{#t} then the next @code{read-char} operation on\n"
2721 "@var{port} is guaranteed not to hang. If @var{port} is a file\n"
2722 "port at end of file then @code{char-ready?} returns @code{#t}.\n"
2723 "\n"
2724 "@code{char-ready?} exists to make it possible for a\n"
2725 "program to accept characters from interactive ports without\n"
2726 "getting stuck waiting for input. Any input editors associated\n"
2727 "with such ports must make sure that characters whose existence\n"
2728 "has been asserted by @code{char-ready?} cannot be rubbed out.\n"
2729 "If @code{char-ready?} were to return @code{#f} at end of file,\n"
2730 "a port at end of file would be indistinguishable from an\n"
2731 "interactive port that has no ready characters.")
2732 #define FUNC_NAME s_scm_char_ready_p
2733 {
2734 scm_t_port *pt;
2735
2736 if (SCM_UNBNDP (port))
2737 port = scm_current_input_port ();
2738 /* It's possible to close the current input port, so validate even in
2739 this case. */
2740 SCM_VALIDATE_OPINPORT (1, port);
2741
2742 pt = SCM_PTAB_ENTRY (port);
2743
2744 /* if the current read buffer is filled, or the
2745 last pushed-back char has been read and the saved buffer is
2746 filled, result is true. */
2747 if (pt->read_pos < pt->read_end
2748 || (pt->read_buf == pt->putback_buf
2749 && pt->saved_read_pos < pt->saved_read_end))
2750 return SCM_BOOL_T;
2751 else
2752 {
2753 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
2754
2755 if (ptob->input_waiting)
2756 return scm_from_bool(ptob->input_waiting (port));
2757 else
2758 return SCM_BOOL_T;
2759 }
2760 }
2761 #undef FUNC_NAME
2762
2763 SCM_DEFINE (scm_seek, "seek", 3, 0, 0,
2764 (SCM fd_port, SCM offset, SCM whence),
2765 "Sets the current position of @var{fd_port} to the integer\n"
2766 "@var{offset}, which is interpreted according to the value of\n"
2767 "@var{whence}.\n"
2768 "\n"
2769 "One of the following variables should be supplied for\n"
2770 "@var{whence}:\n"
2771 "@defvar SEEK_SET\n"
2772 "Seek from the beginning of the file.\n"
2773 "@end defvar\n"
2774 "@defvar SEEK_CUR\n"
2775 "Seek from the current position.\n"
2776 "@end defvar\n"
2777 "@defvar SEEK_END\n"
2778 "Seek from the end of the file.\n"
2779 "@end defvar\n"
2780 "If @var{fd_port} is a file descriptor, the underlying system\n"
2781 "call is @code{lseek}. @var{port} may be a string port.\n"
2782 "\n"
2783 "The value returned is the new position in the file. This means\n"
2784 "that the current position of a port can be obtained using:\n"
2785 "@lisp\n"
2786 "(seek port 0 SEEK_CUR)\n"
2787 "@end lisp")
2788 #define FUNC_NAME s_scm_seek
2789 {
2790 int how;
2791
2792 fd_port = SCM_COERCE_OUTPORT (fd_port);
2793
2794 how = scm_to_int (whence);
2795 if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END)
2796 SCM_OUT_OF_RANGE (3, whence);
2797
2798 if (SCM_OPPORTP (fd_port))
2799 {
2800 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (fd_port);
2801 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (fd_port);
2802 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2803 off_t_or_off64_t rv;
2804
2805 if (!ptob->seek)
2806 SCM_MISC_ERROR ("port is not seekable",
2807 scm_cons (fd_port, SCM_EOL));
2808 else
2809 rv = ptob->seek (fd_port, off, how);
2810
2811 /* Set stream-start flags according to new position. */
2812 pti->at_stream_start_for_bom_read = (rv == 0);
2813 pti->at_stream_start_for_bom_write = (rv == 0);
2814
2815 scm_i_clear_pending_eof (fd_port);
2816
2817 return scm_from_off_t_or_off64_t (rv);
2818 }
2819 else /* file descriptor?. */
2820 {
2821 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2822 off_t_or_off64_t rv;
2823 rv = lseek_or_lseek64 (scm_to_int (fd_port), off, how);
2824 if (rv == -1)
2825 SCM_SYSERROR;
2826 return scm_from_off_t_or_off64_t (rv);
2827 }
2828 }
2829 #undef FUNC_NAME
2830
2831 #ifndef O_BINARY
2832 #define O_BINARY 0
2833 #endif
2834
2835 /* Mingw has ftruncate(), perhaps implemented above using chsize, but
2836 doesn't have the filename version truncate(), hence this code. */
2837 #if HAVE_FTRUNCATE && ! HAVE_TRUNCATE
2838 static int
2839 truncate (const char *file, off_t length)
2840 {
2841 int ret, fdes;
2842
2843 fdes = open (file, O_BINARY | O_WRONLY);
2844 if (fdes == -1)
2845 return -1;
2846
2847 ret = ftruncate (fdes, length);
2848 if (ret == -1)
2849 {
2850 int save_errno = errno;
2851 close (fdes);
2852 errno = save_errno;
2853 return -1;
2854 }
2855
2856 return close (fdes);
2857 }
2858 #endif /* HAVE_FTRUNCATE && ! HAVE_TRUNCATE */
2859
2860 SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0,
2861 (SCM object, SCM length),
2862 "Truncate file @var{object} to @var{length} bytes. @var{object}\n"
2863 "can be a filename string, a port object, or an integer file\n"
2864 "descriptor.\n"
2865 "The return value is unspecified.\n"
2866 "\n"
2867 "For a port or file descriptor @var{length} can be omitted, in\n"
2868 "which case the file is truncated at the current position (per\n"
2869 "@code{ftell} above).\n"
2870 "\n"
2871 "On most systems a file can be extended by giving a length\n"
2872 "greater than the current size, but this is not mandatory in the\n"
2873 "POSIX standard.")
2874 #define FUNC_NAME s_scm_truncate_file
2875 {
2876 int rv;
2877
2878 /* "object" can be a port, fdes or filename.
2879
2880 Negative "length" makes no sense, but it's left to truncate() or
2881 ftruncate() to give back an error for that (normally EINVAL).
2882 */
2883
2884 if (SCM_UNBNDP (length))
2885 {
2886 /* must supply length if object is a filename. */
2887 if (scm_is_string (object))
2888 SCM_MISC_ERROR("must supply length if OBJECT is a filename", SCM_EOL);
2889
2890 length = scm_seek (object, SCM_INUM0, scm_from_int (SEEK_CUR));
2891 }
2892
2893 object = SCM_COERCE_OUTPORT (object);
2894 if (scm_is_integer (object))
2895 {
2896 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2897 SCM_SYSCALL (rv = ftruncate_or_ftruncate64 (scm_to_int (object),
2898 c_length));
2899 }
2900 else if (SCM_OPOUTPORTP (object))
2901 {
2902 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2903 scm_t_port *pt = SCM_PTAB_ENTRY (object);
2904 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (object);
2905
2906 if (!ptob->truncate)
2907 SCM_MISC_ERROR ("port is not truncatable", SCM_EOL);
2908
2909 scm_i_clear_pending_eof (object);
2910 if (pt->rw_active == SCM_PORT_READ)
2911 scm_end_input_unlocked (object);
2912 else if (pt->rw_active == SCM_PORT_WRITE)
2913 ptob->flush (object);
2914
2915 ptob->truncate (object, c_length);
2916 rv = 0;
2917 }
2918 else
2919 {
2920 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2921 char *str = scm_to_locale_string (object);
2922 int eno;
2923 SCM_SYSCALL (rv = truncate_or_truncate64 (str, c_length));
2924 eno = errno;
2925 free (str);
2926 errno = eno;
2927 }
2928 if (rv == -1)
2929 SCM_SYSERROR;
2930 return SCM_UNSPECIFIED;
2931 }
2932 #undef FUNC_NAME
2933
2934 SCM_DEFINE (scm_port_line, "port-line", 1, 0, 0,
2935 (SCM port),
2936 "Return the current line number for @var{port}.\n"
2937 "\n"
2938 "The first line of a file is 0. But you might want to add 1\n"
2939 "when printing line numbers, since starting from 1 is\n"
2940 "traditional in error messages, and likely to be more natural to\n"
2941 "non-programmers.")
2942 #define FUNC_NAME s_scm_port_line
2943 {
2944 port = SCM_COERCE_OUTPORT (port);
2945 SCM_VALIDATE_OPENPORT (1, port);
2946 return scm_from_long (SCM_LINUM (port));
2947 }
2948 #undef FUNC_NAME
2949
2950 SCM_DEFINE (scm_set_port_line_x, "set-port-line!", 2, 0, 0,
2951 (SCM port, SCM line),
2952 "Set the current line number for @var{port} to @var{line}. The\n"
2953 "first line of a file is 0.")
2954 #define FUNC_NAME s_scm_set_port_line_x
2955 {
2956 port = SCM_COERCE_OUTPORT (port);
2957 SCM_VALIDATE_OPENPORT (1, port);
2958 SCM_PTAB_ENTRY (port)->line_number = scm_to_long (line);
2959 return SCM_UNSPECIFIED;
2960 }
2961 #undef FUNC_NAME
2962
2963 SCM_DEFINE (scm_port_column, "port-column", 1, 0, 0,
2964 (SCM port),
2965 "Return the current column number of @var{port}.\n"
2966 "If the number is\n"
2967 "unknown, the result is #f. Otherwise, the result is a 0-origin integer\n"
2968 "- i.e. the first character of the first line is line 0, column 0.\n"
2969 "(However, when you display a file position, for example in an error\n"
2970 "message, we recommend you add 1 to get 1-origin integers. This is\n"
2971 "because lines and column numbers traditionally start with 1, and that is\n"
2972 "what non-programmers will find most natural.)")
2973 #define FUNC_NAME s_scm_port_column
2974 {
2975 port = SCM_COERCE_OUTPORT (port);
2976 SCM_VALIDATE_OPENPORT (1, port);
2977 return scm_from_int (SCM_COL (port));
2978 }
2979 #undef FUNC_NAME
2980
2981 SCM_DEFINE (scm_set_port_column_x, "set-port-column!", 2, 0, 0,
2982 (SCM port, SCM column),
2983 "Set the current column of @var{port}. Before reading the first\n"
2984 "character on a line the column should be 0.")
2985 #define FUNC_NAME s_scm_set_port_column_x
2986 {
2987 port = SCM_COERCE_OUTPORT (port);
2988 SCM_VALIDATE_OPENPORT (1, port);
2989 SCM_PTAB_ENTRY (port)->column_number = scm_to_int (column);
2990 return SCM_UNSPECIFIED;
2991 }
2992 #undef FUNC_NAME
2993
2994 SCM_DEFINE (scm_port_filename, "port-filename", 1, 0, 0,
2995 (SCM port),
2996 "Return the filename associated with @var{port}, or @code{#f}\n"
2997 "if no filename is associated with the port.")
2998 #define FUNC_NAME s_scm_port_filename
2999 {
3000 port = SCM_COERCE_OUTPORT (port);
3001 SCM_VALIDATE_OPENPORT (1, port);
3002 return SCM_FILENAME (port);
3003 }
3004 #undef FUNC_NAME
3005
3006 SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0,
3007 (SCM port, SCM filename),
3008 "Change the filename associated with @var{port}, using the current input\n"
3009 "port if none is specified. Note that this does not change the port's\n"
3010 "source of data, but only the value that is returned by\n"
3011 "@code{port-filename} and reported in diagnostic output.")
3012 #define FUNC_NAME s_scm_set_port_filename_x
3013 {
3014 port = SCM_COERCE_OUTPORT (port);
3015 SCM_VALIDATE_OPENPORT (1, port);
3016 /* We allow the user to set the filename to whatever he likes. */
3017 SCM_SET_FILENAME (port, filename);
3018 return SCM_UNSPECIFIED;
3019 }
3020 #undef FUNC_NAME
3021
3022
3023 \f
3024
3025 /* Implementation helpers for port printing functions. */
3026
3027 void
3028 scm_print_port_mode (SCM exp, SCM port)
3029 {
3030 scm_puts_unlocked (SCM_CLOSEDP (exp)
3031 ? "closed: "
3032 : (SCM_RDNG & SCM_CELL_WORD_0 (exp)
3033 ? (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
3034 ? "input-output: "
3035 : "input: ")
3036 : (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
3037 ? "output: "
3038 : "bogus: ")),
3039 port);
3040 }
3041
3042 int
3043 scm_port_print (SCM exp, SCM port, scm_print_state *pstate SCM_UNUSED)
3044 {
3045 char *type = SCM_PTOBNAME (SCM_PTOBNUM (exp));
3046 if (!type)
3047 type = "port";
3048 scm_puts_unlocked ("#<", port);
3049 scm_print_port_mode (exp, port);
3050 scm_puts_unlocked (type, port);
3051 scm_putc_unlocked (' ', port);
3052 scm_uintprint (SCM_CELL_WORD_1 (exp), 16, port);
3053 scm_putc_unlocked ('>', port);
3054 return 1;
3055 }
3056
3057
3058 \f
3059
3060 /* Iterating over all ports. */
3061
3062 struct for_each_data
3063 {
3064 void (*proc) (void *data, SCM p);
3065 void *data;
3066 };
3067
3068 static SCM
3069 for_each_trampoline (void *data, SCM port, SCM result)
3070 {
3071 struct for_each_data *d = data;
3072
3073 d->proc (d->data, port);
3074
3075 return result;
3076 }
3077
3078 void
3079 scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data)
3080 {
3081 struct for_each_data d;
3082
3083 d.proc = proc;
3084 d.data = data;
3085
3086 scm_c_weak_set_fold (for_each_trampoline, &d, SCM_EOL,
3087 scm_i_port_weak_set);
3088 }
3089
3090 static void
3091 scm_for_each_trampoline (void *data, SCM port)
3092 {
3093 scm_call_1 (SCM_PACK_POINTER (data), port);
3094 }
3095
3096 SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0,
3097 (SCM proc),
3098 "Apply @var{proc} to each port in the Guile port table\n"
3099 "in turn. The return value is unspecified. More specifically,\n"
3100 "@var{proc} is applied exactly once to every port that exists\n"
3101 "in the system at the time @code{port-for-each} is invoked.\n"
3102 "Changes to the port table while @code{port-for-each} is running\n"
3103 "have no effect as far as @code{port-for-each} is concerned.")
3104 #define FUNC_NAME s_scm_port_for_each
3105 {
3106 SCM_VALIDATE_PROC (1, proc);
3107
3108 scm_c_port_for_each (scm_for_each_trampoline, SCM_UNPACK_POINTER (proc));
3109
3110 return SCM_UNSPECIFIED;
3111 }
3112 #undef FUNC_NAME
3113
3114 static void
3115 flush_output_port (void *closure, SCM port)
3116 {
3117 if (SCM_OPOUTPORTP (port))
3118 scm_flush_unlocked (port);
3119 }
3120
3121 SCM_DEFINE (scm_flush_all_ports, "flush-all-ports", 0, 0, 0,
3122 (),
3123 "Equivalent to calling @code{force-output} on\n"
3124 "all open output ports. The return value is unspecified.")
3125 #define FUNC_NAME s_scm_flush_all_ports
3126 {
3127 scm_c_port_for_each (&flush_output_port, NULL);
3128 return SCM_UNSPECIFIED;
3129 }
3130 #undef FUNC_NAME
3131
3132
3133 \f
3134
3135 /* Void ports. */
3136
3137 scm_t_bits scm_tc16_void_port = 0;
3138
3139 static int fill_input_void_port (SCM port SCM_UNUSED)
3140 {
3141 return EOF;
3142 }
3143
3144 static void
3145 write_void_port (SCM port SCM_UNUSED,
3146 const void *data SCM_UNUSED,
3147 size_t size SCM_UNUSED)
3148 {
3149 }
3150
3151 static SCM
3152 scm_i_void_port (long mode_bits)
3153 {
3154 SCM ret;
3155
3156 ret = scm_c_make_port (scm_tc16_void_port, mode_bits, 0);
3157
3158 scm_port_non_buffer (SCM_PTAB_ENTRY (ret));
3159
3160 return ret;
3161 }
3162
3163 SCM
3164 scm_void_port (char *mode_str)
3165 {
3166 return scm_i_void_port (scm_mode_bits (mode_str));
3167 }
3168
3169 SCM_DEFINE (scm_sys_make_void_port, "%make-void-port", 1, 0, 0,
3170 (SCM mode),
3171 "Create and return a new void port. A void port acts like\n"
3172 "@file{/dev/null}. The @var{mode} argument\n"
3173 "specifies the input/output modes for this port: see the\n"
3174 "documentation for @code{open-file} in @ref{File Ports}.")
3175 #define FUNC_NAME s_scm_sys_make_void_port
3176 {
3177 return scm_i_void_port (scm_i_mode_bits (mode));
3178 }
3179 #undef FUNC_NAME
3180
3181
3182 \f
3183
3184 /* Initialization. */
3185
3186 void
3187 scm_init_ports ()
3188 {
3189 /* lseek() symbols. */
3190 scm_c_define ("SEEK_SET", scm_from_int (SEEK_SET));
3191 scm_c_define ("SEEK_CUR", scm_from_int (SEEK_CUR));
3192 scm_c_define ("SEEK_END", scm_from_int (SEEK_END));
3193
3194 scm_tc16_void_port = scm_make_port_type ("void", fill_input_void_port,
3195 write_void_port);
3196
3197 cur_inport_fluid = scm_make_fluid ();
3198 cur_outport_fluid = scm_make_fluid ();
3199 cur_errport_fluid = scm_make_fluid ();
3200 cur_loadport_fluid = scm_make_fluid ();
3201
3202 scm_i_port_weak_set = scm_c_make_weak_set (31);
3203
3204 #include "libguile/ports.x"
3205
3206 /* Use Latin-1 as the default port encoding. */
3207 SCM_VARIABLE_SET (default_port_encoding_var,
3208 scm_make_fluid_with_default (SCM_BOOL_F));
3209 scm_port_encoding_init = 1;
3210
3211 SCM_VARIABLE_SET (default_conversion_strategy_var,
3212 scm_make_fluid_with_default (sym_substitute));
3213 scm_conversion_strategy_init = 1;
3214
3215 /* These bindings are used when boot-9 turns `current-input-port' et
3216 al into parameters. They are then removed from the guile module. */
3217 scm_c_define ("%current-input-port-fluid", cur_inport_fluid);
3218 scm_c_define ("%current-output-port-fluid", cur_outport_fluid);
3219 scm_c_define ("%current-error-port-fluid", cur_errport_fluid);
3220 }
3221
3222 /*
3223 Local Variables:
3224 c-file-style: "gnu"
3225 End:
3226 */