Merge commit '750ac8c592e792e627444f476877f282525b132e'
[bpt/guile.git] / libguile / ports.c
1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004,
2 * 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
18 */
19
20
21 \f
22 /* Headers. */
23
24 #define _LARGEFILE64_SOURCE /* ask for stat64 etc */
25
26 #ifdef HAVE_CONFIG_H
27 # include <config.h>
28 #endif
29
30 #include <stdio.h>
31 #include <errno.h>
32 #include <fcntl.h> /* for chsize on mingw */
33 #include <assert.h>
34 #include <iconv.h>
35 #include <uniconv.h>
36 #include <unistr.h>
37 #include <striconveh.h>
38
39 #include <assert.h>
40
41 #include "libguile/_scm.h"
42 #include "libguile/async.h"
43 #include "libguile/deprecation.h"
44 #include "libguile/eval.h"
45 #include "libguile/fports.h" /* direct access for seek and truncate */
46 #include "libguile/goops.h"
47 #include "libguile/smob.h"
48 #include "libguile/chars.h"
49 #include "libguile/dynwind.h"
50
51 #include "libguile/keywords.h"
52 #include "libguile/hashtab.h"
53 #include "libguile/root.h"
54 #include "libguile/strings.h"
55 #include "libguile/mallocs.h"
56 #include "libguile/validate.h"
57 #include "libguile/ports.h"
58 #include "libguile/ports-internal.h"
59 #include "libguile/vectors.h"
60 #include "libguile/weak-set.h"
61 #include "libguile/fluids.h"
62 #include "libguile/eq.h"
63
64 #ifdef HAVE_STRING_H
65 #include <string.h>
66 #endif
67
68 #ifdef HAVE_IO_H
69 #include <io.h>
70 #endif
71
72 #ifdef HAVE_UNISTD_H
73 #include <unistd.h>
74 #endif
75
76 #ifdef HAVE_SYS_IOCTL_H
77 #include <sys/ioctl.h>
78 #endif
79
80 /* Mingw (version 3.4.5, circa 2006) has ftruncate as an alias for chsize
81 already, but have this code here in case that wasn't so in past versions,
82 or perhaps to help other minimal DOS environments.
83
84 gnulib ftruncate.c has code using fcntl F_CHSIZE and F_FREESP, which
85 might be possibilities if we've got other systems without ftruncate. */
86
87 #if defined HAVE_CHSIZE && ! defined HAVE_FTRUNCATE
88 #define ftruncate(fd, size) chsize (fd, size)
89 #undef HAVE_FTRUNCATE
90 #define HAVE_FTRUNCATE 1
91 #endif
92
93 \f
94 /* Port encodings are case-insensitive ASCII strings. */
95 static char
96 ascii_toupper (char c)
97 {
98 return (c < 'a' || c > 'z') ? c : ('A' + (c - 'a'));
99 }
100
101 /* It is only necessary to use this function on encodings that come from
102 the user and have not been canonicalized yet. Encodings that are set
103 on ports or in the default encoding fluid are in upper-case, and can
104 be compared with strcmp. */
105 static int
106 encoding_matches (const char *enc, const char *upper)
107 {
108 if (!enc)
109 enc = "ISO-8859-1";
110
111 while (*enc)
112 if (ascii_toupper (*enc++) != *upper++)
113 return 0;
114
115 return !*upper;
116 }
117
118 static char*
119 canonicalize_encoding (const char *enc)
120 {
121 char *ret;
122 int i;
123
124 if (!enc)
125 return "ISO-8859-1";
126
127 ret = scm_gc_strdup (enc, "port");
128
129 for (i = 0; ret[i]; i++)
130 {
131 if (ret[i] > 127)
132 /* Restrict to ASCII. */
133 scm_misc_error (NULL, "invalid character encoding ~s",
134 scm_list_1 (scm_from_latin1_string (enc)));
135 else
136 ret[i] = ascii_toupper (ret[i]);
137 }
138
139 return ret;
140 }
141
142
143 \f
144 /* The port kind table --- a dynamically resized array of port types. */
145
146
147 /* scm_ptobs scm_numptob
148 * implement a dynamically resized array of ptob records.
149 * Indexes into this table are used when generating type
150 * tags for smobjects (if you know a tag you can get an index and conversely).
151 */
152 static scm_t_ptob_descriptor **scm_ptobs = NULL;
153 static long scm_numptob = 0; /* Number of port types. */
154 static long scm_ptobs_size = 0; /* Number of slots in the port type
155 table. */
156 static scm_i_pthread_mutex_t scm_ptobs_lock = SCM_I_PTHREAD_MUTEX_INITIALIZER;
157
158 long
159 scm_c_num_port_types (void)
160 {
161 long ret;
162
163 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
164 ret = scm_numptob;
165 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
166
167 return ret;
168 }
169
170 scm_t_ptob_descriptor*
171 scm_c_port_type_ref (long ptobnum)
172 {
173 scm_t_ptob_descriptor *ret = NULL;
174
175 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
176
177 if (0 <= ptobnum && ptobnum < scm_numptob)
178 ret = scm_ptobs[ptobnum];
179
180 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
181
182 if (!ret)
183 scm_out_of_range ("scm_c_port_type_ref", scm_from_long (ptobnum));
184
185 return ret;
186 }
187
188 long
189 scm_c_port_type_add_x (scm_t_ptob_descriptor *desc)
190 {
191 long ret = -1;
192
193 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
194
195 if (scm_numptob + 1 < SCM_I_MAX_PORT_TYPE_COUNT)
196 {
197 if (scm_numptob == scm_ptobs_size)
198 {
199 unsigned long old_size = scm_ptobs_size;
200 scm_t_ptob_descriptor **old_ptobs = scm_ptobs;
201
202 /* Currently there are only 9 predefined port types, so one
203 resize will cover it. */
204 scm_ptobs_size = old_size + 10;
205
206 if (scm_ptobs_size >= SCM_I_MAX_PORT_TYPE_COUNT)
207 scm_ptobs_size = SCM_I_MAX_PORT_TYPE_COUNT;
208
209 scm_ptobs = scm_gc_malloc (sizeof (*scm_ptobs) * scm_ptobs_size,
210 "scm_ptobs");
211
212 memcpy (scm_ptobs, old_ptobs, sizeof (*scm_ptobs) * scm_numptob);
213 }
214
215 ret = scm_numptob++;
216 scm_ptobs[ret] = desc;
217 }
218
219 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
220
221 if (ret < 0)
222 scm_out_of_range ("scm_c_port_type_add_x", scm_from_long (scm_numptob));
223
224 return ret;
225 }
226
227 /*
228 * We choose to use an interface similar to the smob interface with
229 * fill_input and write as standard fields, passed to the port
230 * type constructor, and optional fields set by setters.
231 */
232
233 static void
234 flush_port_default (SCM port SCM_UNUSED)
235 {
236 }
237
238 static void
239 end_input_default (SCM port SCM_UNUSED, int offset SCM_UNUSED)
240 {
241 }
242
243 scm_t_bits
244 scm_make_port_type (char *name,
245 int (*fill_input) (SCM port),
246 void (*write) (SCM port, const void *data, size_t size))
247 {
248 scm_t_ptob_descriptor *desc;
249 long ptobnum;
250
251 desc = scm_gc_malloc_pointerless (sizeof (*desc), "port-type");
252 memset (desc, 0, sizeof (*desc));
253
254 desc->name = name;
255 desc->print = scm_port_print;
256 desc->write = write;
257 desc->flush = flush_port_default;
258 desc->end_input = end_input_default;
259 desc->fill_input = fill_input;
260
261 ptobnum = scm_c_port_type_add_x (desc);
262
263 /* Make a class object if GOOPS is present. */
264 if (SCM_UNPACK (scm_port_class[0]) != 0)
265 scm_make_port_classes (ptobnum, name);
266
267 return scm_tc7_port + ptobnum * 256;
268 }
269
270 void
271 scm_set_port_mark (scm_t_bits tc, SCM (*mark) (SCM))
272 {
273 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->mark = mark;
274 }
275
276 void
277 scm_set_port_free (scm_t_bits tc, size_t (*free) (SCM))
278 {
279 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->free = free;
280 }
281
282 void
283 scm_set_port_print (scm_t_bits tc, int (*print) (SCM exp, SCM port,
284 scm_print_state *pstate))
285 {
286 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->print = print;
287 }
288
289 void
290 scm_set_port_equalp (scm_t_bits tc, SCM (*equalp) (SCM, SCM))
291 {
292 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->equalp = equalp;
293 }
294
295 void
296 scm_set_port_close (scm_t_bits tc, int (*close) (SCM))
297 {
298 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->close = close;
299 }
300
301 void
302 scm_set_port_flush (scm_t_bits tc, void (*flush) (SCM port))
303 {
304 scm_t_ptob_descriptor *ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tc));
305 ptob->flush = flush;
306 ptob->flags |= SCM_PORT_TYPE_HAS_FLUSH;
307 }
308
309 void
310 scm_set_port_end_input (scm_t_bits tc, void (*end_input) (SCM port, int offset))
311 {
312 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->end_input = end_input;
313 }
314
315 void
316 scm_set_port_seek (scm_t_bits tc, scm_t_off (*seek) (SCM, scm_t_off, int))
317 {
318 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->seek = seek;
319 }
320
321 void
322 scm_set_port_truncate (scm_t_bits tc, void (*truncate) (SCM, scm_t_off))
323 {
324 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->truncate = truncate;
325 }
326
327 void
328 scm_set_port_input_waiting (scm_t_bits tc, int (*input_waiting) (SCM))
329 {
330 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->input_waiting = input_waiting;
331 }
332
333 static void
334 scm_i_set_pending_eof (SCM port)
335 {
336 SCM_PORT_GET_INTERNAL (port)->pending_eof = 1;
337 }
338
339 static void
340 scm_i_clear_pending_eof (SCM port)
341 {
342 SCM_PORT_GET_INTERNAL (port)->pending_eof = 0;
343 }
344
345 SCM
346 scm_i_port_alist (SCM port)
347 {
348 return SCM_PORT_GET_INTERNAL (port)->alist;
349 }
350
351 void
352 scm_i_set_port_alist_x (SCM port, SCM alist)
353 {
354 SCM_PORT_GET_INTERNAL (port)->alist = alist;
355 }
356
357 \f
358
359 /* Standard ports --- current input, output, error, and more(!). */
360
361 static SCM cur_inport_fluid = SCM_BOOL_F;
362 static SCM cur_outport_fluid = SCM_BOOL_F;
363 static SCM cur_errport_fluid = SCM_BOOL_F;
364 static SCM cur_loadport_fluid = SCM_BOOL_F;
365
366 SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
367 (),
368 "Return the current input port. This is the default port used\n"
369 "by many input procedures. Initially, @code{current-input-port}\n"
370 "returns the @dfn{standard input} in Unix and C terminology.")
371 #define FUNC_NAME s_scm_current_input_port
372 {
373 if (scm_is_true (cur_inport_fluid))
374 return scm_fluid_ref (cur_inport_fluid);
375 else
376 return SCM_BOOL_F;
377 }
378 #undef FUNC_NAME
379
380 SCM_DEFINE (scm_current_output_port, "current-output-port", 0, 0, 0,
381 (),
382 "Return the current output port. This is the default port used\n"
383 "by many output procedures. Initially,\n"
384 "@code{current-output-port} returns the @dfn{standard output} in\n"
385 "Unix and C terminology.")
386 #define FUNC_NAME s_scm_current_output_port
387 {
388 if (scm_is_true (cur_outport_fluid))
389 return scm_fluid_ref (cur_outport_fluid);
390 else
391 return SCM_BOOL_F;
392 }
393 #undef FUNC_NAME
394
395 SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0,
396 (),
397 "Return the port to which errors and warnings should be sent (the\n"
398 "@dfn{standard error} in Unix and C terminology).")
399 #define FUNC_NAME s_scm_current_error_port
400 {
401 if (scm_is_true (cur_errport_fluid))
402 return scm_fluid_ref (cur_errport_fluid);
403 else
404 return SCM_BOOL_F;
405 }
406 #undef FUNC_NAME
407
408 SCM
409 scm_current_warning_port (void)
410 {
411 static SCM cwp_var = SCM_UNDEFINED;
412 static scm_i_pthread_mutex_t cwp_var_mutex
413 = SCM_I_PTHREAD_MUTEX_INITIALIZER;
414
415 scm_i_scm_pthread_mutex_lock (&cwp_var_mutex);
416 if (SCM_UNBNDP (cwp_var))
417 cwp_var = scm_c_private_variable ("guile", "current-warning-port");
418 scm_i_pthread_mutex_unlock (&cwp_var_mutex);
419
420 return scm_call_0 (scm_variable_ref (cwp_var));
421 }
422
423 SCM_DEFINE (scm_current_load_port, "current-load-port", 0, 0, 0,
424 (),
425 "Return the current-load-port.\n"
426 "The load port is used internally by @code{primitive-load}.")
427 #define FUNC_NAME s_scm_current_load_port
428 {
429 return scm_fluid_ref (cur_loadport_fluid);
430 }
431 #undef FUNC_NAME
432
433 SCM_DEFINE (scm_set_current_input_port, "set-current-input-port", 1, 0, 0,
434 (SCM port),
435 "@deffnx {Scheme Procedure} set-current-output-port port\n"
436 "@deffnx {Scheme Procedure} set-current-error-port port\n"
437 "Change the ports returned by @code{current-input-port},\n"
438 "@code{current-output-port} and @code{current-error-port}, respectively,\n"
439 "so that they use the supplied @var{port} for input or output.")
440 #define FUNC_NAME s_scm_set_current_input_port
441 {
442 SCM oinp = scm_fluid_ref (cur_inport_fluid);
443 SCM_VALIDATE_OPINPORT (1, port);
444 scm_fluid_set_x (cur_inport_fluid, port);
445 return oinp;
446 }
447 #undef FUNC_NAME
448
449
450 SCM_DEFINE (scm_set_current_output_port, "set-current-output-port", 1, 0, 0,
451 (SCM port),
452 "Set the current default output port to @var{port}.")
453 #define FUNC_NAME s_scm_set_current_output_port
454 {
455 SCM ooutp = scm_fluid_ref (cur_outport_fluid);
456 port = SCM_COERCE_OUTPORT (port);
457 SCM_VALIDATE_OPOUTPORT (1, port);
458 scm_fluid_set_x (cur_outport_fluid, port);
459 return ooutp;
460 }
461 #undef FUNC_NAME
462
463
464 SCM_DEFINE (scm_set_current_error_port, "set-current-error-port", 1, 0, 0,
465 (SCM port),
466 "Set the current default error port to @var{port}.")
467 #define FUNC_NAME s_scm_set_current_error_port
468 {
469 SCM oerrp = scm_fluid_ref (cur_errport_fluid);
470 port = SCM_COERCE_OUTPORT (port);
471 SCM_VALIDATE_OPOUTPORT (1, port);
472 scm_fluid_set_x (cur_errport_fluid, port);
473 return oerrp;
474 }
475 #undef FUNC_NAME
476
477
478 SCM
479 scm_set_current_warning_port (SCM port)
480 {
481 static SCM cwp_var = SCM_BOOL_F;
482
483 if (scm_is_false (cwp_var))
484 cwp_var = scm_c_private_lookup ("guile", "current-warning-port");
485
486 return scm_call_1 (scm_variable_ref (cwp_var), port);
487 }
488
489
490 void
491 scm_dynwind_current_input_port (SCM port)
492 #define FUNC_NAME NULL
493 {
494 SCM_VALIDATE_OPINPORT (1, port);
495 scm_dynwind_fluid (cur_inport_fluid, port);
496 }
497 #undef FUNC_NAME
498
499 void
500 scm_dynwind_current_output_port (SCM port)
501 #define FUNC_NAME NULL
502 {
503 port = SCM_COERCE_OUTPORT (port);
504 SCM_VALIDATE_OPOUTPORT (1, port);
505 scm_dynwind_fluid (cur_outport_fluid, port);
506 }
507 #undef FUNC_NAME
508
509 void
510 scm_dynwind_current_error_port (SCM port)
511 #define FUNC_NAME NULL
512 {
513 port = SCM_COERCE_OUTPORT (port);
514 SCM_VALIDATE_OPOUTPORT (1, port);
515 scm_dynwind_fluid (cur_errport_fluid, port);
516 }
517 #undef FUNC_NAME
518
519 void
520 scm_i_dynwind_current_load_port (SCM port)
521 {
522 scm_dynwind_fluid (cur_loadport_fluid, port);
523 }
524
525
526 \f
527
528 /* Retrieving a port's mode. */
529
530 /* Return the flags that characterize a port based on the mode
531 * string used to open a file for that port.
532 *
533 * See PORT FLAGS in scm.h
534 */
535
536 static long
537 scm_i_mode_bits_n (SCM modes)
538 {
539 return (SCM_OPN
540 | (scm_i_string_contains_char (modes, 'r')
541 || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0)
542 | (scm_i_string_contains_char (modes, 'w')
543 || scm_i_string_contains_char (modes, 'a')
544 || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0)
545 | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0)
546 | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0));
547 }
548
549 long
550 scm_mode_bits (char *modes)
551 {
552 /* Valid characters are rw+a0l. So, use latin1. */
553 return scm_i_mode_bits (scm_from_latin1_string (modes));
554 }
555
556 long
557 scm_i_mode_bits (SCM modes)
558 {
559 long bits;
560
561 if (!scm_is_string (modes))
562 scm_wrong_type_arg_msg (NULL, 0, modes, "string");
563
564 bits = scm_i_mode_bits_n (modes);
565 scm_remember_upto_here_1 (modes);
566 return bits;
567 }
568
569 /* Return the mode flags from an open port.
570 * Some modes such as "append" are only used when opening
571 * a file and are not returned here. */
572
573 SCM_DEFINE (scm_port_mode, "port-mode", 1, 0, 0,
574 (SCM port),
575 "Return the port modes associated with the open port @var{port}.\n"
576 "These will not necessarily be identical to the modes used when\n"
577 "the port was opened, since modes such as \"append\" which are\n"
578 "used only during port creation are not retained.")
579 #define FUNC_NAME s_scm_port_mode
580 {
581 char modes[4];
582 modes[0] = '\0';
583
584 port = SCM_COERCE_OUTPORT (port);
585 SCM_VALIDATE_OPPORT (1, port);
586 if (SCM_CELL_WORD_0 (port) & SCM_RDNG) {
587 if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
588 strcpy (modes, "r+");
589 else
590 strcpy (modes, "r");
591 }
592 else if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
593 strcpy (modes, "w");
594 if (SCM_CELL_WORD_0 (port) & SCM_BUF0)
595 strcat (modes, "0");
596
597 return scm_from_latin1_string (modes);
598 }
599 #undef FUNC_NAME
600
601
602 \f
603
604 /* The port table --- a weak set of all ports.
605
606 We need a global registry of ports to flush them all at exit, and to
607 get all the ports matching a file descriptor. */
608 SCM scm_i_port_weak_set;
609
610
611 \f
612
613 /* Port finalization. */
614
615 struct do_free_data
616 {
617 scm_t_ptob_descriptor *ptob;
618 SCM port;
619 };
620
621 static SCM
622 do_free (void *body_data)
623 {
624 struct do_free_data *data = body_data;
625
626 /* `close' is for explicit `close-port' by user. `free' is for this
627 purpose: ports collected by the GC. */
628 data->ptob->free (data->port);
629
630 return SCM_BOOL_T;
631 }
632
633 /* Finalize the object (a port) pointed to by PTR. */
634 static void
635 finalize_port (void *ptr, void *data)
636 {
637 SCM port = SCM_PACK_POINTER (ptr);
638
639 if (!SCM_PORTP (port))
640 abort ();
641
642 if (SCM_OPENP (port))
643 {
644 struct do_free_data data;
645
646 SCM_CLR_PORT_OPEN_FLAG (port);
647
648 data.ptob = SCM_PORT_DESCRIPTOR (port);
649 data.port = port;
650
651 scm_internal_catch (SCM_BOOL_T, do_free, &data,
652 scm_handle_by_message_noexit, NULL);
653
654 scm_gc_ports_collected++;
655 }
656 }
657
658
659 \f
660
661 SCM
662 scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits,
663 const char *encoding,
664 scm_t_string_failed_conversion_handler handler,
665 scm_t_bits stream)
666 {
667 SCM ret;
668 scm_t_port *entry;
669 scm_t_port_internal *pti;
670 scm_t_ptob_descriptor *ptob;
671
672 entry = scm_gc_typed_calloc (scm_t_port);
673 pti = scm_gc_typed_calloc (scm_t_port_internal);
674 ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tag));
675
676 ret = scm_words (tag | mode_bits, 3);
677 SCM_SET_CELL_WORD_1 (ret, (scm_t_bits) entry);
678 SCM_SET_CELL_WORD_2 (ret, (scm_t_bits) ptob);
679
680 entry->lock = scm_gc_malloc_pointerless (sizeof (*entry->lock), "port lock");
681 scm_i_pthread_mutex_init (entry->lock, scm_i_pthread_mutexattr_recursive);
682
683 entry->internal = pti;
684 entry->file_name = SCM_BOOL_F;
685 entry->rw_active = SCM_PORT_NEITHER;
686 entry->port = ret;
687 entry->stream = stream;
688
689 if (encoding_matches (encoding, "UTF-8"))
690 {
691 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
692 entry->encoding = "UTF-8";
693 }
694 else if (encoding_matches (encoding, "ISO-8859-1"))
695 {
696 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
697 entry->encoding = "ISO-8859-1";
698 }
699 else
700 {
701 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
702 entry->encoding = canonicalize_encoding (encoding);
703 }
704
705 entry->ilseq_handler = handler;
706 pti->iconv_descriptors = NULL;
707
708 pti->at_stream_start_for_bom_read = 1;
709 pti->at_stream_start_for_bom_write = 1;
710
711 pti->pending_eof = 0;
712 pti->alist = SCM_EOL;
713
714 if (SCM_PORT_DESCRIPTOR (ret)->free)
715 scm_i_set_finalizer (SCM2PTR (ret), finalize_port, NULL);
716
717 if (SCM_PORT_DESCRIPTOR (ret)->flags & SCM_PORT_TYPE_HAS_FLUSH)
718 scm_weak_set_add_x (scm_i_port_weak_set, ret);
719
720 return ret;
721 }
722
723 SCM
724 scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream)
725 {
726 return scm_c_make_port_with_encoding (tag, mode_bits,
727 scm_i_default_port_encoding (),
728 scm_i_default_port_conversion_handler (),
729 stream);
730 }
731
732 SCM
733 scm_new_port_table_entry (scm_t_bits tag)
734 {
735 return scm_c_make_port (tag, 0, 0);
736 }
737
738 \f
739
740 /* Predicates. */
741
742 SCM_DEFINE (scm_port_p, "port?", 1, 0, 0,
743 (SCM x),
744 "Return a boolean indicating whether @var{x} is a port.\n"
745 "Equivalent to @code{(or (input-port? @var{x}) (output-port?\n"
746 "@var{x}))}.")
747 #define FUNC_NAME s_scm_port_p
748 {
749 return scm_from_bool (SCM_PORTP (x));
750 }
751 #undef FUNC_NAME
752
753 SCM_DEFINE (scm_input_port_p, "input-port?", 1, 0, 0,
754 (SCM x),
755 "Return @code{#t} if @var{x} is an input port, otherwise return\n"
756 "@code{#f}. Any object satisfying this predicate also satisfies\n"
757 "@code{port?}.")
758 #define FUNC_NAME s_scm_input_port_p
759 {
760 return scm_from_bool (SCM_INPUT_PORT_P (x));
761 }
762 #undef FUNC_NAME
763
764 SCM_DEFINE (scm_output_port_p, "output-port?", 1, 0, 0,
765 (SCM x),
766 "Return @code{#t} if @var{x} is an output port, otherwise return\n"
767 "@code{#f}. Any object satisfying this predicate also satisfies\n"
768 "@code{port?}.")
769 #define FUNC_NAME s_scm_output_port_p
770 {
771 x = SCM_COERCE_OUTPORT (x);
772 return scm_from_bool (SCM_OUTPUT_PORT_P (x));
773 }
774 #undef FUNC_NAME
775
776 SCM_DEFINE (scm_port_closed_p, "port-closed?", 1, 0, 0,
777 (SCM port),
778 "Return @code{#t} if @var{port} is closed or @code{#f} if it is\n"
779 "open.")
780 #define FUNC_NAME s_scm_port_closed_p
781 {
782 SCM_VALIDATE_PORT (1, port);
783 return scm_from_bool (!SCM_OPPORTP (port));
784 }
785 #undef FUNC_NAME
786
787 SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0,
788 (SCM x),
789 "Return @code{#t} if @var{x} is an end-of-file object; otherwise\n"
790 "return @code{#f}.")
791 #define FUNC_NAME s_scm_eof_object_p
792 {
793 return scm_from_bool (SCM_EOF_OBJECT_P (x));
794 }
795 #undef FUNC_NAME
796
797
798 \f
799
800 /* Closing ports. */
801
802 static void close_iconv_descriptors (scm_t_iconv_descriptors *id);
803
804 /* scm_close_port
805 * Call the close operation on a port object.
806 * see also scm_close.
807 */
808 SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0,
809 (SCM port),
810 "Close the specified port object. Return @code{#t} if it\n"
811 "successfully closes a port or @code{#f} if it was already\n"
812 "closed. An exception may be raised if an error occurs, for\n"
813 "example when flushing buffered output. See also @ref{Ports and\n"
814 "File Descriptors, close}, for a procedure which can close file\n"
815 "descriptors.")
816 #define FUNC_NAME s_scm_close_port
817 {
818 scm_t_port_internal *pti;
819 int rv;
820
821 port = SCM_COERCE_OUTPORT (port);
822
823 SCM_VALIDATE_PORT (1, port);
824 if (SCM_CLOSEDP (port))
825 return SCM_BOOL_F;
826
827 pti = SCM_PORT_GET_INTERNAL (port);
828 SCM_CLR_PORT_OPEN_FLAG (port);
829
830 if (SCM_PORT_DESCRIPTOR (port)->flags & SCM_PORT_TYPE_HAS_FLUSH)
831 scm_weak_set_remove_x (scm_i_port_weak_set, port);
832
833 if (SCM_PORT_DESCRIPTOR (port)->close)
834 /* Note! This may throw an exception. Anything after this point
835 should be resilient to non-local exits. */
836 rv = SCM_PORT_DESCRIPTOR (port)->close (port);
837 else
838 rv = 0;
839
840 if (pti->iconv_descriptors)
841 {
842 /* If we don't get here, the iconv_descriptors finalizer will
843 clean up. */
844 close_iconv_descriptors (pti->iconv_descriptors);
845 pti->iconv_descriptors = NULL;
846 }
847
848 return scm_from_bool (rv >= 0);
849 }
850 #undef FUNC_NAME
851
852 SCM_DEFINE (scm_close_input_port, "close-input-port", 1, 0, 0,
853 (SCM port),
854 "Close the specified input port object. The routine has no effect if\n"
855 "the file has already been closed. An exception may be raised if an\n"
856 "error occurs. The value returned is unspecified.\n\n"
857 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
858 "which can close file descriptors.")
859 #define FUNC_NAME s_scm_close_input_port
860 {
861 SCM_VALIDATE_INPUT_PORT (1, port);
862 scm_close_port (port);
863 return SCM_UNSPECIFIED;
864 }
865 #undef FUNC_NAME
866
867 SCM_DEFINE (scm_close_output_port, "close-output-port", 1, 0, 0,
868 (SCM port),
869 "Close the specified output port object. The routine has no effect if\n"
870 "the file has already been closed. An exception may be raised if an\n"
871 "error occurs. The value returned is unspecified.\n\n"
872 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
873 "which can close file descriptors.")
874 #define FUNC_NAME s_scm_close_output_port
875 {
876 port = SCM_COERCE_OUTPORT (port);
877 SCM_VALIDATE_OUTPUT_PORT (1, port);
878 scm_close_port (port);
879 return SCM_UNSPECIFIED;
880 }
881 #undef FUNC_NAME
882
883
884 \f
885
886 /* Encoding characters to byte streams, and decoding byte streams to
887 characters. */
888
889 /* A fluid specifying the default encoding for newly created ports. If it is
890 a string, that is the encoding. If it is #f, it is in the "native"
891 (Latin-1) encoding. */
892 SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding");
893
894 static int scm_port_encoding_init = 0;
895
896 /* Use ENCODING as the default encoding for future ports. */
897 void
898 scm_i_set_default_port_encoding (const char *encoding)
899 {
900 if (!scm_port_encoding_init
901 || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
902 scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
903 SCM_EOL);
904
905 if (encoding_matches (encoding, "ISO-8859-1"))
906 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
907 else
908 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
909 scm_from_latin1_string (canonicalize_encoding (encoding)));
910 }
911
912 /* Return the name of the default encoding for newly created ports. */
913 const char *
914 scm_i_default_port_encoding (void)
915 {
916 if (!scm_port_encoding_init)
917 return "ISO-8859-1";
918 else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
919 return "ISO-8859-1";
920 else
921 {
922 SCM encoding;
923
924 encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
925 if (!scm_is_string (encoding))
926 return "ISO-8859-1";
927 else
928 return scm_i_string_chars (encoding);
929 }
930 }
931
932 /* A fluid specifying the default conversion handler for newly created
933 ports. Its value should be one of the symbols below. */
934 SCM_VARIABLE (default_conversion_strategy_var,
935 "%default-port-conversion-strategy");
936
937 /* Whether the above fluid is initialized. */
938 static int scm_conversion_strategy_init = 0;
939
940 /* The possible conversion strategies. */
941 SCM_SYMBOL (sym_error, "error");
942 SCM_SYMBOL (sym_substitute, "substitute");
943 SCM_SYMBOL (sym_escape, "escape");
944
945 /* Return the default failed encoding conversion policy for new created
946 ports. */
947 scm_t_string_failed_conversion_handler
948 scm_i_default_port_conversion_handler (void)
949 {
950 scm_t_string_failed_conversion_handler handler;
951
952 if (!scm_conversion_strategy_init
953 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
954 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
955 else
956 {
957 SCM fluid, value;
958
959 fluid = SCM_VARIABLE_REF (default_conversion_strategy_var);
960 value = scm_fluid_ref (fluid);
961
962 if (scm_is_eq (sym_substitute, value))
963 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
964 else if (scm_is_eq (sym_escape, value))
965 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
966 else
967 /* Default to 'error also when the fluid's value is not one of
968 the valid symbols. */
969 handler = SCM_FAILED_CONVERSION_ERROR;
970 }
971
972 return handler;
973 }
974
975 /* Use HANDLER as the default conversion strategy for future ports. */
976 void
977 scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler
978 handler)
979 {
980 SCM strategy;
981
982 if (!scm_conversion_strategy_init
983 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
984 scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
985 SCM_EOL);
986
987 switch (handler)
988 {
989 case SCM_FAILED_CONVERSION_ERROR:
990 strategy = sym_error;
991 break;
992
993 case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE:
994 strategy = sym_escape;
995 break;
996
997 case SCM_FAILED_CONVERSION_QUESTION_MARK:
998 strategy = sym_substitute;
999 break;
1000
1001 default:
1002 abort ();
1003 }
1004
1005 scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var),
1006 strategy);
1007 }
1008
1009 static void
1010 scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port);
1011
1012 /* If the next LEN bytes from PORT are equal to those in BYTES, then
1013 return 1, else return 0. Leave the port position unchanged. */
1014 static int
1015 looking_at_bytes (SCM port, const unsigned char *bytes, int len)
1016 {
1017 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1018 int i = 0;
1019
1020 while (i < len && scm_peek_byte_or_eof_unlocked (port) == bytes[i])
1021 {
1022 pt->read_pos++;
1023 i++;
1024 }
1025 scm_i_unget_bytes_unlocked (bytes, i, port);
1026 return (i == len);
1027 }
1028
1029 static const unsigned char scm_utf8_bom[3] = {0xEF, 0xBB, 0xBF};
1030 static const unsigned char scm_utf16be_bom[2] = {0xFE, 0xFF};
1031 static const unsigned char scm_utf16le_bom[2] = {0xFF, 0xFE};
1032 static const unsigned char scm_utf32be_bom[4] = {0x00, 0x00, 0xFE, 0xFF};
1033 static const unsigned char scm_utf32le_bom[4] = {0xFF, 0xFE, 0x00, 0x00};
1034
1035 /* Decide what byte order to use for a UTF-16 port. Return "UTF-16BE"
1036 or "UTF-16LE". MODE must be either SCM_PORT_READ or SCM_PORT_WRITE,
1037 and specifies which operation is about to be done. The MODE
1038 determines how we will decide the byte order. We deliberately avoid
1039 reading from the port unless the user is about to do so. If the user
1040 is about to read, then we look for a BOM, and if present, we use it
1041 to determine the byte order. Otherwise we choose big endian, as
1042 recommended by the Unicode Standard. Note that the BOM (if any) is
1043 not consumed here. */
1044 static const char *
1045 decide_utf16_encoding (SCM port, scm_t_port_rw_active mode)
1046 {
1047 if (mode == SCM_PORT_READ
1048 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1049 && looking_at_bytes (port, scm_utf16le_bom, sizeof scm_utf16le_bom))
1050 return "UTF-16LE";
1051 else
1052 return "UTF-16BE";
1053 }
1054
1055 /* Decide what byte order to use for a UTF-32 port. Return "UTF-32BE"
1056 or "UTF-32LE". See the comment above 'decide_utf16_encoding' for
1057 details. */
1058 static const char *
1059 decide_utf32_encoding (SCM port, scm_t_port_rw_active mode)
1060 {
1061 if (mode == SCM_PORT_READ
1062 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1063 && looking_at_bytes (port, scm_utf32le_bom, sizeof scm_utf32le_bom))
1064 return "UTF-32LE";
1065 else
1066 return "UTF-32BE";
1067 }
1068
1069 static void
1070 finalize_iconv_descriptors (void *ptr, void *data)
1071 {
1072 close_iconv_descriptors (ptr);
1073 }
1074
1075 static scm_t_iconv_descriptors *
1076 open_iconv_descriptors (const char *encoding, int reading, int writing)
1077 {
1078 scm_t_iconv_descriptors *id;
1079 iconv_t input_cd, output_cd;
1080 size_t i;
1081
1082 input_cd = (iconv_t) -1;
1083 output_cd = (iconv_t) -1;
1084
1085 for (i = 0; encoding[i]; i++)
1086 if (encoding[i] > 127)
1087 goto invalid_encoding;
1088
1089 if (reading)
1090 {
1091 /* Open an input iconv conversion descriptor, from ENCODING
1092 to UTF-8. We choose UTF-8, not UTF-32, because iconv
1093 implementations can typically convert from anything to
1094 UTF-8, but not to UTF-32 (see
1095 <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
1096
1097 /* Assume opening an iconv descriptor causes about 16 KB of
1098 allocation. */
1099 scm_gc_register_allocation (16 * 1024);
1100
1101 input_cd = iconv_open ("UTF-8", encoding);
1102 if (input_cd == (iconv_t) -1)
1103 goto invalid_encoding;
1104 }
1105
1106 if (writing)
1107 {
1108 /* Assume opening an iconv descriptor causes about 16 KB of
1109 allocation. */
1110 scm_gc_register_allocation (16 * 1024);
1111
1112 output_cd = iconv_open (encoding, "UTF-8");
1113 if (output_cd == (iconv_t) -1)
1114 {
1115 if (input_cd != (iconv_t) -1)
1116 iconv_close (input_cd);
1117 goto invalid_encoding;
1118 }
1119 }
1120
1121 id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors");
1122 id->input_cd = input_cd;
1123 id->output_cd = output_cd;
1124
1125 /* Register a finalizer to close the descriptors. */
1126 scm_i_set_finalizer (id, finalize_iconv_descriptors, NULL);
1127
1128 return id;
1129
1130 invalid_encoding:
1131 {
1132 SCM err;
1133 err = scm_from_latin1_string (encoding);
1134 scm_misc_error ("open_iconv_descriptors",
1135 "invalid or unknown character encoding ~s",
1136 scm_list_1 (err));
1137 }
1138 }
1139
1140 static void
1141 close_iconv_descriptors (scm_t_iconv_descriptors *id)
1142 {
1143 if (id->input_cd != (iconv_t) -1)
1144 iconv_close (id->input_cd);
1145 if (id->output_cd != (iconv_t) -1)
1146 iconv_close (id->output_cd);
1147 id->input_cd = (void *) -1;
1148 id->output_cd = (void *) -1;
1149 }
1150
1151 scm_t_iconv_descriptors *
1152 scm_i_port_iconv_descriptors (SCM port, scm_t_port_rw_active mode)
1153 {
1154 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
1155
1156 assert (pti->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV);
1157
1158 if (!pti->iconv_descriptors)
1159 {
1160 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1161 const char *precise_encoding;
1162
1163 if (!pt->encoding)
1164 pt->encoding = "ISO-8859-1";
1165
1166 /* If the specified encoding is UTF-16 or UTF-32, then make
1167 that more precise by deciding what byte order to use. */
1168 if (strcmp (pt->encoding, "UTF-16") == 0)
1169 precise_encoding = decide_utf16_encoding (port, mode);
1170 else if (strcmp (pt->encoding, "UTF-32") == 0)
1171 precise_encoding = decide_utf32_encoding (port, mode);
1172 else
1173 precise_encoding = pt->encoding;
1174
1175 pti->iconv_descriptors =
1176 open_iconv_descriptors (precise_encoding,
1177 SCM_INPUT_PORT_P (port),
1178 SCM_OUTPUT_PORT_P (port));
1179 }
1180
1181 return pti->iconv_descriptors;
1182 }
1183
1184 /* The name of the encoding is itself encoded in ASCII. */
1185 void
1186 scm_i_set_port_encoding_x (SCM port, const char *encoding)
1187 {
1188 scm_t_port *pt;
1189 scm_t_port_internal *pti;
1190 scm_t_iconv_descriptors *prev;
1191
1192 /* Set the character encoding for this port. */
1193 pt = SCM_PTAB_ENTRY (port);
1194 pti = SCM_PORT_GET_INTERNAL (port);
1195 prev = pti->iconv_descriptors;
1196
1197 /* In order to handle cases where the encoding changes mid-stream
1198 (e.g. within an HTTP stream, or within a file that is composed of
1199 segments with different encodings), we consider this to be "stream
1200 start" for purposes of BOM handling, regardless of our actual file
1201 position. */
1202 pti->at_stream_start_for_bom_read = 1;
1203 pti->at_stream_start_for_bom_write = 1;
1204
1205 if (encoding_matches (encoding, "UTF-8"))
1206 {
1207 pt->encoding = "UTF-8";
1208 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
1209 }
1210 else if (encoding_matches (encoding, "ISO-8859-1"))
1211 {
1212 pt->encoding = "ISO-8859-1";
1213 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
1214 }
1215 else
1216 {
1217 pt->encoding = canonicalize_encoding (encoding);
1218 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
1219 }
1220
1221 pti->iconv_descriptors = NULL;
1222 if (prev)
1223 close_iconv_descriptors (prev);
1224 }
1225
1226 SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
1227 (SCM port),
1228 "Returns, as a string, the character encoding that @var{port}\n"
1229 "uses to interpret its input and output.\n")
1230 #define FUNC_NAME s_scm_port_encoding
1231 {
1232 SCM_VALIDATE_PORT (1, port);
1233
1234 return scm_from_latin1_string (SCM_PTAB_ENTRY (port)->encoding);
1235 }
1236 #undef FUNC_NAME
1237
1238 SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
1239 (SCM port, SCM enc),
1240 "Sets the character encoding that will be used to interpret all\n"
1241 "port I/O. New ports are created with the encoding\n"
1242 "appropriate for the current locale if @code{setlocale} has \n"
1243 "been called or ISO-8859-1 otherwise\n"
1244 "and this procedure can be used to modify that encoding.\n")
1245 #define FUNC_NAME s_scm_set_port_encoding_x
1246 {
1247 char *enc_str;
1248
1249 SCM_VALIDATE_PORT (1, port);
1250 SCM_VALIDATE_STRING (2, enc);
1251
1252 enc_str = scm_to_latin1_string (enc);
1253 scm_i_set_port_encoding_x (port, enc_str);
1254 free (enc_str);
1255
1256 return SCM_UNSPECIFIED;
1257 }
1258 #undef FUNC_NAME
1259
1260 SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
1261 1, 0, 0, (SCM port),
1262 "Returns the behavior of the port when handling a character that\n"
1263 "is not representable in the port's current encoding.\n"
1264 "It returns the symbol @code{error} if unrepresentable characters\n"
1265 "should cause exceptions, @code{substitute} if the port should\n"
1266 "try to replace unrepresentable characters with question marks or\n"
1267 "approximate characters, or @code{escape} if unrepresentable\n"
1268 "characters should be converted to string escapes.\n"
1269 "\n"
1270 "If @var{port} is @code{#f}, then the current default behavior\n"
1271 "will be returned. New ports will have this default behavior\n"
1272 "when they are created.\n")
1273 #define FUNC_NAME s_scm_port_conversion_strategy
1274 {
1275 scm_t_string_failed_conversion_handler h;
1276
1277 SCM_VALIDATE_OPPORT (1, port);
1278
1279 if (scm_is_false (port))
1280 h = scm_i_default_port_conversion_handler ();
1281 else
1282 {
1283 scm_t_port *pt;
1284
1285 SCM_VALIDATE_OPPORT (1, port);
1286 pt = SCM_PTAB_ENTRY (port);
1287
1288 h = pt->ilseq_handler;
1289 }
1290
1291 if (h == SCM_FAILED_CONVERSION_ERROR)
1292 return scm_from_latin1_symbol ("error");
1293 else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
1294 return scm_from_latin1_symbol ("substitute");
1295 else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
1296 return scm_from_latin1_symbol ("escape");
1297 else
1298 abort ();
1299
1300 /* Never gets here. */
1301 return SCM_UNDEFINED;
1302 }
1303 #undef FUNC_NAME
1304
1305 SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!",
1306 2, 0, 0,
1307 (SCM port, SCM sym),
1308 "Sets the behavior of the interpreter when outputting a character\n"
1309 "that is not representable in the port's current encoding.\n"
1310 "@var{sym} can be either @code{'error}, @code{'substitute}, or\n"
1311 "@code{'escape}. If it is @code{'error}, an error will be thrown\n"
1312 "when an unconvertible character is encountered. If it is\n"
1313 "@code{'substitute}, then unconvertible characters will \n"
1314 "be replaced with approximate characters, or with question marks\n"
1315 "if no approximately correct character is available.\n"
1316 "If it is @code{'escape},\n"
1317 "it will appear as a hex escape when output.\n"
1318 "\n"
1319 "If @var{port} is an open port, the conversion error behavior\n"
1320 "is set for that port. If it is @code{#f}, it is set as the\n"
1321 "default behavior for any future ports that get created in\n"
1322 "this thread.\n")
1323 #define FUNC_NAME s_scm_set_port_conversion_strategy_x
1324 {
1325 scm_t_string_failed_conversion_handler handler;
1326
1327 if (scm_is_eq (sym, sym_error))
1328 handler = SCM_FAILED_CONVERSION_ERROR;
1329 else if (scm_is_eq (sym, sym_substitute))
1330 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
1331 else if (scm_is_eq (sym, sym_escape))
1332 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
1333 else
1334 SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym));
1335
1336 if (scm_is_false (port))
1337 scm_i_set_default_port_conversion_handler (handler);
1338 else
1339 {
1340 SCM_VALIDATE_OPPORT (1, port);
1341 SCM_PTAB_ENTRY (port)->ilseq_handler = handler;
1342 }
1343
1344 return SCM_UNSPECIFIED;
1345 }
1346 #undef FUNC_NAME
1347
1348
1349 \f
1350
1351 /* The port lock. */
1352
1353 static void
1354 lock_port (void *mutex)
1355 {
1356 scm_i_pthread_mutex_lock ((scm_i_pthread_mutex_t *) mutex);
1357 }
1358
1359 static void
1360 unlock_port (void *mutex)
1361 {
1362 scm_i_pthread_mutex_unlock ((scm_i_pthread_mutex_t *) mutex);
1363 }
1364
1365 void
1366 scm_dynwind_lock_port (SCM port)
1367 #define FUNC_NAME "dynwind-lock-port"
1368 {
1369 scm_i_pthread_mutex_t *lock;
1370 SCM_VALIDATE_OPPORT (SCM_ARG1, port);
1371 scm_c_lock_port (port, &lock);
1372 if (lock)
1373 {
1374 scm_dynwind_unwind_handler (unlock_port, lock, SCM_F_WIND_EXPLICITLY);
1375 scm_dynwind_rewind_handler (lock_port, lock, 0);
1376 }
1377 }
1378 #undef FUNC_NAME
1379
1380
1381 \f
1382
1383 /* Input. */
1384
1385 int
1386 scm_get_byte_or_eof (SCM port)
1387 {
1388 scm_i_pthread_mutex_t *lock;
1389 int ret;
1390
1391 scm_c_lock_port (port, &lock);
1392 ret = scm_get_byte_or_eof_unlocked (port);
1393 if (lock)
1394 scm_i_pthread_mutex_unlock (lock);
1395
1396 return ret;
1397 }
1398
1399 int
1400 scm_peek_byte_or_eof (SCM port)
1401 {
1402 scm_i_pthread_mutex_t *lock;
1403 int ret;
1404
1405 scm_c_lock_port (port, &lock);
1406 ret = scm_peek_byte_or_eof_unlocked (port);
1407 if (lock)
1408 scm_i_pthread_mutex_unlock (lock);
1409
1410 return ret;
1411 }
1412
1413 /* scm_c_read
1414 *
1415 * Used by an application to read arbitrary number of bytes from an
1416 * SCM port. Same semantics as libc read, except that scm_c_read only
1417 * returns less than SIZE bytes if at end-of-file.
1418 *
1419 * Warning: Doesn't update port line and column counts! */
1420
1421 /* This structure, and the following swap_buffer function, are used
1422 for temporarily swapping a port's own read buffer, and the buffer
1423 that the caller of scm_c_read provides. */
1424 struct port_and_swap_buffer
1425 {
1426 scm_t_port *pt;
1427 unsigned char *buffer;
1428 size_t size;
1429 };
1430
1431 static void
1432 swap_buffer (void *data)
1433 {
1434 struct port_and_swap_buffer *psb = (struct port_and_swap_buffer *) data;
1435 unsigned char *old_buf = psb->pt->read_buf;
1436 size_t old_size = psb->pt->read_buf_size;
1437
1438 /* Make the port use (buffer, size) from the struct. */
1439 psb->pt->read_pos = psb->pt->read_buf = psb->pt->read_end = psb->buffer;
1440 psb->pt->read_buf_size = psb->size;
1441
1442 /* Save the port's old (buffer, size) in the struct. */
1443 psb->buffer = old_buf;
1444 psb->size = old_size;
1445 }
1446
1447 static int scm_i_fill_input_unlocked (SCM port);
1448
1449 size_t
1450 scm_c_read_unlocked (SCM port, void *buffer, size_t size)
1451 #define FUNC_NAME "scm_c_read"
1452 {
1453 scm_t_port *pt;
1454 scm_t_port_internal *pti;
1455 size_t n_read = 0, n_available;
1456 struct port_and_swap_buffer psb;
1457
1458 SCM_VALIDATE_OPINPORT (1, port);
1459
1460 pt = SCM_PTAB_ENTRY (port);
1461 pti = SCM_PORT_GET_INTERNAL (port);
1462 if (pt->rw_active == SCM_PORT_WRITE)
1463 SCM_PORT_DESCRIPTOR (port)->flush (port);
1464
1465 if (pt->rw_random)
1466 pt->rw_active = SCM_PORT_READ;
1467
1468 /* Take bytes first from the port's read buffer. */
1469 if (pt->read_pos < pt->read_end)
1470 {
1471 n_available = min (size, pt->read_end - pt->read_pos);
1472 memcpy (buffer, pt->read_pos, n_available);
1473 buffer = (char *) buffer + n_available;
1474 pt->read_pos += n_available;
1475 n_read += n_available;
1476 size -= n_available;
1477 }
1478
1479 /* Avoid the scm_dynwind_* costs if we now have enough data. */
1480 if (size == 0)
1481 return n_read;
1482
1483 /* Now we will call scm_i_fill_input_unlocked repeatedly until we have
1484 read the requested number of bytes. (Note that a single
1485 scm_i_fill_input_unlocked call does not guarantee to fill the whole
1486 of the port's read buffer.) */
1487 if (pt->read_buf_size <= 1
1488 && pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
1489 {
1490 /* The port that we are reading from is unbuffered - i.e. does not
1491 have its own persistent buffer - but we have a buffer, provided
1492 by our caller, that is the right size for the data that is
1493 wanted. For the following scm_i_fill_input_unlocked calls,
1494 therefore, we use the buffer in hand as the port's read buffer.
1495
1496 We need to make sure that the port's normal (1 byte) buffer is
1497 reinstated in case one of the scm_i_fill_input_unlocked ()
1498 calls throws an exception; we use the scm_dynwind_* API to
1499 achieve that.
1500
1501 A consequence of this optimization is that the fill_input
1502 functions can't unget characters. That'll push data to the
1503 pushback buffer instead of this psb buffer. */
1504 #if SCM_DEBUG == 1
1505 unsigned char *pback = pt->putback_buf;
1506 #endif
1507 psb.pt = pt;
1508 psb.buffer = buffer;
1509 psb.size = size;
1510 scm_dynwind_begin (SCM_F_DYNWIND_REWINDABLE);
1511 scm_dynwind_rewind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1512 scm_dynwind_unwind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1513
1514 /* Call scm_i_fill_input_unlocked until we have all the bytes that
1515 we need, or we hit EOF. */
1516 while (pt->read_buf_size && (scm_i_fill_input_unlocked (port) != EOF))
1517 {
1518 pt->read_buf_size -= (pt->read_end - pt->read_pos);
1519 pt->read_pos = pt->read_buf = pt->read_end;
1520 }
1521 #if SCM_DEBUG == 1
1522 if (pback != pt->putback_buf
1523 || pt->read_buf - (unsigned char *) buffer < 0)
1524 scm_misc_error (FUNC_NAME,
1525 "scm_c_read must not call a fill function that pushes "
1526 "back characters onto an unbuffered port", SCM_EOL);
1527 #endif
1528 n_read += pt->read_buf - (unsigned char *) buffer;
1529
1530 /* Reinstate the port's normal buffer. */
1531 scm_dynwind_end ();
1532 }
1533 else
1534 {
1535 /* The port has its own buffer. It is important that we use it,
1536 even if it happens to be smaller than our caller's buffer, so
1537 that a custom port implementation's entry points (in
1538 particular, fill_input) can rely on the buffer always being
1539 the same as they first set up. */
1540 while (size && (scm_i_fill_input_unlocked (port) != EOF))
1541 {
1542 n_available = min (size, pt->read_end - pt->read_pos);
1543 memcpy (buffer, pt->read_pos, n_available);
1544 buffer = (char *) buffer + n_available;
1545 pt->read_pos += n_available;
1546 n_read += n_available;
1547 size -= n_available;
1548 }
1549 }
1550
1551 return n_read;
1552 }
1553 #undef FUNC_NAME
1554
1555 size_t
1556 scm_c_read (SCM port, void *buffer, size_t size)
1557 {
1558 scm_i_pthread_mutex_t *lock;
1559 size_t ret;
1560
1561 scm_c_lock_port (port, &lock);
1562 ret = scm_c_read_unlocked (port, buffer, size);
1563 if (lock)
1564 scm_i_pthread_mutex_unlock (lock);
1565
1566
1567 return ret;
1568 }
1569
1570 /* Update the line and column number of PORT after consumption of C. */
1571 static inline void
1572 update_port_lf (scm_t_wchar c, SCM port)
1573 {
1574 switch (c)
1575 {
1576 case '\a':
1577 case EOF:
1578 break;
1579 case '\b':
1580 SCM_DECCOL (port);
1581 break;
1582 case '\n':
1583 SCM_INCLINE (port);
1584 break;
1585 case '\r':
1586 SCM_ZEROCOL (port);
1587 break;
1588 case '\t':
1589 SCM_TABCOL (port);
1590 break;
1591 default:
1592 SCM_INCCOL (port);
1593 break;
1594 }
1595 }
1596
1597 #define SCM_MBCHAR_BUF_SIZE (4)
1598
1599 /* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint.
1600 UTF8_BUF is assumed to contain a valid UTF-8 sequence. */
1601 static scm_t_wchar
1602 utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
1603 {
1604 scm_t_wchar codepoint;
1605
1606 if (utf8_buf[0] <= 0x7f)
1607 {
1608 assert (size == 1);
1609 codepoint = utf8_buf[0];
1610 }
1611 else if ((utf8_buf[0] & 0xe0) == 0xc0)
1612 {
1613 assert (size == 2);
1614 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL
1615 | (utf8_buf[1] & 0x3f);
1616 }
1617 else if ((utf8_buf[0] & 0xf0) == 0xe0)
1618 {
1619 assert (size == 3);
1620 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL
1621 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL
1622 | (utf8_buf[2] & 0x3f);
1623 }
1624 else
1625 {
1626 assert (size == 4);
1627 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL
1628 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL
1629 | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL
1630 | (utf8_buf[3] & 0x3f);
1631 }
1632
1633 return codepoint;
1634 }
1635
1636 /* Read a UTF-8 sequence from PORT. On success, return 0 and set
1637 *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8
1638 representation, and set *LEN to the length in bytes. Return
1639 `EILSEQ' on error. */
1640 static int
1641 get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
1642 scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1643 {
1644 #define ASSERT_NOT_EOF(b) \
1645 if (SCM_UNLIKELY ((b) == EOF)) \
1646 goto invalid_seq
1647 #define CONSUME_PEEKED_BYTE() \
1648 pt->read_pos++
1649
1650 int byte;
1651 scm_t_port *pt;
1652
1653 *len = 0;
1654 pt = SCM_PTAB_ENTRY (port);
1655
1656 byte = scm_get_byte_or_eof_unlocked (port);
1657 if (byte == EOF)
1658 {
1659 *codepoint = EOF;
1660 return 0;
1661 }
1662
1663 buf[0] = (scm_t_uint8) byte;
1664 *len = 1;
1665
1666 if (buf[0] <= 0x7f)
1667 /* 1-byte form. */
1668 *codepoint = buf[0];
1669 else if (buf[0] >= 0xc2 && buf[0] <= 0xdf)
1670 {
1671 /* 2-byte form. */
1672 byte = scm_peek_byte_or_eof_unlocked (port);
1673 ASSERT_NOT_EOF (byte);
1674
1675 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1676 goto invalid_seq;
1677
1678 CONSUME_PEEKED_BYTE ();
1679 buf[1] = (scm_t_uint8) byte;
1680 *len = 2;
1681
1682 *codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL
1683 | (buf[1] & 0x3f);
1684 }
1685 else if ((buf[0] & 0xf0) == 0xe0)
1686 {
1687 /* 3-byte form. */
1688 byte = scm_peek_byte_or_eof_unlocked (port);
1689 ASSERT_NOT_EOF (byte);
1690
1691 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80
1692 || (buf[0] == 0xe0 && byte < 0xa0)
1693 || (buf[0] == 0xed && byte > 0x9f)))
1694 goto invalid_seq;
1695
1696 CONSUME_PEEKED_BYTE ();
1697 buf[1] = (scm_t_uint8) byte;
1698 *len = 2;
1699
1700 byte = scm_peek_byte_or_eof_unlocked (port);
1701 ASSERT_NOT_EOF (byte);
1702
1703 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1704 goto invalid_seq;
1705
1706 CONSUME_PEEKED_BYTE ();
1707 buf[2] = (scm_t_uint8) byte;
1708 *len = 3;
1709
1710 *codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL
1711 | ((scm_t_wchar) buf[1] & 0x3f) << 6UL
1712 | (buf[2] & 0x3f);
1713 }
1714 else if (buf[0] >= 0xf0 && buf[0] <= 0xf4)
1715 {
1716 /* 4-byte form. */
1717 byte = scm_peek_byte_or_eof_unlocked (port);
1718 ASSERT_NOT_EOF (byte);
1719
1720 if (SCM_UNLIKELY (((byte & 0xc0) != 0x80)
1721 || (buf[0] == 0xf0 && byte < 0x90)
1722 || (buf[0] == 0xf4 && byte > 0x8f)))
1723 goto invalid_seq;
1724
1725 CONSUME_PEEKED_BYTE ();
1726 buf[1] = (scm_t_uint8) byte;
1727 *len = 2;
1728
1729 byte = scm_peek_byte_or_eof_unlocked (port);
1730 ASSERT_NOT_EOF (byte);
1731
1732 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1733 goto invalid_seq;
1734
1735 CONSUME_PEEKED_BYTE ();
1736 buf[2] = (scm_t_uint8) byte;
1737 *len = 3;
1738
1739 byte = scm_peek_byte_or_eof_unlocked (port);
1740 ASSERT_NOT_EOF (byte);
1741
1742 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1743 goto invalid_seq;
1744
1745 CONSUME_PEEKED_BYTE ();
1746 buf[3] = (scm_t_uint8) byte;
1747 *len = 4;
1748
1749 *codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL
1750 | ((scm_t_wchar) buf[1] & 0x3f) << 12UL
1751 | ((scm_t_wchar) buf[2] & 0x3f) << 6UL
1752 | (buf[3] & 0x3f);
1753 }
1754 else
1755 goto invalid_seq;
1756
1757 return 0;
1758
1759 invalid_seq:
1760 /* Here we could choose the consume the faulty byte when it's not a
1761 valid starting byte, but it's not a requirement. What Section 3.9
1762 of Unicode 6.0.0 mandates, though, is to not consume a byte that
1763 would otherwise be a valid starting byte. */
1764
1765 return EILSEQ;
1766
1767 #undef CONSUME_PEEKED_BYTE
1768 #undef ASSERT_NOT_EOF
1769 }
1770
1771 /* Read an ISO-8859-1 codepoint (a byte) from PORT. On success, return
1772 0 and set *CODEPOINT to the codepoint that was read, fill BUF with
1773 its UTF-8 representation, and set *LEN to the length in bytes.
1774 Return `EILSEQ' on error. */
1775 static int
1776 get_latin1_codepoint (SCM port, scm_t_wchar *codepoint,
1777 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1778 {
1779 *codepoint = scm_get_byte_or_eof_unlocked (port);
1780
1781 if (*codepoint == EOF)
1782 *len = 0;
1783 else
1784 {
1785 *len = 1;
1786 buf[0] = *codepoint;
1787 }
1788 return 0;
1789 }
1790
1791 /* Likewise, read a byte sequence from PORT, passing it through its
1792 input conversion descriptor. */
1793 static int
1794 get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
1795 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1796 {
1797 scm_t_iconv_descriptors *id;
1798 scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
1799 size_t input_size = 0;
1800
1801 id = scm_i_port_iconv_descriptors (port, SCM_PORT_READ);
1802
1803 for (;;)
1804 {
1805 int byte_read;
1806 char *input, *output;
1807 size_t input_left, output_left, done;
1808
1809 byte_read = scm_get_byte_or_eof_unlocked (port);
1810 if (SCM_UNLIKELY (byte_read == EOF))
1811 {
1812 if (SCM_LIKELY (input_size == 0))
1813 {
1814 *codepoint = (scm_t_wchar) EOF;
1815 *len = input_size;
1816 return 0;
1817 }
1818 else
1819 {
1820 /* EOF found in the middle of a multibyte character. */
1821 scm_i_set_pending_eof (port);
1822 return EILSEQ;
1823 }
1824 }
1825
1826 buf[input_size++] = byte_read;
1827
1828 input = buf;
1829 input_left = input_size;
1830 output = (char *) utf8_buf;
1831 output_left = sizeof (utf8_buf);
1832
1833 done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
1834
1835 if (done == (size_t) -1)
1836 {
1837 int err = errno;
1838 if (SCM_LIKELY (err == EINVAL))
1839 /* The input byte sequence did not form a complete
1840 character. Read another byte and try again. */
1841 continue;
1842 else
1843 return err;
1844 }
1845 else
1846 {
1847 size_t output_size = sizeof (utf8_buf) - output_left;
1848 if (SCM_LIKELY (output_size > 0))
1849 {
1850 /* iconv generated output. Convert the UTF8_BUF sequence
1851 to a Unicode code point. */
1852 *codepoint = utf8_to_codepoint (utf8_buf, output_size);
1853 *len = input_size;
1854 return 0;
1855 }
1856 else
1857 {
1858 /* iconv consumed some bytes without producing any output.
1859 Most likely this means that a Unicode byte-order mark
1860 (BOM) was consumed, which should not be included in the
1861 returned buf. Shift any remaining bytes to the beginning
1862 of buf, and continue the loop. */
1863 memmove (buf, input, input_left);
1864 input_size = input_left;
1865 continue;
1866 }
1867 }
1868 }
1869 }
1870
1871 /* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
1872 with the byte representation of the codepoint in PORT's encoding, and
1873 set *LEN to the length in bytes of that representation. Return 0 on
1874 success and an errno value on error. */
1875 static SCM_C_INLINE int
1876 get_codepoint (SCM port, scm_t_wchar *codepoint,
1877 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1878 {
1879 int err;
1880 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1881 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
1882
1883 if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
1884 err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
1885 else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
1886 err = get_latin1_codepoint (port, codepoint, buf, len);
1887 else
1888 err = get_iconv_codepoint (port, codepoint, buf, len);
1889
1890 if (SCM_LIKELY (err == 0))
1891 {
1892 if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read))
1893 {
1894 /* Record that we're no longer at stream start. */
1895 pti->at_stream_start_for_bom_read = 0;
1896 if (pt->rw_random)
1897 pti->at_stream_start_for_bom_write = 0;
1898
1899 /* If we just read a BOM in an encoding that recognizes them,
1900 then silently consume it and read another code point. */
1901 if (SCM_UNLIKELY
1902 (*codepoint == SCM_UNICODE_BOM
1903 && (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8
1904 || strcmp (pt->encoding, "UTF-16") == 0
1905 || strcmp (pt->encoding, "UTF-32") == 0)))
1906 return get_codepoint (port, codepoint, buf, len);
1907 }
1908 update_port_lf (*codepoint, port);
1909 }
1910 else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
1911 {
1912 *codepoint = '?';
1913 err = 0;
1914 update_port_lf (*codepoint, port);
1915 }
1916
1917 return err;
1918 }
1919
1920 /* Read a codepoint from PORT and return it. */
1921 scm_t_wchar
1922 scm_getc_unlocked (SCM port)
1923 #define FUNC_NAME "scm_getc"
1924 {
1925 int err;
1926 size_t len;
1927 scm_t_wchar codepoint;
1928 char buf[SCM_MBCHAR_BUF_SIZE];
1929
1930 err = get_codepoint (port, &codepoint, buf, &len);
1931 if (SCM_UNLIKELY (err != 0))
1932 /* At this point PORT should point past the invalid encoding, as per
1933 R6RS-lib Section 8.2.4. */
1934 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
1935
1936 return codepoint;
1937 }
1938 #undef FUNC_NAME
1939
1940 scm_t_wchar
1941 scm_getc (SCM port)
1942 {
1943 scm_i_pthread_mutex_t *lock;
1944 scm_t_wchar ret;
1945
1946 scm_c_lock_port (port, &lock);
1947 ret = scm_getc_unlocked (port);
1948 if (lock)
1949 scm_i_pthread_mutex_unlock (lock);
1950
1951
1952 return ret;
1953 }
1954
1955 SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
1956 (SCM port),
1957 "Return the next character available from @var{port}, updating\n"
1958 "@var{port} to point to the following character. If no more\n"
1959 "characters are available, the end-of-file object is returned.\n"
1960 "\n"
1961 "When @var{port}'s data cannot be decoded according to its\n"
1962 "character encoding, a @code{decoding-error} is raised and\n"
1963 "@var{port} points past the erroneous byte sequence.\n")
1964 #define FUNC_NAME s_scm_read_char
1965 {
1966 scm_t_wchar c;
1967 if (SCM_UNBNDP (port))
1968 port = scm_current_input_port ();
1969 SCM_VALIDATE_OPINPORT (1, port);
1970 c = scm_getc_unlocked (port);
1971 if (EOF == c)
1972 return SCM_EOF_VAL;
1973 return SCM_MAKE_CHAR (c);
1974 }
1975 #undef FUNC_NAME
1976
1977
1978 \f
1979
1980 /* Pushback. */
1981 \f
1982
1983
1984 static void
1985 scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
1986 #define FUNC_NAME "scm_unget_bytes"
1987 {
1988 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1989 size_t old_len, new_len;
1990
1991 scm_i_clear_pending_eof (port);
1992
1993 if (pt->read_buf != pt->putback_buf)
1994 /* switch to the put-back buffer. */
1995 {
1996 if (pt->putback_buf == NULL)
1997 {
1998 pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
1999 ? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
2000 pt->putback_buf
2001 = (unsigned char *) scm_gc_malloc_pointerless
2002 (pt->putback_buf_size, "putback buffer");
2003 }
2004
2005 pt->saved_read_buf = pt->read_buf;
2006 pt->saved_read_pos = pt->read_pos;
2007 pt->saved_read_end = pt->read_end;
2008 pt->saved_read_buf_size = pt->read_buf_size;
2009
2010 /* Put read_pos at the end of the buffer, so that ungets will not
2011 have to shift the buffer contents each time. */
2012 pt->read_buf = pt->putback_buf;
2013 pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
2014 pt->read_buf_size = pt->putback_buf_size;
2015 }
2016
2017 old_len = pt->read_end - pt->read_pos;
2018 new_len = old_len + len;
2019
2020 if (new_len > pt->read_buf_size)
2021 /* The putback buffer needs to be enlarged. */
2022 {
2023 size_t new_buf_size;
2024 unsigned char *new_buf, *new_end, *new_pos;
2025
2026 new_buf_size = pt->read_buf_size * 2;
2027 if (new_buf_size < new_len)
2028 new_buf_size = new_len;
2029
2030 new_buf = (unsigned char *)
2031 scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
2032
2033 /* Put the bytes at the end of the buffer, so that future
2034 ungets won't need to shift the buffer. */
2035 new_end = new_buf + new_buf_size;
2036 new_pos = new_end - old_len;
2037 memcpy (new_pos, pt->read_pos, old_len);
2038
2039 pt->read_buf = pt->putback_buf = new_buf;
2040 pt->read_pos = new_pos;
2041 pt->read_end = new_end;
2042 pt->read_buf_size = pt->putback_buf_size = new_buf_size;
2043 }
2044 else if (pt->read_buf + len < pt->read_pos)
2045 /* If needed, shift the existing buffer contents up.
2046 This should not happen unless some external code
2047 manipulates the putback buffer pointers. */
2048 {
2049 unsigned char *new_end = pt->read_buf + pt->read_buf_size;
2050 unsigned char *new_pos = new_end - old_len;
2051
2052 memmove (new_pos, pt->read_pos, old_len);
2053 pt->read_pos = new_pos;
2054 pt->read_end = new_end;
2055 }
2056
2057 /* Move read_pos back and copy the bytes there. */
2058 pt->read_pos -= len;
2059 memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
2060
2061 if (pt->rw_active == SCM_PORT_WRITE)
2062 scm_flush (port);
2063
2064 if (pt->rw_random)
2065 pt->rw_active = SCM_PORT_READ;
2066 }
2067 #undef FUNC_NAME
2068
2069 void
2070 scm_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
2071 {
2072 scm_i_unget_bytes_unlocked (buf, len, port);
2073 }
2074
2075 void
2076 scm_unget_byte_unlocked (int c, SCM port)
2077 {
2078 unsigned char byte = c;
2079 scm_i_unget_bytes_unlocked (&byte, 1, port);
2080 }
2081
2082 void
2083 scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
2084 {
2085 scm_i_pthread_mutex_t *lock;
2086 scm_c_lock_port (port, &lock);
2087 scm_i_unget_bytes_unlocked (buf, len, port);
2088 if (lock)
2089 scm_i_pthread_mutex_unlock (lock);
2090 }
2091
2092 void
2093 scm_unget_byte (int c, SCM port)
2094 {
2095 unsigned char byte = c;
2096 scm_i_pthread_mutex_t *lock;
2097 scm_c_lock_port (port, &lock);
2098 scm_i_unget_bytes_unlocked (&byte, 1, port);
2099 if (lock)
2100 scm_i_pthread_mutex_unlock (lock);
2101 }
2102
2103 void
2104 scm_ungetc_unlocked (scm_t_wchar c, SCM port)
2105 #define FUNC_NAME "scm_ungetc"
2106 {
2107 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2108 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
2109 char *result;
2110 char result_buf[10];
2111 size_t len;
2112
2113 len = sizeof (result_buf);
2114
2115 if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
2116 {
2117 if (c < 0xf0)
2118 {
2119 result_buf[0] = (char) c;
2120 result = result_buf;
2121 len = 1;
2122 }
2123 else
2124 result =
2125 (char *) u32_to_u8 ((uint32_t *) &c, 1, (uint8_t *) result_buf, &len);
2126 }
2127 else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1 && c <= 0xff)
2128 {
2129 result_buf[0] = (char) c;
2130 result = result_buf;
2131 len = 1;
2132 }
2133 else
2134 result = u32_conv_to_encoding (pt->encoding,
2135 (enum iconv_ilseq_handler) pt->ilseq_handler,
2136 (uint32_t *) &c, 1, NULL,
2137 result_buf, &len);
2138
2139 if (SCM_UNLIKELY (result == NULL || len == 0))
2140 scm_encoding_error (FUNC_NAME, errno,
2141 "conversion to port encoding failed",
2142 SCM_BOOL_F, SCM_MAKE_CHAR (c));
2143
2144 scm_i_unget_bytes_unlocked ((unsigned char *) result, len, port);
2145
2146 if (SCM_UNLIKELY (result != result_buf))
2147 free (result);
2148
2149 if (c == '\n')
2150 {
2151 /* What should col be in this case?
2152 * We'll leave it at -1.
2153 */
2154 SCM_LINUM (port) -= 1;
2155 }
2156 else
2157 SCM_COL(port) -= 1;
2158 }
2159 #undef FUNC_NAME
2160
2161 void
2162 scm_ungetc (scm_t_wchar c, SCM port)
2163 {
2164 scm_i_pthread_mutex_t *lock;
2165 scm_c_lock_port (port, &lock);
2166 scm_ungetc_unlocked (c, port);
2167 if (lock)
2168 scm_i_pthread_mutex_unlock (lock);
2169
2170 }
2171
2172 void
2173 scm_ungets_unlocked (const char *s, int n, SCM port)
2174 {
2175 /* This is simple minded and inefficient, but unreading strings is
2176 * probably not a common operation, and remember that line and
2177 * column numbers have to be handled...
2178 *
2179 * Please feel free to write an optimized version!
2180 */
2181 while (n--)
2182 scm_ungetc_unlocked (s[n], port);
2183 }
2184
2185 void
2186 scm_ungets (const char *s, int n, SCM port)
2187 {
2188 scm_i_pthread_mutex_t *lock;
2189 scm_c_lock_port (port, &lock);
2190 scm_ungets_unlocked (s, n, port);
2191 if (lock)
2192 scm_i_pthread_mutex_unlock (lock);
2193
2194 }
2195
2196 SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
2197 (SCM port),
2198 "Return the next character available from @var{port},\n"
2199 "@emph{without} updating @var{port} to point to the following\n"
2200 "character. If no more characters are available, the\n"
2201 "end-of-file object is returned.\n"
2202 "\n"
2203 "The value returned by\n"
2204 "a call to @code{peek-char} is the same as the value that would\n"
2205 "have been returned by a call to @code{read-char} on the same\n"
2206 "port. The only difference is that the very next call to\n"
2207 "@code{read-char} or @code{peek-char} on that @var{port} will\n"
2208 "return the value returned by the preceding call to\n"
2209 "@code{peek-char}. In particular, a call to @code{peek-char} on\n"
2210 "an interactive port will hang waiting for input whenever a call\n"
2211 "to @code{read-char} would have hung.\n"
2212 "\n"
2213 "As for @code{read-char}, a @code{decoding-error} may be raised\n"
2214 "if such a situation occurs. However, unlike with @code{read-char},\n"
2215 "@var{port} still points at the beginning of the erroneous byte\n"
2216 "sequence when the error is raised.\n")
2217 #define FUNC_NAME s_scm_peek_char
2218 {
2219 int err;
2220 SCM result;
2221 scm_t_wchar c;
2222 char bytes[SCM_MBCHAR_BUF_SIZE];
2223 long column, line;
2224 size_t len = 0;
2225
2226 if (SCM_UNBNDP (port))
2227 port = scm_current_input_port ();
2228 SCM_VALIDATE_OPINPORT (1, port);
2229
2230 column = SCM_COL (port);
2231 line = SCM_LINUM (port);
2232
2233 err = get_codepoint (port, &c, bytes, &len);
2234
2235 scm_i_unget_bytes_unlocked ((unsigned char *) bytes, len, port);
2236
2237 SCM_COL (port) = column;
2238 SCM_LINUM (port) = line;
2239
2240 if (SCM_UNLIKELY (err != 0))
2241 {
2242 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
2243
2244 /* Shouldn't happen since `catch' always aborts to prompt. */
2245 result = SCM_BOOL_F;
2246 }
2247 else if (c == EOF)
2248 {
2249 scm_i_set_pending_eof (port);
2250 result = SCM_EOF_VAL;
2251 }
2252 else
2253 result = SCM_MAKE_CHAR (c);
2254
2255 return result;
2256 }
2257 #undef FUNC_NAME
2258
2259 SCM_DEFINE (scm_unread_char, "unread-char", 1, 1, 0,
2260 (SCM cobj, SCM port),
2261 "Place character @var{cobj} in @var{port} so that it will be\n"
2262 "read by the next read operation. If called multiple times, the\n"
2263 "unread characters will be read again in last-in first-out\n"
2264 "order. If @var{port} is not supplied, the current input port\n"
2265 "is used.")
2266 #define FUNC_NAME s_scm_unread_char
2267 {
2268 int c;
2269
2270 SCM_VALIDATE_CHAR (1, cobj);
2271 if (SCM_UNBNDP (port))
2272 port = scm_current_input_port ();
2273 SCM_VALIDATE_OPINPORT (2, port);
2274
2275 c = SCM_CHAR (cobj);
2276
2277 scm_ungetc_unlocked (c, port);
2278 return cobj;
2279 }
2280 #undef FUNC_NAME
2281
2282 SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0,
2283 (SCM str, SCM port),
2284 "Place the string @var{str} in @var{port} so that its characters will be\n"
2285 "read in subsequent read operations. If called multiple times, the\n"
2286 "unread characters will be read again in last-in first-out order. If\n"
2287 "@var{port} is not supplied, the current-input-port is used.")
2288 #define FUNC_NAME s_scm_unread_string
2289 {
2290 int n;
2291 SCM_VALIDATE_STRING (1, str);
2292 if (SCM_UNBNDP (port))
2293 port = scm_current_input_port ();
2294 SCM_VALIDATE_OPINPORT (2, port);
2295
2296 n = scm_i_string_length (str);
2297
2298 while (n--)
2299 scm_ungetc_unlocked (scm_i_string_ref (str, n), port);
2300
2301 return str;
2302 }
2303 #undef FUNC_NAME
2304
2305
2306 \f
2307
2308 /* Manipulating the buffers. */
2309
2310 /* This routine does not take any locks, as it is usually called as part
2311 of a port implementation. */
2312 void
2313 scm_port_non_buffer (scm_t_port *pt)
2314 {
2315 pt->read_pos = pt->read_buf = pt->read_end = &pt->shortbuf;
2316 pt->write_buf = pt->write_pos = &pt->shortbuf;
2317 pt->read_buf_size = pt->write_buf_size = 1;
2318 pt->write_end = pt->write_buf + pt->write_buf_size;
2319 }
2320
2321 /* this should only be called when the read buffer is empty. it
2322 tries to refill the read buffer. it returns the first char from
2323 the port, which is either EOF or *(pt->read_pos). */
2324 static int
2325 scm_i_fill_input_unlocked (SCM port)
2326 {
2327 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2328 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
2329
2330 assert (pt->read_pos == pt->read_end);
2331
2332 if (pti->pending_eof)
2333 {
2334 pti->pending_eof = 0;
2335 return EOF;
2336 }
2337
2338 if (pt->read_buf == pt->putback_buf)
2339 {
2340 /* finished reading put-back chars. */
2341 pt->read_buf = pt->saved_read_buf;
2342 pt->read_pos = pt->saved_read_pos;
2343 pt->read_end = pt->saved_read_end;
2344 pt->read_buf_size = pt->saved_read_buf_size;
2345 if (pt->read_pos < pt->read_end)
2346 return *(pt->read_pos);
2347 }
2348 return SCM_PORT_DESCRIPTOR (port)->fill_input (port);
2349 }
2350
2351 int
2352 scm_fill_input (SCM port)
2353 {
2354 scm_i_pthread_mutex_t *lock;
2355 int ret;
2356
2357 scm_c_lock_port (port, &lock);
2358 ret = scm_fill_input_unlocked (port);
2359 if (lock)
2360 scm_i_pthread_mutex_unlock (lock);
2361
2362
2363 return ret;
2364 }
2365
2366 /* Slow-path fallback for 'scm_get_byte_or_eof_unlocked' */
2367 int
2368 scm_slow_get_byte_or_eof_unlocked (SCM port)
2369 {
2370 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2371
2372 if (pt->rw_active == SCM_PORT_WRITE)
2373 scm_flush_unlocked (port);
2374
2375 if (pt->rw_random)
2376 pt->rw_active = SCM_PORT_READ;
2377
2378 if (pt->read_pos >= pt->read_end)
2379 {
2380 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2381 return EOF;
2382 }
2383
2384 return *pt->read_pos++;
2385 }
2386
2387 /* Slow-path fallback for 'scm_peek_byte_or_eof_unlocked' */
2388 int
2389 scm_slow_peek_byte_or_eof_unlocked (SCM port)
2390 {
2391 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2392
2393 if (pt->rw_active == SCM_PORT_WRITE)
2394 scm_flush_unlocked (port);
2395
2396 if (pt->rw_random)
2397 pt->rw_active = SCM_PORT_READ;
2398
2399 if (pt->read_pos >= pt->read_end)
2400 {
2401 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2402 {
2403 scm_i_set_pending_eof (port);
2404 return EOF;
2405 }
2406 }
2407
2408 return *pt->read_pos;
2409 }
2410
2411 /* Move up to READ_LEN bytes from PORT's putback and/or read buffers
2412 into memory starting at DEST. Return the number of bytes moved.
2413 PORT's line/column numbers are left unchanged. */
2414 size_t
2415 scm_take_from_input_buffers (SCM port, char *dest, size_t read_len)
2416 {
2417 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2418 size_t bytes_read = 0;
2419 size_t from_buf = min (pt->read_end - pt->read_pos, read_len);
2420
2421 if (from_buf > 0)
2422 {
2423 memcpy (dest, pt->read_pos, from_buf);
2424 pt->read_pos += from_buf;
2425 bytes_read += from_buf;
2426 read_len -= from_buf;
2427 dest += from_buf;
2428 }
2429
2430 /* if putback was active, try the real input buffer too. */
2431 if (pt->read_buf == pt->putback_buf)
2432 {
2433 from_buf = min (pt->saved_read_end - pt->saved_read_pos, read_len);
2434 if (from_buf > 0)
2435 {
2436 memcpy (dest, pt->saved_read_pos, from_buf);
2437 pt->saved_read_pos += from_buf;
2438 bytes_read += from_buf;
2439 }
2440 }
2441
2442 return bytes_read;
2443 }
2444
2445 /* Clear a port's read buffers, returning the contents. */
2446 SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0,
2447 (SCM port),
2448 "This procedure clears a port's input buffers, similar\n"
2449 "to the way that force-output clears the output buffer. The\n"
2450 "contents of the buffers are returned as a single string, e.g.,\n"
2451 "\n"
2452 "@lisp\n"
2453 "(define p (open-input-file ...))\n"
2454 "(drain-input p) => empty string, nothing buffered yet.\n"
2455 "(unread-char (read-char p) p)\n"
2456 "(drain-input p) => initial chars from p, up to the buffer size.\n"
2457 "@end lisp\n\n"
2458 "Draining the buffers may be useful for cleanly finishing\n"
2459 "buffered I/O so that the file descriptor can be used directly\n"
2460 "for further input.")
2461 #define FUNC_NAME s_scm_drain_input
2462 {
2463 SCM result;
2464 char *data;
2465 scm_t_port *pt;
2466 long count;
2467
2468 SCM_VALIDATE_OPINPORT (1, port);
2469 pt = SCM_PTAB_ENTRY (port);
2470
2471 count = pt->read_end - pt->read_pos;
2472 if (pt->read_buf == pt->putback_buf)
2473 count += pt->saved_read_end - pt->saved_read_pos;
2474
2475 if (count)
2476 {
2477 result = scm_i_make_string (count, &data, 0);
2478 scm_take_from_input_buffers (port, data, count);
2479 }
2480 else
2481 result = scm_nullstr;
2482
2483 return result;
2484 }
2485 #undef FUNC_NAME
2486
2487 void
2488 scm_end_input_unlocked (SCM port)
2489 {
2490 long offset;
2491 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2492
2493 scm_i_clear_pending_eof (port);
2494 if (pt->read_buf == pt->putback_buf)
2495 {
2496 offset = pt->read_end - pt->read_pos;
2497 pt->read_buf = pt->saved_read_buf;
2498 pt->read_pos = pt->saved_read_pos;
2499 pt->read_end = pt->saved_read_end;
2500 pt->read_buf_size = pt->saved_read_buf_size;
2501 }
2502 else
2503 offset = 0;
2504
2505 SCM_PORT_DESCRIPTOR (port)->end_input (port, offset);
2506 }
2507
2508 void
2509 scm_end_input (SCM port)
2510 {
2511 scm_i_pthread_mutex_t *lock;
2512 scm_c_lock_port (port, &lock);
2513 scm_end_input_unlocked (port);
2514 if (lock)
2515 scm_i_pthread_mutex_unlock (lock);
2516
2517 }
2518
2519 SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0,
2520 (SCM port),
2521 "Flush the specified output port, or the current output port if @var{port}\n"
2522 "is omitted. The current output buffer contents are passed to the\n"
2523 "underlying port implementation (e.g., in the case of fports, the\n"
2524 "data will be written to the file and the output buffer will be cleared.)\n"
2525 "It has no effect on an unbuffered port.\n\n"
2526 "The return value is unspecified.")
2527 #define FUNC_NAME s_scm_force_output
2528 {
2529 if (SCM_UNBNDP (port))
2530 port = scm_current_output_port ();
2531 else
2532 {
2533 port = SCM_COERCE_OUTPORT (port);
2534 SCM_VALIDATE_OPOUTPORT (1, port);
2535 }
2536 scm_flush_unlocked (port);
2537 return SCM_UNSPECIFIED;
2538 }
2539 #undef FUNC_NAME
2540
2541 void
2542 scm_flush_unlocked (SCM port)
2543 {
2544 SCM_PORT_DESCRIPTOR (port)->flush (port);
2545 }
2546
2547 void
2548 scm_flush (SCM port)
2549 {
2550 scm_i_pthread_mutex_t *lock;
2551 scm_c_lock_port (port, &lock);
2552 scm_flush_unlocked (port);
2553 if (lock)
2554 scm_i_pthread_mutex_unlock (lock);
2555
2556 }
2557
2558 int
2559 scm_fill_input_unlocked (SCM port)
2560 {
2561 return scm_i_fill_input_unlocked (port);
2562 }
2563
2564
2565 \f
2566
2567 /* Output. */
2568
2569 void
2570 scm_putc (char c, SCM port)
2571 {
2572 scm_i_pthread_mutex_t *lock;
2573 scm_c_lock_port (port, &lock);
2574 scm_putc_unlocked (c, port);
2575 if (lock)
2576 scm_i_pthread_mutex_unlock (lock);
2577
2578 }
2579
2580 void
2581 scm_puts (const char *s, SCM port)
2582 {
2583 scm_i_pthread_mutex_t *lock;
2584 scm_c_lock_port (port, &lock);
2585 scm_puts_unlocked (s, port);
2586 if (lock)
2587 scm_i_pthread_mutex_unlock (lock);
2588
2589 }
2590
2591 /* scm_c_write
2592 *
2593 * Used by an application to write arbitrary number of bytes to an SCM
2594 * port. Similar semantics as libc write. However, unlike libc
2595 * write, scm_c_write writes the requested number of bytes and has no
2596 * return value.
2597 *
2598 * Warning: Doesn't update port line and column counts!
2599 */
2600 void
2601 scm_c_write_unlocked (SCM port, const void *ptr, size_t size)
2602 #define FUNC_NAME "scm_c_write"
2603 {
2604 scm_t_port *pt;
2605 scm_t_ptob_descriptor *ptob;
2606
2607 SCM_VALIDATE_OPOUTPORT (1, port);
2608
2609 pt = SCM_PTAB_ENTRY (port);
2610 ptob = SCM_PORT_DESCRIPTOR (port);
2611
2612 if (pt->rw_active == SCM_PORT_READ)
2613 scm_end_input_unlocked (port);
2614
2615 ptob->write (port, ptr, size);
2616
2617 if (pt->rw_random)
2618 pt->rw_active = SCM_PORT_WRITE;
2619 }
2620 #undef FUNC_NAME
2621
2622 void
2623 scm_c_write (SCM port, const void *ptr, size_t size)
2624 {
2625 scm_i_pthread_mutex_t *lock;
2626 scm_c_lock_port (port, &lock);
2627 scm_c_write_unlocked (port, ptr, size);
2628 if (lock)
2629 scm_i_pthread_mutex_unlock (lock);
2630
2631 }
2632
2633 /* scm_lfwrite
2634 *
2635 * This function differs from scm_c_write; it updates port line and
2636 * column. */
2637 void
2638 scm_lfwrite_unlocked (const char *ptr, size_t size, SCM port)
2639 {
2640 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2641 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
2642
2643 if (pt->rw_active == SCM_PORT_READ)
2644 scm_end_input_unlocked (port);
2645
2646 ptob->write (port, ptr, size);
2647
2648 for (; size; ptr++, size--)
2649 update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port);
2650
2651 if (pt->rw_random)
2652 pt->rw_active = SCM_PORT_WRITE;
2653 }
2654
2655 void
2656 scm_lfwrite (const char *ptr, size_t size, SCM port)
2657 {
2658 scm_i_pthread_mutex_t *lock;
2659 scm_c_lock_port (port, &lock);
2660 scm_lfwrite_unlocked (ptr, size, port);
2661 if (lock)
2662 scm_i_pthread_mutex_unlock (lock);
2663
2664 }
2665
2666 /* Write STR to PORT from START inclusive to END exclusive. */
2667 void
2668 scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
2669 {
2670 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2671
2672 if (pt->rw_active == SCM_PORT_READ)
2673 scm_end_input_unlocked (port);
2674
2675 if (end == (size_t) -1)
2676 end = scm_i_string_length (str);
2677
2678 scm_i_display_substring (str, start, end, port);
2679
2680 if (pt->rw_random)
2681 pt->rw_active = SCM_PORT_WRITE;
2682 }
2683
2684
2685 \f
2686
2687 /* Querying and setting positions, and character availability. */
2688
2689 SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0,
2690 (SCM port),
2691 "Return @code{#t} if a character is ready on input @var{port}\n"
2692 "and return @code{#f} otherwise. If @code{char-ready?} returns\n"
2693 "@code{#t} then the next @code{read-char} operation on\n"
2694 "@var{port} is guaranteed not to hang. If @var{port} is a file\n"
2695 "port at end of file then @code{char-ready?} returns @code{#t}.\n"
2696 "\n"
2697 "@code{char-ready?} exists to make it possible for a\n"
2698 "program to accept characters from interactive ports without\n"
2699 "getting stuck waiting for input. Any input editors associated\n"
2700 "with such ports must make sure that characters whose existence\n"
2701 "has been asserted by @code{char-ready?} cannot be rubbed out.\n"
2702 "If @code{char-ready?} were to return @code{#f} at end of file,\n"
2703 "a port at end of file would be indistinguishable from an\n"
2704 "interactive port that has no ready characters.")
2705 #define FUNC_NAME s_scm_char_ready_p
2706 {
2707 scm_t_port *pt;
2708
2709 if (SCM_UNBNDP (port))
2710 port = scm_current_input_port ();
2711 /* It's possible to close the current input port, so validate even in
2712 this case. */
2713 SCM_VALIDATE_OPINPORT (1, port);
2714
2715 pt = SCM_PTAB_ENTRY (port);
2716
2717 /* if the current read buffer is filled, or the
2718 last pushed-back char has been read and the saved buffer is
2719 filled, result is true. */
2720 if (pt->read_pos < pt->read_end
2721 || (pt->read_buf == pt->putback_buf
2722 && pt->saved_read_pos < pt->saved_read_end))
2723 return SCM_BOOL_T;
2724 else
2725 {
2726 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
2727
2728 if (ptob->input_waiting)
2729 return scm_from_bool(ptob->input_waiting (port));
2730 else
2731 return SCM_BOOL_T;
2732 }
2733 }
2734 #undef FUNC_NAME
2735
2736 SCM_DEFINE (scm_seek, "seek", 3, 0, 0,
2737 (SCM fd_port, SCM offset, SCM whence),
2738 "Sets the current position of @var{fd_port} to the integer\n"
2739 "@var{offset}, which is interpreted according to the value of\n"
2740 "@var{whence}.\n"
2741 "\n"
2742 "One of the following variables should be supplied for\n"
2743 "@var{whence}:\n"
2744 "@defvar SEEK_SET\n"
2745 "Seek from the beginning of the file.\n"
2746 "@end defvar\n"
2747 "@defvar SEEK_CUR\n"
2748 "Seek from the current position.\n"
2749 "@end defvar\n"
2750 "@defvar SEEK_END\n"
2751 "Seek from the end of the file.\n"
2752 "@end defvar\n"
2753 "If @var{fd_port} is a file descriptor, the underlying system\n"
2754 "call is @code{lseek}. @var{port} may be a string port.\n"
2755 "\n"
2756 "The value returned is the new position in the file. This means\n"
2757 "that the current position of a port can be obtained using:\n"
2758 "@lisp\n"
2759 "(seek port 0 SEEK_CUR)\n"
2760 "@end lisp")
2761 #define FUNC_NAME s_scm_seek
2762 {
2763 int how;
2764
2765 fd_port = SCM_COERCE_OUTPORT (fd_port);
2766
2767 how = scm_to_int (whence);
2768 if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END)
2769 SCM_OUT_OF_RANGE (3, whence);
2770
2771 if (SCM_OPPORTP (fd_port))
2772 {
2773 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (fd_port);
2774 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (fd_port);
2775 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2776 off_t_or_off64_t rv;
2777
2778 if (!ptob->seek)
2779 SCM_MISC_ERROR ("port is not seekable",
2780 scm_cons (fd_port, SCM_EOL));
2781 else
2782 rv = ptob->seek (fd_port, off, how);
2783
2784 /* Set stream-start flags according to new position. */
2785 pti->at_stream_start_for_bom_read = (rv == 0);
2786 pti->at_stream_start_for_bom_write = (rv == 0);
2787
2788 scm_i_clear_pending_eof (fd_port);
2789
2790 return scm_from_off_t_or_off64_t (rv);
2791 }
2792 else /* file descriptor?. */
2793 {
2794 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2795 off_t_or_off64_t rv;
2796 rv = lseek_or_lseek64 (scm_to_int (fd_port), off, how);
2797 if (rv == -1)
2798 SCM_SYSERROR;
2799 return scm_from_off_t_or_off64_t (rv);
2800 }
2801 }
2802 #undef FUNC_NAME
2803
2804 #ifndef O_BINARY
2805 #define O_BINARY 0
2806 #endif
2807
2808 /* Mingw has ftruncate(), perhaps implemented above using chsize, but
2809 doesn't have the filename version truncate(), hence this code. */
2810 #if HAVE_FTRUNCATE && ! HAVE_TRUNCATE
2811 static int
2812 truncate (const char *file, off_t length)
2813 {
2814 int ret, fdes;
2815
2816 fdes = open (file, O_BINARY | O_WRONLY);
2817 if (fdes == -1)
2818 return -1;
2819
2820 ret = ftruncate (fdes, length);
2821 if (ret == -1)
2822 {
2823 int save_errno = errno;
2824 close (fdes);
2825 errno = save_errno;
2826 return -1;
2827 }
2828
2829 return close (fdes);
2830 }
2831 #endif /* HAVE_FTRUNCATE && ! HAVE_TRUNCATE */
2832
2833 SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0,
2834 (SCM object, SCM length),
2835 "Truncate file @var{object} to @var{length} bytes. @var{object}\n"
2836 "can be a filename string, a port object, or an integer file\n"
2837 "descriptor.\n"
2838 "The return value is unspecified.\n"
2839 "\n"
2840 "For a port or file descriptor @var{length} can be omitted, in\n"
2841 "which case the file is truncated at the current position (per\n"
2842 "@code{ftell} above).\n"
2843 "\n"
2844 "On most systems a file can be extended by giving a length\n"
2845 "greater than the current size, but this is not mandatory in the\n"
2846 "POSIX standard.")
2847 #define FUNC_NAME s_scm_truncate_file
2848 {
2849 int rv;
2850
2851 /* "object" can be a port, fdes or filename.
2852
2853 Negative "length" makes no sense, but it's left to truncate() or
2854 ftruncate() to give back an error for that (normally EINVAL).
2855 */
2856
2857 if (SCM_UNBNDP (length))
2858 {
2859 /* must supply length if object is a filename. */
2860 if (scm_is_string (object))
2861 SCM_MISC_ERROR("must supply length if OBJECT is a filename", SCM_EOL);
2862
2863 length = scm_seek (object, SCM_INUM0, scm_from_int (SEEK_CUR));
2864 }
2865
2866 object = SCM_COERCE_OUTPORT (object);
2867 if (scm_is_integer (object))
2868 {
2869 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2870 SCM_SYSCALL (rv = ftruncate_or_ftruncate64 (scm_to_int (object),
2871 c_length));
2872 }
2873 else if (SCM_OPOUTPORTP (object))
2874 {
2875 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2876 scm_t_port *pt = SCM_PTAB_ENTRY (object);
2877 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (object);
2878
2879 if (!ptob->truncate)
2880 SCM_MISC_ERROR ("port is not truncatable", SCM_EOL);
2881
2882 scm_i_clear_pending_eof (object);
2883 if (pt->rw_active == SCM_PORT_READ)
2884 scm_end_input_unlocked (object);
2885 else if (pt->rw_active == SCM_PORT_WRITE)
2886 ptob->flush (object);
2887
2888 ptob->truncate (object, c_length);
2889 rv = 0;
2890 }
2891 else
2892 {
2893 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2894 char *str = scm_to_locale_string (object);
2895 int eno;
2896 SCM_SYSCALL (rv = truncate_or_truncate64 (str, c_length));
2897 eno = errno;
2898 free (str);
2899 errno = eno;
2900 }
2901 if (rv == -1)
2902 SCM_SYSERROR;
2903 return SCM_UNSPECIFIED;
2904 }
2905 #undef FUNC_NAME
2906
2907 SCM_DEFINE (scm_port_line, "port-line", 1, 0, 0,
2908 (SCM port),
2909 "Return the current line number for @var{port}.\n"
2910 "\n"
2911 "The first line of a file is 0. But you might want to add 1\n"
2912 "when printing line numbers, since starting from 1 is\n"
2913 "traditional in error messages, and likely to be more natural to\n"
2914 "non-programmers.")
2915 #define FUNC_NAME s_scm_port_line
2916 {
2917 port = SCM_COERCE_OUTPORT (port);
2918 SCM_VALIDATE_OPENPORT (1, port);
2919 return scm_from_long (SCM_LINUM (port));
2920 }
2921 #undef FUNC_NAME
2922
2923 SCM_DEFINE (scm_set_port_line_x, "set-port-line!", 2, 0, 0,
2924 (SCM port, SCM line),
2925 "Set the current line number for @var{port} to @var{line}. The\n"
2926 "first line of a file is 0.")
2927 #define FUNC_NAME s_scm_set_port_line_x
2928 {
2929 port = SCM_COERCE_OUTPORT (port);
2930 SCM_VALIDATE_OPENPORT (1, port);
2931 SCM_PTAB_ENTRY (port)->line_number = scm_to_long (line);
2932 return SCM_UNSPECIFIED;
2933 }
2934 #undef FUNC_NAME
2935
2936 SCM_DEFINE (scm_port_column, "port-column", 1, 0, 0,
2937 (SCM port),
2938 "Return the current column number of @var{port}.\n"
2939 "If the number is\n"
2940 "unknown, the result is #f. Otherwise, the result is a 0-origin integer\n"
2941 "- i.e. the first character of the first line is line 0, column 0.\n"
2942 "(However, when you display a file position, for example in an error\n"
2943 "message, we recommend you add 1 to get 1-origin integers. This is\n"
2944 "because lines and column numbers traditionally start with 1, and that is\n"
2945 "what non-programmers will find most natural.)")
2946 #define FUNC_NAME s_scm_port_column
2947 {
2948 port = SCM_COERCE_OUTPORT (port);
2949 SCM_VALIDATE_OPENPORT (1, port);
2950 return scm_from_int (SCM_COL (port));
2951 }
2952 #undef FUNC_NAME
2953
2954 SCM_DEFINE (scm_set_port_column_x, "set-port-column!", 2, 0, 0,
2955 (SCM port, SCM column),
2956 "Set the current column of @var{port}. Before reading the first\n"
2957 "character on a line the column should be 0.")
2958 #define FUNC_NAME s_scm_set_port_column_x
2959 {
2960 port = SCM_COERCE_OUTPORT (port);
2961 SCM_VALIDATE_OPENPORT (1, port);
2962 SCM_PTAB_ENTRY (port)->column_number = scm_to_int (column);
2963 return SCM_UNSPECIFIED;
2964 }
2965 #undef FUNC_NAME
2966
2967 SCM_DEFINE (scm_port_filename, "port-filename", 1, 0, 0,
2968 (SCM port),
2969 "Return the filename associated with @var{port}, or @code{#f}\n"
2970 "if no filename is associated with the port.")
2971 #define FUNC_NAME s_scm_port_filename
2972 {
2973 port = SCM_COERCE_OUTPORT (port);
2974 SCM_VALIDATE_OPENPORT (1, port);
2975 return SCM_FILENAME (port);
2976 }
2977 #undef FUNC_NAME
2978
2979 SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0,
2980 (SCM port, SCM filename),
2981 "Change the filename associated with @var{port}, using the current input\n"
2982 "port if none is specified. Note that this does not change the port's\n"
2983 "source of data, but only the value that is returned by\n"
2984 "@code{port-filename} and reported in diagnostic output.")
2985 #define FUNC_NAME s_scm_set_port_filename_x
2986 {
2987 port = SCM_COERCE_OUTPORT (port);
2988 SCM_VALIDATE_OPENPORT (1, port);
2989 /* We allow the user to set the filename to whatever he likes. */
2990 SCM_SET_FILENAME (port, filename);
2991 return SCM_UNSPECIFIED;
2992 }
2993 #undef FUNC_NAME
2994
2995
2996 \f
2997
2998 /* Implementation helpers for port printing functions. */
2999
3000 void
3001 scm_print_port_mode (SCM exp, SCM port)
3002 {
3003 scm_puts_unlocked (SCM_CLOSEDP (exp)
3004 ? "closed: "
3005 : (SCM_RDNG & SCM_CELL_WORD_0 (exp)
3006 ? (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
3007 ? "input-output: "
3008 : "input: ")
3009 : (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
3010 ? "output: "
3011 : "bogus: ")),
3012 port);
3013 }
3014
3015 int
3016 scm_port_print (SCM exp, SCM port, scm_print_state *pstate SCM_UNUSED)
3017 {
3018 char *type = SCM_PTOBNAME (SCM_PTOBNUM (exp));
3019 if (!type)
3020 type = "port";
3021 scm_puts_unlocked ("#<", port);
3022 scm_print_port_mode (exp, port);
3023 scm_puts_unlocked (type, port);
3024 scm_putc_unlocked (' ', port);
3025 scm_uintprint (SCM_CELL_WORD_1 (exp), 16, port);
3026 scm_putc_unlocked ('>', port);
3027 return 1;
3028 }
3029
3030
3031 \f
3032
3033 /* Iterating over all ports. */
3034
3035 struct for_each_data
3036 {
3037 void (*proc) (void *data, SCM p);
3038 void *data;
3039 };
3040
3041 static SCM
3042 for_each_trampoline (void *data, SCM port, SCM result)
3043 {
3044 struct for_each_data *d = data;
3045
3046 d->proc (d->data, port);
3047
3048 return result;
3049 }
3050
3051 void
3052 scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data)
3053 {
3054 struct for_each_data d;
3055
3056 d.proc = proc;
3057 d.data = data;
3058
3059 scm_c_weak_set_fold (for_each_trampoline, &d, SCM_EOL,
3060 scm_i_port_weak_set);
3061 }
3062
3063 static void
3064 scm_for_each_trampoline (void *data, SCM port)
3065 {
3066 scm_call_1 (SCM_PACK_POINTER (data), port);
3067 }
3068
3069 SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0,
3070 (SCM proc),
3071 "Apply @var{proc} to each port in the Guile port table\n"
3072 "in turn. The return value is unspecified. More specifically,\n"
3073 "@var{proc} is applied exactly once to every port that exists\n"
3074 "in the system at the time @code{port-for-each} is invoked.\n"
3075 "Changes to the port table while @code{port-for-each} is running\n"
3076 "have no effect as far as @code{port-for-each} is concerned.")
3077 #define FUNC_NAME s_scm_port_for_each
3078 {
3079 SCM_VALIDATE_PROC (1, proc);
3080
3081 scm_c_port_for_each (scm_for_each_trampoline, SCM_UNPACK_POINTER (proc));
3082
3083 return SCM_UNSPECIFIED;
3084 }
3085 #undef FUNC_NAME
3086
3087 static void
3088 flush_output_port (void *closure, SCM port)
3089 {
3090 if (SCM_OPOUTPORTP (port))
3091 scm_flush_unlocked (port);
3092 }
3093
3094 SCM_DEFINE (scm_flush_all_ports, "flush-all-ports", 0, 0, 0,
3095 (),
3096 "Equivalent to calling @code{force-output} on\n"
3097 "all open output ports. The return value is unspecified.")
3098 #define FUNC_NAME s_scm_flush_all_ports
3099 {
3100 scm_c_port_for_each (&flush_output_port, NULL);
3101 return SCM_UNSPECIFIED;
3102 }
3103 #undef FUNC_NAME
3104
3105
3106 \f
3107
3108 /* Void ports. */
3109
3110 scm_t_bits scm_tc16_void_port = 0;
3111
3112 static int fill_input_void_port (SCM port SCM_UNUSED)
3113 {
3114 return EOF;
3115 }
3116
3117 static void
3118 write_void_port (SCM port SCM_UNUSED,
3119 const void *data SCM_UNUSED,
3120 size_t size SCM_UNUSED)
3121 {
3122 }
3123
3124 static SCM
3125 scm_i_void_port (long mode_bits)
3126 {
3127 SCM ret;
3128
3129 ret = scm_c_make_port (scm_tc16_void_port, mode_bits, 0);
3130
3131 scm_port_non_buffer (SCM_PTAB_ENTRY (ret));
3132
3133 return ret;
3134 }
3135
3136 SCM
3137 scm_void_port (char *mode_str)
3138 {
3139 return scm_i_void_port (scm_mode_bits (mode_str));
3140 }
3141
3142 SCM_DEFINE (scm_sys_make_void_port, "%make-void-port", 1, 0, 0,
3143 (SCM mode),
3144 "Create and return a new void port. A void port acts like\n"
3145 "@file{/dev/null}. The @var{mode} argument\n"
3146 "specifies the input/output modes for this port: see the\n"
3147 "documentation for @code{open-file} in @ref{File Ports}.")
3148 #define FUNC_NAME s_scm_sys_make_void_port
3149 {
3150 return scm_i_void_port (scm_i_mode_bits (mode));
3151 }
3152 #undef FUNC_NAME
3153
3154
3155 \f
3156
3157 /* Initialization. */
3158
3159 void
3160 scm_init_ports ()
3161 {
3162 /* lseek() symbols. */
3163 scm_c_define ("SEEK_SET", scm_from_int (SEEK_SET));
3164 scm_c_define ("SEEK_CUR", scm_from_int (SEEK_CUR));
3165 scm_c_define ("SEEK_END", scm_from_int (SEEK_END));
3166
3167 scm_tc16_void_port = scm_make_port_type ("void", fill_input_void_port,
3168 write_void_port);
3169
3170 cur_inport_fluid = scm_make_fluid ();
3171 cur_outport_fluid = scm_make_fluid ();
3172 cur_errport_fluid = scm_make_fluid ();
3173 cur_loadport_fluid = scm_make_fluid ();
3174
3175 scm_i_port_weak_set = scm_c_make_weak_set (31);
3176
3177 #include "libguile/ports.x"
3178
3179 /* Use Latin-1 as the default port encoding. */
3180 SCM_VARIABLE_SET (default_port_encoding_var,
3181 scm_make_fluid_with_default (SCM_BOOL_F));
3182 scm_port_encoding_init = 1;
3183
3184 SCM_VARIABLE_SET (default_conversion_strategy_var,
3185 scm_make_fluid_with_default (sym_substitute));
3186 scm_conversion_strategy_init = 1;
3187
3188 /* These bindings are used when boot-9 turns `current-input-port' et
3189 al into parameters. They are then removed from the guile module. */
3190 scm_c_define ("%current-input-port-fluid", cur_inport_fluid);
3191 scm_c_define ("%current-output-port-fluid", cur_outport_fluid);
3192 scm_c_define ("%current-error-port-fluid", cur_errport_fluid);
3193 }
3194
3195 /*
3196 Local Variables:
3197 c-file-style: "gnu"
3198 End:
3199 */