Merge remote-tracking branch 'origin/stable-2.0'
[bpt/guile.git] / libguile / ports.c
1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004,
2 * 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
18 */
19
20
21 \f
22 /* Headers. */
23
24 #define _LARGEFILE64_SOURCE /* ask for stat64 etc */
25
26 #ifdef HAVE_CONFIG_H
27 # include <config.h>
28 #endif
29
30 #include <stdio.h>
31 #include <errno.h>
32 #include <fcntl.h> /* for chsize on mingw */
33 #include <assert.h>
34 #include <iconv.h>
35 #include <uniconv.h>
36 #include <unistr.h>
37 #include <striconveh.h>
38
39 #include <assert.h>
40
41 #include "libguile/_scm.h"
42 #include "libguile/async.h"
43 #include "libguile/deprecation.h"
44 #include "libguile/eval.h"
45 #include "libguile/fports.h" /* direct access for seek and truncate */
46 #include "libguile/goops.h"
47 #include "libguile/smob.h"
48 #include "libguile/chars.h"
49 #include "libguile/dynwind.h"
50
51 #include "libguile/keywords.h"
52 #include "libguile/hashtab.h"
53 #include "libguile/root.h"
54 #include "libguile/strings.h"
55 #include "libguile/mallocs.h"
56 #include "libguile/validate.h"
57 #include "libguile/ports.h"
58 #include "libguile/ports-internal.h"
59 #include "libguile/vectors.h"
60 #include "libguile/weak-set.h"
61 #include "libguile/fluids.h"
62 #include "libguile/eq.h"
63
64 #ifdef HAVE_STRING_H
65 #include <string.h>
66 #endif
67
68 #ifdef HAVE_IO_H
69 #include <io.h>
70 #endif
71
72 #ifdef HAVE_UNISTD_H
73 #include <unistd.h>
74 #endif
75
76 #ifdef HAVE_SYS_IOCTL_H
77 #include <sys/ioctl.h>
78 #endif
79
80 /* Mingw (version 3.4.5, circa 2006) has ftruncate as an alias for chsize
81 already, but have this code here in case that wasn't so in past versions,
82 or perhaps to help other minimal DOS environments.
83
84 gnulib ftruncate.c has code using fcntl F_CHSIZE and F_FREESP, which
85 might be possibilities if we've got other systems without ftruncate. */
86
87 #if defined HAVE_CHSIZE && ! defined HAVE_FTRUNCATE
88 #define ftruncate(fd, size) chsize (fd, size)
89 #undef HAVE_FTRUNCATE
90 #define HAVE_FTRUNCATE 1
91 #endif
92
93 \f
94 /* Port encodings are case-insensitive ASCII strings. */
95 static char
96 ascii_toupper (char c)
97 {
98 return (c < 'a' || c > 'z') ? c : ('A' + (c - 'a'));
99 }
100
101 /* It is only necessary to use this function on encodings that come from
102 the user and have not been canonicalized yet. Encodings that are set
103 on ports or in the default encoding fluid are in upper-case, and can
104 be compared with strcmp. */
105 static int
106 encoding_matches (const char *enc, const char *upper)
107 {
108 if (!enc)
109 enc = "ISO-8859-1";
110
111 while (*enc)
112 if (ascii_toupper (*enc++) != *upper++)
113 return 0;
114
115 return !*upper;
116 }
117
118 static char*
119 canonicalize_encoding (const char *enc)
120 {
121 char *ret;
122 int i;
123
124 if (!enc)
125 return "ISO-8859-1";
126
127 ret = scm_gc_strdup (enc, "port");
128
129 for (i = 0; ret[i]; i++)
130 {
131 if (ret[i] > 127)
132 /* Restrict to ASCII. */
133 scm_misc_error (NULL, "invalid character encoding ~s",
134 scm_list_1 (scm_from_latin1_string (enc)));
135 else
136 ret[i] = ascii_toupper (ret[i]);
137 }
138
139 return ret;
140 }
141
142
143 \f
144 /* The port kind table --- a dynamically resized array of port types. */
145
146
147 /* scm_ptobs scm_numptob
148 * implement a dynamically resized array of ptob records.
149 * Indexes into this table are used when generating type
150 * tags for smobjects (if you know a tag you can get an index and conversely).
151 */
152 static scm_t_ptob_descriptor **scm_ptobs = NULL;
153 static long scm_numptob = 0; /* Number of port types. */
154 static long scm_ptobs_size = 0; /* Number of slots in the port type
155 table. */
156 static scm_i_pthread_mutex_t scm_ptobs_lock = SCM_I_PTHREAD_MUTEX_INITIALIZER;
157
158 long
159 scm_c_num_port_types (void)
160 {
161 long ret;
162
163 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
164 ret = scm_numptob;
165 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
166
167 return ret;
168 }
169
170 scm_t_ptob_descriptor*
171 scm_c_port_type_ref (long ptobnum)
172 {
173 scm_t_ptob_descriptor *ret = NULL;
174
175 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
176
177 if (0 <= ptobnum && ptobnum < scm_numptob)
178 ret = scm_ptobs[ptobnum];
179
180 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
181
182 if (!ret)
183 scm_out_of_range ("scm_c_port_type_ref", scm_from_long (ptobnum));
184
185 return ret;
186 }
187
188 long
189 scm_c_port_type_add_x (scm_t_ptob_descriptor *desc)
190 {
191 long ret = -1;
192
193 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
194
195 if (scm_numptob + 1 < SCM_I_MAX_PORT_TYPE_COUNT)
196 {
197 if (scm_numptob == scm_ptobs_size)
198 {
199 unsigned long old_size = scm_ptobs_size;
200 scm_t_ptob_descriptor **old_ptobs = scm_ptobs;
201
202 /* Currently there are only 9 predefined port types, so one
203 resize will cover it. */
204 scm_ptobs_size = old_size + 10;
205
206 if (scm_ptobs_size >= SCM_I_MAX_PORT_TYPE_COUNT)
207 scm_ptobs_size = SCM_I_MAX_PORT_TYPE_COUNT;
208
209 scm_ptobs = scm_gc_malloc (sizeof (*scm_ptobs) * scm_ptobs_size,
210 "scm_ptobs");
211
212 memcpy (scm_ptobs, old_ptobs, sizeof (*scm_ptobs) * scm_numptob);
213 }
214
215 ret = scm_numptob++;
216 scm_ptobs[ret] = desc;
217 }
218
219 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
220
221 if (ret < 0)
222 scm_out_of_range ("scm_c_port_type_add_x", scm_from_long (scm_numptob));
223
224 return ret;
225 }
226
227 /*
228 * We choose to use an interface similar to the smob interface with
229 * fill_input and write as standard fields, passed to the port
230 * type constructor, and optional fields set by setters.
231 */
232
233 static void
234 flush_port_default (SCM port SCM_UNUSED)
235 {
236 }
237
238 static void
239 end_input_default (SCM port SCM_UNUSED, int offset SCM_UNUSED)
240 {
241 }
242
243 scm_t_bits
244 scm_make_port_type (char *name,
245 int (*fill_input) (SCM port),
246 void (*write) (SCM port, const void *data, size_t size))
247 {
248 scm_t_ptob_descriptor *desc;
249 long ptobnum;
250
251 desc = scm_gc_malloc_pointerless (sizeof (*desc), "port-type");
252 memset (desc, 0, sizeof (*desc));
253
254 desc->name = name;
255 desc->print = scm_port_print;
256 desc->write = write;
257 desc->flush = flush_port_default;
258 desc->end_input = end_input_default;
259 desc->fill_input = fill_input;
260
261 ptobnum = scm_c_port_type_add_x (desc);
262
263 /* Make a class object if GOOPS is present. */
264 if (SCM_UNPACK (scm_port_class[0]) != 0)
265 scm_make_port_classes (ptobnum, name);
266
267 return scm_tc7_port + ptobnum * 256;
268 }
269
270 void
271 scm_set_port_mark (scm_t_bits tc, SCM (*mark) (SCM))
272 {
273 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->mark = mark;
274 }
275
276 void
277 scm_set_port_free (scm_t_bits tc, size_t (*free) (SCM))
278 {
279 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->free = free;
280 }
281
282 void
283 scm_set_port_print (scm_t_bits tc, int (*print) (SCM exp, SCM port,
284 scm_print_state *pstate))
285 {
286 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->print = print;
287 }
288
289 void
290 scm_set_port_equalp (scm_t_bits tc, SCM (*equalp) (SCM, SCM))
291 {
292 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->equalp = equalp;
293 }
294
295 void
296 scm_set_port_close (scm_t_bits tc, int (*close) (SCM))
297 {
298 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->close = close;
299 }
300
301 void
302 scm_set_port_flush (scm_t_bits tc, void (*flush) (SCM port))
303 {
304 scm_t_ptob_descriptor *ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tc));
305 ptob->flush = flush;
306 ptob->flags |= SCM_PORT_TYPE_HAS_FLUSH;
307 }
308
309 void
310 scm_set_port_end_input (scm_t_bits tc, void (*end_input) (SCM port, int offset))
311 {
312 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->end_input = end_input;
313 }
314
315 void
316 scm_set_port_seek (scm_t_bits tc, scm_t_off (*seek) (SCM, scm_t_off, int))
317 {
318 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->seek = seek;
319 }
320
321 void
322 scm_set_port_truncate (scm_t_bits tc, void (*truncate) (SCM, scm_t_off))
323 {
324 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->truncate = truncate;
325 }
326
327 void
328 scm_set_port_input_waiting (scm_t_bits tc, int (*input_waiting) (SCM))
329 {
330 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->input_waiting = input_waiting;
331 }
332
333 static void
334 scm_i_set_pending_eof (SCM port)
335 {
336 SCM_PORT_GET_INTERNAL (port)->pending_eof = 1;
337 }
338
339 static void
340 scm_i_clear_pending_eof (SCM port)
341 {
342 SCM_PORT_GET_INTERNAL (port)->pending_eof = 0;
343 }
344
345 SCM
346 scm_i_port_alist (SCM port)
347 {
348 return SCM_PORT_GET_INTERNAL (port)->alist;
349 }
350
351 void
352 scm_i_set_port_alist_x (SCM port, SCM alist)
353 {
354 SCM_PORT_GET_INTERNAL (port)->alist = alist;
355 }
356
357 \f
358
359 /* Standard ports --- current input, output, error, and more(!). */
360
361 static SCM cur_inport_fluid = SCM_BOOL_F;
362 static SCM cur_outport_fluid = SCM_BOOL_F;
363 static SCM cur_errport_fluid = SCM_BOOL_F;
364 static SCM cur_loadport_fluid = SCM_BOOL_F;
365
366 SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
367 (),
368 "Return the current input port. This is the default port used\n"
369 "by many input procedures. Initially, @code{current-input-port}\n"
370 "returns the @dfn{standard input} in Unix and C terminology.")
371 #define FUNC_NAME s_scm_current_input_port
372 {
373 if (scm_is_true (cur_inport_fluid))
374 return scm_fluid_ref (cur_inport_fluid);
375 else
376 return SCM_BOOL_F;
377 }
378 #undef FUNC_NAME
379
380 SCM_DEFINE (scm_current_output_port, "current-output-port", 0, 0, 0,
381 (),
382 "Return the current output port. This is the default port used\n"
383 "by many output procedures. Initially,\n"
384 "@code{current-output-port} returns the @dfn{standard output} in\n"
385 "Unix and C terminology.")
386 #define FUNC_NAME s_scm_current_output_port
387 {
388 if (scm_is_true (cur_outport_fluid))
389 return scm_fluid_ref (cur_outport_fluid);
390 else
391 return SCM_BOOL_F;
392 }
393 #undef FUNC_NAME
394
395 SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0,
396 (),
397 "Return the port to which errors and warnings should be sent (the\n"
398 "@dfn{standard error} in Unix and C terminology).")
399 #define FUNC_NAME s_scm_current_error_port
400 {
401 if (scm_is_true (cur_errport_fluid))
402 return scm_fluid_ref (cur_errport_fluid);
403 else
404 return SCM_BOOL_F;
405 }
406 #undef FUNC_NAME
407
408 SCM
409 scm_current_warning_port (void)
410 {
411 static SCM cwp_var = SCM_UNDEFINED;
412 static scm_i_pthread_mutex_t cwp_var_mutex
413 = SCM_I_PTHREAD_MUTEX_INITIALIZER;
414
415 scm_i_scm_pthread_mutex_lock (&cwp_var_mutex);
416 if (SCM_UNBNDP (cwp_var))
417 cwp_var = scm_c_private_variable ("guile", "current-warning-port");
418 scm_i_pthread_mutex_unlock (&cwp_var_mutex);
419
420 return scm_call_0 (scm_variable_ref (cwp_var));
421 }
422
423 SCM_DEFINE (scm_current_load_port, "current-load-port", 0, 0, 0,
424 (),
425 "Return the current-load-port.\n"
426 "The load port is used internally by @code{primitive-load}.")
427 #define FUNC_NAME s_scm_current_load_port
428 {
429 return scm_fluid_ref (cur_loadport_fluid);
430 }
431 #undef FUNC_NAME
432
433 SCM_DEFINE (scm_set_current_input_port, "set-current-input-port", 1, 0, 0,
434 (SCM port),
435 "@deffnx {Scheme Procedure} set-current-output-port port\n"
436 "@deffnx {Scheme Procedure} set-current-error-port port\n"
437 "Change the ports returned by @code{current-input-port},\n"
438 "@code{current-output-port} and @code{current-error-port}, respectively,\n"
439 "so that they use the supplied @var{port} for input or output.")
440 #define FUNC_NAME s_scm_set_current_input_port
441 {
442 SCM oinp = scm_fluid_ref (cur_inport_fluid);
443 SCM_VALIDATE_OPINPORT (1, port);
444 scm_fluid_set_x (cur_inport_fluid, port);
445 return oinp;
446 }
447 #undef FUNC_NAME
448
449
450 SCM_DEFINE (scm_set_current_output_port, "set-current-output-port", 1, 0, 0,
451 (SCM port),
452 "Set the current default output port to @var{port}.")
453 #define FUNC_NAME s_scm_set_current_output_port
454 {
455 SCM ooutp = scm_fluid_ref (cur_outport_fluid);
456 port = SCM_COERCE_OUTPORT (port);
457 SCM_VALIDATE_OPOUTPORT (1, port);
458 scm_fluid_set_x (cur_outport_fluid, port);
459 return ooutp;
460 }
461 #undef FUNC_NAME
462
463
464 SCM_DEFINE (scm_set_current_error_port, "set-current-error-port", 1, 0, 0,
465 (SCM port),
466 "Set the current default error port to @var{port}.")
467 #define FUNC_NAME s_scm_set_current_error_port
468 {
469 SCM oerrp = scm_fluid_ref (cur_errport_fluid);
470 port = SCM_COERCE_OUTPORT (port);
471 SCM_VALIDATE_OPOUTPORT (1, port);
472 scm_fluid_set_x (cur_errport_fluid, port);
473 return oerrp;
474 }
475 #undef FUNC_NAME
476
477
478 SCM
479 scm_set_current_warning_port (SCM port)
480 {
481 static SCM cwp_var = SCM_BOOL_F;
482
483 if (scm_is_false (cwp_var))
484 cwp_var = scm_c_private_lookup ("guile", "current-warning-port");
485
486 return scm_call_1 (scm_variable_ref (cwp_var), port);
487 }
488
489
490 void
491 scm_dynwind_current_input_port (SCM port)
492 #define FUNC_NAME NULL
493 {
494 SCM_VALIDATE_OPINPORT (1, port);
495 scm_dynwind_fluid (cur_inport_fluid, port);
496 }
497 #undef FUNC_NAME
498
499 void
500 scm_dynwind_current_output_port (SCM port)
501 #define FUNC_NAME NULL
502 {
503 port = SCM_COERCE_OUTPORT (port);
504 SCM_VALIDATE_OPOUTPORT (1, port);
505 scm_dynwind_fluid (cur_outport_fluid, port);
506 }
507 #undef FUNC_NAME
508
509 void
510 scm_dynwind_current_error_port (SCM port)
511 #define FUNC_NAME NULL
512 {
513 port = SCM_COERCE_OUTPORT (port);
514 SCM_VALIDATE_OPOUTPORT (1, port);
515 scm_dynwind_fluid (cur_errport_fluid, port);
516 }
517 #undef FUNC_NAME
518
519 void
520 scm_i_dynwind_current_load_port (SCM port)
521 {
522 scm_dynwind_fluid (cur_loadport_fluid, port);
523 }
524
525
526 \f
527
528 /* Retrieving a port's mode. */
529
530 /* Return the flags that characterize a port based on the mode
531 * string used to open a file for that port.
532 *
533 * See PORT FLAGS in scm.h
534 */
535
536 static long
537 scm_i_mode_bits_n (SCM modes)
538 {
539 return (SCM_OPN
540 | (scm_i_string_contains_char (modes, 'r')
541 || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0)
542 | (scm_i_string_contains_char (modes, 'w')
543 || scm_i_string_contains_char (modes, 'a')
544 || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0)
545 | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0)
546 | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0));
547 }
548
549 long
550 scm_mode_bits (char *modes)
551 {
552 /* Valid characters are rw+a0l. So, use latin1. */
553 return scm_i_mode_bits (scm_from_latin1_string (modes));
554 }
555
556 long
557 scm_i_mode_bits (SCM modes)
558 {
559 long bits;
560
561 if (!scm_is_string (modes))
562 scm_wrong_type_arg_msg (NULL, 0, modes, "string");
563
564 bits = scm_i_mode_bits_n (modes);
565 scm_remember_upto_here_1 (modes);
566 return bits;
567 }
568
569 /* Return the mode flags from an open port.
570 * Some modes such as "append" are only used when opening
571 * a file and are not returned here. */
572
573 SCM_DEFINE (scm_port_mode, "port-mode", 1, 0, 0,
574 (SCM port),
575 "Return the port modes associated with the open port @var{port}.\n"
576 "These will not necessarily be identical to the modes used when\n"
577 "the port was opened, since modes such as \"append\" which are\n"
578 "used only during port creation are not retained.")
579 #define FUNC_NAME s_scm_port_mode
580 {
581 char modes[4];
582 modes[0] = '\0';
583
584 port = SCM_COERCE_OUTPORT (port);
585 SCM_VALIDATE_OPPORT (1, port);
586 if (SCM_CELL_WORD_0 (port) & SCM_RDNG) {
587 if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
588 strcpy (modes, "r+");
589 else
590 strcpy (modes, "r");
591 }
592 else if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
593 strcpy (modes, "w");
594 if (SCM_CELL_WORD_0 (port) & SCM_BUF0)
595 strcat (modes, "0");
596
597 return scm_from_latin1_string (modes);
598 }
599 #undef FUNC_NAME
600
601
602 \f
603
604 /* The port table --- a weak set of all ports.
605
606 We need a global registry of ports to flush them all at exit, and to
607 get all the ports matching a file descriptor. */
608 SCM scm_i_port_weak_set;
609
610
611 \f
612
613 /* Port finalization. */
614
615 struct do_free_data
616 {
617 scm_t_ptob_descriptor *ptob;
618 SCM port;
619 };
620
621 static SCM
622 do_free (void *body_data)
623 {
624 struct do_free_data *data = body_data;
625
626 /* `close' is for explicit `close-port' by user. `free' is for this
627 purpose: ports collected by the GC. */
628 data->ptob->free (data->port);
629
630 return SCM_BOOL_T;
631 }
632
633 /* Finalize the object (a port) pointed to by PTR. */
634 static void
635 finalize_port (void *ptr, void *data)
636 {
637 SCM port = SCM_PACK_POINTER (ptr);
638
639 if (!SCM_PORTP (port))
640 abort ();
641
642 if (SCM_OPENP (port))
643 {
644 struct do_free_data data;
645
646 SCM_CLR_PORT_OPEN_FLAG (port);
647
648 data.ptob = SCM_PORT_DESCRIPTOR (port);
649 data.port = port;
650
651 scm_internal_catch (SCM_BOOL_T, do_free, &data,
652 scm_handle_by_message_noexit, NULL);
653
654 scm_gc_ports_collected++;
655 }
656 }
657
658
659 \f
660
661 SCM
662 scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits,
663 const char *encoding,
664 scm_t_string_failed_conversion_handler handler,
665 scm_t_bits stream)
666 {
667 SCM ret;
668 scm_t_port *entry;
669 scm_t_port_internal *pti;
670 scm_t_ptob_descriptor *ptob;
671
672 entry = scm_gc_typed_calloc (scm_t_port);
673 pti = scm_gc_typed_calloc (scm_t_port_internal);
674 ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tag));
675
676 ret = scm_words (tag | mode_bits, 3);
677 SCM_SET_CELL_WORD_1 (ret, (scm_t_bits) entry);
678 SCM_SET_CELL_WORD_2 (ret, (scm_t_bits) ptob);
679
680 entry->lock = scm_gc_malloc_pointerless (sizeof (*entry->lock), "port lock");
681 scm_i_pthread_mutex_init (entry->lock, scm_i_pthread_mutexattr_recursive);
682
683 entry->internal = pti;
684 entry->file_name = SCM_BOOL_F;
685 entry->rw_active = SCM_PORT_NEITHER;
686 entry->port = ret;
687 entry->stream = stream;
688
689 if (encoding_matches (encoding, "UTF-8"))
690 {
691 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
692 entry->encoding = "UTF-8";
693 }
694 else if (encoding_matches (encoding, "ISO-8859-1"))
695 {
696 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
697 entry->encoding = "ISO-8859-1";
698 }
699 else
700 {
701 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
702 entry->encoding = canonicalize_encoding (encoding);
703 }
704
705 entry->ilseq_handler = handler;
706 pti->iconv_descriptors = NULL;
707
708 pti->at_stream_start_for_bom_read = 1;
709 pti->at_stream_start_for_bom_write = 1;
710
711 pti->pending_eof = 0;
712 pti->alist = SCM_EOL;
713
714 if (SCM_PORT_DESCRIPTOR (ret)->free)
715 scm_i_set_finalizer (SCM2PTR (ret), finalize_port, NULL);
716
717 if (SCM_PORT_DESCRIPTOR (ret)->flags & SCM_PORT_TYPE_HAS_FLUSH)
718 scm_weak_set_add_x (scm_i_port_weak_set, ret);
719
720 return ret;
721 }
722
723 SCM
724 scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream)
725 {
726 return scm_c_make_port_with_encoding (tag, mode_bits,
727 scm_i_default_port_encoding (),
728 scm_i_default_port_conversion_handler (),
729 stream);
730 }
731
732 SCM
733 scm_new_port_table_entry (scm_t_bits tag)
734 {
735 return scm_c_make_port (tag, 0, 0);
736 }
737
738 \f
739
740 /* Predicates. */
741
742 SCM_DEFINE (scm_port_p, "port?", 1, 0, 0,
743 (SCM x),
744 "Return a boolean indicating whether @var{x} is a port.\n"
745 "Equivalent to @code{(or (input-port? @var{x}) (output-port?\n"
746 "@var{x}))}.")
747 #define FUNC_NAME s_scm_port_p
748 {
749 return scm_from_bool (SCM_PORTP (x));
750 }
751 #undef FUNC_NAME
752
753 SCM_DEFINE (scm_input_port_p, "input-port?", 1, 0, 0,
754 (SCM x),
755 "Return @code{#t} if @var{x} is an input port, otherwise return\n"
756 "@code{#f}. Any object satisfying this predicate also satisfies\n"
757 "@code{port?}.")
758 #define FUNC_NAME s_scm_input_port_p
759 {
760 return scm_from_bool (SCM_INPUT_PORT_P (x));
761 }
762 #undef FUNC_NAME
763
764 SCM_DEFINE (scm_output_port_p, "output-port?", 1, 0, 0,
765 (SCM x),
766 "Return @code{#t} if @var{x} is an output port, otherwise return\n"
767 "@code{#f}. Any object satisfying this predicate also satisfies\n"
768 "@code{port?}.")
769 #define FUNC_NAME s_scm_output_port_p
770 {
771 x = SCM_COERCE_OUTPORT (x);
772 return scm_from_bool (SCM_OUTPUT_PORT_P (x));
773 }
774 #undef FUNC_NAME
775
776 SCM_DEFINE (scm_port_closed_p, "port-closed?", 1, 0, 0,
777 (SCM port),
778 "Return @code{#t} if @var{port} is closed or @code{#f} if it is\n"
779 "open.")
780 #define FUNC_NAME s_scm_port_closed_p
781 {
782 SCM_VALIDATE_PORT (1, port);
783 return scm_from_bool (!SCM_OPPORTP (port));
784 }
785 #undef FUNC_NAME
786
787 SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0,
788 (SCM x),
789 "Return @code{#t} if @var{x} is an end-of-file object; otherwise\n"
790 "return @code{#f}.")
791 #define FUNC_NAME s_scm_eof_object_p
792 {
793 return scm_from_bool (SCM_EOF_OBJECT_P (x));
794 }
795 #undef FUNC_NAME
796
797
798 \f
799
800 /* Closing ports. */
801
802 static void close_iconv_descriptors (scm_t_iconv_descriptors *id);
803
804 /* scm_close_port
805 * Call the close operation on a port object.
806 * see also scm_close.
807 */
808 SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0,
809 (SCM port),
810 "Close the specified port object. Return @code{#t} if it\n"
811 "successfully closes a port or @code{#f} if it was already\n"
812 "closed. An exception may be raised if an error occurs, for\n"
813 "example when flushing buffered output. See also @ref{Ports and\n"
814 "File Descriptors, close}, for a procedure which can close file\n"
815 "descriptors.")
816 #define FUNC_NAME s_scm_close_port
817 {
818 scm_t_port_internal *pti;
819 int rv;
820
821 port = SCM_COERCE_OUTPORT (port);
822
823 SCM_VALIDATE_PORT (1, port);
824 if (SCM_CLOSEDP (port))
825 return SCM_BOOL_F;
826
827 pti = SCM_PORT_GET_INTERNAL (port);
828 SCM_CLR_PORT_OPEN_FLAG (port);
829
830 if (SCM_PORT_DESCRIPTOR (port)->flags & SCM_PORT_TYPE_HAS_FLUSH)
831 scm_weak_set_remove_x (scm_i_port_weak_set, port);
832
833 if (SCM_PORT_DESCRIPTOR (port)->close)
834 /* Note! This may throw an exception. Anything after this point
835 should be resilient to non-local exits. */
836 rv = SCM_PORT_DESCRIPTOR (port)->close (port);
837 else
838 rv = 0;
839
840 if (pti->iconv_descriptors)
841 {
842 /* If we don't get here, the iconv_descriptors finalizer will
843 clean up. */
844 close_iconv_descriptors (pti->iconv_descriptors);
845 pti->iconv_descriptors = NULL;
846 }
847
848 return scm_from_bool (rv >= 0);
849 }
850 #undef FUNC_NAME
851
852 SCM_DEFINE (scm_close_input_port, "close-input-port", 1, 0, 0,
853 (SCM port),
854 "Close the specified input port object. The routine has no effect if\n"
855 "the file has already been closed. An exception may be raised if an\n"
856 "error occurs. The value returned is unspecified.\n\n"
857 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
858 "which can close file descriptors.")
859 #define FUNC_NAME s_scm_close_input_port
860 {
861 SCM_VALIDATE_INPUT_PORT (1, port);
862 scm_close_port (port);
863 return SCM_UNSPECIFIED;
864 }
865 #undef FUNC_NAME
866
867 SCM_DEFINE (scm_close_output_port, "close-output-port", 1, 0, 0,
868 (SCM port),
869 "Close the specified output port object. The routine has no effect if\n"
870 "the file has already been closed. An exception may be raised if an\n"
871 "error occurs. The value returned is unspecified.\n\n"
872 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
873 "which can close file descriptors.")
874 #define FUNC_NAME s_scm_close_output_port
875 {
876 port = SCM_COERCE_OUTPORT (port);
877 SCM_VALIDATE_OUTPUT_PORT (1, port);
878 scm_close_port (port);
879 return SCM_UNSPECIFIED;
880 }
881 #undef FUNC_NAME
882
883
884 \f
885
886 /* Encoding characters to byte streams, and decoding byte streams to
887 characters. */
888
889 /* A fluid specifying the default encoding for newly created ports. If it is
890 a string, that is the encoding. If it is #f, it is in the "native"
891 (Latin-1) encoding. */
892 SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding");
893
894 static int scm_port_encoding_init = 0;
895
896 /* Use ENCODING as the default encoding for future ports. */
897 void
898 scm_i_set_default_port_encoding (const char *encoding)
899 {
900 if (!scm_port_encoding_init
901 || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
902 scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
903 SCM_EOL);
904
905 if (encoding_matches (encoding, "ISO-8859-1"))
906 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
907 else
908 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
909 scm_from_latin1_string (canonicalize_encoding (encoding)));
910 }
911
912 /* Return the name of the default encoding for newly created ports. */
913 const char *
914 scm_i_default_port_encoding (void)
915 {
916 if (!scm_port_encoding_init)
917 return "ISO-8859-1";
918 else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
919 return "ISO-8859-1";
920 else
921 {
922 SCM encoding;
923
924 encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
925 if (!scm_is_string (encoding))
926 return "ISO-8859-1";
927 else
928 return scm_i_string_chars (encoding);
929 }
930 }
931
932 /* A fluid specifying the default conversion handler for newly created
933 ports. Its value should be one of the symbols below. */
934 SCM_VARIABLE (default_conversion_strategy_var,
935 "%default-port-conversion-strategy");
936
937 /* Whether the above fluid is initialized. */
938 static int scm_conversion_strategy_init = 0;
939
940 /* The possible conversion strategies. */
941 SCM_SYMBOL (sym_error, "error");
942 SCM_SYMBOL (sym_substitute, "substitute");
943 SCM_SYMBOL (sym_escape, "escape");
944
945 /* Return the default failed encoding conversion policy for new created
946 ports. */
947 scm_t_string_failed_conversion_handler
948 scm_i_default_port_conversion_handler (void)
949 {
950 scm_t_string_failed_conversion_handler handler;
951
952 if (!scm_conversion_strategy_init
953 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
954 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
955 else
956 {
957 SCM fluid, value;
958
959 fluid = SCM_VARIABLE_REF (default_conversion_strategy_var);
960 value = scm_fluid_ref (fluid);
961
962 if (scm_is_eq (sym_substitute, value))
963 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
964 else if (scm_is_eq (sym_escape, value))
965 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
966 else
967 /* Default to 'error also when the fluid's value is not one of
968 the valid symbols. */
969 handler = SCM_FAILED_CONVERSION_ERROR;
970 }
971
972 return handler;
973 }
974
975 /* Use HANDLER as the default conversion strategy for future ports. */
976 void
977 scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler
978 handler)
979 {
980 SCM strategy;
981
982 if (!scm_conversion_strategy_init
983 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
984 scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
985 SCM_EOL);
986
987 switch (handler)
988 {
989 case SCM_FAILED_CONVERSION_ERROR:
990 strategy = sym_error;
991 break;
992
993 case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE:
994 strategy = sym_escape;
995 break;
996
997 case SCM_FAILED_CONVERSION_QUESTION_MARK:
998 strategy = sym_substitute;
999 break;
1000
1001 default:
1002 abort ();
1003 }
1004
1005 scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var),
1006 strategy);
1007 }
1008
1009 static void
1010 scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port);
1011
1012 /* If the next LEN bytes from PORT are equal to those in BYTES, then
1013 return 1, else return 0. Leave the port position unchanged. */
1014 static int
1015 looking_at_bytes (SCM port, const unsigned char *bytes, int len)
1016 {
1017 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1018 int i = 0;
1019
1020 while (i < len && scm_peek_byte_or_eof_unlocked (port) == bytes[i])
1021 {
1022 pt->read_pos++;
1023 i++;
1024 }
1025 scm_i_unget_bytes_unlocked (bytes, i, port);
1026 return (i == len);
1027 }
1028
1029 static const unsigned char scm_utf8_bom[3] = {0xEF, 0xBB, 0xBF};
1030 static const unsigned char scm_utf16be_bom[2] = {0xFE, 0xFF};
1031 static const unsigned char scm_utf16le_bom[2] = {0xFF, 0xFE};
1032 static const unsigned char scm_utf32be_bom[4] = {0x00, 0x00, 0xFE, 0xFF};
1033 static const unsigned char scm_utf32le_bom[4] = {0xFF, 0xFE, 0x00, 0x00};
1034
1035 /* Decide what byte order to use for a UTF-16 port. Return "UTF-16BE"
1036 or "UTF-16LE". MODE must be either SCM_PORT_READ or SCM_PORT_WRITE,
1037 and specifies which operation is about to be done. The MODE
1038 determines how we will decide the byte order. We deliberately avoid
1039 reading from the port unless the user is about to do so. If the user
1040 is about to read, then we look for a BOM, and if present, we use it
1041 to determine the byte order. Otherwise we choose big endian, as
1042 recommended by the Unicode Standard. Note that the BOM (if any) is
1043 not consumed here. */
1044 static const char *
1045 decide_utf16_encoding (SCM port, scm_t_port_rw_active mode)
1046 {
1047 if (mode == SCM_PORT_READ
1048 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1049 && looking_at_bytes (port, scm_utf16le_bom, sizeof scm_utf16le_bom))
1050 return "UTF-16LE";
1051 else
1052 return "UTF-16BE";
1053 }
1054
1055 /* Decide what byte order to use for a UTF-32 port. Return "UTF-32BE"
1056 or "UTF-32LE". See the comment above 'decide_utf16_encoding' for
1057 details. */
1058 static const char *
1059 decide_utf32_encoding (SCM port, scm_t_port_rw_active mode)
1060 {
1061 if (mode == SCM_PORT_READ
1062 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1063 && looking_at_bytes (port, scm_utf32le_bom, sizeof scm_utf32le_bom))
1064 return "UTF-32LE";
1065 else
1066 return "UTF-32BE";
1067 }
1068
1069 static void
1070 finalize_iconv_descriptors (void *ptr, void *data)
1071 {
1072 close_iconv_descriptors (ptr);
1073 }
1074
1075 static scm_t_iconv_descriptors *
1076 open_iconv_descriptors (const char *encoding, int reading, int writing)
1077 {
1078 scm_t_iconv_descriptors *id;
1079 iconv_t input_cd, output_cd;
1080 size_t i;
1081
1082 input_cd = (iconv_t) -1;
1083 output_cd = (iconv_t) -1;
1084
1085 for (i = 0; encoding[i]; i++)
1086 if (encoding[i] > 127)
1087 goto invalid_encoding;
1088
1089 if (reading)
1090 {
1091 /* Open an input iconv conversion descriptor, from ENCODING
1092 to UTF-8. We choose UTF-8, not UTF-32, because iconv
1093 implementations can typically convert from anything to
1094 UTF-8, but not to UTF-32 (see
1095 <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
1096
1097 /* Assume opening an iconv descriptor causes about 16 KB of
1098 allocation. */
1099 scm_gc_register_allocation (16 * 1024);
1100
1101 input_cd = iconv_open ("UTF-8", encoding);
1102 if (input_cd == (iconv_t) -1)
1103 goto invalid_encoding;
1104 }
1105
1106 if (writing)
1107 {
1108 /* Assume opening an iconv descriptor causes about 16 KB of
1109 allocation. */
1110 scm_gc_register_allocation (16 * 1024);
1111
1112 output_cd = iconv_open (encoding, "UTF-8");
1113 if (output_cd == (iconv_t) -1)
1114 {
1115 if (input_cd != (iconv_t) -1)
1116 iconv_close (input_cd);
1117 goto invalid_encoding;
1118 }
1119 }
1120
1121 id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors");
1122 id->input_cd = input_cd;
1123 id->output_cd = output_cd;
1124
1125 /* Register a finalizer to close the descriptors. */
1126 scm_i_set_finalizer (id, finalize_iconv_descriptors, NULL);
1127
1128 return id;
1129
1130 invalid_encoding:
1131 {
1132 SCM err;
1133 err = scm_from_latin1_string (encoding);
1134 scm_misc_error ("open_iconv_descriptors",
1135 "invalid or unknown character encoding ~s",
1136 scm_list_1 (err));
1137 }
1138 }
1139
1140 static void
1141 close_iconv_descriptors (scm_t_iconv_descriptors *id)
1142 {
1143 if (id->input_cd != (iconv_t) -1)
1144 iconv_close (id->input_cd);
1145 if (id->output_cd != (iconv_t) -1)
1146 iconv_close (id->output_cd);
1147 id->input_cd = (void *) -1;
1148 id->output_cd = (void *) -1;
1149 }
1150
1151 scm_t_iconv_descriptors *
1152 scm_i_port_iconv_descriptors (SCM port, scm_t_port_rw_active mode)
1153 {
1154 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
1155
1156 assert (pti->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV);
1157
1158 if (!pti->iconv_descriptors)
1159 {
1160 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1161 const char *precise_encoding;
1162
1163 if (!pt->encoding)
1164 pt->encoding = "ISO-8859-1";
1165
1166 /* If the specified encoding is UTF-16 or UTF-32, then make
1167 that more precise by deciding what byte order to use. */
1168 if (strcmp (pt->encoding, "UTF-16") == 0)
1169 precise_encoding = decide_utf16_encoding (port, mode);
1170 else if (strcmp (pt->encoding, "UTF-32") == 0)
1171 precise_encoding = decide_utf32_encoding (port, mode);
1172 else
1173 precise_encoding = pt->encoding;
1174
1175 pti->iconv_descriptors =
1176 open_iconv_descriptors (precise_encoding,
1177 SCM_INPUT_PORT_P (port),
1178 SCM_OUTPUT_PORT_P (port));
1179 }
1180
1181 return pti->iconv_descriptors;
1182 }
1183
1184 /* The name of the encoding is itself encoded in ASCII. */
1185 void
1186 scm_i_set_port_encoding_x (SCM port, const char *encoding)
1187 {
1188 scm_t_port *pt;
1189 scm_t_port_internal *pti;
1190 scm_t_iconv_descriptors *prev;
1191
1192 /* Set the character encoding for this port. */
1193 pt = SCM_PTAB_ENTRY (port);
1194 pti = SCM_PORT_GET_INTERNAL (port);
1195 prev = pti->iconv_descriptors;
1196
1197 /* In order to handle cases where the encoding changes mid-stream
1198 (e.g. within an HTTP stream, or within a file that is composed of
1199 segments with different encodings), we consider this to be "stream
1200 start" for purposes of BOM handling, regardless of our actual file
1201 position. */
1202 pti->at_stream_start_for_bom_read = 1;
1203 pti->at_stream_start_for_bom_write = 1;
1204
1205 if (encoding_matches (encoding, "UTF-8"))
1206 {
1207 pt->encoding = "UTF-8";
1208 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
1209 }
1210 else if (encoding_matches (encoding, "ISO-8859-1"))
1211 {
1212 pt->encoding = "ISO-8859-1";
1213 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
1214 }
1215 else
1216 {
1217 pt->encoding = canonicalize_encoding (encoding);
1218 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
1219 }
1220
1221 pti->iconv_descriptors = NULL;
1222 if (prev)
1223 close_iconv_descriptors (prev);
1224 }
1225
1226 SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
1227 (SCM port),
1228 "Returns, as a string, the character encoding that @var{port}\n"
1229 "uses to interpret its input and output.\n")
1230 #define FUNC_NAME s_scm_port_encoding
1231 {
1232 SCM_VALIDATE_PORT (1, port);
1233
1234 return scm_from_latin1_string (SCM_PTAB_ENTRY (port)->encoding);
1235 }
1236 #undef FUNC_NAME
1237
1238 SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
1239 (SCM port, SCM enc),
1240 "Sets the character encoding that will be used to interpret all\n"
1241 "port I/O. New ports are created with the encoding\n"
1242 "appropriate for the current locale if @code{setlocale} has \n"
1243 "been called or ISO-8859-1 otherwise\n"
1244 "and this procedure can be used to modify that encoding.\n")
1245 #define FUNC_NAME s_scm_set_port_encoding_x
1246 {
1247 char *enc_str;
1248
1249 SCM_VALIDATE_PORT (1, port);
1250 SCM_VALIDATE_STRING (2, enc);
1251
1252 enc_str = scm_to_latin1_string (enc);
1253 scm_i_set_port_encoding_x (port, enc_str);
1254 free (enc_str);
1255
1256 return SCM_UNSPECIFIED;
1257 }
1258 #undef FUNC_NAME
1259
1260 SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
1261 1, 0, 0, (SCM port),
1262 "Returns the behavior of the port when handling a character that\n"
1263 "is not representable in the port's current encoding.\n"
1264 "It returns the symbol @code{error} if unrepresentable characters\n"
1265 "should cause exceptions, @code{substitute} if the port should\n"
1266 "try to replace unrepresentable characters with question marks or\n"
1267 "approximate characters, or @code{escape} if unrepresentable\n"
1268 "characters should be converted to string escapes.\n"
1269 "\n"
1270 "If @var{port} is @code{#f}, then the current default behavior\n"
1271 "will be returned. New ports will have this default behavior\n"
1272 "when they are created.\n")
1273 #define FUNC_NAME s_scm_port_conversion_strategy
1274 {
1275 scm_t_string_failed_conversion_handler h;
1276
1277 SCM_VALIDATE_OPPORT (1, port);
1278
1279 if (scm_is_false (port))
1280 h = scm_i_default_port_conversion_handler ();
1281 else
1282 {
1283 scm_t_port *pt;
1284
1285 SCM_VALIDATE_OPPORT (1, port);
1286 pt = SCM_PTAB_ENTRY (port);
1287
1288 h = pt->ilseq_handler;
1289 }
1290
1291 if (h == SCM_FAILED_CONVERSION_ERROR)
1292 return scm_from_latin1_symbol ("error");
1293 else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
1294 return scm_from_latin1_symbol ("substitute");
1295 else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
1296 return scm_from_latin1_symbol ("escape");
1297 else
1298 abort ();
1299
1300 /* Never gets here. */
1301 return SCM_UNDEFINED;
1302 }
1303 #undef FUNC_NAME
1304
1305 SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!",
1306 2, 0, 0,
1307 (SCM port, SCM sym),
1308 "Sets the behavior of the interpreter when outputting a character\n"
1309 "that is not representable in the port's current encoding.\n"
1310 "@var{sym} can be either @code{'error}, @code{'substitute}, or\n"
1311 "@code{'escape}. If it is @code{'error}, an error will be thrown\n"
1312 "when an unconvertible character is encountered. If it is\n"
1313 "@code{'substitute}, then unconvertible characters will \n"
1314 "be replaced with approximate characters, or with question marks\n"
1315 "if no approximately correct character is available.\n"
1316 "If it is @code{'escape},\n"
1317 "it will appear as a hex escape when output.\n"
1318 "\n"
1319 "If @var{port} is an open port, the conversion error behavior\n"
1320 "is set for that port. If it is @code{#f}, it is set as the\n"
1321 "default behavior for any future ports that get created in\n"
1322 "this thread.\n")
1323 #define FUNC_NAME s_scm_set_port_conversion_strategy_x
1324 {
1325 scm_t_string_failed_conversion_handler handler;
1326
1327 if (scm_is_eq (sym, sym_error))
1328 handler = SCM_FAILED_CONVERSION_ERROR;
1329 else if (scm_is_eq (sym, sym_substitute))
1330 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
1331 else if (scm_is_eq (sym, sym_escape))
1332 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
1333 else
1334 SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym));
1335
1336 if (scm_is_false (port))
1337 scm_i_set_default_port_conversion_handler (handler);
1338 else
1339 {
1340 SCM_VALIDATE_OPPORT (1, port);
1341 SCM_PTAB_ENTRY (port)->ilseq_handler = handler;
1342 }
1343
1344 return SCM_UNSPECIFIED;
1345 }
1346 #undef FUNC_NAME
1347
1348
1349 \f
1350
1351 /* The port lock. */
1352
1353 static void
1354 lock_port (void *mutex)
1355 {
1356 scm_i_pthread_mutex_lock (mutex);
1357 }
1358
1359 static void
1360 unlock_port (void *mutex)
1361 {
1362 scm_i_pthread_mutex_unlock (mutex);
1363 }
1364
1365 void
1366 scm_dynwind_lock_port (SCM port)
1367 #define FUNC_NAME "dynwind-lock-port"
1368 {
1369 scm_i_pthread_mutex_t *lock;
1370 SCM_VALIDATE_OPPORT (SCM_ARG1, port);
1371 scm_c_lock_port (port, &lock);
1372 if (lock)
1373 {
1374 scm_dynwind_unwind_handler (unlock_port, lock, SCM_F_WIND_EXPLICITLY);
1375 scm_dynwind_rewind_handler (lock_port, lock, 0);
1376 }
1377 }
1378 #undef FUNC_NAME
1379
1380
1381 \f
1382
1383 /* Input. */
1384
1385 int
1386 scm_get_byte_or_eof (SCM port)
1387 {
1388 scm_i_pthread_mutex_t *lock;
1389 int ret;
1390
1391 scm_c_lock_port (port, &lock);
1392 ret = scm_get_byte_or_eof_unlocked (port);
1393 if (lock)
1394 scm_i_pthread_mutex_unlock (lock);
1395
1396 return ret;
1397 }
1398
1399 int
1400 scm_peek_byte_or_eof (SCM port)
1401 {
1402 scm_i_pthread_mutex_t *lock;
1403 int ret;
1404
1405 scm_c_lock_port (port, &lock);
1406 ret = scm_peek_byte_or_eof_unlocked (port);
1407 if (lock)
1408 scm_i_pthread_mutex_unlock (lock);
1409
1410 return ret;
1411 }
1412
1413 /* scm_c_read
1414 *
1415 * Used by an application to read arbitrary number of bytes from an
1416 * SCM port. Same semantics as libc read, except that scm_c_read only
1417 * returns less than SIZE bytes if at end-of-file.
1418 *
1419 * Warning: Doesn't update port line and column counts! */
1420
1421 /* This structure, and the following swap_buffer function, are used
1422 for temporarily swapping a port's own read buffer, and the buffer
1423 that the caller of scm_c_read provides. */
1424 struct port_and_swap_buffer
1425 {
1426 scm_t_port *pt;
1427 unsigned char *buffer;
1428 size_t size;
1429 };
1430
1431 static void
1432 swap_buffer (void *data)
1433 {
1434 struct port_and_swap_buffer *psb = (struct port_and_swap_buffer *) data;
1435 unsigned char *old_buf = psb->pt->read_buf;
1436 size_t old_size = psb->pt->read_buf_size;
1437
1438 /* Make the port use (buffer, size) from the struct. */
1439 psb->pt->read_pos = psb->pt->read_buf = psb->pt->read_end = psb->buffer;
1440 psb->pt->read_buf_size = psb->size;
1441
1442 /* Save the port's old (buffer, size) in the struct. */
1443 psb->buffer = old_buf;
1444 psb->size = old_size;
1445 }
1446
1447 static int scm_i_fill_input_unlocked (SCM port);
1448
1449 size_t
1450 scm_c_read_unlocked (SCM port, void *buffer, size_t size)
1451 #define FUNC_NAME "scm_c_read"
1452 {
1453 scm_t_port *pt;
1454 scm_t_port_internal *pti;
1455 size_t n_read = 0, n_available;
1456 struct port_and_swap_buffer psb;
1457
1458 SCM_VALIDATE_OPINPORT (1, port);
1459
1460 pt = SCM_PTAB_ENTRY (port);
1461 pti = SCM_PORT_GET_INTERNAL (port);
1462 if (pt->rw_active == SCM_PORT_WRITE)
1463 SCM_PORT_DESCRIPTOR (port)->flush (port);
1464
1465 if (pt->rw_random)
1466 pt->rw_active = SCM_PORT_READ;
1467
1468 /* Take bytes first from the port's read buffer. */
1469 if (pt->read_pos < pt->read_end)
1470 {
1471 n_available = min (size, pt->read_end - pt->read_pos);
1472 memcpy (buffer, pt->read_pos, n_available);
1473 buffer = (char *) buffer + n_available;
1474 pt->read_pos += n_available;
1475 n_read += n_available;
1476 size -= n_available;
1477 }
1478
1479 /* Avoid the scm_dynwind_* costs if we now have enough data. */
1480 if (size == 0)
1481 return n_read;
1482
1483 /* Now we will call scm_i_fill_input_unlocked repeatedly until we have
1484 read the requested number of bytes. (Note that a single
1485 scm_i_fill_input_unlocked call does not guarantee to fill the whole
1486 of the port's read buffer.) */
1487 if (pt->read_buf_size <= 1
1488 && pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
1489 {
1490 /* The port that we are reading from is unbuffered - i.e. does not
1491 have its own persistent buffer - but we have a buffer, provided
1492 by our caller, that is the right size for the data that is
1493 wanted. For the following scm_i_fill_input_unlocked calls,
1494 therefore, we use the buffer in hand as the port's read buffer.
1495
1496 We need to make sure that the port's normal (1 byte) buffer is
1497 reinstated in case one of the scm_i_fill_input_unlocked ()
1498 calls throws an exception; we use the scm_dynwind_* API to
1499 achieve that.
1500
1501 A consequence of this optimization is that the fill_input
1502 functions can't unget characters. That'll push data to the
1503 pushback buffer instead of this psb buffer. */
1504 #if SCM_DEBUG == 1
1505 unsigned char *pback = pt->putback_buf;
1506 #endif
1507 psb.pt = pt;
1508 psb.buffer = buffer;
1509 psb.size = size;
1510 scm_dynwind_begin (SCM_F_DYNWIND_REWINDABLE);
1511 scm_dynwind_rewind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1512 scm_dynwind_unwind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1513
1514 /* Call scm_i_fill_input_unlocked until we have all the bytes that
1515 we need, or we hit EOF. */
1516 while (pt->read_buf_size && (scm_i_fill_input_unlocked (port) != EOF))
1517 {
1518 pt->read_buf_size -= (pt->read_end - pt->read_pos);
1519 pt->read_pos = pt->read_buf = pt->read_end;
1520 }
1521 #if SCM_DEBUG == 1
1522 if (pback != pt->putback_buf
1523 || pt->read_buf - (unsigned char *) buffer < 0)
1524 scm_misc_error (FUNC_NAME,
1525 "scm_c_read must not call a fill function that pushes "
1526 "back characters onto an unbuffered port", SCM_EOL);
1527 #endif
1528 n_read += pt->read_buf - (unsigned char *) buffer;
1529
1530 /* Reinstate the port's normal buffer. */
1531 scm_dynwind_end ();
1532 }
1533 else
1534 {
1535 /* The port has its own buffer. It is important that we use it,
1536 even if it happens to be smaller than our caller's buffer, so
1537 that a custom port implementation's entry points (in
1538 particular, fill_input) can rely on the buffer always being
1539 the same as they first set up. */
1540 while (size && (scm_i_fill_input_unlocked (port) != EOF))
1541 {
1542 n_available = min (size, pt->read_end - pt->read_pos);
1543 memcpy (buffer, pt->read_pos, n_available);
1544 buffer = (char *) buffer + n_available;
1545 pt->read_pos += n_available;
1546 n_read += n_available;
1547 size -= n_available;
1548 }
1549 }
1550
1551 return n_read;
1552 }
1553 #undef FUNC_NAME
1554
1555 size_t
1556 scm_c_read (SCM port, void *buffer, size_t size)
1557 {
1558 scm_i_pthread_mutex_t *lock;
1559 size_t ret;
1560
1561 scm_c_lock_port (port, &lock);
1562 ret = scm_c_read_unlocked (port, buffer, size);
1563 if (lock)
1564 scm_i_pthread_mutex_unlock (lock);
1565
1566
1567 return ret;
1568 }
1569
1570 /* Update the line and column number of PORT after consumption of C. */
1571 static inline void
1572 update_port_lf (scm_t_wchar c, SCM port)
1573 {
1574 switch (c)
1575 {
1576 case '\a':
1577 case EOF:
1578 break;
1579 case '\b':
1580 SCM_DECCOL (port);
1581 break;
1582 case '\n':
1583 SCM_INCLINE (port);
1584 break;
1585 case '\r':
1586 SCM_ZEROCOL (port);
1587 break;
1588 case '\t':
1589 SCM_TABCOL (port);
1590 break;
1591 default:
1592 SCM_INCCOL (port);
1593 break;
1594 }
1595 }
1596
1597 #define SCM_MBCHAR_BUF_SIZE (4)
1598
1599 /* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint.
1600 UTF8_BUF is assumed to contain a valid UTF-8 sequence. */
1601 static scm_t_wchar
1602 utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
1603 {
1604 scm_t_wchar codepoint;
1605
1606 if (utf8_buf[0] <= 0x7f)
1607 {
1608 assert (size == 1);
1609 codepoint = utf8_buf[0];
1610 }
1611 else if ((utf8_buf[0] & 0xe0) == 0xc0)
1612 {
1613 assert (size == 2);
1614 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL
1615 | (utf8_buf[1] & 0x3f);
1616 }
1617 else if ((utf8_buf[0] & 0xf0) == 0xe0)
1618 {
1619 assert (size == 3);
1620 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL
1621 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL
1622 | (utf8_buf[2] & 0x3f);
1623 }
1624 else
1625 {
1626 assert (size == 4);
1627 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL
1628 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL
1629 | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL
1630 | (utf8_buf[3] & 0x3f);
1631 }
1632
1633 return codepoint;
1634 }
1635
1636 /* Read a UTF-8 sequence from PORT. On success, return 0 and set
1637 *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8
1638 representation, and set *LEN to the length in bytes. Return
1639 `EILSEQ' on error. */
1640 static int
1641 get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
1642 scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1643 {
1644 #define ASSERT_NOT_EOF(b) \
1645 if (SCM_UNLIKELY ((b) == EOF)) \
1646 goto invalid_seq
1647 #define CONSUME_PEEKED_BYTE() \
1648 pt->read_pos++
1649
1650 int byte;
1651 scm_t_port *pt;
1652
1653 *len = 0;
1654 pt = SCM_PTAB_ENTRY (port);
1655
1656 byte = scm_get_byte_or_eof_unlocked (port);
1657 if (byte == EOF)
1658 {
1659 *codepoint = EOF;
1660 return 0;
1661 }
1662
1663 buf[0] = (scm_t_uint8) byte;
1664 *len = 1;
1665
1666 if (buf[0] <= 0x7f)
1667 /* 1-byte form. */
1668 *codepoint = buf[0];
1669 else if (buf[0] >= 0xc2 && buf[0] <= 0xdf)
1670 {
1671 /* 2-byte form. */
1672 byte = scm_peek_byte_or_eof_unlocked (port);
1673 ASSERT_NOT_EOF (byte);
1674
1675 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1676 goto invalid_seq;
1677
1678 CONSUME_PEEKED_BYTE ();
1679 buf[1] = (scm_t_uint8) byte;
1680 *len = 2;
1681
1682 *codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL
1683 | (buf[1] & 0x3f);
1684 }
1685 else if ((buf[0] & 0xf0) == 0xe0)
1686 {
1687 /* 3-byte form. */
1688 byte = scm_peek_byte_or_eof_unlocked (port);
1689 ASSERT_NOT_EOF (byte);
1690
1691 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80
1692 || (buf[0] == 0xe0 && byte < 0xa0)
1693 || (buf[0] == 0xed && byte > 0x9f)))
1694 goto invalid_seq;
1695
1696 CONSUME_PEEKED_BYTE ();
1697 buf[1] = (scm_t_uint8) byte;
1698 *len = 2;
1699
1700 byte = scm_peek_byte_or_eof_unlocked (port);
1701 ASSERT_NOT_EOF (byte);
1702
1703 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1704 goto invalid_seq;
1705
1706 CONSUME_PEEKED_BYTE ();
1707 buf[2] = (scm_t_uint8) byte;
1708 *len = 3;
1709
1710 *codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL
1711 | ((scm_t_wchar) buf[1] & 0x3f) << 6UL
1712 | (buf[2] & 0x3f);
1713 }
1714 else if (buf[0] >= 0xf0 && buf[0] <= 0xf4)
1715 {
1716 /* 4-byte form. */
1717 byte = scm_peek_byte_or_eof_unlocked (port);
1718 ASSERT_NOT_EOF (byte);
1719
1720 if (SCM_UNLIKELY (((byte & 0xc0) != 0x80)
1721 || (buf[0] == 0xf0 && byte < 0x90)
1722 || (buf[0] == 0xf4 && byte > 0x8f)))
1723 goto invalid_seq;
1724
1725 CONSUME_PEEKED_BYTE ();
1726 buf[1] = (scm_t_uint8) byte;
1727 *len = 2;
1728
1729 byte = scm_peek_byte_or_eof_unlocked (port);
1730 ASSERT_NOT_EOF (byte);
1731
1732 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1733 goto invalid_seq;
1734
1735 CONSUME_PEEKED_BYTE ();
1736 buf[2] = (scm_t_uint8) byte;
1737 *len = 3;
1738
1739 byte = scm_peek_byte_or_eof_unlocked (port);
1740 ASSERT_NOT_EOF (byte);
1741
1742 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1743 goto invalid_seq;
1744
1745 CONSUME_PEEKED_BYTE ();
1746 buf[3] = (scm_t_uint8) byte;
1747 *len = 4;
1748
1749 *codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL
1750 | ((scm_t_wchar) buf[1] & 0x3f) << 12UL
1751 | ((scm_t_wchar) buf[2] & 0x3f) << 6UL
1752 | (buf[3] & 0x3f);
1753 }
1754 else
1755 goto invalid_seq;
1756
1757 return 0;
1758
1759 invalid_seq:
1760 /* Here we could choose the consume the faulty byte when it's not a
1761 valid starting byte, but it's not a requirement. What Section 3.9
1762 of Unicode 6.0.0 mandates, though, is to not consume a byte that
1763 would otherwise be a valid starting byte. */
1764
1765 return EILSEQ;
1766
1767 #undef CONSUME_PEEKED_BYTE
1768 #undef ASSERT_NOT_EOF
1769 }
1770
1771 /* Read an ISO-8859-1 codepoint (a byte) from PORT. On success, return
1772 0 and set *CODEPOINT to the codepoint that was read, fill BUF with
1773 its UTF-8 representation, and set *LEN to the length in bytes.
1774 Return `EILSEQ' on error. */
1775 static int
1776 get_latin1_codepoint (SCM port, scm_t_wchar *codepoint,
1777 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1778 {
1779 *codepoint = scm_get_byte_or_eof_unlocked (port);
1780
1781 if (*codepoint == EOF)
1782 *len = 0;
1783 else
1784 {
1785 *len = 1;
1786 buf[0] = *codepoint;
1787 }
1788 return 0;
1789 }
1790
1791 /* Likewise, read a byte sequence from PORT, passing it through its
1792 input conversion descriptor. */
1793 static int
1794 get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
1795 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1796 {
1797 scm_t_iconv_descriptors *id;
1798 scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
1799 size_t input_size = 0;
1800
1801 id = scm_i_port_iconv_descriptors (port, SCM_PORT_READ);
1802
1803 for (;;)
1804 {
1805 int byte_read;
1806 char *input, *output;
1807 size_t input_left, output_left, done;
1808
1809 byte_read = scm_get_byte_or_eof_unlocked (port);
1810 if (SCM_UNLIKELY (byte_read == EOF))
1811 {
1812 if (SCM_LIKELY (input_size == 0))
1813 {
1814 *codepoint = (scm_t_wchar) EOF;
1815 *len = input_size;
1816 return 0;
1817 }
1818 else
1819 {
1820 /* EOF found in the middle of a multibyte character. */
1821 scm_i_set_pending_eof (port);
1822 return EILSEQ;
1823 }
1824 }
1825
1826 buf[input_size++] = byte_read;
1827
1828 input = buf;
1829 input_left = input_size;
1830 output = (char *) utf8_buf;
1831 output_left = sizeof (utf8_buf);
1832
1833 done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
1834
1835 if (done == (size_t) -1)
1836 {
1837 int err = errno;
1838 if (SCM_LIKELY (err == EINVAL))
1839 /* The input byte sequence did not form a complete
1840 character. Read another byte and try again. */
1841 continue;
1842 else
1843 return err;
1844 }
1845 else
1846 {
1847 size_t output_size = sizeof (utf8_buf) - output_left;
1848 if (SCM_LIKELY (output_size > 0))
1849 {
1850 /* iconv generated output. Convert the UTF8_BUF sequence
1851 to a Unicode code point. */
1852 *codepoint = utf8_to_codepoint (utf8_buf, output_size);
1853 *len = input_size;
1854 return 0;
1855 }
1856 else
1857 {
1858 /* iconv consumed some bytes without producing any output.
1859 Most likely this means that a Unicode byte-order mark
1860 (BOM) was consumed, which should not be included in the
1861 returned buf. Shift any remaining bytes to the beginning
1862 of buf, and continue the loop. */
1863 memmove (buf, input, input_left);
1864 input_size = input_left;
1865 continue;
1866 }
1867 }
1868 }
1869 }
1870
1871 /* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
1872 with the byte representation of the codepoint in PORT's encoding, and
1873 set *LEN to the length in bytes of that representation. Return 0 on
1874 success and an errno value on error. */
1875 static SCM_C_INLINE int
1876 get_codepoint (SCM port, scm_t_wchar *codepoint,
1877 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1878 {
1879 int err;
1880 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1881 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
1882
1883 if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
1884 err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
1885 else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
1886 err = get_latin1_codepoint (port, codepoint, buf, len);
1887 else
1888 err = get_iconv_codepoint (port, codepoint, buf, len);
1889
1890 if (SCM_LIKELY (err == 0))
1891 {
1892 if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read))
1893 {
1894 /* Record that we're no longer at stream start. */
1895 pti->at_stream_start_for_bom_read = 0;
1896 if (pt->rw_random)
1897 pti->at_stream_start_for_bom_write = 0;
1898
1899 /* If we just read a BOM in an encoding that recognizes them,
1900 then silently consume it and read another code point. */
1901 if (SCM_UNLIKELY
1902 (*codepoint == SCM_UNICODE_BOM
1903 && (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8
1904 || strcmp (pt->encoding, "UTF-16") == 0
1905 || strcmp (pt->encoding, "UTF-32") == 0)))
1906 return get_codepoint (port, codepoint, buf, len);
1907 }
1908 update_port_lf (*codepoint, port);
1909 }
1910 else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
1911 {
1912 *codepoint = '?';
1913 err = 0;
1914 update_port_lf (*codepoint, port);
1915 }
1916
1917 return err;
1918 }
1919
1920 /* Read a codepoint from PORT and return it. */
1921 scm_t_wchar
1922 scm_getc_unlocked (SCM port)
1923 #define FUNC_NAME "scm_getc"
1924 {
1925 int err;
1926 size_t len;
1927 scm_t_wchar codepoint;
1928 char buf[SCM_MBCHAR_BUF_SIZE];
1929
1930 err = get_codepoint (port, &codepoint, buf, &len);
1931 if (SCM_UNLIKELY (err != 0))
1932 /* At this point PORT should point past the invalid encoding, as per
1933 R6RS-lib Section 8.2.4. */
1934 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
1935
1936 return codepoint;
1937 }
1938 #undef FUNC_NAME
1939
1940 scm_t_wchar
1941 scm_getc (SCM port)
1942 {
1943 scm_i_pthread_mutex_t *lock;
1944 scm_t_wchar ret;
1945
1946 scm_c_lock_port (port, &lock);
1947 ret = scm_getc_unlocked (port);
1948 if (lock)
1949 scm_i_pthread_mutex_unlock (lock);
1950
1951
1952 return ret;
1953 }
1954
1955 SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
1956 (SCM port),
1957 "Return the next character available from @var{port}, updating\n"
1958 "@var{port} to point to the following character. If no more\n"
1959 "characters are available, the end-of-file object is returned.\n"
1960 "\n"
1961 "When @var{port}'s data cannot be decoded according to its\n"
1962 "character encoding, a @code{decoding-error} is raised and\n"
1963 "@var{port} points past the erroneous byte sequence.\n")
1964 #define FUNC_NAME s_scm_read_char
1965 {
1966 scm_t_wchar c;
1967 if (SCM_UNBNDP (port))
1968 port = scm_current_input_port ();
1969 SCM_VALIDATE_OPINPORT (1, port);
1970 c = scm_getc_unlocked (port);
1971 if (EOF == c)
1972 return SCM_EOF_VAL;
1973 return SCM_MAKE_CHAR (c);
1974 }
1975 #undef FUNC_NAME
1976
1977
1978 \f
1979
1980 /* Pushback. */
1981 \f
1982
1983
1984 static void
1985 scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
1986 #define FUNC_NAME "scm_unget_bytes"
1987 {
1988 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1989 size_t old_len, new_len;
1990
1991 scm_i_clear_pending_eof (port);
1992
1993 if (pt->read_buf != pt->putback_buf)
1994 /* switch to the put-back buffer. */
1995 {
1996 if (pt->putback_buf == NULL)
1997 {
1998 pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
1999 ? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
2000 pt->putback_buf
2001 = (unsigned char *) scm_gc_malloc_pointerless
2002 (pt->putback_buf_size, "putback buffer");
2003 }
2004
2005 pt->saved_read_buf = pt->read_buf;
2006 pt->saved_read_pos = pt->read_pos;
2007 pt->saved_read_end = pt->read_end;
2008 pt->saved_read_buf_size = pt->read_buf_size;
2009
2010 /* Put read_pos at the end of the buffer, so that ungets will not
2011 have to shift the buffer contents each time. */
2012 pt->read_buf = pt->putback_buf;
2013 pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
2014 pt->read_buf_size = pt->putback_buf_size;
2015 }
2016
2017 old_len = pt->read_end - pt->read_pos;
2018 new_len = old_len + len;
2019
2020 if (new_len > pt->read_buf_size)
2021 /* The putback buffer needs to be enlarged. */
2022 {
2023 size_t new_buf_size;
2024 unsigned char *new_buf, *new_end, *new_pos;
2025
2026 new_buf_size = pt->read_buf_size * 2;
2027 if (new_buf_size < new_len)
2028 new_buf_size = new_len;
2029
2030 new_buf = (unsigned char *)
2031 scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
2032
2033 /* Put the bytes at the end of the buffer, so that future
2034 ungets won't need to shift the buffer. */
2035 new_end = new_buf + new_buf_size;
2036 new_pos = new_end - old_len;
2037 memcpy (new_pos, pt->read_pos, old_len);
2038
2039 pt->read_buf = pt->putback_buf = new_buf;
2040 pt->read_pos = new_pos;
2041 pt->read_end = new_end;
2042 pt->read_buf_size = pt->putback_buf_size = new_buf_size;
2043 }
2044 else if (pt->read_buf + len < pt->read_pos)
2045 /* If needed, shift the existing buffer contents up.
2046 This should not happen unless some external code
2047 manipulates the putback buffer pointers. */
2048 {
2049 unsigned char *new_end = pt->read_buf + pt->read_buf_size;
2050 unsigned char *new_pos = new_end - old_len;
2051
2052 memmove (new_pos, pt->read_pos, old_len);
2053 pt->read_pos = new_pos;
2054 pt->read_end = new_end;
2055 }
2056
2057 /* Move read_pos back and copy the bytes there. */
2058 pt->read_pos -= len;
2059 memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
2060
2061 if (pt->rw_active == SCM_PORT_WRITE)
2062 scm_flush (port);
2063
2064 if (pt->rw_random)
2065 pt->rw_active = SCM_PORT_READ;
2066 }
2067 #undef FUNC_NAME
2068
2069 void
2070 scm_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
2071 {
2072 scm_i_unget_bytes_unlocked (buf, len, port);
2073 }
2074
2075 void
2076 scm_unget_byte_unlocked (int c, SCM port)
2077 {
2078 unsigned char byte = c;
2079 scm_i_unget_bytes_unlocked (&byte, 1, port);
2080 }
2081
2082 void
2083 scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
2084 {
2085 scm_i_pthread_mutex_t *lock;
2086 scm_c_lock_port (port, &lock);
2087 scm_i_unget_bytes_unlocked (buf, len, port);
2088 if (lock)
2089 scm_i_pthread_mutex_unlock (lock);
2090 }
2091
2092 void
2093 scm_unget_byte (int c, SCM port)
2094 {
2095 unsigned char byte = c;
2096 scm_i_pthread_mutex_t *lock;
2097 scm_c_lock_port (port, &lock);
2098 scm_i_unget_bytes_unlocked (&byte, 1, port);
2099 if (lock)
2100 scm_i_pthread_mutex_unlock (lock);
2101 }
2102
2103 void
2104 scm_ungetc_unlocked (scm_t_wchar c, SCM port)
2105 #define FUNC_NAME "scm_ungetc"
2106 {
2107 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2108 char *result;
2109 char result_buf[10];
2110 size_t len;
2111
2112 len = sizeof (result_buf);
2113 result = u32_conv_to_encoding (pt->encoding,
2114 (enum iconv_ilseq_handler) pt->ilseq_handler,
2115 (uint32_t *) &c, 1, NULL,
2116 result_buf, &len);
2117
2118 if (SCM_UNLIKELY (result == NULL || len == 0))
2119 scm_encoding_error (FUNC_NAME, errno,
2120 "conversion to port encoding failed",
2121 SCM_BOOL_F, SCM_MAKE_CHAR (c));
2122
2123 scm_i_unget_bytes_unlocked ((unsigned char *) result, len, port);
2124
2125 if (SCM_UNLIKELY (result != result_buf))
2126 free (result);
2127
2128 if (c == '\n')
2129 {
2130 /* What should col be in this case?
2131 * We'll leave it at -1.
2132 */
2133 SCM_LINUM (port) -= 1;
2134 }
2135 else
2136 SCM_COL(port) -= 1;
2137 }
2138 #undef FUNC_NAME
2139
2140 void
2141 scm_ungetc (scm_t_wchar c, SCM port)
2142 {
2143 scm_i_pthread_mutex_t *lock;
2144 scm_c_lock_port (port, &lock);
2145 scm_ungetc_unlocked (c, port);
2146 if (lock)
2147 scm_i_pthread_mutex_unlock (lock);
2148
2149 }
2150
2151 void
2152 scm_ungets_unlocked (const char *s, int n, SCM port)
2153 {
2154 /* This is simple minded and inefficient, but unreading strings is
2155 * probably not a common operation, and remember that line and
2156 * column numbers have to be handled...
2157 *
2158 * Please feel free to write an optimized version!
2159 */
2160 while (n--)
2161 scm_ungetc_unlocked (s[n], port);
2162 }
2163
2164 void
2165 scm_ungets (const char *s, int n, SCM port)
2166 {
2167 scm_i_pthread_mutex_t *lock;
2168 scm_c_lock_port (port, &lock);
2169 scm_ungets_unlocked (s, n, port);
2170 if (lock)
2171 scm_i_pthread_mutex_unlock (lock);
2172
2173 }
2174
2175 SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
2176 (SCM port),
2177 "Return the next character available from @var{port},\n"
2178 "@emph{without} updating @var{port} to point to the following\n"
2179 "character. If no more characters are available, the\n"
2180 "end-of-file object is returned.\n"
2181 "\n"
2182 "The value returned by\n"
2183 "a call to @code{peek-char} is the same as the value that would\n"
2184 "have been returned by a call to @code{read-char} on the same\n"
2185 "port. The only difference is that the very next call to\n"
2186 "@code{read-char} or @code{peek-char} on that @var{port} will\n"
2187 "return the value returned by the preceding call to\n"
2188 "@code{peek-char}. In particular, a call to @code{peek-char} on\n"
2189 "an interactive port will hang waiting for input whenever a call\n"
2190 "to @code{read-char} would have hung.\n"
2191 "\n"
2192 "As for @code{read-char}, a @code{decoding-error} may be raised\n"
2193 "if such a situation occurs. However, unlike with @code{read-char},\n"
2194 "@var{port} still points at the beginning of the erroneous byte\n"
2195 "sequence when the error is raised.\n")
2196 #define FUNC_NAME s_scm_peek_char
2197 {
2198 int err;
2199 SCM result;
2200 scm_t_wchar c;
2201 char bytes[SCM_MBCHAR_BUF_SIZE];
2202 long column, line;
2203 size_t len;
2204
2205 if (SCM_UNBNDP (port))
2206 port = scm_current_input_port ();
2207 SCM_VALIDATE_OPINPORT (1, port);
2208
2209 column = SCM_COL (port);
2210 line = SCM_LINUM (port);
2211
2212 err = get_codepoint (port, &c, bytes, &len);
2213
2214 scm_i_unget_bytes_unlocked ((unsigned char *) bytes, len, port);
2215
2216 SCM_COL (port) = column;
2217 SCM_LINUM (port) = line;
2218
2219 if (SCM_UNLIKELY (err != 0))
2220 {
2221 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
2222
2223 /* Shouldn't happen since `catch' always aborts to prompt. */
2224 result = SCM_BOOL_F;
2225 }
2226 else if (c == EOF)
2227 {
2228 scm_i_set_pending_eof (port);
2229 result = SCM_EOF_VAL;
2230 }
2231 else
2232 result = SCM_MAKE_CHAR (c);
2233
2234 return result;
2235 }
2236 #undef FUNC_NAME
2237
2238 SCM_DEFINE (scm_unread_char, "unread-char", 1, 1, 0,
2239 (SCM cobj, SCM port),
2240 "Place character @var{cobj} in @var{port} so that it will be\n"
2241 "read by the next read operation. If called multiple times, the\n"
2242 "unread characters will be read again in last-in first-out\n"
2243 "order. If @var{port} is not supplied, the current input port\n"
2244 "is used.")
2245 #define FUNC_NAME s_scm_unread_char
2246 {
2247 int c;
2248
2249 SCM_VALIDATE_CHAR (1, cobj);
2250 if (SCM_UNBNDP (port))
2251 port = scm_current_input_port ();
2252 SCM_VALIDATE_OPINPORT (2, port);
2253
2254 c = SCM_CHAR (cobj);
2255
2256 scm_ungetc_unlocked (c, port);
2257 return cobj;
2258 }
2259 #undef FUNC_NAME
2260
2261 SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0,
2262 (SCM str, SCM port),
2263 "Place the string @var{str} in @var{port} so that its characters will be\n"
2264 "read in subsequent read operations. If called multiple times, the\n"
2265 "unread characters will be read again in last-in first-out order. If\n"
2266 "@var{port} is not supplied, the current-input-port is used.")
2267 #define FUNC_NAME s_scm_unread_string
2268 {
2269 int n;
2270 SCM_VALIDATE_STRING (1, str);
2271 if (SCM_UNBNDP (port))
2272 port = scm_current_input_port ();
2273 SCM_VALIDATE_OPINPORT (2, port);
2274
2275 n = scm_i_string_length (str);
2276
2277 while (n--)
2278 scm_ungetc_unlocked (scm_i_string_ref (str, n), port);
2279
2280 return str;
2281 }
2282 #undef FUNC_NAME
2283
2284
2285 \f
2286
2287 /* Manipulating the buffers. */
2288
2289 /* This routine does not take any locks, as it is usually called as part
2290 of a port implementation. */
2291 void
2292 scm_port_non_buffer (scm_t_port *pt)
2293 {
2294 pt->read_pos = pt->read_buf = pt->read_end = &pt->shortbuf;
2295 pt->write_buf = pt->write_pos = &pt->shortbuf;
2296 pt->read_buf_size = pt->write_buf_size = 1;
2297 pt->write_end = pt->write_buf + pt->write_buf_size;
2298 }
2299
2300 /* this should only be called when the read buffer is empty. it
2301 tries to refill the read buffer. it returns the first char from
2302 the port, which is either EOF or *(pt->read_pos). */
2303 static int
2304 scm_i_fill_input_unlocked (SCM port)
2305 {
2306 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2307 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
2308
2309 assert (pt->read_pos == pt->read_end);
2310
2311 if (pti->pending_eof)
2312 {
2313 pti->pending_eof = 0;
2314 return EOF;
2315 }
2316
2317 if (pt->read_buf == pt->putback_buf)
2318 {
2319 /* finished reading put-back chars. */
2320 pt->read_buf = pt->saved_read_buf;
2321 pt->read_pos = pt->saved_read_pos;
2322 pt->read_end = pt->saved_read_end;
2323 pt->read_buf_size = pt->saved_read_buf_size;
2324 if (pt->read_pos < pt->read_end)
2325 return *(pt->read_pos);
2326 }
2327 return SCM_PORT_DESCRIPTOR (port)->fill_input (port);
2328 }
2329
2330 int
2331 scm_fill_input (SCM port)
2332 {
2333 scm_i_pthread_mutex_t *lock;
2334 int ret;
2335
2336 scm_c_lock_port (port, &lock);
2337 ret = scm_fill_input_unlocked (port);
2338 if (lock)
2339 scm_i_pthread_mutex_unlock (lock);
2340
2341
2342 return ret;
2343 }
2344
2345 /* Slow-path fallback for 'scm_get_byte_or_eof_unlocked' */
2346 int
2347 scm_slow_get_byte_or_eof_unlocked (SCM port)
2348 {
2349 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2350
2351 if (pt->rw_active == SCM_PORT_WRITE)
2352 scm_flush_unlocked (port);
2353
2354 if (pt->rw_random)
2355 pt->rw_active = SCM_PORT_READ;
2356
2357 if (pt->read_pos >= pt->read_end)
2358 {
2359 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2360 return EOF;
2361 }
2362
2363 return *pt->read_pos++;
2364 }
2365
2366 /* Slow-path fallback for 'scm_peek_byte_or_eof_unlocked' */
2367 int
2368 scm_slow_peek_byte_or_eof_unlocked (SCM port)
2369 {
2370 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2371
2372 if (pt->rw_active == SCM_PORT_WRITE)
2373 scm_flush_unlocked (port);
2374
2375 if (pt->rw_random)
2376 pt->rw_active = SCM_PORT_READ;
2377
2378 if (pt->read_pos >= pt->read_end)
2379 {
2380 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2381 {
2382 scm_i_set_pending_eof (port);
2383 return EOF;
2384 }
2385 }
2386
2387 return *pt->read_pos;
2388 }
2389
2390 /* Move up to READ_LEN bytes from PORT's putback and/or read buffers
2391 into memory starting at DEST. Return the number of bytes moved.
2392 PORT's line/column numbers are left unchanged. */
2393 size_t
2394 scm_take_from_input_buffers (SCM port, char *dest, size_t read_len)
2395 {
2396 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2397 size_t bytes_read = 0;
2398 size_t from_buf = min (pt->read_end - pt->read_pos, read_len);
2399
2400 if (from_buf > 0)
2401 {
2402 memcpy (dest, pt->read_pos, from_buf);
2403 pt->read_pos += from_buf;
2404 bytes_read += from_buf;
2405 read_len -= from_buf;
2406 dest += from_buf;
2407 }
2408
2409 /* if putback was active, try the real input buffer too. */
2410 if (pt->read_buf == pt->putback_buf)
2411 {
2412 from_buf = min (pt->saved_read_end - pt->saved_read_pos, read_len);
2413 if (from_buf > 0)
2414 {
2415 memcpy (dest, pt->saved_read_pos, from_buf);
2416 pt->saved_read_pos += from_buf;
2417 bytes_read += from_buf;
2418 }
2419 }
2420
2421 return bytes_read;
2422 }
2423
2424 /* Clear a port's read buffers, returning the contents. */
2425 SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0,
2426 (SCM port),
2427 "This procedure clears a port's input buffers, similar\n"
2428 "to the way that force-output clears the output buffer. The\n"
2429 "contents of the buffers are returned as a single string, e.g.,\n"
2430 "\n"
2431 "@lisp\n"
2432 "(define p (open-input-file ...))\n"
2433 "(drain-input p) => empty string, nothing buffered yet.\n"
2434 "(unread-char (read-char p) p)\n"
2435 "(drain-input p) => initial chars from p, up to the buffer size.\n"
2436 "@end lisp\n\n"
2437 "Draining the buffers may be useful for cleanly finishing\n"
2438 "buffered I/O so that the file descriptor can be used directly\n"
2439 "for further input.")
2440 #define FUNC_NAME s_scm_drain_input
2441 {
2442 SCM result;
2443 char *data;
2444 scm_t_port *pt;
2445 long count;
2446
2447 SCM_VALIDATE_OPINPORT (1, port);
2448 pt = SCM_PTAB_ENTRY (port);
2449
2450 count = pt->read_end - pt->read_pos;
2451 if (pt->read_buf == pt->putback_buf)
2452 count += pt->saved_read_end - pt->saved_read_pos;
2453
2454 if (count)
2455 {
2456 result = scm_i_make_string (count, &data, 0);
2457 scm_take_from_input_buffers (port, data, count);
2458 }
2459 else
2460 result = scm_nullstr;
2461
2462 return result;
2463 }
2464 #undef FUNC_NAME
2465
2466 void
2467 scm_end_input_unlocked (SCM port)
2468 {
2469 long offset;
2470 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2471
2472 scm_i_clear_pending_eof (port);
2473 if (pt->read_buf == pt->putback_buf)
2474 {
2475 offset = pt->read_end - pt->read_pos;
2476 pt->read_buf = pt->saved_read_buf;
2477 pt->read_pos = pt->saved_read_pos;
2478 pt->read_end = pt->saved_read_end;
2479 pt->read_buf_size = pt->saved_read_buf_size;
2480 }
2481 else
2482 offset = 0;
2483
2484 SCM_PORT_DESCRIPTOR (port)->end_input (port, offset);
2485 }
2486
2487 void
2488 scm_end_input (SCM port)
2489 {
2490 scm_i_pthread_mutex_t *lock;
2491 scm_c_lock_port (port, &lock);
2492 scm_end_input_unlocked (port);
2493 if (lock)
2494 scm_i_pthread_mutex_unlock (lock);
2495
2496 }
2497
2498 SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0,
2499 (SCM port),
2500 "Flush the specified output port, or the current output port if @var{port}\n"
2501 "is omitted. The current output buffer contents are passed to the\n"
2502 "underlying port implementation (e.g., in the case of fports, the\n"
2503 "data will be written to the file and the output buffer will be cleared.)\n"
2504 "It has no effect on an unbuffered port.\n\n"
2505 "The return value is unspecified.")
2506 #define FUNC_NAME s_scm_force_output
2507 {
2508 if (SCM_UNBNDP (port))
2509 port = scm_current_output_port ();
2510 else
2511 {
2512 port = SCM_COERCE_OUTPORT (port);
2513 SCM_VALIDATE_OPOUTPORT (1, port);
2514 }
2515 scm_flush_unlocked (port);
2516 return SCM_UNSPECIFIED;
2517 }
2518 #undef FUNC_NAME
2519
2520 void
2521 scm_flush_unlocked (SCM port)
2522 {
2523 SCM_PORT_DESCRIPTOR (port)->flush (port);
2524 }
2525
2526 void
2527 scm_flush (SCM port)
2528 {
2529 scm_i_pthread_mutex_t *lock;
2530 scm_c_lock_port (port, &lock);
2531 scm_flush_unlocked (port);
2532 if (lock)
2533 scm_i_pthread_mutex_unlock (lock);
2534
2535 }
2536
2537 int
2538 scm_fill_input_unlocked (SCM port)
2539 {
2540 return scm_i_fill_input_unlocked (port);
2541 }
2542
2543
2544 \f
2545
2546 /* Output. */
2547
2548 void
2549 scm_putc (char c, SCM port)
2550 {
2551 scm_i_pthread_mutex_t *lock;
2552 scm_c_lock_port (port, &lock);
2553 scm_putc_unlocked (c, port);
2554 if (lock)
2555 scm_i_pthread_mutex_unlock (lock);
2556
2557 }
2558
2559 void
2560 scm_puts (const char *s, SCM port)
2561 {
2562 scm_i_pthread_mutex_t *lock;
2563 scm_c_lock_port (port, &lock);
2564 scm_puts_unlocked (s, port);
2565 if (lock)
2566 scm_i_pthread_mutex_unlock (lock);
2567
2568 }
2569
2570 /* scm_c_write
2571 *
2572 * Used by an application to write arbitrary number of bytes to an SCM
2573 * port. Similar semantics as libc write. However, unlike libc
2574 * write, scm_c_write writes the requested number of bytes and has no
2575 * return value.
2576 *
2577 * Warning: Doesn't update port line and column counts!
2578 */
2579 void
2580 scm_c_write_unlocked (SCM port, const void *ptr, size_t size)
2581 #define FUNC_NAME "scm_c_write"
2582 {
2583 scm_t_port *pt;
2584 scm_t_ptob_descriptor *ptob;
2585
2586 SCM_VALIDATE_OPOUTPORT (1, port);
2587
2588 pt = SCM_PTAB_ENTRY (port);
2589 ptob = SCM_PORT_DESCRIPTOR (port);
2590
2591 if (pt->rw_active == SCM_PORT_READ)
2592 scm_end_input_unlocked (port);
2593
2594 ptob->write (port, ptr, size);
2595
2596 if (pt->rw_random)
2597 pt->rw_active = SCM_PORT_WRITE;
2598 }
2599 #undef FUNC_NAME
2600
2601 void
2602 scm_c_write (SCM port, const void *ptr, size_t size)
2603 {
2604 scm_i_pthread_mutex_t *lock;
2605 scm_c_lock_port (port, &lock);
2606 scm_c_write_unlocked (port, ptr, size);
2607 if (lock)
2608 scm_i_pthread_mutex_unlock (lock);
2609
2610 }
2611
2612 /* scm_lfwrite
2613 *
2614 * This function differs from scm_c_write; it updates port line and
2615 * column. */
2616 void
2617 scm_lfwrite_unlocked (const char *ptr, size_t size, SCM port)
2618 {
2619 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2620 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
2621
2622 if (pt->rw_active == SCM_PORT_READ)
2623 scm_end_input_unlocked (port);
2624
2625 ptob->write (port, ptr, size);
2626
2627 for (; size; ptr++, size--)
2628 update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port);
2629
2630 if (pt->rw_random)
2631 pt->rw_active = SCM_PORT_WRITE;
2632 }
2633
2634 void
2635 scm_lfwrite (const char *ptr, size_t size, SCM port)
2636 {
2637 scm_i_pthread_mutex_t *lock;
2638 scm_c_lock_port (port, &lock);
2639 scm_lfwrite_unlocked (ptr, size, port);
2640 if (lock)
2641 scm_i_pthread_mutex_unlock (lock);
2642
2643 }
2644
2645 /* Write STR to PORT from START inclusive to END exclusive. */
2646 void
2647 scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
2648 {
2649 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2650
2651 if (pt->rw_active == SCM_PORT_READ)
2652 scm_end_input_unlocked (port);
2653
2654 if (end == (size_t) -1)
2655 end = scm_i_string_length (str);
2656
2657 scm_i_display_substring (str, start, end, port);
2658
2659 if (pt->rw_random)
2660 pt->rw_active = SCM_PORT_WRITE;
2661 }
2662
2663
2664 \f
2665
2666 /* Querying and setting positions, and character availability. */
2667
2668 SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0,
2669 (SCM port),
2670 "Return @code{#t} if a character is ready on input @var{port}\n"
2671 "and return @code{#f} otherwise. If @code{char-ready?} returns\n"
2672 "@code{#t} then the next @code{read-char} operation on\n"
2673 "@var{port} is guaranteed not to hang. If @var{port} is a file\n"
2674 "port at end of file then @code{char-ready?} returns @code{#t}.\n"
2675 "\n"
2676 "@code{char-ready?} exists to make it possible for a\n"
2677 "program to accept characters from interactive ports without\n"
2678 "getting stuck waiting for input. Any input editors associated\n"
2679 "with such ports must make sure that characters whose existence\n"
2680 "has been asserted by @code{char-ready?} cannot be rubbed out.\n"
2681 "If @code{char-ready?} were to return @code{#f} at end of file,\n"
2682 "a port at end of file would be indistinguishable from an\n"
2683 "interactive port that has no ready characters.")
2684 #define FUNC_NAME s_scm_char_ready_p
2685 {
2686 scm_t_port *pt;
2687
2688 if (SCM_UNBNDP (port))
2689 port = scm_current_input_port ();
2690 /* It's possible to close the current input port, so validate even in
2691 this case. */
2692 SCM_VALIDATE_OPINPORT (1, port);
2693
2694 pt = SCM_PTAB_ENTRY (port);
2695
2696 /* if the current read buffer is filled, or the
2697 last pushed-back char has been read and the saved buffer is
2698 filled, result is true. */
2699 if (pt->read_pos < pt->read_end
2700 || (pt->read_buf == pt->putback_buf
2701 && pt->saved_read_pos < pt->saved_read_end))
2702 return SCM_BOOL_T;
2703 else
2704 {
2705 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
2706
2707 if (ptob->input_waiting)
2708 return scm_from_bool(ptob->input_waiting (port));
2709 else
2710 return SCM_BOOL_T;
2711 }
2712 }
2713 #undef FUNC_NAME
2714
2715 SCM_DEFINE (scm_seek, "seek", 3, 0, 0,
2716 (SCM fd_port, SCM offset, SCM whence),
2717 "Sets the current position of @var{fd_port} to the integer\n"
2718 "@var{offset}, which is interpreted according to the value of\n"
2719 "@var{whence}.\n"
2720 "\n"
2721 "One of the following variables should be supplied for\n"
2722 "@var{whence}:\n"
2723 "@defvar SEEK_SET\n"
2724 "Seek from the beginning of the file.\n"
2725 "@end defvar\n"
2726 "@defvar SEEK_CUR\n"
2727 "Seek from the current position.\n"
2728 "@end defvar\n"
2729 "@defvar SEEK_END\n"
2730 "Seek from the end of the file.\n"
2731 "@end defvar\n"
2732 "If @var{fd_port} is a file descriptor, the underlying system\n"
2733 "call is @code{lseek}. @var{port} may be a string port.\n"
2734 "\n"
2735 "The value returned is the new position in the file. This means\n"
2736 "that the current position of a port can be obtained using:\n"
2737 "@lisp\n"
2738 "(seek port 0 SEEK_CUR)\n"
2739 "@end lisp")
2740 #define FUNC_NAME s_scm_seek
2741 {
2742 int how;
2743
2744 fd_port = SCM_COERCE_OUTPORT (fd_port);
2745
2746 how = scm_to_int (whence);
2747 if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END)
2748 SCM_OUT_OF_RANGE (3, whence);
2749
2750 if (SCM_OPPORTP (fd_port))
2751 {
2752 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (fd_port);
2753 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (fd_port);
2754 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2755 off_t_or_off64_t rv;
2756
2757 if (!ptob->seek)
2758 SCM_MISC_ERROR ("port is not seekable",
2759 scm_cons (fd_port, SCM_EOL));
2760 else
2761 rv = ptob->seek (fd_port, off, how);
2762
2763 /* Set stream-start flags according to new position. */
2764 pti->at_stream_start_for_bom_read = (rv == 0);
2765 pti->at_stream_start_for_bom_write = (rv == 0);
2766
2767 scm_i_clear_pending_eof (fd_port);
2768
2769 return scm_from_off_t_or_off64_t (rv);
2770 }
2771 else /* file descriptor?. */
2772 {
2773 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2774 off_t_or_off64_t rv;
2775 rv = lseek_or_lseek64 (scm_to_int (fd_port), off, how);
2776 if (rv == -1)
2777 SCM_SYSERROR;
2778 return scm_from_off_t_or_off64_t (rv);
2779 }
2780 }
2781 #undef FUNC_NAME
2782
2783 #ifndef O_BINARY
2784 #define O_BINARY 0
2785 #endif
2786
2787 /* Mingw has ftruncate(), perhaps implemented above using chsize, but
2788 doesn't have the filename version truncate(), hence this code. */
2789 #if HAVE_FTRUNCATE && ! HAVE_TRUNCATE
2790 static int
2791 truncate (const char *file, off_t length)
2792 {
2793 int ret, fdes;
2794
2795 fdes = open (file, O_BINARY | O_WRONLY);
2796 if (fdes == -1)
2797 return -1;
2798
2799 ret = ftruncate (fdes, length);
2800 if (ret == -1)
2801 {
2802 int save_errno = errno;
2803 close (fdes);
2804 errno = save_errno;
2805 return -1;
2806 }
2807
2808 return close (fdes);
2809 }
2810 #endif /* HAVE_FTRUNCATE && ! HAVE_TRUNCATE */
2811
2812 SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0,
2813 (SCM object, SCM length),
2814 "Truncate file @var{object} to @var{length} bytes. @var{object}\n"
2815 "can be a filename string, a port object, or an integer file\n"
2816 "descriptor.\n"
2817 "The return value is unspecified.\n"
2818 "\n"
2819 "For a port or file descriptor @var{length} can be omitted, in\n"
2820 "which case the file is truncated at the current position (per\n"
2821 "@code{ftell} above).\n"
2822 "\n"
2823 "On most systems a file can be extended by giving a length\n"
2824 "greater than the current size, but this is not mandatory in the\n"
2825 "POSIX standard.")
2826 #define FUNC_NAME s_scm_truncate_file
2827 {
2828 int rv;
2829
2830 /* "object" can be a port, fdes or filename.
2831
2832 Negative "length" makes no sense, but it's left to truncate() or
2833 ftruncate() to give back an error for that (normally EINVAL).
2834 */
2835
2836 if (SCM_UNBNDP (length))
2837 {
2838 /* must supply length if object is a filename. */
2839 if (scm_is_string (object))
2840 SCM_MISC_ERROR("must supply length if OBJECT is a filename", SCM_EOL);
2841
2842 length = scm_seek (object, SCM_INUM0, scm_from_int (SEEK_CUR));
2843 }
2844
2845 object = SCM_COERCE_OUTPORT (object);
2846 if (scm_is_integer (object))
2847 {
2848 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2849 SCM_SYSCALL (rv = ftruncate_or_ftruncate64 (scm_to_int (object),
2850 c_length));
2851 }
2852 else if (SCM_OPOUTPORTP (object))
2853 {
2854 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2855 scm_t_port *pt = SCM_PTAB_ENTRY (object);
2856 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (object);
2857
2858 if (!ptob->truncate)
2859 SCM_MISC_ERROR ("port is not truncatable", SCM_EOL);
2860
2861 scm_i_clear_pending_eof (object);
2862 if (pt->rw_active == SCM_PORT_READ)
2863 scm_end_input_unlocked (object);
2864 else if (pt->rw_active == SCM_PORT_WRITE)
2865 ptob->flush (object);
2866
2867 ptob->truncate (object, c_length);
2868 rv = 0;
2869 }
2870 else
2871 {
2872 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2873 char *str = scm_to_locale_string (object);
2874 int eno;
2875 SCM_SYSCALL (rv = truncate_or_truncate64 (str, c_length));
2876 eno = errno;
2877 free (str);
2878 errno = eno;
2879 }
2880 if (rv == -1)
2881 SCM_SYSERROR;
2882 return SCM_UNSPECIFIED;
2883 }
2884 #undef FUNC_NAME
2885
2886 SCM_DEFINE (scm_port_line, "port-line", 1, 0, 0,
2887 (SCM port),
2888 "Return the current line number for @var{port}.\n"
2889 "\n"
2890 "The first line of a file is 0. But you might want to add 1\n"
2891 "when printing line numbers, since starting from 1 is\n"
2892 "traditional in error messages, and likely to be more natural to\n"
2893 "non-programmers.")
2894 #define FUNC_NAME s_scm_port_line
2895 {
2896 port = SCM_COERCE_OUTPORT (port);
2897 SCM_VALIDATE_OPENPORT (1, port);
2898 return scm_from_long (SCM_LINUM (port));
2899 }
2900 #undef FUNC_NAME
2901
2902 SCM_DEFINE (scm_set_port_line_x, "set-port-line!", 2, 0, 0,
2903 (SCM port, SCM line),
2904 "Set the current line number for @var{port} to @var{line}. The\n"
2905 "first line of a file is 0.")
2906 #define FUNC_NAME s_scm_set_port_line_x
2907 {
2908 port = SCM_COERCE_OUTPORT (port);
2909 SCM_VALIDATE_OPENPORT (1, port);
2910 SCM_PTAB_ENTRY (port)->line_number = scm_to_long (line);
2911 return SCM_UNSPECIFIED;
2912 }
2913 #undef FUNC_NAME
2914
2915 SCM_DEFINE (scm_port_column, "port-column", 1, 0, 0,
2916 (SCM port),
2917 "Return the current column number of @var{port}.\n"
2918 "If the number is\n"
2919 "unknown, the result is #f. Otherwise, the result is a 0-origin integer\n"
2920 "- i.e. the first character of the first line is line 0, column 0.\n"
2921 "(However, when you display a file position, for example in an error\n"
2922 "message, we recommend you add 1 to get 1-origin integers. This is\n"
2923 "because lines and column numbers traditionally start with 1, and that is\n"
2924 "what non-programmers will find most natural.)")
2925 #define FUNC_NAME s_scm_port_column
2926 {
2927 port = SCM_COERCE_OUTPORT (port);
2928 SCM_VALIDATE_OPENPORT (1, port);
2929 return scm_from_int (SCM_COL (port));
2930 }
2931 #undef FUNC_NAME
2932
2933 SCM_DEFINE (scm_set_port_column_x, "set-port-column!", 2, 0, 0,
2934 (SCM port, SCM column),
2935 "Set the current column of @var{port}. Before reading the first\n"
2936 "character on a line the column should be 0.")
2937 #define FUNC_NAME s_scm_set_port_column_x
2938 {
2939 port = SCM_COERCE_OUTPORT (port);
2940 SCM_VALIDATE_OPENPORT (1, port);
2941 SCM_PTAB_ENTRY (port)->column_number = scm_to_int (column);
2942 return SCM_UNSPECIFIED;
2943 }
2944 #undef FUNC_NAME
2945
2946 SCM_DEFINE (scm_port_filename, "port-filename", 1, 0, 0,
2947 (SCM port),
2948 "Return the filename associated with @var{port}, or @code{#f}\n"
2949 "if no filename is associated with the port.")
2950 #define FUNC_NAME s_scm_port_filename
2951 {
2952 port = SCM_COERCE_OUTPORT (port);
2953 SCM_VALIDATE_OPENPORT (1, port);
2954 return SCM_FILENAME (port);
2955 }
2956 #undef FUNC_NAME
2957
2958 SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0,
2959 (SCM port, SCM filename),
2960 "Change the filename associated with @var{port}, using the current input\n"
2961 "port if none is specified. Note that this does not change the port's\n"
2962 "source of data, but only the value that is returned by\n"
2963 "@code{port-filename} and reported in diagnostic output.")
2964 #define FUNC_NAME s_scm_set_port_filename_x
2965 {
2966 port = SCM_COERCE_OUTPORT (port);
2967 SCM_VALIDATE_OPENPORT (1, port);
2968 /* We allow the user to set the filename to whatever he likes. */
2969 SCM_SET_FILENAME (port, filename);
2970 return SCM_UNSPECIFIED;
2971 }
2972 #undef FUNC_NAME
2973
2974
2975 \f
2976
2977 /* Implementation helpers for port printing functions. */
2978
2979 void
2980 scm_print_port_mode (SCM exp, SCM port)
2981 {
2982 scm_puts_unlocked (SCM_CLOSEDP (exp)
2983 ? "closed: "
2984 : (SCM_RDNG & SCM_CELL_WORD_0 (exp)
2985 ? (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
2986 ? "input-output: "
2987 : "input: ")
2988 : (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
2989 ? "output: "
2990 : "bogus: ")),
2991 port);
2992 }
2993
2994 int
2995 scm_port_print (SCM exp, SCM port, scm_print_state *pstate SCM_UNUSED)
2996 {
2997 char *type = SCM_PTOBNAME (SCM_PTOBNUM (exp));
2998 if (!type)
2999 type = "port";
3000 scm_puts_unlocked ("#<", port);
3001 scm_print_port_mode (exp, port);
3002 scm_puts_unlocked (type, port);
3003 scm_putc_unlocked (' ', port);
3004 scm_uintprint (SCM_CELL_WORD_1 (exp), 16, port);
3005 scm_putc_unlocked ('>', port);
3006 return 1;
3007 }
3008
3009
3010 \f
3011
3012 /* Iterating over all ports. */
3013
3014 struct for_each_data
3015 {
3016 void (*proc) (void *data, SCM p);
3017 void *data;
3018 };
3019
3020 static SCM
3021 for_each_trampoline (void *data, SCM port, SCM result)
3022 {
3023 struct for_each_data *d = data;
3024
3025 d->proc (d->data, port);
3026
3027 return result;
3028 }
3029
3030 void
3031 scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data)
3032 {
3033 struct for_each_data d;
3034
3035 d.proc = proc;
3036 d.data = data;
3037
3038 scm_c_weak_set_fold (for_each_trampoline, &d, SCM_EOL,
3039 scm_i_port_weak_set);
3040 }
3041
3042 static void
3043 scm_for_each_trampoline (void *data, SCM port)
3044 {
3045 scm_call_1 (SCM_PACK_POINTER (data), port);
3046 }
3047
3048 SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0,
3049 (SCM proc),
3050 "Apply @var{proc} to each port in the Guile port table\n"
3051 "in turn. The return value is unspecified. More specifically,\n"
3052 "@var{proc} is applied exactly once to every port that exists\n"
3053 "in the system at the time @code{port-for-each} is invoked.\n"
3054 "Changes to the port table while @code{port-for-each} is running\n"
3055 "have no effect as far as @code{port-for-each} is concerned.")
3056 #define FUNC_NAME s_scm_port_for_each
3057 {
3058 SCM_VALIDATE_PROC (1, proc);
3059
3060 scm_c_port_for_each (scm_for_each_trampoline, SCM_UNPACK_POINTER (proc));
3061
3062 return SCM_UNSPECIFIED;
3063 }
3064 #undef FUNC_NAME
3065
3066 static void
3067 flush_output_port (void *closure, SCM port)
3068 {
3069 if (SCM_OPOUTPORTP (port))
3070 scm_flush_unlocked (port);
3071 }
3072
3073 SCM_DEFINE (scm_flush_all_ports, "flush-all-ports", 0, 0, 0,
3074 (),
3075 "Equivalent to calling @code{force-output} on\n"
3076 "all open output ports. The return value is unspecified.")
3077 #define FUNC_NAME s_scm_flush_all_ports
3078 {
3079 scm_c_port_for_each (&flush_output_port, NULL);
3080 return SCM_UNSPECIFIED;
3081 }
3082 #undef FUNC_NAME
3083
3084
3085 \f
3086
3087 /* Void ports. */
3088
3089 scm_t_bits scm_tc16_void_port = 0;
3090
3091 static int fill_input_void_port (SCM port SCM_UNUSED)
3092 {
3093 return EOF;
3094 }
3095
3096 static void
3097 write_void_port (SCM port SCM_UNUSED,
3098 const void *data SCM_UNUSED,
3099 size_t size SCM_UNUSED)
3100 {
3101 }
3102
3103 static SCM
3104 scm_i_void_port (long mode_bits)
3105 {
3106 SCM ret;
3107
3108 ret = scm_c_make_port (scm_tc16_void_port, mode_bits, 0);
3109
3110 scm_port_non_buffer (SCM_PTAB_ENTRY (ret));
3111
3112 return ret;
3113 }
3114
3115 SCM
3116 scm_void_port (char *mode_str)
3117 {
3118 return scm_i_void_port (scm_mode_bits (mode_str));
3119 }
3120
3121 SCM_DEFINE (scm_sys_make_void_port, "%make-void-port", 1, 0, 0,
3122 (SCM mode),
3123 "Create and return a new void port. A void port acts like\n"
3124 "@file{/dev/null}. The @var{mode} argument\n"
3125 "specifies the input/output modes for this port: see the\n"
3126 "documentation for @code{open-file} in @ref{File Ports}.")
3127 #define FUNC_NAME s_scm_sys_make_void_port
3128 {
3129 return scm_i_void_port (scm_i_mode_bits (mode));
3130 }
3131 #undef FUNC_NAME
3132
3133
3134 \f
3135
3136 /* Initialization. */
3137
3138 void
3139 scm_init_ports ()
3140 {
3141 /* lseek() symbols. */
3142 scm_c_define ("SEEK_SET", scm_from_int (SEEK_SET));
3143 scm_c_define ("SEEK_CUR", scm_from_int (SEEK_CUR));
3144 scm_c_define ("SEEK_END", scm_from_int (SEEK_END));
3145
3146 scm_tc16_void_port = scm_make_port_type ("void", fill_input_void_port,
3147 write_void_port);
3148
3149 cur_inport_fluid = scm_make_fluid ();
3150 cur_outport_fluid = scm_make_fluid ();
3151 cur_errport_fluid = scm_make_fluid ();
3152 cur_loadport_fluid = scm_make_fluid ();
3153
3154 scm_i_port_weak_set = scm_c_make_weak_set (31);
3155
3156 #include "libguile/ports.x"
3157
3158 /* Use Latin-1 as the default port encoding. */
3159 SCM_VARIABLE_SET (default_port_encoding_var,
3160 scm_make_fluid_with_default (SCM_BOOL_F));
3161 scm_port_encoding_init = 1;
3162
3163 SCM_VARIABLE_SET (default_conversion_strategy_var,
3164 scm_make_fluid_with_default (sym_substitute));
3165 scm_conversion_strategy_init = 1;
3166
3167 /* These bindings are used when boot-9 turns `current-input-port' et
3168 al into parameters. They are then removed from the guile module. */
3169 scm_c_define ("%current-input-port-fluid", cur_inport_fluid);
3170 scm_c_define ("%current-output-port-fluid", cur_outport_fluid);
3171 scm_c_define ("%current-error-port-fluid", cur_errport_fluid);
3172 }
3173
3174 /*
3175 Local Variables:
3176 c-file-style: "gnu"
3177 End:
3178 */