Merge commit 'a38024baaa32d1a6d91fdc81388c88bbb926c3ae'
[bpt/guile.git] / libguile / ports.c
1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004,
2 * 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
18 */
19
20
21 \f
22 /* Headers. */
23
24 #define _LARGEFILE64_SOURCE /* ask for stat64 etc */
25
26 #ifdef HAVE_CONFIG_H
27 # include <config.h>
28 #endif
29
30 #include <stdio.h>
31 #include <errno.h>
32 #include <fcntl.h> /* for chsize on mingw */
33 #include <assert.h>
34 #include <iconv.h>
35 #include <uniconv.h>
36 #include <unistr.h>
37 #include <striconveh.h>
38
39 #include <assert.h>
40
41 #include "libguile/_scm.h"
42 #include "libguile/async.h"
43 #include "libguile/deprecation.h"
44 #include "libguile/eval.h"
45 #include "libguile/fports.h" /* direct access for seek and truncate */
46 #include "libguile/goops.h"
47 #include "libguile/smob.h"
48 #include "libguile/chars.h"
49 #include "libguile/dynwind.h"
50
51 #include "libguile/keywords.h"
52 #include "libguile/hashtab.h"
53 #include "libguile/root.h"
54 #include "libguile/strings.h"
55 #include "libguile/mallocs.h"
56 #include "libguile/validate.h"
57 #include "libguile/ports.h"
58 #include "libguile/ports-internal.h"
59 #include "libguile/vectors.h"
60 #include "libguile/weak-set.h"
61 #include "libguile/fluids.h"
62 #include "libguile/eq.h"
63 #include "libguile/alist.h"
64
65 #ifdef HAVE_STRING_H
66 #include <string.h>
67 #endif
68
69 #ifdef HAVE_IO_H
70 #include <io.h>
71 #endif
72
73 #ifdef HAVE_UNISTD_H
74 #include <unistd.h>
75 #endif
76
77 #ifdef HAVE_SYS_IOCTL_H
78 #include <sys/ioctl.h>
79 #endif
80
81 /* Mingw (version 3.4.5, circa 2006) has ftruncate as an alias for chsize
82 already, but have this code here in case that wasn't so in past versions,
83 or perhaps to help other minimal DOS environments.
84
85 gnulib ftruncate.c has code using fcntl F_CHSIZE and F_FREESP, which
86 might be possibilities if we've got other systems without ftruncate. */
87
88 #if defined HAVE_CHSIZE && ! defined HAVE_FTRUNCATE
89 #define ftruncate(fd, size) chsize (fd, size)
90 #undef HAVE_FTRUNCATE
91 #define HAVE_FTRUNCATE 1
92 #endif
93
94 \f
95 /* Port encodings are case-insensitive ASCII strings. */
96 static char
97 ascii_toupper (char c)
98 {
99 return (c < 'a' || c > 'z') ? c : ('A' + (c - 'a'));
100 }
101
102 /* It is only necessary to use this function on encodings that come from
103 the user and have not been canonicalized yet. Encodings that are set
104 on ports or in the default encoding fluid are in upper-case, and can
105 be compared with strcmp. */
106 static int
107 encoding_matches (const char *enc, const char *upper)
108 {
109 if (!enc)
110 enc = "ISO-8859-1";
111
112 while (*enc)
113 if (ascii_toupper (*enc++) != *upper++)
114 return 0;
115
116 return !*upper;
117 }
118
119 static char*
120 canonicalize_encoding (const char *enc)
121 {
122 char *ret;
123 int i;
124
125 if (!enc)
126 return "ISO-8859-1";
127
128 ret = scm_gc_strdup (enc, "port");
129
130 for (i = 0; ret[i]; i++)
131 {
132 if (ret[i] > 127)
133 /* Restrict to ASCII. */
134 scm_misc_error (NULL, "invalid character encoding ~s",
135 scm_list_1 (scm_from_latin1_string (enc)));
136 else
137 ret[i] = ascii_toupper (ret[i]);
138 }
139
140 return ret;
141 }
142
143
144 \f
145 /* The port kind table --- a dynamically resized array of port types. */
146
147
148 /* scm_ptobs scm_numptob
149 * implement a dynamically resized array of ptob records.
150 * Indexes into this table are used when generating type
151 * tags for smobjects (if you know a tag you can get an index and conversely).
152 */
153 static scm_t_ptob_descriptor **scm_ptobs = NULL;
154 static long scm_numptob = 0; /* Number of port types. */
155 static long scm_ptobs_size = 0; /* Number of slots in the port type
156 table. */
157 static scm_i_pthread_mutex_t scm_ptobs_lock = SCM_I_PTHREAD_MUTEX_INITIALIZER;
158
159 long
160 scm_c_num_port_types (void)
161 {
162 long ret;
163
164 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
165 ret = scm_numptob;
166 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
167
168 return ret;
169 }
170
171 scm_t_ptob_descriptor*
172 scm_c_port_type_ref (long ptobnum)
173 {
174 scm_t_ptob_descriptor *ret = NULL;
175
176 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
177
178 if (0 <= ptobnum && ptobnum < scm_numptob)
179 ret = scm_ptobs[ptobnum];
180
181 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
182
183 if (!ret)
184 scm_out_of_range ("scm_c_port_type_ref", scm_from_long (ptobnum));
185
186 return ret;
187 }
188
189 long
190 scm_c_port_type_add_x (scm_t_ptob_descriptor *desc)
191 {
192 long ret = -1;
193
194 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
195
196 if (scm_numptob + 1 < SCM_I_MAX_PORT_TYPE_COUNT)
197 {
198 if (scm_numptob == scm_ptobs_size)
199 {
200 unsigned long old_size = scm_ptobs_size;
201 scm_t_ptob_descriptor **old_ptobs = scm_ptobs;
202
203 /* Currently there are only 9 predefined port types, so one
204 resize will cover it. */
205 scm_ptobs_size = old_size + 10;
206
207 if (scm_ptobs_size >= SCM_I_MAX_PORT_TYPE_COUNT)
208 scm_ptobs_size = SCM_I_MAX_PORT_TYPE_COUNT;
209
210 scm_ptobs = scm_gc_malloc (sizeof (*scm_ptobs) * scm_ptobs_size,
211 "scm_ptobs");
212
213 memcpy (scm_ptobs, old_ptobs, sizeof (*scm_ptobs) * scm_numptob);
214 }
215
216 ret = scm_numptob++;
217 scm_ptobs[ret] = desc;
218 }
219
220 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
221
222 if (ret < 0)
223 scm_out_of_range ("scm_c_port_type_add_x", scm_from_long (scm_numptob));
224
225 return ret;
226 }
227
228 /*
229 * We choose to use an interface similar to the smob interface with
230 * fill_input and write as standard fields, passed to the port
231 * type constructor, and optional fields set by setters.
232 */
233
234 static void
235 flush_port_default (SCM port SCM_UNUSED)
236 {
237 }
238
239 static void
240 end_input_default (SCM port SCM_UNUSED, int offset SCM_UNUSED)
241 {
242 }
243
244 scm_t_bits
245 scm_make_port_type (char *name,
246 int (*fill_input) (SCM port),
247 void (*write) (SCM port, const void *data, size_t size))
248 {
249 scm_t_ptob_descriptor *desc;
250 long ptobnum;
251
252 desc = scm_gc_malloc_pointerless (sizeof (*desc), "port-type");
253 memset (desc, 0, sizeof (*desc));
254
255 desc->name = name;
256 desc->print = scm_port_print;
257 desc->write = write;
258 desc->flush = flush_port_default;
259 desc->end_input = end_input_default;
260 desc->fill_input = fill_input;
261
262 ptobnum = scm_c_port_type_add_x (desc);
263
264 /* Make a class object if GOOPS is present. */
265 if (SCM_UNPACK (scm_port_class[0]) != 0)
266 scm_make_port_classes (ptobnum, name);
267
268 return scm_tc7_port + ptobnum * 256;
269 }
270
271 void
272 scm_set_port_mark (scm_t_bits tc, SCM (*mark) (SCM))
273 {
274 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->mark = mark;
275 }
276
277 void
278 scm_set_port_free (scm_t_bits tc, size_t (*free) (SCM))
279 {
280 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->free = free;
281 }
282
283 void
284 scm_set_port_print (scm_t_bits tc, int (*print) (SCM exp, SCM port,
285 scm_print_state *pstate))
286 {
287 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->print = print;
288 }
289
290 void
291 scm_set_port_equalp (scm_t_bits tc, SCM (*equalp) (SCM, SCM))
292 {
293 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->equalp = equalp;
294 }
295
296 void
297 scm_set_port_close (scm_t_bits tc, int (*close) (SCM))
298 {
299 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->close = close;
300 }
301
302 void
303 scm_set_port_flush (scm_t_bits tc, void (*flush) (SCM port))
304 {
305 scm_t_ptob_descriptor *ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tc));
306 ptob->flush = flush;
307 ptob->flags |= SCM_PORT_TYPE_HAS_FLUSH;
308 }
309
310 void
311 scm_set_port_end_input (scm_t_bits tc, void (*end_input) (SCM port, int offset))
312 {
313 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->end_input = end_input;
314 }
315
316 void
317 scm_set_port_seek (scm_t_bits tc, scm_t_off (*seek) (SCM, scm_t_off, int))
318 {
319 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->seek = seek;
320 }
321
322 void
323 scm_set_port_truncate (scm_t_bits tc, void (*truncate) (SCM, scm_t_off))
324 {
325 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->truncate = truncate;
326 }
327
328 void
329 scm_set_port_input_waiting (scm_t_bits tc, int (*input_waiting) (SCM))
330 {
331 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->input_waiting = input_waiting;
332 }
333
334 static void
335 scm_i_set_pending_eof (SCM port)
336 {
337 SCM_PORT_GET_INTERNAL (port)->pending_eof = 1;
338 }
339
340 static void
341 scm_i_clear_pending_eof (SCM port)
342 {
343 SCM_PORT_GET_INTERNAL (port)->pending_eof = 0;
344 }
345
346 SCM_DEFINE (scm_i_port_property, "%port-property", 2, 0, 0,
347 (SCM port, SCM key),
348 "Return the property of @var{port} associated with @var{key}.")
349 #define FUNC_NAME s_scm_i_port_property
350 {
351 SCM_VALIDATE_OPPORT (1, port);
352 return scm_assq_ref (SCM_PORT_GET_INTERNAL (port)->alist, key);
353 }
354 #undef FUNC_NAME
355
356 SCM_DEFINE (scm_i_set_port_property_x, "%set-port-property!", 3, 0, 0,
357 (SCM port, SCM key, SCM value),
358 "Set the property of @var{port} associated with @var{key} to @var{value}.")
359 #define FUNC_NAME s_scm_i_set_port_property_x
360 {
361 scm_t_port_internal *pti;
362
363 SCM_VALIDATE_OPPORT (1, port);
364 pti = SCM_PORT_GET_INTERNAL (port);
365 pti->alist = scm_assq_set_x (pti->alist, key, value);
366 return SCM_UNSPECIFIED;
367 }
368 #undef FUNC_NAME
369
370 \f
371
372 /* Standard ports --- current input, output, error, and more(!). */
373
374 static SCM cur_inport_fluid = SCM_BOOL_F;
375 static SCM cur_outport_fluid = SCM_BOOL_F;
376 static SCM cur_errport_fluid = SCM_BOOL_F;
377 static SCM cur_loadport_fluid = SCM_BOOL_F;
378
379 SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
380 (),
381 "Return the current input port. This is the default port used\n"
382 "by many input procedures. Initially, @code{current-input-port}\n"
383 "returns the @dfn{standard input} in Unix and C terminology.")
384 #define FUNC_NAME s_scm_current_input_port
385 {
386 if (scm_is_true (cur_inport_fluid))
387 return scm_fluid_ref (cur_inport_fluid);
388 else
389 return SCM_BOOL_F;
390 }
391 #undef FUNC_NAME
392
393 SCM_DEFINE (scm_current_output_port, "current-output-port", 0, 0, 0,
394 (),
395 "Return the current output port. This is the default port used\n"
396 "by many output procedures. Initially,\n"
397 "@code{current-output-port} returns the @dfn{standard output} in\n"
398 "Unix and C terminology.")
399 #define FUNC_NAME s_scm_current_output_port
400 {
401 if (scm_is_true (cur_outport_fluid))
402 return scm_fluid_ref (cur_outport_fluid);
403 else
404 return SCM_BOOL_F;
405 }
406 #undef FUNC_NAME
407
408 SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0,
409 (),
410 "Return the port to which errors and warnings should be sent (the\n"
411 "@dfn{standard error} in Unix and C terminology).")
412 #define FUNC_NAME s_scm_current_error_port
413 {
414 if (scm_is_true (cur_errport_fluid))
415 return scm_fluid_ref (cur_errport_fluid);
416 else
417 return SCM_BOOL_F;
418 }
419 #undef FUNC_NAME
420
421 SCM
422 scm_current_warning_port (void)
423 {
424 static SCM cwp_var = SCM_UNDEFINED;
425 static scm_i_pthread_mutex_t cwp_var_mutex
426 = SCM_I_PTHREAD_MUTEX_INITIALIZER;
427
428 scm_i_scm_pthread_mutex_lock (&cwp_var_mutex);
429 if (SCM_UNBNDP (cwp_var))
430 cwp_var = scm_c_private_variable ("guile", "current-warning-port");
431 scm_i_pthread_mutex_unlock (&cwp_var_mutex);
432
433 return scm_call_0 (scm_variable_ref (cwp_var));
434 }
435
436 SCM_DEFINE (scm_current_load_port, "current-load-port", 0, 0, 0,
437 (),
438 "Return the current-load-port.\n"
439 "The load port is used internally by @code{primitive-load}.")
440 #define FUNC_NAME s_scm_current_load_port
441 {
442 return scm_fluid_ref (cur_loadport_fluid);
443 }
444 #undef FUNC_NAME
445
446 SCM_DEFINE (scm_set_current_input_port, "set-current-input-port", 1, 0, 0,
447 (SCM port),
448 "@deffnx {Scheme Procedure} set-current-output-port port\n"
449 "@deffnx {Scheme Procedure} set-current-error-port port\n"
450 "Change the ports returned by @code{current-input-port},\n"
451 "@code{current-output-port} and @code{current-error-port}, respectively,\n"
452 "so that they use the supplied @var{port} for input or output.")
453 #define FUNC_NAME s_scm_set_current_input_port
454 {
455 SCM oinp = scm_fluid_ref (cur_inport_fluid);
456 SCM_VALIDATE_OPINPORT (1, port);
457 scm_fluid_set_x (cur_inport_fluid, port);
458 return oinp;
459 }
460 #undef FUNC_NAME
461
462
463 SCM_DEFINE (scm_set_current_output_port, "set-current-output-port", 1, 0, 0,
464 (SCM port),
465 "Set the current default output port to @var{port}.")
466 #define FUNC_NAME s_scm_set_current_output_port
467 {
468 SCM ooutp = scm_fluid_ref (cur_outport_fluid);
469 port = SCM_COERCE_OUTPORT (port);
470 SCM_VALIDATE_OPOUTPORT (1, port);
471 scm_fluid_set_x (cur_outport_fluid, port);
472 return ooutp;
473 }
474 #undef FUNC_NAME
475
476
477 SCM_DEFINE (scm_set_current_error_port, "set-current-error-port", 1, 0, 0,
478 (SCM port),
479 "Set the current default error port to @var{port}.")
480 #define FUNC_NAME s_scm_set_current_error_port
481 {
482 SCM oerrp = scm_fluid_ref (cur_errport_fluid);
483 port = SCM_COERCE_OUTPORT (port);
484 SCM_VALIDATE_OPOUTPORT (1, port);
485 scm_fluid_set_x (cur_errport_fluid, port);
486 return oerrp;
487 }
488 #undef FUNC_NAME
489
490
491 SCM
492 scm_set_current_warning_port (SCM port)
493 {
494 static SCM cwp_var = SCM_BOOL_F;
495
496 if (scm_is_false (cwp_var))
497 cwp_var = scm_c_private_lookup ("guile", "current-warning-port");
498
499 return scm_call_1 (scm_variable_ref (cwp_var), port);
500 }
501
502
503 void
504 scm_dynwind_current_input_port (SCM port)
505 #define FUNC_NAME NULL
506 {
507 SCM_VALIDATE_OPINPORT (1, port);
508 scm_dynwind_fluid (cur_inport_fluid, port);
509 }
510 #undef FUNC_NAME
511
512 void
513 scm_dynwind_current_output_port (SCM port)
514 #define FUNC_NAME NULL
515 {
516 port = SCM_COERCE_OUTPORT (port);
517 SCM_VALIDATE_OPOUTPORT (1, port);
518 scm_dynwind_fluid (cur_outport_fluid, port);
519 }
520 #undef FUNC_NAME
521
522 void
523 scm_dynwind_current_error_port (SCM port)
524 #define FUNC_NAME NULL
525 {
526 port = SCM_COERCE_OUTPORT (port);
527 SCM_VALIDATE_OPOUTPORT (1, port);
528 scm_dynwind_fluid (cur_errport_fluid, port);
529 }
530 #undef FUNC_NAME
531
532 void
533 scm_i_dynwind_current_load_port (SCM port)
534 {
535 scm_dynwind_fluid (cur_loadport_fluid, port);
536 }
537
538
539 \f
540
541 /* Retrieving a port's mode. */
542
543 /* Return the flags that characterize a port based on the mode
544 * string used to open a file for that port.
545 *
546 * See PORT FLAGS in scm.h
547 */
548
549 static long
550 scm_i_mode_bits_n (SCM modes)
551 {
552 return (SCM_OPN
553 | (scm_i_string_contains_char (modes, 'r')
554 || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0)
555 | (scm_i_string_contains_char (modes, 'w')
556 || scm_i_string_contains_char (modes, 'a')
557 || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0)
558 | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0)
559 | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0));
560 }
561
562 long
563 scm_mode_bits (char *modes)
564 {
565 /* Valid characters are rw+a0l. So, use latin1. */
566 return scm_i_mode_bits (scm_from_latin1_string (modes));
567 }
568
569 long
570 scm_i_mode_bits (SCM modes)
571 {
572 long bits;
573
574 if (!scm_is_string (modes))
575 scm_wrong_type_arg_msg (NULL, 0, modes, "string");
576
577 bits = scm_i_mode_bits_n (modes);
578 scm_remember_upto_here_1 (modes);
579 return bits;
580 }
581
582 /* Return the mode flags from an open port.
583 * Some modes such as "append" are only used when opening
584 * a file and are not returned here. */
585
586 SCM_DEFINE (scm_port_mode, "port-mode", 1, 0, 0,
587 (SCM port),
588 "Return the port modes associated with the open port @var{port}.\n"
589 "These will not necessarily be identical to the modes used when\n"
590 "the port was opened, since modes such as \"append\" which are\n"
591 "used only during port creation are not retained.")
592 #define FUNC_NAME s_scm_port_mode
593 {
594 char modes[4];
595 modes[0] = '\0';
596
597 port = SCM_COERCE_OUTPORT (port);
598 SCM_VALIDATE_OPPORT (1, port);
599 if (SCM_CELL_WORD_0 (port) & SCM_RDNG) {
600 if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
601 strcpy (modes, "r+");
602 else
603 strcpy (modes, "r");
604 }
605 else if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
606 strcpy (modes, "w");
607 if (SCM_CELL_WORD_0 (port) & SCM_BUF0)
608 strcat (modes, "0");
609
610 return scm_from_latin1_string (modes);
611 }
612 #undef FUNC_NAME
613
614
615 \f
616
617 /* The port table --- a weak set of all ports.
618
619 We need a global registry of ports to flush them all at exit, and to
620 get all the ports matching a file descriptor. */
621 SCM scm_i_port_weak_set;
622
623
624 \f
625
626 /* Port finalization. */
627
628 struct do_free_data
629 {
630 scm_t_ptob_descriptor *ptob;
631 SCM port;
632 };
633
634 static SCM
635 do_free (void *body_data)
636 {
637 struct do_free_data *data = body_data;
638
639 /* `close' is for explicit `close-port' by user. `free' is for this
640 purpose: ports collected by the GC. */
641 data->ptob->free (data->port);
642
643 return SCM_BOOL_T;
644 }
645
646 /* Finalize the object (a port) pointed to by PTR. */
647 static void
648 finalize_port (void *ptr, void *data)
649 {
650 SCM port = SCM_PACK_POINTER (ptr);
651
652 if (!SCM_PORTP (port))
653 abort ();
654
655 if (SCM_OPENP (port))
656 {
657 struct do_free_data data;
658
659 SCM_CLR_PORT_OPEN_FLAG (port);
660
661 data.ptob = SCM_PORT_DESCRIPTOR (port);
662 data.port = port;
663
664 scm_internal_catch (SCM_BOOL_T, do_free, &data,
665 scm_handle_by_message_noexit, NULL);
666
667 scm_gc_ports_collected++;
668 }
669 }
670
671
672 \f
673
674 SCM
675 scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits,
676 const char *encoding,
677 scm_t_string_failed_conversion_handler handler,
678 scm_t_bits stream)
679 {
680 SCM ret;
681 scm_t_port *entry;
682 scm_t_port_internal *pti;
683 scm_t_ptob_descriptor *ptob;
684
685 entry = scm_gc_typed_calloc (scm_t_port);
686 pti = scm_gc_typed_calloc (scm_t_port_internal);
687 ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tag));
688
689 ret = scm_words (tag | mode_bits, 3);
690 SCM_SET_CELL_WORD_1 (ret, (scm_t_bits) entry);
691 SCM_SET_CELL_WORD_2 (ret, (scm_t_bits) ptob);
692
693 entry->lock = scm_gc_malloc_pointerless (sizeof (*entry->lock), "port lock");
694 scm_i_pthread_mutex_init (entry->lock, scm_i_pthread_mutexattr_recursive);
695
696 entry->internal = pti;
697 entry->file_name = SCM_BOOL_F;
698 entry->rw_active = SCM_PORT_NEITHER;
699 entry->port = ret;
700 entry->stream = stream;
701
702 if (encoding_matches (encoding, "UTF-8"))
703 {
704 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
705 entry->encoding = "UTF-8";
706 }
707 else if (encoding_matches (encoding, "ISO-8859-1"))
708 {
709 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
710 entry->encoding = "ISO-8859-1";
711 }
712 else
713 {
714 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
715 entry->encoding = canonicalize_encoding (encoding);
716 }
717
718 entry->ilseq_handler = handler;
719 pti->iconv_descriptors = NULL;
720
721 pti->at_stream_start_for_bom_read = 1;
722 pti->at_stream_start_for_bom_write = 1;
723
724 pti->pending_eof = 0;
725 pti->alist = SCM_EOL;
726
727 if (SCM_PORT_DESCRIPTOR (ret)->free)
728 scm_i_set_finalizer (SCM2PTR (ret), finalize_port, NULL);
729
730 if (SCM_PORT_DESCRIPTOR (ret)->flags & SCM_PORT_TYPE_HAS_FLUSH)
731 scm_weak_set_add_x (scm_i_port_weak_set, ret);
732
733 return ret;
734 }
735
736 SCM
737 scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream)
738 {
739 return scm_c_make_port_with_encoding (tag, mode_bits,
740 scm_i_default_port_encoding (),
741 scm_i_default_port_conversion_handler (),
742 stream);
743 }
744
745 SCM
746 scm_new_port_table_entry (scm_t_bits tag)
747 {
748 return scm_c_make_port (tag, 0, 0);
749 }
750
751 \f
752
753 /* Predicates. */
754
755 SCM_DEFINE (scm_port_p, "port?", 1, 0, 0,
756 (SCM x),
757 "Return a boolean indicating whether @var{x} is a port.\n"
758 "Equivalent to @code{(or (input-port? @var{x}) (output-port?\n"
759 "@var{x}))}.")
760 #define FUNC_NAME s_scm_port_p
761 {
762 return scm_from_bool (SCM_PORTP (x));
763 }
764 #undef FUNC_NAME
765
766 SCM_DEFINE (scm_input_port_p, "input-port?", 1, 0, 0,
767 (SCM x),
768 "Return @code{#t} if @var{x} is an input port, otherwise return\n"
769 "@code{#f}. Any object satisfying this predicate also satisfies\n"
770 "@code{port?}.")
771 #define FUNC_NAME s_scm_input_port_p
772 {
773 return scm_from_bool (SCM_INPUT_PORT_P (x));
774 }
775 #undef FUNC_NAME
776
777 SCM_DEFINE (scm_output_port_p, "output-port?", 1, 0, 0,
778 (SCM x),
779 "Return @code{#t} if @var{x} is an output port, otherwise return\n"
780 "@code{#f}. Any object satisfying this predicate also satisfies\n"
781 "@code{port?}.")
782 #define FUNC_NAME s_scm_output_port_p
783 {
784 x = SCM_COERCE_OUTPORT (x);
785 return scm_from_bool (SCM_OUTPUT_PORT_P (x));
786 }
787 #undef FUNC_NAME
788
789 SCM_DEFINE (scm_port_closed_p, "port-closed?", 1, 0, 0,
790 (SCM port),
791 "Return @code{#t} if @var{port} is closed or @code{#f} if it is\n"
792 "open.")
793 #define FUNC_NAME s_scm_port_closed_p
794 {
795 SCM_VALIDATE_PORT (1, port);
796 return scm_from_bool (!SCM_OPPORTP (port));
797 }
798 #undef FUNC_NAME
799
800 SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0,
801 (SCM x),
802 "Return @code{#t} if @var{x} is an end-of-file object; otherwise\n"
803 "return @code{#f}.")
804 #define FUNC_NAME s_scm_eof_object_p
805 {
806 return scm_from_bool (SCM_EOF_OBJECT_P (x));
807 }
808 #undef FUNC_NAME
809
810
811 \f
812
813 /* Closing ports. */
814
815 static void close_iconv_descriptors (scm_t_iconv_descriptors *id);
816
817 /* scm_close_port
818 * Call the close operation on a port object.
819 * see also scm_close.
820 */
821 SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0,
822 (SCM port),
823 "Close the specified port object. Return @code{#t} if it\n"
824 "successfully closes a port or @code{#f} if it was already\n"
825 "closed. An exception may be raised if an error occurs, for\n"
826 "example when flushing buffered output. See also @ref{Ports and\n"
827 "File Descriptors, close}, for a procedure which can close file\n"
828 "descriptors.")
829 #define FUNC_NAME s_scm_close_port
830 {
831 scm_t_port_internal *pti;
832 int rv;
833
834 port = SCM_COERCE_OUTPORT (port);
835
836 SCM_VALIDATE_PORT (1, port);
837 if (SCM_CLOSEDP (port))
838 return SCM_BOOL_F;
839
840 pti = SCM_PORT_GET_INTERNAL (port);
841 SCM_CLR_PORT_OPEN_FLAG (port);
842
843 if (SCM_PORT_DESCRIPTOR (port)->flags & SCM_PORT_TYPE_HAS_FLUSH)
844 scm_weak_set_remove_x (scm_i_port_weak_set, port);
845
846 if (SCM_PORT_DESCRIPTOR (port)->close)
847 /* Note! This may throw an exception. Anything after this point
848 should be resilient to non-local exits. */
849 rv = SCM_PORT_DESCRIPTOR (port)->close (port);
850 else
851 rv = 0;
852
853 if (pti->iconv_descriptors)
854 {
855 /* If we don't get here, the iconv_descriptors finalizer will
856 clean up. */
857 close_iconv_descriptors (pti->iconv_descriptors);
858 pti->iconv_descriptors = NULL;
859 }
860
861 return scm_from_bool (rv >= 0);
862 }
863 #undef FUNC_NAME
864
865 SCM_DEFINE (scm_close_input_port, "close-input-port", 1, 0, 0,
866 (SCM port),
867 "Close the specified input port object. The routine has no effect if\n"
868 "the file has already been closed. An exception may be raised if an\n"
869 "error occurs. The value returned is unspecified.\n\n"
870 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
871 "which can close file descriptors.")
872 #define FUNC_NAME s_scm_close_input_port
873 {
874 SCM_VALIDATE_INPUT_PORT (1, port);
875 scm_close_port (port);
876 return SCM_UNSPECIFIED;
877 }
878 #undef FUNC_NAME
879
880 SCM_DEFINE (scm_close_output_port, "close-output-port", 1, 0, 0,
881 (SCM port),
882 "Close the specified output port object. The routine has no effect if\n"
883 "the file has already been closed. An exception may be raised if an\n"
884 "error occurs. The value returned is unspecified.\n\n"
885 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
886 "which can close file descriptors.")
887 #define FUNC_NAME s_scm_close_output_port
888 {
889 port = SCM_COERCE_OUTPORT (port);
890 SCM_VALIDATE_OUTPUT_PORT (1, port);
891 scm_close_port (port);
892 return SCM_UNSPECIFIED;
893 }
894 #undef FUNC_NAME
895
896
897 \f
898
899 /* Encoding characters to byte streams, and decoding byte streams to
900 characters. */
901
902 /* A fluid specifying the default encoding for newly created ports. If it is
903 a string, that is the encoding. If it is #f, it is in the "native"
904 (Latin-1) encoding. */
905 SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding");
906
907 static int scm_port_encoding_init = 0;
908
909 /* Use ENCODING as the default encoding for future ports. */
910 void
911 scm_i_set_default_port_encoding (const char *encoding)
912 {
913 if (!scm_port_encoding_init
914 || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
915 scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
916 SCM_EOL);
917
918 if (encoding_matches (encoding, "ISO-8859-1"))
919 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
920 else
921 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
922 scm_from_latin1_string (canonicalize_encoding (encoding)));
923 }
924
925 /* Return the name of the default encoding for newly created ports. */
926 const char *
927 scm_i_default_port_encoding (void)
928 {
929 if (!scm_port_encoding_init)
930 return "ISO-8859-1";
931 else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
932 return "ISO-8859-1";
933 else
934 {
935 SCM encoding;
936
937 encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
938 if (!scm_is_string (encoding))
939 return "ISO-8859-1";
940 else
941 return scm_i_string_chars (encoding);
942 }
943 }
944
945 /* A fluid specifying the default conversion handler for newly created
946 ports. Its value should be one of the symbols below. */
947 SCM_VARIABLE (default_conversion_strategy_var,
948 "%default-port-conversion-strategy");
949
950 /* Whether the above fluid is initialized. */
951 static int scm_conversion_strategy_init = 0;
952
953 /* The possible conversion strategies. */
954 SCM_SYMBOL (sym_error, "error");
955 SCM_SYMBOL (sym_substitute, "substitute");
956 SCM_SYMBOL (sym_escape, "escape");
957
958 /* Return the default failed encoding conversion policy for new created
959 ports. */
960 scm_t_string_failed_conversion_handler
961 scm_i_default_port_conversion_handler (void)
962 {
963 scm_t_string_failed_conversion_handler handler;
964
965 if (!scm_conversion_strategy_init
966 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
967 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
968 else
969 {
970 SCM fluid, value;
971
972 fluid = SCM_VARIABLE_REF (default_conversion_strategy_var);
973 value = scm_fluid_ref (fluid);
974
975 if (scm_is_eq (sym_substitute, value))
976 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
977 else if (scm_is_eq (sym_escape, value))
978 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
979 else
980 /* Default to 'error also when the fluid's value is not one of
981 the valid symbols. */
982 handler = SCM_FAILED_CONVERSION_ERROR;
983 }
984
985 return handler;
986 }
987
988 /* Use HANDLER as the default conversion strategy for future ports. */
989 void
990 scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler
991 handler)
992 {
993 SCM strategy;
994
995 if (!scm_conversion_strategy_init
996 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
997 scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
998 SCM_EOL);
999
1000 switch (handler)
1001 {
1002 case SCM_FAILED_CONVERSION_ERROR:
1003 strategy = sym_error;
1004 break;
1005
1006 case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE:
1007 strategy = sym_escape;
1008 break;
1009
1010 case SCM_FAILED_CONVERSION_QUESTION_MARK:
1011 strategy = sym_substitute;
1012 break;
1013
1014 default:
1015 abort ();
1016 }
1017
1018 scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var),
1019 strategy);
1020 }
1021
1022 static void
1023 scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port);
1024
1025 /* If the next LEN bytes from PORT are equal to those in BYTES, then
1026 return 1, else return 0. Leave the port position unchanged. */
1027 static int
1028 looking_at_bytes (SCM port, const unsigned char *bytes, int len)
1029 {
1030 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1031 int i = 0;
1032
1033 while (i < len && scm_peek_byte_or_eof_unlocked (port) == bytes[i])
1034 {
1035 pt->read_pos++;
1036 i++;
1037 }
1038 scm_i_unget_bytes_unlocked (bytes, i, port);
1039 return (i == len);
1040 }
1041
1042 static const unsigned char scm_utf8_bom[3] = {0xEF, 0xBB, 0xBF};
1043 static const unsigned char scm_utf16be_bom[2] = {0xFE, 0xFF};
1044 static const unsigned char scm_utf16le_bom[2] = {0xFF, 0xFE};
1045 static const unsigned char scm_utf32be_bom[4] = {0x00, 0x00, 0xFE, 0xFF};
1046 static const unsigned char scm_utf32le_bom[4] = {0xFF, 0xFE, 0x00, 0x00};
1047
1048 /* Decide what byte order to use for a UTF-16 port. Return "UTF-16BE"
1049 or "UTF-16LE". MODE must be either SCM_PORT_READ or SCM_PORT_WRITE,
1050 and specifies which operation is about to be done. The MODE
1051 determines how we will decide the byte order. We deliberately avoid
1052 reading from the port unless the user is about to do so. If the user
1053 is about to read, then we look for a BOM, and if present, we use it
1054 to determine the byte order. Otherwise we choose big endian, as
1055 recommended by the Unicode Standard. Note that the BOM (if any) is
1056 not consumed here. */
1057 static const char *
1058 decide_utf16_encoding (SCM port, scm_t_port_rw_active mode)
1059 {
1060 if (mode == SCM_PORT_READ
1061 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1062 && looking_at_bytes (port, scm_utf16le_bom, sizeof scm_utf16le_bom))
1063 return "UTF-16LE";
1064 else
1065 return "UTF-16BE";
1066 }
1067
1068 /* Decide what byte order to use for a UTF-32 port. Return "UTF-32BE"
1069 or "UTF-32LE". See the comment above 'decide_utf16_encoding' for
1070 details. */
1071 static const char *
1072 decide_utf32_encoding (SCM port, scm_t_port_rw_active mode)
1073 {
1074 if (mode == SCM_PORT_READ
1075 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1076 && looking_at_bytes (port, scm_utf32le_bom, sizeof scm_utf32le_bom))
1077 return "UTF-32LE";
1078 else
1079 return "UTF-32BE";
1080 }
1081
1082 static void
1083 finalize_iconv_descriptors (void *ptr, void *data)
1084 {
1085 close_iconv_descriptors (ptr);
1086 }
1087
1088 static scm_t_iconv_descriptors *
1089 open_iconv_descriptors (const char *encoding, int reading, int writing)
1090 {
1091 scm_t_iconv_descriptors *id;
1092 iconv_t input_cd, output_cd;
1093 size_t i;
1094
1095 input_cd = (iconv_t) -1;
1096 output_cd = (iconv_t) -1;
1097
1098 for (i = 0; encoding[i]; i++)
1099 if (encoding[i] > 127)
1100 goto invalid_encoding;
1101
1102 if (reading)
1103 {
1104 /* Open an input iconv conversion descriptor, from ENCODING
1105 to UTF-8. We choose UTF-8, not UTF-32, because iconv
1106 implementations can typically convert from anything to
1107 UTF-8, but not to UTF-32 (see
1108 <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
1109
1110 /* Assume opening an iconv descriptor causes about 16 KB of
1111 allocation. */
1112 scm_gc_register_allocation (16 * 1024);
1113
1114 input_cd = iconv_open ("UTF-8", encoding);
1115 if (input_cd == (iconv_t) -1)
1116 goto invalid_encoding;
1117 }
1118
1119 if (writing)
1120 {
1121 /* Assume opening an iconv descriptor causes about 16 KB of
1122 allocation. */
1123 scm_gc_register_allocation (16 * 1024);
1124
1125 output_cd = iconv_open (encoding, "UTF-8");
1126 if (output_cd == (iconv_t) -1)
1127 {
1128 if (input_cd != (iconv_t) -1)
1129 iconv_close (input_cd);
1130 goto invalid_encoding;
1131 }
1132 }
1133
1134 id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors");
1135 id->input_cd = input_cd;
1136 id->output_cd = output_cd;
1137
1138 /* Register a finalizer to close the descriptors. */
1139 scm_i_set_finalizer (id, finalize_iconv_descriptors, NULL);
1140
1141 return id;
1142
1143 invalid_encoding:
1144 {
1145 SCM err;
1146 err = scm_from_latin1_string (encoding);
1147 scm_misc_error ("open_iconv_descriptors",
1148 "invalid or unknown character encoding ~s",
1149 scm_list_1 (err));
1150 }
1151 }
1152
1153 static void
1154 close_iconv_descriptors (scm_t_iconv_descriptors *id)
1155 {
1156 if (id->input_cd != (iconv_t) -1)
1157 iconv_close (id->input_cd);
1158 if (id->output_cd != (iconv_t) -1)
1159 iconv_close (id->output_cd);
1160 id->input_cd = (void *) -1;
1161 id->output_cd = (void *) -1;
1162 }
1163
1164 scm_t_iconv_descriptors *
1165 scm_i_port_iconv_descriptors (SCM port, scm_t_port_rw_active mode)
1166 {
1167 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
1168
1169 assert (pti->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV);
1170
1171 if (!pti->iconv_descriptors)
1172 {
1173 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1174 const char *precise_encoding;
1175
1176 if (!pt->encoding)
1177 pt->encoding = "ISO-8859-1";
1178
1179 /* If the specified encoding is UTF-16 or UTF-32, then make
1180 that more precise by deciding what byte order to use. */
1181 if (strcmp (pt->encoding, "UTF-16") == 0)
1182 precise_encoding = decide_utf16_encoding (port, mode);
1183 else if (strcmp (pt->encoding, "UTF-32") == 0)
1184 precise_encoding = decide_utf32_encoding (port, mode);
1185 else
1186 precise_encoding = pt->encoding;
1187
1188 pti->iconv_descriptors =
1189 open_iconv_descriptors (precise_encoding,
1190 SCM_INPUT_PORT_P (port),
1191 SCM_OUTPUT_PORT_P (port));
1192 }
1193
1194 return pti->iconv_descriptors;
1195 }
1196
1197 /* The name of the encoding is itself encoded in ASCII. */
1198 void
1199 scm_i_set_port_encoding_x (SCM port, const char *encoding)
1200 {
1201 scm_t_port *pt;
1202 scm_t_port_internal *pti;
1203 scm_t_iconv_descriptors *prev;
1204
1205 /* Set the character encoding for this port. */
1206 pt = SCM_PTAB_ENTRY (port);
1207 pti = SCM_PORT_GET_INTERNAL (port);
1208 prev = pti->iconv_descriptors;
1209
1210 /* In order to handle cases where the encoding changes mid-stream
1211 (e.g. within an HTTP stream, or within a file that is composed of
1212 segments with different encodings), we consider this to be "stream
1213 start" for purposes of BOM handling, regardless of our actual file
1214 position. */
1215 pti->at_stream_start_for_bom_read = 1;
1216 pti->at_stream_start_for_bom_write = 1;
1217
1218 if (encoding_matches (encoding, "UTF-8"))
1219 {
1220 pt->encoding = "UTF-8";
1221 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
1222 }
1223 else if (encoding_matches (encoding, "ISO-8859-1"))
1224 {
1225 pt->encoding = "ISO-8859-1";
1226 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
1227 }
1228 else
1229 {
1230 pt->encoding = canonicalize_encoding (encoding);
1231 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
1232 }
1233
1234 pti->iconv_descriptors = NULL;
1235 if (prev)
1236 close_iconv_descriptors (prev);
1237 }
1238
1239 SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
1240 (SCM port),
1241 "Returns, as a string, the character encoding that @var{port}\n"
1242 "uses to interpret its input and output.\n")
1243 #define FUNC_NAME s_scm_port_encoding
1244 {
1245 SCM_VALIDATE_PORT (1, port);
1246
1247 return scm_from_latin1_string (SCM_PTAB_ENTRY (port)->encoding);
1248 }
1249 #undef FUNC_NAME
1250
1251 SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
1252 (SCM port, SCM enc),
1253 "Sets the character encoding that will be used to interpret all\n"
1254 "port I/O. New ports are created with the encoding\n"
1255 "appropriate for the current locale if @code{setlocale} has \n"
1256 "been called or ISO-8859-1 otherwise\n"
1257 "and this procedure can be used to modify that encoding.\n")
1258 #define FUNC_NAME s_scm_set_port_encoding_x
1259 {
1260 char *enc_str;
1261
1262 SCM_VALIDATE_PORT (1, port);
1263 SCM_VALIDATE_STRING (2, enc);
1264
1265 enc_str = scm_to_latin1_string (enc);
1266 scm_i_set_port_encoding_x (port, enc_str);
1267 free (enc_str);
1268
1269 return SCM_UNSPECIFIED;
1270 }
1271 #undef FUNC_NAME
1272
1273 SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
1274 1, 0, 0, (SCM port),
1275 "Returns the behavior of the port when handling a character that\n"
1276 "is not representable in the port's current encoding.\n"
1277 "It returns the symbol @code{error} if unrepresentable characters\n"
1278 "should cause exceptions, @code{substitute} if the port should\n"
1279 "try to replace unrepresentable characters with question marks or\n"
1280 "approximate characters, or @code{escape} if unrepresentable\n"
1281 "characters should be converted to string escapes.\n"
1282 "\n"
1283 "If @var{port} is @code{#f}, then the current default behavior\n"
1284 "will be returned. New ports will have this default behavior\n"
1285 "when they are created.\n")
1286 #define FUNC_NAME s_scm_port_conversion_strategy
1287 {
1288 scm_t_string_failed_conversion_handler h;
1289
1290 SCM_VALIDATE_OPPORT (1, port);
1291
1292 if (scm_is_false (port))
1293 h = scm_i_default_port_conversion_handler ();
1294 else
1295 {
1296 scm_t_port *pt;
1297
1298 SCM_VALIDATE_OPPORT (1, port);
1299 pt = SCM_PTAB_ENTRY (port);
1300
1301 h = pt->ilseq_handler;
1302 }
1303
1304 if (h == SCM_FAILED_CONVERSION_ERROR)
1305 return scm_from_latin1_symbol ("error");
1306 else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
1307 return scm_from_latin1_symbol ("substitute");
1308 else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
1309 return scm_from_latin1_symbol ("escape");
1310 else
1311 abort ();
1312
1313 /* Never gets here. */
1314 return SCM_UNDEFINED;
1315 }
1316 #undef FUNC_NAME
1317
1318 SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!",
1319 2, 0, 0,
1320 (SCM port, SCM sym),
1321 "Sets the behavior of the interpreter when outputting a character\n"
1322 "that is not representable in the port's current encoding.\n"
1323 "@var{sym} can be either @code{'error}, @code{'substitute}, or\n"
1324 "@code{'escape}. If it is @code{'error}, an error will be thrown\n"
1325 "when an unconvertible character is encountered. If it is\n"
1326 "@code{'substitute}, then unconvertible characters will \n"
1327 "be replaced with approximate characters, or with question marks\n"
1328 "if no approximately correct character is available.\n"
1329 "If it is @code{'escape},\n"
1330 "it will appear as a hex escape when output.\n"
1331 "\n"
1332 "If @var{port} is an open port, the conversion error behavior\n"
1333 "is set for that port. If it is @code{#f}, it is set as the\n"
1334 "default behavior for any future ports that get created in\n"
1335 "this thread.\n")
1336 #define FUNC_NAME s_scm_set_port_conversion_strategy_x
1337 {
1338 scm_t_string_failed_conversion_handler handler;
1339
1340 if (scm_is_eq (sym, sym_error))
1341 handler = SCM_FAILED_CONVERSION_ERROR;
1342 else if (scm_is_eq (sym, sym_substitute))
1343 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
1344 else if (scm_is_eq (sym, sym_escape))
1345 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
1346 else
1347 SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym));
1348
1349 if (scm_is_false (port))
1350 scm_i_set_default_port_conversion_handler (handler);
1351 else
1352 {
1353 SCM_VALIDATE_OPPORT (1, port);
1354 SCM_PTAB_ENTRY (port)->ilseq_handler = handler;
1355 }
1356
1357 return SCM_UNSPECIFIED;
1358 }
1359 #undef FUNC_NAME
1360
1361
1362 \f
1363
1364 /* The port lock. */
1365
1366 static void
1367 lock_port (void *mutex)
1368 {
1369 scm_i_pthread_mutex_lock ((scm_i_pthread_mutex_t *) mutex);
1370 }
1371
1372 static void
1373 unlock_port (void *mutex)
1374 {
1375 scm_i_pthread_mutex_unlock ((scm_i_pthread_mutex_t *) mutex);
1376 }
1377
1378 void
1379 scm_dynwind_lock_port (SCM port)
1380 #define FUNC_NAME "dynwind-lock-port"
1381 {
1382 scm_i_pthread_mutex_t *lock;
1383 SCM_VALIDATE_OPPORT (SCM_ARG1, port);
1384 scm_c_lock_port (port, &lock);
1385 if (lock)
1386 {
1387 scm_dynwind_unwind_handler (unlock_port, lock, SCM_F_WIND_EXPLICITLY);
1388 scm_dynwind_rewind_handler (lock_port, lock, 0);
1389 }
1390 }
1391 #undef FUNC_NAME
1392
1393
1394 \f
1395
1396 /* Input. */
1397
1398 int
1399 scm_get_byte_or_eof (SCM port)
1400 {
1401 scm_i_pthread_mutex_t *lock;
1402 int ret;
1403
1404 scm_c_lock_port (port, &lock);
1405 ret = scm_get_byte_or_eof_unlocked (port);
1406 if (lock)
1407 scm_i_pthread_mutex_unlock (lock);
1408
1409 return ret;
1410 }
1411
1412 int
1413 scm_peek_byte_or_eof (SCM port)
1414 {
1415 scm_i_pthread_mutex_t *lock;
1416 int ret;
1417
1418 scm_c_lock_port (port, &lock);
1419 ret = scm_peek_byte_or_eof_unlocked (port);
1420 if (lock)
1421 scm_i_pthread_mutex_unlock (lock);
1422
1423 return ret;
1424 }
1425
1426 /* scm_c_read
1427 *
1428 * Used by an application to read arbitrary number of bytes from an
1429 * SCM port. Same semantics as libc read, except that scm_c_read only
1430 * returns less than SIZE bytes if at end-of-file.
1431 *
1432 * Warning: Doesn't update port line and column counts! */
1433
1434 /* This structure, and the following swap_buffer function, are used
1435 for temporarily swapping a port's own read buffer, and the buffer
1436 that the caller of scm_c_read provides. */
1437 struct port_and_swap_buffer
1438 {
1439 scm_t_port *pt;
1440 unsigned char *buffer;
1441 size_t size;
1442 };
1443
1444 static void
1445 swap_buffer (void *data)
1446 {
1447 struct port_and_swap_buffer *psb = (struct port_and_swap_buffer *) data;
1448 unsigned char *old_buf = psb->pt->read_buf;
1449 size_t old_size = psb->pt->read_buf_size;
1450
1451 /* Make the port use (buffer, size) from the struct. */
1452 psb->pt->read_pos = psb->pt->read_buf = psb->pt->read_end = psb->buffer;
1453 psb->pt->read_buf_size = psb->size;
1454
1455 /* Save the port's old (buffer, size) in the struct. */
1456 psb->buffer = old_buf;
1457 psb->size = old_size;
1458 }
1459
1460 static int scm_i_fill_input_unlocked (SCM port);
1461
1462 size_t
1463 scm_c_read_unlocked (SCM port, void *buffer, size_t size)
1464 #define FUNC_NAME "scm_c_read"
1465 {
1466 scm_t_port *pt;
1467 scm_t_port_internal *pti;
1468 size_t n_read = 0, n_available;
1469 struct port_and_swap_buffer psb;
1470
1471 SCM_VALIDATE_OPINPORT (1, port);
1472
1473 pt = SCM_PTAB_ENTRY (port);
1474 pti = SCM_PORT_GET_INTERNAL (port);
1475 if (pt->rw_active == SCM_PORT_WRITE)
1476 SCM_PORT_DESCRIPTOR (port)->flush (port);
1477
1478 if (pt->rw_random)
1479 pt->rw_active = SCM_PORT_READ;
1480
1481 /* Take bytes first from the port's read buffer. */
1482 if (pt->read_pos < pt->read_end)
1483 {
1484 n_available = min (size, pt->read_end - pt->read_pos);
1485 memcpy (buffer, pt->read_pos, n_available);
1486 buffer = (char *) buffer + n_available;
1487 pt->read_pos += n_available;
1488 n_read += n_available;
1489 size -= n_available;
1490 }
1491
1492 /* Avoid the scm_dynwind_* costs if we now have enough data. */
1493 if (size == 0)
1494 return n_read;
1495
1496 /* Now we will call scm_i_fill_input_unlocked repeatedly until we have
1497 read the requested number of bytes. (Note that a single
1498 scm_i_fill_input_unlocked call does not guarantee to fill the whole
1499 of the port's read buffer.) */
1500 if (pt->read_buf_size <= 1
1501 && pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
1502 {
1503 /* The port that we are reading from is unbuffered - i.e. does not
1504 have its own persistent buffer - but we have a buffer, provided
1505 by our caller, that is the right size for the data that is
1506 wanted. For the following scm_i_fill_input_unlocked calls,
1507 therefore, we use the buffer in hand as the port's read buffer.
1508
1509 We need to make sure that the port's normal (1 byte) buffer is
1510 reinstated in case one of the scm_i_fill_input_unlocked ()
1511 calls throws an exception; we use the scm_dynwind_* API to
1512 achieve that.
1513
1514 A consequence of this optimization is that the fill_input
1515 functions can't unget characters. That'll push data to the
1516 pushback buffer instead of this psb buffer. */
1517 #if SCM_DEBUG == 1
1518 unsigned char *pback = pt->putback_buf;
1519 #endif
1520 psb.pt = pt;
1521 psb.buffer = buffer;
1522 psb.size = size;
1523 scm_dynwind_begin (SCM_F_DYNWIND_REWINDABLE);
1524 scm_dynwind_rewind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1525 scm_dynwind_unwind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1526
1527 /* Call scm_i_fill_input_unlocked until we have all the bytes that
1528 we need, or we hit EOF. */
1529 while (pt->read_buf_size && (scm_i_fill_input_unlocked (port) != EOF))
1530 {
1531 pt->read_buf_size -= (pt->read_end - pt->read_pos);
1532 pt->read_pos = pt->read_buf = pt->read_end;
1533 }
1534 #if SCM_DEBUG == 1
1535 if (pback != pt->putback_buf
1536 || pt->read_buf - (unsigned char *) buffer < 0)
1537 scm_misc_error (FUNC_NAME,
1538 "scm_c_read must not call a fill function that pushes "
1539 "back characters onto an unbuffered port", SCM_EOL);
1540 #endif
1541 n_read += pt->read_buf - (unsigned char *) buffer;
1542
1543 /* Reinstate the port's normal buffer. */
1544 scm_dynwind_end ();
1545 }
1546 else
1547 {
1548 /* The port has its own buffer. It is important that we use it,
1549 even if it happens to be smaller than our caller's buffer, so
1550 that a custom port implementation's entry points (in
1551 particular, fill_input) can rely on the buffer always being
1552 the same as they first set up. */
1553 while (size && (scm_i_fill_input_unlocked (port) != EOF))
1554 {
1555 n_available = min (size, pt->read_end - pt->read_pos);
1556 memcpy (buffer, pt->read_pos, n_available);
1557 buffer = (char *) buffer + n_available;
1558 pt->read_pos += n_available;
1559 n_read += n_available;
1560 size -= n_available;
1561 }
1562 }
1563
1564 return n_read;
1565 }
1566 #undef FUNC_NAME
1567
1568 size_t
1569 scm_c_read (SCM port, void *buffer, size_t size)
1570 {
1571 scm_i_pthread_mutex_t *lock;
1572 size_t ret;
1573
1574 scm_c_lock_port (port, &lock);
1575 ret = scm_c_read_unlocked (port, buffer, size);
1576 if (lock)
1577 scm_i_pthread_mutex_unlock (lock);
1578
1579
1580 return ret;
1581 }
1582
1583 /* Update the line and column number of PORT after consumption of C. */
1584 static inline void
1585 update_port_lf (scm_t_wchar c, SCM port)
1586 {
1587 switch (c)
1588 {
1589 case '\a':
1590 case EOF:
1591 break;
1592 case '\b':
1593 SCM_DECCOL (port);
1594 break;
1595 case '\n':
1596 SCM_INCLINE (port);
1597 break;
1598 case '\r':
1599 SCM_ZEROCOL (port);
1600 break;
1601 case '\t':
1602 SCM_TABCOL (port);
1603 break;
1604 default:
1605 SCM_INCCOL (port);
1606 break;
1607 }
1608 }
1609
1610 #define SCM_MBCHAR_BUF_SIZE (4)
1611
1612 /* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint.
1613 UTF8_BUF is assumed to contain a valid UTF-8 sequence. */
1614 static scm_t_wchar
1615 utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
1616 {
1617 scm_t_wchar codepoint;
1618
1619 if (utf8_buf[0] <= 0x7f)
1620 {
1621 assert (size == 1);
1622 codepoint = utf8_buf[0];
1623 }
1624 else if ((utf8_buf[0] & 0xe0) == 0xc0)
1625 {
1626 assert (size == 2);
1627 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL
1628 | (utf8_buf[1] & 0x3f);
1629 }
1630 else if ((utf8_buf[0] & 0xf0) == 0xe0)
1631 {
1632 assert (size == 3);
1633 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL
1634 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL
1635 | (utf8_buf[2] & 0x3f);
1636 }
1637 else
1638 {
1639 assert (size == 4);
1640 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL
1641 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL
1642 | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL
1643 | (utf8_buf[3] & 0x3f);
1644 }
1645
1646 return codepoint;
1647 }
1648
1649 /* Read a UTF-8 sequence from PORT. On success, return 0 and set
1650 *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8
1651 representation, and set *LEN to the length in bytes. Return
1652 `EILSEQ' on error. */
1653 static int
1654 get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
1655 scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1656 {
1657 #define ASSERT_NOT_EOF(b) \
1658 if (SCM_UNLIKELY ((b) == EOF)) \
1659 goto invalid_seq
1660 #define CONSUME_PEEKED_BYTE() \
1661 pt->read_pos++
1662
1663 int byte;
1664 scm_t_port *pt;
1665
1666 *len = 0;
1667 pt = SCM_PTAB_ENTRY (port);
1668
1669 byte = scm_get_byte_or_eof_unlocked (port);
1670 if (byte == EOF)
1671 {
1672 *codepoint = EOF;
1673 return 0;
1674 }
1675
1676 buf[0] = (scm_t_uint8) byte;
1677 *len = 1;
1678
1679 if (buf[0] <= 0x7f)
1680 /* 1-byte form. */
1681 *codepoint = buf[0];
1682 else if (buf[0] >= 0xc2 && buf[0] <= 0xdf)
1683 {
1684 /* 2-byte form. */
1685 byte = scm_peek_byte_or_eof_unlocked (port);
1686 ASSERT_NOT_EOF (byte);
1687
1688 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1689 goto invalid_seq;
1690
1691 CONSUME_PEEKED_BYTE ();
1692 buf[1] = (scm_t_uint8) byte;
1693 *len = 2;
1694
1695 *codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL
1696 | (buf[1] & 0x3f);
1697 }
1698 else if ((buf[0] & 0xf0) == 0xe0)
1699 {
1700 /* 3-byte form. */
1701 byte = scm_peek_byte_or_eof_unlocked (port);
1702 ASSERT_NOT_EOF (byte);
1703
1704 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80
1705 || (buf[0] == 0xe0 && byte < 0xa0)
1706 || (buf[0] == 0xed && byte > 0x9f)))
1707 goto invalid_seq;
1708
1709 CONSUME_PEEKED_BYTE ();
1710 buf[1] = (scm_t_uint8) byte;
1711 *len = 2;
1712
1713 byte = scm_peek_byte_or_eof_unlocked (port);
1714 ASSERT_NOT_EOF (byte);
1715
1716 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1717 goto invalid_seq;
1718
1719 CONSUME_PEEKED_BYTE ();
1720 buf[2] = (scm_t_uint8) byte;
1721 *len = 3;
1722
1723 *codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL
1724 | ((scm_t_wchar) buf[1] & 0x3f) << 6UL
1725 | (buf[2] & 0x3f);
1726 }
1727 else if (buf[0] >= 0xf0 && buf[0] <= 0xf4)
1728 {
1729 /* 4-byte form. */
1730 byte = scm_peek_byte_or_eof_unlocked (port);
1731 ASSERT_NOT_EOF (byte);
1732
1733 if (SCM_UNLIKELY (((byte & 0xc0) != 0x80)
1734 || (buf[0] == 0xf0 && byte < 0x90)
1735 || (buf[0] == 0xf4 && byte > 0x8f)))
1736 goto invalid_seq;
1737
1738 CONSUME_PEEKED_BYTE ();
1739 buf[1] = (scm_t_uint8) byte;
1740 *len = 2;
1741
1742 byte = scm_peek_byte_or_eof_unlocked (port);
1743 ASSERT_NOT_EOF (byte);
1744
1745 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1746 goto invalid_seq;
1747
1748 CONSUME_PEEKED_BYTE ();
1749 buf[2] = (scm_t_uint8) byte;
1750 *len = 3;
1751
1752 byte = scm_peek_byte_or_eof_unlocked (port);
1753 ASSERT_NOT_EOF (byte);
1754
1755 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1756 goto invalid_seq;
1757
1758 CONSUME_PEEKED_BYTE ();
1759 buf[3] = (scm_t_uint8) byte;
1760 *len = 4;
1761
1762 *codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL
1763 | ((scm_t_wchar) buf[1] & 0x3f) << 12UL
1764 | ((scm_t_wchar) buf[2] & 0x3f) << 6UL
1765 | (buf[3] & 0x3f);
1766 }
1767 else
1768 goto invalid_seq;
1769
1770 return 0;
1771
1772 invalid_seq:
1773 /* Here we could choose the consume the faulty byte when it's not a
1774 valid starting byte, but it's not a requirement. What Section 3.9
1775 of Unicode 6.0.0 mandates, though, is to not consume a byte that
1776 would otherwise be a valid starting byte. */
1777
1778 return EILSEQ;
1779
1780 #undef CONSUME_PEEKED_BYTE
1781 #undef ASSERT_NOT_EOF
1782 }
1783
1784 /* Read an ISO-8859-1 codepoint (a byte) from PORT. On success, return
1785 0 and set *CODEPOINT to the codepoint that was read, fill BUF with
1786 its UTF-8 representation, and set *LEN to the length in bytes.
1787 Return `EILSEQ' on error. */
1788 static int
1789 get_latin1_codepoint (SCM port, scm_t_wchar *codepoint,
1790 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1791 {
1792 *codepoint = scm_get_byte_or_eof_unlocked (port);
1793
1794 if (*codepoint == EOF)
1795 *len = 0;
1796 else
1797 {
1798 *len = 1;
1799 buf[0] = *codepoint;
1800 }
1801 return 0;
1802 }
1803
1804 /* Likewise, read a byte sequence from PORT, passing it through its
1805 input conversion descriptor. */
1806 static int
1807 get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
1808 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1809 {
1810 scm_t_iconv_descriptors *id;
1811 scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
1812 size_t input_size = 0;
1813
1814 id = scm_i_port_iconv_descriptors (port, SCM_PORT_READ);
1815
1816 for (;;)
1817 {
1818 int byte_read;
1819 char *input, *output;
1820 size_t input_left, output_left, done;
1821
1822 byte_read = scm_get_byte_or_eof_unlocked (port);
1823 if (SCM_UNLIKELY (byte_read == EOF))
1824 {
1825 if (SCM_LIKELY (input_size == 0))
1826 {
1827 *codepoint = (scm_t_wchar) EOF;
1828 *len = input_size;
1829 return 0;
1830 }
1831 else
1832 {
1833 /* EOF found in the middle of a multibyte character. */
1834 scm_i_set_pending_eof (port);
1835 return EILSEQ;
1836 }
1837 }
1838
1839 buf[input_size++] = byte_read;
1840
1841 input = buf;
1842 input_left = input_size;
1843 output = (char *) utf8_buf;
1844 output_left = sizeof (utf8_buf);
1845
1846 done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
1847
1848 if (done == (size_t) -1)
1849 {
1850 int err = errno;
1851 if (SCM_LIKELY (err == EINVAL))
1852 /* The input byte sequence did not form a complete
1853 character. Read another byte and try again. */
1854 continue;
1855 else
1856 return err;
1857 }
1858 else
1859 {
1860 size_t output_size = sizeof (utf8_buf) - output_left;
1861 if (SCM_LIKELY (output_size > 0))
1862 {
1863 /* iconv generated output. Convert the UTF8_BUF sequence
1864 to a Unicode code point. */
1865 *codepoint = utf8_to_codepoint (utf8_buf, output_size);
1866 *len = input_size;
1867 return 0;
1868 }
1869 else
1870 {
1871 /* iconv consumed some bytes without producing any output.
1872 Most likely this means that a Unicode byte-order mark
1873 (BOM) was consumed, which should not be included in the
1874 returned buf. Shift any remaining bytes to the beginning
1875 of buf, and continue the loop. */
1876 memmove (buf, input, input_left);
1877 input_size = input_left;
1878 continue;
1879 }
1880 }
1881 }
1882 }
1883
1884 /* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
1885 with the byte representation of the codepoint in PORT's encoding, and
1886 set *LEN to the length in bytes of that representation. Return 0 on
1887 success and an errno value on error. */
1888 static SCM_C_INLINE int
1889 get_codepoint (SCM port, scm_t_wchar *codepoint,
1890 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1891 {
1892 int err;
1893 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1894 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
1895
1896 if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
1897 err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
1898 else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
1899 err = get_latin1_codepoint (port, codepoint, buf, len);
1900 else
1901 err = get_iconv_codepoint (port, codepoint, buf, len);
1902
1903 if (SCM_LIKELY (err == 0))
1904 {
1905 if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read))
1906 {
1907 /* Record that we're no longer at stream start. */
1908 pti->at_stream_start_for_bom_read = 0;
1909 if (pt->rw_random)
1910 pti->at_stream_start_for_bom_write = 0;
1911
1912 /* If we just read a BOM in an encoding that recognizes them,
1913 then silently consume it and read another code point. */
1914 if (SCM_UNLIKELY
1915 (*codepoint == SCM_UNICODE_BOM
1916 && (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8
1917 || strcmp (pt->encoding, "UTF-16") == 0
1918 || strcmp (pt->encoding, "UTF-32") == 0)))
1919 return get_codepoint (port, codepoint, buf, len);
1920 }
1921 update_port_lf (*codepoint, port);
1922 }
1923 else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
1924 {
1925 *codepoint = '?';
1926 err = 0;
1927 update_port_lf (*codepoint, port);
1928 }
1929
1930 return err;
1931 }
1932
1933 /* Read a codepoint from PORT and return it. */
1934 scm_t_wchar
1935 scm_getc_unlocked (SCM port)
1936 #define FUNC_NAME "scm_getc"
1937 {
1938 int err;
1939 size_t len;
1940 scm_t_wchar codepoint;
1941 char buf[SCM_MBCHAR_BUF_SIZE];
1942
1943 err = get_codepoint (port, &codepoint, buf, &len);
1944 if (SCM_UNLIKELY (err != 0))
1945 /* At this point PORT should point past the invalid encoding, as per
1946 R6RS-lib Section 8.2.4. */
1947 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
1948
1949 return codepoint;
1950 }
1951 #undef FUNC_NAME
1952
1953 scm_t_wchar
1954 scm_getc (SCM port)
1955 {
1956 scm_i_pthread_mutex_t *lock;
1957 scm_t_wchar ret;
1958
1959 scm_c_lock_port (port, &lock);
1960 ret = scm_getc_unlocked (port);
1961 if (lock)
1962 scm_i_pthread_mutex_unlock (lock);
1963
1964
1965 return ret;
1966 }
1967
1968 SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
1969 (SCM port),
1970 "Return the next character available from @var{port}, updating\n"
1971 "@var{port} to point to the following character. If no more\n"
1972 "characters are available, the end-of-file object is returned.\n"
1973 "\n"
1974 "When @var{port}'s data cannot be decoded according to its\n"
1975 "character encoding, a @code{decoding-error} is raised and\n"
1976 "@var{port} points past the erroneous byte sequence.\n")
1977 #define FUNC_NAME s_scm_read_char
1978 {
1979 scm_t_wchar c;
1980 if (SCM_UNBNDP (port))
1981 port = scm_current_input_port ();
1982 SCM_VALIDATE_OPINPORT (1, port);
1983 c = scm_getc_unlocked (port);
1984 if (EOF == c)
1985 return SCM_EOF_VAL;
1986 return SCM_MAKE_CHAR (c);
1987 }
1988 #undef FUNC_NAME
1989
1990
1991 \f
1992
1993 /* Pushback. */
1994 \f
1995
1996
1997 static void
1998 scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
1999 #define FUNC_NAME "scm_unget_bytes"
2000 {
2001 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2002 size_t old_len, new_len;
2003
2004 scm_i_clear_pending_eof (port);
2005
2006 if (pt->read_buf != pt->putback_buf)
2007 /* switch to the put-back buffer. */
2008 {
2009 if (pt->putback_buf == NULL)
2010 {
2011 pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
2012 ? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
2013 pt->putback_buf
2014 = (unsigned char *) scm_gc_malloc_pointerless
2015 (pt->putback_buf_size, "putback buffer");
2016 }
2017
2018 pt->saved_read_buf = pt->read_buf;
2019 pt->saved_read_pos = pt->read_pos;
2020 pt->saved_read_end = pt->read_end;
2021 pt->saved_read_buf_size = pt->read_buf_size;
2022
2023 /* Put read_pos at the end of the buffer, so that ungets will not
2024 have to shift the buffer contents each time. */
2025 pt->read_buf = pt->putback_buf;
2026 pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
2027 pt->read_buf_size = pt->putback_buf_size;
2028 }
2029
2030 old_len = pt->read_end - pt->read_pos;
2031 new_len = old_len + len;
2032
2033 if (new_len > pt->read_buf_size)
2034 /* The putback buffer needs to be enlarged. */
2035 {
2036 size_t new_buf_size;
2037 unsigned char *new_buf, *new_end, *new_pos;
2038
2039 new_buf_size = pt->read_buf_size * 2;
2040 if (new_buf_size < new_len)
2041 new_buf_size = new_len;
2042
2043 new_buf = (unsigned char *)
2044 scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
2045
2046 /* Put the bytes at the end of the buffer, so that future
2047 ungets won't need to shift the buffer. */
2048 new_end = new_buf + new_buf_size;
2049 new_pos = new_end - old_len;
2050 memcpy (new_pos, pt->read_pos, old_len);
2051
2052 pt->read_buf = pt->putback_buf = new_buf;
2053 pt->read_pos = new_pos;
2054 pt->read_end = new_end;
2055 pt->read_buf_size = pt->putback_buf_size = new_buf_size;
2056 }
2057 else if (pt->read_buf + len < pt->read_pos)
2058 /* If needed, shift the existing buffer contents up.
2059 This should not happen unless some external code
2060 manipulates the putback buffer pointers. */
2061 {
2062 unsigned char *new_end = pt->read_buf + pt->read_buf_size;
2063 unsigned char *new_pos = new_end - old_len;
2064
2065 memmove (new_pos, pt->read_pos, old_len);
2066 pt->read_pos = new_pos;
2067 pt->read_end = new_end;
2068 }
2069
2070 /* Move read_pos back and copy the bytes there. */
2071 pt->read_pos -= len;
2072 memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
2073
2074 if (pt->rw_active == SCM_PORT_WRITE)
2075 scm_flush (port);
2076
2077 if (pt->rw_random)
2078 pt->rw_active = SCM_PORT_READ;
2079 }
2080 #undef FUNC_NAME
2081
2082 void
2083 scm_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
2084 {
2085 scm_i_unget_bytes_unlocked (buf, len, port);
2086 }
2087
2088 void
2089 scm_unget_byte_unlocked (int c, SCM port)
2090 {
2091 unsigned char byte = c;
2092 scm_i_unget_bytes_unlocked (&byte, 1, port);
2093 }
2094
2095 void
2096 scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
2097 {
2098 scm_i_pthread_mutex_t *lock;
2099 scm_c_lock_port (port, &lock);
2100 scm_i_unget_bytes_unlocked (buf, len, port);
2101 if (lock)
2102 scm_i_pthread_mutex_unlock (lock);
2103 }
2104
2105 void
2106 scm_unget_byte (int c, SCM port)
2107 {
2108 unsigned char byte = c;
2109 scm_i_pthread_mutex_t *lock;
2110 scm_c_lock_port (port, &lock);
2111 scm_i_unget_bytes_unlocked (&byte, 1, port);
2112 if (lock)
2113 scm_i_pthread_mutex_unlock (lock);
2114 }
2115
2116 void
2117 scm_ungetc_unlocked (scm_t_wchar c, SCM port)
2118 #define FUNC_NAME "scm_ungetc"
2119 {
2120 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2121 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
2122 char *result;
2123 char result_buf[10];
2124 size_t len;
2125
2126 len = sizeof (result_buf);
2127
2128 if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
2129 {
2130 if (c < 0xf0)
2131 {
2132 result_buf[0] = (char) c;
2133 result = result_buf;
2134 len = 1;
2135 }
2136 else
2137 result =
2138 (char *) u32_to_u8 ((uint32_t *) &c, 1, (uint8_t *) result_buf, &len);
2139 }
2140 else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1 && c <= 0xff)
2141 {
2142 result_buf[0] = (char) c;
2143 result = result_buf;
2144 len = 1;
2145 }
2146 else
2147 result = u32_conv_to_encoding (pt->encoding,
2148 (enum iconv_ilseq_handler) pt->ilseq_handler,
2149 (uint32_t *) &c, 1, NULL,
2150 result_buf, &len);
2151
2152 if (SCM_UNLIKELY (result == NULL || len == 0))
2153 scm_encoding_error (FUNC_NAME, errno,
2154 "conversion to port encoding failed",
2155 SCM_BOOL_F, SCM_MAKE_CHAR (c));
2156
2157 scm_i_unget_bytes_unlocked ((unsigned char *) result, len, port);
2158
2159 if (SCM_UNLIKELY (result != result_buf))
2160 free (result);
2161
2162 if (c == '\n')
2163 {
2164 /* What should col be in this case?
2165 * We'll leave it at -1.
2166 */
2167 SCM_LINUM (port) -= 1;
2168 }
2169 else
2170 SCM_COL(port) -= 1;
2171 }
2172 #undef FUNC_NAME
2173
2174 void
2175 scm_ungetc (scm_t_wchar c, SCM port)
2176 {
2177 scm_i_pthread_mutex_t *lock;
2178 scm_c_lock_port (port, &lock);
2179 scm_ungetc_unlocked (c, port);
2180 if (lock)
2181 scm_i_pthread_mutex_unlock (lock);
2182
2183 }
2184
2185 void
2186 scm_ungets_unlocked (const char *s, int n, SCM port)
2187 {
2188 /* This is simple minded and inefficient, but unreading strings is
2189 * probably not a common operation, and remember that line and
2190 * column numbers have to be handled...
2191 *
2192 * Please feel free to write an optimized version!
2193 */
2194 while (n--)
2195 scm_ungetc_unlocked (s[n], port);
2196 }
2197
2198 void
2199 scm_ungets (const char *s, int n, SCM port)
2200 {
2201 scm_i_pthread_mutex_t *lock;
2202 scm_c_lock_port (port, &lock);
2203 scm_ungets_unlocked (s, n, port);
2204 if (lock)
2205 scm_i_pthread_mutex_unlock (lock);
2206
2207 }
2208
2209 SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
2210 (SCM port),
2211 "Return the next character available from @var{port},\n"
2212 "@emph{without} updating @var{port} to point to the following\n"
2213 "character. If no more characters are available, the\n"
2214 "end-of-file object is returned.\n"
2215 "\n"
2216 "The value returned by\n"
2217 "a call to @code{peek-char} is the same as the value that would\n"
2218 "have been returned by a call to @code{read-char} on the same\n"
2219 "port. The only difference is that the very next call to\n"
2220 "@code{read-char} or @code{peek-char} on that @var{port} will\n"
2221 "return the value returned by the preceding call to\n"
2222 "@code{peek-char}. In particular, a call to @code{peek-char} on\n"
2223 "an interactive port will hang waiting for input whenever a call\n"
2224 "to @code{read-char} would have hung.\n"
2225 "\n"
2226 "As for @code{read-char}, a @code{decoding-error} may be raised\n"
2227 "if such a situation occurs. However, unlike with @code{read-char},\n"
2228 "@var{port} still points at the beginning of the erroneous byte\n"
2229 "sequence when the error is raised.\n")
2230 #define FUNC_NAME s_scm_peek_char
2231 {
2232 int err;
2233 SCM result;
2234 scm_t_wchar c;
2235 char bytes[SCM_MBCHAR_BUF_SIZE];
2236 long column, line;
2237 size_t len = 0;
2238
2239 if (SCM_UNBNDP (port))
2240 port = scm_current_input_port ();
2241 SCM_VALIDATE_OPINPORT (1, port);
2242
2243 column = SCM_COL (port);
2244 line = SCM_LINUM (port);
2245
2246 err = get_codepoint (port, &c, bytes, &len);
2247
2248 scm_i_unget_bytes_unlocked ((unsigned char *) bytes, len, port);
2249
2250 SCM_COL (port) = column;
2251 SCM_LINUM (port) = line;
2252
2253 if (SCM_UNLIKELY (err != 0))
2254 {
2255 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
2256
2257 /* Shouldn't happen since `catch' always aborts to prompt. */
2258 result = SCM_BOOL_F;
2259 }
2260 else if (c == EOF)
2261 {
2262 scm_i_set_pending_eof (port);
2263 result = SCM_EOF_VAL;
2264 }
2265 else
2266 result = SCM_MAKE_CHAR (c);
2267
2268 return result;
2269 }
2270 #undef FUNC_NAME
2271
2272 SCM_DEFINE (scm_unread_char, "unread-char", 1, 1, 0,
2273 (SCM cobj, SCM port),
2274 "Place character @var{cobj} in @var{port} so that it will be\n"
2275 "read by the next read operation. If called multiple times, the\n"
2276 "unread characters will be read again in last-in first-out\n"
2277 "order. If @var{port} is not supplied, the current input port\n"
2278 "is used.")
2279 #define FUNC_NAME s_scm_unread_char
2280 {
2281 int c;
2282
2283 SCM_VALIDATE_CHAR (1, cobj);
2284 if (SCM_UNBNDP (port))
2285 port = scm_current_input_port ();
2286 SCM_VALIDATE_OPINPORT (2, port);
2287
2288 c = SCM_CHAR (cobj);
2289
2290 scm_ungetc_unlocked (c, port);
2291 return cobj;
2292 }
2293 #undef FUNC_NAME
2294
2295 SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0,
2296 (SCM str, SCM port),
2297 "Place the string @var{str} in @var{port} so that its characters will be\n"
2298 "read in subsequent read operations. If called multiple times, the\n"
2299 "unread characters will be read again in last-in first-out order. If\n"
2300 "@var{port} is not supplied, the current-input-port is used.")
2301 #define FUNC_NAME s_scm_unread_string
2302 {
2303 int n;
2304 SCM_VALIDATE_STRING (1, str);
2305 if (SCM_UNBNDP (port))
2306 port = scm_current_input_port ();
2307 SCM_VALIDATE_OPINPORT (2, port);
2308
2309 n = scm_i_string_length (str);
2310
2311 while (n--)
2312 scm_ungetc_unlocked (scm_i_string_ref (str, n), port);
2313
2314 return str;
2315 }
2316 #undef FUNC_NAME
2317
2318
2319 \f
2320
2321 /* Manipulating the buffers. */
2322
2323 /* This routine does not take any locks, as it is usually called as part
2324 of a port implementation. */
2325 void
2326 scm_port_non_buffer (scm_t_port *pt)
2327 {
2328 pt->read_pos = pt->read_buf = pt->read_end = &pt->shortbuf;
2329 pt->write_buf = pt->write_pos = &pt->shortbuf;
2330 pt->read_buf_size = pt->write_buf_size = 1;
2331 pt->write_end = pt->write_buf + pt->write_buf_size;
2332 }
2333
2334 /* this should only be called when the read buffer is empty. it
2335 tries to refill the read buffer. it returns the first char from
2336 the port, which is either EOF or *(pt->read_pos). */
2337 static int
2338 scm_i_fill_input_unlocked (SCM port)
2339 {
2340 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2341 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
2342
2343 assert (pt->read_pos == pt->read_end);
2344
2345 if (pti->pending_eof)
2346 {
2347 pti->pending_eof = 0;
2348 return EOF;
2349 }
2350
2351 if (pt->read_buf == pt->putback_buf)
2352 {
2353 /* finished reading put-back chars. */
2354 pt->read_buf = pt->saved_read_buf;
2355 pt->read_pos = pt->saved_read_pos;
2356 pt->read_end = pt->saved_read_end;
2357 pt->read_buf_size = pt->saved_read_buf_size;
2358 if (pt->read_pos < pt->read_end)
2359 return *(pt->read_pos);
2360 }
2361 return SCM_PORT_DESCRIPTOR (port)->fill_input (port);
2362 }
2363
2364 int
2365 scm_fill_input (SCM port)
2366 {
2367 scm_i_pthread_mutex_t *lock;
2368 int ret;
2369
2370 scm_c_lock_port (port, &lock);
2371 ret = scm_fill_input_unlocked (port);
2372 if (lock)
2373 scm_i_pthread_mutex_unlock (lock);
2374
2375
2376 return ret;
2377 }
2378
2379 /* Slow-path fallback for 'scm_get_byte_or_eof_unlocked' */
2380 int
2381 scm_slow_get_byte_or_eof_unlocked (SCM port)
2382 {
2383 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2384
2385 if (pt->rw_active == SCM_PORT_WRITE)
2386 scm_flush_unlocked (port);
2387
2388 if (pt->rw_random)
2389 pt->rw_active = SCM_PORT_READ;
2390
2391 if (pt->read_pos >= pt->read_end)
2392 {
2393 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2394 return EOF;
2395 }
2396
2397 return *pt->read_pos++;
2398 }
2399
2400 /* Slow-path fallback for 'scm_peek_byte_or_eof_unlocked' */
2401 int
2402 scm_slow_peek_byte_or_eof_unlocked (SCM port)
2403 {
2404 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2405
2406 if (pt->rw_active == SCM_PORT_WRITE)
2407 scm_flush_unlocked (port);
2408
2409 if (pt->rw_random)
2410 pt->rw_active = SCM_PORT_READ;
2411
2412 if (pt->read_pos >= pt->read_end)
2413 {
2414 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2415 {
2416 scm_i_set_pending_eof (port);
2417 return EOF;
2418 }
2419 }
2420
2421 return *pt->read_pos;
2422 }
2423
2424 /* Move up to READ_LEN bytes from PORT's putback and/or read buffers
2425 into memory starting at DEST. Return the number of bytes moved.
2426 PORT's line/column numbers are left unchanged. */
2427 size_t
2428 scm_take_from_input_buffers (SCM port, char *dest, size_t read_len)
2429 {
2430 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2431 size_t bytes_read = 0;
2432 size_t from_buf = min (pt->read_end - pt->read_pos, read_len);
2433
2434 if (from_buf > 0)
2435 {
2436 memcpy (dest, pt->read_pos, from_buf);
2437 pt->read_pos += from_buf;
2438 bytes_read += from_buf;
2439 read_len -= from_buf;
2440 dest += from_buf;
2441 }
2442
2443 /* if putback was active, try the real input buffer too. */
2444 if (pt->read_buf == pt->putback_buf)
2445 {
2446 from_buf = min (pt->saved_read_end - pt->saved_read_pos, read_len);
2447 if (from_buf > 0)
2448 {
2449 memcpy (dest, pt->saved_read_pos, from_buf);
2450 pt->saved_read_pos += from_buf;
2451 bytes_read += from_buf;
2452 }
2453 }
2454
2455 return bytes_read;
2456 }
2457
2458 /* Clear a port's read buffers, returning the contents. */
2459 SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0,
2460 (SCM port),
2461 "This procedure clears a port's input buffers, similar\n"
2462 "to the way that force-output clears the output buffer. The\n"
2463 "contents of the buffers are returned as a single string, e.g.,\n"
2464 "\n"
2465 "@lisp\n"
2466 "(define p (open-input-file ...))\n"
2467 "(drain-input p) => empty string, nothing buffered yet.\n"
2468 "(unread-char (read-char p) p)\n"
2469 "(drain-input p) => initial chars from p, up to the buffer size.\n"
2470 "@end lisp\n\n"
2471 "Draining the buffers may be useful for cleanly finishing\n"
2472 "buffered I/O so that the file descriptor can be used directly\n"
2473 "for further input.")
2474 #define FUNC_NAME s_scm_drain_input
2475 {
2476 SCM result;
2477 char *data;
2478 scm_t_port *pt;
2479 long count;
2480
2481 SCM_VALIDATE_OPINPORT (1, port);
2482 pt = SCM_PTAB_ENTRY (port);
2483
2484 count = pt->read_end - pt->read_pos;
2485 if (pt->read_buf == pt->putback_buf)
2486 count += pt->saved_read_end - pt->saved_read_pos;
2487
2488 if (count)
2489 {
2490 result = scm_i_make_string (count, &data, 0);
2491 scm_take_from_input_buffers (port, data, count);
2492 }
2493 else
2494 result = scm_nullstr;
2495
2496 return result;
2497 }
2498 #undef FUNC_NAME
2499
2500 void
2501 scm_end_input_unlocked (SCM port)
2502 {
2503 long offset;
2504 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2505
2506 scm_i_clear_pending_eof (port);
2507 if (pt->read_buf == pt->putback_buf)
2508 {
2509 offset = pt->read_end - pt->read_pos;
2510 pt->read_buf = pt->saved_read_buf;
2511 pt->read_pos = pt->saved_read_pos;
2512 pt->read_end = pt->saved_read_end;
2513 pt->read_buf_size = pt->saved_read_buf_size;
2514 }
2515 else
2516 offset = 0;
2517
2518 SCM_PORT_DESCRIPTOR (port)->end_input (port, offset);
2519 }
2520
2521 void
2522 scm_end_input (SCM port)
2523 {
2524 scm_i_pthread_mutex_t *lock;
2525 scm_c_lock_port (port, &lock);
2526 scm_end_input_unlocked (port);
2527 if (lock)
2528 scm_i_pthread_mutex_unlock (lock);
2529
2530 }
2531
2532 SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0,
2533 (SCM port),
2534 "Flush the specified output port, or the current output port if @var{port}\n"
2535 "is omitted. The current output buffer contents are passed to the\n"
2536 "underlying port implementation (e.g., in the case of fports, the\n"
2537 "data will be written to the file and the output buffer will be cleared.)\n"
2538 "It has no effect on an unbuffered port.\n\n"
2539 "The return value is unspecified.")
2540 #define FUNC_NAME s_scm_force_output
2541 {
2542 if (SCM_UNBNDP (port))
2543 port = scm_current_output_port ();
2544 else
2545 {
2546 port = SCM_COERCE_OUTPORT (port);
2547 SCM_VALIDATE_OPOUTPORT (1, port);
2548 }
2549 scm_flush_unlocked (port);
2550 return SCM_UNSPECIFIED;
2551 }
2552 #undef FUNC_NAME
2553
2554 void
2555 scm_flush_unlocked (SCM port)
2556 {
2557 SCM_PORT_DESCRIPTOR (port)->flush (port);
2558 }
2559
2560 void
2561 scm_flush (SCM port)
2562 {
2563 scm_i_pthread_mutex_t *lock;
2564 scm_c_lock_port (port, &lock);
2565 scm_flush_unlocked (port);
2566 if (lock)
2567 scm_i_pthread_mutex_unlock (lock);
2568
2569 }
2570
2571 int
2572 scm_fill_input_unlocked (SCM port)
2573 {
2574 return scm_i_fill_input_unlocked (port);
2575 }
2576
2577
2578 \f
2579
2580 /* Output. */
2581
2582 void
2583 scm_putc (char c, SCM port)
2584 {
2585 scm_i_pthread_mutex_t *lock;
2586 scm_c_lock_port (port, &lock);
2587 scm_putc_unlocked (c, port);
2588 if (lock)
2589 scm_i_pthread_mutex_unlock (lock);
2590
2591 }
2592
2593 void
2594 scm_puts (const char *s, SCM port)
2595 {
2596 scm_i_pthread_mutex_t *lock;
2597 scm_c_lock_port (port, &lock);
2598 scm_puts_unlocked (s, port);
2599 if (lock)
2600 scm_i_pthread_mutex_unlock (lock);
2601
2602 }
2603
2604 /* scm_c_write
2605 *
2606 * Used by an application to write arbitrary number of bytes to an SCM
2607 * port. Similar semantics as libc write. However, unlike libc
2608 * write, scm_c_write writes the requested number of bytes and has no
2609 * return value.
2610 *
2611 * Warning: Doesn't update port line and column counts!
2612 */
2613 void
2614 scm_c_write_unlocked (SCM port, const void *ptr, size_t size)
2615 #define FUNC_NAME "scm_c_write"
2616 {
2617 scm_t_port *pt;
2618 scm_t_ptob_descriptor *ptob;
2619
2620 SCM_VALIDATE_OPOUTPORT (1, port);
2621
2622 pt = SCM_PTAB_ENTRY (port);
2623 ptob = SCM_PORT_DESCRIPTOR (port);
2624
2625 if (pt->rw_active == SCM_PORT_READ)
2626 scm_end_input_unlocked (port);
2627
2628 ptob->write (port, ptr, size);
2629
2630 if (pt->rw_random)
2631 pt->rw_active = SCM_PORT_WRITE;
2632 }
2633 #undef FUNC_NAME
2634
2635 void
2636 scm_c_write (SCM port, const void *ptr, size_t size)
2637 {
2638 scm_i_pthread_mutex_t *lock;
2639 scm_c_lock_port (port, &lock);
2640 scm_c_write_unlocked (port, ptr, size);
2641 if (lock)
2642 scm_i_pthread_mutex_unlock (lock);
2643
2644 }
2645
2646 /* scm_lfwrite
2647 *
2648 * This function differs from scm_c_write; it updates port line and
2649 * column. */
2650 void
2651 scm_lfwrite_unlocked (const char *ptr, size_t size, SCM port)
2652 {
2653 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2654 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
2655
2656 if (pt->rw_active == SCM_PORT_READ)
2657 scm_end_input_unlocked (port);
2658
2659 ptob->write (port, ptr, size);
2660
2661 for (; size; ptr++, size--)
2662 update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port);
2663
2664 if (pt->rw_random)
2665 pt->rw_active = SCM_PORT_WRITE;
2666 }
2667
2668 void
2669 scm_lfwrite (const char *ptr, size_t size, SCM port)
2670 {
2671 scm_i_pthread_mutex_t *lock;
2672 scm_c_lock_port (port, &lock);
2673 scm_lfwrite_unlocked (ptr, size, port);
2674 if (lock)
2675 scm_i_pthread_mutex_unlock (lock);
2676
2677 }
2678
2679 /* Write STR to PORT from START inclusive to END exclusive. */
2680 void
2681 scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
2682 {
2683 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2684
2685 if (pt->rw_active == SCM_PORT_READ)
2686 scm_end_input_unlocked (port);
2687
2688 if (end == (size_t) -1)
2689 end = scm_i_string_length (str);
2690
2691 scm_i_display_substring (str, start, end, port);
2692
2693 if (pt->rw_random)
2694 pt->rw_active = SCM_PORT_WRITE;
2695 }
2696
2697
2698 \f
2699
2700 /* Querying and setting positions, and character availability. */
2701
2702 SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0,
2703 (SCM port),
2704 "Return @code{#t} if a character is ready on input @var{port}\n"
2705 "and return @code{#f} otherwise. If @code{char-ready?} returns\n"
2706 "@code{#t} then the next @code{read-char} operation on\n"
2707 "@var{port} is guaranteed not to hang. If @var{port} is a file\n"
2708 "port at end of file then @code{char-ready?} returns @code{#t}.\n"
2709 "\n"
2710 "@code{char-ready?} exists to make it possible for a\n"
2711 "program to accept characters from interactive ports without\n"
2712 "getting stuck waiting for input. Any input editors associated\n"
2713 "with such ports must make sure that characters whose existence\n"
2714 "has been asserted by @code{char-ready?} cannot be rubbed out.\n"
2715 "If @code{char-ready?} were to return @code{#f} at end of file,\n"
2716 "a port at end of file would be indistinguishable from an\n"
2717 "interactive port that has no ready characters.")
2718 #define FUNC_NAME s_scm_char_ready_p
2719 {
2720 scm_t_port *pt;
2721
2722 if (SCM_UNBNDP (port))
2723 port = scm_current_input_port ();
2724 /* It's possible to close the current input port, so validate even in
2725 this case. */
2726 SCM_VALIDATE_OPINPORT (1, port);
2727
2728 pt = SCM_PTAB_ENTRY (port);
2729
2730 /* if the current read buffer is filled, or the
2731 last pushed-back char has been read and the saved buffer is
2732 filled, result is true. */
2733 if (pt->read_pos < pt->read_end
2734 || (pt->read_buf == pt->putback_buf
2735 && pt->saved_read_pos < pt->saved_read_end))
2736 return SCM_BOOL_T;
2737 else
2738 {
2739 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
2740
2741 if (ptob->input_waiting)
2742 return scm_from_bool(ptob->input_waiting (port));
2743 else
2744 return SCM_BOOL_T;
2745 }
2746 }
2747 #undef FUNC_NAME
2748
2749 SCM_DEFINE (scm_seek, "seek", 3, 0, 0,
2750 (SCM fd_port, SCM offset, SCM whence),
2751 "Sets the current position of @var{fd_port} to the integer\n"
2752 "@var{offset}, which is interpreted according to the value of\n"
2753 "@var{whence}.\n"
2754 "\n"
2755 "One of the following variables should be supplied for\n"
2756 "@var{whence}:\n"
2757 "@defvar SEEK_SET\n"
2758 "Seek from the beginning of the file.\n"
2759 "@end defvar\n"
2760 "@defvar SEEK_CUR\n"
2761 "Seek from the current position.\n"
2762 "@end defvar\n"
2763 "@defvar SEEK_END\n"
2764 "Seek from the end of the file.\n"
2765 "@end defvar\n"
2766 "If @var{fd_port} is a file descriptor, the underlying system\n"
2767 "call is @code{lseek}. @var{port} may be a string port.\n"
2768 "\n"
2769 "The value returned is the new position in the file. This means\n"
2770 "that the current position of a port can be obtained using:\n"
2771 "@lisp\n"
2772 "(seek port 0 SEEK_CUR)\n"
2773 "@end lisp")
2774 #define FUNC_NAME s_scm_seek
2775 {
2776 int how;
2777
2778 fd_port = SCM_COERCE_OUTPORT (fd_port);
2779
2780 how = scm_to_int (whence);
2781 if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END)
2782 SCM_OUT_OF_RANGE (3, whence);
2783
2784 if (SCM_OPPORTP (fd_port))
2785 {
2786 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (fd_port);
2787 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (fd_port);
2788 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2789 off_t_or_off64_t rv;
2790
2791 if (!ptob->seek)
2792 SCM_MISC_ERROR ("port is not seekable",
2793 scm_cons (fd_port, SCM_EOL));
2794 else
2795 rv = ptob->seek (fd_port, off, how);
2796
2797 /* Set stream-start flags according to new position. */
2798 pti->at_stream_start_for_bom_read = (rv == 0);
2799 pti->at_stream_start_for_bom_write = (rv == 0);
2800
2801 scm_i_clear_pending_eof (fd_port);
2802
2803 return scm_from_off_t_or_off64_t (rv);
2804 }
2805 else /* file descriptor?. */
2806 {
2807 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2808 off_t_or_off64_t rv;
2809 rv = lseek_or_lseek64 (scm_to_int (fd_port), off, how);
2810 if (rv == -1)
2811 SCM_SYSERROR;
2812 return scm_from_off_t_or_off64_t (rv);
2813 }
2814 }
2815 #undef FUNC_NAME
2816
2817 #ifndef O_BINARY
2818 #define O_BINARY 0
2819 #endif
2820
2821 /* Mingw has ftruncate(), perhaps implemented above using chsize, but
2822 doesn't have the filename version truncate(), hence this code. */
2823 #if HAVE_FTRUNCATE && ! HAVE_TRUNCATE
2824 static int
2825 truncate (const char *file, off_t length)
2826 {
2827 int ret, fdes;
2828
2829 fdes = open (file, O_BINARY | O_WRONLY);
2830 if (fdes == -1)
2831 return -1;
2832
2833 ret = ftruncate (fdes, length);
2834 if (ret == -1)
2835 {
2836 int save_errno = errno;
2837 close (fdes);
2838 errno = save_errno;
2839 return -1;
2840 }
2841
2842 return close (fdes);
2843 }
2844 #endif /* HAVE_FTRUNCATE && ! HAVE_TRUNCATE */
2845
2846 SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0,
2847 (SCM object, SCM length),
2848 "Truncate file @var{object} to @var{length} bytes. @var{object}\n"
2849 "can be a filename string, a port object, or an integer file\n"
2850 "descriptor.\n"
2851 "The return value is unspecified.\n"
2852 "\n"
2853 "For a port or file descriptor @var{length} can be omitted, in\n"
2854 "which case the file is truncated at the current position (per\n"
2855 "@code{ftell} above).\n"
2856 "\n"
2857 "On most systems a file can be extended by giving a length\n"
2858 "greater than the current size, but this is not mandatory in the\n"
2859 "POSIX standard.")
2860 #define FUNC_NAME s_scm_truncate_file
2861 {
2862 int rv;
2863
2864 /* "object" can be a port, fdes or filename.
2865
2866 Negative "length" makes no sense, but it's left to truncate() or
2867 ftruncate() to give back an error for that (normally EINVAL).
2868 */
2869
2870 if (SCM_UNBNDP (length))
2871 {
2872 /* must supply length if object is a filename. */
2873 if (scm_is_string (object))
2874 SCM_MISC_ERROR("must supply length if OBJECT is a filename", SCM_EOL);
2875
2876 length = scm_seek (object, SCM_INUM0, scm_from_int (SEEK_CUR));
2877 }
2878
2879 object = SCM_COERCE_OUTPORT (object);
2880 if (scm_is_integer (object))
2881 {
2882 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2883 SCM_SYSCALL (rv = ftruncate_or_ftruncate64 (scm_to_int (object),
2884 c_length));
2885 }
2886 else if (SCM_OPOUTPORTP (object))
2887 {
2888 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2889 scm_t_port *pt = SCM_PTAB_ENTRY (object);
2890 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (object);
2891
2892 if (!ptob->truncate)
2893 SCM_MISC_ERROR ("port is not truncatable", SCM_EOL);
2894
2895 scm_i_clear_pending_eof (object);
2896 if (pt->rw_active == SCM_PORT_READ)
2897 scm_end_input_unlocked (object);
2898 else if (pt->rw_active == SCM_PORT_WRITE)
2899 ptob->flush (object);
2900
2901 ptob->truncate (object, c_length);
2902 rv = 0;
2903 }
2904 else
2905 {
2906 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2907 char *str = scm_to_locale_string (object);
2908 int eno;
2909 SCM_SYSCALL (rv = truncate_or_truncate64 (str, c_length));
2910 eno = errno;
2911 free (str);
2912 errno = eno;
2913 }
2914 if (rv == -1)
2915 SCM_SYSERROR;
2916 return SCM_UNSPECIFIED;
2917 }
2918 #undef FUNC_NAME
2919
2920 SCM_DEFINE (scm_port_line, "port-line", 1, 0, 0,
2921 (SCM port),
2922 "Return the current line number for @var{port}.\n"
2923 "\n"
2924 "The first line of a file is 0. But you might want to add 1\n"
2925 "when printing line numbers, since starting from 1 is\n"
2926 "traditional in error messages, and likely to be more natural to\n"
2927 "non-programmers.")
2928 #define FUNC_NAME s_scm_port_line
2929 {
2930 port = SCM_COERCE_OUTPORT (port);
2931 SCM_VALIDATE_OPENPORT (1, port);
2932 return scm_from_long (SCM_LINUM (port));
2933 }
2934 #undef FUNC_NAME
2935
2936 SCM_DEFINE (scm_set_port_line_x, "set-port-line!", 2, 0, 0,
2937 (SCM port, SCM line),
2938 "Set the current line number for @var{port} to @var{line}. The\n"
2939 "first line of a file is 0.")
2940 #define FUNC_NAME s_scm_set_port_line_x
2941 {
2942 port = SCM_COERCE_OUTPORT (port);
2943 SCM_VALIDATE_OPENPORT (1, port);
2944 SCM_PTAB_ENTRY (port)->line_number = scm_to_long (line);
2945 return SCM_UNSPECIFIED;
2946 }
2947 #undef FUNC_NAME
2948
2949 SCM_DEFINE (scm_port_column, "port-column", 1, 0, 0,
2950 (SCM port),
2951 "Return the current column number of @var{port}.\n"
2952 "If the number is\n"
2953 "unknown, the result is #f. Otherwise, the result is a 0-origin integer\n"
2954 "- i.e. the first character of the first line is line 0, column 0.\n"
2955 "(However, when you display a file position, for example in an error\n"
2956 "message, we recommend you add 1 to get 1-origin integers. This is\n"
2957 "because lines and column numbers traditionally start with 1, and that is\n"
2958 "what non-programmers will find most natural.)")
2959 #define FUNC_NAME s_scm_port_column
2960 {
2961 port = SCM_COERCE_OUTPORT (port);
2962 SCM_VALIDATE_OPENPORT (1, port);
2963 return scm_from_int (SCM_COL (port));
2964 }
2965 #undef FUNC_NAME
2966
2967 SCM_DEFINE (scm_set_port_column_x, "set-port-column!", 2, 0, 0,
2968 (SCM port, SCM column),
2969 "Set the current column of @var{port}. Before reading the first\n"
2970 "character on a line the column should be 0.")
2971 #define FUNC_NAME s_scm_set_port_column_x
2972 {
2973 port = SCM_COERCE_OUTPORT (port);
2974 SCM_VALIDATE_OPENPORT (1, port);
2975 SCM_PTAB_ENTRY (port)->column_number = scm_to_int (column);
2976 return SCM_UNSPECIFIED;
2977 }
2978 #undef FUNC_NAME
2979
2980 SCM_DEFINE (scm_port_filename, "port-filename", 1, 0, 0,
2981 (SCM port),
2982 "Return the filename associated with @var{port}, or @code{#f}\n"
2983 "if no filename is associated with the port.")
2984 #define FUNC_NAME s_scm_port_filename
2985 {
2986 port = SCM_COERCE_OUTPORT (port);
2987 SCM_VALIDATE_OPENPORT (1, port);
2988 return SCM_FILENAME (port);
2989 }
2990 #undef FUNC_NAME
2991
2992 SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0,
2993 (SCM port, SCM filename),
2994 "Change the filename associated with @var{port}, using the current input\n"
2995 "port if none is specified. Note that this does not change the port's\n"
2996 "source of data, but only the value that is returned by\n"
2997 "@code{port-filename} and reported in diagnostic output.")
2998 #define FUNC_NAME s_scm_set_port_filename_x
2999 {
3000 port = SCM_COERCE_OUTPORT (port);
3001 SCM_VALIDATE_OPENPORT (1, port);
3002 /* We allow the user to set the filename to whatever he likes. */
3003 SCM_SET_FILENAME (port, filename);
3004 return SCM_UNSPECIFIED;
3005 }
3006 #undef FUNC_NAME
3007
3008
3009 \f
3010
3011 /* Implementation helpers for port printing functions. */
3012
3013 void
3014 scm_print_port_mode (SCM exp, SCM port)
3015 {
3016 scm_puts_unlocked (SCM_CLOSEDP (exp)
3017 ? "closed: "
3018 : (SCM_RDNG & SCM_CELL_WORD_0 (exp)
3019 ? (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
3020 ? "input-output: "
3021 : "input: ")
3022 : (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
3023 ? "output: "
3024 : "bogus: ")),
3025 port);
3026 }
3027
3028 int
3029 scm_port_print (SCM exp, SCM port, scm_print_state *pstate SCM_UNUSED)
3030 {
3031 char *type = SCM_PTOBNAME (SCM_PTOBNUM (exp));
3032 if (!type)
3033 type = "port";
3034 scm_puts_unlocked ("#<", port);
3035 scm_print_port_mode (exp, port);
3036 scm_puts_unlocked (type, port);
3037 scm_putc_unlocked (' ', port);
3038 scm_uintprint (SCM_CELL_WORD_1 (exp), 16, port);
3039 scm_putc_unlocked ('>', port);
3040 return 1;
3041 }
3042
3043
3044 \f
3045
3046 /* Iterating over all ports. */
3047
3048 struct for_each_data
3049 {
3050 void (*proc) (void *data, SCM p);
3051 void *data;
3052 };
3053
3054 static SCM
3055 for_each_trampoline (void *data, SCM port, SCM result)
3056 {
3057 struct for_each_data *d = data;
3058
3059 d->proc (d->data, port);
3060
3061 return result;
3062 }
3063
3064 void
3065 scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data)
3066 {
3067 struct for_each_data d;
3068
3069 d.proc = proc;
3070 d.data = data;
3071
3072 scm_c_weak_set_fold (for_each_trampoline, &d, SCM_EOL,
3073 scm_i_port_weak_set);
3074 }
3075
3076 static void
3077 scm_for_each_trampoline (void *data, SCM port)
3078 {
3079 scm_call_1 (SCM_PACK_POINTER (data), port);
3080 }
3081
3082 SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0,
3083 (SCM proc),
3084 "Apply @var{proc} to each port in the Guile port table\n"
3085 "in turn. The return value is unspecified. More specifically,\n"
3086 "@var{proc} is applied exactly once to every port that exists\n"
3087 "in the system at the time @code{port-for-each} is invoked.\n"
3088 "Changes to the port table while @code{port-for-each} is running\n"
3089 "have no effect as far as @code{port-for-each} is concerned.")
3090 #define FUNC_NAME s_scm_port_for_each
3091 {
3092 SCM_VALIDATE_PROC (1, proc);
3093
3094 scm_c_port_for_each (scm_for_each_trampoline, SCM_UNPACK_POINTER (proc));
3095
3096 return SCM_UNSPECIFIED;
3097 }
3098 #undef FUNC_NAME
3099
3100 static void
3101 flush_output_port (void *closure, SCM port)
3102 {
3103 if (SCM_OPOUTPORTP (port))
3104 scm_flush_unlocked (port);
3105 }
3106
3107 SCM_DEFINE (scm_flush_all_ports, "flush-all-ports", 0, 0, 0,
3108 (),
3109 "Equivalent to calling @code{force-output} on\n"
3110 "all open output ports. The return value is unspecified.")
3111 #define FUNC_NAME s_scm_flush_all_ports
3112 {
3113 scm_c_port_for_each (&flush_output_port, NULL);
3114 return SCM_UNSPECIFIED;
3115 }
3116 #undef FUNC_NAME
3117
3118
3119 \f
3120
3121 /* Void ports. */
3122
3123 scm_t_bits scm_tc16_void_port = 0;
3124
3125 static int fill_input_void_port (SCM port SCM_UNUSED)
3126 {
3127 return EOF;
3128 }
3129
3130 static void
3131 write_void_port (SCM port SCM_UNUSED,
3132 const void *data SCM_UNUSED,
3133 size_t size SCM_UNUSED)
3134 {
3135 }
3136
3137 static SCM
3138 scm_i_void_port (long mode_bits)
3139 {
3140 SCM ret;
3141
3142 ret = scm_c_make_port (scm_tc16_void_port, mode_bits, 0);
3143
3144 scm_port_non_buffer (SCM_PTAB_ENTRY (ret));
3145
3146 return ret;
3147 }
3148
3149 SCM
3150 scm_void_port (char *mode_str)
3151 {
3152 return scm_i_void_port (scm_mode_bits (mode_str));
3153 }
3154
3155 SCM_DEFINE (scm_sys_make_void_port, "%make-void-port", 1, 0, 0,
3156 (SCM mode),
3157 "Create and return a new void port. A void port acts like\n"
3158 "@file{/dev/null}. The @var{mode} argument\n"
3159 "specifies the input/output modes for this port: see the\n"
3160 "documentation for @code{open-file} in @ref{File Ports}.")
3161 #define FUNC_NAME s_scm_sys_make_void_port
3162 {
3163 return scm_i_void_port (scm_i_mode_bits (mode));
3164 }
3165 #undef FUNC_NAME
3166
3167
3168 \f
3169
3170 /* Initialization. */
3171
3172 void
3173 scm_init_ports ()
3174 {
3175 /* lseek() symbols. */
3176 scm_c_define ("SEEK_SET", scm_from_int (SEEK_SET));
3177 scm_c_define ("SEEK_CUR", scm_from_int (SEEK_CUR));
3178 scm_c_define ("SEEK_END", scm_from_int (SEEK_END));
3179
3180 scm_tc16_void_port = scm_make_port_type ("void", fill_input_void_port,
3181 write_void_port);
3182
3183 cur_inport_fluid = scm_make_fluid ();
3184 cur_outport_fluid = scm_make_fluid ();
3185 cur_errport_fluid = scm_make_fluid ();
3186 cur_loadport_fluid = scm_make_fluid ();
3187
3188 scm_i_port_weak_set = scm_c_make_weak_set (31);
3189
3190 #include "libguile/ports.x"
3191
3192 /* Use Latin-1 as the default port encoding. */
3193 SCM_VARIABLE_SET (default_port_encoding_var,
3194 scm_make_fluid_with_default (SCM_BOOL_F));
3195 scm_port_encoding_init = 1;
3196
3197 SCM_VARIABLE_SET (default_conversion_strategy_var,
3198 scm_make_fluid_with_default (sym_substitute));
3199 scm_conversion_strategy_init = 1;
3200
3201 /* These bindings are used when boot-9 turns `current-input-port' et
3202 al into parameters. They are then removed from the guile module. */
3203 scm_c_define ("%current-input-port-fluid", cur_inport_fluid);
3204 scm_c_define ("%current-output-port-fluid", cur_outport_fluid);
3205 scm_c_define ("%current-error-port-fluid", cur_errport_fluid);
3206 }
3207
3208 /*
3209 Local Variables:
3210 c-file-style: "gnu"
3211 End:
3212 */