Export <slot> from GOOPS
[bpt/guile.git] / libguile / ports.c
1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2006,
2 * 2007, 2008, 2009, 2010, 2011, 2012, 2013,
3 * 2014, 2015 Free Software Foundation, Inc.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public License
7 * as published by the Free Software Foundation; either version 3 of
8 * the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301 USA
19 */
20
21
22 \f
23 /* Headers. */
24
25 #define _LARGEFILE64_SOURCE /* ask for stat64 etc */
26
27 #ifdef HAVE_CONFIG_H
28 # include <config.h>
29 #endif
30
31 #include <stdio.h>
32 #include <errno.h>
33 #include <fcntl.h> /* for chsize on mingw */
34 #include <assert.h>
35 #include <iconv.h>
36 #include <uniconv.h>
37 #include <unistr.h>
38 #include <striconveh.h>
39
40 #include <assert.h>
41
42 #include "libguile/_scm.h"
43 #include "libguile/async.h"
44 #include "libguile/deprecation.h"
45 #include "libguile/eval.h"
46 #include "libguile/fports.h" /* direct access for seek and truncate */
47 #include "libguile/goops.h"
48 #include "libguile/smob.h"
49 #include "libguile/chars.h"
50 #include "libguile/dynwind.h"
51
52 #include "libguile/keywords.h"
53 #include "libguile/hashtab.h"
54 #include "libguile/root.h"
55 #include "libguile/strings.h"
56 #include "libguile/mallocs.h"
57 #include "libguile/validate.h"
58 #include "libguile/ports.h"
59 #include "libguile/ports-internal.h"
60 #include "libguile/vectors.h"
61 #include "libguile/weak-set.h"
62 #include "libguile/fluids.h"
63 #include "libguile/eq.h"
64 #include "libguile/alist.h"
65
66 #ifdef HAVE_STRING_H
67 #include <string.h>
68 #endif
69
70 #ifdef HAVE_IO_H
71 #include <io.h>
72 #endif
73
74 #include <unistd.h>
75
76 #ifdef HAVE_SYS_IOCTL_H
77 #include <sys/ioctl.h>
78 #endif
79
80 /* Mingw (version 3.4.5, circa 2006) has ftruncate as an alias for chsize
81 already, but have this code here in case that wasn't so in past versions,
82 or perhaps to help other minimal DOS environments.
83
84 gnulib ftruncate.c has code using fcntl F_CHSIZE and F_FREESP, which
85 might be possibilities if we've got other systems without ftruncate. */
86
87 #if defined HAVE_CHSIZE && ! defined HAVE_FTRUNCATE
88 #define ftruncate(fd, size) chsize (fd, size)
89 #undef HAVE_FTRUNCATE
90 #define HAVE_FTRUNCATE 1
91 #endif
92
93 \f
/* Port encodings are case-insensitive ASCII strings, so case folding
   is done by hand here: only the letters 'a'..'z' are mapped to
   'A'..'Z'; every other value of C is returned unchanged.  */
static char
ascii_toupper (char c)
{
  if (c >= 'a' && c <= 'z')
    return (char) (c - 'a' + 'A');

  return c;
}
100
/* Return 1 if ENC, compared case-insensitively (ASCII only), equals
   UPPER, which must already be upper-case; return 0 otherwise.  A NULL
   ENC stands for "ISO-8859-1".

   This is only needed for encodings that come from the user and have
   not been canonicalized yet.  Encodings that are set on ports or in
   the default encoding fluid are in upper-case, and can be compared
   with strcmp.  */
static int
encoding_matches (const char *enc, const char *upper)
{
  const char *p = enc ? enc : "ISO-8859-1";

  for (; *p; p++, upper++)
    {
      /* Also stops when UPPER is shorter than ENC: its terminating
         NUL can never equal a non-NUL character of ENC.  */
      if (ascii_toupper (*p) != *upper)
        return 0;
    }

  /* ENC is exhausted; it matches only if UPPER is exhausted too.  */
  return *upper == '\0';
}
117
/* Return the canonical (upper-case) spelling of the encoding name ENC.

   A NULL ENC yields the string literal "ISO-8859-1", which callers
   must not modify.  Otherwise the result is a fresh copy of ENC
   obtained from scm_gc_strdup, with ASCII lower-case letters folded
   to upper case.

   Signals a misc-error ("invalid character encoding") if ENC contains
   a byte outside the ASCII range.  */
static char*
canonicalize_encoding (const char *enc)
{
  char *ret;
  size_t i;

  if (!enc)
    return "ISO-8859-1";

  ret = scm_gc_strdup (enc, "port");

  for (i = 0; ret[i]; i++)
    {
      /* Compare as unsigned char: where plain 'char' is signed, bytes
         above 127 are negative and a bare 'ret[i] > 127' is never
         true, which would let non-ASCII names through.  */
      if ((unsigned char) ret[i] > 127)
        /* Restrict to ASCII. */
        scm_misc_error (NULL, "invalid character encoding ~s",
                        scm_list_1 (scm_from_latin1_string (enc)));
      else
        ret[i] = ascii_toupper (ret[i]);
    }

  return ret;
}
141
142
143 \f
144 /* The port kind table --- a dynamically resized array of port types. */
145
146
147 /* scm_ptobs scm_numptob
148 * implement a dynamically resized array of ptob records.
149 * Indexes into this table are used when generating type
150 * tags for smobjects (if you know a tag you can get an index and conversely).
151 */
152 static scm_t_ptob_descriptor **scm_ptobs = NULL;
153 static long scm_numptob = 0; /* Number of port types. */
154 static long scm_ptobs_size = 0; /* Number of slots in the port type
155 table. */
156 static scm_i_pthread_mutex_t scm_ptobs_lock = SCM_I_PTHREAD_MUTEX_INITIALIZER;
157
158 long
159 scm_c_num_port_types (void)
160 {
161 long ret;
162
163 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
164 ret = scm_numptob;
165 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
166
167 return ret;
168 }
169
170 scm_t_ptob_descriptor*
171 scm_c_port_type_ref (long ptobnum)
172 {
173 scm_t_ptob_descriptor *ret = NULL;
174
175 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
176
177 if (0 <= ptobnum && ptobnum < scm_numptob)
178 ret = scm_ptobs[ptobnum];
179
180 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
181
182 if (!ret)
183 scm_out_of_range ("scm_c_port_type_ref", scm_from_long (ptobnum));
184
185 return ret;
186 }
187
188 long
189 scm_c_port_type_add_x (scm_t_ptob_descriptor *desc)
190 {
191 long ret = -1;
192
193 scm_i_pthread_mutex_lock (&scm_ptobs_lock);
194
195 if (scm_numptob + 1 < SCM_I_MAX_PORT_TYPE_COUNT)
196 {
197 if (scm_numptob == scm_ptobs_size)
198 {
199 unsigned long old_size = scm_ptobs_size;
200 scm_t_ptob_descriptor **old_ptobs = scm_ptobs;
201
202 /* Currently there are only 9 predefined port types, so one
203 resize will cover it. */
204 scm_ptobs_size = old_size + 10;
205
206 if (scm_ptobs_size >= SCM_I_MAX_PORT_TYPE_COUNT)
207 scm_ptobs_size = SCM_I_MAX_PORT_TYPE_COUNT;
208
209 scm_ptobs = scm_gc_malloc (sizeof (*scm_ptobs) * scm_ptobs_size,
210 "scm_ptobs");
211
212 memcpy (scm_ptobs, old_ptobs, sizeof (*scm_ptobs) * scm_numptob);
213 }
214
215 ret = scm_numptob++;
216 scm_ptobs[ret] = desc;
217 }
218
219 scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
220
221 if (ret < 0)
222 scm_out_of_range ("scm_c_port_type_add_x", scm_from_long (scm_numptob));
223
224 return ret;
225 }
226
227 /*
228 * We choose to use an interface similar to the smob interface with
229 * fill_input and write as standard fields, passed to the port
230 * type constructor, and optional fields set by setters.
231 */
232
233 static void
234 flush_port_default (SCM port SCM_UNUSED)
235 {
236 }
237
238 static void
239 end_input_default (SCM port SCM_UNUSED, int offset SCM_UNUSED)
240 {
241 }
242
243 scm_t_bits
244 scm_make_port_type (char *name,
245 int (*fill_input) (SCM port),
246 void (*write) (SCM port, const void *data, size_t size))
247 {
248 scm_t_ptob_descriptor *desc;
249 long ptobnum;
250
251 desc = scm_gc_malloc_pointerless (sizeof (*desc), "port-type");
252 memset (desc, 0, sizeof (*desc));
253
254 desc->name = name;
255 desc->print = scm_port_print;
256 desc->write = write;
257 desc->flush = flush_port_default;
258 desc->end_input = end_input_default;
259 desc->fill_input = fill_input;
260
261 ptobnum = scm_c_port_type_add_x (desc);
262
263 /* Make a class object if GOOPS is present. */
264 if (SCM_UNPACK (scm_i_port_class[0]) != 0)
265 scm_make_port_classes (ptobnum, name);
266
267 return scm_tc7_port + ptobnum * 256;
268 }
269
270 void
271 scm_set_port_mark (scm_t_bits tc, SCM (*mark) (SCM))
272 {
273 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->mark = mark;
274 }
275
276 void
277 scm_set_port_free (scm_t_bits tc, size_t (*free) (SCM))
278 {
279 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->free = free;
280 }
281
282 void
283 scm_set_port_print (scm_t_bits tc, int (*print) (SCM exp, SCM port,
284 scm_print_state *pstate))
285 {
286 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->print = print;
287 }
288
289 void
290 scm_set_port_equalp (scm_t_bits tc, SCM (*equalp) (SCM, SCM))
291 {
292 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->equalp = equalp;
293 }
294
295 void
296 scm_set_port_close (scm_t_bits tc, int (*close) (SCM))
297 {
298 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->close = close;
299 }
300
301 void
302 scm_set_port_flush (scm_t_bits tc, void (*flush) (SCM port))
303 {
304 scm_t_ptob_descriptor *ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tc));
305 ptob->flush = flush;
306 ptob->flags |= SCM_PORT_TYPE_HAS_FLUSH;
307 }
308
309 void
310 scm_set_port_end_input (scm_t_bits tc, void (*end_input) (SCM port, int offset))
311 {
312 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->end_input = end_input;
313 }
314
315 void
316 scm_set_port_seek (scm_t_bits tc, scm_t_off (*seek) (SCM, scm_t_off, int))
317 {
318 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->seek = seek;
319 }
320
321 void
322 scm_set_port_truncate (scm_t_bits tc, void (*truncate) (SCM, scm_t_off))
323 {
324 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->truncate = truncate;
325 }
326
327 void
328 scm_set_port_input_waiting (scm_t_bits tc, int (*input_waiting) (SCM))
329 {
330 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->input_waiting = input_waiting;
331 }
332
333 void
334 scm_set_port_setvbuf (scm_t_bits tc, void (*setvbuf) (SCM, long, long))
335 {
336 scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->setvbuf = setvbuf;
337 }
338
339 static void
340 scm_i_set_pending_eof (SCM port)
341 {
342 SCM_PORT_GET_INTERNAL (port)->pending_eof = 1;
343 }
344
345 static void
346 scm_i_clear_pending_eof (SCM port)
347 {
348 SCM_PORT_GET_INTERNAL (port)->pending_eof = 0;
349 }
350
351 SCM_DEFINE (scm_i_port_property, "%port-property", 2, 0, 0,
352 (SCM port, SCM key),
353 "Return the property of @var{port} associated with @var{key}.")
354 #define FUNC_NAME s_scm_i_port_property
355 {
356 scm_i_pthread_mutex_t *lock;
357 SCM result;
358
359 SCM_VALIDATE_OPPORT (1, port);
360 scm_c_lock_port (port, &lock);
361 result = scm_assq_ref (SCM_PORT_GET_INTERNAL (port)->alist, key);
362 if (lock)
363 scm_i_pthread_mutex_unlock (lock);
364 return result;
365 }
366 #undef FUNC_NAME
367
368 SCM_DEFINE (scm_i_set_port_property_x, "%set-port-property!", 3, 0, 0,
369 (SCM port, SCM key, SCM value),
370 "Set the property of @var{port} associated with @var{key} to @var{value}.")
371 #define FUNC_NAME s_scm_i_set_port_property_x
372 {
373 scm_i_pthread_mutex_t *lock;
374 scm_t_port_internal *pti;
375
376 SCM_VALIDATE_OPPORT (1, port);
377 scm_c_lock_port (port, &lock);
378 pti = SCM_PORT_GET_INTERNAL (port);
379 pti->alist = scm_assq_set_x (pti->alist, key, value);
380 if (lock)
381 scm_i_pthread_mutex_unlock (lock);
382 return SCM_UNSPECIFIED;
383 }
384 #undef FUNC_NAME
385
386 \f
387
388 /* Standard ports --- current input, output, error, and more(!). */
389
390 static SCM cur_inport_fluid = SCM_BOOL_F;
391 static SCM cur_outport_fluid = SCM_BOOL_F;
392 static SCM cur_errport_fluid = SCM_BOOL_F;
393 static SCM cur_warnport_fluid = SCM_BOOL_F;
394 static SCM cur_loadport_fluid = SCM_BOOL_F;
395
396 SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
397 (void),
398 "Return the current input port. This is the default port used\n"
399 "by many input procedures. Initially, @code{current-input-port}\n"
400 "returns the @dfn{standard input} in Unix and C terminology.")
401 #define FUNC_NAME s_scm_current_input_port
402 {
403 if (scm_is_true (cur_inport_fluid))
404 return scm_fluid_ref (cur_inport_fluid);
405 else
406 return SCM_BOOL_F;
407 }
408 #undef FUNC_NAME
409
410 SCM_DEFINE (scm_current_output_port, "current-output-port", 0, 0, 0,
411 (void),
412 "Return the current output port. This is the default port used\n"
413 "by many output procedures. Initially,\n"
414 "@code{current-output-port} returns the @dfn{standard output} in\n"
415 "Unix and C terminology.")
416 #define FUNC_NAME s_scm_current_output_port
417 {
418 if (scm_is_true (cur_outport_fluid))
419 return scm_fluid_ref (cur_outport_fluid);
420 else
421 return SCM_BOOL_F;
422 }
423 #undef FUNC_NAME
424
425 SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0,
426 (void),
427 "Return the port to which errors and warnings should be sent (the\n"
428 "@dfn{standard error} in Unix and C terminology).")
429 #define FUNC_NAME s_scm_current_error_port
430 {
431 if (scm_is_true (cur_errport_fluid))
432 return scm_fluid_ref (cur_errport_fluid);
433 else
434 return SCM_BOOL_F;
435 }
436 #undef FUNC_NAME
437
438 SCM_DEFINE (scm_current_warning_port, "current-warning-port", 0, 0, 0,
439 (void),
440 "Return the port to which diagnostic warnings should be sent.")
441 #define FUNC_NAME s_scm_current_warning_port
442 {
443 if (scm_is_true (cur_warnport_fluid))
444 return scm_fluid_ref (cur_warnport_fluid);
445 else
446 return SCM_BOOL_F;
447 }
448 #undef FUNC_NAME
449
450 SCM_DEFINE (scm_current_load_port, "current-load-port", 0, 0, 0,
451 (),
452 "Return the current-load-port.\n"
453 "The load port is used internally by @code{primitive-load}.")
454 #define FUNC_NAME s_scm_current_load_port
455 {
456 return scm_fluid_ref (cur_loadport_fluid);
457 }
458 #undef FUNC_NAME
459
460 SCM_DEFINE (scm_set_current_input_port, "set-current-input-port", 1, 0, 0,
461 (SCM port),
462 "@deffnx {Scheme Procedure} set-current-output-port port\n"
463 "@deffnx {Scheme Procedure} set-current-error-port port\n"
464 "Change the ports returned by @code{current-input-port},\n"
465 "@code{current-output-port} and @code{current-error-port}, respectively,\n"
466 "so that they use the supplied @var{port} for input or output.")
467 #define FUNC_NAME s_scm_set_current_input_port
468 {
469 SCM oinp = scm_fluid_ref (cur_inport_fluid);
470 SCM_VALIDATE_OPINPORT (1, port);
471 scm_fluid_set_x (cur_inport_fluid, port);
472 return oinp;
473 }
474 #undef FUNC_NAME
475
476
477 SCM_DEFINE (scm_set_current_output_port, "set-current-output-port", 1, 0, 0,
478 (SCM port),
479 "Set the current default output port to @var{port}.")
480 #define FUNC_NAME s_scm_set_current_output_port
481 {
482 SCM ooutp = scm_fluid_ref (cur_outport_fluid);
483 port = SCM_COERCE_OUTPORT (port);
484 SCM_VALIDATE_OPOUTPORT (1, port);
485 scm_fluid_set_x (cur_outport_fluid, port);
486 return ooutp;
487 }
488 #undef FUNC_NAME
489
490
491 SCM_DEFINE (scm_set_current_error_port, "set-current-error-port", 1, 0, 0,
492 (SCM port),
493 "Set the current default error port to @var{port}.")
494 #define FUNC_NAME s_scm_set_current_error_port
495 {
496 SCM oerrp = scm_fluid_ref (cur_errport_fluid);
497 port = SCM_COERCE_OUTPORT (port);
498 SCM_VALIDATE_OPOUTPORT (1, port);
499 scm_fluid_set_x (cur_errport_fluid, port);
500 return oerrp;
501 }
502 #undef FUNC_NAME
503
504
505 SCM
506 scm_set_current_warning_port (SCM port)
507 #define FUNC_NAME "set-current-warning-port"
508 {
509 SCM owarnp = scm_fluid_ref (cur_warnport_fluid);
510 port = SCM_COERCE_OUTPORT (port);
511 SCM_VALIDATE_OPOUTPORT (1, port);
512 scm_fluid_set_x (cur_warnport_fluid, port);
513 return owarnp;
514 }
515 #undef FUNC_NAME
516
517
518 void
519 scm_dynwind_current_input_port (SCM port)
520 #define FUNC_NAME NULL
521 {
522 SCM_VALIDATE_OPINPORT (1, port);
523 scm_dynwind_fluid (cur_inport_fluid, port);
524 }
525 #undef FUNC_NAME
526
527 void
528 scm_dynwind_current_output_port (SCM port)
529 #define FUNC_NAME NULL
530 {
531 port = SCM_COERCE_OUTPORT (port);
532 SCM_VALIDATE_OPOUTPORT (1, port);
533 scm_dynwind_fluid (cur_outport_fluid, port);
534 }
535 #undef FUNC_NAME
536
537 void
538 scm_dynwind_current_error_port (SCM port)
539 #define FUNC_NAME NULL
540 {
541 port = SCM_COERCE_OUTPORT (port);
542 SCM_VALIDATE_OPOUTPORT (1, port);
543 scm_dynwind_fluid (cur_errport_fluid, port);
544 }
545 #undef FUNC_NAME
546
547 void
548 scm_i_dynwind_current_load_port (SCM port)
549 {
550 scm_dynwind_fluid (cur_loadport_fluid, port);
551 }
552
553
554 \f
555
556 /* Retrieving a port's mode. */
557
558 /* Return the flags that characterize a port based on the mode
559 * string used to open a file for that port.
560 *
561 * See PORT FLAGS in scm.h
562 */
563
564 static long
565 scm_i_mode_bits_n (SCM modes)
566 {
567 return (SCM_OPN
568 | (scm_i_string_contains_char (modes, 'r')
569 || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0)
570 | (scm_i_string_contains_char (modes, 'w')
571 || scm_i_string_contains_char (modes, 'a')
572 || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0)
573 | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0)
574 | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0));
575 }
576
577 long
578 scm_mode_bits (char *modes)
579 {
580 /* Valid characters are rw+a0l. So, use latin1. */
581 return scm_i_mode_bits (scm_from_latin1_string (modes));
582 }
583
584 long
585 scm_i_mode_bits (SCM modes)
586 {
587 long bits;
588
589 if (!scm_is_string (modes))
590 scm_wrong_type_arg_msg (NULL, 0, modes, "string");
591
592 bits = scm_i_mode_bits_n (modes);
593 scm_remember_upto_here_1 (modes);
594 return bits;
595 }
596
597 /* Return the mode flags from an open port.
598 * Some modes such as "append" are only used when opening
599 * a file and are not returned here. */
600
601 SCM_DEFINE (scm_port_mode, "port-mode", 1, 0, 0,
602 (SCM port),
603 "Return the port modes associated with the open port @var{port}.\n"
604 "These will not necessarily be identical to the modes used when\n"
605 "the port was opened, since modes such as \"append\" which are\n"
606 "used only during port creation are not retained.")
607 #define FUNC_NAME s_scm_port_mode
608 {
609 char modes[4];
610 modes[0] = '\0';
611
612 port = SCM_COERCE_OUTPORT (port);
613 SCM_VALIDATE_OPPORT (1, port);
614 if (SCM_CELL_WORD_0 (port) & SCM_RDNG) {
615 if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
616 strcpy (modes, "r+");
617 else
618 strcpy (modes, "r");
619 }
620 else if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
621 strcpy (modes, "w");
622 if (SCM_CELL_WORD_0 (port) & SCM_BUF0)
623 strcat (modes, "0");
624
625 return scm_from_latin1_string (modes);
626 }
627 #undef FUNC_NAME
628
629 \f
630
631 /* The port table --- a weak set of all ports.
632
633 We need a global registry of ports to flush them all at exit, and to
634 get all the ports matching a file descriptor. */
635 SCM scm_i_port_weak_set;
636
637
638 \f
639
640 /* Port finalization. */
641
642 struct do_free_data
643 {
644 scm_t_ptob_descriptor *ptob;
645 SCM port;
646 };
647
648 static SCM
649 do_free (void *body_data)
650 {
651 struct do_free_data *data = body_data;
652
653 /* `close' is for explicit `close-port' by user. `free' is for this
654 purpose: ports collected by the GC. */
655 data->ptob->free (data->port);
656
657 return SCM_BOOL_T;
658 }
659
660 /* Finalize the object (a port) pointed to by PTR. */
661 static void
662 finalize_port (void *ptr, void *data)
663 {
664 SCM port = SCM_PACK_POINTER (ptr);
665
666 if (!SCM_PORTP (port))
667 abort ();
668
669 if (SCM_OPENP (port))
670 {
671 struct do_free_data data;
672
673 SCM_CLR_PORT_OPEN_FLAG (port);
674
675 data.ptob = SCM_PORT_DESCRIPTOR (port);
676 data.port = port;
677
678 scm_internal_catch (SCM_BOOL_T, do_free, &data,
679 scm_handle_by_message_noexit, NULL);
680
681 scm_gc_ports_collected++;
682 }
683 }
684
685
686 \f
687
688 SCM
689 scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits,
690 const char *encoding,
691 scm_t_string_failed_conversion_handler handler,
692 scm_t_bits stream)
693 {
694 SCM ret;
695 scm_t_port *entry;
696 scm_t_port_internal *pti;
697 scm_t_ptob_descriptor *ptob;
698
699 entry = scm_gc_typed_calloc (scm_t_port);
700 pti = scm_gc_typed_calloc (scm_t_port_internal);
701 ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tag));
702
703 ret = scm_words (tag | mode_bits, 3);
704 SCM_SET_CELL_WORD_1 (ret, (scm_t_bits) entry);
705 SCM_SET_CELL_WORD_2 (ret, (scm_t_bits) ptob);
706
707 entry->lock = scm_gc_malloc_pointerless (sizeof (*entry->lock), "port lock");
708 scm_i_pthread_mutex_init (entry->lock, scm_i_pthread_mutexattr_recursive);
709
710 entry->internal = pti;
711 entry->file_name = SCM_BOOL_F;
712 entry->rw_active = SCM_PORT_NEITHER;
713 entry->port = ret;
714 entry->stream = stream;
715
716 if (encoding_matches (encoding, "UTF-8"))
717 {
718 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
719 entry->encoding = "UTF-8";
720 }
721 else if (encoding_matches (encoding, "ISO-8859-1"))
722 {
723 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
724 entry->encoding = "ISO-8859-1";
725 }
726 else
727 {
728 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
729 entry->encoding = canonicalize_encoding (encoding);
730 }
731
732 entry->ilseq_handler = handler;
733 pti->iconv_descriptors = NULL;
734
735 pti->at_stream_start_for_bom_read = 1;
736 pti->at_stream_start_for_bom_write = 1;
737
738 pti->pending_eof = 0;
739 pti->alist = SCM_EOL;
740
741 if (SCM_PORT_DESCRIPTOR (ret)->free)
742 scm_i_set_finalizer (SCM2PTR (ret), finalize_port, NULL);
743
744 if (SCM_PORT_DESCRIPTOR (ret)->flags & SCM_PORT_TYPE_HAS_FLUSH)
745 scm_weak_set_add_x (scm_i_port_weak_set, ret);
746
747 return ret;
748 }
749
750 SCM
751 scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream)
752 {
753 return scm_c_make_port_with_encoding (tag, mode_bits,
754 scm_i_default_port_encoding (),
755 scm_i_default_port_conversion_handler (),
756 stream);
757 }
758
759 SCM
760 scm_new_port_table_entry (scm_t_bits tag)
761 {
762 return scm_c_make_port (tag, 0, 0);
763 }
764
765 \f
766
767 /* Predicates. */
768
769 SCM_DEFINE (scm_port_p, "port?", 1, 0, 0,
770 (SCM x),
771 "Return a boolean indicating whether @var{x} is a port.\n"
772 "Equivalent to @code{(or (input-port? @var{x}) (output-port?\n"
773 "@var{x}))}.")
774 #define FUNC_NAME s_scm_port_p
775 {
776 return scm_from_bool (SCM_PORTP (x));
777 }
778 #undef FUNC_NAME
779
780 SCM_DEFINE (scm_input_port_p, "input-port?", 1, 0, 0,
781 (SCM x),
782 "Return @code{#t} if @var{x} is an input port, otherwise return\n"
783 "@code{#f}. Any object satisfying this predicate also satisfies\n"
784 "@code{port?}.")
785 #define FUNC_NAME s_scm_input_port_p
786 {
787 return scm_from_bool (SCM_INPUT_PORT_P (x));
788 }
789 #undef FUNC_NAME
790
791 SCM_DEFINE (scm_output_port_p, "output-port?", 1, 0, 0,
792 (SCM x),
793 "Return @code{#t} if @var{x} is an output port, otherwise return\n"
794 "@code{#f}. Any object satisfying this predicate also satisfies\n"
795 "@code{port?}.")
796 #define FUNC_NAME s_scm_output_port_p
797 {
798 x = SCM_COERCE_OUTPORT (x);
799 return scm_from_bool (SCM_OUTPUT_PORT_P (x));
800 }
801 #undef FUNC_NAME
802
803 SCM_DEFINE (scm_port_closed_p, "port-closed?", 1, 0, 0,
804 (SCM port),
805 "Return @code{#t} if @var{port} is closed or @code{#f} if it is\n"
806 "open.")
807 #define FUNC_NAME s_scm_port_closed_p
808 {
809 SCM_VALIDATE_PORT (1, port);
810 return scm_from_bool (!SCM_OPPORTP (port));
811 }
812 #undef FUNC_NAME
813
814 SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0,
815 (SCM x),
816 "Return @code{#t} if @var{x} is an end-of-file object; otherwise\n"
817 "return @code{#f}.")
818 #define FUNC_NAME s_scm_eof_object_p
819 {
820 return scm_from_bool (SCM_EOF_OBJECT_P (x));
821 }
822 #undef FUNC_NAME
823
824
825 \f
826
827 /* Closing ports. */
828
829 static void close_iconv_descriptors (scm_t_iconv_descriptors *id);
830
831 /* scm_close_port
832 * Call the close operation on a port object.
833 * see also scm_close.
834 */
835 SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0,
836 (SCM port),
837 "Close the specified port object. Return @code{#t} if it\n"
838 "successfully closes a port or @code{#f} if it was already\n"
839 "closed. An exception may be raised if an error occurs, for\n"
840 "example when flushing buffered output. See also @ref{Ports and\n"
841 "File Descriptors, close}, for a procedure which can close file\n"
842 "descriptors.")
843 #define FUNC_NAME s_scm_close_port
844 {
845 scm_t_port_internal *pti;
846 int rv;
847
848 port = SCM_COERCE_OUTPORT (port);
849
850 SCM_VALIDATE_PORT (1, port);
851 if (SCM_CLOSEDP (port))
852 return SCM_BOOL_F;
853
854 pti = SCM_PORT_GET_INTERNAL (port);
855 SCM_CLR_PORT_OPEN_FLAG (port);
856
857 if (SCM_PORT_DESCRIPTOR (port)->flags & SCM_PORT_TYPE_HAS_FLUSH)
858 scm_weak_set_remove_x (scm_i_port_weak_set, port);
859
860 if (SCM_PORT_DESCRIPTOR (port)->close)
861 /* Note! This may throw an exception. Anything after this point
862 should be resilient to non-local exits. */
863 rv = SCM_PORT_DESCRIPTOR (port)->close (port);
864 else
865 rv = 0;
866
867 if (pti->iconv_descriptors)
868 {
869 /* If we don't get here, the iconv_descriptors finalizer will
870 clean up. */
871 close_iconv_descriptors (pti->iconv_descriptors);
872 pti->iconv_descriptors = NULL;
873 }
874
875 return scm_from_bool (rv >= 0);
876 }
877 #undef FUNC_NAME
878
879 SCM_DEFINE (scm_close_input_port, "close-input-port", 1, 0, 0,
880 (SCM port),
881 "Close the specified input port object. The routine has no effect if\n"
882 "the file has already been closed. An exception may be raised if an\n"
883 "error occurs. The value returned is unspecified.\n\n"
884 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
885 "which can close file descriptors.")
886 #define FUNC_NAME s_scm_close_input_port
887 {
888 SCM_VALIDATE_INPUT_PORT (1, port);
889 scm_close_port (port);
890 return SCM_UNSPECIFIED;
891 }
892 #undef FUNC_NAME
893
894 SCM_DEFINE (scm_close_output_port, "close-output-port", 1, 0, 0,
895 (SCM port),
896 "Close the specified output port object. The routine has no effect if\n"
897 "the file has already been closed. An exception may be raised if an\n"
898 "error occurs. The value returned is unspecified.\n\n"
899 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
900 "which can close file descriptors.")
901 #define FUNC_NAME s_scm_close_output_port
902 {
903 port = SCM_COERCE_OUTPORT (port);
904 SCM_VALIDATE_OUTPUT_PORT (1, port);
905 scm_close_port (port);
906 return SCM_UNSPECIFIED;
907 }
908 #undef FUNC_NAME
909
910
911 \f
912
913 /* Encoding characters to byte streams, and decoding byte streams to
914 characters. */
915
916 /* A fluid specifying the default encoding for newly created ports. If it is
917 a string, that is the encoding. If it is #f, it is in the "native"
918 (Latin-1) encoding. */
919 SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding");
920
921 static int scm_port_encoding_init = 0;
922
923 /* Use ENCODING as the default encoding for future ports. */
924 void
925 scm_i_set_default_port_encoding (const char *encoding)
926 {
927 if (!scm_port_encoding_init
928 || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
929 scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
930 SCM_EOL);
931
932 if (encoding_matches (encoding, "ISO-8859-1"))
933 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
934 else
935 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
936 scm_from_latin1_string (canonicalize_encoding (encoding)));
937 }
938
939 /* Return the name of the default encoding for newly created ports. */
940 const char *
941 scm_i_default_port_encoding (void)
942 {
943 if (!scm_port_encoding_init)
944 return "ISO-8859-1";
945 else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
946 return "ISO-8859-1";
947 else
948 {
949 SCM encoding;
950
951 encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
952 if (!scm_is_string (encoding))
953 return "ISO-8859-1";
954 else
955 return scm_i_string_chars (encoding);
956 }
957 }
958
959 /* A fluid specifying the default conversion handler for newly created
960 ports. Its value should be one of the symbols below. */
961 SCM_VARIABLE (default_conversion_strategy_var,
962 "%default-port-conversion-strategy");
963
964 /* Whether the above fluid is initialized. */
965 static int scm_conversion_strategy_init = 0;
966
967 /* The possible conversion strategies. */
968 SCM_SYMBOL (sym_error, "error");
969 SCM_SYMBOL (sym_substitute, "substitute");
970 SCM_SYMBOL (sym_escape, "escape");
971
972 /* Return the default failed encoding conversion policy for new created
973 ports. */
974 scm_t_string_failed_conversion_handler
975 scm_i_default_port_conversion_handler (void)
976 {
977 scm_t_string_failed_conversion_handler handler;
978
979 if (!scm_conversion_strategy_init
980 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
981 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
982 else
983 {
984 SCM fluid, value;
985
986 fluid = SCM_VARIABLE_REF (default_conversion_strategy_var);
987 value = scm_fluid_ref (fluid);
988
989 if (scm_is_eq (sym_substitute, value))
990 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
991 else if (scm_is_eq (sym_escape, value))
992 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
993 else
994 /* Default to 'error also when the fluid's value is not one of
995 the valid symbols. */
996 handler = SCM_FAILED_CONVERSION_ERROR;
997 }
998
999 return handler;
1000 }
1001
1002 /* Use HANDLER as the default conversion strategy for future ports. */
1003 void
1004 scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler
1005 handler)
1006 {
1007 SCM strategy;
1008
1009 if (!scm_conversion_strategy_init
1010 || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
1011 scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
1012 SCM_EOL);
1013
1014 switch (handler)
1015 {
1016 case SCM_FAILED_CONVERSION_ERROR:
1017 strategy = sym_error;
1018 break;
1019
1020 case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE:
1021 strategy = sym_escape;
1022 break;
1023
1024 case SCM_FAILED_CONVERSION_QUESTION_MARK:
1025 strategy = sym_substitute;
1026 break;
1027
1028 default:
1029 abort ();
1030 }
1031
1032 scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var),
1033 strategy);
1034 }
1035
1036 static void
1037 scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port);
1038
1039 /* If the next LEN bytes from PORT are equal to those in BYTES, then
1040 return 1, else return 0. Leave the port position unchanged. */
1041 static int
1042 looking_at_bytes (SCM port, const unsigned char *bytes, int len)
1043 {
1044 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1045 int i = 0;
1046
1047 while (i < len && scm_peek_byte_or_eof_unlocked (port) == bytes[i])
1048 {
1049 pt->read_pos++;
1050 i++;
1051 }
1052 scm_i_unget_bytes_unlocked (bytes, i, port);
1053 return (i == len);
1054 }
1055
/* Byte-order marks for the Unicode encoding forms, as raw bytes.  */
static const unsigned char scm_utf8_bom[] = {0xEF, 0xBB, 0xBF};
static const unsigned char scm_utf16be_bom[] = {0xFE, 0xFF};
static const unsigned char scm_utf16le_bom[] = {0xFF, 0xFE};
static const unsigned char scm_utf32be_bom[] = {0x00, 0x00, 0xFE, 0xFF};
static const unsigned char scm_utf32le_bom[] = {0xFF, 0xFE, 0x00, 0x00};
1061
1062 /* Decide what byte order to use for a UTF-16 port. Return "UTF-16BE"
1063 or "UTF-16LE". MODE must be either SCM_PORT_READ or SCM_PORT_WRITE,
1064 and specifies which operation is about to be done. The MODE
1065 determines how we will decide the byte order. We deliberately avoid
1066 reading from the port unless the user is about to do so. If the user
1067 is about to read, then we look for a BOM, and if present, we use it
1068 to determine the byte order. Otherwise we choose big endian, as
1069 recommended by the Unicode Standard. Note that the BOM (if any) is
1070 not consumed here. */
1071 static const char *
1072 decide_utf16_encoding (SCM port, scm_t_port_rw_active mode)
1073 {
1074 if (mode == SCM_PORT_READ
1075 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1076 && looking_at_bytes (port, scm_utf16le_bom, sizeof scm_utf16le_bom))
1077 return "UTF-16LE";
1078 else
1079 return "UTF-16BE";
1080 }
1081
1082 /* Decide what byte order to use for a UTF-32 port. Return "UTF-32BE"
1083 or "UTF-32LE". See the comment above 'decide_utf16_encoding' for
1084 details. */
1085 static const char *
1086 decide_utf32_encoding (SCM port, scm_t_port_rw_active mode)
1087 {
1088 if (mode == SCM_PORT_READ
1089 && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
1090 && looking_at_bytes (port, scm_utf32le_bom, sizeof scm_utf32le_bom))
1091 return "UTF-32LE";
1092 else
1093 return "UTF-32BE";
1094 }
1095
1096 static void
1097 finalize_iconv_descriptors (void *ptr, void *data)
1098 {
1099 close_iconv_descriptors (ptr);
1100 }
1101
1102 static scm_t_iconv_descriptors *
1103 open_iconv_descriptors (const char *encoding, int reading, int writing)
1104 {
1105 scm_t_iconv_descriptors *id;
1106 iconv_t input_cd, output_cd;
1107 size_t i;
1108
1109 input_cd = (iconv_t) -1;
1110 output_cd = (iconv_t) -1;
1111
1112 for (i = 0; encoding[i]; i++)
1113 if (encoding[i] > 127)
1114 goto invalid_encoding;
1115
1116 if (reading)
1117 {
1118 /* Open an input iconv conversion descriptor, from ENCODING
1119 to UTF-8. We choose UTF-8, not UTF-32, because iconv
1120 implementations can typically convert from anything to
1121 UTF-8, but not to UTF-32 (see
1122 <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
1123
1124 /* Assume opening an iconv descriptor causes about 16 KB of
1125 allocation. */
1126 scm_gc_register_allocation (16 * 1024);
1127
1128 input_cd = iconv_open ("UTF-8", encoding);
1129 if (input_cd == (iconv_t) -1)
1130 goto invalid_encoding;
1131 }
1132
1133 if (writing)
1134 {
1135 /* Assume opening an iconv descriptor causes about 16 KB of
1136 allocation. */
1137 scm_gc_register_allocation (16 * 1024);
1138
1139 output_cd = iconv_open (encoding, "UTF-8");
1140 if (output_cd == (iconv_t) -1)
1141 {
1142 if (input_cd != (iconv_t) -1)
1143 iconv_close (input_cd);
1144 goto invalid_encoding;
1145 }
1146 }
1147
1148 id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors");
1149 id->input_cd = input_cd;
1150 id->output_cd = output_cd;
1151
1152 /* Register a finalizer to close the descriptors. */
1153 scm_i_set_finalizer (id, finalize_iconv_descriptors, NULL);
1154
1155 return id;
1156
1157 invalid_encoding:
1158 {
1159 SCM err;
1160 err = scm_from_latin1_string (encoding);
1161 scm_misc_error ("open_iconv_descriptors",
1162 "invalid or unknown character encoding ~s",
1163 scm_list_1 (err));
1164 }
1165 }
1166
1167 static void
1168 close_iconv_descriptors (scm_t_iconv_descriptors *id)
1169 {
1170 if (id->input_cd != (iconv_t) -1)
1171 iconv_close (id->input_cd);
1172 if (id->output_cd != (iconv_t) -1)
1173 iconv_close (id->output_cd);
1174 id->input_cd = (void *) -1;
1175 id->output_cd = (void *) -1;
1176 }
1177
1178 scm_t_iconv_descriptors *
1179 scm_i_port_iconv_descriptors (SCM port, scm_t_port_rw_active mode)
1180 {
1181 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
1182
1183 assert (pti->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV);
1184
1185 if (!pti->iconv_descriptors)
1186 {
1187 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1188 const char *precise_encoding;
1189
1190 if (!pt->encoding)
1191 pt->encoding = "ISO-8859-1";
1192
1193 /* If the specified encoding is UTF-16 or UTF-32, then make
1194 that more precise by deciding what byte order to use. */
1195 if (strcmp (pt->encoding, "UTF-16") == 0)
1196 precise_encoding = decide_utf16_encoding (port, mode);
1197 else if (strcmp (pt->encoding, "UTF-32") == 0)
1198 precise_encoding = decide_utf32_encoding (port, mode);
1199 else
1200 precise_encoding = pt->encoding;
1201
1202 pti->iconv_descriptors =
1203 open_iconv_descriptors (precise_encoding,
1204 SCM_INPUT_PORT_P (port),
1205 SCM_OUTPUT_PORT_P (port));
1206 }
1207
1208 return pti->iconv_descriptors;
1209 }
1210
1211 /* The name of the encoding is itself encoded in ASCII. */
1212 void
1213 scm_i_set_port_encoding_x (SCM port, const char *encoding)
1214 {
1215 scm_t_port *pt;
1216 scm_t_port_internal *pti;
1217 scm_t_iconv_descriptors *prev;
1218
1219 /* Set the character encoding for this port. */
1220 pt = SCM_PTAB_ENTRY (port);
1221 pti = SCM_PORT_GET_INTERNAL (port);
1222 prev = pti->iconv_descriptors;
1223
1224 /* In order to handle cases where the encoding changes mid-stream
1225 (e.g. within an HTTP stream, or within a file that is composed of
1226 segments with different encodings), we consider this to be "stream
1227 start" for purposes of BOM handling, regardless of our actual file
1228 position. */
1229 pti->at_stream_start_for_bom_read = 1;
1230 pti->at_stream_start_for_bom_write = 1;
1231
1232 if (encoding_matches (encoding, "UTF-8"))
1233 {
1234 pt->encoding = "UTF-8";
1235 pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
1236 }
1237 else if (encoding_matches (encoding, "ISO-8859-1"))
1238 {
1239 pt->encoding = "ISO-8859-1";
1240 pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
1241 }
1242 else
1243 {
1244 pt->encoding = canonicalize_encoding (encoding);
1245 pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
1246 }
1247
1248 pti->iconv_descriptors = NULL;
1249 if (prev)
1250 close_iconv_descriptors (prev);
1251 }
1252
1253 SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
1254 (SCM port),
1255 "Returns, as a string, the character encoding that @var{port}\n"
1256 "uses to interpret its input and output.\n")
1257 #define FUNC_NAME s_scm_port_encoding
1258 {
1259 SCM_VALIDATE_PORT (1, port);
1260
1261 return scm_from_latin1_string (SCM_PTAB_ENTRY (port)->encoding);
1262 }
1263 #undef FUNC_NAME
1264
1265 SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
1266 (SCM port, SCM enc),
1267 "Sets the character encoding that will be used to interpret all\n"
1268 "port I/O. New ports are created with the encoding\n"
1269 "appropriate for the current locale if @code{setlocale} has \n"
1270 "been called or ISO-8859-1 otherwise\n"
1271 "and this procedure can be used to modify that encoding.\n")
1272 #define FUNC_NAME s_scm_set_port_encoding_x
1273 {
1274 char *enc_str;
1275
1276 SCM_VALIDATE_PORT (1, port);
1277 SCM_VALIDATE_STRING (2, enc);
1278
1279 enc_str = scm_to_latin1_string (enc);
1280 scm_i_set_port_encoding_x (port, enc_str);
1281 free (enc_str);
1282
1283 return SCM_UNSPECIFIED;
1284 }
1285 #undef FUNC_NAME
1286
1287 SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
1288 1, 0, 0, (SCM port),
1289 "Returns the behavior of the port when handling a character that\n"
1290 "is not representable in the port's current encoding.\n"
1291 "It returns the symbol @code{error} if unrepresentable characters\n"
1292 "should cause exceptions, @code{substitute} if the port should\n"
1293 "try to replace unrepresentable characters with question marks or\n"
1294 "approximate characters, or @code{escape} if unrepresentable\n"
1295 "characters should be converted to string escapes.\n"
1296 "\n"
1297 "If @var{port} is @code{#f}, then the current default behavior\n"
1298 "will be returned. New ports will have this default behavior\n"
1299 "when they are created.\n")
1300 #define FUNC_NAME s_scm_port_conversion_strategy
1301 {
1302 scm_t_string_failed_conversion_handler h;
1303
1304 if (scm_is_false (port))
1305 h = scm_i_default_port_conversion_handler ();
1306 else
1307 {
1308 scm_t_port *pt;
1309
1310 SCM_VALIDATE_OPPORT (1, port);
1311 pt = SCM_PTAB_ENTRY (port);
1312
1313 h = pt->ilseq_handler;
1314 }
1315
1316 if (h == SCM_FAILED_CONVERSION_ERROR)
1317 return scm_from_latin1_symbol ("error");
1318 else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
1319 return scm_from_latin1_symbol ("substitute");
1320 else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
1321 return scm_from_latin1_symbol ("escape");
1322 else
1323 abort ();
1324
1325 /* Never gets here. */
1326 return SCM_UNDEFINED;
1327 }
1328 #undef FUNC_NAME
1329
1330 SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!",
1331 2, 0, 0,
1332 (SCM port, SCM sym),
1333 "Sets the behavior of the interpreter when outputting a character\n"
1334 "that is not representable in the port's current encoding.\n"
1335 "@var{sym} can be either @code{'error}, @code{'substitute}, or\n"
1336 "@code{'escape}. If it is @code{'error}, an error will be thrown\n"
1337 "when an unconvertible character is encountered. If it is\n"
1338 "@code{'substitute}, then unconvertible characters will \n"
1339 "be replaced with approximate characters, or with question marks\n"
1340 "if no approximately correct character is available.\n"
1341 "If it is @code{'escape},\n"
1342 "it will appear as a hex escape when output.\n"
1343 "\n"
1344 "If @var{port} is an open port, the conversion error behavior\n"
1345 "is set for that port. If it is @code{#f}, it is set as the\n"
1346 "default behavior for any future ports that get created in\n"
1347 "this thread.\n")
1348 #define FUNC_NAME s_scm_set_port_conversion_strategy_x
1349 {
1350 scm_t_string_failed_conversion_handler handler;
1351
1352 if (scm_is_eq (sym, sym_error))
1353 handler = SCM_FAILED_CONVERSION_ERROR;
1354 else if (scm_is_eq (sym, sym_substitute))
1355 handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
1356 else if (scm_is_eq (sym, sym_escape))
1357 handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
1358 else
1359 SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym));
1360
1361 if (scm_is_false (port))
1362 scm_i_set_default_port_conversion_handler (handler);
1363 else
1364 {
1365 SCM_VALIDATE_OPPORT (1, port);
1366 SCM_PTAB_ENTRY (port)->ilseq_handler = handler;
1367 }
1368
1369 return SCM_UNSPECIFIED;
1370 }
1371 #undef FUNC_NAME
1372
1373
1374 \f
1375
1376 /* The port lock. */
1377
1378 static void
1379 lock_port (void *mutex)
1380 {
1381 scm_i_pthread_mutex_lock ((scm_i_pthread_mutex_t *) mutex);
1382 }
1383
1384 static void
1385 unlock_port (void *mutex)
1386 {
1387 scm_i_pthread_mutex_unlock ((scm_i_pthread_mutex_t *) mutex);
1388 }
1389
1390 void
1391 scm_dynwind_lock_port (SCM port)
1392 #define FUNC_NAME "dynwind-lock-port"
1393 {
1394 scm_i_pthread_mutex_t *lock;
1395 SCM_VALIDATE_OPPORT (SCM_ARG1, port);
1396 scm_c_lock_port (port, &lock);
1397 if (lock)
1398 {
1399 scm_dynwind_unwind_handler (unlock_port, lock, SCM_F_WIND_EXPLICITLY);
1400 scm_dynwind_rewind_handler (lock_port, lock, 0);
1401 }
1402 }
1403 #undef FUNC_NAME
1404
1405
1406 \f
1407
1408 /* Input. */
1409
1410 int
1411 scm_get_byte_or_eof (SCM port)
1412 {
1413 scm_i_pthread_mutex_t *lock;
1414 int ret;
1415
1416 scm_c_lock_port (port, &lock);
1417 ret = scm_get_byte_or_eof_unlocked (port);
1418 if (lock)
1419 scm_i_pthread_mutex_unlock (lock);
1420
1421 return ret;
1422 }
1423
1424 int
1425 scm_peek_byte_or_eof (SCM port)
1426 {
1427 scm_i_pthread_mutex_t *lock;
1428 int ret;
1429
1430 scm_c_lock_port (port, &lock);
1431 ret = scm_peek_byte_or_eof_unlocked (port);
1432 if (lock)
1433 scm_i_pthread_mutex_unlock (lock);
1434
1435 return ret;
1436 }
1437
/* scm_c_read
 *
 * Used by an application to read an arbitrary number of bytes from an
 * SCM port.  Same semantics as libc read, except that scm_c_read only
 * returns fewer than SIZE bytes if at end-of-file.
 *
 * Warning: Doesn't update port line and column counts!  */
1445
1446 /* This structure, and the following swap_buffer function, are used
1447 for temporarily swapping a port's own read buffer, and the buffer
1448 that the caller of scm_c_read provides. */
1449 struct port_and_swap_buffer
1450 {
1451 scm_t_port *pt;
1452 unsigned char *buffer;
1453 size_t size;
1454 };
1455
1456 static void
1457 swap_buffer (void *data)
1458 {
1459 struct port_and_swap_buffer *psb = (struct port_and_swap_buffer *) data;
1460 unsigned char *old_buf = psb->pt->read_buf;
1461 size_t old_size = psb->pt->read_buf_size;
1462
1463 /* Make the port use (buffer, size) from the struct. */
1464 psb->pt->read_pos = psb->pt->read_buf = psb->pt->read_end = psb->buffer;
1465 psb->pt->read_buf_size = psb->size;
1466
1467 /* Save the port's old (buffer, size) in the struct. */
1468 psb->buffer = old_buf;
1469 psb->size = old_size;
1470 }
1471
1472 static int scm_i_fill_input_unlocked (SCM port);
1473
/* Read up to SIZE bytes from PORT into BUFFER without taking the port
   lock, and return the number of bytes actually stored.  PORT is
   validated as an open input port.  Pending output is flushed first
   when the port was last used for writing.  Bytes already sitting in
   the port's read buffer are consumed before any refill is attempted;
   after that scm_i_fill_input_unlocked is called repeatedly until SIZE
   bytes have been delivered or it reports EOF, so a short count means
   end-of-file was reached.  */
1474 size_t
1475 scm_c_read_unlocked (SCM port, void *buffer, size_t size)
1476 #define FUNC_NAME "scm_c_read"
1477 {
1478 scm_t_port *pt;
1479 scm_t_port_internal *pti;
1480 size_t n_read = 0, n_available;
1481 struct port_and_swap_buffer psb;
1482
1483 SCM_VALIDATE_OPINPORT (1, port);
1484
1485 pt = SCM_PTAB_ENTRY (port);
1486 pti = SCM_PORT_GET_INTERNAL (port);
/* Switching from writing to reading: flush through the port type's
   flush entry point first.  */
1487 if (pt->rw_active == SCM_PORT_WRITE)
1488 SCM_PORT_DESCRIPTOR (port)->flush (port);
1489
/* Record that the port is now being read (only tracked when the
   port's rw_random flag is set).  */
1490 if (pt->rw_random)
1491 pt->rw_active = SCM_PORT_READ;
1492
1493 /* Take bytes first from the port's read buffer. */
1494 if (pt->read_pos < pt->read_end)
1495 {
1496 n_available = min (size, pt->read_end - pt->read_pos);
1497 memcpy (buffer, pt->read_pos, n_available);
/* Advance the destination and shrink the outstanding request, so
   that BUFFER/SIZE below describe only what is still missing.  */
1498 buffer = (char *) buffer + n_available;
1499 pt->read_pos += n_available;
1500 n_read += n_available;
1501 size -= n_available;
1502 }
1503
1504 /* Avoid the scm_dynwind_* costs if we now have enough data. */
1505 if (size == 0)
1506 return n_read;
1507
1508 /* Now we will call scm_i_fill_input_unlocked repeatedly until we have
1509 read the requested number of bytes. (Note that a single
1510 scm_i_fill_input_unlocked call does not guarantee to fill the whole
1511 of the port's read buffer.) */
1512 if (pt->read_buf_size <= 1
1513 && pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
1514 {
1515 /* The port that we are reading from is unbuffered - i.e. does not
1516 have its own persistent buffer - but we have a buffer, provided
1517 by our caller, that is the right size for the data that is
1518 wanted. For the following scm_i_fill_input_unlocked calls,
1519 therefore, we use the buffer in hand as the port's read buffer.
1520
1521 We need to make sure that the port's normal (1 byte) buffer is
1522 reinstated in case one of the scm_i_fill_input_unlocked ()
1523 calls throws an exception; we use the scm_dynwind_* API to
1524 achieve that.
1525
1526 A consequence of this optimization is that the fill_input
1527 functions can't unget characters. That'll push data to the
1528 pushback buffer instead of this psb buffer. */
1529 #if SCM_DEBUG == 1
1530 unsigned char *pback = pt->putback_buf;
1531 #endif
1532 psb.pt = pt;
1533 psb.buffer = buffer;
1534 psb.size = size;
/* swap_buffer is registered both as the rewind and as the unwind
   handler: it exchanges the two buffers, so running it again undoes
   the previous exchange.  */
1535 scm_dynwind_begin (SCM_F_DYNWIND_REWINDABLE);
1536 scm_dynwind_rewind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1537 scm_dynwind_unwind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1538
1539 /* Call scm_i_fill_input_unlocked until we have all the bytes that
1540 we need, or we hit EOF. */
1541 while (pt->read_buf_size && (scm_i_fill_input_unlocked (port) != EOF))
1542 {
/* Treat everything just filled in as consumed: shrink the remaining
   capacity and move the buffer start past the new data, so the next
   fill appends after it.  */
1543 pt->read_buf_size -= (pt->read_end - pt->read_pos);
1544 pt->read_pos = pt->read_buf = pt->read_end;
1545 }
1546 #if SCM_DEBUG == 1
1547 if (pback != pt->putback_buf
1548 || pt->read_buf - (unsigned char *) buffer < 0)
1549 scm_misc_error (FUNC_NAME,
1550 "scm_c_read must not call a fill function that pushes "
1551 "back characters onto an unbuffered port", SCM_EOL);
1552 #endif
/* read_buf has advanced from BUFFER by exactly the number of bytes
   the fill calls delivered.  */
1553 n_read += pt->read_buf - (unsigned char *) buffer;
1554
1555 /* Reinstate the port's normal buffer. */
1556 scm_dynwind_end ();
1557 }
1558 else
1559 {
1560 /* The port has its own buffer. It is important that we use it,
1561 even if it happens to be smaller than our caller's buffer, so
1562 that a custom port implementation's entry points (in
1563 particular, fill_input) can rely on the buffer always being
1564 the same as they first set up. */
1565 while (size && (scm_i_fill_input_unlocked (port) != EOF))
1566 {
1567 n_available = min (size, pt->read_end - pt->read_pos);
1568 memcpy (buffer, pt->read_pos, n_available);
1569 buffer = (char *) buffer + n_available;
1570 pt->read_pos += n_available;
1571 n_read += n_available;
1572 size -= n_available;
1573 }
1574 }
1575
1576 return n_read;
1577 }
1578 #undef FUNC_NAME
1579
1580 size_t
1581 scm_c_read (SCM port, void *buffer, size_t size)
1582 {
1583 scm_i_pthread_mutex_t *lock;
1584 size_t ret;
1585
1586 scm_c_lock_port (port, &lock);
1587 ret = scm_c_read_unlocked (port, buffer, size);
1588 if (lock)
1589 scm_i_pthread_mutex_unlock (lock);
1590
1591
1592 return ret;
1593 }
1594
1595 /* Update the line and column number of PORT after consumption of C. */
1596 static inline void
1597 update_port_lf (scm_t_wchar c, SCM port)
1598 {
1599 switch (c)
1600 {
1601 case '\a':
1602 case EOF:
1603 break;
1604 case '\b':
1605 SCM_DECCOL (port);
1606 break;
1607 case '\n':
1608 SCM_INCLINE (port);
1609 break;
1610 case '\r':
1611 SCM_ZEROCOL (port);
1612 break;
1613 case '\t':
1614 SCM_TABCOL (port);
1615 break;
1616 default:
1617 SCM_INCCOL (port);
1618 break;
1619 }
1620 }
1621
1622 #define SCM_MBCHAR_BUF_SIZE (4)
1623
1624 /* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint.
1625 UTF8_BUF is assumed to contain a valid UTF-8 sequence. */
1626 static scm_t_wchar
1627 utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
1628 {
1629 scm_t_wchar codepoint;
1630
1631 if (utf8_buf[0] <= 0x7f)
1632 {
1633 assert (size == 1);
1634 codepoint = utf8_buf[0];
1635 }
1636 else if ((utf8_buf[0] & 0xe0) == 0xc0)
1637 {
1638 assert (size == 2);
1639 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL
1640 | (utf8_buf[1] & 0x3f);
1641 }
1642 else if ((utf8_buf[0] & 0xf0) == 0xe0)
1643 {
1644 assert (size == 3);
1645 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL
1646 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL
1647 | (utf8_buf[2] & 0x3f);
1648 }
1649 else
1650 {
1651 assert (size == 4);
1652 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL
1653 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL
1654 | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL
1655 | (utf8_buf[3] & 0x3f);
1656 }
1657
1658 return codepoint;
1659 }
1660
1661 /* Read a UTF-8 sequence from PORT. On success, return 0 and set
1662 *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8
1663 representation, and set *LEN to the length in bytes. Return
1664 `EILSEQ' on error.

   End-of-file before the first byte is not an error: 0 is returned
   with *CODEPOINT set to EOF and *LEN set to 0.  On `EILSEQ', *LEN is
   the number of bytes that were consumed and stored in BUF before the
   sequence turned out to be invalid.  Continuation bytes are peeked
   first and only consumed once they have been checked, so the byte
   that breaks a sequence stays in the port.  */
1665 static int
1666 get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
1667 scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1668 {
/* Jump to the error exit when the peeked value B is EOF, i.e. the
   input ended in the middle of a multi-byte sequence.  */
1669 #define ASSERT_NOT_EOF(b) \
1670 if (SCM_UNLIKELY ((b) == EOF)) \
1671 goto invalid_seq
/* Step over the byte that was just peeked and found acceptable.  */
1672 #define CONSUME_PEEKED_BYTE() \
1673 pt->read_pos++
1674
1675 int byte;
1676 scm_t_port *pt;
1677
1678 *len = 0;
1679 pt = SCM_PTAB_ENTRY (port);
1680
/* The lead byte is always consumed, whatever its value.  */
1681 byte = scm_get_byte_or_eof_unlocked (port);
1682 if (byte == EOF)
1683 {
1684 *codepoint = EOF;
1685 return 0;
1686 }
1687
1688 buf[0] = (scm_t_uint8) byte;
1689 *len = 1;
1690
1691 if (buf[0] <= 0x7f)
1692 /* 1-byte form. */
1693 *codepoint = buf[0];
/* Lead bytes 0xc0 and 0xc1 are deliberately left out of this range
   (they could only start overlong encodings); they end up in the
   final `else' and are rejected.  */
1694 else if (buf[0] >= 0xc2 && buf[0] <= 0xdf)
1695 {
1696 /* 2-byte form. */
1697 byte = scm_peek_byte_or_eof_unlocked (port);
1698 ASSERT_NOT_EOF (byte);
1699
1700 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1701 goto invalid_seq;
1702
1703 CONSUME_PEEKED_BYTE ();
1704 buf[1] = (scm_t_uint8) byte;
1705 *len = 2;
1706
1707 *codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL
1708 | (buf[1] & 0x3f);
1709 }
1710 else if ((buf[0] & 0xf0) == 0xe0)
1711 {
1712 /* 3-byte form. */
1713 byte = scm_peek_byte_or_eof_unlocked (port);
1714 ASSERT_NOT_EOF (byte);
1715
/* Besides being a continuation byte, the second byte is range
   restricted for two lead bytes: after 0xe0 it must be >= 0xa0
   (smaller values would be overlong encodings), and after 0xed it
   must be <= 0x9f (larger values would encode surrogates).  */
1716 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80
1717 || (buf[0] == 0xe0 && byte < 0xa0)
1718 || (buf[0] == 0xed && byte > 0x9f)))
1719 goto invalid_seq;
1720
1721 CONSUME_PEEKED_BYTE ();
1722 buf[1] = (scm_t_uint8) byte;
1723 *len = 2;
1724
1725 byte = scm_peek_byte_or_eof_unlocked (port);
1726 ASSERT_NOT_EOF (byte);
1727
1728 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1729 goto invalid_seq;
1730
1731 CONSUME_PEEKED_BYTE ();
1732 buf[2] = (scm_t_uint8) byte;
1733 *len = 3;
1734
1735 *codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL
1736 | ((scm_t_wchar) buf[1] & 0x3f) << 6UL
1737 | (buf[2] & 0x3f);
1738 }
1739 else if (buf[0] >= 0xf0 && buf[0] <= 0xf4)
1740 {
1741 /* 4-byte form. */
1742 byte = scm_peek_byte_or_eof_unlocked (port);
1743 ASSERT_NOT_EOF (byte);
1744
/* Same idea as in the 3-byte form: after 0xf0 the second byte must
   be >= 0x90 (no overlong encodings), and after 0xf4 it must be
   <= 0x8f (nothing above U+10FFFF).  */
1745 if (SCM_UNLIKELY (((byte & 0xc0) != 0x80)
1746 || (buf[0] == 0xf0 && byte < 0x90)
1747 || (buf[0] == 0xf4 && byte > 0x8f)))
1748 goto invalid_seq;
1749
1750 CONSUME_PEEKED_BYTE ();
1751 buf[1] = (scm_t_uint8) byte;
1752 *len = 2;
1753
1754 byte = scm_peek_byte_or_eof_unlocked (port);
1755 ASSERT_NOT_EOF (byte);
1756
1757 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1758 goto invalid_seq;
1759
1760 CONSUME_PEEKED_BYTE ();
1761 buf[2] = (scm_t_uint8) byte;
1762 *len = 3;
1763
1764 byte = scm_peek_byte_or_eof_unlocked (port);
1765 ASSERT_NOT_EOF (byte);
1766
1767 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1768 goto invalid_seq;
1769
1770 CONSUME_PEEKED_BYTE ();
1771 buf[3] = (scm_t_uint8) byte;
1772 *len = 4;
1773
1774 *codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL
1775 | ((scm_t_wchar) buf[1] & 0x3f) << 12UL
1776 | ((scm_t_wchar) buf[2] & 0x3f) << 6UL
1777 | (buf[3] & 0x3f);
1778 }
1779 else
/* Not a valid lead byte: a stray continuation byte, 0xc0/0xc1, or
   0xf5 and above.  */
1780 goto invalid_seq;
1781
1782 return 0;
1783
1784 invalid_seq:
1785 /* Here we could choose to consume the faulty byte when it's not a
1786 valid starting byte, but it's not a requirement. What Section 3.9
1787 of Unicode 6.0.0 mandates, though, is to not consume a byte that
1788 would otherwise be a valid starting byte. */
1789
1790 return EILSEQ;
1791
1792 #undef CONSUME_PEEKED_BYTE
1793 #undef ASSERT_NOT_EOF
1794 }
1795
1796 /* Read an ISO-8859-1 codepoint (a byte) from PORT. On success, return
1797 0 and set *CODEPOINT to the codepoint that was read, fill BUF with
1798 its UTF-8 representation, and set *LEN to the length in bytes.
1799 Return `EILSEQ' on error. */
1800 static int
1801 get_latin1_codepoint (SCM port, scm_t_wchar *codepoint,
1802 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1803 {
1804 *codepoint = scm_get_byte_or_eof_unlocked (port);
1805
1806 if (*codepoint == EOF)
1807 *len = 0;
1808 else
1809 {
1810 *len = 1;
1811 buf[0] = *codepoint;
1812 }
1813 return 0;
1814 }
1815
1816 /* Likewise, read a byte sequence from PORT, passing it through its
1817 input conversion descriptor. */
1818 static int
1819 get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
1820 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1821 {
1822 scm_t_iconv_descriptors *id;
1823 scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
1824 size_t input_size = 0;
1825
1826 id = scm_i_port_iconv_descriptors (port, SCM_PORT_READ);
1827
1828 for (;;)
1829 {
1830 int byte_read;
1831 char *input, *output;
1832 size_t input_left, output_left, done;
1833
1834 byte_read = scm_get_byte_or_eof_unlocked (port);
1835 if (SCM_UNLIKELY (byte_read == EOF))
1836 {
1837 if (SCM_LIKELY (input_size == 0))
1838 {
1839 *codepoint = (scm_t_wchar) EOF;
1840 *len = input_size;
1841 return 0;
1842 }
1843 else
1844 {
1845 /* EOF found in the middle of a multibyte character. */
1846 scm_i_set_pending_eof (port);
1847 return EILSEQ;
1848 }
1849 }
1850
1851 buf[input_size++] = byte_read;
1852
1853 input = buf;
1854 input_left = input_size;
1855 output = (char *) utf8_buf;
1856 output_left = sizeof (utf8_buf);
1857
1858 done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
1859
1860 if (done == (size_t) -1)
1861 {
1862 int err = errno;
1863 if (SCM_LIKELY (err == EINVAL))
1864 /* The input byte sequence did not form a complete
1865 character. Read another byte and try again. */
1866 continue;
1867 else
1868 return err;
1869 }
1870 else
1871 {
1872 size_t output_size = sizeof (utf8_buf) - output_left;
1873 if (SCM_LIKELY (output_size > 0))
1874 {
1875 /* iconv generated output. Convert the UTF8_BUF sequence
1876 to a Unicode code point. */
1877 *codepoint = utf8_to_codepoint (utf8_buf, output_size);
1878 *len = input_size;
1879 return 0;
1880 }
1881 else
1882 {
1883 /* iconv consumed some bytes without producing any output.
1884 Most likely this means that a Unicode byte-order mark
1885 (BOM) was consumed, which should not be included in the
1886 returned buf. Shift any remaining bytes to the beginning
1887 of buf, and continue the loop. */
1888 memmove (buf, input, input_left);
1889 input_size = input_left;
1890 continue;
1891 }
1892 }
1893 }
1894 }
1895
1896 /* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
1897 with the byte representation of the codepoint in PORT's encoding, and
1898 set *LEN to the length in bytes of that representation. Return 0 on
1899 success and an errno value on error. */
1900 static SCM_C_INLINE int
1901 get_codepoint (SCM port, scm_t_wchar *codepoint,
1902 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1903 {
1904 int err;
1905 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1906 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
1907
1908 if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
1909 err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
1910 else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
1911 err = get_latin1_codepoint (port, codepoint, buf, len);
1912 else
1913 err = get_iconv_codepoint (port, codepoint, buf, len);
1914
1915 if (SCM_LIKELY (err == 0))
1916 {
1917 if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read))
1918 {
1919 /* Record that we're no longer at stream start. */
1920 pti->at_stream_start_for_bom_read = 0;
1921 if (pt->rw_random)
1922 pti->at_stream_start_for_bom_write = 0;
1923
1924 /* If we just read a BOM in an encoding that recognizes them,
1925 then silently consume it and read another code point. */
1926 if (SCM_UNLIKELY
1927 (*codepoint == SCM_UNICODE_BOM
1928 && (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8
1929 || strcmp (pt->encoding, "UTF-16") == 0
1930 || strcmp (pt->encoding, "UTF-32") == 0)))
1931 return get_codepoint (port, codepoint, buf, len);
1932 }
1933 update_port_lf (*codepoint, port);
1934 }
1935 else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
1936 {
1937 *codepoint = '?';
1938 err = 0;
1939 update_port_lf (*codepoint, port);
1940 }
1941
1942 return err;
1943 }
1944
1945 /* Read a codepoint from PORT and return it. */
1946 scm_t_wchar
1947 scm_getc_unlocked (SCM port)
1948 #define FUNC_NAME "scm_getc"
1949 {
1950 int err;
1951 size_t len;
1952 scm_t_wchar codepoint;
1953 char buf[SCM_MBCHAR_BUF_SIZE];
1954
1955 err = get_codepoint (port, &codepoint, buf, &len);
1956 if (SCM_UNLIKELY (err != 0))
1957 /* At this point PORT should point past the invalid encoding, as per
1958 R6RS-lib Section 8.2.4. */
1959 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
1960
1961 return codepoint;
1962 }
1963 #undef FUNC_NAME
1964
1965 scm_t_wchar
1966 scm_getc (SCM port)
1967 {
1968 scm_i_pthread_mutex_t *lock;
1969 scm_t_wchar ret;
1970
1971 scm_c_lock_port (port, &lock);
1972 ret = scm_getc_unlocked (port);
1973 if (lock)
1974 scm_i_pthread_mutex_unlock (lock);
1975
1976
1977 return ret;
1978 }
1979
1980 SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
1981 (SCM port),
1982 "Return the next character available from @var{port}, updating\n"
1983 "@var{port} to point to the following character. If no more\n"
1984 "characters are available, the end-of-file object is returned.\n"
1985 "\n"
1986 "When @var{port}'s data cannot be decoded according to its\n"
1987 "character encoding, a @code{decoding-error} is raised and\n"
1988 "@var{port} points past the erroneous byte sequence.\n")
1989 #define FUNC_NAME s_scm_read_char
1990 {
1991 scm_t_wchar c;
1992 if (SCM_UNBNDP (port))
1993 port = scm_current_input_port ();
1994 SCM_VALIDATE_OPINPORT (1, port);
1995 c = scm_getc_unlocked (port);
1996 if (EOF == c)
1997 return SCM_EOF_VAL;
1998 return SCM_MAKE_CHAR (c);
1999 }
2000 #undef FUNC_NAME
2001
2002
2003 \f
2004
2005 /* Pushback. */
2006 \f
2007
2008
2009 static void
2010 scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
2011 #define FUNC_NAME "scm_unget_bytes"
2012 {
2013 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2014 size_t old_len, new_len;
2015
2016 scm_i_clear_pending_eof (port);
2017
2018 if (pt->read_buf != pt->putback_buf)
2019 /* switch to the put-back buffer. */
2020 {
2021 if (pt->putback_buf == NULL)
2022 {
2023 pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
2024 ? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
2025 pt->putback_buf
2026 = (unsigned char *) scm_gc_malloc_pointerless
2027 (pt->putback_buf_size, "putback buffer");
2028 }
2029
2030 pt->saved_read_buf = pt->read_buf;
2031 pt->saved_read_pos = pt->read_pos;
2032 pt->saved_read_end = pt->read_end;
2033 pt->saved_read_buf_size = pt->read_buf_size;
2034
2035 /* Put read_pos at the end of the buffer, so that ungets will not
2036 have to shift the buffer contents each time. */
2037 pt->read_buf = pt->putback_buf;
2038 pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
2039 pt->read_buf_size = pt->putback_buf_size;
2040 }
2041
2042 old_len = pt->read_end - pt->read_pos;
2043 new_len = old_len + len;
2044
2045 if (new_len > pt->read_buf_size)
2046 /* The putback buffer needs to be enlarged. */
2047 {
2048 size_t new_buf_size;
2049 unsigned char *new_buf, *new_end, *new_pos;
2050
2051 new_buf_size = pt->read_buf_size * 2;
2052 if (new_buf_size < new_len)
2053 new_buf_size = new_len;
2054
2055 new_buf = (unsigned char *)
2056 scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
2057
2058 /* Put the bytes at the end of the buffer, so that future
2059 ungets won't need to shift the buffer. */
2060 new_end = new_buf + new_buf_size;
2061 new_pos = new_end - old_len;
2062 memcpy (new_pos, pt->read_pos, old_len);
2063
2064 pt->read_buf = pt->putback_buf = new_buf;
2065 pt->read_pos = new_pos;
2066 pt->read_end = new_end;
2067 pt->read_buf_size = pt->putback_buf_size = new_buf_size;
2068 }
2069 else if (pt->read_buf + len < pt->read_pos)
2070 /* If needed, shift the existing buffer contents up.
2071 This should not happen unless some external code
2072 manipulates the putback buffer pointers. */
2073 {
2074 unsigned char *new_end = pt->read_buf + pt->read_buf_size;
2075 unsigned char *new_pos = new_end - old_len;
2076
2077 memmove (new_pos, pt->read_pos, old_len);
2078 pt->read_pos = new_pos;
2079 pt->read_end = new_end;
2080 }
2081
2082 /* Move read_pos back and copy the bytes there. */
2083 pt->read_pos -= len;
2084 memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
2085
2086 if (pt->rw_active == SCM_PORT_WRITE)
2087 scm_flush (port);
2088
2089 if (pt->rw_random)
2090 pt->rw_active = SCM_PORT_READ;
2091 }
2092 #undef FUNC_NAME
2093
2094 void
2095 scm_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
2096 {
2097 scm_i_unget_bytes_unlocked (buf, len, port);
2098 }
2099
2100 void
2101 scm_unget_byte_unlocked (int c, SCM port)
2102 {
2103 unsigned char byte = c;
2104 scm_i_unget_bytes_unlocked (&byte, 1, port);
2105 }
2106
2107 void
2108 scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
2109 {
2110 scm_i_pthread_mutex_t *lock;
2111 scm_c_lock_port (port, &lock);
2112 scm_i_unget_bytes_unlocked (buf, len, port);
2113 if (lock)
2114 scm_i_pthread_mutex_unlock (lock);
2115 }
2116
2117 void
2118 scm_unget_byte (int c, SCM port)
2119 {
2120 unsigned char byte = c;
2121 scm_i_pthread_mutex_t *lock;
2122 scm_c_lock_port (port, &lock);
2123 scm_i_unget_bytes_unlocked (&byte, 1, port);
2124 if (lock)
2125 scm_i_pthread_mutex_unlock (lock);
2126 }
2127
2128 void
2129 scm_ungetc_unlocked (scm_t_wchar c, SCM port)
2130 #define FUNC_NAME "scm_ungetc"
2131 {
2132 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2133 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
2134 char *result;
2135 char result_buf[10];
2136 size_t len;
2137
2138 len = sizeof (result_buf);
2139
2140 if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
2141 {
2142 if (c < 0x80)
2143 {
2144 result_buf[0] = (char) c;
2145 result = result_buf;
2146 len = 1;
2147 }
2148 else
2149 result =
2150 (char *) u32_to_u8 ((uint32_t *) &c, 1, (uint8_t *) result_buf, &len);
2151 }
2152 else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1 && c <= 0xff)
2153 {
2154 result_buf[0] = (char) c;
2155 result = result_buf;
2156 len = 1;
2157 }
2158 else
2159 result = u32_conv_to_encoding (pt->encoding,
2160 (enum iconv_ilseq_handler) pt->ilseq_handler,
2161 (uint32_t *) &c, 1, NULL,
2162 result_buf, &len);
2163
2164 if (SCM_UNLIKELY (result == NULL || len == 0))
2165 scm_encoding_error (FUNC_NAME, errno,
2166 "conversion to port encoding failed",
2167 SCM_BOOL_F, SCM_MAKE_CHAR (c));
2168
2169 scm_i_unget_bytes_unlocked ((unsigned char *) result, len, port);
2170
2171 if (SCM_UNLIKELY (result != result_buf))
2172 free (result);
2173
2174 if (c == '\n')
2175 SCM_LINUM (port) -= 1;
2176 SCM_DECCOL (port);
2177 }
2178 #undef FUNC_NAME
2179
2180 void
2181 scm_ungetc (scm_t_wchar c, SCM port)
2182 {
2183 scm_i_pthread_mutex_t *lock;
2184 scm_c_lock_port (port, &lock);
2185 scm_ungetc_unlocked (c, port);
2186 if (lock)
2187 scm_i_pthread_mutex_unlock (lock);
2188
2189 }
2190
2191 void
2192 scm_ungets_unlocked (const char *s, int n, SCM port)
2193 {
2194 /* This is simple minded and inefficient, but unreading strings is
2195 * probably not a common operation, and remember that line and
2196 * column numbers have to be handled...
2197 *
2198 * Please feel free to write an optimized version!
2199 */
2200 while (n--)
2201 scm_ungetc_unlocked (s[n], port);
2202 }
2203
2204 void
2205 scm_ungets (const char *s, int n, SCM port)
2206 {
2207 scm_i_pthread_mutex_t *lock;
2208 scm_c_lock_port (port, &lock);
2209 scm_ungets_unlocked (s, n, port);
2210 if (lock)
2211 scm_i_pthread_mutex_unlock (lock);
2212
2213 }
2214
2215 SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
2216 (SCM port),
2217 "Return the next character available from @var{port},\n"
2218 "@emph{without} updating @var{port} to point to the following\n"
2219 "character. If no more characters are available, the\n"
2220 "end-of-file object is returned.\n"
2221 "\n"
2222 "The value returned by\n"
2223 "a call to @code{peek-char} is the same as the value that would\n"
2224 "have been returned by a call to @code{read-char} on the same\n"
2225 "port. The only difference is that the very next call to\n"
2226 "@code{read-char} or @code{peek-char} on that @var{port} will\n"
2227 "return the value returned by the preceding call to\n"
2228 "@code{peek-char}. In particular, a call to @code{peek-char} on\n"
2229 "an interactive port will hang waiting for input whenever a call\n"
2230 "to @code{read-char} would have hung.\n"
2231 "\n"
2232 "As for @code{read-char}, a @code{decoding-error} may be raised\n"
2233 "if such a situation occurs. However, unlike with @code{read-char},\n"
2234 "@var{port} still points at the beginning of the erroneous byte\n"
2235 "sequence when the error is raised.\n")
2236 #define FUNC_NAME s_scm_peek_char
2237 {
2238 int err;
2239 SCM result;
2240 scm_t_wchar c;
2241 char bytes[SCM_MBCHAR_BUF_SIZE];
2242 long column, line;
2243 size_t len = 0;
2244
2245 if (SCM_UNBNDP (port))
2246 port = scm_current_input_port ();
2247 SCM_VALIDATE_OPINPORT (1, port);
2248
2249 column = SCM_COL (port);
2250 line = SCM_LINUM (port);
2251
2252 err = get_codepoint (port, &c, bytes, &len);
2253
2254 scm_i_unget_bytes_unlocked ((unsigned char *) bytes, len, port);
2255
2256 SCM_COL (port) = column;
2257 SCM_LINUM (port) = line;
2258
2259 if (SCM_UNLIKELY (err != 0))
2260 {
2261 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
2262
2263 /* Shouldn't happen since `catch' always aborts to prompt. */
2264 result = SCM_BOOL_F;
2265 }
2266 else if (c == EOF)
2267 {
2268 scm_i_set_pending_eof (port);
2269 result = SCM_EOF_VAL;
2270 }
2271 else
2272 result = SCM_MAKE_CHAR (c);
2273
2274 return result;
2275 }
2276 #undef FUNC_NAME
2277
2278 SCM_DEFINE (scm_unread_char, "unread-char", 1, 1, 0,
2279 (SCM cobj, SCM port),
2280 "Place character @var{cobj} in @var{port} so that it will be\n"
2281 "read by the next read operation. If called multiple times, the\n"
2282 "unread characters will be read again in last-in first-out\n"
2283 "order. If @var{port} is not supplied, the current input port\n"
2284 "is used.")
2285 #define FUNC_NAME s_scm_unread_char
2286 {
2287 int c;
2288
2289 SCM_VALIDATE_CHAR (1, cobj);
2290 if (SCM_UNBNDP (port))
2291 port = scm_current_input_port ();
2292 SCM_VALIDATE_OPINPORT (2, port);
2293
2294 c = SCM_CHAR (cobj);
2295
2296 scm_ungetc_unlocked (c, port);
2297 return cobj;
2298 }
2299 #undef FUNC_NAME
2300
2301 SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0,
2302 (SCM str, SCM port),
2303 "Place the string @var{str} in @var{port} so that its characters will be\n"
2304 "read in subsequent read operations. If called multiple times, the\n"
2305 "unread characters will be read again in last-in first-out order. If\n"
2306 "@var{port} is not supplied, the current-input-port is used.")
2307 #define FUNC_NAME s_scm_unread_string
2308 {
2309 int n;
2310 SCM_VALIDATE_STRING (1, str);
2311 if (SCM_UNBNDP (port))
2312 port = scm_current_input_port ();
2313 SCM_VALIDATE_OPINPORT (2, port);
2314
2315 n = scm_i_string_length (str);
2316
2317 while (n--)
2318 scm_ungetc_unlocked (scm_i_string_ref (str, n), port);
2319
2320 return str;
2321 }
2322 #undef FUNC_NAME
2323
2324
2325 \f
2326
2327 /* Manipulating the buffers. */
2328
2329 /* This routine does not take any locks, as it is usually called as part
2330 of a port implementation. */
2331 void
2332 scm_port_non_buffer (scm_t_port *pt)
2333 {
2334 pt->read_pos = pt->read_buf = pt->read_end = &pt->shortbuf;
2335 pt->write_buf = pt->write_pos = &pt->shortbuf;
2336 pt->read_buf_size = pt->write_buf_size = 1;
2337 pt->write_end = pt->write_buf + pt->write_buf_size;
2338 }
2339
2340 /* this should only be called when the read buffer is empty. it
2341 tries to refill the read buffer. it returns the first char from
2342 the port, which is either EOF or *(pt->read_pos). */
2343 static int
2344 scm_i_fill_input_unlocked (SCM port)
2345 {
2346 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2347 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
2348
2349 assert (pt->read_pos == pt->read_end);
2350
2351 if (pti->pending_eof)
2352 {
2353 pti->pending_eof = 0;
2354 return EOF;
2355 }
2356
2357 if (pt->read_buf == pt->putback_buf)
2358 {
2359 /* finished reading put-back chars. */
2360 pt->read_buf = pt->saved_read_buf;
2361 pt->read_pos = pt->saved_read_pos;
2362 pt->read_end = pt->saved_read_end;
2363 pt->read_buf_size = pt->saved_read_buf_size;
2364 if (pt->read_pos < pt->read_end)
2365 return *(pt->read_pos);
2366 }
2367 return SCM_PORT_DESCRIPTOR (port)->fill_input (port);
2368 }
2369
2370 int
2371 scm_fill_input (SCM port)
2372 {
2373 scm_i_pthread_mutex_t *lock;
2374 int ret;
2375
2376 scm_c_lock_port (port, &lock);
2377 ret = scm_fill_input_unlocked (port);
2378 if (lock)
2379 scm_i_pthread_mutex_unlock (lock);
2380
2381
2382 return ret;
2383 }
2384
2385 /* Slow-path fallback for 'scm_get_byte_or_eof_unlocked' */
2386 int
2387 scm_slow_get_byte_or_eof_unlocked (SCM port)
2388 {
2389 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2390
2391 if (pt->rw_active == SCM_PORT_WRITE)
2392 scm_flush_unlocked (port);
2393
2394 if (pt->rw_random)
2395 pt->rw_active = SCM_PORT_READ;
2396
2397 if (pt->read_pos >= pt->read_end)
2398 {
2399 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2400 return EOF;
2401 }
2402
2403 return *pt->read_pos++;
2404 }
2405
2406 /* Slow-path fallback for 'scm_peek_byte_or_eof_unlocked' */
2407 int
2408 scm_slow_peek_byte_or_eof_unlocked (SCM port)
2409 {
2410 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2411
2412 if (pt->rw_active == SCM_PORT_WRITE)
2413 scm_flush_unlocked (port);
2414
2415 if (pt->rw_random)
2416 pt->rw_active = SCM_PORT_READ;
2417
2418 if (pt->read_pos >= pt->read_end)
2419 {
2420 if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
2421 {
2422 scm_i_set_pending_eof (port);
2423 return EOF;
2424 }
2425 }
2426
2427 return *pt->read_pos;
2428 }
2429
2430 /* Move up to READ_LEN bytes from PORT's putback and/or read buffers
2431 into memory starting at DEST. Return the number of bytes moved.
2432 PORT's line/column numbers are left unchanged. */
2433 size_t
2434 scm_take_from_input_buffers (SCM port, char *dest, size_t read_len)
2435 {
2436 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2437 size_t bytes_read = 0;
2438 size_t from_buf = min (pt->read_end - pt->read_pos, read_len);
2439
2440 if (from_buf > 0)
2441 {
2442 memcpy (dest, pt->read_pos, from_buf);
2443 pt->read_pos += from_buf;
2444 bytes_read += from_buf;
2445 read_len -= from_buf;
2446 dest += from_buf;
2447 }
2448
2449 /* if putback was active, try the real input buffer too. */
2450 if (pt->read_buf == pt->putback_buf)
2451 {
2452 from_buf = min (pt->saved_read_end - pt->saved_read_pos, read_len);
2453 if (from_buf > 0)
2454 {
2455 memcpy (dest, pt->saved_read_pos, from_buf);
2456 pt->saved_read_pos += from_buf;
2457 bytes_read += from_buf;
2458 }
2459 }
2460
2461 return bytes_read;
2462 }
2463
2464 /* Clear a port's read buffers, returning the contents. */
2465 SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0,
2466 (SCM port),
2467 "This procedure clears a port's input buffers, similar\n"
2468 "to the way that force-output clears the output buffer. The\n"
2469 "contents of the buffers are returned as a single string, e.g.,\n"
2470 "\n"
2471 "@lisp\n"
2472 "(define p (open-input-file ...))\n"
2473 "(drain-input p) => empty string, nothing buffered yet.\n"
2474 "(unread-char (read-char p) p)\n"
2475 "(drain-input p) => initial chars from p, up to the buffer size.\n"
2476 "@end lisp\n\n"
2477 "Draining the buffers may be useful for cleanly finishing\n"
2478 "buffered I/O so that the file descriptor can be used directly\n"
2479 "for further input.")
2480 #define FUNC_NAME s_scm_drain_input
2481 {
2482 SCM result;
2483 char *data;
2484 scm_t_port *pt;
2485 long count;
2486
2487 SCM_VALIDATE_OPINPORT (1, port);
2488 pt = SCM_PTAB_ENTRY (port);
2489
2490 count = pt->read_end - pt->read_pos;
2491 if (pt->read_buf == pt->putback_buf)
2492 count += pt->saved_read_end - pt->saved_read_pos;
2493
2494 if (count)
2495 {
2496 result = scm_i_make_string (count, &data, 0);
2497 scm_take_from_input_buffers (port, data, count);
2498 }
2499 else
2500 result = scm_nullstr;
2501
2502 return result;
2503 }
2504 #undef FUNC_NAME
2505
2506 void
2507 scm_end_input_unlocked (SCM port)
2508 {
2509 long offset;
2510 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2511
2512 scm_i_clear_pending_eof (port);
2513 if (pt->read_buf == pt->putback_buf)
2514 {
2515 offset = pt->read_end - pt->read_pos;
2516 pt->read_buf = pt->saved_read_buf;
2517 pt->read_pos = pt->saved_read_pos;
2518 pt->read_end = pt->saved_read_end;
2519 pt->read_buf_size = pt->saved_read_buf_size;
2520 }
2521 else
2522 offset = 0;
2523
2524 SCM_PORT_DESCRIPTOR (port)->end_input (port, offset);
2525 }
2526
2527 void
2528 scm_end_input (SCM port)
2529 {
2530 scm_i_pthread_mutex_t *lock;
2531 scm_c_lock_port (port, &lock);
2532 scm_end_input_unlocked (port);
2533 if (lock)
2534 scm_i_pthread_mutex_unlock (lock);
2535
2536 }
2537
2538 SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0,
2539 (SCM port),
2540 "Flush the specified output port, or the current output port if @var{port}\n"
2541 "is omitted. The current output buffer contents are passed to the\n"
2542 "underlying port implementation (e.g., in the case of fports, the\n"
2543 "data will be written to the file and the output buffer will be cleared.)\n"
2544 "It has no effect on an unbuffered port.\n\n"
2545 "The return value is unspecified.")
2546 #define FUNC_NAME s_scm_force_output
2547 {
2548 if (SCM_UNBNDP (port))
2549 port = scm_current_output_port ();
2550 else
2551 {
2552 port = SCM_COERCE_OUTPORT (port);
2553 SCM_VALIDATE_OPOUTPORT (1, port);
2554 }
2555 scm_flush_unlocked (port);
2556 return SCM_UNSPECIFIED;
2557 }
2558 #undef FUNC_NAME
2559
2560 void
2561 scm_flush_unlocked (SCM port)
2562 {
2563 SCM_PORT_DESCRIPTOR (port)->flush (port);
2564 }
2565
2566 void
2567 scm_flush (SCM port)
2568 {
2569 scm_i_pthread_mutex_t *lock;
2570 scm_c_lock_port (port, &lock);
2571 scm_flush_unlocked (port);
2572 if (lock)
2573 scm_i_pthread_mutex_unlock (lock);
2574
2575 }
2576
2577 int
2578 scm_fill_input_unlocked (SCM port)
2579 {
2580 return scm_i_fill_input_unlocked (port);
2581 }
2582
2583
2584 \f
2585
2586 /* Output. */
2587
2588 void
2589 scm_putc (char c, SCM port)
2590 {
2591 scm_i_pthread_mutex_t *lock;
2592 scm_c_lock_port (port, &lock);
2593 scm_putc_unlocked (c, port);
2594 if (lock)
2595 scm_i_pthread_mutex_unlock (lock);
2596
2597 }
2598
2599 void
2600 scm_puts (const char *s, SCM port)
2601 {
2602 scm_i_pthread_mutex_t *lock;
2603 scm_c_lock_port (port, &lock);
2604 scm_puts_unlocked (s, port);
2605 if (lock)
2606 scm_i_pthread_mutex_unlock (lock);
2607
2608 }
2609
2610 /* scm_c_write
2611 *
2612 * Used by an application to write arbitrary number of bytes to an SCM
2613 * port. Similar semantics as libc write. However, unlike libc
2614 * write, scm_c_write writes the requested number of bytes and has no
2615 * return value.
2616 *
2617 * Warning: Doesn't update port line and column counts!
2618 */
2619 void
2620 scm_c_write_unlocked (SCM port, const void *ptr, size_t size)
2621 #define FUNC_NAME "scm_c_write"
2622 {
2623 scm_t_port *pt;
2624 scm_t_ptob_descriptor *ptob;
2625
2626 SCM_VALIDATE_OPOUTPORT (1, port);
2627
2628 pt = SCM_PTAB_ENTRY (port);
2629 ptob = SCM_PORT_DESCRIPTOR (port);
2630
2631 if (pt->rw_active == SCM_PORT_READ)
2632 scm_end_input_unlocked (port);
2633
2634 ptob->write (port, ptr, size);
2635
2636 if (pt->rw_random)
2637 pt->rw_active = SCM_PORT_WRITE;
2638 }
2639 #undef FUNC_NAME
2640
2641 void
2642 scm_c_write (SCM port, const void *ptr, size_t size)
2643 {
2644 scm_i_pthread_mutex_t *lock;
2645 scm_c_lock_port (port, &lock);
2646 scm_c_write_unlocked (port, ptr, size);
2647 if (lock)
2648 scm_i_pthread_mutex_unlock (lock);
2649
2650 }
2651
2652 /* scm_lfwrite
2653 *
2654 * This function differs from scm_c_write; it updates port line and
2655 * column. */
2656 void
2657 scm_lfwrite_unlocked (const char *ptr, size_t size, SCM port)
2658 {
2659 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2660 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
2661
2662 if (pt->rw_active == SCM_PORT_READ)
2663 scm_end_input_unlocked (port);
2664
2665 ptob->write (port, ptr, size);
2666
2667 for (; size; ptr++, size--)
2668 update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port);
2669
2670 if (pt->rw_random)
2671 pt->rw_active = SCM_PORT_WRITE;
2672 }
2673
2674 void
2675 scm_lfwrite (const char *ptr, size_t size, SCM port)
2676 {
2677 scm_i_pthread_mutex_t *lock;
2678 scm_c_lock_port (port, &lock);
2679 scm_lfwrite_unlocked (ptr, size, port);
2680 if (lock)
2681 scm_i_pthread_mutex_unlock (lock);
2682
2683 }
2684
2685 /* Write STR to PORT from START inclusive to END exclusive. */
2686 void
2687 scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
2688 {
2689 scm_t_port *pt = SCM_PTAB_ENTRY (port);
2690
2691 if (pt->rw_active == SCM_PORT_READ)
2692 scm_end_input_unlocked (port);
2693
2694 if (end == (size_t) -1)
2695 end = scm_i_string_length (str);
2696
2697 scm_i_display_substring (str, start, end, port);
2698
2699 if (pt->rw_random)
2700 pt->rw_active = SCM_PORT_WRITE;
2701 }
2702
2703
2704 \f
2705
2706 /* Querying and setting positions, and character availability. */
2707
2708 SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0,
2709 (SCM port),
2710 "Return @code{#t} if a character is ready on input @var{port}\n"
2711 "and return @code{#f} otherwise. If @code{char-ready?} returns\n"
2712 "@code{#t} then the next @code{read-char} operation on\n"
2713 "@var{port} is guaranteed not to hang. If @var{port} is a file\n"
2714 "port at end of file then @code{char-ready?} returns @code{#t}.\n"
2715 "\n"
2716 "@code{char-ready?} exists to make it possible for a\n"
2717 "program to accept characters from interactive ports without\n"
2718 "getting stuck waiting for input. Any input editors associated\n"
2719 "with such ports must make sure that characters whose existence\n"
2720 "has been asserted by @code{char-ready?} cannot be rubbed out.\n"
2721 "If @code{char-ready?} were to return @code{#f} at end of file,\n"
2722 "a port at end of file would be indistinguishable from an\n"
2723 "interactive port that has no ready characters.")
2724 #define FUNC_NAME s_scm_char_ready_p
2725 {
2726 scm_t_port *pt;
2727
2728 if (SCM_UNBNDP (port))
2729 port = scm_current_input_port ();
2730 /* It's possible to close the current input port, so validate even in
2731 this case. */
2732 SCM_VALIDATE_OPINPORT (1, port);
2733
2734 pt = SCM_PTAB_ENTRY (port);
2735
2736 /* if the current read buffer is filled, or the
2737 last pushed-back char has been read and the saved buffer is
2738 filled, result is true. */
2739 if (pt->read_pos < pt->read_end
2740 || (pt->read_buf == pt->putback_buf
2741 && pt->saved_read_pos < pt->saved_read_end))
2742 return SCM_BOOL_T;
2743 else
2744 {
2745 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
2746
2747 if (ptob->input_waiting)
2748 return scm_from_bool(ptob->input_waiting (port));
2749 else
2750 return SCM_BOOL_T;
2751 }
2752 }
2753 #undef FUNC_NAME
2754
2755 SCM_DEFINE (scm_seek, "seek", 3, 0, 0,
2756 (SCM fd_port, SCM offset, SCM whence),
2757 "Sets the current position of @var{fd_port} to the integer\n"
2758 "@var{offset}, which is interpreted according to the value of\n"
2759 "@var{whence}.\n"
2760 "\n"
2761 "One of the following variables should be supplied for\n"
2762 "@var{whence}:\n"
2763 "@defvar SEEK_SET\n"
2764 "Seek from the beginning of the file.\n"
2765 "@end defvar\n"
2766 "@defvar SEEK_CUR\n"
2767 "Seek from the current position.\n"
2768 "@end defvar\n"
2769 "@defvar SEEK_END\n"
2770 "Seek from the end of the file.\n"
2771 "@end defvar\n"
2772 "If @var{fd_port} is a file descriptor, the underlying system\n"
2773 "call is @code{lseek}. @var{port} may be a string port.\n"
2774 "\n"
2775 "The value returned is the new position in the file. This means\n"
2776 "that the current position of a port can be obtained using:\n"
2777 "@lisp\n"
2778 "(seek port 0 SEEK_CUR)\n"
2779 "@end lisp")
2780 #define FUNC_NAME s_scm_seek
2781 {
2782 int how;
2783
2784 fd_port = SCM_COERCE_OUTPORT (fd_port);
2785
2786 how = scm_to_int (whence);
2787 if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END)
2788 SCM_OUT_OF_RANGE (3, whence);
2789
2790 if (SCM_OPPORTP (fd_port))
2791 {
2792 scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (fd_port);
2793 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (fd_port);
2794 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2795 off_t_or_off64_t rv;
2796
2797 if (!ptob->seek)
2798 SCM_MISC_ERROR ("port is not seekable",
2799 scm_cons (fd_port, SCM_EOL));
2800 else
2801 rv = ptob->seek (fd_port, off, how);
2802
2803 /* Set stream-start flags according to new position. */
2804 pti->at_stream_start_for_bom_read = (rv == 0);
2805 pti->at_stream_start_for_bom_write = (rv == 0);
2806
2807 scm_i_clear_pending_eof (fd_port);
2808
2809 return scm_from_off_t_or_off64_t (rv);
2810 }
2811 else /* file descriptor?. */
2812 {
2813 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
2814 off_t_or_off64_t rv;
2815 rv = lseek_or_lseek64 (scm_to_int (fd_port), off, how);
2816 if (rv == -1)
2817 SCM_SYSERROR;
2818 return scm_from_off_t_or_off64_t (rv);
2819 }
2820 }
2821 #undef FUNC_NAME
2822
/* Provide a no-op O_BINARY where the platform does not define one, so
   it can be or'ed into open() flags unconditionally.  */
#ifndef O_BINARY
#define O_BINARY 0
#endif

/* Mingw has ftruncate(), perhaps implemented above using chsize, but
   doesn't have the filename version truncate(), hence this code.

   Open FILE for writing, ftruncate it to LENGTH, and close it again.
   Returns -1 if the open or the ftruncate fails, otherwise the result
   of close().  When ftruncate fails its errno is preserved across the
   close().  */
#if HAVE_FTRUNCATE && ! HAVE_TRUNCATE
static int
truncate (const char *file, off_t length)
{
  int fd;
  int saved_errno;

  fd = open (file, O_BINARY | O_WRONLY);
  if (fd == -1)
    return -1;

  if (ftruncate (fd, length) != -1)
    return close (fd);

  /* ftruncate failed: close the descriptor without letting close()
     clobber the errno value the caller will look at.  */
  saved_errno = errno;
  close (fd);
  errno = saved_errno;

  return -1;
}
#endif /* HAVE_FTRUNCATE && ! HAVE_TRUNCATE */
2851
2852 SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0,
2853 (SCM object, SCM length),
2854 "Truncate file @var{object} to @var{length} bytes. @var{object}\n"
2855 "can be a filename string, a port object, or an integer file\n"
2856 "descriptor.\n"
2857 "The return value is unspecified.\n"
2858 "\n"
2859 "For a port or file descriptor @var{length} can be omitted, in\n"
2860 "which case the file is truncated at the current position (per\n"
2861 "@code{ftell} above).\n"
2862 "\n"
2863 "On most systems a file can be extended by giving a length\n"
2864 "greater than the current size, but this is not mandatory in the\n"
2865 "POSIX standard.")
2866 #define FUNC_NAME s_scm_truncate_file
2867 {
2868 int rv;
2869
2870 /* "object" can be a port, fdes or filename.
2871
2872 Negative "length" makes no sense, but it's left to truncate() or
2873 ftruncate() to give back an error for that (normally EINVAL).
2874 */
2875
2876 if (SCM_UNBNDP (length))
2877 {
2878 /* must supply length if object is a filename. */
2879 if (scm_is_string (object))
2880 SCM_MISC_ERROR("must supply length if OBJECT is a filename", SCM_EOL);
2881
2882 length = scm_seek (object, SCM_INUM0, scm_from_int (SEEK_CUR));
2883 }
2884
2885 object = SCM_COERCE_OUTPORT (object);
2886 if (scm_is_integer (object))
2887 {
2888 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2889 SCM_SYSCALL (rv = ftruncate_or_ftruncate64 (scm_to_int (object),
2890 c_length));
2891 }
2892 else if (SCM_OPOUTPORTP (object))
2893 {
2894 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2895 scm_t_port *pt = SCM_PTAB_ENTRY (object);
2896 scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (object);
2897
2898 if (!ptob->truncate)
2899 SCM_MISC_ERROR ("port is not truncatable", SCM_EOL);
2900
2901 scm_i_clear_pending_eof (object);
2902 if (pt->rw_active == SCM_PORT_READ)
2903 scm_end_input_unlocked (object);
2904 else if (pt->rw_active == SCM_PORT_WRITE)
2905 ptob->flush (object);
2906
2907 ptob->truncate (object, c_length);
2908 rv = 0;
2909 }
2910 else
2911 {
2912 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
2913 char *str = scm_to_locale_string (object);
2914 int eno;
2915 SCM_SYSCALL (rv = truncate_or_truncate64 (str, c_length));
2916 eno = errno;
2917 free (str);
2918 errno = eno;
2919 }
2920 if (rv == -1)
2921 SCM_SYSERROR;
2922 return SCM_UNSPECIFIED;
2923 }
2924 #undef FUNC_NAME
2925
2926 SCM_DEFINE (scm_port_line, "port-line", 1, 0, 0,
2927 (SCM port),
2928 "Return the current line number for @var{port}.\n"
2929 "\n"
2930 "The first line of a file is 0. But you might want to add 1\n"
2931 "when printing line numbers, since starting from 1 is\n"
2932 "traditional in error messages, and likely to be more natural to\n"
2933 "non-programmers.")
2934 #define FUNC_NAME s_scm_port_line
2935 {
2936 port = SCM_COERCE_OUTPORT (port);
2937 SCM_VALIDATE_OPENPORT (1, port);
2938 return scm_from_long (SCM_LINUM (port));
2939 }
2940 #undef FUNC_NAME
2941
2942 SCM_DEFINE (scm_set_port_line_x, "set-port-line!", 2, 0, 0,
2943 (SCM port, SCM line),
2944 "Set the current line number for @var{port} to @var{line}. The\n"
2945 "first line of a file is 0.")
2946 #define FUNC_NAME s_scm_set_port_line_x
2947 {
2948 port = SCM_COERCE_OUTPORT (port);
2949 SCM_VALIDATE_OPENPORT (1, port);
2950 SCM_PTAB_ENTRY (port)->line_number = scm_to_long (line);
2951 return SCM_UNSPECIFIED;
2952 }
2953 #undef FUNC_NAME
2954
2955 SCM_DEFINE (scm_port_column, "port-column", 1, 0, 0,
2956 (SCM port),
2957 "Return the current column number of @var{port}.\n"
2958 "If the number is\n"
2959 "unknown, the result is #f. Otherwise, the result is a 0-origin integer\n"
2960 "- i.e. the first character of the first line is line 0, column 0.\n"
2961 "(However, when you display a file position, for example in an error\n"
2962 "message, we recommend you add 1 to get 1-origin integers. This is\n"
2963 "because lines and column numbers traditionally start with 1, and that is\n"
2964 "what non-programmers will find most natural.)")
2965 #define FUNC_NAME s_scm_port_column
2966 {
2967 port = SCM_COERCE_OUTPORT (port);
2968 SCM_VALIDATE_OPENPORT (1, port);
2969 return scm_from_int (SCM_COL (port));
2970 }
2971 #undef FUNC_NAME
2972
2973 SCM_DEFINE (scm_set_port_column_x, "set-port-column!", 2, 0, 0,
2974 (SCM port, SCM column),
2975 "Set the current column of @var{port}. Before reading the first\n"
2976 "character on a line the column should be 0.")
2977 #define FUNC_NAME s_scm_set_port_column_x
2978 {
2979 port = SCM_COERCE_OUTPORT (port);
2980 SCM_VALIDATE_OPENPORT (1, port);
2981 SCM_PTAB_ENTRY (port)->column_number = scm_to_int (column);
2982 return SCM_UNSPECIFIED;
2983 }
2984 #undef FUNC_NAME
2985
2986 SCM_DEFINE (scm_port_filename, "port-filename", 1, 0, 0,
2987 (SCM port),
2988 "Return the filename associated with @var{port}, or @code{#f}\n"
2989 "if no filename is associated with the port.")
2990 #define FUNC_NAME s_scm_port_filename
2991 {
2992 port = SCM_COERCE_OUTPORT (port);
2993 SCM_VALIDATE_OPENPORT (1, port);
2994 return SCM_FILENAME (port);
2995 }
2996 #undef FUNC_NAME
2997
2998 SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0,
2999 (SCM port, SCM filename),
3000 "Change the filename associated with @var{port}, using the current input\n"
3001 "port if none is specified. Note that this does not change the port's\n"
3002 "source of data, but only the value that is returned by\n"
3003 "@code{port-filename} and reported in diagnostic output.")
3004 #define FUNC_NAME s_scm_set_port_filename_x
3005 {
3006 port = SCM_COERCE_OUTPORT (port);
3007 SCM_VALIDATE_OPENPORT (1, port);
3008 /* We allow the user to set the filename to whatever he likes. */
3009 SCM_SET_FILENAME (port, filename);
3010 return SCM_UNSPECIFIED;
3011 }
3012 #undef FUNC_NAME
3013
3014
3015 \f
3016
3017 /* Implementation helpers for port printing functions. */
3018
3019 void
3020 scm_print_port_mode (SCM exp, SCM port)
3021 {
3022 scm_puts_unlocked (SCM_CLOSEDP (exp)
3023 ? "closed: "
3024 : (SCM_RDNG & SCM_CELL_WORD_0 (exp)
3025 ? (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
3026 ? "input-output: "
3027 : "input: ")
3028 : (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
3029 ? "output: "
3030 : "bogus: ")),
3031 port);
3032 }
3033
3034 int
3035 scm_port_print (SCM exp, SCM port, scm_print_state *pstate SCM_UNUSED)
3036 {
3037 char *type = SCM_PTOBNAME (SCM_PTOBNUM (exp));
3038 if (!type)
3039 type = "port";
3040 scm_puts_unlocked ("#<", port);
3041 scm_print_port_mode (exp, port);
3042 scm_puts_unlocked (type, port);
3043 scm_putc_unlocked (' ', port);
3044 scm_uintprint (SCM_CELL_WORD_1 (exp), 16, port);
3045 scm_putc_unlocked ('>', port);
3046 return 1;
3047 }
3048
3049
3050 \f
3051
3052 /* Iterating over all ports. */
3053
3054 struct for_each_data
3055 {
3056 void (*proc) (void *data, SCM p);
3057 void *data;
3058 };
3059
3060 static SCM
3061 for_each_trampoline (void *data, SCM port, SCM result)
3062 {
3063 struct for_each_data *d = data;
3064
3065 d->proc (d->data, port);
3066
3067 return result;
3068 }
3069
3070 void
3071 scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data)
3072 {
3073 struct for_each_data d;
3074
3075 d.proc = proc;
3076 d.data = data;
3077
3078 scm_c_weak_set_fold (for_each_trampoline, &d, SCM_EOL,
3079 scm_i_port_weak_set);
3080 }
3081
3082 static void
3083 scm_for_each_trampoline (void *data, SCM port)
3084 {
3085 scm_call_1 (SCM_PACK_POINTER (data), port);
3086 }
3087
3088 SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0,
3089 (SCM proc),
3090 "Apply @var{proc} to each port in the Guile port table\n"
3091 "in turn. The return value is unspecified. More specifically,\n"
3092 "@var{proc} is applied exactly once to every port that exists\n"
3093 "in the system at the time @code{port-for-each} is invoked.\n"
3094 "Changes to the port table while @code{port-for-each} is running\n"
3095 "have no effect as far as @code{port-for-each} is concerned.")
3096 #define FUNC_NAME s_scm_port_for_each
3097 {
3098 SCM_VALIDATE_PROC (1, proc);
3099
3100 scm_c_port_for_each (scm_for_each_trampoline, SCM_UNPACK_POINTER (proc));
3101
3102 return SCM_UNSPECIFIED;
3103 }
3104 #undef FUNC_NAME
3105
3106 static void
3107 flush_output_port (void *closure, SCM port)
3108 {
3109 if (SCM_OPOUTPORTP (port))
3110 scm_flush_unlocked (port);
3111 }
3112
3113 SCM_DEFINE (scm_flush_all_ports, "flush-all-ports", 0, 0, 0,
3114 (),
3115 "Equivalent to calling @code{force-output} on\n"
3116 "all open output ports. The return value is unspecified.")
3117 #define FUNC_NAME s_scm_flush_all_ports
3118 {
3119 scm_c_port_for_each (&flush_output_port, NULL);
3120 return SCM_UNSPECIFIED;
3121 }
3122 #undef FUNC_NAME
3123
3124
3125 \f
3126
3127 /* Void ports. */
3128
3129 scm_t_bits scm_tc16_void_port = 0;
3130
3131 static int fill_input_void_port (SCM port SCM_UNUSED)
3132 {
3133 return EOF;
3134 }
3135
3136 static void
3137 write_void_port (SCM port SCM_UNUSED,
3138 const void *data SCM_UNUSED,
3139 size_t size SCM_UNUSED)
3140 {
3141 }
3142
3143 static SCM
3144 scm_i_void_port (long mode_bits)
3145 {
3146 SCM ret;
3147
3148 ret = scm_c_make_port (scm_tc16_void_port, mode_bits, 0);
3149
3150 scm_port_non_buffer (SCM_PTAB_ENTRY (ret));
3151
3152 return ret;
3153 }
3154
3155 SCM
3156 scm_void_port (char *mode_str)
3157 {
3158 return scm_i_void_port (scm_mode_bits (mode_str));
3159 }
3160
3161 SCM_DEFINE (scm_sys_make_void_port, "%make-void-port", 1, 0, 0,
3162 (SCM mode),
3163 "Create and return a new void port. A void port acts like\n"
3164 "@file{/dev/null}. The @var{mode} argument\n"
3165 "specifies the input/output modes for this port: see the\n"
3166 "documentation for @code{open-file} in @ref{File Ports}.")
3167 #define FUNC_NAME s_scm_sys_make_void_port
3168 {
3169 return scm_i_void_port (scm_i_mode_bits (mode));
3170 }
3171 #undef FUNC_NAME
3172
3173
3174 \f
3175
3176 /* Initialization. */
3177
3178 void
3179 scm_init_ports ()
3180 {
3181 /* lseek() symbols. */
3182 scm_c_define ("SEEK_SET", scm_from_int (SEEK_SET));
3183 scm_c_define ("SEEK_CUR", scm_from_int (SEEK_CUR));
3184 scm_c_define ("SEEK_END", scm_from_int (SEEK_END));
3185
3186 scm_tc16_void_port = scm_make_port_type ("void", fill_input_void_port,
3187 write_void_port);
3188
3189 cur_inport_fluid = scm_make_fluid ();
3190 cur_outport_fluid = scm_make_fluid ();
3191 cur_errport_fluid = scm_make_fluid ();
3192 cur_warnport_fluid = scm_make_fluid ();
3193 cur_loadport_fluid = scm_make_fluid ();
3194
3195 scm_i_port_weak_set = scm_c_make_weak_set (31);
3196
3197 #include "libguile/ports.x"
3198
3199 /* Use Latin-1 as the default port encoding. */
3200 SCM_VARIABLE_SET (default_port_encoding_var,
3201 scm_make_fluid_with_default (SCM_BOOL_F));
3202 scm_port_encoding_init = 1;
3203
3204 SCM_VARIABLE_SET (default_conversion_strategy_var,
3205 scm_make_fluid_with_default (sym_substitute));
3206 scm_conversion_strategy_init = 1;
3207
3208 /* These bindings are used when boot-9 turns `current-input-port' et
3209 al into parameters. They are then removed from the guile module. */
3210 scm_c_define ("%current-input-port-fluid", cur_inport_fluid);
3211 scm_c_define ("%current-output-port-fluid", cur_outport_fluid);
3212 scm_c_define ("%current-error-port-fluid", cur_errport_fluid);
3213 scm_c_define ("%current-warning-port-fluid", cur_warnport_fluid);
3214 }
3215
3216 /*
3217 Local Variables:
3218 c-file-style: "gnu"
3219 End:
3220 */