remove scm_markstream
[bpt/guile.git] / libguile / ports.c
1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004,
2 * 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
18 */
19
20
21 \f
22 /* Headers. */
23
24 #define _LARGEFILE64_SOURCE /* ask for stat64 etc */
25
26 #ifdef HAVE_CONFIG_H
27 # include <config.h>
28 #endif
29
30 #include <stdio.h>
31 #include <errno.h>
32 #include <fcntl.h> /* for chsize on mingw */
33 #include <assert.h>
34 #include <iconv.h>
35 #include <uniconv.h>
36 #include <unistr.h>
37 #include <striconveh.h>
38
39 #include <assert.h>
40
41 #include "libguile/_scm.h"
42 #include "libguile/async.h"
43 #include "libguile/deprecation.h"
44 #include "libguile/eval.h"
45 #include "libguile/fports.h" /* direct access for seek and truncate */
46 #include "libguile/goops.h"
47 #include "libguile/smob.h"
48 #include "libguile/chars.h"
49 #include "libguile/dynwind.h"
50
51 #include "libguile/keywords.h"
52 #include "libguile/hashtab.h"
53 #include "libguile/root.h"
54 #include "libguile/strings.h"
55 #include "libguile/mallocs.h"
56 #include "libguile/validate.h"
57 #include "libguile/ports.h"
58 #include "libguile/vectors.h"
59 #include "libguile/weak-set.h"
60 #include "libguile/fluids.h"
61 #include "libguile/eq.h"
62
63 #ifdef HAVE_STRING_H
64 #include <string.h>
65 #endif
66
67 #ifdef HAVE_IO_H
68 #include <io.h>
69 #endif
70
71 #ifdef HAVE_UNISTD_H
72 #include <unistd.h>
73 #endif
74
75 #ifdef HAVE_SYS_IOCTL_H
76 #include <sys/ioctl.h>
77 #endif
78
79 /* Mingw (version 3.4.5, circa 2006) has ftruncate as an alias for chsize
80 already, but have this code here in case that wasn't so in past versions,
81 or perhaps to help other minimal DOS environments.
82
83 gnulib ftruncate.c has code using fcntl F_CHSIZE and F_FREESP, which
84 might be possibilities if we've got other systems without ftruncate. */
85
86 #if defined HAVE_CHSIZE && ! defined HAVE_FTRUNCATE
87 #define ftruncate(fd, size) chsize (fd, size)
88 #undef HAVE_FTRUNCATE
89 #define HAVE_FTRUNCATE 1
90 #endif
91
92 \f
93 /* The port kind table --- a dynamically resized array of port types. */
94
95
96 /* scm_ptobs scm_numptob
97 * implement a dynamically resized array of ptob records.
98 * Indexes into this table are used when generating type
99 * tags for smobjects (if you know a tag you can get an index and conversely).
100 */
101 scm_t_ptob_descriptor *scm_ptobs = NULL;
102 long scm_numptob = 0;
103
104 /*
105 * We choose to use an interface similar to the smob interface with
106 * fill_input and write as standard fields, passed to the port
107 * type constructor, and optional fields set by setters.
108 */
109
110 static void
111 flush_port_default (SCM port SCM_UNUSED)
112 {
113 }
114
115 static void
116 end_input_default (SCM port SCM_UNUSED, int offset SCM_UNUSED)
117 {
118 }
119
120 scm_t_bits
121 scm_make_port_type (char *name,
122 int (*fill_input) (SCM port),
123 void (*write) (SCM port, const void *data, size_t size))
124 {
125 char *tmp;
126 if (SCM_I_MAX_PORT_TYPE_COUNT - 1 <= scm_numptob)
127 goto ptoberr;
128 SCM_CRITICAL_SECTION_START;
129 tmp = (char *) scm_gc_realloc ((char *) scm_ptobs,
130 scm_numptob * sizeof (scm_t_ptob_descriptor),
131 (1 + scm_numptob)
132 * sizeof (scm_t_ptob_descriptor),
133 "port-type");
134 if (tmp)
135 {
136 scm_ptobs = (scm_t_ptob_descriptor *) tmp;
137
138 scm_ptobs[scm_numptob].name = name;
139 scm_ptobs[scm_numptob].mark = 0;
140 scm_ptobs[scm_numptob].free = NULL;
141 scm_ptobs[scm_numptob].print = scm_port_print;
142 scm_ptobs[scm_numptob].equalp = 0;
143 scm_ptobs[scm_numptob].close = 0;
144
145 scm_ptobs[scm_numptob].write = write;
146 scm_ptobs[scm_numptob].flush = flush_port_default;
147
148 scm_ptobs[scm_numptob].end_input = end_input_default;
149 scm_ptobs[scm_numptob].fill_input = fill_input;
150 scm_ptobs[scm_numptob].input_waiting = 0;
151
152 scm_ptobs[scm_numptob].seek = 0;
153 scm_ptobs[scm_numptob].truncate = 0;
154
155 scm_numptob++;
156 }
157 SCM_CRITICAL_SECTION_END;
158 if (!tmp)
159 {
160 ptoberr:
161 scm_memory_error ("scm_make_port_type");
162 }
163 /* Make a class object if Goops is present */
164 if (SCM_UNPACK (scm_port_class[0]) != 0)
165 scm_make_port_classes (scm_numptob - 1, SCM_PTOBNAME (scm_numptob - 1));
166 return scm_tc7_port + (scm_numptob - 1) * 256;
167 }
168
169 void
170 scm_set_port_mark (scm_t_bits tc, SCM (*mark) (SCM))
171 {
172 scm_ptobs[SCM_TC2PTOBNUM (tc)].mark = mark;
173 }
174
175 void
176 scm_set_port_free (scm_t_bits tc, size_t (*free) (SCM))
177 {
178 scm_ptobs[SCM_TC2PTOBNUM (tc)].free = free;
179 }
180
181 void
182 scm_set_port_print (scm_t_bits tc, int (*print) (SCM exp, SCM port,
183 scm_print_state *pstate))
184 {
185 scm_ptobs[SCM_TC2PTOBNUM (tc)].print = print;
186 }
187
188 void
189 scm_set_port_equalp (scm_t_bits tc, SCM (*equalp) (SCM, SCM))
190 {
191 scm_ptobs[SCM_TC2PTOBNUM (tc)].equalp = equalp;
192 }
193
194 void
195 scm_set_port_flush (scm_t_bits tc, void (*flush) (SCM port))
196 {
197 scm_ptobs[SCM_TC2PTOBNUM (tc)].flush = flush;
198 }
199
200 void
201 scm_set_port_end_input (scm_t_bits tc, void (*end_input) (SCM port, int offset))
202 {
203 scm_ptobs[SCM_TC2PTOBNUM (tc)].end_input = end_input;
204 }
205
206 void
207 scm_set_port_close (scm_t_bits tc, int (*close) (SCM))
208 {
209 scm_ptobs[SCM_TC2PTOBNUM (tc)].close = close;
210 }
211
212 void
213 scm_set_port_seek (scm_t_bits tc,
214 scm_t_off (*seek) (SCM, scm_t_off, int))
215 {
216 scm_ptobs[SCM_TC2PTOBNUM (tc)].seek = seek;
217 }
218
219 void
220 scm_set_port_truncate (scm_t_bits tc, void (*truncate) (SCM, scm_t_off))
221 {
222 scm_ptobs[SCM_TC2PTOBNUM (tc)].truncate = truncate;
223 }
224
225 void
226 scm_set_port_input_waiting (scm_t_bits tc, int (*input_waiting) (SCM))
227 {
228 scm_ptobs[SCM_TC2PTOBNUM (tc)].input_waiting = input_waiting;
229 }
230
231 \f
232
233 SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0,
234 (SCM port),
235 "Return @code{#t} if a character is ready on input @var{port}\n"
236 "and return @code{#f} otherwise. If @code{char-ready?} returns\n"
237 "@code{#t} then the next @code{read-char} operation on\n"
238 "@var{port} is guaranteed not to hang. If @var{port} is a file\n"
239 "port at end of file then @code{char-ready?} returns @code{#t}.\n"
240 "\n"
241 "@code{char-ready?} exists to make it possible for a\n"
242 "program to accept characters from interactive ports without\n"
243 "getting stuck waiting for input. Any input editors associated\n"
244 "with such ports must make sure that characters whose existence\n"
245 "has been asserted by @code{char-ready?} cannot be rubbed out.\n"
246 "If @code{char-ready?} were to return @code{#f} at end of file,\n"
247 "a port at end of file would be indistinguishable from an\n"
248 "interactive port that has no ready characters.")
249 #define FUNC_NAME s_scm_char_ready_p
250 {
251 scm_t_port *pt;
252
253 if (SCM_UNBNDP (port))
254 port = scm_current_input_port ();
255 /* It's possible to close the current input port, so validate even in
256 this case. */
257 SCM_VALIDATE_OPINPORT (1, port);
258
259 pt = SCM_PTAB_ENTRY (port);
260
261 /* if the current read buffer is filled, or the
262 last pushed-back char has been read and the saved buffer is
263 filled, result is true. */
264 if (pt->read_pos < pt->read_end
265 || (pt->read_buf == pt->putback_buf
266 && pt->saved_read_pos < pt->saved_read_end))
267 return SCM_BOOL_T;
268 else
269 {
270 scm_t_ptob_descriptor *ptob = &scm_ptobs[SCM_PTOBNUM (port)];
271
272 if (ptob->input_waiting)
273 return scm_from_bool(ptob->input_waiting (port));
274 else
275 return SCM_BOOL_T;
276 }
277 }
278 #undef FUNC_NAME
279
280 /* move up to read_len chars from port's putback and/or read buffers
281 into memory starting at dest. returns the number of chars moved. */
282 size_t scm_take_from_input_buffers (SCM port, char *dest, size_t read_len)
283 {
284 scm_t_port *pt = SCM_PTAB_ENTRY (port);
285 size_t chars_read = 0;
286 size_t from_buf = min (pt->read_end - pt->read_pos, read_len);
287
288 if (from_buf > 0)
289 {
290 memcpy (dest, pt->read_pos, from_buf);
291 pt->read_pos += from_buf;
292 chars_read += from_buf;
293 read_len -= from_buf;
294 dest += from_buf;
295 }
296
297 /* if putback was active, try the real input buffer too. */
298 if (pt->read_buf == pt->putback_buf)
299 {
300 from_buf = min (pt->saved_read_end - pt->saved_read_pos, read_len);
301 if (from_buf > 0)
302 {
303 memcpy (dest, pt->saved_read_pos, from_buf);
304 pt->saved_read_pos += from_buf;
305 chars_read += from_buf;
306 }
307 }
308 return chars_read;
309 }
310
311 /* Clear a port's read buffers, returning the contents. */
312 SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0,
313 (SCM port),
314 "This procedure clears a port's input buffers, similar\n"
315 "to the way that force-output clears the output buffer. The\n"
316 "contents of the buffers are returned as a single string, e.g.,\n"
317 "\n"
318 "@lisp\n"
319 "(define p (open-input-file ...))\n"
320 "(drain-input p) => empty string, nothing buffered yet.\n"
321 "(unread-char (read-char p) p)\n"
322 "(drain-input p) => initial chars from p, up to the buffer size.\n"
323 "@end lisp\n\n"
324 "Draining the buffers may be useful for cleanly finishing\n"
325 "buffered I/O so that the file descriptor can be used directly\n"
326 "for further input.")
327 #define FUNC_NAME s_scm_drain_input
328 {
329 SCM result;
330 char *data;
331 scm_t_port *pt;
332 long count;
333
334 SCM_VALIDATE_OPINPORT (1, port);
335 pt = SCM_PTAB_ENTRY (port);
336
337 count = pt->read_end - pt->read_pos;
338 if (pt->read_buf == pt->putback_buf)
339 count += pt->saved_read_end - pt->saved_read_pos;
340
341 if (count)
342 {
343 result = scm_i_make_string (count, &data, 0);
344 scm_take_from_input_buffers (port, data, count);
345 }
346 else
347 result = scm_nullstr;
348
349 return result;
350 }
351 #undef FUNC_NAME
352
353 \f
354 /* Standard ports --- current input, output, error, and more(!). */
355
356 static SCM cur_inport_fluid = SCM_BOOL_F;
357 static SCM cur_outport_fluid = SCM_BOOL_F;
358 static SCM cur_errport_fluid = SCM_BOOL_F;
359 static SCM cur_loadport_fluid = SCM_BOOL_F;
360
361 SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
362 (),
363 "Return the current input port. This is the default port used\n"
364 "by many input procedures. Initially, @code{current-input-port}\n"
365 "returns the @dfn{standard input} in Unix and C terminology.")
366 #define FUNC_NAME s_scm_current_input_port
367 {
368 if (scm_is_true (cur_inport_fluid))
369 return scm_fluid_ref (cur_inport_fluid);
370 else
371 return SCM_BOOL_F;
372 }
373 #undef FUNC_NAME
374
375 SCM_DEFINE (scm_current_output_port, "current-output-port", 0, 0, 0,
376 (),
377 "Return the current output port. This is the default port used\n"
378 "by many output procedures. Initially,\n"
379 "@code{current-output-port} returns the @dfn{standard output} in\n"
380 "Unix and C terminology.")
381 #define FUNC_NAME s_scm_current_output_port
382 {
383 if (scm_is_true (cur_outport_fluid))
384 return scm_fluid_ref (cur_outport_fluid);
385 else
386 return SCM_BOOL_F;
387 }
388 #undef FUNC_NAME
389
390 SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0,
391 (),
392 "Return the port to which errors and warnings should be sent (the\n"
393 "@dfn{standard error} in Unix and C terminology).")
394 #define FUNC_NAME s_scm_current_error_port
395 {
396 if (scm_is_true (cur_errport_fluid))
397 return scm_fluid_ref (cur_errport_fluid);
398 else
399 return SCM_BOOL_F;
400 }
401 #undef FUNC_NAME
402
403 SCM_DEFINE (scm_current_load_port, "current-load-port", 0, 0, 0,
404 (),
405 "Return the current-load-port.\n"
406 "The load port is used internally by @code{primitive-load}.")
407 #define FUNC_NAME s_scm_current_load_port
408 {
409 return scm_fluid_ref (cur_loadport_fluid);
410 }
411 #undef FUNC_NAME
412
413 SCM_DEFINE (scm_set_current_input_port, "set-current-input-port", 1, 0, 0,
414 (SCM port),
415 "@deffnx {Scheme Procedure} set-current-output-port port\n"
416 "@deffnx {Scheme Procedure} set-current-error-port port\n"
417 "Change the ports returned by @code{current-input-port},\n"
418 "@code{current-output-port} and @code{current-error-port}, respectively,\n"
419 "so that they use the supplied @var{port} for input or output.")
420 #define FUNC_NAME s_scm_set_current_input_port
421 {
422 SCM oinp = scm_fluid_ref (cur_inport_fluid);
423 SCM_VALIDATE_OPINPORT (1, port);
424 scm_fluid_set_x (cur_inport_fluid, port);
425 return oinp;
426 }
427 #undef FUNC_NAME
428
429
430 SCM_DEFINE (scm_set_current_output_port, "set-current-output-port", 1, 0, 0,
431 (SCM port),
432 "Set the current default output port to @var{port}.")
433 #define FUNC_NAME s_scm_set_current_output_port
434 {
435 SCM ooutp = scm_fluid_ref (cur_outport_fluid);
436 port = SCM_COERCE_OUTPORT (port);
437 SCM_VALIDATE_OPOUTPORT (1, port);
438 scm_fluid_set_x (cur_outport_fluid, port);
439 return ooutp;
440 }
441 #undef FUNC_NAME
442
443
444 SCM_DEFINE (scm_set_current_error_port, "set-current-error-port", 1, 0, 0,
445 (SCM port),
446 "Set the current default error port to @var{port}.")
447 #define FUNC_NAME s_scm_set_current_error_port
448 {
449 SCM oerrp = scm_fluid_ref (cur_errport_fluid);
450 port = SCM_COERCE_OUTPORT (port);
451 SCM_VALIDATE_OPOUTPORT (1, port);
452 scm_fluid_set_x (cur_errport_fluid, port);
453 return oerrp;
454 }
455 #undef FUNC_NAME
456
457 void
458 scm_dynwind_current_input_port (SCM port)
459 #define FUNC_NAME NULL
460 {
461 SCM_VALIDATE_OPINPORT (1, port);
462 scm_dynwind_fluid (cur_inport_fluid, port);
463 }
464 #undef FUNC_NAME
465
466 void
467 scm_dynwind_current_output_port (SCM port)
468 #define FUNC_NAME NULL
469 {
470 port = SCM_COERCE_OUTPORT (port);
471 SCM_VALIDATE_OPOUTPORT (1, port);
472 scm_dynwind_fluid (cur_outport_fluid, port);
473 }
474 #undef FUNC_NAME
475
476 void
477 scm_dynwind_current_error_port (SCM port)
478 #define FUNC_NAME NULL
479 {
480 port = SCM_COERCE_OUTPORT (port);
481 SCM_VALIDATE_OPOUTPORT (1, port);
482 scm_dynwind_fluid (cur_errport_fluid, port);
483 }
484 #undef FUNC_NAME
485
486 void
487 scm_i_dynwind_current_load_port (SCM port)
488 {
489 scm_dynwind_fluid (cur_loadport_fluid, port);
490 }
491
492 \f
493 /* The port table --- an array of pointers to ports. */
494
495 /*
496 We need a global registry of ports to flush them all at exit, and to
497 get all the ports matching a file descriptor.
498 */
499 SCM scm_i_port_weak_set;
500
501 \f
502 /* Port finalization. */
503
504
505 static void finalize_port (GC_PTR, GC_PTR);
506
507 /* Register a finalizer for PORT. */
508 static SCM_C_INLINE_KEYWORD void
509 register_finalizer_for_port (SCM port)
510 {
511 GC_finalization_proc prev_finalizer;
512 GC_PTR prev_finalization_data;
513
514 /* Register a finalizer for PORT so that its iconv CDs get freed and
515 optionally its type's `free' function gets called. */
516 GC_REGISTER_FINALIZER_NO_ORDER (SCM_HEAP_OBJECT_BASE (port),
517 finalize_port, 0,
518 &prev_finalizer,
519 &prev_finalization_data);
520 }
521
522 /* Finalize the object (a port) pointed to by PTR. */
523 static void
524 finalize_port (GC_PTR ptr, GC_PTR data)
525 {
526 long port_type;
527 SCM port = SCM_PACK_POINTER (ptr);
528
529 if (!SCM_PORTP (port))
530 abort ();
531
532 if (SCM_OPENP (port))
533 {
534 if (SCM_REVEALED (port) > 0)
535 /* Keep "revealed" ports alive and re-register a finalizer. */
536 register_finalizer_for_port (port);
537 else
538 {
539 scm_t_port *entry;
540
541 port_type = SCM_TC2PTOBNUM (SCM_CELL_TYPE (port));
542 if (port_type >= scm_numptob)
543 abort ();
544
545 if (scm_ptobs[port_type].free)
546 /* Yes, I really do mean `.free' rather than `.close'. `.close'
547 is for explicit `close-port' by user. */
548 scm_ptobs[port_type].free (port);
549
550 entry = SCM_PTAB_ENTRY (port);
551
552 if (entry->input_cd != (iconv_t) -1)
553 iconv_close (entry->input_cd);
554 if (entry->output_cd != (iconv_t) -1)
555 iconv_close (entry->output_cd);
556
557 SCM_SETSTREAM (port, 0);
558 SCM_CLR_PORT_OPEN_FLAG (port);
559
560 scm_gc_ports_collected++;
561 }
562 }
563 }
564
565
566
567 \f
568
569 SCM
570 scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits,
571 const char *encoding,
572 scm_t_string_failed_conversion_handler handler,
573 scm_t_bits stream)
574 {
575 SCM ret;
576 scm_t_port *entry;
577
578 entry = (scm_t_port *) scm_gc_calloc (sizeof (scm_t_port), "port");
579 ret = scm_cell (tag | mode_bits, (scm_t_bits)entry);
580
581 #if SCM_USE_PTHREAD_THREADS
582 scm_i_pthread_mutex_init (&entry->lock, scm_i_pthread_mutexattr_recursive);
583 #endif
584
585 entry->file_name = SCM_BOOL_F;
586 entry->rw_active = SCM_PORT_NEITHER;
587 entry->port = ret;
588 entry->stream = stream;
589 entry->encoding = encoding ? scm_gc_strdup (encoding, "port") : NULL;
590 /* The conversion descriptors will be opened lazily. */
591 entry->input_cd = (iconv_t) -1;
592 entry->output_cd = (iconv_t) -1;
593 entry->ilseq_handler = handler;
594
595 scm_weak_set_add_x (scm_i_port_weak_set, ret);
596
597 /* For each new port, register a finalizer so that it port type's free
598 function can be invoked eventually. */
599 register_finalizer_for_port (ret);
600
601 return ret;
602 }
603
604 SCM
605 scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream)
606 {
607 return scm_c_make_port_with_encoding (tag, mode_bits,
608 scm_i_default_port_encoding (),
609 scm_i_get_conversion_strategy (SCM_BOOL_F),
610 stream);
611 }
612
613 SCM
614 scm_new_port_table_entry (scm_t_bits tag)
615 {
616 return scm_c_make_port (tag, 0, 0);
617 }
618
619 /* Remove a port from the table and destroy it. */
620
621 static void
622 scm_i_remove_port (SCM port)
623 #define FUNC_NAME "scm_remove_port"
624 {
625 scm_t_port *p;
626
627 p = SCM_PTAB_ENTRY (port);
628 scm_port_non_buffer (p);
629 SCM_SETPTAB_ENTRY (port, 0);
630 scm_weak_set_remove_x (scm_i_port_weak_set, port);
631
632 p->putback_buf = NULL;
633 p->putback_buf_size = 0;
634
635 if (p->input_cd != (iconv_t) -1)
636 {
637 iconv_close (p->input_cd);
638 p->input_cd = (iconv_t) -1;
639 }
640
641 if (p->output_cd != (iconv_t) -1)
642 {
643 iconv_close (p->output_cd);
644 p->output_cd = (iconv_t) -1;
645 }
646 }
647 #undef FUNC_NAME
648
649
650 void
651 scm_port_non_buffer (scm_t_port *pt)
652 {
653 pt->read_pos = pt->read_buf = pt->read_end = &pt->shortbuf;
654 pt->write_buf = pt->write_pos = &pt->shortbuf;
655 pt->read_buf_size = pt->write_buf_size = 1;
656 pt->write_end = pt->write_buf + pt->write_buf_size;
657 }
658
659 \f
660 /* Revealed counts --- an oddity inherited from SCSH. */
661
662 /* Find a port in the table and return its revealed count.
663 Also used by the garbage collector.
664 */
665
666 int
667 scm_revealed_count (SCM port)
668 {
669 return SCM_REVEALED(port);
670 }
671
672
673
674 /* Return the revealed count for a port. */
675
676 SCM_DEFINE (scm_port_revealed, "port-revealed", 1, 0, 0,
677 (SCM port),
678 "Return the revealed count for @var{port}.")
679 #define FUNC_NAME s_scm_port_revealed
680 {
681 port = SCM_COERCE_OUTPORT (port);
682 SCM_VALIDATE_OPENPORT (1, port);
683 return scm_from_int (scm_revealed_count (port));
684 }
685 #undef FUNC_NAME
686
687 /* Set the revealed count for a port. */
688 SCM_DEFINE (scm_set_port_revealed_x, "set-port-revealed!", 2, 0, 0,
689 (SCM port, SCM rcount),
690 "Sets the revealed count for a port to a given value.\n"
691 "The return value is unspecified.")
692 #define FUNC_NAME s_scm_set_port_revealed_x
693 {
694 port = SCM_COERCE_OUTPORT (port);
695 SCM_VALIDATE_OPENPORT (1, port);
696 SCM_REVEALED (port) = scm_to_int (rcount);
697 return SCM_UNSPECIFIED;
698 }
699 #undef FUNC_NAME
700
701
702 \f
703 /* Retrieving a port's mode. */
704
705 /* Return the flags that characterize a port based on the mode
706 * string used to open a file for that port.
707 *
708 * See PORT FLAGS in scm.h
709 */
710
711 static long
712 scm_i_mode_bits_n (SCM modes)
713 {
714 return (SCM_OPN
715 | (scm_i_string_contains_char (modes, 'r')
716 || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0)
717 | (scm_i_string_contains_char (modes, 'w')
718 || scm_i_string_contains_char (modes, 'a')
719 || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0)
720 | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0)
721 | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0));
722 }
723
724 long
725 scm_mode_bits (char *modes)
726 {
727 return scm_i_mode_bits (scm_from_locale_string (modes));
728 }
729
730 long
731 scm_i_mode_bits (SCM modes)
732 {
733 long bits;
734
735 if (!scm_is_string (modes))
736 scm_wrong_type_arg_msg (NULL, 0, modes, "string");
737
738 bits = scm_i_mode_bits_n (modes);
739 scm_remember_upto_here_1 (modes);
740 return bits;
741 }
742
743 /* Return the mode flags from an open port.
744 * Some modes such as "append" are only used when opening
745 * a file and are not returned here. */
746
747 SCM_DEFINE (scm_port_mode, "port-mode", 1, 0, 0,
748 (SCM port),
749 "Return the port modes associated with the open port @var{port}.\n"
750 "These will not necessarily be identical to the modes used when\n"
751 "the port was opened, since modes such as \"append\" which are\n"
752 "used only during port creation are not retained.")
753 #define FUNC_NAME s_scm_port_mode
754 {
755 char modes[4];
756 modes[0] = '\0';
757
758 port = SCM_COERCE_OUTPORT (port);
759 SCM_VALIDATE_OPPORT (1, port);
760 if (SCM_CELL_WORD_0 (port) & SCM_RDNG) {
761 if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
762 strcpy (modes, "r+");
763 else
764 strcpy (modes, "r");
765 }
766 else if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
767 strcpy (modes, "w");
768 if (SCM_CELL_WORD_0 (port) & SCM_BUF0)
769 strcat (modes, "0");
770 return scm_from_locale_string (modes);
771 }
772 #undef FUNC_NAME
773
774
775 \f
776 /* Closing ports. */
777
778 /* scm_close_port
779 * Call the close operation on a port object.
780 * see also scm_close.
781 */
782 SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0,
783 (SCM port),
784 "Close the specified port object. Return @code{#t} if it\n"
785 "successfully closes a port or @code{#f} if it was already\n"
786 "closed. An exception may be raised if an error occurs, for\n"
787 "example when flushing buffered output. See also @ref{Ports and\n"
788 "File Descriptors, close}, for a procedure which can close file\n"
789 "descriptors.")
790 #define FUNC_NAME s_scm_close_port
791 {
792 size_t i;
793 int rv;
794
795 port = SCM_COERCE_OUTPORT (port);
796
797 SCM_VALIDATE_PORT (1, port);
798 if (SCM_CLOSEDP (port))
799 return SCM_BOOL_F;
800 i = SCM_PTOBNUM (port);
801 if (scm_ptobs[i].close)
802 rv = (scm_ptobs[i].close) (port);
803 else
804 rv = 0;
805 scm_i_remove_port (port);
806 SCM_CLR_PORT_OPEN_FLAG (port);
807 return scm_from_bool (rv >= 0);
808 }
809 #undef FUNC_NAME
810
811 SCM_DEFINE (scm_close_input_port, "close-input-port", 1, 0, 0,
812 (SCM port),
813 "Close the specified input port object. The routine has no effect if\n"
814 "the file has already been closed. An exception may be raised if an\n"
815 "error occurs. The value returned is unspecified.\n\n"
816 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
817 "which can close file descriptors.")
818 #define FUNC_NAME s_scm_close_input_port
819 {
820 SCM_VALIDATE_INPUT_PORT (1, port);
821 scm_close_port (port);
822 return SCM_UNSPECIFIED;
823 }
824 #undef FUNC_NAME
825
826 SCM_DEFINE (scm_close_output_port, "close-output-port", 1, 0, 0,
827 (SCM port),
828 "Close the specified output port object. The routine has no effect if\n"
829 "the file has already been closed. An exception may be raised if an\n"
830 "error occurs. The value returned is unspecified.\n\n"
831 "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
832 "which can close file descriptors.")
833 #define FUNC_NAME s_scm_close_output_port
834 {
835 port = SCM_COERCE_OUTPORT (port);
836 SCM_VALIDATE_OUTPUT_PORT (1, port);
837 scm_close_port (port);
838 return SCM_UNSPECIFIED;
839 }
840 #undef FUNC_NAME
841
842 struct for_each_data
843 {
844 void (*proc) (void *data, SCM p);
845 void *data;
846 };
847
848 static SCM
849 for_each_trampoline (void *data, SCM port, SCM result)
850 {
851 struct for_each_data *d = data;
852
853 d->proc (d->data, port);
854
855 return result;
856 }
857
858 void
859 scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data)
860 {
861 struct for_each_data d;
862
863 d.proc = proc;
864 d.data = data;
865
866 scm_c_weak_set_fold (for_each_trampoline, &d, SCM_EOL,
867 scm_i_port_weak_set);
868 }
869
870 static void
871 scm_for_each_trampoline (void *data, SCM port)
872 {
873 scm_call_1 (SCM_PACK_POINTER (data), port);
874 }
875
876 SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0,
877 (SCM proc),
878 "Apply @var{proc} to each port in the Guile port table\n"
879 "in turn. The return value is unspecified. More specifically,\n"
880 "@var{proc} is applied exactly once to every port that exists\n"
881 "in the system at the time @var{port-for-each} is invoked.\n"
882 "Changes to the port table while @var{port-for-each} is running\n"
883 "have no effect as far as @var{port-for-each} is concerned.")
884 #define FUNC_NAME s_scm_port_for_each
885 {
886 SCM_VALIDATE_PROC (1, proc);
887
888 scm_c_port_for_each (scm_for_each_trampoline, SCM_UNPACK_POINTER (proc));
889
890 return SCM_UNSPECIFIED;
891 }
892 #undef FUNC_NAME
893
894
895 \f
896 /* Utter miscellany. Gosh, we should clean this up some time. */
897
898 SCM_DEFINE (scm_input_port_p, "input-port?", 1, 0, 0,
899 (SCM x),
900 "Return @code{#t} if @var{x} is an input port, otherwise return\n"
901 "@code{#f}. Any object satisfying this predicate also satisfies\n"
902 "@code{port?}.")
903 #define FUNC_NAME s_scm_input_port_p
904 {
905 return scm_from_bool (SCM_INPUT_PORT_P (x));
906 }
907 #undef FUNC_NAME
908
909 SCM_DEFINE (scm_output_port_p, "output-port?", 1, 0, 0,
910 (SCM x),
911 "Return @code{#t} if @var{x} is an output port, otherwise return\n"
912 "@code{#f}. Any object satisfying this predicate also satisfies\n"
913 "@code{port?}.")
914 #define FUNC_NAME s_scm_output_port_p
915 {
916 x = SCM_COERCE_OUTPORT (x);
917 return scm_from_bool (SCM_OUTPUT_PORT_P (x));
918 }
919 #undef FUNC_NAME
920
921 SCM_DEFINE (scm_port_p, "port?", 1, 0, 0,
922 (SCM x),
923 "Return a boolean indicating whether @var{x} is a port.\n"
924 "Equivalent to @code{(or (input-port? @var{x}) (output-port?\n"
925 "@var{x}))}.")
926 #define FUNC_NAME s_scm_port_p
927 {
928 return scm_from_bool (SCM_PORTP (x));
929 }
930 #undef FUNC_NAME
931
932 SCM_DEFINE (scm_port_closed_p, "port-closed?", 1, 0, 0,
933 (SCM port),
934 "Return @code{#t} if @var{port} is closed or @code{#f} if it is\n"
935 "open.")
936 #define FUNC_NAME s_scm_port_closed_p
937 {
938 SCM_VALIDATE_PORT (1, port);
939 return scm_from_bool (!SCM_OPPORTP (port));
940 }
941 #undef FUNC_NAME
942
943 SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0,
944 (SCM x),
945 "Return @code{#t} if @var{x} is an end-of-file object; otherwise\n"
946 "return @code{#f}.")
947 #define FUNC_NAME s_scm_eof_object_p
948 {
949 return scm_from_bool(SCM_EOF_OBJECT_P (x));
950 }
951 #undef FUNC_NAME
952
953 SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0,
954 (SCM port),
955 "Flush the specified output port, or the current output port if @var{port}\n"
956 "is omitted. The current output buffer contents are passed to the\n"
957 "underlying port implementation (e.g., in the case of fports, the\n"
958 "data will be written to the file and the output buffer will be cleared.)\n"
959 "It has no effect on an unbuffered port.\n\n"
960 "The return value is unspecified.")
961 #define FUNC_NAME s_scm_force_output
962 {
963 if (SCM_UNBNDP (port))
964 port = scm_current_output_port ();
965 else
966 {
967 port = SCM_COERCE_OUTPORT (port);
968 SCM_VALIDATE_OPOUTPORT (1, port);
969 }
970 scm_flush (port);
971 return SCM_UNSPECIFIED;
972 }
973 #undef FUNC_NAME
974
975
976 static void
977 flush_output_port (void *closure, SCM port)
978 {
979 if (SCM_OPOUTPORTP (port))
980 scm_flush (port);
981 }
982
983 SCM_DEFINE (scm_flush_all_ports, "flush-all-ports", 0, 0, 0,
984 (),
985 "Equivalent to calling @code{force-output} on\n"
986 "all open output ports. The return value is unspecified.")
987 #define FUNC_NAME s_scm_flush_all_ports
988 {
989 scm_c_port_for_each (&flush_output_port, NULL);
990 return SCM_UNSPECIFIED;
991 }
992 #undef FUNC_NAME
993
994 SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
995 (SCM port),
996 "Return the next character available from @var{port}, updating\n"
997 "@var{port} to point to the following character. If no more\n"
998 "characters are available, the end-of-file object is returned.\n"
999 "\n"
1000 "When @var{port}'s data cannot be decoded according to its\n"
1001 "character encoding, a @code{decoding-error} is raised and\n"
1002 "@var{port} points past the erroneous byte sequence.\n")
1003 #define FUNC_NAME s_scm_read_char
1004 {
1005 scm_t_wchar c;
1006 if (SCM_UNBNDP (port))
1007 port = scm_current_input_port ();
1008 SCM_VALIDATE_OPINPORT (1, port);
1009 c = scm_getc (port);
1010 if (EOF == c)
1011 return SCM_EOF_VAL;
1012 return SCM_MAKE_CHAR (c);
1013 }
1014 #undef FUNC_NAME
1015
1016 /* Update the line and column number of PORT after consumption of C. */
1017 static inline void
1018 update_port_lf (scm_t_wchar c, SCM port)
1019 {
1020 switch (c)
1021 {
1022 case '\a':
1023 case EOF:
1024 break;
1025 case '\b':
1026 SCM_DECCOL (port);
1027 break;
1028 case '\n':
1029 SCM_INCLINE (port);
1030 break;
1031 case '\r':
1032 SCM_ZEROCOL (port);
1033 break;
1034 case '\t':
1035 SCM_TABCOL (port);
1036 break;
1037 default:
1038 SCM_INCCOL (port);
1039 break;
1040 }
1041 }
1042
1043 #define SCM_MBCHAR_BUF_SIZE (4)
1044
1045 /* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint.
1046 UTF8_BUF is assumed to contain a valid UTF-8 sequence. */
1047 static scm_t_wchar
1048 utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
1049 {
1050 scm_t_wchar codepoint;
1051
1052 if (utf8_buf[0] <= 0x7f)
1053 {
1054 assert (size == 1);
1055 codepoint = utf8_buf[0];
1056 }
1057 else if ((utf8_buf[0] & 0xe0) == 0xc0)
1058 {
1059 assert (size == 2);
1060 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL
1061 | (utf8_buf[1] & 0x3f);
1062 }
1063 else if ((utf8_buf[0] & 0xf0) == 0xe0)
1064 {
1065 assert (size == 3);
1066 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL
1067 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL
1068 | (utf8_buf[2] & 0x3f);
1069 }
1070 else
1071 {
1072 assert (size == 4);
1073 codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL
1074 | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL
1075 | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL
1076 | (utf8_buf[3] & 0x3f);
1077 }
1078
1079 return codepoint;
1080 }
1081
1082 /* Read a UTF-8 sequence from PORT. On success, return 0 and set
1083 *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8
1084 representation, and set *LEN to the length in bytes. Return
1085 `EILSEQ' on error. */
1086 static int
1087 get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
1088 scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1089 {
1090 #define ASSERT_NOT_EOF(b) \
1091 if (SCM_UNLIKELY ((b) == EOF)) \
1092 goto invalid_seq
1093 #define CONSUME_PEEKED_BYTE() \
1094 pt->read_pos++
1095
1096 int byte;
1097 scm_t_port *pt;
1098
1099 *len = 0;
1100 pt = SCM_PTAB_ENTRY (port);
1101
1102 byte = scm_get_byte_or_eof (port);
1103 if (byte == EOF)
1104 {
1105 *codepoint = EOF;
1106 return 0;
1107 }
1108
1109 buf[0] = (scm_t_uint8) byte;
1110 *len = 1;
1111
1112 if (buf[0] <= 0x7f)
1113 /* 1-byte form. */
1114 *codepoint = buf[0];
1115 else if (buf[0] >= 0xc2 && buf[0] <= 0xdf)
1116 {
1117 /* 2-byte form. */
1118 byte = scm_peek_byte_or_eof (port);
1119 ASSERT_NOT_EOF (byte);
1120
1121 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1122 goto invalid_seq;
1123
1124 CONSUME_PEEKED_BYTE ();
1125 buf[1] = (scm_t_uint8) byte;
1126 *len = 2;
1127
1128 *codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL
1129 | (buf[1] & 0x3f);
1130 }
1131 else if ((buf[0] & 0xf0) == 0xe0)
1132 {
1133 /* 3-byte form. */
1134 byte = scm_peek_byte_or_eof (port);
1135 ASSERT_NOT_EOF (byte);
1136
1137 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80
1138 || (buf[0] == 0xe0 && byte < 0xa0)
1139 || (buf[0] == 0xed && byte > 0x9f)))
1140 goto invalid_seq;
1141
1142 CONSUME_PEEKED_BYTE ();
1143 buf[1] = (scm_t_uint8) byte;
1144 *len = 2;
1145
1146 byte = scm_peek_byte_or_eof (port);
1147 ASSERT_NOT_EOF (byte);
1148
1149 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1150 goto invalid_seq;
1151
1152 CONSUME_PEEKED_BYTE ();
1153 buf[2] = (scm_t_uint8) byte;
1154 *len = 3;
1155
1156 *codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL
1157 | ((scm_t_wchar) buf[1] & 0x3f) << 6UL
1158 | (buf[2] & 0x3f);
1159 }
1160 else if (buf[0] >= 0xf0 && buf[0] <= 0xf4)
1161 {
1162 /* 4-byte form. */
1163 byte = scm_peek_byte_or_eof (port);
1164 ASSERT_NOT_EOF (byte);
1165
1166 if (SCM_UNLIKELY (((byte & 0xc0) != 0x80)
1167 || (buf[0] == 0xf0 && byte < 0x90)
1168 || (buf[0] == 0xf4 && byte > 0x8f)))
1169 goto invalid_seq;
1170
1171 CONSUME_PEEKED_BYTE ();
1172 buf[1] = (scm_t_uint8) byte;
1173 *len = 2;
1174
1175 byte = scm_peek_byte_or_eof (port);
1176 ASSERT_NOT_EOF (byte);
1177
1178 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1179 goto invalid_seq;
1180
1181 CONSUME_PEEKED_BYTE ();
1182 buf[2] = (scm_t_uint8) byte;
1183 *len = 3;
1184
1185 byte = scm_peek_byte_or_eof (port);
1186 ASSERT_NOT_EOF (byte);
1187
1188 if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
1189 goto invalid_seq;
1190
1191 CONSUME_PEEKED_BYTE ();
1192 buf[3] = (scm_t_uint8) byte;
1193 *len = 4;
1194
1195 *codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL
1196 | ((scm_t_wchar) buf[1] & 0x3f) << 12UL
1197 | ((scm_t_wchar) buf[2] & 0x3f) << 6UL
1198 | (buf[3] & 0x3f);
1199 }
1200 else
1201 goto invalid_seq;
1202
1203 return 0;
1204
1205 invalid_seq:
1206 /* Here we could choose the consume the faulty byte when it's not a
1207 valid starting byte, but it's not a requirement. What Section 3.9
1208 of Unicode 6.0.0 mandates, though, is to not consume a byte that
1209 would otherwise be a valid starting byte. */
1210
1211 return EILSEQ;
1212
1213 #undef CONSUME_PEEKED_BYTE
1214 #undef ASSERT_NOT_EOF
1215 }
1216
1217 /* Likewise, read a byte sequence from PORT, passing it through its
1218 input conversion descriptor. */
1219 static int
1220 get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
1221 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1222 {
1223 scm_t_port *pt;
1224 int err, byte_read;
1225 size_t bytes_consumed, output_size;
1226 char *output;
1227 scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
1228
1229 pt = SCM_PTAB_ENTRY (port);
1230
1231 for (output_size = 0, output = (char *) utf8_buf,
1232 bytes_consumed = 0, err = 0;
1233 err == 0 && output_size == 0
1234 && (bytes_consumed == 0 || byte_read != EOF);
1235 bytes_consumed++)
1236 {
1237 char *input;
1238 size_t input_left, output_left, done;
1239
1240 byte_read = scm_get_byte_or_eof (port);
1241 if (byte_read == EOF)
1242 {
1243 if (bytes_consumed == 0)
1244 {
1245 *codepoint = (scm_t_wchar) EOF;
1246 *len = 0;
1247 return 0;
1248 }
1249 else
1250 continue;
1251 }
1252
1253 buf[bytes_consumed] = byte_read;
1254
1255 input = buf;
1256 input_left = bytes_consumed + 1;
1257 output_left = sizeof (utf8_buf);
1258
1259 done = iconv (pt->input_cd, &input, &input_left,
1260 &output, &output_left);
1261 if (done == (size_t) -1)
1262 {
1263 err = errno;
1264 if (err == EINVAL)
1265 /* Missing input: keep trying. */
1266 err = 0;
1267 }
1268 else
1269 output_size = sizeof (utf8_buf) - output_left;
1270 }
1271
1272 if (SCM_UNLIKELY (output_size == 0))
1273 /* An unterminated sequence. */
1274 err = EILSEQ;
1275 else if (SCM_LIKELY (err == 0))
1276 {
1277 /* Convert the UTF8_BUF sequence to a Unicode code point. */
1278 *codepoint = utf8_to_codepoint (utf8_buf, output_size);
1279 *len = bytes_consumed;
1280 }
1281
1282 return err;
1283 }
1284
1285 /* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
1286 with the byte representation of the codepoint in PORT's encoding, and
1287 set *LEN to the length in bytes of that representation. Return 0 on
1288 success and an errno value on error. */
1289 static int
1290 get_codepoint (SCM port, scm_t_wchar *codepoint,
1291 char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
1292 {
1293 int err;
1294 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1295
1296 if (pt->input_cd == (iconv_t) -1)
1297 /* Initialize the conversion descriptors, if needed. */
1298 scm_i_set_port_encoding_x (port, pt->encoding);
1299
1300 /* FIXME: In 2.1, add a flag to determine whether a port is UTF-8. */
1301 if (pt->input_cd == (iconv_t) -1)
1302 err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
1303 else
1304 err = get_iconv_codepoint (port, codepoint, buf, len);
1305
1306 if (SCM_LIKELY (err == 0))
1307 update_port_lf (*codepoint, port);
1308 else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
1309 {
1310 *codepoint = '?';
1311 err = 0;
1312 update_port_lf (*codepoint, port);
1313 }
1314
1315 return err;
1316 }
1317
1318 /* Read a codepoint from PORT and return it. */
1319 scm_t_wchar
1320 scm_getc (SCM port)
1321 #define FUNC_NAME "scm_getc"
1322 {
1323 int err;
1324 size_t len;
1325 scm_t_wchar codepoint;
1326 char buf[SCM_MBCHAR_BUF_SIZE];
1327
1328 err = get_codepoint (port, &codepoint, buf, &len);
1329 if (SCM_UNLIKELY (err != 0))
1330 /* At this point PORT should point past the invalid encoding, as per
1331 R6RS-lib Section 8.2.4. */
1332 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
1333
1334 return codepoint;
1335 }
1336 #undef FUNC_NAME
1337
1338 /* this should only be called when the read buffer is empty. it
1339 tries to refill the read buffer. it returns the first char from
1340 the port, which is either EOF or *(pt->read_pos). */
1341 int
1342 scm_fill_input (SCM port)
1343 {
1344 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1345
1346 assert (pt->read_pos == pt->read_end);
1347
1348 if (pt->read_buf == pt->putback_buf)
1349 {
1350 /* finished reading put-back chars. */
1351 pt->read_buf = pt->saved_read_buf;
1352 pt->read_pos = pt->saved_read_pos;
1353 pt->read_end = pt->saved_read_end;
1354 pt->read_buf_size = pt->saved_read_buf_size;
1355 if (pt->read_pos < pt->read_end)
1356 return *(pt->read_pos);
1357 }
1358 return scm_ptobs[SCM_PTOBNUM (port)].fill_input (port);
1359 }
1360
1361
1362 /* scm_lfwrite
1363 *
1364 * This function differs from scm_c_write; it updates port line and
1365 * column. */
1366
1367 void
1368 scm_lfwrite (const char *ptr, size_t size, SCM port)
1369 {
1370 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1371 scm_t_ptob_descriptor *ptob = &scm_ptobs[SCM_PTOBNUM (port)];
1372
1373 if (pt->rw_active == SCM_PORT_READ)
1374 scm_end_input (port);
1375
1376 ptob->write (port, ptr, size);
1377
1378 for (; size; ptr++, size--)
1379 update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port);
1380
1381 if (pt->rw_random)
1382 pt->rw_active = SCM_PORT_WRITE;
1383 }
1384
1385 /* Write STR to PORT from START inclusive to END exclusive. */
1386 void
1387 scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
1388 {
1389 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1390
1391 if (pt->rw_active == SCM_PORT_READ)
1392 scm_end_input (port);
1393
1394 if (end == (size_t) -1)
1395 end = scm_i_string_length (str);
1396
1397 scm_display (scm_c_substring (str, start, end), port);
1398
1399 if (pt->rw_random)
1400 pt->rw_active = SCM_PORT_WRITE;
1401 }
1402
1403 /* scm_c_read
1404 *
1405 * Used by an application to read arbitrary number of bytes from an
1406 * SCM port. Same semantics as libc read, except that scm_c_read only
1407 * returns less than SIZE bytes if at end-of-file.
1408 *
1409 * Warning: Doesn't update port line and column counts! */
1410
1411 /* This structure, and the following swap_buffer function, are used
1412 for temporarily swapping a port's own read buffer, and the buffer
1413 that the caller of scm_c_read provides. */
1414 struct port_and_swap_buffer
1415 {
1416 scm_t_port *pt;
1417 unsigned char *buffer;
1418 size_t size;
1419 };
1420
1421 static void
1422 swap_buffer (void *data)
1423 {
1424 struct port_and_swap_buffer *psb = (struct port_and_swap_buffer *) data;
1425 unsigned char *old_buf = psb->pt->read_buf;
1426 size_t old_size = psb->pt->read_buf_size;
1427
1428 /* Make the port use (buffer, size) from the struct. */
1429 psb->pt->read_pos = psb->pt->read_buf = psb->pt->read_end = psb->buffer;
1430 psb->pt->read_buf_size = psb->size;
1431
1432 /* Save the port's old (buffer, size) in the struct. */
1433 psb->buffer = old_buf;
1434 psb->size = old_size;
1435 }
1436
1437 size_t
1438 scm_c_read (SCM port, void *buffer, size_t size)
1439 #define FUNC_NAME "scm_c_read"
1440 {
1441 scm_t_port *pt;
1442 size_t n_read = 0, n_available;
1443 struct port_and_swap_buffer psb;
1444
1445 SCM_VALIDATE_OPINPORT (1, port);
1446
1447 pt = SCM_PTAB_ENTRY (port);
1448 if (pt->rw_active == SCM_PORT_WRITE)
1449 scm_ptobs[SCM_PTOBNUM (port)].flush (port);
1450
1451 if (pt->rw_random)
1452 pt->rw_active = SCM_PORT_READ;
1453
1454 /* Take bytes first from the port's read buffer. */
1455 if (pt->read_pos < pt->read_end)
1456 {
1457 n_available = min (size, pt->read_end - pt->read_pos);
1458 memcpy (buffer, pt->read_pos, n_available);
1459 buffer = (char *) buffer + n_available;
1460 pt->read_pos += n_available;
1461 n_read += n_available;
1462 size -= n_available;
1463 }
1464
1465 /* Avoid the scm_dynwind_* costs if we now have enough data. */
1466 if (size == 0)
1467 return n_read;
1468
1469 /* Now we will call scm_fill_input repeatedly until we have read the
1470 requested number of bytes. (Note that a single scm_fill_input
1471 call does not guarantee to fill the whole of the port's read
1472 buffer.) */
1473 if (pt->read_buf_size <= 1 && pt->encoding == NULL)
1474 {
1475 /* The port that we are reading from is unbuffered - i.e. does
1476 not have its own persistent buffer - but we have a buffer,
1477 provided by our caller, that is the right size for the data
1478 that is wanted. For the following scm_fill_input calls,
1479 therefore, we use the buffer in hand as the port's read
1480 buffer.
1481
1482 We need to make sure that the port's normal (1 byte) buffer
1483 is reinstated in case one of the scm_fill_input () calls
1484 throws an exception; we use the scm_dynwind_* API to achieve
1485 that.
1486
1487 A consequence of this optimization is that the fill_input
1488 functions can't unget characters. That'll push data to the
1489 pushback buffer instead of this psb buffer. */
1490 #if SCM_DEBUG == 1
1491 unsigned char *pback = pt->putback_buf;
1492 #endif
1493 psb.pt = pt;
1494 psb.buffer = buffer;
1495 psb.size = size;
1496 scm_dynwind_begin (SCM_F_DYNWIND_REWINDABLE);
1497 scm_dynwind_rewind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1498 scm_dynwind_unwind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
1499
1500 /* Call scm_fill_input until we have all the bytes that we need,
1501 or we hit EOF. */
1502 while (pt->read_buf_size && (scm_fill_input (port) != EOF))
1503 {
1504 pt->read_buf_size -= (pt->read_end - pt->read_pos);
1505 pt->read_pos = pt->read_buf = pt->read_end;
1506 }
1507 #if SCM_DEBUG == 1
1508 if (pback != pt->putback_buf
1509 || pt->read_buf - (unsigned char *) buffer < 0)
1510 scm_misc_error (FUNC_NAME,
1511 "scm_c_read must not call a fill function that pushes "
1512 "back characters onto an unbuffered port", SCM_EOL);
1513 #endif
1514 n_read += pt->read_buf - (unsigned char *) buffer;
1515
1516 /* Reinstate the port's normal buffer. */
1517 scm_dynwind_end ();
1518 }
1519 else
1520 {
1521 /* The port has its own buffer. It is important that we use it,
1522 even if it happens to be smaller than our caller's buffer, so
1523 that a custom port implementation's entry points (in
1524 particular, fill_input) can rely on the buffer always being
1525 the same as they first set up. */
1526 while (size && (scm_fill_input (port) != EOF))
1527 {
1528 n_available = min (size, pt->read_end - pt->read_pos);
1529 memcpy (buffer, pt->read_pos, n_available);
1530 buffer = (char *) buffer + n_available;
1531 pt->read_pos += n_available;
1532 n_read += n_available;
1533 size -= n_available;
1534 }
1535 }
1536
1537 return n_read;
1538 }
1539 #undef FUNC_NAME
1540
1541 /* scm_c_write
1542 *
1543 * Used by an application to write arbitrary number of bytes to an SCM
1544 * port. Similar semantics as libc write. However, unlike libc
1545 * write, scm_c_write writes the requested number of bytes and has no
1546 * return value.
1547 *
1548 * Warning: Doesn't update port line and column counts!
1549 */
1550
1551 void
1552 scm_c_write (SCM port, const void *ptr, size_t size)
1553 #define FUNC_NAME "scm_c_write"
1554 {
1555 scm_t_port *pt;
1556 scm_t_ptob_descriptor *ptob;
1557
1558 SCM_VALIDATE_OPOUTPORT (1, port);
1559
1560 pt = SCM_PTAB_ENTRY (port);
1561 ptob = &scm_ptobs[SCM_PTOBNUM (port)];
1562
1563 if (pt->rw_active == SCM_PORT_READ)
1564 scm_end_input (port);
1565
1566 ptob->write (port, ptr, size);
1567
1568 if (pt->rw_random)
1569 pt->rw_active = SCM_PORT_WRITE;
1570 }
1571 #undef FUNC_NAME
1572
1573 void
1574 scm_flush (SCM port)
1575 {
1576 long i = SCM_PTOBNUM (port);
1577 assert (i >= 0);
1578 (scm_ptobs[i].flush) (port);
1579 }
1580
1581 void
1582 scm_end_input (SCM port)
1583 {
1584 long offset;
1585 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1586
1587 if (pt->read_buf == pt->putback_buf)
1588 {
1589 offset = pt->read_end - pt->read_pos;
1590 pt->read_buf = pt->saved_read_buf;
1591 pt->read_pos = pt->saved_read_pos;
1592 pt->read_end = pt->saved_read_end;
1593 pt->read_buf_size = pt->saved_read_buf_size;
1594 }
1595 else
1596 offset = 0;
1597
1598 scm_ptobs[SCM_PTOBNUM (port)].end_input (port, offset);
1599 }
1600
1601 \f
1602
1603
1604 void
1605 scm_unget_byte (int c, SCM port)
1606 #define FUNC_NAME "scm_unget_byte"
1607 {
1608 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1609
1610 if (pt->read_buf == pt->putback_buf)
1611 /* already using the put-back buffer. */
1612 {
1613 /* enlarge putback_buf if necessary. */
1614 if (pt->read_end == pt->read_buf + pt->read_buf_size
1615 && pt->read_buf == pt->read_pos)
1616 {
1617 size_t new_size = pt->read_buf_size * 2;
1618 unsigned char *tmp = (unsigned char *)
1619 scm_gc_realloc (pt->putback_buf, pt->read_buf_size, new_size,
1620 "putback buffer");
1621
1622 pt->read_pos = pt->read_buf = pt->putback_buf = tmp;
1623 pt->read_end = pt->read_buf + pt->read_buf_size;
1624 pt->read_buf_size = pt->putback_buf_size = new_size;
1625 }
1626
1627 /* shift any existing bytes to buffer + 1. */
1628 if (pt->read_pos == pt->read_end)
1629 pt->read_end = pt->read_buf + 1;
1630 else if (pt->read_pos != pt->read_buf + 1)
1631 {
1632 int count = pt->read_end - pt->read_pos;
1633
1634 memmove (pt->read_buf + 1, pt->read_pos, count);
1635 pt->read_end = pt->read_buf + 1 + count;
1636 }
1637
1638 pt->read_pos = pt->read_buf;
1639 }
1640 else
1641 /* switch to the put-back buffer. */
1642 {
1643 if (pt->putback_buf == NULL)
1644 {
1645 pt->putback_buf
1646 = (unsigned char *) scm_gc_malloc_pointerless
1647 (SCM_INITIAL_PUTBACK_BUF_SIZE, "putback buffer");
1648 pt->putback_buf_size = SCM_INITIAL_PUTBACK_BUF_SIZE;
1649 }
1650
1651 pt->saved_read_buf = pt->read_buf;
1652 pt->saved_read_pos = pt->read_pos;
1653 pt->saved_read_end = pt->read_end;
1654 pt->saved_read_buf_size = pt->read_buf_size;
1655
1656 pt->read_pos = pt->read_buf = pt->putback_buf;
1657 pt->read_end = pt->read_buf + 1;
1658 pt->read_buf_size = pt->putback_buf_size;
1659 }
1660
1661 *pt->read_buf = c;
1662
1663 if (pt->rw_random)
1664 pt->rw_active = SCM_PORT_READ;
1665 }
1666 #undef FUNC_NAME
1667
1668 void
1669 scm_ungetc (scm_t_wchar c, SCM port)
1670 #define FUNC_NAME "scm_ungetc"
1671 {
1672 scm_t_port *pt = SCM_PTAB_ENTRY (port);
1673 char *result;
1674 char result_buf[10];
1675 const char *encoding;
1676 size_t len;
1677 int i;
1678
1679 if (pt->encoding != NULL)
1680 encoding = pt->encoding;
1681 else
1682 encoding = "ISO-8859-1";
1683
1684 len = sizeof (result_buf);
1685 result = u32_conv_to_encoding (encoding,
1686 (enum iconv_ilseq_handler) pt->ilseq_handler,
1687 (uint32_t *) &c, 1, NULL,
1688 result_buf, &len);
1689
1690 if (SCM_UNLIKELY (result == NULL || len == 0))
1691 scm_encoding_error (FUNC_NAME, errno,
1692 "conversion to port encoding failed",
1693 SCM_BOOL_F, SCM_MAKE_CHAR (c));
1694
1695 for (i = len - 1; i >= 0; i--)
1696 scm_unget_byte (result[i], port);
1697
1698 if (SCM_UNLIKELY (result != result_buf))
1699 free (result);
1700
1701 if (c == '\n')
1702 {
1703 /* What should col be in this case?
1704 * We'll leave it at -1.
1705 */
1706 SCM_LINUM (port) -= 1;
1707 }
1708 else
1709 SCM_COL(port) -= 1;
1710 }
1711 #undef FUNC_NAME
1712
1713
1714 void
1715 scm_ungets (const char *s, int n, SCM port)
1716 {
1717 /* This is simple minded and inefficient, but unreading strings is
1718 * probably not a common operation, and remember that line and
1719 * column numbers have to be handled...
1720 *
1721 * Please feel free to write an optimized version!
1722 */
1723 while (n--)
1724 scm_ungetc (s[n], port);
1725 }
1726
1727
1728 SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
1729 (SCM port),
1730 "Return the next character available from @var{port},\n"
1731 "@emph{without} updating @var{port} to point to the following\n"
1732 "character. If no more characters are available, the\n"
1733 "end-of-file object is returned.\n"
1734 "\n"
1735 "The value returned by\n"
1736 "a call to @code{peek-char} is the same as the value that would\n"
1737 "have been returned by a call to @code{read-char} on the same\n"
1738 "port. The only difference is that the very next call to\n"
1739 "@code{read-char} or @code{peek-char} on that @var{port} will\n"
1740 "return the value returned by the preceding call to\n"
1741 "@code{peek-char}. In particular, a call to @code{peek-char} on\n"
1742 "an interactive port will hang waiting for input whenever a call\n"
1743 "to @code{read-char} would have hung.\n"
1744 "\n"
1745 "As for @code{read-char}, a @code{decoding-error} may be raised\n"
1746 "if such a situation occurs. However, unlike with @code{read-char},\n"
1747 "@var{port} still points at the beginning of the erroneous byte\n"
1748 "sequence when the error is raised.\n")
1749 #define FUNC_NAME s_scm_peek_char
1750 {
1751 int err;
1752 SCM result;
1753 scm_t_wchar c;
1754 char bytes[SCM_MBCHAR_BUF_SIZE];
1755 long column, line, i;
1756 size_t len;
1757
1758 if (SCM_UNBNDP (port))
1759 port = scm_current_input_port ();
1760 SCM_VALIDATE_OPINPORT (1, port);
1761
1762 column = SCM_COL (port);
1763 line = SCM_LINUM (port);
1764
1765 err = get_codepoint (port, &c, bytes, &len);
1766
1767 for (i = len - 1; i >= 0; i--)
1768 scm_unget_byte (bytes[i], port);
1769
1770 SCM_COL (port) = column;
1771 SCM_LINUM (port) = line;
1772
1773 if (SCM_UNLIKELY (err != 0))
1774 {
1775 scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
1776
1777 /* Shouldn't happen since `catch' always aborts to prompt. */
1778 result = SCM_BOOL_F;
1779 }
1780 else if (c == EOF)
1781 result = SCM_EOF_VAL;
1782 else
1783 result = SCM_MAKE_CHAR (c);
1784
1785 return result;
1786 }
1787 #undef FUNC_NAME
1788
1789 SCM_DEFINE (scm_unread_char, "unread-char", 1, 1, 0,
1790 (SCM cobj, SCM port),
1791 "Place @var{char} in @var{port} so that it will be read by the\n"
1792 "next read operation. If called multiple times, the unread characters\n"
1793 "will be read again in last-in first-out order. If @var{port} is\n"
1794 "not supplied, the current input port is used.")
1795 #define FUNC_NAME s_scm_unread_char
1796 {
1797 int c;
1798
1799 SCM_VALIDATE_CHAR (1, cobj);
1800 if (SCM_UNBNDP (port))
1801 port = scm_current_input_port ();
1802 SCM_VALIDATE_OPINPORT (2, port);
1803
1804 c = SCM_CHAR (cobj);
1805
1806 scm_ungetc (c, port);
1807 return cobj;
1808 }
1809 #undef FUNC_NAME
1810
1811 SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0,
1812 (SCM str, SCM port),
1813 "Place the string @var{str} in @var{port} so that its characters will be\n"
1814 "read in subsequent read operations. If called multiple times, the\n"
1815 "unread characters will be read again in last-in first-out order. If\n"
1816 "@var{port} is not supplied, the current-input-port is used.")
1817 #define FUNC_NAME s_scm_unread_string
1818 {
1819 int n;
1820 SCM_VALIDATE_STRING (1, str);
1821 if (SCM_UNBNDP (port))
1822 port = scm_current_input_port ();
1823 SCM_VALIDATE_OPINPORT (2, port);
1824
1825 n = scm_i_string_length (str);
1826
1827 while (n--)
1828 scm_ungetc (scm_i_string_ref (str, n), port);
1829
1830 return str;
1831 }
1832 #undef FUNC_NAME
1833
1834 SCM_DEFINE (scm_seek, "seek", 3, 0, 0,
1835 (SCM fd_port, SCM offset, SCM whence),
1836 "Sets the current position of @var{fd/port} to the integer\n"
1837 "@var{offset}, which is interpreted according to the value of\n"
1838 "@var{whence}.\n"
1839 "\n"
1840 "One of the following variables should be supplied for\n"
1841 "@var{whence}:\n"
1842 "@defvar SEEK_SET\n"
1843 "Seek from the beginning of the file.\n"
1844 "@end defvar\n"
1845 "@defvar SEEK_CUR\n"
1846 "Seek from the current position.\n"
1847 "@end defvar\n"
1848 "@defvar SEEK_END\n"
1849 "Seek from the end of the file.\n"
1850 "@end defvar\n"
1851 "If @var{fd/port} is a file descriptor, the underlying system\n"
1852 "call is @code{lseek}. @var{port} may be a string port.\n"
1853 "\n"
1854 "The value returned is the new position in the file. This means\n"
1855 "that the current position of a port can be obtained using:\n"
1856 "@lisp\n"
1857 "(seek port 0 SEEK_CUR)\n"
1858 "@end lisp")
1859 #define FUNC_NAME s_scm_seek
1860 {
1861 int how;
1862
1863 fd_port = SCM_COERCE_OUTPORT (fd_port);
1864
1865 how = scm_to_int (whence);
1866 if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END)
1867 SCM_OUT_OF_RANGE (3, whence);
1868
1869 if (SCM_OPPORTP (fd_port))
1870 {
1871 scm_t_ptob_descriptor *ptob = scm_ptobs + SCM_PTOBNUM (fd_port);
1872 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
1873 off_t_or_off64_t rv;
1874
1875 if (!ptob->seek)
1876 SCM_MISC_ERROR ("port is not seekable",
1877 scm_cons (fd_port, SCM_EOL));
1878 else
1879 rv = ptob->seek (fd_port, off, how);
1880 return scm_from_off_t_or_off64_t (rv);
1881 }
1882 else /* file descriptor?. */
1883 {
1884 off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
1885 off_t_or_off64_t rv;
1886 rv = lseek_or_lseek64 (scm_to_int (fd_port), off, how);
1887 if (rv == -1)
1888 SCM_SYSERROR;
1889 return scm_from_off_t_or_off64_t (rv);
1890 }
1891 }
1892 #undef FUNC_NAME
1893
1894 #ifndef O_BINARY
1895 #define O_BINARY 0
1896 #endif
1897
1898 /* Mingw has ftruncate(), perhaps implemented above using chsize, but
1899 doesn't have the filename version truncate(), hence this code. */
1900 #if HAVE_FTRUNCATE && ! HAVE_TRUNCATE
1901 static int
1902 truncate (const char *file, off_t length)
1903 {
1904 int ret, fdes;
1905
1906 fdes = open (file, O_BINARY | O_WRONLY);
1907 if (fdes == -1)
1908 return -1;
1909
1910 ret = ftruncate (fdes, length);
1911 if (ret == -1)
1912 {
1913 int save_errno = errno;
1914 close (fdes);
1915 errno = save_errno;
1916 return -1;
1917 }
1918
1919 return close (fdes);
1920 }
1921 #endif /* HAVE_FTRUNCATE && ! HAVE_TRUNCATE */
1922
1923 SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0,
1924 (SCM object, SCM length),
1925 "Truncate @var{file} to @var{length} bytes. @var{file} can be a\n"
1926 "filename string, a port object, or an integer file descriptor.\n"
1927 "The return value is unspecified.\n"
1928 "\n"
1929 "For a port or file descriptor @var{length} can be omitted, in\n"
1930 "which case the file is truncated at the current position (per\n"
1931 "@code{ftell} above).\n"
1932 "\n"
1933 "On most systems a file can be extended by giving a length\n"
1934 "greater than the current size, but this is not mandatory in the\n"
1935 "POSIX standard.")
1936 #define FUNC_NAME s_scm_truncate_file
1937 {
1938 int rv;
1939
1940 /* "object" can be a port, fdes or filename.
1941
1942 Negative "length" makes no sense, but it's left to truncate() or
1943 ftruncate() to give back an error for that (normally EINVAL).
1944 */
1945
1946 if (SCM_UNBNDP (length))
1947 {
1948 /* must supply length if object is a filename. */
1949 if (scm_is_string (object))
1950 SCM_MISC_ERROR("must supply length if OBJECT is a filename", SCM_EOL);
1951
1952 length = scm_seek (object, SCM_INUM0, scm_from_int (SEEK_CUR));
1953 }
1954
1955 object = SCM_COERCE_OUTPORT (object);
1956 if (scm_is_integer (object))
1957 {
1958 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
1959 SCM_SYSCALL (rv = ftruncate_or_ftruncate64 (scm_to_int (object),
1960 c_length));
1961 }
1962 else if (SCM_OPOUTPORTP (object))
1963 {
1964 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
1965 scm_t_port *pt = SCM_PTAB_ENTRY (object);
1966 scm_t_ptob_descriptor *ptob = scm_ptobs + SCM_PTOBNUM (object);
1967
1968 if (!ptob->truncate)
1969 SCM_MISC_ERROR ("port is not truncatable", SCM_EOL);
1970 if (pt->rw_active == SCM_PORT_READ)
1971 scm_end_input (object);
1972 else if (pt->rw_active == SCM_PORT_WRITE)
1973 ptob->flush (object);
1974
1975 ptob->truncate (object, c_length);
1976 rv = 0;
1977 }
1978 else
1979 {
1980 off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
1981 char *str = scm_to_locale_string (object);
1982 int eno;
1983 SCM_SYSCALL (rv = truncate_or_truncate64 (str, c_length));
1984 eno = errno;
1985 free (str);
1986 errno = eno;
1987 }
1988 if (rv == -1)
1989 SCM_SYSERROR;
1990 return SCM_UNSPECIFIED;
1991 }
1992 #undef FUNC_NAME
1993
1994 SCM_DEFINE (scm_port_line, "port-line", 1, 0, 0,
1995 (SCM port),
1996 "Return the current line number for @var{port}.\n"
1997 "\n"
1998 "The first line of a file is 0. But you might want to add 1\n"
1999 "when printing line numbers, since starting from 1 is\n"
2000 "traditional in error messages, and likely to be more natural to\n"
2001 "non-programmers.")
2002 #define FUNC_NAME s_scm_port_line
2003 {
2004 port = SCM_COERCE_OUTPORT (port);
2005 SCM_VALIDATE_OPENPORT (1, port);
2006 return scm_from_long (SCM_LINUM (port));
2007 }
2008 #undef FUNC_NAME
2009
2010 SCM_DEFINE (scm_set_port_line_x, "set-port-line!", 2, 0, 0,
2011 (SCM port, SCM line),
2012 "Set the current line number for @var{port} to @var{line}. The\n"
2013 "first line of a file is 0.")
2014 #define FUNC_NAME s_scm_set_port_line_x
2015 {
2016 port = SCM_COERCE_OUTPORT (port);
2017 SCM_VALIDATE_OPENPORT (1, port);
2018 SCM_PTAB_ENTRY (port)->line_number = scm_to_long (line);
2019 return SCM_UNSPECIFIED;
2020 }
2021 #undef FUNC_NAME
2022
2023 SCM_DEFINE (scm_port_column, "port-column", 1, 0, 0,
2024 (SCM port),
2025 "Return the current column number of @var{port}.\n"
2026 "If the number is\n"
2027 "unknown, the result is #f. Otherwise, the result is a 0-origin integer\n"
2028 "- i.e. the first character of the first line is line 0, column 0.\n"
2029 "(However, when you display a file position, for example in an error\n"
2030 "message, we recommend you add 1 to get 1-origin integers. This is\n"
2031 "because lines and column numbers traditionally start with 1, and that is\n"
2032 "what non-programmers will find most natural.)")
2033 #define FUNC_NAME s_scm_port_column
2034 {
2035 port = SCM_COERCE_OUTPORT (port);
2036 SCM_VALIDATE_OPENPORT (1, port);
2037 return scm_from_int (SCM_COL (port));
2038 }
2039 #undef FUNC_NAME
2040
2041 SCM_DEFINE (scm_set_port_column_x, "set-port-column!", 2, 0, 0,
2042 (SCM port, SCM column),
2043 "Set the current column of @var{port}. Before reading the first\n"
2044 "character on a line the column should be 0.")
2045 #define FUNC_NAME s_scm_set_port_column_x
2046 {
2047 port = SCM_COERCE_OUTPORT (port);
2048 SCM_VALIDATE_OPENPORT (1, port);
2049 SCM_PTAB_ENTRY (port)->column_number = scm_to_int (column);
2050 return SCM_UNSPECIFIED;
2051 }
2052 #undef FUNC_NAME
2053
2054 SCM_DEFINE (scm_port_filename, "port-filename", 1, 0, 0,
2055 (SCM port),
2056 "Return the filename associated with @var{port}, or @code{#f}\n"
2057 "if no filename is associated with the port.")
2058 #define FUNC_NAME s_scm_port_filename
2059 {
2060 port = SCM_COERCE_OUTPORT (port);
2061 SCM_VALIDATE_OPENPORT (1, port);
2062 return SCM_FILENAME (port);
2063 }
2064 #undef FUNC_NAME
2065
2066 SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0,
2067 (SCM port, SCM filename),
2068 "Change the filename associated with @var{port}, using the current input\n"
2069 "port if none is specified. Note that this does not change the port's\n"
2070 "source of data, but only the value that is returned by\n"
2071 "@code{port-filename} and reported in diagnostic output.")
2072 #define FUNC_NAME s_scm_set_port_filename_x
2073 {
2074 port = SCM_COERCE_OUTPORT (port);
2075 SCM_VALIDATE_OPENPORT (1, port);
2076 /* We allow the user to set the filename to whatever he likes. */
2077 SCM_SET_FILENAME (port, filename);
2078 return SCM_UNSPECIFIED;
2079 }
2080 #undef FUNC_NAME
2081
2082 /* A fluid specifying the default encoding for newly created ports. If it is
2083 a string, that is the encoding. If it is #f, it is in the "native"
2084 (Latin-1) encoding. */
2085 SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding");
2086
2087 static int scm_port_encoding_init = 0;
2088
2089 /* Use ENCODING as the default encoding for future ports. */
2090 void
2091 scm_i_set_default_port_encoding (const char *encoding)
2092 {
2093 if (!scm_port_encoding_init
2094 || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
2095 scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
2096 SCM_EOL);
2097
2098 if (encoding == NULL
2099 || !strcmp (encoding, "ASCII")
2100 || !strcmp (encoding, "ANSI_X3.4-1968")
2101 || !strcmp (encoding, "ISO-8859-1"))
2102 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
2103 else
2104 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
2105 scm_from_locale_string (encoding));
2106 }
2107
2108 /* Return the name of the default encoding for newly created ports; a
2109 return value of NULL means "ISO-8859-1". */
2110 const char *
2111 scm_i_default_port_encoding (void)
2112 {
2113 if (!scm_port_encoding_init)
2114 return NULL;
2115 else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
2116 return NULL;
2117 else
2118 {
2119 SCM encoding;
2120
2121 encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
2122 if (!scm_is_string (encoding))
2123 return NULL;
2124 else
2125 return scm_i_string_chars (encoding);
2126 }
2127 }
2128
2129 void
2130 scm_i_set_port_encoding_x (SCM port, const char *encoding)
2131 {
2132 scm_t_port *pt;
2133 iconv_t new_input_cd, new_output_cd;
2134
2135 new_input_cd = (iconv_t) -1;
2136 new_output_cd = (iconv_t) -1;
2137
2138 /* Set the character encoding for this port. */
2139 pt = SCM_PTAB_ENTRY (port);
2140
2141 if (encoding == NULL)
2142 encoding = "ISO-8859-1";
2143
2144 if (pt->encoding != encoding)
2145 pt->encoding = scm_gc_strdup (encoding, "port");
2146
2147 /* If ENCODING is UTF-8, then no conversion descriptor is opened
2148 because we do I/O ourselves. This saves 100+ KiB for each
2149 descriptor. */
2150 if (strcmp (encoding, "UTF-8"))
2151 {
2152 if (SCM_CELL_WORD_0 (port) & SCM_RDNG)
2153 {
2154 /* Open an input iconv conversion descriptor, from ENCODING
2155 to UTF-8. We choose UTF-8, not UTF-32, because iconv
2156 implementations can typically convert from anything to
2157 UTF-8, but not to UTF-32 (see
2158 <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
2159 new_input_cd = iconv_open ("UTF-8", encoding);
2160 if (new_input_cd == (iconv_t) -1)
2161 goto invalid_encoding;
2162 }
2163
2164 if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
2165 {
2166 new_output_cd = iconv_open (encoding, "UTF-8");
2167 if (new_output_cd == (iconv_t) -1)
2168 {
2169 if (new_input_cd != (iconv_t) -1)
2170 iconv_close (new_input_cd);
2171 goto invalid_encoding;
2172 }
2173 }
2174 }
2175
2176 if (pt->input_cd != (iconv_t) -1)
2177 iconv_close (pt->input_cd);
2178 if (pt->output_cd != (iconv_t) -1)
2179 iconv_close (pt->output_cd);
2180
2181 pt->input_cd = new_input_cd;
2182 pt->output_cd = new_output_cd;
2183
2184 return;
2185
2186 invalid_encoding:
2187 {
2188 SCM err;
2189 err = scm_from_locale_string (encoding);
2190 scm_misc_error ("scm_i_set_port_encoding_x",
2191 "invalid or unknown character encoding ~s",
2192 scm_list_1 (err));
2193 }
2194 }
2195
2196 SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
2197 (SCM port),
2198 "Returns, as a string, the character encoding that @var{port}\n"
2199 "uses to interpret its input and output.\n")
2200 #define FUNC_NAME s_scm_port_encoding
2201 {
2202 scm_t_port *pt;
2203 const char *enc;
2204
2205 SCM_VALIDATE_PORT (1, port);
2206
2207 pt = SCM_PTAB_ENTRY (port);
2208 enc = pt->encoding;
2209 if (enc)
2210 return scm_from_locale_string (pt->encoding);
2211 else
2212 return SCM_BOOL_F;
2213 }
2214 #undef FUNC_NAME
2215
2216 SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
2217 (SCM port, SCM enc),
2218 "Sets the character encoding that will be used to interpret all\n"
2219 "port I/O. New ports are created with the encoding\n"
2220 "appropriate for the current locale if @code{setlocale} has \n"
2221 "been called or ISO-8859-1 otherwise\n"
2222 "and this procedure can be used to modify that encoding.\n")
2223 #define FUNC_NAME s_scm_set_port_encoding_x
2224 {
2225 char *enc_str;
2226
2227 SCM_VALIDATE_PORT (1, port);
2228 SCM_VALIDATE_STRING (2, enc);
2229
2230 enc_str = scm_to_locale_string (enc);
2231 scm_i_set_port_encoding_x (port, enc_str);
2232 free (enc_str);
2233
2234 return SCM_UNSPECIFIED;
2235 }
2236 #undef FUNC_NAME
2237
2238
2239 /* This determines how conversions handle unconvertible characters. */
2240 SCM_GLOBAL_VARIABLE (scm_conversion_strategy, "%port-conversion-strategy");
2241 static int scm_conversion_strategy_init = 0;
2242
2243 scm_t_string_failed_conversion_handler
2244 scm_i_get_conversion_strategy (SCM port)
2245 {
2246 SCM encoding;
2247
2248 if (scm_is_false (port))
2249 {
2250 if (!scm_conversion_strategy_init
2251 || !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy)))
2252 return SCM_FAILED_CONVERSION_QUESTION_MARK;
2253 else
2254 {
2255 encoding = scm_fluid_ref (SCM_VARIABLE_REF (scm_conversion_strategy));
2256 if (scm_is_false (encoding))
2257 return SCM_FAILED_CONVERSION_QUESTION_MARK;
2258 else
2259 return (scm_t_string_failed_conversion_handler) scm_to_int (encoding);
2260 }
2261 }
2262 else
2263 {
2264 scm_t_port *pt;
2265 pt = SCM_PTAB_ENTRY (port);
2266 return pt->ilseq_handler;
2267 }
2268
2269 }
2270
2271 void
2272 scm_i_set_conversion_strategy_x (SCM port,
2273 scm_t_string_failed_conversion_handler handler)
2274 {
2275 SCM strategy;
2276 scm_t_port *pt;
2277
2278 strategy = scm_from_int ((int) handler);
2279
2280 if (scm_is_false (port))
2281 {
2282 /* Set the default encoding for future ports. */
2283 if (!scm_conversion_strategy_init
2284 || !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy)))
2285 scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
2286 SCM_EOL);
2287 scm_fluid_set_x (SCM_VARIABLE_REF (scm_conversion_strategy), strategy);
2288 }
2289 else
2290 {
2291 /* Set the character encoding for this port. */
2292 pt = SCM_PTAB_ENTRY (port);
2293 pt->ilseq_handler = handler;
2294 }
2295 }
2296
2297 SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
2298 1, 0, 0, (SCM port),
2299 "Returns the behavior of the port when handling a character that\n"
2300 "is not representable in the port's current encoding.\n"
2301 "It returns the symbol @code{error} if unrepresentable characters\n"
2302 "should cause exceptions, @code{substitute} if the port should\n"
2303 "try to replace unrepresentable characters with question marks or\n"
2304 "approximate characters, or @code{escape} if unrepresentable\n"
2305 "characters should be converted to string escapes.\n"
2306 "\n"
2307 "If @var{port} is @code{#f}, then the current default behavior\n"
2308 "will be returned. New ports will have this default behavior\n"
2309 "when they are created.\n")
2310 #define FUNC_NAME s_scm_port_conversion_strategy
2311 {
2312 scm_t_string_failed_conversion_handler h;
2313
2314 SCM_VALIDATE_OPPORT (1, port);
2315
2316 if (!scm_is_false (port))
2317 {
2318 SCM_VALIDATE_OPPORT (1, port);
2319 }
2320
2321 h = scm_i_get_conversion_strategy (port);
2322 if (h == SCM_FAILED_CONVERSION_ERROR)
2323 return scm_from_latin1_symbol ("error");
2324 else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
2325 return scm_from_latin1_symbol ("substitute");
2326 else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
2327 return scm_from_latin1_symbol ("escape");
2328 else
2329 abort ();
2330
2331 /* Never gets here. */
2332 return SCM_UNDEFINED;
2333 }
2334 #undef FUNC_NAME
2335
2336 SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!",
2337 2, 0, 0,
2338 (SCM port, SCM sym),
2339 "Sets the behavior of the interpreter when outputting a character\n"
2340 "that is not representable in the port's current encoding.\n"
2341 "@var{sym} can be either @code{'error}, @code{'substitute}, or\n"
2342 "@code{'escape}. If it is @code{'error}, an error will be thrown\n"
2343 "when an unconvertible character is encountered. If it is\n"
2344 "@code{'substitute}, then unconvertible characters will \n"
2345 "be replaced with approximate characters, or with question marks\n"
2346 "if no approximately correct character is available.\n"
2347 "If it is @code{'escape},\n"
2348 "it will appear as a hex escape when output.\n"
2349 "\n"
2350 "If @var{port} is an open port, the conversion error behavior\n"
2351 "is set for that port. If it is @code{#f}, it is set as the\n"
2352 "default behavior for any future ports that get created in\n"
2353 "this thread.\n")
2354 #define FUNC_NAME s_scm_set_port_conversion_strategy_x
2355 {
2356 SCM err;
2357 SCM qm;
2358 SCM esc;
2359
2360 if (!scm_is_false (port))
2361 {
2362 SCM_VALIDATE_OPPORT (1, port);
2363 }
2364
2365 err = scm_from_latin1_symbol ("error");
2366 if (scm_is_true (scm_eqv_p (sym, err)))
2367 {
2368 scm_i_set_conversion_strategy_x (port, SCM_FAILED_CONVERSION_ERROR);
2369 return SCM_UNSPECIFIED;
2370 }
2371
2372 qm = scm_from_latin1_symbol ("substitute");
2373 if (scm_is_true (scm_eqv_p (sym, qm)))
2374 {
2375 scm_i_set_conversion_strategy_x (port,
2376 SCM_FAILED_CONVERSION_QUESTION_MARK);
2377 return SCM_UNSPECIFIED;
2378 }
2379
2380 esc = scm_from_latin1_symbol ("escape");
2381 if (scm_is_true (scm_eqv_p (sym, esc)))
2382 {
2383 scm_i_set_conversion_strategy_x (port,
2384 SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
2385 return SCM_UNSPECIFIED;
2386 }
2387
2388 SCM_MISC_ERROR ("unknown conversion behavior ~s", scm_list_1 (sym));
2389
2390 return SCM_UNSPECIFIED;
2391 }
2392 #undef FUNC_NAME
2393
2394
2395
2396 void
2397 scm_print_port_mode (SCM exp, SCM port)
2398 {
2399 scm_puts (SCM_CLOSEDP (exp)
2400 ? "closed: "
2401 : (SCM_RDNG & SCM_CELL_WORD_0 (exp)
2402 ? (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
2403 ? "input-output: "
2404 : "input: ")
2405 : (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
2406 ? "output: "
2407 : "bogus: ")),
2408 port);
2409 }
2410
2411 int
2412 scm_port_print (SCM exp, SCM port, scm_print_state *pstate SCM_UNUSED)
2413 {
2414 char *type = SCM_PTOBNAME (SCM_PTOBNUM (exp));
2415 if (!type)
2416 type = "port";
2417 scm_puts ("#<", port);
2418 scm_print_port_mode (exp, port);
2419 scm_puts (type, port);
2420 scm_putc (' ', port);
2421 scm_uintprint (SCM_CELL_WORD_1 (exp), 16, port);
2422 scm_putc ('>', port);
2423 return 1;
2424 }
2425
2426 \f
2427
2428 /* Void ports. */
2429
2430 scm_t_bits scm_tc16_void_port = 0;
2431
2432 static int fill_input_void_port (SCM port SCM_UNUSED)
2433 {
2434 return EOF;
2435 }
2436
2437 static void
2438 write_void_port (SCM port SCM_UNUSED,
2439 const void *data SCM_UNUSED,
2440 size_t size SCM_UNUSED)
2441 {
2442 }
2443
2444 static SCM
2445 scm_i_void_port (long mode_bits)
2446 {
2447 SCM ret;
2448
2449 ret = scm_c_make_port (scm_tc16_void_port, mode_bits, 0);
2450
2451 scm_port_non_buffer (SCM_PTAB_ENTRY (ret));
2452
2453 return ret;
2454 }
2455
2456 SCM
2457 scm_void_port (char *mode_str)
2458 {
2459 return scm_i_void_port (scm_mode_bits (mode_str));
2460 }
2461
2462 SCM_DEFINE (scm_sys_make_void_port, "%make-void-port", 1, 0, 0,
2463 (SCM mode),
2464 "Create and return a new void port. A void port acts like\n"
2465 "@file{/dev/null}. The @var{mode} argument\n"
2466 "specifies the input/output modes for this port: see the\n"
2467 "documentation for @code{open-file} in @ref{File Ports}.")
2468 #define FUNC_NAME s_scm_sys_make_void_port
2469 {
2470 return scm_i_void_port (scm_i_mode_bits (mode));
2471 }
2472 #undef FUNC_NAME
2473
2474 \f
2475 /* Initialization. */
2476
2477 void
2478 scm_init_ports ()
2479 {
2480 /* lseek() symbols. */
2481 scm_c_define ("SEEK_SET", scm_from_int (SEEK_SET));
2482 scm_c_define ("SEEK_CUR", scm_from_int (SEEK_CUR));
2483 scm_c_define ("SEEK_END", scm_from_int (SEEK_END));
2484
2485 scm_tc16_void_port = scm_make_port_type ("void", fill_input_void_port,
2486 write_void_port);
2487
2488 cur_inport_fluid = scm_make_fluid ();
2489 cur_outport_fluid = scm_make_fluid ();
2490 cur_errport_fluid = scm_make_fluid ();
2491 cur_loadport_fluid = scm_make_fluid ();
2492
2493 scm_i_port_weak_set = scm_c_make_weak_set (31);
2494
2495 #include "libguile/ports.x"
2496
2497 /* Use Latin-1 as the default port encoding. */
2498 SCM_VARIABLE_SET (default_port_encoding_var, scm_make_fluid ());
2499 scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
2500 scm_port_encoding_init = 1;
2501
2502 SCM_VARIABLE_SET (scm_conversion_strategy, scm_make_fluid ());
2503 scm_fluid_set_x (SCM_VARIABLE_REF (scm_conversion_strategy),
2504 scm_from_int ((int) SCM_FAILED_CONVERSION_QUESTION_MARK));
2505 scm_conversion_strategy_init = 1;
2506
2507 }
2508
2509 /*
2510 Local Variables:
2511 c-file-style: "gnu"
2512 End:
2513 */