X-Git-Url: https://git.hcoop.net/bpt/guile.git/blobdiff_plain/e610dc3851da716e6ee4568f94f5f7cace84d2d9..01a301d1b606b84d986b735049e7155d2f4cd6aa:/libguile/rdelim.c diff --git a/libguile/rdelim.c b/libguile/rdelim.c index 2ce1803c8..9d1496795 100644 --- a/libguile/rdelim.c +++ b/libguile/rdelim.c @@ -1,21 +1,23 @@ -/* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2006 Free Software Foundation, Inc. - * +/* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2006, + * 2011 Free Software Foundation, Inc. + * * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 3 of + * the License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA */ -#if HAVE_CONFIG_H +#ifdef HAVE_CONFIG_H # include <config.h> #endif @@ -58,12 +60,10 @@ SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 3, 0, size_t j; size_t cstart; size_t cend; - int c; - const char *cdelims; + scm_t_wchar c; size_t num_delims; SCM_VALIDATE_STRING (1, delims); - cdelims = scm_i_string_chars (delims); num_delims = scm_i_string_length (delims); SCM_VALIDATE_STRING (2, str); @@ -82,7 +82,7 @@ SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 3, 0, c = scm_getc (port); for (k = 0; k < num_delims; k++) { - if (cdelims[k] == c) + if (scm_i_string_ref (delims, k) == c) { if (scm_is_false (gobble)) scm_ungetc (c, port); @@ -101,88 +101,6 @@ SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 3, 0, } #undef FUNC_NAME -static unsigned char * -scm_do_read_line (SCM port, size_t *len_p) -{ - scm_t_port *pt = SCM_PTAB_ENTRY (port); - unsigned char *end; - - /* I thought reading lines was simple. Mercy me. */ - - /* The common case: the buffer contains a complete line. - This needs to be fast. */ - if ((end = memchr (pt->read_pos, '\n', (pt->read_end - pt->read_pos))) - != 0) - { - size_t buf_len = (end + 1) - pt->read_pos; - /* Allocate a buffer of the perfect size. */ - unsigned char *buf = scm_malloc (buf_len + 1); - - memcpy (buf, pt->read_pos, buf_len); - pt->read_pos += buf_len; - - buf[buf_len] = '\0'; - - *len_p = buf_len; - return buf; - } - - /* The buffer contains no newlines. */ - { - /* When live, len is always the number of characters in the - current buffer that are part of the current line. */ - size_t len = (pt->read_end - pt->read_pos); - size_t buf_size = (len < 50) ? 
60 : len * 2; - /* Invariant: buf always has buf_size + 1 characters allocated; - the `+ 1' is for the final '\0'. */ - unsigned char *buf = scm_malloc (buf_size + 1); - size_t buf_len = 0; - - for (;;) - { - if (buf_len + len > buf_size) - { - size_t new_size = (buf_len + len) * 2; - buf = scm_realloc (buf, new_size + 1); - buf_size = new_size; - } - - /* Copy what we've got out of the port, into our buffer. */ - memcpy (buf + buf_len, pt->read_pos, len); - buf_len += len; - pt->read_pos += len; - - /* If we had seen a newline, we're done now. */ - if (end) - break; - - /* Get more characters. */ - if (scm_fill_input (port) == EOF) - { - /* If we're missing a final newline in the file, return - what we did get, sans newline. */ - if (buf_len > 0) - break; - - free (buf); - return 0; - } - - /* Search the buffer for newlines. */ - if ((end = memchr (pt->read_pos, '\n', - (len = (pt->read_end - pt->read_pos)))) - != 0) - len = (end - pt->read_pos) + 1; - } - - /* I wonder how expensive this realloc is. */ - buf = scm_realloc (buf, buf_len + 1); - buf[buf_len] = '\0'; - *len_p = buf_len; - return buf; - } -} - /* * %read-line @@ -202,45 +120,68 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0, "@code{(#<eof> . #<eof>)}.") #define FUNC_NAME s_scm_read_line { - scm_t_port *pt; - char *s; - size_t slen = 0; - SCM line, term; +/* Threshold under which the only allocation performed is that of the + resulting string and pair. 
*/ +#define LINE_BUFFER_SIZE 256 + + SCM line, strings, result; + scm_t_wchar buf[LINE_BUFFER_SIZE], delim; + size_t index; if (SCM_UNBNDP (port)) port = scm_current_input_port (); - SCM_VALIDATE_OPINPORT (1,port); - pt = SCM_PTAB_ENTRY (port); - if (pt->rw_active == SCM_PORT_WRITE) - scm_ptobs[SCM_PTOBNUM (port)].flush (port); + SCM_VALIDATE_OPINPORT (1,port); - s = (char *) scm_do_read_line (port, &slen); + index = 0; + delim = 0; + strings = SCM_BOOL_F; - if (s == NULL) - term = line = SCM_EOF_VAL; - else + do { - if (s[slen-1] == '\n') + if (SCM_UNLIKELY (index >= LINE_BUFFER_SIZE)) { - term = SCM_MAKE_CHAR ('\n'); - s[slen-1] = '\0'; - line = scm_take_locale_stringn (s, slen-1); - SCM_INCLINE (port); + /* The line is getting longer than BUF so store its current + contents in STRINGS. */ + strings = scm_cons (scm_from_utf32_stringn (buf, index), + scm_is_false (strings) ? SCM_EOL : strings); + index = 0; } else { - /* Fix: we should check for eof on the port before assuming this. */ - term = SCM_EOF_VAL; - line = scm_take_locale_stringn (s, slen); - SCM_COL (port) += slen; + buf[index] = scm_getc (port); + switch (buf[index]) + { + case EOF: + case '\n': + delim = buf[index]; + break; + + default: + index++; + } } } + while (delim == 0); + + if (SCM_LIKELY (scm_is_false (strings))) + /* The fast path. */ + line = scm_from_utf32_stringn (buf, index); + else + { + /* Aggregate the intermediary results. */ + strings = scm_cons (scm_from_utf32_stringn (buf, index), strings); + line = scm_string_concatenate (scm_reverse (strings)); + } - if (pt->rw_random) - pt->rw_active = SCM_PORT_READ; + if (delim == EOF && scm_i_string_length (line) == 0) + result = scm_cons (SCM_EOF_VAL, SCM_EOF_VAL); + else + result = scm_cons (line, + delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim)); - return scm_cons (line, term); + return result; +#undef LINE_BUFFER_SIZE } #undef FUNC_NAME