From: Ludovic Courtès Date: Sun, 6 May 2012 22:32:01 +0000 (+0200) Subject: Optimize `scm_read_string'. X-Git-Url: http://git.hcoop.net/bpt/guile.git/commitdiff_plain/ff4d3672757fec3c8509e26bc60abf95f9e8f51a?hp=5bbd632fc36b14f59d51e4ba2d8e189fd3cc0f76 Optimize `scm_read_string'. According to the new benchmarks, this leads to a 5% speed improvement when reading small strings, and a 27% improvement when reading large strings. * libguile/read.c (READER_STRING_BUFFER_SIZE): Change to 128; update comment to mention codepoints. (scm_read_string): Make `str' a list of strings, instead of a string. Store characters read in buffer `c_str'. Cons to STR when C_STR is full, and concatenate/reverse at the end. * benchmark-suite/benchmarks/read.bm (small, large): New variables. Set %DEFAULT-PORT-ENCODING to "UTF-8". ("read")["small strings", "large strings"]: New benchmarks. --- diff --git a/benchmark-suite/benchmarks/read.bm b/benchmark-suite/benchmarks/read.bm index e5cf7de93..f0b25f541 100644 --- a/benchmark-suite/benchmarks/read.bm +++ b/benchmark-suite/benchmarks/read.bm @@ -1,6 +1,6 @@ ;;; read.bm --- Exercise the reader. -*- Scheme -*- ;;; -;;; Copyright (C) 2008, 2010 Free Software Foundation, Inc. +;;; Copyright (C) 2008, 2010, 2012 Free Software Foundation, Inc. ;;; ;;; This program is free software; you can redistribute it and/or ;;; modify it under the terms of the GNU Lesser General Public License @@ -43,6 +43,11 @@ (load-file-with-reader file read buffering)) %files-to-load)) +(define small "\"hello, world!\"") +(define large (string-append "\"" (make-string 1234 #\A) "\"")) + +(fluid-set! 
%default-port-encoding "UTF-8") ; for string ports + (with-benchmark-prefix "read" @@ -59,4 +64,10 @@ (exercise-read (list _IOFBF 8192))) (benchmark "_IOFBF 16384" 10 - (exercise-read (list _IOFBF 16384)))) + (exercise-read (list _IOFBF 16384))) + + (benchmark "small strings" 100000 + (call-with-input-string small read)) + + (benchmark "large strings" 100000 + (call-with-input-string large read))) diff --git a/libguile/read.c b/libguile/read.c index 12b4c56d8..87d73bfbe 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -161,8 +161,8 @@ scm_i_read_hash_procedures_set_x (SCM value) /* Size of the C buffer used to read symbols and numbers. */ #define READER_BUFFER_SIZE 128 -/* Size of the C buffer used to read strings. */ -#define READER_STRING_BUFFER_SIZE 512 +/* Number of 32-bit codepoints in the buffer used to read strings. */ +#define READER_STRING_BUFFER_SIZE 128 /* The maximum size of Scheme character names. */ #define READER_CHAR_NAME_MAX_SIZE 50 @@ -493,15 +493,14 @@ scm_read_string (int chr, SCM port) /* For strings smaller than C_STR, this function creates only one Scheme object (the string returned). */ - SCM str = SCM_BOOL_F; - unsigned c_str_len = 0; - scm_t_wchar c; + SCM str = SCM_EOL; + size_t c_str_len = 0; + scm_t_wchar c, c_str[READER_STRING_BUFFER_SIZE]; /* Need to capture line and column numbers here. 
*/ long line = SCM_LINUM (port); int column = SCM_COL (port) - 1; - str = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL, 0); while ('"' != (c = scm_getc (port))) { if (c == EOF) @@ -511,12 +510,11 @@ scm_read_string (int chr, SCM port) "end of file in string constant", SCM_EOL); } - if (c_str_len + 1 >= scm_i_string_length (str)) - { - SCM addy = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL, 0); - - str = scm_string_append (scm_list_2 (str, addy)); - } + if (c_str_len + 1 >= READER_STRING_BUFFER_SIZE) + { + str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str); + c_str_len = 0; + } if (c == '\\') { @@ -580,12 +578,22 @@ scm_read_string (int chr, SCM port) scm_list_1 (SCM_MAKE_CHAR (c))); } } - str = scm_i_string_start_writing (str); - scm_i_string_set_x (str, c_str_len++, c); - scm_i_string_stop_writing (); + + c_str[c_str_len++] = c; } - return maybe_annotate_source (scm_i_substring_copy (str, 0, c_str_len), - port, line, column); + + if (scm_is_null (str)) + /* Fast path: we got a string that fits in C_STR. */ + str = scm_from_utf32_stringn (c_str, c_str_len); + else + { + if (c_str_len > 0) + str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str); + + str = scm_string_concatenate_reverse (str, SCM_UNDEFINED, SCM_UNDEFINED); + } + + return maybe_annotate_source (str, port, line, column); } #undef FUNC_NAME