;;; -*- Mode: scheme; coding: utf-8; -*- ;;; strings.bm ;;; ;;; Copyright (C) 2011 Free Software Foundation, Inc. ;;; ;;; ;;; This program is free software; you can redistribute it and/or ;;; modify it under the terms of the GNU Lesser General Public License ;;; as published by the Free Software Foundation; either version 3, or ;;; (at your option) any later version. ;;; ;;; This program is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU Lesser General Public License for more details. ;;; ;;; You should have received a copy of the GNU Lesser General Public ;;; License along with this software; see the file COPYING.LESSER. If ;;; not, write to the Free Software Foundation, Inc., 51 Franklin ;;; Street, Fifth Floor, Boston, MA 02110-1301 USA (define-module (benchmarks strings) #:use-module (benchmark-suite lib) #:use-module (ice-9 i18n)) (use-modules (ice-9 i18n)) (seed->random-state 1) ;; Start from a known locale state (setlocale LC_ALL "C") (define char-set:cased (char-set-union char-set:lower-case char-set:upper-case char-set:title-case)) (define *latin1* (char-set->list (char-set-xor (char-set-intersection (ucs-range->char-set 0 255) char-set:cased) (->char-set #\ยต)))) ; Can't do a case-insensitive comparison of a string ; with mu in fr_FR.iso88591 since it case-folds into a ; non-Latin-1 character. (define *cased* (char-set->list char-set:cased)) (define (random-string c-list n) (let ((len (length c-list))) (apply string (map (lambda (x) (list-ref c-list (random len))) (iota n))))) (define (diff-at-start str) (string-append "!" (substring str 1))) (define (diff-in-middle str) (let ((x (floor (/ (string-length str) 2)))) (string-append (substring str 0 x) "!" (substring str (1+ x))))) (define (diff-at-end str) (string-append (substring str 0 (1- (string-length str))) "!")) (define short-latin1-string (random-string *latin1* 10)) (define medium-latin1-string (random-string *latin1* 100)) (define long-latin1-string (random-string *latin1* 1000)) (define short-latin1-string-diff-at-start (diff-at-start short-latin1-string)) (define medium-latin1-string-diff-at-start (diff-at-start medium-latin1-string)) (define long-latin1-string-diff-at-start (diff-at-start long-latin1-string)) (define short-latin1-string-diff-in-middle (diff-in-middle short-latin1-string)) (define medium-latin1-string-diff-in-middle (diff-in-middle medium-latin1-string)) (define long-latin1-string-diff-in-middle (diff-in-middle long-latin1-string)) (define short-latin1-string-diff-at-end (diff-at-end short-latin1-string)) (define medium-latin1-string-diff-at-end (diff-at-end medium-latin1-string)) (define long-latin1-string-diff-at-end (diff-at-end long-latin1-string)) (define short-cased-string (random-string *cased* 10)) (define medium-cased-string (random-string *cased* 100)) (define long-cased-string (random-string *cased* 1000)) (define short-cased-string-diff-at-start (diff-at-start short-cased-string)) (define medium-cased-string-diff-at-start (diff-at-start medium-cased-string)) (define long-cased-string-diff-at-start (diff-at-start long-cased-string)) (define short-cased-string-diff-in-middle (diff-in-middle short-cased-string)) (define medium-cased-string-diff-in-middle (diff-in-middle medium-cased-string)) (define long-cased-string-diff-in-middle (diff-in-middle long-cased-string)) (define short-cased-string-diff-at-end (diff-at-end short-cased-string)) (define medium-cased-string-diff-at-end (diff-at-end medium-cased-string)) (define long-cased-string-diff-at-end (diff-at-end long-cased-string)) (define %french-locale-name "fr_FR.ISO-8859-1") (define %french-utf8-locale-name "fr_FR.UTF-8") (define %french-locale (false-if-exception (make-locale (list LC_CTYPE LC_COLLATE LC_NUMERIC LC_TIME) %french-locale-name))) (define %french-utf8-locale (false-if-exception (make-locale (list LC_CTYPE LC_COLLATE LC_NUMERIC LC_TIME) %french-utf8-locale-name))) (define (under-locale-or-unresolved locale thunk) ;; On non-GNU systems, an exception may be raised only when the locale is ;; actually used rather than at `make-locale'-time. Thus, we must guard ;; against both. (if locale (if (string-contains %host-type "-gnu") (thunk) (catch 'system-error thunk (lambda (key . args) (throw 'unresolved)))) (throw 'unresolved))) (define (under-french-locale-or-unresolved thunk) (under-locale-or-unresolved %french-locale thunk)) (define (under-french-utf8-locale-or-unresolved thunk) (under-locale-or-unresolved %french-utf8-locale thunk)) (define (string-op str1 str2) (string? str1 str2)) (define (string-ci-op str1 str2) (string-ci? str1 str2)) (define (string-fr-op str1 str2) (under-french-locale-or-unresolved (lambda () (string-locale? str1 str2 %french-locale)))) (define (string-fr-utf8-op str1 str2) (under-french-utf8-locale-or-unresolved (lambda () (string-locale? str1 str2 %french-utf8-locale)))) (define (string-fr-ci-op str1 str2) (under-french-locale-or-unresolved (lambda () (string-locale-ci? str1 str2 %french-locale)))) (define (string-fr-utf8-ci-op str1 str2) (under-french-utf8-locale-or-unresolved (lambda () (string-locale-ci? str1 str2 %french-utf8-locale)))) (with-benchmark-prefix "string ops" (with-benchmark-prefix "short Latin1" (benchmark "compare initially differing strings" 100000 (string-op short-latin1-string short-latin1-string-diff-at-start)) (benchmark "compare medially differing strings" 100000 (string-op short-latin1-string short-latin1-string-diff-in-middle)) (benchmark "compare terminally differing strings" 100000 (string-op short-latin1-string short-latin1-string-diff-at-end)) (benchmark "compare identical strings" 100000 (string-op short-latin1-string short-latin1-string)) (benchmark "case compare initially differing strings" 100000 (string-ci-op short-latin1-string short-latin1-string-diff-at-start)) (benchmark "case compare medially differing strings" 100000 (string-ci-op short-latin1-string short-latin1-string-diff-in-middle)) (benchmark "case compare terminally differing strings" 100000 (string-ci-op short-latin1-string short-latin1-string-diff-at-end)) (benchmark "case compare identical strings" 100000 (string-ci-op short-latin1-string short-latin1-string)) (benchmark "French Latin-1 locale compare initially differing strings" 100000 (string-fr-op short-latin1-string short-latin1-string-diff-at-start)) (benchmark "French Latin-1 locale compare medially differing strings" 100000 (string-fr-op short-latin1-string short-latin1-string-diff-in-middle)) (benchmark "French Latin-1 locale compare terminally differing strings" 100000 (string-fr-op short-latin1-string short-latin1-string-diff-at-end)) (benchmark "French Latin-1 locale compare identical strings" 100000 (string-fr-op short-latin1-string short-latin1-string)) (benchmark "French Latin-1 locale case compare initially differing strings" 100000 (string-fr-ci-op short-latin1-string short-latin1-string-diff-at-start)) (benchmark "French Latin-1 locale case compare medially differing strings" 100000 (string-fr-ci-op short-latin1-string short-latin1-string-diff-in-middle)) (benchmark "French Latin-1 locale case compare terminally differing strings" 100000 (string-fr-ci-op short-latin1-string short-latin1-string-diff-at-end)) (benchmark "French Latin-1 locale case compare identical strings" 100000 (string-fr-ci-op short-latin1-string short-latin1-string)) (benchmark "French UTF-8 locale compare initially differing strings" 100000 (string-fr-utf8-op short-latin1-string short-latin1-string-diff-at-start)) (benchmark "French UTF-8 locale compare medially differing strings" 100000 (string-fr-utf8-op short-latin1-string short-latin1-string-diff-in-middle)) (benchmark "French UTF-8 locale compare terminally differing strings" 100000 (string-fr-utf8-op short-latin1-string short-latin1-string-diff-at-end)) (benchmark "French UTF-8 locale compare identical strings" 100000 (string-fr-utf8-op short-latin1-string short-latin1-string)) (benchmark "French UTF-8 locale case compare initially differing strings" 100000 (string-fr-utf8-ci-op short-latin1-string short-latin1-string-diff-at-start)) (benchmark "French UTF-8 locale case compare medially differing strings" 100000 (string-fr-utf8-ci-op short-latin1-string short-latin1-string-diff-in-middle)) (benchmark "French UTF-8 locale case compare terminally differing strings" 100000 (string-fr-utf8-ci-op short-latin1-string short-latin1-string-diff-at-end)) (benchmark "French UTF-8 locale case compare identical strings" 100000 (string-fr-utf8-ci-op short-latin1-string short-latin1-string))) (with-benchmark-prefix "medium Latin1" (benchmark "compare initially differing strings" 10000 (string-op medium-latin1-string medium-latin1-string-diff-at-start)) (benchmark "compare medially differing strings" 10000 (string-op medium-latin1-string medium-latin1-string-diff-in-middle)) (benchmark "compare terminally differing strings" 10000 (string-op medium-latin1-string medium-latin1-string-diff-at-end)) (benchmark "compare identical strings" 10000 (string-op medium-latin1-string medium-latin1-string)) (benchmark "case compare initially differing strings" 10000 (string-ci-op medium-latin1-string medium-latin1-string-diff-at-start)) (benchmark "case compare medially differing strings" 10000 (string-ci-op medium-latin1-string medium-latin1-string-diff-in-middle)) (benchmark "case compare terminally differing strings" 10000 (string-ci-op medium-latin1-string medium-latin1-string-diff-at-end)) (benchmark "case compare identical strings" 10000 (string-ci-op medium-latin1-string medium-latin1-string)) (benchmark "French Latin-1 locale compare initially differing strings" 10000 (string-fr-op medium-latin1-string medium-latin1-string-diff-at-start)) (benchmark "French Latin-1 locale compare medially differing strings" 10000 (string-fr-op medium-latin1-string medium-latin1-string-diff-in-middle)) (benchmark "French Latin-1 locale compare terminally differing strings" 10000 (string-fr-op medium-latin1-string medium-latin1-string-diff-at-end)) (benchmark "French Latin-1 locale compare identical strings" 10000 (string-fr-op medium-latin1-string medium-latin1-string)) (benchmark "French Latin-1 locale case compare initially differing strings" 10000 (string-fr-ci-op medium-latin1-string medium-latin1-string-diff-at-start)) (benchmark "French Latin-1 locale case compare medially differing strings" 10000 (string-fr-ci-op medium-latin1-string medium-latin1-string-diff-in-middle)) (benchmark "French Latin-1 locale case compare terminally differing strings" 10000 (string-fr-ci-op medium-latin1-string medium-latin1-string-diff-at-end)) (benchmark "French Latin-1 locale case compare identical strings" 10000 (string-fr-ci-op medium-latin1-string medium-latin1-string)) (benchmark "French UTF-8 locale compare initially differing strings" 10000 (string-fr-utf8-op medium-latin1-string medium-latin1-string-diff-at-start)) (benchmark "French UTF-8 locale compare medially differing strings" 10000 (string-fr-utf8-op medium-latin1-string medium-latin1-string-diff-in-middle)) (benchmark "French UTF-8 locale compare terminally differing strings" 10000 (string-fr-utf8-op medium-latin1-string medium-latin1-string-diff-at-end)) (benchmark "French UTF-8 locale compare identical strings" 10000 (string-fr-utf8-op medium-latin1-string medium-latin1-string)) (benchmark "French UTF-8 locale case compare initially differing strings" 10000 (string-fr-utf8-ci-op medium-latin1-string medium-latin1-string-diff-at-start)) (benchmark "French UTF-8 locale case compare medially differing strings" 10000 (string-fr-utf8-ci-op medium-latin1-string medium-latin1-string-diff-in-middle)) (benchmark "French UTF-8 locale case compare terminally differing strings" 10000 (string-fr-utf8-ci-op medium-latin1-string medium-latin1-string-diff-at-end)) (benchmark "French UTF-8 locale case compare identical strings" 10000 (string-fr-utf8-ci-op medium-latin1-string medium-latin1-string))) (with-benchmark-prefix "long Latin1" (benchmark "compare initially differing strings" 1000 (string-op long-latin1-string long-latin1-string-diff-at-start)) (benchmark "compare medially differing strings" 1000 (string-op long-latin1-string long-latin1-string-diff-in-middle)) (benchmark "compare terminally differing strings" 1000 (string-op long-latin1-string long-latin1-string-diff-at-end)) (benchmark "compare identical strings" 1000 (string-op long-latin1-string long-latin1-string)) (benchmark "case compare initially differing strings" 1000 (string-ci-op long-latin1-string long-latin1-string-diff-at-start)) (benchmark "case compare medially differing strings" 1000 (string-ci-op long-latin1-string long-latin1-string-diff-in-middle)) (benchmark "case compare terminally differing strings" 1000 (string-ci-op long-latin1-string long-latin1-string-diff-at-end)) (benchmark "case compare identical strings" 1000 (string-ci-op long-latin1-string long-latin1-string)) (benchmark "French Latin-1 locale compare initially differing strings" 1000 (string-fr-op long-latin1-string long-latin1-string-diff-at-start)) (benchmark "French Latin-1 locale compare medially differing strings" 1000 (string-fr-op long-latin1-string long-latin1-string-diff-in-middle)) (benchmark "French Latin-1 locale compare terminally differing strings" 1000 (string-fr-op long-latin1-string long-latin1-string-diff-at-end)) (benchmark "French Latin-1 locale compare identical strings" 1000 (string-fr-op long-latin1-string long-latin1-string)) (benchmark "French Latin-1 locale case compare initially differing strings" 1000 (string-fr-ci-op long-latin1-string long-latin1-string-diff-at-start)) (benchmark "French Latin-1 locale case compare medially differing strings" 1000 (string-fr-ci-op long-latin1-string long-latin1-string-diff-in-middle)) (benchmark "French Latin-1 locale case compare terminally differing strings" 1000 (string-fr-ci-op long-latin1-string long-latin1-string-diff-at-end)) (benchmark "French Latin-1 locale case compare identical strings" 1000 (string-fr-ci-op long-latin1-string long-latin1-string)) (benchmark "French UTF-8 locale compare initially differing strings" 1000 (string-fr-utf8-op long-latin1-string long-latin1-string-diff-at-start)) (benchmark "French UTF-8 locale compare medially differing strings" 1000 (string-fr-utf8-op long-latin1-string long-latin1-string-diff-in-middle)) (benchmark "French UTF-8 locale compare terminally differing strings" 1000 (string-fr-utf8-op long-latin1-string long-latin1-string-diff-at-end)) (benchmark "French UTF-8 locale compare identical strings" 1000 (string-fr-utf8-op long-latin1-string long-latin1-string)) (benchmark "French UTF-8 locale case compare initially differing strings" 1000 (string-fr-utf8-ci-op long-latin1-string long-latin1-string-diff-at-start)) (benchmark "French UTF-8 locale case compare medially differing strings" 1000 (string-fr-utf8-ci-op long-latin1-string long-latin1-string-diff-in-middle)) (benchmark "French UTF-8 locale case compare terminally differing strings" 1000 (string-fr-utf8-ci-op long-latin1-string long-latin1-string-diff-at-end)) (benchmark "French UTF-8 locale case compare identical strings" 1000 (string-fr-utf8-ci-op long-latin1-string long-latin1-string))) (with-benchmark-prefix "short Unicode" (benchmark "compare initially differing strings" 100000 (string-op short-cased-string short-cased-string-diff-at-start)) (benchmark "compare medially differing strings" 100000 (string-op short-cased-string short-cased-string-diff-in-middle)) (benchmark "compare terminally differing strings" 100000 (string-op short-cased-string short-cased-string-diff-at-end)) (benchmark "compare identical strings" 100000 (string-op short-cased-string short-cased-string)) (benchmark "case compare initially differing strings" 100000 (string-ci-op short-cased-string short-cased-string-diff-at-start)) (benchmark "case compare medially differing strings" 100000 (string-ci-op short-cased-string short-cased-string-diff-in-middle)) (benchmark "case compare terminally differing strings" 100000 (string-ci-op short-cased-string short-cased-string-diff-at-end)) (benchmark "case compare identical strings" 100000 (string-ci-op short-cased-string short-cased-string)) (benchmark "French UTF-8 locale compare initially differing strings" 100000 (string-fr-utf8-op short-cased-string short-cased-string-diff-at-start)) (benchmark "French UTF-8 locale compare medially differing strings" 100000 (string-fr-utf8-op short-cased-string short-cased-string-diff-in-middle)) (benchmark "French UTF-8 locale compare terminally differing strings" 100000 (string-fr-utf8-op short-cased-string short-cased-string-diff-at-end)) (benchmark "French UTF-8 locale compare identical strings" 100000 (string-fr-utf8-op short-cased-string short-cased-string)) (benchmark "French UTF-8 locale case compare initially differing strings" 100000 (string-fr-utf8-ci-op short-cased-string short-cased-string-diff-at-start)) (benchmark "French UTF-8 locale case compare medially differing strings" 100000 (string-fr-utf8-ci-op short-cased-string short-cased-string-diff-in-middle)) (benchmark "French UTF-8 locale case compare terminally differing strings" 100000 (string-fr-utf8-ci-op short-cased-string short-cased-string-diff-at-end)) (benchmark "French UTF-8 locale case compare identical strings" 100000 (string-fr-utf8-ci-op short-cased-string short-cased-string))) (with-benchmark-prefix "medium Unicode" (benchmark "compare initially differing strings" 10000 (string-op medium-cased-string medium-cased-string-diff-at-start)) (benchmark "compare medially differing strings" 10000 (string-op medium-cased-string medium-cased-string-diff-in-middle)) (benchmark "compare terminally differing strings" 10000 (string-op medium-cased-string medium-cased-string-diff-at-end)) (benchmark "compare identical strings" 10000 (string-op medium-cased-string medium-cased-string)) (benchmark "case compare initially differing strings" 10000 (string-ci-op medium-cased-string medium-cased-string-diff-at-start)) (benchmark "case compare medially differing strings" 10000 (string-ci-op medium-cased-string medium-cased-string-diff-in-middle)) (benchmark "case compare terminally differing strings" 10000 (string-ci-op medium-cased-string medium-cased-string-diff-at-end)) (benchmark "case compare identical strings" 10000 (string-ci-op medium-cased-string medium-cased-string)) (benchmark "French UTF-8 locale compare initially differing strings" 10000 (string-fr-utf8-op medium-cased-string medium-cased-string-diff-at-start)) (benchmark "French UTF-8 locale compare medially differing strings" 10000 (string-fr-utf8-op medium-cased-string medium-cased-string-diff-in-middle)) (benchmark "French UTF-8 locale compare terminally differing strings" 10000 (string-fr-utf8-op medium-cased-string medium-cased-string-diff-at-end)) (benchmark "French UTF-8 locale compare identical strings" 10000 (string-fr-utf8-op medium-cased-string medium-cased-string)) (benchmark "French UTF-8 locale case compare initially differing strings" 10000 (string-fr-utf8-ci-op medium-cased-string medium-cased-string-diff-at-start)) (benchmark "French UTF-8 locale case compare medially differing strings" 10000 (string-fr-utf8-ci-op medium-cased-string medium-cased-string-diff-in-middle)) (benchmark "French UTF-8 locale case compare terminally differing strings" 10000 (string-fr-utf8-ci-op medium-cased-string medium-cased-string-diff-at-end)) (benchmark "French UTF-8 locale case compare identical strings" 10000 (string-fr-utf8-ci-op medium-cased-string medium-cased-string))) (with-benchmark-prefix "long Unicode" (benchmark "compare initially differing strings" 1000 (string-op long-cased-string long-cased-string-diff-at-start)) (benchmark "compare medially differing strings" 1000 (string-op long-cased-string long-cased-string-diff-in-middle)) (benchmark "compare terminally differing strings" 1000 (string-op long-cased-string long-cased-string-diff-at-end)) (benchmark "compare identical strings" 1000 (string-op long-cased-string long-cased-string)) (benchmark "case compare initially differing strings" 1000 (string-ci-op long-cased-string long-cased-string-diff-at-start)) (benchmark "case compare medially differing strings" 1000 (string-ci-op long-cased-string long-cased-string-diff-in-middle)) (benchmark "case compare terminally differing strings" 1000 (string-ci-op long-cased-string long-cased-string-diff-at-end)) (benchmark "case compare identical strings" 1000 (string-ci-op long-cased-string long-cased-string)) (benchmark "French UTF-8 locale compare initially differing strings" 1000 (string-fr-utf8-op long-cased-string long-cased-string-diff-at-start)) (benchmark "French UTF-8 locale compare medially differing strings" 1000 (string-fr-utf8-op long-cased-string long-cased-string-diff-in-middle)) (benchmark "French UTF-8 locale compare terminally differing strings" 1000 (string-fr-utf8-op long-cased-string long-cased-string-diff-at-end)) (benchmark "French UTF-8 locale compare identical strings" 1000 (string-fr-utf8-op long-cased-string long-cased-string)) (benchmark "French UTF-8 locale case compare initially differing strings" 1000 (string-fr-utf8-ci-op long-cased-string long-cased-string-diff-at-start)) (benchmark "French UTF-8 locale case compare medially differing strings" 1000 (string-fr-utf8-ci-op long-cased-string long-cased-string-diff-in-middle)) (benchmark "French UTF-8 locale case compare terminally differing strings" 1000 (string-fr-utf8-ci-op long-cased-string long-cased-string-diff-at-end)) (benchmark "French UTF-8 locale case compare identical strings" 1000 (string-fr-utf8-ci-op long-cased-string long-cased-string))))