1 ;;; -*- Mode: scheme; coding: utf-8; -*-
4 ;;; Copyright (C) 2011 Free Software Foundation, Inc.
7 ;;; This program is free software; you can redistribute it and/or
8 ;;; modify it under the terms of the GNU Lesser General Public License
9 ;;; as published by the Free Software Foundation; either version 3, or
10 ;;; (at your option) any later version.
12 ;;; This program is distributed in the hope that it will be useful,
13 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;;; GNU Lesser General Public License for more details.
17 ;;; You should have received a copy of the GNU Lesser General Public
18 ;;; License along with this software; see the file COPYING.LESSER. If
19 ;;; not, write to the Free Software Foundation, Inc., 51 Franklin
20 ;;; Street, Fifth Floor, Boston, MA 02110-1301 USA
22 (define-module (benchmarks strings)
23 #:use-module (benchmark-suite lib)
24 #:use-module (ice-9 i18n))
26 (use-modules (ice-9 i18n))
28 (seed->random-state 1)
30 ;; Start from a known locale state
31 (setlocale LC_ALL "C")
33 (define char-set:cased (char-set-union char-set:lower-case
37 (char-set->list (char-set-xor
38 (char-set-intersection (ucs-range->char-set 0 255)
40 (->char-set #\ยต)))) ; Can't do a case-insensitive comparison of a string
41 ; with mu in fr_FR.iso88591 since it case-folds into a
42 ; non-Latin-1 character.
45 (char-set->list char-set:cased))
47 (define (random-string c-list n)
48 (let ((len (length c-list)))
52 (list-ref c-list (random len)))
55 (define (diff-at-start str)
56 (string-append "!" (substring str 1)))
57 (define (diff-in-middle str)
58 (let ((x (floor (/ (string-length str) 2))))
59 (string-append (substring str 0 x)
61 (substring str (1+ x)))))
62 (define (diff-at-end str)
63 (string-append (substring str 0 (1- (string-length str)))
66 (define short-latin1-string (random-string *latin1* 10))
67 (define medium-latin1-string (random-string *latin1* 100))
68 (define long-latin1-string (random-string *latin1* 1000))
70 (define short-latin1-string-diff-at-start (diff-at-start short-latin1-string))
71 (define medium-latin1-string-diff-at-start (diff-at-start medium-latin1-string))
72 (define long-latin1-string-diff-at-start (diff-at-start long-latin1-string))
74 (define short-latin1-string-diff-in-middle (diff-in-middle short-latin1-string))
75 (define medium-latin1-string-diff-in-middle (diff-in-middle medium-latin1-string))
76 (define long-latin1-string-diff-in-middle (diff-in-middle long-latin1-string))
78 (define short-latin1-string-diff-at-end (diff-at-end short-latin1-string))
79 (define medium-latin1-string-diff-at-end (diff-at-end medium-latin1-string))
80 (define long-latin1-string-diff-at-end (diff-at-end long-latin1-string))
82 (define short-cased-string (random-string *cased* 10))
83 (define medium-cased-string (random-string *cased* 100))
84 (define long-cased-string (random-string *cased* 1000))
86 (define short-cased-string-diff-at-start (diff-at-start short-cased-string))
87 (define medium-cased-string-diff-at-start (diff-at-start medium-cased-string))
88 (define long-cased-string-diff-at-start (diff-at-start long-cased-string))
90 (define short-cased-string-diff-in-middle (diff-in-middle short-cased-string))
91 (define medium-cased-string-diff-in-middle (diff-in-middle medium-cased-string))
92 (define long-cased-string-diff-in-middle (diff-in-middle long-cased-string))
94 (define short-cased-string-diff-at-end (diff-at-end short-cased-string))
95 (define medium-cased-string-diff-at-end (diff-at-end medium-cased-string))
96 (define long-cased-string-diff-at-end (diff-at-end long-cased-string))
98 (define %french-locale-name "fr_FR.ISO-8859-1")
100 (define %french-utf8-locale-name "fr_FR.UTF-8")
102 (define %french-locale
104 (make-locale (list LC_CTYPE LC_COLLATE LC_NUMERIC LC_TIME)
105 %french-locale-name)))
107 (define %french-utf8-locale
109 (make-locale (list LC_CTYPE LC_COLLATE LC_NUMERIC LC_TIME)
110 %french-utf8-locale-name)))
112 (define (under-locale-or-unresolved locale thunk)
113 ;; On non-GNU systems, an exception may be raised only when the locale is
114 ;; actually used rather than at `make-locale'-time. Thus, we must guard
117 (if (string-contains %host-type "-gnu")
119 (catch 'system-error thunk
121 (throw 'unresolved))))
122 (throw 'unresolved)))
124 (define (under-french-locale-or-unresolved thunk)
125 (under-locale-or-unresolved %french-locale thunk))
127 (define (under-french-utf8-locale-or-unresolved thunk)
128 (under-locale-or-unresolved %french-utf8-locale thunk))
130 (define (string-op str1 str2)
132 (string>? str1 str2))
134 (define (string-ci-op str1 str2)
135 (string-ci<? str1 str2)
136 (string-ci>? str1 str2))
138 (define (string-fr-op str1 str2)
139 (under-french-locale-or-unresolved
141 (string-locale<? str1 str2 %french-locale)
142 (string-locale>? str1 str2 %french-locale))))
144 (define (string-fr-utf8-op str1 str2)
145 (under-french-utf8-locale-or-unresolved
147 (string-locale<? str1 str2 %french-utf8-locale)
148 (string-locale>? str1 str2 %french-utf8-locale))))
150 (define (string-fr-ci-op str1 str2)
151 (under-french-locale-or-unresolved
153 (string-locale-ci<? str1 str2 %french-locale)
154 (string-locale-ci>? str1 str2 %french-locale))))
156 (define (string-fr-utf8-ci-op str1 str2)
157 (under-french-utf8-locale-or-unresolved
159 (string-locale-ci<? str1 str2 %french-utf8-locale)
160 (string-locale-ci>? str1 str2 %french-utf8-locale))))
163 (with-benchmark-prefix "string ops"
165 (with-benchmark-prefix "short Latin1"
167 (benchmark "compare initially differing strings" 100000
168 (string-op short-latin1-string short-latin1-string-diff-at-start))
170 (benchmark "compare medially differing strings" 100000
171 (string-op short-latin1-string short-latin1-string-diff-in-middle))
173 (benchmark "compare terminally differing strings" 100000
174 (string-op short-latin1-string short-latin1-string-diff-at-end))
176 (benchmark "compare identical strings" 100000
177 (string-op short-latin1-string short-latin1-string))
179 (benchmark "case compare initially differing strings" 100000
180 (string-ci-op short-latin1-string short-latin1-string-diff-at-start))
182 (benchmark "case compare medially differing strings" 100000
183 (string-ci-op short-latin1-string short-latin1-string-diff-in-middle))
185 (benchmark "case compare terminally differing strings" 100000
186 (string-ci-op short-latin1-string short-latin1-string-diff-at-end))
188 (benchmark "case compare identical strings" 100000
189 (string-ci-op short-latin1-string short-latin1-string))
191 (benchmark "French Latin-1 locale compare initially differing strings" 100000
192 (string-fr-op short-latin1-string short-latin1-string-diff-at-start))
194 (benchmark "French Latin-1 locale compare medially differing strings" 100000
195 (string-fr-op short-latin1-string short-latin1-string-diff-in-middle))
197 (benchmark "French Latin-1 locale compare terminally differing strings" 100000
198 (string-fr-op short-latin1-string short-latin1-string-diff-at-end))
200 (benchmark "French Latin-1 locale compare identical strings" 100000
201 (string-fr-op short-latin1-string short-latin1-string))
203 (benchmark "French Latin-1 locale case compare initially differing strings" 100000
204 (string-fr-ci-op short-latin1-string short-latin1-string-diff-at-start))
206 (benchmark "French Latin-1 locale case compare medially differing strings" 100000
207 (string-fr-ci-op short-latin1-string short-latin1-string-diff-in-middle))
209 (benchmark "French Latin-1 locale case compare terminally differing strings" 100000
210 (string-fr-ci-op short-latin1-string short-latin1-string-diff-at-end))
212 (benchmark "French Latin-1 locale case compare identical strings" 100000
213 (string-fr-ci-op short-latin1-string short-latin1-string))
215 (benchmark "French UTF-8 locale compare initially differing strings" 100000
216 (string-fr-utf8-op short-latin1-string short-latin1-string-diff-at-start))
218 (benchmark "French UTF-8 locale compare medially differing strings" 100000
219 (string-fr-utf8-op short-latin1-string short-latin1-string-diff-in-middle))
221 (benchmark "French UTF-8 locale compare terminally differing strings" 100000
222 (string-fr-utf8-op short-latin1-string short-latin1-string-diff-at-end))
224 (benchmark "French UTF-8 locale compare identical strings" 100000
225 (string-fr-utf8-op short-latin1-string short-latin1-string))
227 (benchmark "French UTF-8 locale case compare initially differing strings" 100000
228 (string-fr-utf8-ci-op short-latin1-string short-latin1-string-diff-at-start))
230 (benchmark "French UTF-8 locale case compare medially differing strings" 100000
231 (string-fr-utf8-ci-op short-latin1-string short-latin1-string-diff-in-middle))
233 (benchmark "French UTF-8 locale case compare terminally differing strings" 100000
234 (string-fr-utf8-ci-op short-latin1-string short-latin1-string-diff-at-end))
236 (benchmark "French UTF-8 locale case compare identical strings" 100000
237 (string-fr-utf8-ci-op short-latin1-string short-latin1-string)))
239 (with-benchmark-prefix "medium Latin1"
241 (benchmark "compare initially differing strings" 10000
242 (string-op medium-latin1-string medium-latin1-string-diff-at-start))
244 (benchmark "compare medially differing strings" 10000
245 (string-op medium-latin1-string medium-latin1-string-diff-in-middle))
247 (benchmark "compare terminally differing strings" 10000
248 (string-op medium-latin1-string medium-latin1-string-diff-at-end))
250 (benchmark "compare identical strings" 10000
251 (string-op medium-latin1-string medium-latin1-string))
253 (benchmark "case compare initially differing strings" 10000
254 (string-ci-op medium-latin1-string medium-latin1-string-diff-at-start))
256 (benchmark "case compare medially differing strings" 10000
257 (string-ci-op medium-latin1-string medium-latin1-string-diff-in-middle))
259 (benchmark "case compare terminally differing strings" 10000
260 (string-ci-op medium-latin1-string medium-latin1-string-diff-at-end))
262 (benchmark "case compare identical strings" 10000
263 (string-ci-op medium-latin1-string medium-latin1-string))
265 (benchmark "French Latin-1 locale compare initially differing strings" 10000
266 (string-fr-op medium-latin1-string medium-latin1-string-diff-at-start))
268 (benchmark "French Latin-1 locale compare medially differing strings" 10000
269 (string-fr-op medium-latin1-string medium-latin1-string-diff-in-middle))
271 (benchmark "French Latin-1 locale compare terminally differing strings" 10000
272 (string-fr-op medium-latin1-string medium-latin1-string-diff-at-end))
274 (benchmark "French Latin-1 locale compare identical strings" 10000
275 (string-fr-op medium-latin1-string medium-latin1-string))
277 (benchmark "French Latin-1 locale case compare initially differing strings" 10000
278 (string-fr-ci-op medium-latin1-string medium-latin1-string-diff-at-start))
280 (benchmark "French Latin-1 locale case compare medially differing strings" 10000
281 (string-fr-ci-op medium-latin1-string medium-latin1-string-diff-in-middle))
283 (benchmark "French Latin-1 locale case compare terminally differing strings" 10000
284 (string-fr-ci-op medium-latin1-string medium-latin1-string-diff-at-end))
286 (benchmark "French Latin-1 locale case compare identical strings" 10000
287 (string-fr-ci-op medium-latin1-string medium-latin1-string))
289 (benchmark "French UTF-8 locale compare initially differing strings" 10000
290 (string-fr-utf8-op medium-latin1-string medium-latin1-string-diff-at-start))
292 (benchmark "French UTF-8 locale compare medially differing strings" 10000
293 (string-fr-utf8-op medium-latin1-string medium-latin1-string-diff-in-middle))
295 (benchmark "French UTF-8 locale compare terminally differing strings" 10000
296 (string-fr-utf8-op medium-latin1-string medium-latin1-string-diff-at-end))
298 (benchmark "French UTF-8 locale compare identical strings" 10000
299 (string-fr-utf8-op medium-latin1-string medium-latin1-string))
301 (benchmark "French UTF-8 locale case compare initially differing strings" 10000
302 (string-fr-utf8-ci-op medium-latin1-string medium-latin1-string-diff-at-start))
304 (benchmark "French UTF-8 locale case compare medially differing strings" 10000
305 (string-fr-utf8-ci-op medium-latin1-string medium-latin1-string-diff-in-middle))
307 (benchmark "French UTF-8 locale case compare terminally differing strings" 10000
308 (string-fr-utf8-ci-op medium-latin1-string medium-latin1-string-diff-at-end))
310 (benchmark "French UTF-8 locale case compare identical strings" 10000
311 (string-fr-utf8-ci-op medium-latin1-string medium-latin1-string)))
313 (with-benchmark-prefix "long Latin1"
315 (benchmark "compare initially differing strings" 1000
316 (string-op long-latin1-string long-latin1-string-diff-at-start))
318 (benchmark "compare medially differing strings" 1000
319 (string-op long-latin1-string long-latin1-string-diff-in-middle))
321 (benchmark "compare terminally differing strings" 1000
322 (string-op long-latin1-string long-latin1-string-diff-at-end))
324 (benchmark "compare identical strings" 1000
325 (string-op long-latin1-string long-latin1-string))
327 (benchmark "case compare initially differing strings" 1000
328 (string-ci-op long-latin1-string long-latin1-string-diff-at-start))
330 (benchmark "case compare medially differing strings" 1000
331 (string-ci-op long-latin1-string long-latin1-string-diff-in-middle))
333 (benchmark "case compare terminally differing strings" 1000
334 (string-ci-op long-latin1-string long-latin1-string-diff-at-end))
336 (benchmark "case compare identical strings" 1000
337 (string-ci-op long-latin1-string long-latin1-string))
339 (benchmark "French Latin-1 locale compare initially differing strings" 1000
340 (string-fr-op long-latin1-string long-latin1-string-diff-at-start))
342 (benchmark "French Latin-1 locale compare medially differing strings" 1000
343 (string-fr-op long-latin1-string long-latin1-string-diff-in-middle))
345 (benchmark "French Latin-1 locale compare terminally differing strings" 1000
346 (string-fr-op long-latin1-string long-latin1-string-diff-at-end))
348 (benchmark "French Latin-1 locale compare identical strings" 1000
349 (string-fr-op long-latin1-string long-latin1-string))
351 (benchmark "French Latin-1 locale case compare initially differing strings" 1000
352 (string-fr-ci-op long-latin1-string long-latin1-string-diff-at-start))
354 (benchmark "French Latin-1 locale case compare medially differing strings" 1000
355 (string-fr-ci-op long-latin1-string long-latin1-string-diff-in-middle))
357 (benchmark "French Latin-1 locale case compare terminally differing strings" 1000
358 (string-fr-ci-op long-latin1-string long-latin1-string-diff-at-end))
360 (benchmark "French Latin-1 locale case compare identical strings" 1000
361 (string-fr-ci-op long-latin1-string long-latin1-string))
363 (benchmark "French UTF-8 locale compare initially differing strings" 1000
364 (string-fr-utf8-op long-latin1-string long-latin1-string-diff-at-start))
366 (benchmark "French UTF-8 locale compare medially differing strings" 1000
367 (string-fr-utf8-op long-latin1-string long-latin1-string-diff-in-middle))
369 (benchmark "French UTF-8 locale compare terminally differing strings" 1000
370 (string-fr-utf8-op long-latin1-string long-latin1-string-diff-at-end))
372 (benchmark "French UTF-8 locale compare identical strings" 1000
373 (string-fr-utf8-op long-latin1-string long-latin1-string))
375 (benchmark "French UTF-8 locale case compare initially differing strings" 1000
376 (string-fr-utf8-ci-op long-latin1-string long-latin1-string-diff-at-start))
378 (benchmark "French UTF-8 locale case compare medially differing strings" 1000
379 (string-fr-utf8-ci-op long-latin1-string long-latin1-string-diff-in-middle))
381 (benchmark "French UTF-8 locale case compare terminally differing strings" 1000
382 (string-fr-utf8-ci-op long-latin1-string long-latin1-string-diff-at-end))
384 (benchmark "French UTF-8 locale case compare identical strings" 1000
385 (string-fr-utf8-ci-op long-latin1-string long-latin1-string)))
387 (with-benchmark-prefix "short Unicode"
389 (benchmark "compare initially differing strings" 100000
390 (string-op short-cased-string short-cased-string-diff-at-start))
392 (benchmark "compare medially differing strings" 100000
393 (string-op short-cased-string short-cased-string-diff-in-middle))
395 (benchmark "compare terminally differing strings" 100000
396 (string-op short-cased-string short-cased-string-diff-at-end))
398 (benchmark "compare identical strings" 100000
399 (string-op short-cased-string short-cased-string))
401 (benchmark "case compare initially differing strings" 100000
402 (string-ci-op short-cased-string short-cased-string-diff-at-start))
404 (benchmark "case compare medially differing strings" 100000
405 (string-ci-op short-cased-string short-cased-string-diff-in-middle))
407 (benchmark "case compare terminally differing strings" 100000
408 (string-ci-op short-cased-string short-cased-string-diff-at-end))
410 (benchmark "case compare identical strings" 100000
411 (string-ci-op short-cased-string short-cased-string))
413 (benchmark "French UTF-8 locale compare initially differing strings" 100000
414 (string-fr-utf8-op short-cased-string short-cased-string-diff-at-start))
416 (benchmark "French UTF-8 locale compare medially differing strings" 100000
417 (string-fr-utf8-op short-cased-string short-cased-string-diff-in-middle))
419 (benchmark "French UTF-8 locale compare terminally differing strings" 100000
420 (string-fr-utf8-op short-cased-string short-cased-string-diff-at-end))
422 (benchmark "French UTF-8 locale compare identical strings" 100000
423 (string-fr-utf8-op short-cased-string short-cased-string))
425 (benchmark "French UTF-8 locale case compare initially differing strings" 100000
426 (string-fr-utf8-ci-op short-cased-string short-cased-string-diff-at-start))
428 (benchmark "French UTF-8 locale case compare medially differing strings" 100000
429 (string-fr-utf8-ci-op short-cased-string short-cased-string-diff-in-middle))
431 (benchmark "French UTF-8 locale case compare terminally differing strings" 100000
432 (string-fr-utf8-ci-op short-cased-string short-cased-string-diff-at-end))
434 (benchmark "French UTF-8 locale case compare identical strings" 100000
435 (string-fr-utf8-ci-op short-cased-string short-cased-string)))
437 (with-benchmark-prefix "medium Unicode"
439 (benchmark "compare initially differing strings" 10000
440 (string-op medium-cased-string medium-cased-string-diff-at-start))
442 (benchmark "compare medially differing strings" 10000
443 (string-op medium-cased-string medium-cased-string-diff-in-middle))
445 (benchmark "compare terminally differing strings" 10000
446 (string-op medium-cased-string medium-cased-string-diff-at-end))
448 (benchmark "compare identical strings" 10000
449 (string-op medium-cased-string medium-cased-string))
451 (benchmark "case compare initially differing strings" 10000
452 (string-ci-op medium-cased-string medium-cased-string-diff-at-start))
454 (benchmark "case compare medially differing strings" 10000
455 (string-ci-op medium-cased-string medium-cased-string-diff-in-middle))
457 (benchmark "case compare terminally differing strings" 10000
458 (string-ci-op medium-cased-string medium-cased-string-diff-at-end))
460 (benchmark "case compare identical strings" 10000
461 (string-ci-op medium-cased-string medium-cased-string))
463 (benchmark "French UTF-8 locale compare initially differing strings" 10000
464 (string-fr-utf8-op medium-cased-string medium-cased-string-diff-at-start))
466 (benchmark "French UTF-8 locale compare medially differing strings" 10000
467 (string-fr-utf8-op medium-cased-string medium-cased-string-diff-in-middle))
469 (benchmark "French UTF-8 locale compare terminally differing strings" 10000
470 (string-fr-utf8-op medium-cased-string medium-cased-string-diff-at-end))
472 (benchmark "French UTF-8 locale compare identical strings" 10000
473 (string-fr-utf8-op medium-cased-string medium-cased-string))
475 (benchmark "French UTF-8 locale case compare initially differing strings" 10000
476 (string-fr-utf8-ci-op medium-cased-string medium-cased-string-diff-at-start))
478 (benchmark "French UTF-8 locale case compare medially differing strings" 10000
479 (string-fr-utf8-ci-op medium-cased-string medium-cased-string-diff-in-middle))
481 (benchmark "French UTF-8 locale case compare terminally differing strings" 10000
482 (string-fr-utf8-ci-op medium-cased-string medium-cased-string-diff-at-end))
484 (benchmark "French UTF-8 locale case compare identical strings" 10000
485 (string-fr-utf8-ci-op medium-cased-string medium-cased-string)))
487 (with-benchmark-prefix "long Unicode"
489 (benchmark "compare initially differing strings" 1000
490 (string-op long-cased-string long-cased-string-diff-at-start))
492 (benchmark "compare medially differing strings" 1000
493 (string-op long-cased-string long-cased-string-diff-in-middle))
495 (benchmark "compare terminally differing strings" 1000
496 (string-op long-cased-string long-cased-string-diff-at-end))
498 (benchmark "compare identical strings" 1000
499 (string-op long-cased-string long-cased-string))
501 (benchmark "case compare initially differing strings" 1000
502 (string-ci-op long-cased-string long-cased-string-diff-at-start))
504 (benchmark "case compare medially differing strings" 1000
505 (string-ci-op long-cased-string long-cased-string-diff-in-middle))
507 (benchmark "case compare terminally differing strings" 1000
508 (string-ci-op long-cased-string long-cased-string-diff-at-end))
510 (benchmark "case compare identical strings" 1000
511 (string-ci-op long-cased-string long-cased-string))
513 (benchmark "French UTF-8 locale compare initially differing strings" 1000
514 (string-fr-utf8-op long-cased-string long-cased-string-diff-at-start))
516 (benchmark "French UTF-8 locale compare medially differing strings" 1000
517 (string-fr-utf8-op long-cased-string long-cased-string-diff-in-middle))
519 (benchmark "French UTF-8 locale compare terminally differing strings" 1000
520 (string-fr-utf8-op long-cased-string long-cased-string-diff-at-end))
522 (benchmark "French UTF-8 locale compare identical strings" 1000
523 (string-fr-utf8-op long-cased-string long-cased-string))
525 (benchmark "French UTF-8 locale case compare initially differing strings" 1000
526 (string-fr-utf8-ci-op long-cased-string long-cased-string-diff-at-start))
528 (benchmark "French UTF-8 locale case compare medially differing strings" 1000
529 (string-fr-utf8-ci-op long-cased-string long-cased-string-diff-in-middle))
531 (benchmark "French UTF-8 locale case compare terminally differing strings" 1000
532 (string-fr-utf8-ci-op long-cased-string long-cased-string-diff-at-end))
534 (benchmark "French UTF-8 locale case compare identical strings" 1000
535 (string-fr-utf8-ci-op long-cased-string long-cased-string))))