1 ;;;; benchmark-suite/lib.scm --- generic support for benchmarking
2 ;;;; Copyright (C) 2002 Free Software Foundation, Inc.
4 ;;;; This program is free software; you can redistribute it and/or modify
5 ;;;; it under the terms of the GNU General Public License as published by
6 ;;;; the Free Software Foundation; either version 2, or (at your option)
7 ;;;; any later version.
9 ;;;; This program is distributed in the hope that it will be useful,
10 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 ;;;; GNU General Public License for more details.
14 ;;;; You should have received a copy of the GNU General Public License
15 ;;;; along with this software; see the file COPYING. If not, write to
16 ;;;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
17 ;;;; Boston, MA 02111-1307 USA
19 (define-module (benchmark-suite lib)
22 ;; Controlling the execution.
26 ;; Running benchmarks.
30 ;; Naming groups of benchmarks in a regular fashion.
31 with-benchmark-prefix with-benchmark-prefix* current-benchmark-prefix
34 ;; Computing timing results
36 benchmark-total-time benchmark-user-time benchmark-system-time
37 benchmark-frame-time benchmark-core-time
38 benchmark-user-time\interpreter benchmark-core-time\interpreter
40 ;; Reporting results in various ways.
41 register-reporter unregister-reporter reporter-registered?
47 ;;;; If you're using Emacs's Scheme mode:
48 ;;;; (put 'with-benchmark-prefix 'scheme-indent-function 1)
49 ;;;; (put 'benchmark 'scheme-indent-function 1)
54 ;;;; The function (run-benchmark name iterations thunk) is the heart of the
55 ;;;; benchmarking environment. The first parameter NAME is a unique name for
;;;; the benchmark to be executed (for an explanation of this parameter see
;;;; below under ;;;; NAMES). The second parameter ITERATIONS is a positive
58 ;;;; integer value that indicates how often the thunk shall be executed (for
59 ;;;; an explanation of how iteration counts should be used, see below under
60 ;;;; ;;;; ITERATION COUNTS). For example:
62 ;;;; (run-benchmark "small integer addition" 100000 (lambda () (+ 1 1)))
;;;; This will run the function (lambda () (+ 1 1)) 100000 times (the
65 ;;;; iteration count can, however be scaled. See below for details). Some
66 ;;;; different time data for running the thunk for the given number of
67 ;;;; iterations is measured and reported.
69 ;;;; Convenience macro
71 ;;;; * (benchmark name iterations body) is a short form for
72 ;;;; (run-benchmark name iterations (lambda () body))
77 ;;;; Every benchmark in the benchmark suite has a unique name to be able to
78 ;;;; compare the results of individual benchmarks across several runs of the
81 ;;;; A benchmark name is a list of printable objects. For example:
82 ;;;; ("ports.scm" "file" "read and write back list of strings")
83 ;;;; ("ports.scm" "pipe" "read")
85 ;;;; Benchmark names may contain arbitrary objects, but they always have
86 ;;;; the following properties:
87 ;;;; - Benchmark names can be compared with EQUAL?.
88 ;;;; - Benchmark names can be reliably stored and retrieved with the standard
89 ;;;; WRITE and READ procedures; doing so preserves their identity.
93 ;;;; (benchmark "simple addition" 100000 (+ 2 2))
95 ;;;; In that case, the benchmark name is the list ("simple addition").
97 ;;;; The WITH-BENCHMARK-PREFIX syntax and WITH-BENCHMARK-PREFIX* procedure
98 ;;;; establish a prefix for the names of all benchmarks whose results are
99 ;;;; reported within their dynamic scope. For example:
102 ;;;; (with-benchmark-prefix "basic arithmetic"
103 ;;;; (benchmark "addition" 100000 (+ 2 2))
104 ;;;; (benchmark "subtraction" 100000 (- 4 2)))
105 ;;;; (benchmark "multiplication" 100000 (* 2 2))))
107 ;;;; In that example, the three benchmark names are:
108 ;;;; ("basic arithmetic" "addition"),
109 ;;;; ("basic arithmetic" "subtraction"), and
110 ;;;; ("multiplication").
112 ;;;; WITH-BENCHMARK-PREFIX can be nested. Each WITH-BENCHMARK-PREFIX
113 ;;;; postpends a new element to the current prefix:
115 ;;;; (with-benchmark-prefix "arithmetic"
116 ;;;; (with-benchmark-prefix "addition"
117 ;;;; (benchmark "integer" 100000 (+ 2 2))
118 ;;;; (benchmark "complex" 100000 (+ 2+3i 4+5i)))
119 ;;;; (with-benchmark-prefix "subtraction"
120 ;;;; (benchmark "integer" 100000 (- 2 2))
121 ;;;; (benchmark "complex" 100000 (- 2+3i 1+2i))))
123 ;;;; The four benchmark names here are:
124 ;;;; ("arithmetic" "addition" "integer")
125 ;;;; ("arithmetic" "addition" "complex")
126 ;;;; ("arithmetic" "subtraction" "integer")
127 ;;;; ("arithmetic" "subtraction" "complex")
129 ;;;; To print a name for a human reader, we DISPLAY its elements,
130 ;;;; separated by ": ". So, the last set of benchmark names would be
133 ;;;; arithmetic: addition: integer
134 ;;;; arithmetic: addition: complex
135 ;;;; arithmetic: subtraction: integer
136 ;;;; arithmetic: subtraction: complex
138 ;;;; The Guile benchmarks use with-benchmark-prefix to include the name of
139 ;;;; the source file containing the benchmark in the benchmark name, to
140 ;;;; provide each file with its own namespace.
143 ;;;; ITERATION COUNTS
145 ;;;; Every benchmark has to be given an iteration count that indicates how
146 ;;;; often it should be executed. The reason is, that in most cases a single
147 ;;;; execution of the benchmark code would not deliver usable timing results:
148 ;;;; The resolution of the system time is not arbitrarily fine. Thus, some
149 ;;;; benchmarks would be executed too quickly to be measured at all. A rule
150 ;;;; of thumb is, that the longer a benchmark runs, the more exact is the
151 ;;;; information about the execution time.
153 ;;;; However, execution time depends on several influences: First, the
154 ;;;; machine you are running the benchmark on. Second, the compiler you use.
155 ;;;; Third, which compiler options you use. Fourth, which version of guile
156 ;;;; you are using. Fifth, which guile options you are using (for example if
157 ;;;; you are using the debugging evaluator or not). There are even more
160 ;;;; For this reason, the same number of iterations for a single benchmark may
161 ;;;; lead to completely different execution times in different
162 ;;;; constellations. For someone working on a slow machine, the default
;;;; execution counts may lead to an unacceptable execution time of the
164 ;;;; benchmark suite. For someone on a very fast machine, however, it may be
;;;; desirable to increase the number of iterations in order to increase the
166 ;;;; accuracy of the time data.
168 ;;;; For this reason, the benchmark suite allows to scale the number of
169 ;;;; executions by a global factor, stored in the exported variable
170 ;;;; iteration-factor. The default for iteration-factor is 1. A number of 2
171 ;;;; means, that all benchmarks are executed twice as often, which will also
172 ;;;; roughly double the execution time for the benchmark suite. Similarly, if
173 ;;;; iteration-factor holds a value of 0.5, only about half the execution time
174 ;;;; will be required.
176 ;;;; It is probably a good idea to choose the iteration count for each
177 ;;;; benchmark such that all benchmarks will take about the same time, for
178 ;;;; example one second. To achieve this, the benchmark suite holds an empty
179 ;;;; benchmark in the file 0-reference.bm named "reference benchmark for
;;;; iteration counts". Its iteration count is calibrated to make the
181 ;;;; benchmark run about one second on Dirk's laptop :-) If you are adding
182 ;;;; benchmarks to the suite, it would be nice if you could calibrate the
183 ;;;; number of iterations such that each of your added benchmarks takes about
184 ;;;; as long to run as the reference benchmark. But: Don't be too accurate
185 ;;;; to figure out the correct iteration count.
190 ;;;; A reporter is a function which we apply to each benchmark outcome.
191 ;;;; Reporters can log results, print interesting results to the standard
192 ;;;; output, collect statistics, etc.
194 ;;;; A reporter function takes the following arguments: NAME ITERATIONS
195 ;;;; BEFORE AFTER GC-TIME. The argument NAME holds the name of the benchmark,
196 ;;;; ITERATIONS holds the actual number of iterations that were performed.
197 ;;;; BEFORE holds the result of the function (times) at the very beginning of
;;;; the execution of the benchmark, AFTER holds the result of the function
199 ;;;; (times) after the execution of the benchmark. GC-TIME, finally, holds
200 ;;;; the difference of calls to (gc-run-time) before and after the execution
201 ;;;; of the benchmark.
203 ;;;; This library provides some standard reporters for logging results
204 ;;;; to a file, reporting interesting results to the user, (FIXME: and
205 ;;;; collecting totals).
207 ;;;; You can use the REGISTER-REPORTER function and friends to add whatever
208 ;;;; reporting functions you like. See under ;;;; TIMING DATA to see how the
209 ;;;; library helps you to extract relevant timing information from the values
210 ;;;; ITERATIONS, BEFORE, AFTER and GC-TIME. If you don't register any
211 ;;;; reporters, the library uses USER-REPORTER, which writes the most
212 ;;;; interesting results to the standard output.
215 ;;;; TIME CALCULATION
217 ;;;; The library uses the guile functions (times) and (gc-run-time) to
218 ;;;; determine the execution time for a single benchmark. Based on these
219 ;;;; functions, the values of BEFORE, AFTER and GC-TIME are computed, which
220 ;;;; are then passed to the reporter functions. All three values BEFORE,
;;;; AFTER and GC-TIME include the time needed to execute the benchmark code
222 ;;;; itself, but also the surrounding code that implements the loop to run the
223 ;;;; benchmark code for the given number of times. This is undesirable, since
224 ;;;; one would prefer to only get the timing data for the benchmarking code.
226 ;;;; To cope with this, the benchmarking framework uses a trick: During
227 ;;;; initialization of the library, the time for executing an empty benchmark
228 ;;;; is measured and stored. This is an estimate for the time needed by the
229 ;;;; benchmarking framework itself. For later benchmarks, this time can then
230 ;;;; be subtracted from the measured execution times.
232 ;;;; In order to simplify the time calculation for users who want to write
;;;; their own reporters, the benchmarking framework provides the following
236 ;;;; benchmark-time-base : This variable holds the number of time units that
;;;; make up a second. By dividing the results of each of the functions
238 ;;;; below by this value, you get the corresponding time in seconds. For
239 ;;;; example (/ (benchmark-total-time before after) benchmark-time-base)
240 ;;;; will give you the total time in seconds.
241 ;;;; benchmark-total-time : this function takes two arguments BEFORE and AFTER
242 ;;;; and computes the total time between the two timestamps. The result
243 ;;;; of this function is what the time command of the unix command line
244 ;;;; would report as real time.
245 ;;;; benchmark-user-time : this function takes two arguments BEFORE and AFTER
246 ;;;; and computes the time spent in the benchmarking process between the
247 ;;;; two timestamps. That means, the time consumed by other processes
248 ;;;; running on the same machine is not part of the resulting time,
249 ;;;; neither is time spent within the operating system. The result of
250 ;;;; this function is what the time command of the unix command line would
251 ;;;; report as user time.
252 ;;;; benchmark-system-time : similar to benchmark-user-time, but here the time
253 ;;;; spent within the operating system is given. The result of this
254 ;;;; function is what the time command of the unix command line would
255 ;;;; report as system time.
256 ;;;; benchmark-frame-time : this function takes the argument ITERATIONS. It
257 ;;;; reports the part of the user time that is consumed by the
;;;; benchmarking framework itself to run some benchmark for the given
259 ;;;; number of iterations. You can think of this as the time that would
260 ;;;; still be consumed, even if the benchmarking code itself was empty.
261 ;;;; This value does not include any time for garbage collection, even if
262 ;;;; it is the benchmarking framework which is responsible for causing a
263 ;;;; garbage collection.
264 ;;;; benchmark-core-time : this function takes three arguments ITERATIONS,
265 ;;;; BEFORE and AFTER. It reports the part of the user time that is
266 ;;;; actually spent within the benchmarking code. That is, the time
267 ;;;; needed for the benchmarking framework is subtracted from the user
268 ;;;; time. This value, however, includes all garbage collection times,
269 ;;;; even if some part of the gc-time had actually to be attributed to the
270 ;;;; benchmarking framework.
271 ;;;; benchmark-user-time\interpreter : this function takes three arguments
272 ;;;; BEFORE AFTER and GC-TIME. It reports the part of the user time that
273 ;;;; is spent in the interpreter (and not in garbage collection).
274 ;;;; benchmark-core-time\interpreter : this function takes four arguments
;;;; ITERATIONS, BEFORE, AFTER and GC-TIME. It reports the part of the
276 ;;;; benchmark-core-time that is spent in the interpreter (and not in
277 ;;;; garbage collection). This value is most probably the one you are
278 ;;;; interested in, except if you are doing some garbage collection
281 ;;;; There is no function to calculate the garbage-collection time, since the
282 ;;;; garbage collection time is already passed as an argument GC-TIME to the
283 ;;;; reporter functions.
;;; Scale the number of iterations according to the given scaling factor.
;;; iteration-factor is exported: set it before running the suite to make
;;; every benchmark run proportionally more (or fewer) iterations.
(define iteration-factor 1)
;; Multiply ITERATIONS by iteration-factor and round to an exact integer.
;; NOTE(review): the body of the let* (the returned value) is not visible
;; in this excerpt -- presumably it clamps the result to at least 1.
(define (scale-iterations iterations)
  (let* ((i (inexact->exact (round (* iterations iteration-factor)))))
;;; The central routine for executing benchmarks.
;;; The idea is taken from Greg, the GNUstep regression test environment.
;;; run-benchmark is rebound below; the surrounding let hides the
;;; benchmark-running flag used to reject nested invocations.
(define run-benchmark #f)
(let ((benchmark-running #f))
  ;; Run THUNK for (scaled) ITERATIONS iterations, measure the elapsed
  ;; process times and GC time, and hand the results to REPORT under the
  ;; fully prefixed benchmark name.
  (define (local-run-benchmark name iterations thunk)
    (if benchmark-running
        (error "Nested calls to run-benchmark are not permitted.")
        (let ((benchmark-name (full-name name))
              (iterations (scale-iterations iterations)))
          (set! benchmark-running #t)
          (let ((before #f) (after #f) (gc-time #f))
            ;; Snapshot GC time and process times before the run.
            (set! gc-time (gc-run-time))
            (set! before (times))
            ;; NOTE(review): the loop that actually invokes THUNK the
            ;; requested number of times (and the capture of AFTER via
            ;; (times)) is not visible in this excerpt -- confirm against
            ;; the full source.
            (set! gc-time (- (gc-run-time) gc-time))
            (report benchmark-name iterations before after gc-time))
          (set! benchmark-running #f))))
  (set! run-benchmark local-run-benchmark))
;;; Convenience macro: (benchmark NAME ITERATIONS BODY ...) expands into
;;; (run-benchmark NAME ITERATIONS (lambda () BODY ...)).
(defmacro benchmark (name iterations body . rest)
  (let ((thunk `(lambda () ,@(cons body rest))))
    `(,run-benchmark ,name ,iterations ,thunk)))
;;;; Turn a benchmark name into a nice human-readable string.
;;;; The elements of NAME are DISPLAYed in order, separated by ": ",
;;;; e.g. ("arithmetic" "addition") => "arithmetic: addition".
;;;; NOTE(review): the call-with-output-string receiver lambda and the
;;;; full binding list of the named let are not visible in this excerpt.
(define (format-benchmark-name name)
  (call-with-output-string
    (let loop ((name name)
      (display separator port)
      (display (car name) port)
      (loop (cdr name) ": ")))))))
;;;; Return the full name of benchmark NAME: the currently active prefix
;;;; list with NAME appended as its final element.
(define (full-name name)
  (let ((prefix (current-benchmark-prefix)))
    (append prefix (list name))))
;;; A fluid containing the current benchmark prefix, as a list.
;;; It starts out empty; with-benchmark-prefix* extends it within a
;;; dynamic extent.
(define prefix-fluid (make-fluid))
(fluid-set! prefix-fluid '())
;; Return the benchmark-name prefix active in the current dynamic extent.
(define (current-benchmark-prefix)
  (fluid-ref prefix-fluid))
;;; Postpend PREFIX to the current name prefix while evaluating THUNK.
;;; The name prefix is only changed within the dynamic scope of the
;;; call to with-benchmark-prefix*.  Return the value returned by THUNK.
;;; NOTE(review): the with-fluids body (the call to THUNK) is not visible
;;; in this excerpt.
(define (with-benchmark-prefix* prefix thunk)
  (with-fluids ((prefix-fluid
                 (append (fluid-ref prefix-fluid) (list prefix))))
;;; (with-benchmark-prefix PREFIX BODY ...)
;;; Evaluate BODY ... with PREFIX postpended to the current name prefix.
;;; The prefix reverts when control leaves the dynamic scope of the
;;; expression; the value of the last BODY form is returned.
(defmacro with-benchmark-prefix (prefix . body)
  (list 'with-benchmark-prefix* prefix
        (append '(lambda ()) body)))
;;;; TIME CALCULATION

;;; Number of internal time units that make up one second.  Divide any of
;;; the time values computed by the functions below by this constant to
;;; obtain seconds.
(define benchmark-time-base
  internal-time-units-per-second)

;;; NOTE(review): the value of this define is not visible in this excerpt;
;;; presumably it is benchmark-time-base.
(define time-base ;; short-cut, not exported

;;; User time consumed by the benchmarking framework per iteration.
;;; Filled in by the calibration code at the bottom of this file.
(define frame-time/iteration
  "<will be set during initialization>")
;;; Wall-clock ("real") time elapsed between timestamps BEFORE and AFTER,
;;; in internal time units.
(define (benchmark-total-time before after)
  (let ((start (tms:clock before))
        (end (tms:clock after)))
    (- end start)))
;;; User-space CPU time this process spent between BEFORE and AFTER, in
;;; internal time units.
(define (benchmark-user-time before after)
  (apply - (map tms:utime (list after before))))
;;; CPU time spent inside the operating system on behalf of this process
;;; between BEFORE and AFTER, in internal time units.
(define (benchmark-system-time before after)
  (let ((start (tms:stime before)))
    (- (tms:stime after) start)))
;;; User time attributable to the benchmarking framework itself when a
;;; benchmark runs for ITERATIONS iterations (GC time not included).
(define (benchmark-frame-time iterations)
  (* frame-time/iteration iterations))
;;; User time actually spent inside the benchmark code: total user time
;;; minus the framework's per-iteration overhead (GC time still included).
(define (benchmark-core-time iterations before after)
  (let ((user (benchmark-user-time before after))
        (frame (benchmark-frame-time iterations)))
    (- user frame)))
;;; Part of the user time spent in the interpreter itself, i.e. user time
;;; with the garbage-collection time GC-TIME removed.
(define (benchmark-user-time\interpreter before after gc-time)
  (let ((user (benchmark-user-time before after)))
    (- user gc-time)))
;;; Part of the benchmark-core-time spent in the interpreter, i.e. core
;;; time with the garbage-collection time GC-TIME removed.
(define (benchmark-core-time\interpreter iterations before after gc-time)
  (let ((core (benchmark-core-time iterations before after)))
    (- core gc-time)))
;;; The global list of reporters.
(define reporters '())

;;; The default reporter, to be used only if no others exist.
;;; Assigned during the initialization sequence at the bottom of this file.
(define default-reporter #f)
;;; Add the procedure REPORTER to the current set of reporter functions.
;;; Signal an error if that reporter procedure object is already registered.
(define (register-reporter reporter)
  (cond ((memq reporter reporters)
         (error "register-reporter: reporter already registered: " reporter))
        (else
         (set! reporters (cons reporter reporters)))))
;;; Remove the procedure REPORTER from the current set of reporter
;;; functions.  Signal an error if REPORTER is not currently registered.
(define (unregister-reporter reporter)
  (if (not (memq reporter reporters))
      (error "unregister-reporter: reporter not registered: " reporter)
      (set! reporters (delq! reporter reporters))))
;;; Return true iff REPORTER is in the current set of reporter functions.
(define (reporter-registered? reporter)
  (and (memq reporter reporters) #t))
;;; Send RESULT to all currently registered reporter functions.
;;; ARGS is (NAME ITERATIONS BEFORE AFTER GC-TIME); when no reporter is
;;; registered, default-reporter receives the results instead.
;;; NOTE(review): the for-each's list argument (reporters) is not visible
;;; in this excerpt.
(define (report . args)
  (if (pair? reporters)
      (for-each (lambda (reporter) (apply reporter args))
      (apply default-reporter args)))
437 ;;;; Some useful standard reporters:
438 ;;;; Log reporters write all benchmark results to a given log file.
439 ;;;; Full reporters write all benchmark results to the standard output.
440 ;;;; User reporters write some interesting results to the standard output.
;;; Display a single benchmark result to the given port
;;; Writes one s-expression per benchmark containing the name, the actual
;;; iteration count, and every derived time (converted to seconds by
;;; dividing by time-base).
;;; NOTE(review): the port argument of WRITE and the trailing newline are
;;; not visible in this excerpt.
(define (print-result port name iterations before after gc-time)
  (let* ((name (format-benchmark-name name))
         (total-time (benchmark-total-time before after))
         (user-time (benchmark-user-time before after))
         (system-time (benchmark-system-time before after))
         (frame-time (benchmark-frame-time iterations))
         (benchmark-time (benchmark-core-time iterations before after))
         (user-time\interpreter
          (benchmark-user-time\interpreter before after gc-time))
         (benchmark-core-time\interpreter
          (benchmark-core-time\interpreter iterations before after gc-time))
         ;; i/ : divide and convert to an inexact (floating-point) value.
         (i/ (lambda (a b) (exact->inexact (/ a b)))))
    (write (list name iterations
                 'total (i/ total-time time-base)
                 'user (i/ user-time time-base)
                 'system (i/ system-time time-base)
                 'frame (i/ frame-time time-base)
                 'benchmark (i/ benchmark-time time-base)
                 'user/interp (i/ user-time\interpreter time-base)
                 'bench/interp (i/ benchmark-core-time\interpreter time-base)
                 'gc (i/ gc-time time-base))
;;; Return a reporter procedure which prints all results to the file
;;; FILE, in human-readable form.  FILE may be a filename, or a port.
;;; The port is flushed after every result so the log survives a crash.
;;; NOTE(review): the (lambda args ...) line introducing the returned
;;; reporter closure is not visible in this excerpt.
(define (make-log-reporter file)
  (let ((port (if (output-port? file) file
                  (open-output-file file))))
      (apply print-result port args)
      (force-output port))))
;;; A reporter that writes every benchmark's full result set to the
;;; standard output.
(define (full-reporter . args)
  (let ((port (current-output-port)))
    (apply print-result port args)))
;;; Display interesting results of a single benchmark to the given port
;;; Like print-result, but limited to the values most users care about:
;;; user time, core benchmark time, interpreter-only core time and GC time.
;;; NOTE(review): the port argument of WRITE and the trailing newline are
;;; not visible in this excerpt.
(define (print-user-result port name iterations before after gc-time)
  (let* ((name (format-benchmark-name name))
         (user-time (benchmark-user-time before after))
         (benchmark-time (benchmark-core-time iterations before after))
         (benchmark-core-time\interpreter
          (benchmark-core-time\interpreter iterations before after gc-time))
         ;; i/ : divide and convert to an inexact (floating-point) value.
         (i/ (lambda (a b) (exact->inexact (/ a b)))))
    (write (list name iterations
                 'user (i/ user-time time-base)
                 'benchmark (i/ benchmark-time time-base)
                 'bench/interp (i/ benchmark-core-time\interpreter time-base)
                 'gc (i/ gc-time time-base))
;;; A reporter that reports interesting results to the user on the
;;; standard output.
(define (user-reporter . args)
  (let ((out (current-output-port)))
    (apply print-user-result out args)))
;;;; Initialize the benchmarking system:

;;; First, make sure the benchmarking routines are compiled.
;; A reporter that discards all results; used only during warm-up.
(define (null-reporter . args) #t)
(set! default-reporter null-reporter)
(benchmark "empty initialization benchmark" 2 #t)

;;; Second, initialize the system constants
(display ";; calibrating the benchmarking framework..." (current-output-port))
(newline (current-output-port))
;; Calibration reporter: derives the framework's per-iteration overhead
;; from one run of the empty reference benchmark and stores it in
;; frame-time/iteration for later use by benchmark-frame-time.
(define (initialization-reporter name iterations before after gc-time)
  ;; NOTE(review): the constant 3 subtracted below looks like a small
  ;; correction for fixed measurement overhead -- confirm against the
  ;; full source history before relying on it.
  (let* ((frame-time (- (tms:utime after) (tms:utime before) gc-time 3)))
    (set! frame-time/iteration (/ frame-time iterations))
    (display ";; framework time per iteration: " (current-output-port))
    (display (/ frame-time/iteration time-base) (current-output-port))
    (newline (current-output-port))))
(set! default-reporter initialization-reporter)
;; Run the empty benchmark many times so the calibration is meaningful.
(benchmark "empty initialization benchmark" 524288 #t)

;;; Finally, set the default reporter
(set! default-reporter user-reporter)