1 ;;;; (statprof) -- a statistical profiler for Guile
4 ;;;; Copyright (C) 2009, 2010 Free Software Foundation, Inc.
5 ;;;; Copyright (C) 2004, 2009 Andy Wingo <wingo at pobox dot com>
6 ;;;; Copyright (C) 2001 Rob Browning <rlb at defaultvalue dot org>
8 ;;;; This library is free software; you can redistribute it and/or
9 ;;;; modify it under the terms of the GNU Lesser General Public
10 ;;;; License as published by the Free Software Foundation; either
11 ;;;; version 3 of the License, or (at your option) any later version.
13 ;;;; This library is distributed in the hope that it will be useful,
14 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 ;;;; Lesser General Public License for more details.
18 ;;;; You should have received a copy of the GNU Lesser General Public
19 ;;;; License along with this library; if not, write to the Free Software
20 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 ;;@code{(statprof)} is intended to be a fairly simple
27 ;;statistical profiler for guile. It is in the early stages yet, so
28 ;;consider its output still suspect, and please report any bugs to
29 ;;@email{guile-devel at gnu.org}, or to me directly at @email{rlb at
32 ;;A simple use of statprof would look like this:
35 ;; (statprof-reset 0 50000 #t)
42 ;;This would reset statprof, clearing all accumulated statistics, then
43 ;;start profiling, run some code, stop profiling, and finally display a
44 ;;gprof flat-style table of statistics which will look something like
48 ;; % cumulative self self total
49 ;; time seconds seconds calls ms/call ms/call name
50 ;; 35.29 0.23 0.23 2002 0.11 0.11 -
51 ;; 23.53 0.15 0.15 2001 0.08 0.08 positive?
52 ;; 23.53 0.15 0.15 2000 0.08 0.08 +
53 ;; 11.76 0.23 0.08 2000 0.04 0.11 do-nothing
54 ;; 5.88 0.64 0.04 2001 0.02 0.32 loop
55 ;; 0.00 0.15 0.00 1 0.00 150.59 do-something
59 ;;All of the numerical data with the exception of the calls column is
60 ;;statistically approximate. In the following column descriptions, and
61 ;;in all of statprof, "time" refers to execution time (both user and
62 ;;system), not wall clock time.
66 ;;The percent of the time spent inside the procedure itself
67 ;;(not counting children).
68 ;;@item cumulative seconds
69 ;;The total number of seconds spent in the procedure, including
72 ;;The total number of seconds spent in the procedure itself (not counting
75 ;;The total number of times the procedure was called.
77 ;;The average time taken by the procedure itself on each call, in ms.
79 ;;The average time taken by each call to the procedure, including time
80 ;;spent in child functions.
82 ;;The name of the procedure.
85 ;;The profiler uses @code{eq?} and the procedure object itself to
86 ;;identify the procedures, so it won't confuse different procedures with
87 ;;the same name. They will show up as two different rows in the output.
89 ;;Right now the profiler is quite simplistic. I cannot provide
90 ;;call-graphs or other higher level information. What you see in the
91 ;;table is pretty much all there is. Patches are welcome :-)
93 ;;@section Implementation notes
95 ;;The profiler works by setting the unix profiling signal
96 ;;@code{ITIMER_PROF} to go off after the interval you define in the call
97 ;;to @code{statprof-reset}. When the signal fires, a sampling routine is
98 ;;run which looks at the current procedure that's executing, and then
99 ;;crawls up the stack, and for each procedure encountered, increments
100 ;;that procedure's sample count. Note that if a procedure is encountered
101 ;;multiple times on a given stack, it is only counted once. After the
102 ;;sampling is complete, the profiler resets profiling timer to fire
103 ;;again after the appropriate interval.
105 ;;Meanwhile, the profiler keeps track, via @code{get-internal-run-time},
106 ;;how much CPU time (system and user -- which is also what
107 ;;@code{ITIMER_PROF} tracks), has elapsed while code has been executing
108 ;;within a statprof-start/stop block.
110 ;;The profiler also tries to avoid counting or timing its own code as
115 ;; When you add new features, please also add tests to ./tests/ if you
116 ;; have time, and then add the new files to ./run-tests. Also, if
117 ;; anyone's bored, there are a lot of existing API bits that don't
122 ;; Check about profiling C functions -- does profiling primitives work?
123 ;; Also look into stealing code from qprof so we can sample the C stack
126 (define-module (statprof)
127 #:use-module (srfi srfi-1)
128 #:autoload (ice-9 format) (format)
129 #:use-module (system vm vm)
130 #:use-module (system vm frame)
131 #:use-module (system vm program)
132 #:export (statprof-active?
137 statprof-accumulated-time
138 statprof-sample-count
139 statprof-fold-call-data
140 statprof-proc-call-data
141 statprof-call-data-name
142 statprof-call-data-calls
143 statprof-call-data-cum-samples
144 statprof-call-data-self-samples
145 statprof-call-data->stats
147 statprof-stats-proc-name
148 statprof-stats-%-time-in-proc
149 statprof-stats-cum-secs-in-proc
150 statprof-stats-self-secs-in-proc
152 statprof-stats-self-secs-per-call
153 statprof-stats-cum-secs-per-call
156 statprof-display-anomolies
158 statprof-fetch-stacks
159 statprof-fetch-call-tree
165 ;; This profiler tracks two numbers for every function called while
166 ;; it's active. It tracks the total number of calls, and the number
167 ;; of times the function was active when the sampler fired.
169 ;; Globally the profiler tracks the total time elapsed and the number
170 ;; of times the sampler was fired.
172 ;; Right now, this profiler is not per-thread and is not thread safe.
174 (define accumulated-time #f) ; total so far.
175 (define last-start-time #f) ; start-time when timer is active.
176 (define sample-count #f) ; total count of sampler calls.
177 (define sampling-frequency #f) ; in (seconds . microseconds)
178 (define remaining-prof-time #f) ; time remaining when prof suspended.
179 (define profile-level 0) ; for user start/stop nesting.
180 (define %count-calls? #t) ; whether to catch apply-frame.
181 (define gc-time-taken 0) ; gc time between statprof-start and
183 (define record-full-stacks? #f) ; if #t, stash away the stacks
184 ; for later analysis.
187 ;; procedure-data will be a hash where the key is the function object
188 ;; itself and the value is the data. The data will be a vector like
189 ;; this: #(name call-count cum-sample-count self-sample-count)
190 (define procedure-data #f)
192 ;; If you change the call-data data structure, you need to also change
193 ;; sample-uncount-frame.
194 (define (make-call-data proc call-count cum-sample-count self-sample-count)
195 (vector proc call-count cum-sample-count self-sample-count))
196 (define (call-data-proc cd) (vector-ref cd 0))
197 (define (call-data-name cd) (procedure-name (call-data-proc cd)))
198 (define (call-data-printable cd)
199 (or (call-data-name cd)
200 (with-output-to-string (lambda () (write (call-data-proc cd))))))
201 (define (call-data-call-count cd) (vector-ref cd 1))
202 (define (call-data-cum-sample-count cd) (vector-ref cd 2))
203 (define (call-data-self-sample-count cd) (vector-ref cd 3))
205 (define (inc-call-data-call-count! cd)
206 (vector-set! cd 1 (1+ (vector-ref cd 1))))
207 (define (inc-call-data-cum-sample-count! cd)
208 (vector-set! cd 2 (1+ (vector-ref cd 2))))
209 (define (inc-call-data-self-sample-count! cd)
210 (vector-set! cd 3 (1+ (vector-ref cd 3))))
212 (define-macro (accumulate-time stop-time)
213 `(set! accumulated-time
214 (+ accumulated-time 0.0 (- ,stop-time last-start-time))))
216 (define (get-call-data proc)
217 (let ((k (if (or (not (program? proc))
218 (zero? (program-num-free-variables proc)))
220 (program-objcode proc))))
221 (or (hashq-ref procedure-data k)
222 (let ((call-data (make-call-data proc 0 0 0)))
223 (hashq-set! procedure-data k call-data)
226 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
229 (define (sample-stack-procs stack)
230 (let ((stacklen (stack-length stack))
231 (hit-count-call? #f))
233 (if record-full-stacks?
234 (set! stacks (cons stack stacks)))
236 (set! sample-count (+ sample-count 1))
237 ;; Now accumulate stats for the whole stack.
238 (let loop ((frame (stack-ref stack 0))
239 (procs-seen (make-hash-table 13))
244 (lambda (proc val accum)
245 (inc-call-data-cum-sample-count!
246 (get-call-data proc)))
249 (and=> (and=> self get-call-data)
250 inc-call-data-self-sample-count!))
251 ((frame-procedure frame)
254 ((eq? proc count-call)
255 ;; We're not supposed to be sampling count-call and
256 ;; its sub-functions, so loop again with a clean
258 (set! hit-count-call? #t)
259 (loop (frame-previous frame) (make-hash-table 13) #f))
261 (hashq-set! procs-seen proc #t)
262 (loop (frame-previous frame)
266 (loop (frame-previous frame) procs-seen self))))
269 (define inside-profiler? #f)
271 (define (profile-signal-handler sig)
272 (set! inside-profiler? #t)
274 ;; FIXME: with-statprof should be able to set an outer frame for the
276 (if (positive? profile-level)
277 (let* ((stop-time (get-internal-run-time))
278 ;; cut down to the signal handler. note that this will only
279 ;; work if statprof.scm is compiled; otherwise we get
280 ;; `eval' on the stack instead, because if it's not
281 ;; compiled, profile-signal-handler is a thunk that
282 ;; tail-calls eval. perhaps we should always compile the
283 ;; signal handler instead...
284 (stack (or (make-stack #t profile-signal-handler)
285 (pk 'what! (make-stack #t))))
286 (inside-apply-trap? (sample-stack-procs stack)))
288 (if (not inside-apply-trap?)
290 ;; disabling here is just a little more efficient, but
291 ;; not necessary given inside-profiler?. We can't just
292 ;; disable unconditionally at the top of this function
293 ;; and eliminate inside-profiler? because it seems to
294 ;; confuse guile wrt re-enabling the trap when
295 ;; count-call finishes.
297 (set-vm-trace-level! (the-vm)
298 (1- (vm-trace-level (the-vm)))))
299 (accumulate-time stop-time)))
301 (setitimer ITIMER_PROF
303 (car sampling-frequency)
304 (cdr sampling-frequency))
306 (if (not inside-apply-trap?)
308 (set! last-start-time (get-internal-run-time))
310 (set-vm-trace-level! (the-vm)
311 (1+ (vm-trace-level (the-vm)))))))))
313 (set! inside-profiler? #f))
315 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
316 ;; Count total calls.
318 (define (count-call frame)
319 (if (not inside-profiler?)
321 (accumulate-time (get-internal-run-time))
323 (and=> (frame-procedure frame)
325 (inc-call-data-call-count!
326 (get-call-data proc))))
328 (set! last-start-time (get-internal-run-time)))))
330 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
332 (define (statprof-active?)
333 "Returns @code{#t} if @code{statprof-start} has been called more times
334 than @code{statprof-stop}, @code{#f} otherwise."
335 (positive? profile-level))
337 ;; Do not call this from statprof internal functions -- user only.
338 (define (statprof-start)
339 "Start the profiler.@code{}"
340 ;; After some head-scratching, I don't *think* I need to mask/unmask
341 ;; signals here, but if I'm wrong, please let me know.
342 (set! profile-level (+ profile-level 1))
343 (if (= profile-level 1)
344 (let* ((rpt remaining-prof-time)
346 (or (positive? (car rpt))
347 (positive? (cdr rpt))))))
348 (set! remaining-prof-time #f)
349 (set! last-start-time (get-internal-run-time))
351 (cdr (assq 'gc-time-taken (gc-stats))))
353 (setitimer ITIMER_PROF 0 0 (car rpt) (cdr rpt))
354 (setitimer ITIMER_PROF
356 (car sampling-frequency)
357 (cdr sampling-frequency)))
359 (add-hook! (vm-apply-hook (the-vm)) count-call))
360 (set-vm-trace-level! (the-vm) (1+ (vm-trace-level (the-vm))))
363 ;; Do not call this from statprof internal functions -- user only.
364 (define (statprof-stop)
365 "Stop the profiler.@code{}"
366 ;; After some head-scratching, I don't *think* I need to mask/unmask
367 ;; signals here, but if I'm wrong, please let me know.
368 (set! profile-level (- profile-level 1))
369 (if (zero? profile-level)
372 (- (cdr (assq 'gc-time-taken (gc-stats))) gc-time-taken))
373 (set-vm-trace-level! (the-vm) (1- (vm-trace-level (the-vm))))
375 (remove-hook! (vm-apply-hook (the-vm)) count-call))
376 ;; I believe that we need to do this before getting the time
377 ;; (unless we want to make things even more complicated).
378 (set! remaining-prof-time (setitimer ITIMER_PROF 0 0 0 0))
379 (accumulate-time (get-internal-run-time))
380 (set! last-start-time #f))))
382 (define (statprof-reset sample-seconds sample-microseconds count-calls?
384 "Reset the statprof sampler interval to @var{sample-seconds} and
385 @var{sample-microseconds}. If @var{count-calls?} is true, arrange to
386 instrument procedure calls as well as collecting statistical profiling
387 data. If @var{full-stacks?} is true, collect all sampled stacks into a
388 list for later analysis.
390 Enables traps and debugging as necessary."
391 (if (positive? profile-level)
392 (error "Can't reset profiler while profiler is running."))
393 (set! %count-calls? count-calls?)
394 (set! accumulated-time 0)
395 (set! last-start-time #f)
396 (set! sample-count 0)
397 (set! sampling-frequency (cons sample-seconds sample-microseconds))
398 (set! remaining-prof-time #f)
399 (set! procedure-data (make-hash-table 131))
400 (set! record-full-stacks? (and (pair? full-stacks?) (car full-stacks?)))
402 (debug-enable 'debug)
403 (sigaction SIGPROF profile-signal-handler)
406 (define (statprof-fold-call-data proc init)
407 "Fold @var{proc} over the call-data accumulated by statprof. Cannot be
408 called while statprof is active. @var{proc} should take two arguments,
409 @code{(@var{call-data} @var{prior-result})}.
411 Note that a given proc-name may appear multiple times, but if it does,
412 it represents different functions with the same name."
413 (if (positive? profile-level)
414 (error "Can't call statprof-fold-called while profiler is running."))
417 (lambda (key value prior-result)
418 (proc value prior-result))
422 (define (statprof-proc-call-data proc)
423 "Returns the call-data associated with @var{proc}, or @code{#f} if
425 (if (positive? profile-level)
426 (error "Can't call statprof-fold-called while profiler is running."))
428 (hashq-ref procedure-data proc))
430 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
433 (define (statprof-call-data->stats call-data)
434 "Returns an object of type @code{statprof-stats}."
435 ;; returns (vector proc-name
437 ;; cum-seconds-in-proc
438 ;; self-seconds-in-proc
440 ;; self-secs-per-call
441 ;; total-secs-per-call)
443 (let* ((proc-name (call-data-printable call-data))
444 (self-samples (call-data-self-sample-count call-data))
445 (cum-samples (call-data-cum-sample-count call-data))
446 (all-samples (statprof-sample-count))
447 (secs-per-sample (/ (statprof-accumulated-time)
448 (statprof-sample-count)))
449 (num-calls (and %count-calls? (statprof-call-data-calls call-data))))
452 (* (/ self-samples all-samples) 100.0)
453 (* cum-samples secs-per-sample 1.0)
454 (* self-samples secs-per-sample 1.0)
456 (and num-calls ;; maybe we only sampled in children
457 (if (zero? self-samples) 0.0
458 (/ (* self-samples secs-per-sample) 1.0 num-calls)))
459 (and num-calls ;; cum-samples must be positive
460 (/ (* cum-samples secs-per-sample)
462 ;; num-calls might be 0 if we entered statprof during the
463 ;; dynamic extent of the call
464 (max num-calls 1))))))
466 (define (statprof-stats-proc-name stats) (vector-ref stats 0))
467 (define (statprof-stats-%-time-in-proc stats) (vector-ref stats 1))
468 (define (statprof-stats-cum-secs-in-proc stats) (vector-ref stats 2))
469 (define (statprof-stats-self-secs-in-proc stats) (vector-ref stats 3))
470 (define (statprof-stats-calls stats) (vector-ref stats 4))
471 (define (statprof-stats-self-secs-per-call stats) (vector-ref stats 5))
472 (define (statprof-stats-cum-secs-per-call stats) (vector-ref stats 6))
474 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
476 (define (stats-sorter x y)
477 (let ((diff (- (statprof-stats-self-secs-in-proc x)
478 (statprof-stats-self-secs-in-proc y))))
481 (- (statprof-stats-cum-secs-in-proc x)
482 (statprof-stats-cum-secs-in-proc y))
485 (define (statprof-display . port)
486 "Displays a gprof-like summary of the statistics collected. Unless an
487 optional @var{port} argument is passed, uses the current output port."
488 (if (null? port) (set! port (current-output-port)))
491 ((zero? (statprof-sample-count))
492 (format port "No samples recorded.\n"))
494 (let* ((stats-list (statprof-fold-call-data
495 (lambda (data prior-value)
496 (cons (statprof-call-data->stats data)
499 (sorted-stats (sort stats-list stats-sorter)))
501 (define (display-stats-line stats)
503 (format port "~6,2f ~9,2f ~9,2f ~7d ~8,2f ~8,2f "
504 (statprof-stats-%-time-in-proc stats)
505 (statprof-stats-cum-secs-in-proc stats)
506 (statprof-stats-self-secs-in-proc stats)
507 (statprof-stats-calls stats)
508 (* 1000 (statprof-stats-self-secs-per-call stats))
509 (* 1000 (statprof-stats-cum-secs-per-call stats)))
510 (format port "~6,2f ~9,2f ~9,2f "
511 (statprof-stats-%-time-in-proc stats)
512 (statprof-stats-cum-secs-in-proc stats)
513 (statprof-stats-self-secs-in-proc stats)))
514 (display (statprof-stats-proc-name stats) port)
519 (format port "~5a ~10a ~7a ~8a ~8a ~8a ~8@a\n"
520 "% " "cumulative" "self" "" "self" "total" "")
521 (format port "~5a ~9a ~8a ~8a ~8a ~8a ~8@a\n"
522 "time" "seconds" "seconds" "calls" "ms/call" "ms/call" "name"))
524 (format port "~5a ~10a ~7a ~8@a\n"
525 "%" "cumulative" "self" "")
526 (format port "~5a ~10a ~7a ~8@a\n"
527 "time" "seconds" "seconds" "name")))
529 (for-each display-stats-line sorted-stats)
531 (display "---\n" port)
532 (simple-format #t "Sample count: ~A\n" (statprof-sample-count))
533 (simple-format #t "Total time: ~A seconds (~A seconds in GC)\n"
534 (statprof-accumulated-time)
535 (/ gc-time-taken internal-time-units-per-second))))))
537 (define (statprof-display-anomolies)
538 "A sanity check that attempts to detect anomolies in statprof's
540 (statprof-fold-call-data
541 (lambda (data prior-value)
542 (if (and %count-calls?
543 (zero? (call-data-call-count data))
544 (positive? (call-data-cum-sample-count data)))
547 (call-data-name data)
548 (call-data-call-count data)
549 (call-data-cum-sample-count data))))
551 (simple-format #t "Total time: ~A\n" (statprof-accumulated-time))
552 (simple-format #t "Sample count: ~A\n" (statprof-sample-count)))
554 (define (statprof-accumulated-time)
555 "Returns the time accumulated during the last statprof run.@code{}"
556 (if (positive? profile-level)
557 (error "Can't get accumulated time while profiler is running."))
558 (/ accumulated-time internal-time-units-per-second))
560 (define (statprof-sample-count)
561 "Returns the number of samples taken during the last statprof run.@code{}"
562 (if (positive? profile-level)
563 (error "Can't get accumulated time while profiler is running."))
566 (define statprof-call-data-name call-data-name)
567 (define statprof-call-data-calls call-data-call-count)
568 (define statprof-call-data-cum-samples call-data-cum-sample-count)
569 (define statprof-call-data-self-samples call-data-self-sample-count)
571 (define (statprof-fetch-stacks)
572 "Returns a list of stacks, as they were captured since the last call
573 to @code{statprof-reset}.
575 Note that stacks are only collected if the @var{full-stacks?} argument
576 to @code{statprof-reset} is true."
583 ((and (program? a) (program? b))
584 (eq? (program-objcode a) (program-objcode b)))
588 ;; tree ::= (car n . tree*)
590 (define (lists->trees lists equal?)
591 (let lp ((in lists) (n-terminal 0) (tails '()))
594 (let ((trees (map (lambda (tail)
596 (lists->trees (cdr tail) equal?)))
598 (cons (apply + n-terminal (map cadr trees))
600 (lambda (a b) (> (cadr a) (cadr b)))))))
602 (lp (cdr in) (1+ n-terminal) tails))
603 ((find (lambda (x) (equal? (car x) (caar in)))
610 (cons (cdar in) (cdr tail))))))
614 (acons (caar in) (list (cdar in)) tails))))))
616 (define (stack->procedures stack)
618 (unfold-right (lambda (x) (not x))
621 (stack-ref stack 0))))
623 (define (statprof-fetch-call-tree)
624 "Return a call tree for the previous statprof run.
626 The return value is a list of nodes, each of which is of the type:
628 node ::= (@var{proc} @var{count} . @var{nodes})
630 (cons #t (lists->trees (map stack->procedures stacks) procedure=?)))
632 (define* (statprof thunk #:key (loop 1) (hz 100) (count-calls? #f)
634 "Profiles the execution of @var{thunk}.
636 The stack will be sampled @var{hz} times per second, and the thunk itself will
637 be called @var{loop} times.
639 If @var{count-calls?} is true, all procedure calls will be recorded. This
640 operation is somewhat expensive.
642 If @var{full-stacks?} is true, at each sample, statprof will store away the
643 whole call tree, for later analysis. Use @code{statprof-fetch-stacks} or
644 @code{statprof-fetch-call-tree} to retrieve the last-stored stacks."
648 (statprof-reset (inexact->exact (floor (/ 1 hz)))
649 (inexact->exact (* 1e6 (- (/ 1 hz)
663 (set! procedure-data #f))))
665 (define-macro (with-statprof . args)
666 "Profiles the expressions in its body.
672 Execute the body @var{loop} number of times, or @code{#f} for no looping
680 Whether to instrument each function call (expensive)
684 Whether to collect away all sampled stacks into a list
688 (define (kw-arg-ref kw args def)
690 ((null? args) (error "Invalid macro body"))
691 ((keyword? (car args))
692 (if (eq? (car args) kw)
694 (kw-arg-ref kw (cddr args) def)))
695 ((eq? kw #f def) ;; asking for the body
697 (else def))) ;; kw not found
698 `((@ (statprof) statprof)
699 (lambda () ,@(kw-arg-ref #f args #f))
700 #:loop ,(kw-arg-ref #:loop args 1)
701 #:hz ,(kw-arg-ref #:hz args 100)
702 #:count-calls? ,(kw-arg-ref #:count-calls? args #f)
703 #:full-stacks? ,(kw-arg-ref #:full-stacks? args #f)))