| 1 | ;;;; (statprof) -- a statistical profiler for Guile |
| 2 | ;;;; -*-scheme-*- |
| 3 | ;;;; |
| 4 | ;;;; Copyright (C) 2009, 2010, 2011, 2013 Free Software Foundation, Inc. |
| 5 | ;;;; Copyright (C) 2004, 2009 Andy Wingo <wingo at pobox dot com> |
| 6 | ;;;; Copyright (C) 2001 Rob Browning <rlb at defaultvalue dot org> |
| 7 | ;;;; |
| 8 | ;;;; This library is free software; you can redistribute it and/or |
| 9 | ;;;; modify it under the terms of the GNU Lesser General Public |
| 10 | ;;;; License as published by the Free Software Foundation; either |
| 11 | ;;;; version 3 of the License, or (at your option) any later version. |
| 12 | ;;;; |
| 13 | ;;;; This library is distributed in the hope that it will be useful, |
| 14 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 16 | ;;;; Lesser General Public License for more details. |
| 17 | ;;;; |
| 18 | ;;;; You should have received a copy of the GNU Lesser General Public |
| 19 | ;;;; License along with this library; if not, write to the Free Software |
| 20 | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 21 | ;;;; |
| 22 | \f |
| 23 | |
| 24 | ;;; Commentary: |
| 25 | ;; |
| 26 | ;;@code{(statprof)} is intended to be a fairly simple |
| 27 | ;;statistical profiler for guile. It is in the early stages yet, so |
| 28 | ;;consider its output still suspect, and please report any bugs to |
| 29 | ;;@email{guile-devel at gnu.org}, or to me directly at @email{rlb at |
| 30 | ;;defaultvalue.org}. |
| 31 | ;; |
| 32 | ;;A simple use of statprof would look like this: |
| 33 | ;; |
| 34 | ;;@example |
| 35 | ;; (statprof-reset 0 50000 #t) |
| 36 | ;; (statprof-start) |
| 37 | ;; (do-something) |
| 38 | ;; (statprof-stop) |
| 39 | ;; (statprof-display) |
| 40 | ;;@end example |
| 41 | ;; |
| 42 | ;;This would reset statprof, clearing all accumulated statistics, then |
| 43 | ;;start profiling, run some code, stop profiling, and finally display a |
| 44 | ;;gprof flat-style table of statistics which will look something like |
| 45 | ;;this: |
| 46 | ;; |
| 47 | ;;@example |
| 48 | ;; % cumulative self self total |
| 49 | ;; time seconds seconds calls ms/call ms/call name |
| 50 | ;; 35.29 0.23 0.23 2002 0.11 0.11 - |
| 51 | ;; 23.53 0.15 0.15 2001 0.08 0.08 positive? |
| 52 | ;; 23.53 0.15 0.15 2000 0.08 0.08 + |
| 53 | ;; 11.76 0.23 0.08 2000 0.04 0.11 do-nothing |
| 54 | ;; 5.88 0.64 0.04 2001 0.02 0.32 loop |
| 55 | ;; 0.00 0.15 0.00 1 0.00 150.59 do-something |
| 56 | ;; ... |
| 57 | ;;@end example |
| 58 | ;; |
| 59 | ;;All of the numerical data with the exception of the calls column is |
| 60 | ;;statistically approximate. In the following column descriptions, and |
| 61 | ;;in all of statprof, "time" refers to execution time (both user and |
| 62 | ;;system), not wall clock time. |
| 63 | ;; |
| 64 | ;;@table @asis |
| 65 | ;;@item % time |
| 66 | ;;The percent of the time spent inside the procedure itself |
| 67 | ;;(not counting children). |
| 68 | ;;@item cumulative seconds |
| 69 | ;;The total number of seconds spent in the procedure, including |
| 70 | ;;children. |
| 71 | ;;@item self seconds |
| 72 | ;;The total number of seconds spent in the procedure itself (not counting |
| 73 | ;;children). |
| 74 | ;;@item calls |
| 75 | ;;The total number of times the procedure was called. |
| 76 | ;;@item self ms/call |
| 77 | ;;The average time taken by the procedure itself on each call, in ms. |
| 78 | ;;@item total ms/call |
| 79 | ;;The average time taken by each call to the procedure, including time |
| 80 | ;;spent in child functions. |
| 81 | ;;@item name |
| 82 | ;;The name of the procedure. |
| 83 | ;;@end table |
| 84 | ;; |
| 85 | ;;The profiler uses @code{eq?} and the procedure object itself to |
| 86 | ;;identify the procedures, so it won't confuse different procedures with |
| 87 | ;;the same name. They will show up as two different rows in the output. |
| 88 | ;; |
| 89 | ;;Right now the profiler is quite simplistic. I cannot provide |
| 90 | ;;call-graphs or other higher level information. What you see in the |
| 91 | ;;table is pretty much all there is. Patches are welcome :-) |
| 92 | ;; |
| 93 | ;;@section Implementation notes |
| 94 | ;; |
| 95 | ;;The profiler works by setting the unix profiling signal |
| 96 | ;;@code{ITIMER_PROF} to go off after the interval you define in the call |
| 97 | ;;to @code{statprof-reset}. When the signal fires, a sampling routine is |
| 98 | ;;run which looks at the current procedure that's executing, and then |
| 99 | ;;crawls up the stack, and for each procedure encountered, increments |
| 100 | ;;that procedure's sample count. Note that if a procedure is encountered |
| 101 | ;;multiple times on a given stack, it is only counted once. After the |
| 102 | ;;sampling is complete, the profiler resets profiling timer to fire |
| 103 | ;;again after the appropriate interval. |
| 104 | ;; |
| 105 | ;;Meanwhile, the profiler keeps track, via @code{get-internal-run-time}, |
| 106 | ;;how much CPU time (system and user -- which is also what |
| 107 | ;;@code{ITIMER_PROF} tracks), has elapsed while code has been executing |
| 108 | ;;within a statprof-start/stop block. |
| 109 | ;; |
| 110 | ;;The profiler also tries to avoid counting or timing its own code as |
| 111 | ;;much as possible. |
| 112 | ;; |
| 113 | ;;; Code: |
| 114 | |
| 115 | ;; When you add new features, please also add tests to ./tests/ if you |
| 116 | ;; have time, and then add the new files to ./run-tests. Also, if |
| 117 | ;; anyone's bored, there are a lot of existing API bits that don't |
| 118 | ;; have tests yet. |
| 119 | |
| 120 | ;; TODO |
| 121 | ;; |
| 122 | ;; Check about profiling C functions -- does profiling primitives work? |
| 123 | ;; Also look into stealing code from qprof so we can sample the C stack |
| 124 | ;; Call graphs? |
| 125 | |
| 126 | (define-module (statprof) |
| 127 | #:use-module (srfi srfi-1) |
| 128 | #:autoload (ice-9 format) (format) |
| 129 | #:use-module (system vm vm) |
| 130 | #:use-module (system vm frame) |
| 131 | #:use-module (system vm program) |
| 132 | #:export (statprof-active? |
| 133 | statprof-start |
| 134 | statprof-stop |
| 135 | statprof-reset |
| 136 | |
| 137 | statprof-accumulated-time |
| 138 | statprof-sample-count |
| 139 | statprof-fold-call-data |
| 140 | statprof-proc-call-data |
| 141 | statprof-call-data-name |
| 142 | statprof-call-data-calls |
| 143 | statprof-call-data-cum-samples |
| 144 | statprof-call-data-self-samples |
| 145 | statprof-call-data->stats |
| 146 | |
| 147 | statprof-stats-proc-name |
| 148 | statprof-stats-%-time-in-proc |
| 149 | statprof-stats-cum-secs-in-proc |
| 150 | statprof-stats-self-secs-in-proc |
| 151 | statprof-stats-calls |
| 152 | statprof-stats-self-secs-per-call |
| 153 | statprof-stats-cum-secs-per-call |
| 154 | |
| 155 | statprof-display |
| 156 | statprof-display-anomolies |
| 157 | |
| 158 | statprof-fetch-stacks |
| 159 | statprof-fetch-call-tree |
| 160 | |
| 161 | statprof |
| 162 | with-statprof |
| 163 | |
| 164 | gcprof)) |
| 165 | |
| 166 | |
| 167 | ;; This profiler tracks two numbers for every function called while |
| 168 | ;; it's active. It tracks the total number of calls, and the number |
| 169 | ;; of times the function was active when the sampler fired. |
| 170 | ;; |
| 171 | ;; Globally the profiler tracks the total time elapsed and the number |
| 172 | ;; of times the sampler was fired. |
| 173 | ;; |
| 174 | ;; Right now, this profiler is not per-thread and is not thread safe. |
| 175 | |
| 176 | (define accumulated-time #f) ; total so far. |
| 177 | (define last-start-time #f) ; start-time when timer is active. |
| 178 | (define sample-count #f) ; total count of sampler calls. |
| 179 | (define sampling-frequency #f) ; in (seconds . microseconds) |
| 180 | (define remaining-prof-time #f) ; time remaining when prof suspended. |
| 181 | (define profile-level 0) ; for user start/stop nesting. |
| 182 | (define %count-calls? #t) ; whether to catch apply-frame. |
| 183 | (define gc-time-taken 0) ; gc time between statprof-start and |
| 184 | ; statprof-stop. |
| 185 | (define record-full-stacks? #f) ; if #t, stash away the stacks |
| 186 | ; for later analysis. |
| 187 | (define stacks '()) |
| 188 | |
| 189 | ;; procedure-data will be a hash where the key is the function object |
| 190 | ;; itself and the value is the data. The data will be a vector like |
| 191 | ;; this: #(name call-count cum-sample-count self-sample-count) |
| 192 | (define procedure-data #f) |
| 193 | |
| 194 | ;; If you change the call-data data structure, you need to also change |
| 195 | ;; sample-uncount-frame. |
| 196 | (define (make-call-data proc call-count cum-sample-count self-sample-count) |
| 197 | (vector proc call-count cum-sample-count self-sample-count)) |
| 198 | (define (call-data-proc cd) (vector-ref cd 0)) |
| 199 | (define (call-data-name cd) (procedure-name (call-data-proc cd))) |
| 200 | (define (call-data-printable cd) |
| 201 | (or (call-data-name cd) |
| 202 | (with-output-to-string (lambda () (write (call-data-proc cd)))))) |
| 203 | (define (call-data-call-count cd) (vector-ref cd 1)) |
| 204 | (define (call-data-cum-sample-count cd) (vector-ref cd 2)) |
| 205 | (define (call-data-self-sample-count cd) (vector-ref cd 3)) |
| 206 | |
| 207 | (define (inc-call-data-call-count! cd) |
| 208 | (vector-set! cd 1 (1+ (vector-ref cd 1)))) |
| 209 | (define (inc-call-data-cum-sample-count! cd) |
| 210 | (vector-set! cd 2 (1+ (vector-ref cd 2)))) |
| 211 | (define (inc-call-data-self-sample-count! cd) |
| 212 | (vector-set! cd 3 (1+ (vector-ref cd 3)))) |
| 213 | |
| 214 | (define-macro (accumulate-time stop-time) |
| 215 | `(set! accumulated-time |
| 216 | (+ accumulated-time 0.0 (- ,stop-time last-start-time)))) |
| 217 | |
| 218 | (define (get-call-data proc) |
| 219 | (let ((k (cond |
| 220 | ((rtl-program? proc) (rtl-program-code proc)) |
| 221 | (else proc)))) |
| 222 | (or (hashv-ref procedure-data k) |
| 223 | (let ((call-data (make-call-data proc 0 0 0))) |
| 224 | (hashv-set! procedure-data k call-data) |
| 225 | call-data)))) |
| 226 | |
| 227 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 228 | ;; SIGPROF handler |
| 229 | |
| 230 | (define (sample-stack-procs stack) |
| 231 | (let ((stacklen (stack-length stack)) |
| 232 | (hit-count-call? #f)) |
| 233 | |
| 234 | (if record-full-stacks? |
| 235 | (set! stacks (cons stack stacks))) |
| 236 | |
| 237 | (set! sample-count (+ sample-count 1)) |
| 238 | ;; Now accumulate stats for the whole stack. |
| 239 | (let loop ((frame (stack-ref stack 0)) |
| 240 | (procs-seen (make-hash-table 13)) |
| 241 | (self #f)) |
| 242 | (cond |
| 243 | ((not frame) |
| 244 | (hash-fold |
| 245 | (lambda (proc val accum) |
| 246 | (inc-call-data-cum-sample-count! |
| 247 | (get-call-data proc))) |
| 248 | #f |
| 249 | procs-seen) |
| 250 | (and=> (and=> self get-call-data) |
| 251 | inc-call-data-self-sample-count!)) |
| 252 | ((frame-procedure frame) |
| 253 | => (lambda (proc) |
| 254 | (cond |
| 255 | ((eq? proc count-call) |
| 256 | ;; We're not supposed to be sampling count-call and |
| 257 | ;; its sub-functions, so loop again with a clean |
| 258 | ;; slate. |
| 259 | (set! hit-count-call? #t) |
| 260 | (loop (frame-previous frame) (make-hash-table 13) #f)) |
| 261 | (else |
| 262 | (hashq-set! procs-seen proc #t) |
| 263 | (loop (frame-previous frame) |
| 264 | procs-seen |
| 265 | (or self proc)))))) |
| 266 | (else |
| 267 | (loop (frame-previous frame) procs-seen self)))) |
| 268 | hit-count-call?)) |
| 269 | |
| 270 | (define inside-profiler? #f) |
| 271 | |
| 272 | (define (profile-signal-handler sig) |
| 273 | (set! inside-profiler? #t) |
| 274 | |
| 275 | ;; FIXME: with-statprof should be able to set an outer frame for the |
| 276 | ;; stack cut |
| 277 | (if (positive? profile-level) |
| 278 | (let* ((stop-time (get-internal-run-time)) |
| 279 | ;; cut down to the signal handler. note that this will only |
| 280 | ;; work if statprof.scm is compiled; otherwise we get |
| 281 | ;; `eval' on the stack instead, because if it's not |
| 282 | ;; compiled, profile-signal-handler is a thunk that |
| 283 | ;; tail-calls eval. perhaps we should always compile the |
| 284 | ;; signal handler instead... |
| 285 | (stack (or (make-stack #t profile-signal-handler) |
| 286 | (pk 'what! (make-stack #t)))) |
| 287 | (inside-apply-trap? (sample-stack-procs stack))) |
| 288 | |
| 289 | (if (not inside-apply-trap?) |
| 290 | (begin |
| 291 | ;; disabling here is just a little more efficient, but |
| 292 | ;; not necessary given inside-profiler?. We can't just |
| 293 | ;; disable unconditionally at the top of this function |
| 294 | ;; and eliminate inside-profiler? because it seems to |
| 295 | ;; confuse guile wrt re-enabling the trap when |
| 296 | ;; count-call finishes. |
| 297 | (if %count-calls? |
| 298 | (set-vm-trace-level! (the-vm) |
| 299 | (1- (vm-trace-level (the-vm))))) |
| 300 | (accumulate-time stop-time))) |
| 301 | |
| 302 | (setitimer ITIMER_PROF |
| 303 | 0 0 |
| 304 | (car sampling-frequency) |
| 305 | (cdr sampling-frequency)) |
| 306 | |
| 307 | (if (not inside-apply-trap?) |
| 308 | (begin |
| 309 | (set! last-start-time (get-internal-run-time)) |
| 310 | (if %count-calls? |
| 311 | (set-vm-trace-level! (the-vm) |
| 312 | (1+ (vm-trace-level (the-vm))))))))) |
| 313 | |
| 314 | (set! inside-profiler? #f)) |
| 315 | |
| 316 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 317 | ;; Count total calls. |
| 318 | |
| 319 | (define (count-call frame) |
| 320 | (if (not inside-profiler?) |
| 321 | (begin |
| 322 | (accumulate-time (get-internal-run-time)) |
| 323 | |
| 324 | (and=> (frame-procedure frame) |
| 325 | (lambda (proc) |
| 326 | (inc-call-data-call-count! |
| 327 | (get-call-data proc)))) |
| 328 | |
| 329 | (set! last-start-time (get-internal-run-time))))) |
| 330 | |
| 331 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 332 | |
| 333 | (define (statprof-active?) |
| 334 | "Returns @code{#t} if @code{statprof-start} has been called more times |
| 335 | than @code{statprof-stop}, @code{#f} otherwise." |
| 336 | (positive? profile-level)) |
| 337 | |
| 338 | ;; Do not call this from statprof internal functions -- user only. |
| 339 | (define (statprof-start) |
| 340 | "Start the profiler.@code{}" |
| 341 | ;; After some head-scratching, I don't *think* I need to mask/unmask |
| 342 | ;; signals here, but if I'm wrong, please let me know. |
| 343 | (set! profile-level (+ profile-level 1)) |
| 344 | (if (= profile-level 1) |
| 345 | (let* ((rpt remaining-prof-time) |
| 346 | (use-rpt? (and rpt |
| 347 | (or (positive? (car rpt)) |
| 348 | (positive? (cdr rpt)))))) |
| 349 | (set! remaining-prof-time #f) |
| 350 | (set! last-start-time (get-internal-run-time)) |
| 351 | (set! gc-time-taken |
| 352 | (cdr (assq 'gc-time-taken (gc-stats)))) |
| 353 | (if use-rpt? |
| 354 | (setitimer ITIMER_PROF 0 0 (car rpt) (cdr rpt)) |
| 355 | (setitimer ITIMER_PROF |
| 356 | 0 0 |
| 357 | (car sampling-frequency) |
| 358 | (cdr sampling-frequency))) |
| 359 | (if %count-calls? |
| 360 | (add-hook! (vm-apply-hook (the-vm)) count-call)) |
| 361 | (set-vm-trace-level! (the-vm) (1+ (vm-trace-level (the-vm)))) |
| 362 | #t))) |
| 363 | |
| 364 | ;; Do not call this from statprof internal functions -- user only. |
| 365 | (define (statprof-stop) |
| 366 | "Stop the profiler.@code{}" |
| 367 | ;; After some head-scratching, I don't *think* I need to mask/unmask |
| 368 | ;; signals here, but if I'm wrong, please let me know. |
| 369 | (set! profile-level (- profile-level 1)) |
| 370 | (if (zero? profile-level) |
| 371 | (begin |
| 372 | (set! gc-time-taken |
| 373 | (- (cdr (assq 'gc-time-taken (gc-stats))) gc-time-taken)) |
| 374 | (set-vm-trace-level! (the-vm) (1- (vm-trace-level (the-vm)))) |
| 375 | (if %count-calls? |
| 376 | (remove-hook! (vm-apply-hook (the-vm)) count-call)) |
| 377 | ;; I believe that we need to do this before getting the time |
| 378 | ;; (unless we want to make things even more complicated). |
| 379 | (set! remaining-prof-time (setitimer ITIMER_PROF 0 0 0 0)) |
| 380 | (accumulate-time (get-internal-run-time)) |
| 381 | (set! last-start-time #f)))) |
| 382 | |
| 383 | (define* (statprof-reset sample-seconds sample-microseconds count-calls? |
| 384 | #:optional full-stacks?) |
| 385 | "Reset the statprof sampler interval to @var{sample-seconds} and |
| 386 | @var{sample-microseconds}. If @var{count-calls?} is true, arrange to |
| 387 | instrument procedure calls as well as collecting statistical profiling |
| 388 | data. If @var{full-stacks?} is true, collect all sampled stacks into a |
| 389 | list for later analysis. |
| 390 | |
| 391 | Enables traps and debugging as necessary." |
| 392 | (if (positive? profile-level) |
| 393 | (error "Can't reset profiler while profiler is running.")) |
| 394 | (set! %count-calls? count-calls?) |
| 395 | (set! accumulated-time 0) |
| 396 | (set! last-start-time #f) |
| 397 | (set! sample-count 0) |
| 398 | (set! sampling-frequency (cons sample-seconds sample-microseconds)) |
| 399 | (set! remaining-prof-time #f) |
| 400 | (set! procedure-data (make-hash-table 131)) |
| 401 | (set! record-full-stacks? full-stacks?) |
| 402 | (set! stacks '()) |
| 403 | (sigaction SIGPROF profile-signal-handler) |
| 404 | #t) |
| 405 | |
| 406 | (define (statprof-fold-call-data proc init) |
| 407 | "Fold @var{proc} over the call-data accumulated by statprof. Cannot be |
| 408 | called while statprof is active. @var{proc} should take two arguments, |
| 409 | @code{(@var{call-data} @var{prior-result})}. |
| 410 | |
| 411 | Note that a given proc-name may appear multiple times, but if it does, |
| 412 | it represents different functions with the same name." |
| 413 | (if (positive? profile-level) |
| 414 | (error "Can't call statprof-fold-called while profiler is running.")) |
| 415 | |
| 416 | (hash-fold |
| 417 | (lambda (key value prior-result) |
| 418 | (proc value prior-result)) |
| 419 | init |
| 420 | procedure-data)) |
| 421 | |
| 422 | (define (statprof-proc-call-data proc) |
| 423 | "Returns the call-data associated with @var{proc}, or @code{#f} if |
| 424 | none is available." |
| 425 | (if (positive? profile-level) |
| 426 | (error "Can't call statprof-fold-called while profiler is running.")) |
| 427 | |
| 428 | (get-call-data proc)) |
| 429 | |
| 430 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 431 | ;; Stats |
| 432 | |
| 433 | (define (statprof-call-data->stats call-data) |
| 434 | "Returns an object of type @code{statprof-stats}." |
| 435 | ;; returns (vector proc-name |
| 436 | ;; %-time-in-proc |
| 437 | ;; cum-seconds-in-proc |
| 438 | ;; self-seconds-in-proc |
| 439 | ;; num-calls |
| 440 | ;; self-secs-per-call |
| 441 | ;; total-secs-per-call) |
| 442 | |
| 443 | (let* ((proc-name (call-data-printable call-data)) |
| 444 | (self-samples (call-data-self-sample-count call-data)) |
| 445 | (cum-samples (call-data-cum-sample-count call-data)) |
| 446 | (all-samples (statprof-sample-count)) |
| 447 | (secs-per-sample (/ (statprof-accumulated-time) |
| 448 | (statprof-sample-count))) |
| 449 | (num-calls (and %count-calls? (statprof-call-data-calls call-data)))) |
| 450 | |
| 451 | (vector proc-name |
| 452 | (* (/ self-samples all-samples) 100.0) |
| 453 | (* cum-samples secs-per-sample 1.0) |
| 454 | (* self-samples secs-per-sample 1.0) |
| 455 | num-calls |
| 456 | (and num-calls ;; maybe we only sampled in children |
| 457 | (if (zero? self-samples) 0.0 |
| 458 | (/ (* self-samples secs-per-sample) 1.0 num-calls))) |
| 459 | (and num-calls ;; cum-samples must be positive |
| 460 | (/ (* cum-samples secs-per-sample) |
| 461 | 1.0 |
| 462 | ;; num-calls might be 0 if we entered statprof during the |
| 463 | ;; dynamic extent of the call |
| 464 | (max num-calls 1)))))) |
| 465 | |
| 466 | (define (statprof-stats-proc-name stats) (vector-ref stats 0)) |
| 467 | (define (statprof-stats-%-time-in-proc stats) (vector-ref stats 1)) |
| 468 | (define (statprof-stats-cum-secs-in-proc stats) (vector-ref stats 2)) |
| 469 | (define (statprof-stats-self-secs-in-proc stats) (vector-ref stats 3)) |
| 470 | (define (statprof-stats-calls stats) (vector-ref stats 4)) |
| 471 | (define (statprof-stats-self-secs-per-call stats) (vector-ref stats 5)) |
| 472 | (define (statprof-stats-cum-secs-per-call stats) (vector-ref stats 6)) |
| 473 | |
| 474 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 475 | |
| 476 | (define (stats-sorter x y) |
| 477 | (let ((diff (- (statprof-stats-self-secs-in-proc x) |
| 478 | (statprof-stats-self-secs-in-proc y)))) |
| 479 | (positive? |
| 480 | (if (= diff 0) |
| 481 | (- (statprof-stats-cum-secs-in-proc x) |
| 482 | (statprof-stats-cum-secs-in-proc y)) |
| 483 | diff)))) |
| 484 | |
| 485 | (define (statprof-display . port) |
| 486 | "Displays a gprof-like summary of the statistics collected. Unless an |
| 487 | optional @var{port} argument is passed, uses the current output port." |
| 488 | (if (null? port) (set! port (current-output-port))) |
| 489 | |
| 490 | (cond |
| 491 | ((zero? (statprof-sample-count)) |
| 492 | (format port "No samples recorded.\n")) |
| 493 | (else |
| 494 | (let* ((stats-list (statprof-fold-call-data |
| 495 | (lambda (data prior-value) |
| 496 | (cons (statprof-call-data->stats data) |
| 497 | prior-value)) |
| 498 | '())) |
| 499 | (sorted-stats (sort stats-list stats-sorter))) |
| 500 | |
| 501 | (define (display-stats-line stats) |
| 502 | (if %count-calls? |
| 503 | (format port "~6,2f ~9,2f ~9,2f ~7d ~8,2f ~8,2f " |
| 504 | (statprof-stats-%-time-in-proc stats) |
| 505 | (statprof-stats-cum-secs-in-proc stats) |
| 506 | (statprof-stats-self-secs-in-proc stats) |
| 507 | (statprof-stats-calls stats) |
| 508 | (* 1000 (statprof-stats-self-secs-per-call stats)) |
| 509 | (* 1000 (statprof-stats-cum-secs-per-call stats))) |
| 510 | (format port "~6,2f ~9,2f ~9,2f " |
| 511 | (statprof-stats-%-time-in-proc stats) |
| 512 | (statprof-stats-cum-secs-in-proc stats) |
| 513 | (statprof-stats-self-secs-in-proc stats))) |
| 514 | (display (statprof-stats-proc-name stats) port) |
| 515 | (newline port)) |
| 516 | |
| 517 | (if %count-calls? |
| 518 | (begin |
| 519 | (format port "~5a ~10a ~7a ~8a ~8a ~8a ~8@a\n" |
| 520 | "% " "cumulative" "self" "" "self" "total" "") |
| 521 | (format port "~5a ~9a ~8a ~8a ~8a ~8a ~8@a\n" |
| 522 | "time" "seconds" "seconds" "calls" "ms/call" "ms/call" "name")) |
| 523 | (begin |
| 524 | (format port "~5a ~10a ~7a ~8@a\n" |
| 525 | "%" "cumulative" "self" "") |
| 526 | (format port "~5a ~10a ~7a ~8@a\n" |
| 527 | "time" "seconds" "seconds" "name"))) |
| 528 | |
| 529 | (for-each display-stats-line sorted-stats) |
| 530 | |
| 531 | (display "---\n" port) |
| 532 | (simple-format #t "Sample count: ~A\n" (statprof-sample-count)) |
| 533 | (simple-format #t "Total time: ~A seconds (~A seconds in GC)\n" |
| 534 | (statprof-accumulated-time) |
| 535 | (/ gc-time-taken 1.0 internal-time-units-per-second)))))) |
| 536 | |
| 537 | (define (statprof-display-anomolies) |
| 538 | "A sanity check that attempts to detect anomolies in statprof's |
| 539 | statistics.@code{}" |
| 540 | (statprof-fold-call-data |
| 541 | (lambda (data prior-value) |
| 542 | (if (and %count-calls? |
| 543 | (zero? (call-data-call-count data)) |
| 544 | (positive? (call-data-cum-sample-count data))) |
| 545 | (simple-format #t |
| 546 | "==[~A ~A ~A]\n" |
| 547 | (call-data-name data) |
| 548 | (call-data-call-count data) |
| 549 | (call-data-cum-sample-count data)))) |
| 550 | #f) |
| 551 | (simple-format #t "Total time: ~A\n" (statprof-accumulated-time)) |
| 552 | (simple-format #t "Sample count: ~A\n" (statprof-sample-count))) |
| 553 | |
| 554 | (define (statprof-accumulated-time) |
| 555 | "Returns the time accumulated during the last statprof run.@code{}" |
| 556 | (if (positive? profile-level) |
| 557 | (error "Can't get accumulated time while profiler is running.")) |
| 558 | (/ accumulated-time internal-time-units-per-second)) |
| 559 | |
| 560 | (define (statprof-sample-count) |
| 561 | "Returns the number of samples taken during the last statprof run.@code{}" |
| 562 | (if (positive? profile-level) |
| 563 | (error "Can't get accumulated time while profiler is running.")) |
| 564 | sample-count) |
| 565 | |
| 566 | (define statprof-call-data-name call-data-name) |
| 567 | (define statprof-call-data-calls call-data-call-count) |
| 568 | (define statprof-call-data-cum-samples call-data-cum-sample-count) |
| 569 | (define statprof-call-data-self-samples call-data-self-sample-count) |
| 570 | |
| 571 | (define (statprof-fetch-stacks) |
| 572 | "Returns a list of stacks, as they were captured since the last call |
| 573 | to @code{statprof-reset}. |
| 574 | |
| 575 | Note that stacks are only collected if the @var{full-stacks?} argument |
| 576 | to @code{statprof-reset} is true." |
| 577 | stacks) |
| 578 | |
| 579 | (define procedure=? |
| 580 | (lambda (a b) |
| 581 | (cond |
| 582 | ((eq? a b)) |
| 583 | ((and (rtl-program? a) (rtl-program? b)) |
| 584 | (eq? (rtl-program-code a) (rtl-program-code b))) |
| 585 | (else |
| 586 | #f)))) |
| 587 | |
| 588 | ;; tree ::= (car n . tree*) |
| 589 | |
| 590 | (define (lists->trees lists equal?) |
| 591 | (let lp ((in lists) (n-terminal 0) (tails '())) |
| 592 | (cond |
| 593 | ((null? in) |
| 594 | (let ((trees (map (lambda (tail) |
| 595 | (cons (car tail) |
| 596 | (lists->trees (cdr tail) equal?))) |
| 597 | tails))) |
| 598 | (cons (apply + n-terminal (map cadr trees)) |
| 599 | (sort trees |
| 600 | (lambda (a b) (> (cadr a) (cadr b))))))) |
| 601 | ((null? (car in)) |
| 602 | (lp (cdr in) (1+ n-terminal) tails)) |
| 603 | ((find (lambda (x) (equal? (car x) (caar in))) |
| 604 | tails) |
| 605 | => (lambda (tail) |
| 606 | (lp (cdr in) |
| 607 | n-terminal |
| 608 | (assq-set! tails |
| 609 | (car tail) |
| 610 | (cons (cdar in) (cdr tail)))))) |
| 611 | (else |
| 612 | (lp (cdr in) |
| 613 | n-terminal |
| 614 | (acons (caar in) (list (cdar in)) tails)))))) |
| 615 | |
| 616 | (define (stack->procedures stack) |
| 617 | (filter identity |
| 618 | (unfold-right (lambda (x) (not x)) |
| 619 | frame-procedure |
| 620 | frame-previous |
| 621 | (stack-ref stack 0)))) |
| 622 | |
| 623 | (define (statprof-fetch-call-tree) |
| 624 | "Return a call tree for the previous statprof run. |
| 625 | |
| 626 | The return value is a list of nodes, each of which is of the type: |
| 627 | @code |
| 628 | node ::= (@var{proc} @var{count} . @var{nodes}) |
| 629 | @end code" |
| 630 | (cons #t (lists->trees (map stack->procedures stacks) procedure=?))) |
| 631 | |
| 632 | (define* (statprof thunk #:key (loop 1) (hz 100) (count-calls? #f) |
| 633 | (full-stacks? #f)) |
| 634 | "Profiles the execution of @var{thunk}. |
| 635 | |
| 636 | The stack will be sampled @var{hz} times per second, and the thunk itself will |
| 637 | be called @var{loop} times. |
| 638 | |
| 639 | If @var{count-calls?} is true, all procedure calls will be recorded. This |
| 640 | operation is somewhat expensive. |
| 641 | |
| 642 | If @var{full-stacks?} is true, at each sample, statprof will store away the |
| 643 | whole call tree, for later analysis. Use @code{statprof-fetch-stacks} or |
| 644 | @code{statprof-fetch-call-tree} to retrieve the last-stored stacks." |
| 645 | |
| 646 | (dynamic-wind |
| 647 | (lambda () |
| 648 | (statprof-reset (inexact->exact (floor (/ 1 hz))) |
| 649 | (inexact->exact (* 1e6 (- (/ 1 hz) |
| 650 | (floor (/ 1 hz))))) |
| 651 | count-calls? |
| 652 | full-stacks?) |
| 653 | (statprof-start)) |
| 654 | (lambda () |
| 655 | (let lp ((i loop)) |
| 656 | (if (not (zero? i)) |
| 657 | (begin |
| 658 | (thunk) |
| 659 | (lp (1- i)))))) |
| 660 | (lambda () |
| 661 | (statprof-stop) |
| 662 | (statprof-display) |
| 663 | (set! procedure-data #f)))) |
| 664 | |
| 665 | (define-macro (with-statprof . args) |
| 666 | "Profiles the expressions in its body. |
| 667 | |
| 668 | Keyword arguments: |
| 669 | |
| 670 | @table @code |
| 671 | @item #:loop |
| 672 | Execute the body @var{loop} number of times, or @code{#f} for no looping |
| 673 | |
| 674 | default: @code{#f} |
| 675 | @item #:hz |
| 676 | Sampling rate |
| 677 | |
| 678 | default: @code{20} |
| 679 | @item #:count-calls? |
| 680 | Whether to instrument each function call (expensive) |
| 681 | |
| 682 | default: @code{#f} |
| 683 | @item #:full-stacks? |
| 684 | Whether to collect away all sampled stacks into a list |
| 685 | |
| 686 | default: @code{#f} |
| 687 | @end table" |
| 688 | (define (kw-arg-ref kw args def) |
| 689 | (cond |
| 690 | ((null? args) (error "Invalid macro body")) |
| 691 | ((keyword? (car args)) |
| 692 | (if (eq? (car args) kw) |
| 693 | (cadr args) |
| 694 | (kw-arg-ref kw (cddr args) def))) |
| 695 | ((eq? kw #f def) ;; asking for the body |
| 696 | args) |
| 697 | (else def))) ;; kw not found |
| 698 | `((@ (statprof) statprof) |
| 699 | (lambda () ,@(kw-arg-ref #f args #f)) |
| 700 | #:loop ,(kw-arg-ref #:loop args 1) |
| 701 | #:hz ,(kw-arg-ref #:hz args 100) |
| 702 | #:count-calls? ,(kw-arg-ref #:count-calls? args #f) |
| 703 | #:full-stacks? ,(kw-arg-ref #:full-stacks? args #f))) |
| 704 | |
| 705 | (define* (gcprof thunk #:key (loop 1) (full-stacks? #f)) |
| 706 | "Do an allocation profile of the execution of @var{thunk}. |
| 707 | |
| 708 | The stack will be sampled soon after every garbage collection, yielding |
| 709 | an approximate idea of what is causing allocation in your program. |
| 710 | |
| 711 | Since GC does not occur very frequently, you may need to use the |
| 712 | @var{loop} parameter, to cause @var{thunk} to be called @var{loop} |
| 713 | times. |
| 714 | |
| 715 | If @var{full-stacks?} is true, at each sample, statprof will store away the |
| 716 | whole call tree, for later analysis. Use @code{statprof-fetch-stacks} or |
| 717 | @code{statprof-fetch-call-tree} to retrieve the last-stored stacks." |
| 718 | |
| 719 | (define (reset) |
| 720 | (if (positive? profile-level) |
| 721 | (error "Can't reset profiler while profiler is running.")) |
| 722 | (set! accumulated-time 0) |
| 723 | (set! last-start-time #f) |
| 724 | (set! sample-count 0) |
| 725 | (set! %count-calls? #f) |
| 726 | (set! procedure-data (make-hash-table 131)) |
| 727 | (set! record-full-stacks? full-stacks?) |
| 728 | (set! stacks '())) |
| 729 | |
| 730 | (define (gc-callback) |
| 731 | (cond |
| 732 | (inside-profiler?) |
| 733 | (else |
| 734 | (set! inside-profiler? #t) |
| 735 | |
| 736 | ;; FIXME: should be able to set an outer frame for the stack cut |
| 737 | (let ((stop-time (get-internal-run-time)) |
| 738 | ;; Cut down to gc-callback, and then one before (the |
| 739 | ;; after-gc async). See the note in profile-signal-handler |
| 740 | ;; also. |
| 741 | (stack (or (make-stack #t gc-callback 0 1) |
| 742 | (pk 'what! (make-stack #t))))) |
| 743 | (sample-stack-procs stack) |
| 744 | (accumulate-time stop-time) |
| 745 | (set! last-start-time (get-internal-run-time))) |
| 746 | |
| 747 | (set! inside-profiler? #f)))) |
| 748 | |
| 749 | (define (start) |
| 750 | (set! profile-level (+ profile-level 1)) |
| 751 | (if (= profile-level 1) |
| 752 | (begin |
| 753 | (set! remaining-prof-time #f) |
| 754 | (set! last-start-time (get-internal-run-time)) |
| 755 | (set! gc-time-taken (cdr (assq 'gc-time-taken (gc-stats)))) |
| 756 | (add-hook! after-gc-hook gc-callback) |
| 757 | (set-vm-trace-level! (the-vm) (1+ (vm-trace-level (the-vm)))) |
| 758 | #t))) |
| 759 | |
| 760 | (define (stop) |
| 761 | (set! profile-level (- profile-level 1)) |
| 762 | (if (zero? profile-level) |
| 763 | (begin |
| 764 | (set! gc-time-taken |
| 765 | (- (cdr (assq 'gc-time-taken (gc-stats))) gc-time-taken)) |
| 766 | (remove-hook! after-gc-hook gc-callback) |
| 767 | (accumulate-time (get-internal-run-time)) |
| 768 | (set! last-start-time #f)))) |
| 769 | |
| 770 | (dynamic-wind |
| 771 | (lambda () |
| 772 | (reset) |
| 773 | (start)) |
| 774 | (lambda () |
| 775 | (let lp ((i loop)) |
| 776 | (if (not (zero? i)) |
| 777 | (begin |
| 778 | (thunk) |
| 779 | (lp (1- i)))))) |
| 780 | (lambda () |
| 781 | (stop) |
| 782 | (statprof-display) |
| 783 | (set! procedure-data #f)))) |