| 1 | ;;;; (statprof) -- a statistical profiler for Guile |
| 2 | ;;;; -*-scheme-*- |
| 3 | ;;;; |
| 4 | ;;;; Copyright (C) 2009, 2010, 2011, 2013-2015 Free Software Foundation, Inc. |
| 5 | ;;;; Copyright (C) 2004, 2009 Andy Wingo <wingo at pobox dot com> |
| 6 | ;;;; Copyright (C) 2001 Rob Browning <rlb at defaultvalue dot org> |
| 7 | ;;;; |
| 8 | ;;;; This library is free software; you can redistribute it and/or |
| 9 | ;;;; modify it under the terms of the GNU Lesser General Public |
| 10 | ;;;; License as published by the Free Software Foundation; either |
| 11 | ;;;; version 3 of the License, or (at your option) any later version. |
| 12 | ;;;; |
| 13 | ;;;; This library is distributed in the hope that it will be useful, |
| 14 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 16 | ;;;; Lesser General Public License for more details. |
| 17 | ;;;; |
| 18 | ;;;; You should have received a copy of the GNU Lesser General Public |
| 19 | ;;;; License along with this library; if not, write to the Free Software |
| 20 | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 21 | ;;;; |
| 22 | \f |
| 23 | |
| 24 | ;;; Commentary: |
| 25 | ;;; |
| 26 | ;;; @code{(statprof)} is a statistical profiler for Guile. |
| 27 | ;;; |
| 28 | ;;; A simple use of statprof would look like this: |
| 29 | ;;; |
| 30 | ;;; @example |
| 31 | ;;; (statprof (lambda () (do-something)) |
| 32 | ;;; #:hz 100 |
| 33 | ;;; #:count-calls? #t) |
| 34 | ;;; @end example |
| 35 | ;;; |
| 36 | ;;; This would run the thunk with statistical profiling, finally |
| 37 | ;;; displaying a gprof flat-style table of statistics which could |
| 38 | ;;; something like this: |
| 39 | ;;; |
| 40 | ;;; @example |
| 41 | ;;; % cumulative self self total |
| 42 | ;;; time seconds seconds calls ms/call ms/call name |
| 43 | ;;; 35.29 0.23 0.23 2002 0.11 0.11 - |
| 44 | ;;; 23.53 0.15 0.15 2001 0.08 0.08 positive? |
| 45 | ;;; 23.53 0.15 0.15 2000 0.08 0.08 + |
| 46 | ;;; 11.76 0.23 0.08 2000 0.04 0.11 do-nothing |
| 47 | ;;; 5.88 0.64 0.04 2001 0.02 0.32 loop |
| 48 | ;;; 0.00 0.15 0.00 1 0.00 150.59 do-something |
| 49 | ;;; ... |
| 50 | ;;; @end example |
| 51 | ;;; |
| 52 | ;;; All of the numerical data with the exception of the calls column is |
| 53 | ;;; statistically approximate. In the following column descriptions, and |
| 54 | ;;; in all of statprof, "time" refers to execution time (both user and |
| 55 | ;;; system), not wall clock time. |
| 56 | ;;; |
| 57 | ;;; @table @asis |
| 58 | ;;; @item % time |
| 59 | ;;; The percent of the time spent inside the procedure itself |
| 60 | ;;; (not counting children). |
| 61 | ;;; @item cumulative seconds |
| 62 | ;;; The total number of seconds spent in the procedure, including |
| 63 | ;;; children. |
| 64 | ;;; @item self seconds |
| 65 | ;;; The total number of seconds spent in the procedure itself (not counting |
| 66 | ;;; children). |
| 67 | ;;; @item calls |
| 68 | ;;; The total number of times the procedure was called. |
| 69 | ;;; @item self ms/call |
| 70 | ;;; The average time taken by the procedure itself on each call, in ms. |
| 71 | ;;; @item total ms/call |
| 72 | ;;; The average time taken by each call to the procedure, including time |
| 73 | ;;; spent in child functions. |
| 74 | ;;; @item name |
| 75 | ;;; The name of the procedure. |
| 76 | ;;; @end table |
| 77 | ;;; |
| 78 | ;;; The profiler uses @code{eq?} and the procedure object itself to |
| 79 | ;;; identify the procedures, so it won't confuse different procedures with |
| 80 | ;;; the same name. They will show up as two different rows in the output. |
| 81 | ;;; |
| 82 | ;;; Right now the profiler is quite simplistic. I cannot provide |
| 83 | ;;; call-graphs or other higher level information. What you see in the |
| 84 | ;;; table is pretty much all there is. Patches are welcome :-) |
| 85 | ;;; |
| 86 | ;;; @section Implementation notes |
| 87 | ;;; |
| 88 | ;;; The profiler works by setting the unix profiling signal |
| 89 | ;;; @code{ITIMER_PROF} to go off after the interval you define in the call |
| 90 | ;;; to @code{statprof-reset}. When the signal fires, a sampling routine is |
| 91 | ;;; run which looks at the current procedure that's executing, and then |
| 92 | ;;; crawls up the stack, and for each procedure encountered, increments |
| 93 | ;;; that procedure's sample count. Note that if a procedure is encountered |
| 94 | ;;; multiple times on a given stack, it is only counted once. After the |
| 95 | ;;; sampling is complete, the profiler resets profiling timer to fire |
| 96 | ;;; again after the appropriate interval. |
| 97 | ;;; |
| 98 | ;;; Meanwhile, the profiler keeps track, via @code{get-internal-run-time}, |
| 99 | ;;; how much CPU time (system and user -- which is also what |
| 100 | ;;; @code{ITIMER_PROF} tracks), has elapsed while code has been executing |
| 101 | ;;; within a statprof-start/stop block. |
| 102 | ;;; |
| 103 | ;;; The profiler also tries to avoid counting or timing its own code as |
| 104 | ;;; much as possible. |
| 105 | ;;; |
| 106 | ;;; Code: |
| 107 | |
| 108 | (define-module (statprof) |
| 109 | #:use-module (srfi srfi-1) |
| 110 | #:use-module (srfi srfi-9) |
| 111 | #:use-module (srfi srfi-9 gnu) |
| 112 | #:autoload (ice-9 format) (format) |
| 113 | #:use-module (system vm vm) |
| 114 | #:use-module (system vm frame) |
| 115 | #:use-module (system vm debug) |
| 116 | #:use-module (system vm program) |
| 117 | #:export (statprof-active? |
| 118 | statprof-start |
| 119 | statprof-stop |
| 120 | statprof-reset |
| 121 | |
| 122 | statprof-accumulated-time |
| 123 | statprof-sample-count |
| 124 | statprof-fold-call-data |
| 125 | statprof-proc-call-data |
| 126 | statprof-call-data-name |
| 127 | statprof-call-data-calls |
| 128 | statprof-call-data-cum-samples |
| 129 | statprof-call-data-self-samples |
| 130 | statprof-call-data->stats |
| 131 | |
| 132 | statprof-stats-proc-name |
| 133 | statprof-stats-proc-source |
| 134 | statprof-stats-%-time-in-proc |
| 135 | statprof-stats-cum-secs-in-proc |
| 136 | statprof-stats-self-secs-in-proc |
| 137 | statprof-stats-calls |
| 138 | statprof-stats-self-secs-per-call |
| 139 | statprof-stats-cum-secs-per-call |
| 140 | |
| 141 | statprof-display |
| 142 | statprof-display-anomalies |
| 143 | statprof-display-anomolies ; Deprecated spelling. |
| 144 | |
| 145 | statprof-fetch-stacks |
| 146 | statprof-fetch-call-tree |
| 147 | |
| 148 | statprof |
| 149 | with-statprof |
| 150 | |
| 151 | gcprof)) |
| 152 | |
| 153 | |
| 154 | ;;; ~ Implementation notes ~ |
| 155 | ;;; |
| 156 | ;;; Statprof can be divided into two pieces: data collection and data |
| 157 | ;;; analysis. |
| 158 | ;;; |
| 159 | ;;; The data collection runs concurrently with the program, and is |
| 160 | ;;; designed to be as cheap as possible. The main data collection |
| 161 | ;;; instrument is the stack sampler, driven by SIGPROF signals that are |
| 162 | ;;; scheduled with periodic setitimer calls. The stack sampler simply |
| 163 | ;;; looks at every frame on the stack, and writes a representation of |
| 164 | ;;; the frame's procedure into a growable buffer. |
| 165 | ;;; |
| 166 | ;;; For most frames, this representation is the instruction pointer of |
| 167 | ;;; that frame, because it's cheap to get and you can map from |
| 168 | ;;; instruction pointer to procedure fairly cheaply. This won't |
| 169 | ;;; distinguish between different closures which share the same code, |
| 170 | ;;; but that is usually what we want anyway. |
| 171 | ;;; |
| 172 | ;;; One case in which we do want to distinguish closures is the case of |
| 173 | ;;; primitive procedures. If slot 0 in the frame is a primitive |
| 174 | ;;; procedure, we record the procedure's name into the buffer instead of |
| 175 | ;;; the IP. It's fairly cheap to check whether a value is a primitive |
| 176 | ;;; procedure, and then get its name, as its name is stored in the |
| 177 | ;;; closure data. Calling procedure-name in the stack sampler isn't |
| 178 | ;;; something you want to do for other kinds of procedures, though, as |
| 179 | ;;; that involves grovelling the debug information. |
| 180 | ;;; |
| 181 | ;;; The other part of data collection is the exact call counter, which |
| 182 | ;;; uses the VM's "apply" hook to record each procedure call. |
| 183 | ;;; Naturally, this is quite expensive, and it is off by default. |
| 184 | ;;; Running code at every procedure call effectively penalizes procedure |
| 185 | ;;; calls. Still, it's useful sometimes. If the profiler state has a |
| 186 | ;;; call-counts table, then calls will be counted. As with the stack |
| 187 | ;;; counter, usually the key in the hash table is the code pointer of |
| 188 | ;;; the procedure being called, except for primitive procedures, in |
| 189 | ;;; which case it is the name of the primitive. The call counter can |
| 190 | ;;; also see calls of non-programs, for example in the case of |
| 191 | ;;; applicable structs. In that case the key is the procedure itself. |
| 192 | ;;; |
| 193 | ;;; After collection is finished, the data can be analyzed. The first |
| 194 | ;;; step is usually to run over the stack traces, tabulating sample |
| 195 | ;;; counts by procedure; the stack-samples->procedure-data does that. |
| 196 | ;;; The result of stack-samples->procedure-data is a hash table mapping |
| 197 | ;;; procedures to "call data" records. The call data values are exposed |
| 198 | ;;; to users via the statprof-fold-call-data procedure. |
| 199 | ;;; |
| 200 | ;;; Usually all the analysis is triggered by calling statprof-display, |
| 201 | ;;; or having the statprof procedure call it for you. |
| 202 | ;;; |
| 203 | ;;; The other thing we can do is to look at the stacks themselves, for |
| 204 | ;;; example via statprof-fetch-call-tree. |
| 205 | ;;; |
| 206 | |
| 207 | ;;; ~ Threads and state ~ |
| 208 | ;;; |
| 209 | ;;; The state of the profiler is contained in a <state> record, which is |
| 210 | ;;; bound to a thread-local parameter. The accurate call counter uses |
| 211 | ;;; the VM apply hook, which is also local to the current thread, so all |
| 212 | ;;; is good there. |
| 213 | ;;; |
| 214 | ;;; The problem comes in the statistical stack sampler's use of |
| 215 | ;;; `setitimer' and SIGPROF. The timer manipulated by setitimer is a |
| 216 | ;;; whole-process timer, so it decrements as other threads execute, |
| 217 | ;;; which is the wrong thing if you want to profile just one thread. On |
| 218 | ;;; the other hand, SIGPROF is delivered to the process as a whole, |
| 219 | ;;; which is fine given Guile's signal-handling thread, but then only |
| 220 | ;;; delivered to the thread running statprof, which isn't the right |
| 221 | ;;; thing if you want to profile the whole system. |
| 222 | ;;; |
| 223 | ;;; The summary is that statprof works more or less well as a per-thread |
| 224 | ;;; profiler if no other threads are running on their own when |
| 225 | ;;; profiling. If the other threads are running on behalf of the thread |
| 226 | ;;; being profiled (as via futures or parallel marking) things still |
| 227 | ;;; mostly work as expected. You can run statprof in one thread, |
| 228 | ;;; finish, and then run statprof in another thread, and the profile |
| 229 | ;;; runs won't affect each other. But if you want true per-thread |
| 230 | ;;; profiles when other things are happening in the process, including |
| 231 | ;;; other statprof runs, or whole-process profiles with per-thread |
| 232 | ;;; breakdowns, the use of setitimer currently prevents that. |
| 233 | ;;; |
| 234 | ;;; The solution would be to switch to POSIX.1-2001's timer_create(2), |
| 235 | ;;; and to add some more threading-related API to statprof. Some other |
| 236 | ;;; day. |
| 237 | ;;; |
| 238 | |
| 239 | (define-record-type <state> |
| 240 | (make-state accumulated-time last-start-time sample-count |
| 241 | sampling-period remaining-prof-time profile-level |
| 242 | call-counts gc-time-taken inside-profiler? |
| 243 | prev-sigprof-handler outer-cut buffer buffer-pos) |
| 244 | state? |
| 245 | ;; Total time so far. |
| 246 | (accumulated-time accumulated-time set-accumulated-time!) |
| 247 | ;; Start-time when timer is active. |
| 248 | (last-start-time last-start-time set-last-start-time!) |
| 249 | ;; Total count of sampler calls. |
| 250 | (sample-count sample-count set-sample-count!) |
| 251 | ;; Microseconds. |
| 252 | (sampling-period sampling-period set-sampling-period!) |
| 253 | ;; Time remaining when prof suspended. |
| 254 | (remaining-prof-time remaining-prof-time set-remaining-prof-time!) |
| 255 | ;; For user start/stop nesting. |
| 256 | (profile-level profile-level set-profile-level!) |
| 257 | ;; Hash table mapping ip -> call count, or #f if not counting calls. |
| 258 | (call-counts call-counts set-call-counts!) |
| 259 | ;; GC time between statprof-start and statprof-stop. |
| 260 | (gc-time-taken gc-time-taken set-gc-time-taken!) |
| 261 | ;; True if we are inside the profiler. |
| 262 | (inside-profiler? inside-profiler? set-inside-profiler?!) |
| 263 | ;; Previous sigprof handler. |
| 264 | (prev-sigprof-handler prev-sigprof-handler set-prev-sigprof-handler!) |
| 265 | ;; Outer stack cut, or 0. |
| 266 | (outer-cut outer-cut) |
| 267 | ;; Stack samples. |
| 268 | (buffer buffer set-buffer!) |
| 269 | (buffer-pos buffer-pos set-buffer-pos!)) |
| 270 | |
| 271 | (define profiler-state (make-parameter #f)) |
| 272 | |
| 273 | (define (fresh-buffer) |
| 274 | (make-vector 1024 #f)) |
| 275 | |
| 276 | (define (expand-buffer buf) |
| 277 | (let* ((size (vector-length buf)) |
| 278 | (new (make-vector (* size 2) #f))) |
| 279 | (vector-move-left! buf 0 (vector-length buf) new 0) |
| 280 | new)) |
| 281 | |
| 282 | (define* (fresh-profiler-state #:key (count-calls? #f) |
| 283 | (sampling-period 10000) |
| 284 | (outer-cut 0)) |
| 285 | (make-state 0 #f 0 |
| 286 | sampling-period 0 0 |
| 287 | (and count-calls? (make-hash-table)) 0 #f |
| 288 | #f outer-cut (fresh-buffer) 0)) |
| 289 | |
| 290 | (define (ensure-profiler-state) |
| 291 | (or (profiler-state) |
| 292 | (let ((state (fresh-profiler-state))) |
| 293 | (profiler-state state) |
| 294 | state))) |
| 295 | |
| 296 | (define (existing-profiler-state) |
| 297 | (or (profiler-state) |
| 298 | (error "expected there to be a profiler state"))) |
| 299 | |
| 300 | (define (accumulate-time state stop-time) |
| 301 | (set-accumulated-time! state |
| 302 | (+ (accumulated-time state) |
| 303 | (- stop-time (last-start-time state))))) |
| 304 | |
| 305 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 306 | ;; SIGPROF handler |
| 307 | |
| 308 | (define (sample-stack-procs state stack) |
| 309 | (set-sample-count! state (+ (sample-count state) 1)) |
| 310 | |
| 311 | (let lp ((frame (stack-ref stack 0)) |
| 312 | (len (stack-length stack)) |
| 313 | (buffer (buffer state)) |
| 314 | (pos (buffer-pos state))) |
| 315 | (define (write-sample sample) |
| 316 | (vector-set! buffer pos sample)) |
| 317 | (define (continue pos) |
| 318 | (lp (frame-previous frame) (1- len) buffer pos)) |
| 319 | (define (write-sample-and-continue sample) |
| 320 | (write-sample sample) |
| 321 | (continue (1+ pos))) |
| 322 | (cond |
| 323 | ((= pos (vector-length buffer)) |
| 324 | (lp frame len (expand-buffer buffer) pos)) |
| 325 | ((or (zero? len) (not frame)) |
| 326 | (write-sample #f) |
| 327 | (set-buffer! state buffer) |
| 328 | (set-buffer-pos! state (1+ pos))) |
| 329 | (else |
| 330 | (let ((proc (frame-procedure frame))) |
| 331 | (write-sample-and-continue (if (primitive? proc) |
| 332 | (procedure-name proc) |
| 333 | (frame-instruction-pointer frame)))))))) |
| 334 | |
| 335 | (define (reset-sigprof-timer usecs) |
| 336 | ;; Guile's setitimer binding is terrible. |
| 337 | (let ((prev (setitimer ITIMER_PROF 0 0 0 usecs))) |
| 338 | (+ (* (caadr prev) #e1e6) (cdadr prev)))) |
| 339 | |
| 340 | (define profile-signal-handler |
| 341 | (let () |
| 342 | (define (profile-signal-handler sig) |
| 343 | (define state (existing-profiler-state)) |
| 344 | |
| 345 | (set-inside-profiler?! state #t) |
| 346 | |
| 347 | (when (positive? (profile-level state)) |
| 348 | (let* ((stop-time (get-internal-run-time)) |
| 349 | ;; Cut down to the signal handler. Note that this will |
| 350 | ;; only work if statprof.scm is compiled; otherwise we |
| 351 | ;; get `eval' on the stack instead, because if it's not |
| 352 | ;; compiled, profile-signal-handler is a thunk that |
| 353 | ;; tail-calls eval. For the same reason we define the |
| 354 | ;; handler in an inner letrec, so that the compiler sees |
| 355 | ;; the inner reference to profile-signal-handler as the |
| 356 | ;; same as the procedure, and therefore keeps slot 0 |
| 357 | ;; alive. Nastiness, that. |
| 358 | (stack |
| 359 | (or (make-stack #t profile-signal-handler (outer-cut state)) |
| 360 | (pk 'what! (make-stack #t))))) |
| 361 | |
| 362 | (sample-stack-procs state stack) |
| 363 | (accumulate-time state stop-time) |
| 364 | (set-last-start-time! state (get-internal-run-time)) |
| 365 | |
| 366 | (reset-sigprof-timer (sampling-period state)))) |
| 367 | |
| 368 | (set-inside-profiler?! state #f)) |
| 369 | profile-signal-handler)) |
| 370 | |
| 371 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 372 | ;; Count total calls. |
| 373 | |
| 374 | (define (count-call frame) |
| 375 | (let ((state (existing-profiler-state))) |
| 376 | (unless (inside-profiler? state) |
| 377 | (accumulate-time state (get-internal-run-time)) |
| 378 | |
| 379 | (let* ((key (let ((proc (frame-procedure frame))) |
| 380 | (cond |
| 381 | ((primitive? proc) (procedure-name proc)) |
| 382 | ((program? proc) (program-code proc)) |
| 383 | (else proc)))) |
| 384 | (handle (hashv-create-handle! (call-counts state) key 0))) |
| 385 | (set-cdr! handle (1+ (cdr handle)))) |
| 386 | |
| 387 | (set-last-start-time! state (get-internal-run-time))))) |
| 388 | |
| 389 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 390 | |
| 391 | (define (statprof-active?) |
| 392 | "Returns @code{#t} if @code{statprof-start} has been called more times |
| 393 | than @code{statprof-stop}, @code{#f} otherwise." |
| 394 | (define state (profiler-state)) |
| 395 | (and state (positive? (profile-level state)))) |
| 396 | |
| 397 | ;; Do not call this from statprof internal functions -- user only. |
| 398 | (define* (statprof-start #:optional (state (ensure-profiler-state))) |
| 399 | "Start the profiler.@code{}" |
| 400 | ;; After some head-scratching, I don't *think* I need to mask/unmask |
| 401 | ;; signals here, but if I'm wrong, please let me know. |
| 402 | (set-profile-level! state (+ (profile-level state) 1)) |
| 403 | (when (= (profile-level state) 1) |
| 404 | (let ((rpt (remaining-prof-time state))) |
| 405 | (set-remaining-prof-time! state 0) |
| 406 | ;; FIXME: Use per-thread run time. |
| 407 | (set-last-start-time! state (get-internal-run-time)) |
| 408 | (set-gc-time-taken! state (assq-ref (gc-stats) 'gc-time-taken)) |
| 409 | (let ((prev (sigaction SIGPROF profile-signal-handler))) |
| 410 | (set-prev-sigprof-handler! state (car prev))) |
| 411 | (reset-sigprof-timer (if (zero? rpt) (sampling-period state) rpt)) |
| 412 | (when (call-counts state) |
| 413 | (add-hook! (vm-apply-hook) count-call) |
| 414 | (set-vm-trace-level! (1+ (vm-trace-level)))) |
| 415 | #t))) |
| 416 | |
| 417 | ;; Do not call this from statprof internal functions -- user only. |
| 418 | (define* (statprof-stop #:optional (state (ensure-profiler-state))) |
| 419 | "Stop the profiler.@code{}" |
| 420 | ;; After some head-scratching, I don't *think* I need to mask/unmask |
| 421 | ;; signals here, but if I'm wrong, please let me know. |
| 422 | (set-profile-level! state (- (profile-level state) 1)) |
| 423 | (when (zero? (profile-level state)) |
| 424 | (when (call-counts state) |
| 425 | (set-vm-trace-level! (1- (vm-trace-level))) |
| 426 | (remove-hook! (vm-apply-hook) count-call)) |
| 427 | (set-gc-time-taken! state |
| 428 | (- (assq-ref (gc-stats) 'gc-time-taken) |
| 429 | (gc-time-taken state))) |
| 430 | ;; I believe that we need to do this before getting the time |
| 431 | ;; (unless we want to make things even more complicated). |
| 432 | (set-remaining-prof-time! state (reset-sigprof-timer 0)) |
| 433 | (accumulate-time state (get-internal-run-time)) |
| 434 | (sigaction SIGPROF (prev-sigprof-handler state)) |
| 435 | (set-prev-sigprof-handler! state #f) |
| 436 | (set-last-start-time! state #f))) |
| 437 | |
| 438 | (define* (statprof-reset sample-seconds sample-microseconds count-calls? |
| 439 | #:optional full-stacks?) |
| 440 | "Reset the statprof sampler interval to @var{sample-seconds} and |
| 441 | @var{sample-microseconds}. If @var{count-calls?} is true, arrange to |
| 442 | instrument procedure calls as well as collecting statistical profiling |
| 443 | data. (The optional @var{full-stacks?} argument is deprecated; statprof |
| 444 | always collects full stacks.)" |
| 445 | (when (statprof-active?) |
| 446 | (error "Can't reset profiler while profiler is running.")) |
| 447 | (profiler-state |
| 448 | (fresh-profiler-state #:count-calls? count-calls? |
| 449 | #:sampling-period (+ (* sample-seconds #e1e6) |
| 450 | sample-microseconds))) |
| 451 | (values)) |
| 452 | |
| 453 | (define-record-type call-data |
| 454 | (make-call-data name printable source |
| 455 | call-count cum-sample-count self-sample-count) |
| 456 | call-data? |
| 457 | (name call-data-name) |
| 458 | (printable call-data-printable) |
| 459 | (source call-data-source) |
| 460 | (call-count call-data-call-count set-call-data-call-count!) |
| 461 | (cum-sample-count call-data-cum-sample-count set-call-data-cum-sample-count!) |
| 462 | (self-sample-count call-data-self-sample-count set-call-data-self-sample-count!)) |
| 463 | |
| 464 | (define (source->string source) |
| 465 | (format #f "~a:~a:~a" |
| 466 | (or (source-file source) "<current input>") |
| 467 | (source-line-for-user source) |
| 468 | (source-column source))) |
| 469 | |
| 470 | (define (program-debug-info-printable pdi) |
| 471 | (let* ((addr (program-debug-info-addr pdi)) |
| 472 | (name (or (and=> (program-debug-info-name pdi) symbol->string) |
| 473 | (string-append "#x" (number->string addr 16)))) |
| 474 | (loc (and=> (find-source-for-addr addr) source->string))) |
| 475 | (if loc |
| 476 | (string-append name " at " loc) |
| 477 | name))) |
| 478 | |
| 479 | (define (addr->pdi addr cache) |
| 480 | (cond |
| 481 | ((hashv-get-handle cache addr) => cdr) |
| 482 | (else |
| 483 | (let ((data (find-program-debug-info addr))) |
| 484 | (hashv-set! cache addr data) |
| 485 | data)))) |
| 486 | |
| 487 | (define (addr->printable addr pdi) |
| 488 | (or (and=> (and=> pdi program-debug-info-name) symbol->string) |
| 489 | (string-append "anon #x" (number->string addr 16)))) |
| 490 | |
| 491 | (define (inc-call-data-cum-sample-count! cd) |
| 492 | (set-call-data-cum-sample-count! cd (1+ (call-data-cum-sample-count cd)))) |
| 493 | (define (inc-call-data-self-sample-count! cd) |
| 494 | (set-call-data-self-sample-count! cd (1+ (call-data-self-sample-count cd)))) |
| 495 | |
| 496 | (define (stack-samples->procedure-data state) |
| 497 | (let ((table (make-hash-table)) |
| 498 | (addr-cache (make-hash-table)) |
| 499 | (call-counts (call-counts state)) |
| 500 | (buffer (buffer state)) |
| 501 | (len (buffer-pos state))) |
| 502 | (define (addr->call-data addr) |
| 503 | (let* ((pdi (addr->pdi addr addr-cache)) |
| 504 | (entry (if pdi (program-debug-info-addr pdi) addr))) |
| 505 | (or (hashv-ref table entry) |
| 506 | (let ((data (make-call-data (and=> pdi program-debug-info-name) |
| 507 | (addr->printable entry pdi) |
| 508 | (find-source-for-addr entry) |
| 509 | (and call-counts |
| 510 | (hashv-ref call-counts entry)) |
| 511 | 0 |
| 512 | 0))) |
| 513 | (hashv-set! table entry data) |
| 514 | data)))) |
| 515 | |
| 516 | (define (callee->call-data callee) |
| 517 | (cond |
| 518 | ((number? callee) (addr->call-data callee)) |
| 519 | ((hashv-ref table callee)) |
| 520 | (else |
| 521 | (let ((data (make-call-data |
| 522 | (cond ((procedure? callee) (procedure-name callee)) |
| 523 | ;; a primitive |
| 524 | ((symbol? callee) callee) |
| 525 | (else #f)) |
| 526 | (with-output-to-string (lambda () (write callee))) |
| 527 | #f |
| 528 | (and call-counts (hashv-ref call-counts callee)) |
| 529 | 0 |
| 530 | 0))) |
| 531 | (hashv-set! table callee data) |
| 532 | data)))) |
| 533 | |
| 534 | (when call-counts |
| 535 | (hash-for-each (lambda (callee count) |
| 536 | (callee->call-data callee)) |
| 537 | call-counts)) |
| 538 | |
| 539 | (let visit-stacks ((pos 0)) |
| 540 | (cond |
| 541 | ((< pos len) |
| 542 | ;; FIXME: if we are counting all procedure calls, and |
| 543 | ;; count-call is on the stack, we need to not count the part |
| 544 | ;; of the stack that is within count-call. |
| 545 | (inc-call-data-self-sample-count! |
| 546 | (callee->call-data (vector-ref buffer pos))) |
| 547 | (let visit-stack ((pos pos)) |
| 548 | (cond |
| 549 | ((vector-ref buffer pos) |
| 550 | => (lambda (callee) |
| 551 | (inc-call-data-cum-sample-count! (callee->call-data callee)) |
| 552 | (visit-stack (1+ pos)))) |
| 553 | (else |
| 554 | (visit-stacks (1+ pos)))))) |
| 555 | (else table))))) |
| 556 | |
| 557 | (define (stack-samples->callee-lists state) |
| 558 | (let ((buffer (buffer state)) |
| 559 | (len (buffer-pos state))) |
| 560 | (let visit-stacks ((pos 0) (out '())) |
| 561 | (cond |
| 562 | ((< pos len) |
| 563 | ;; FIXME: if we are counting all procedure calls, and |
| 564 | ;; count-call is on the stack, we need to not count the part |
| 565 | ;; of the stack that is within count-call. |
| 566 | (let visit-stack ((pos pos) (stack '())) |
| 567 | (cond |
| 568 | ((vector-ref buffer pos) |
| 569 | => (lambda (callee) |
| 570 | (visit-stack (1+ pos) (cons callee stack)))) |
| 571 | (else |
| 572 | (visit-stacks (1+ pos) (cons (reverse stack) out)))))) |
| 573 | (else (reverse out)))))) |
| 574 | |
| 575 | (define* (statprof-fold-call-data proc init #:optional |
| 576 | (state (existing-profiler-state))) |
| 577 | "Fold @var{proc} over the call-data accumulated by statprof. Cannot be |
| 578 | called while statprof is active. @var{proc} should take two arguments, |
| 579 | @code{(@var{call-data} @var{prior-result})}. |
| 580 | |
| 581 | Note that a given proc-name may appear multiple times, but if it does, |
| 582 | it represents different functions with the same name." |
| 583 | (when (statprof-active?) |
| 584 | (error "Can't call statprof-fold-call-data while profiler is running.")) |
| 585 | (hash-fold |
| 586 | (lambda (key value prior-result) |
| 587 | (proc value prior-result)) |
| 588 | init |
| 589 | (stack-samples->procedure-data state))) |
| 590 | |
| 591 | (define* (statprof-proc-call-data proc #:optional |
| 592 | (state (existing-profiler-state))) |
| 593 | "Returns the call-data associated with @var{proc}, or @code{#f} if |
| 594 | none is available." |
| 595 | (when (statprof-active?) |
| 596 | (error "Can't call statprof-proc-call-data while profiler is running.")) |
| 597 | (hashv-ref (stack-samples->procedure-data state) |
| 598 | (cond |
| 599 | ((primitive? proc) (procedure-name proc)) |
| 600 | ((program? proc) (program-code proc)) |
| 601 | (else (program-code proc))))) |
| 602 | |
| 603 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 604 | ;; Stats |
| 605 | |
| 606 | (define-record-type stats |
| 607 | (make-stats proc-name proc-source |
| 608 | %-time-in-proc cum-secs-in-proc self-secs-in-proc |
| 609 | calls self-secs-per-call cum-secs-per-call) |
| 610 | stats? |
| 611 | (proc-name statprof-stats-proc-name) |
| 612 | (proc-source statprof-stats-proc-source) |
| 613 | (%-time-in-proc statprof-stats-%-time-in-proc) |
| 614 | (cum-secs-in-proc statprof-stats-cum-secs-in-proc) |
| 615 | (self-secs-in-proc statprof-stats-self-secs-in-proc) |
| 616 | (calls statprof-stats-calls) |
| 617 | (self-secs-per-call statprof-stats-self-secs-per-call) |
| 618 | (cum-secs-per-call statprof-stats-cum-secs-per-call)) |
| 619 | |
| 620 | (define (statprof-call-data->stats call-data) |
| 621 | "Returns an object of type @code{statprof-stats}." |
| 622 | (define state (existing-profiler-state)) |
| 623 | |
| 624 | (let* ((proc-name (call-data-name call-data)) |
| 625 | (proc-source (and=> (call-data-source call-data) source->string)) |
| 626 | (self-samples (call-data-self-sample-count call-data)) |
| 627 | (cum-samples (call-data-cum-sample-count call-data)) |
| 628 | (all-samples (statprof-sample-count state)) |
| 629 | (secs-per-sample (/ (statprof-accumulated-time state) |
| 630 | (statprof-sample-count state))) |
| 631 | (num-calls (and (call-counts state) |
| 632 | (statprof-call-data-calls call-data)))) |
| 633 | |
| 634 | (make-stats (or proc-name |
| 635 | ;; If there is no name and no source, fall back to |
| 636 | ;; printable. |
| 637 | (and (not proc-source) (call-data-printable call-data))) |
| 638 | proc-source |
| 639 | (* (/ self-samples all-samples) 100.0) |
| 640 | (* cum-samples secs-per-sample 1.0) |
| 641 | (* self-samples secs-per-sample 1.0) |
| 642 | num-calls |
| 643 | (and num-calls ;; maybe we only sampled in children |
| 644 | (if (zero? self-samples) 0.0 |
| 645 | (/ (* self-samples secs-per-sample) 1.0 num-calls))) |
| 646 | (and num-calls ;; cum-samples must be positive |
| 647 | (/ (* cum-samples secs-per-sample) |
| 648 | 1.0 |
| 649 | ;; num-calls might be 0 if we entered statprof during the |
| 650 | ;; dynamic extent of the call |
| 651 | (max num-calls 1)))))) |
| 652 | |
| 653 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 654 | |
| 655 | (define (stats-sorter x y) |
| 656 | (let ((diff (- (statprof-stats-self-secs-in-proc x) |
| 657 | (statprof-stats-self-secs-in-proc y)))) |
| 658 | (positive? |
| 659 | (if (= diff 0) |
| 660 | (- (statprof-stats-cum-secs-in-proc x) |
| 661 | (statprof-stats-cum-secs-in-proc y)) |
| 662 | diff)))) |
| 663 | |
| 664 | (define* (statprof-display #:optional (port (current-output-port)) |
| 665 | (state (existing-profiler-state))) |
| 666 | "Displays a gprof-like summary of the statistics collected. Unless an |
| 667 | optional @var{port} argument is passed, uses the current output port." |
| 668 | (cond |
| 669 | ((zero? (statprof-sample-count state)) |
| 670 | (format port "No samples recorded.\n")) |
| 671 | (else |
| 672 | (let* ((stats-list (statprof-fold-call-data |
| 673 | (lambda (data prior-value) |
| 674 | (cons (statprof-call-data->stats data) |
| 675 | prior-value)) |
| 676 | '() |
| 677 | state)) |
| 678 | (sorted-stats (sort stats-list stats-sorter))) |
| 679 | |
| 680 | (define (display-stats-line stats) |
| 681 | (format port "~6,2f ~9,2f ~9,2f" |
| 682 | (statprof-stats-%-time-in-proc stats) |
| 683 | (statprof-stats-cum-secs-in-proc stats) |
| 684 | (statprof-stats-self-secs-in-proc stats)) |
| 685 | (if (call-counts state) |
| 686 | (if (statprof-stats-calls stats) |
| 687 | (format port " ~7d ~8,2f ~8,2f " |
| 688 | (statprof-stats-calls stats) |
| 689 | (* 1000 (statprof-stats-self-secs-per-call stats)) |
| 690 | (* 1000 (statprof-stats-cum-secs-per-call stats))) |
| 691 | (format port " ")) |
| 692 | (display " " port)) |
| 693 | (let ((source (statprof-stats-proc-source stats)) |
| 694 | (name (statprof-stats-proc-name stats))) |
| 695 | (when source |
| 696 | (display source port) |
| 697 | (when name |
| 698 | (display ":" port))) |
| 699 | (when name |
| 700 | (display name port)) |
| 701 | (newline port))) |
| 702 | |
| 703 | (if (call-counts state) |
| 704 | (begin |
| 705 | (format port "~5a ~10a ~7a ~8a ~8a ~8a ~8@a\n" |
| 706 | "% " "cumulative" "self" "" "self" "total" "") |
| 707 | (format port "~5a ~9a ~8a ~8a ~8a ~8a ~a\n" |
| 708 | "time" "seconds" "seconds" "calls" "ms/call" "ms/call" "procedure")) |
| 709 | (begin |
| 710 | (format port "~5a ~10a ~7a ~8a\n" |
| 711 | "%" "cumulative" "self" "") |
| 712 | (format port "~5a ~10a ~7a ~a\n" |
| 713 | "time" "seconds" "seconds" "procedure"))) |
| 714 | |
| 715 | (for-each display-stats-line sorted-stats) |
| 716 | |
| 717 | (display "---\n" port) |
| 718 | (simple-format #t "Sample count: ~A\n" (statprof-sample-count state)) |
| 719 | (simple-format #t "Total time: ~A seconds (~A seconds in GC)\n" |
| 720 | (statprof-accumulated-time state) |
| 721 | (/ (gc-time-taken state) |
| 722 | 1.0 internal-time-units-per-second)))))) |
| 723 | |
| 724 | (define* (statprof-display-anomalies #:optional (state |
| 725 | (existing-profiler-state))) |
| 726 | "A sanity check that attempts to detect anomalies in statprof's |
| 727 | statistics.@code{}" |
| 728 | (statprof-fold-call-data |
| 729 | (lambda (data prior-value) |
| 730 | (when (and (call-counts state) |
| 731 | (zero? (call-data-call-count data)) |
| 732 | (positive? (call-data-cum-sample-count data))) |
| 733 | (simple-format #t |
| 734 | "==[~A ~A ~A]\n" |
| 735 | (call-data-name data) |
| 736 | (call-data-call-count data) |
| 737 | (call-data-cum-sample-count data)))) |
| 738 | #f |
| 739 | state) |
| 740 | (simple-format #t "Total time: ~A\n" (statprof-accumulated-time state)) |
| 741 | (simple-format #t "Sample count: ~A\n" (statprof-sample-count state))) |
| 742 | |
| 743 | (define (statprof-display-anomolies) |
| 744 | (issue-deprecation-warning "statprof-display-anomolies is a misspelling. " |
| 745 | "Use statprof-display-anomalies instead.") |
| 746 | (statprof-display-anomalies)) |
| 747 | |
| 748 | (define* (statprof-accumulated-time #:optional (state |
| 749 | (existing-profiler-state))) |
| 750 | "Returns the time accumulated during the last statprof run.@code{}" |
| 751 | (/ (accumulated-time state) 1.0 internal-time-units-per-second)) |
| 752 | |
| 753 | (define* (statprof-sample-count #:optional (state (existing-profiler-state))) |
| 754 | "Returns the number of samples taken during the last statprof run.@code{}" |
| 755 | (sample-count state)) |
| 756 | |
| 757 | (define statprof-call-data-name call-data-name) |
| 758 | (define statprof-call-data-calls call-data-call-count) |
| 759 | (define statprof-call-data-cum-samples call-data-cum-sample-count) |
| 760 | (define statprof-call-data-self-samples call-data-self-sample-count) |
| 761 | |
| 762 | (define* (statprof-fetch-stacks #:optional (state (existing-profiler-state))) |
| 763 | "Returns a list of stacks, as they were captured since the last call |
| 764 | to @code{statprof-reset}." |
| 765 | (stack-samples->callee-lists state)) |
| 766 | |
| 767 | (define procedure=? |
| 768 | (lambda (a b) |
| 769 | (cond |
| 770 | ((eq? a b)) |
| 771 | ((and (program? a) (program? b)) |
| 772 | (eq? (program-code a) (program-code b))) |
| 773 | (else |
| 774 | #f)))) |
| 775 | |
| 776 | ;; tree ::= (car n . tree*) |
| 777 | |
| 778 | (define (lists->trees lists equal?) |
| 779 | (let lp ((in lists) (n-terminal 0) (tails '())) |
| 780 | (cond |
| 781 | ((null? in) |
| 782 | (let ((trees (map (lambda (tail) |
| 783 | (cons (car tail) |
| 784 | (lists->trees (cdr tail) equal?))) |
| 785 | tails))) |
| 786 | (cons (apply + n-terminal (map cadr trees)) |
| 787 | (sort trees |
| 788 | (lambda (a b) (> (cadr a) (cadr b))))))) |
| 789 | ((null? (car in)) |
| 790 | (lp (cdr in) (1+ n-terminal) tails)) |
| 791 | ((find (lambda (x) (equal? (car x) (caar in))) |
| 792 | tails) |
| 793 | => (lambda (tail) |
| 794 | (lp (cdr in) |
| 795 | n-terminal |
| 796 | (assq-set! tails |
| 797 | (car tail) |
| 798 | (cons (cdar in) (cdr tail)))))) |
| 799 | (else |
| 800 | (lp (cdr in) |
| 801 | n-terminal |
| 802 | (acons (caar in) (list (cdar in)) tails)))))) |
| 803 | |
| 804 | (define* (statprof-fetch-call-tree #:optional (state (existing-profiler-state))) |
| 805 | "Return a call tree for the previous statprof run. |
| 806 | |
| 807 | The return value is a list of nodes, each of which is of the type: |
| 808 | @code |
| 809 | node ::= (@var{proc} @var{count} . @var{nodes}) |
| 810 | @end code" |
| 811 | (define (callee->printable callee) |
| 812 | (cond |
| 813 | ((number? callee) |
| 814 | (addr->printable callee (find-program-debug-info callee))) |
| 815 | (else |
| 816 | (with-output-to-string (lambda () (write callee)))))) |
| 817 | (define (memoizev/1 proc table) |
| 818 | (lambda (x) |
| 819 | (cond |
| 820 | ((hashv-get-handle table x) => cdr) |
| 821 | (else |
| 822 | (let ((res (proc x))) |
| 823 | (hashv-set! table x res) |
| 824 | res))))) |
| 825 | (let ((callee->printable (memoizev/1 callee->printable (make-hash-table)))) |
| 826 | (cons #t (lists->trees (map (lambda (callee-list) |
| 827 | (map callee->printable callee-list)) |
| 828 | (stack-samples->callee-lists state)) |
| 829 | equal?)))) |
| 830 | |
| 831 | (define (call-thunk thunk) |
| 832 | (call-with-values (lambda () (thunk)) |
| 833 | (lambda results |
| 834 | (apply values results)))) |
| 835 | |
| 836 | (define* (statprof thunk #:key (loop 1) (hz 100) (count-calls? #f) |
| 837 | (port (current-output-port)) full-stacks?) |
| 838 | "Profile the execution of @var{thunk}, and return its return values. |
| 839 | |
| 840 | The stack will be sampled @var{hz} times per second, and the thunk |
| 841 | itself will be called @var{loop} times. |
| 842 | |
| 843 | If @var{count-calls?} is true, all procedure calls will be recorded. This |
| 844 | operation is somewhat expensive." |
| 845 | |
| 846 | (let ((state (fresh-profiler-state #:count-calls? count-calls? |
| 847 | #:sampling-period |
| 848 | (inexact->exact (round (/ 1e6 hz))) |
| 849 | #:outer-cut |
| 850 | (program-address-range call-thunk)))) |
| 851 | (parameterize ((profiler-state state)) |
| 852 | (dynamic-wind |
| 853 | (lambda () |
| 854 | (statprof-start state)) |
| 855 | (lambda () |
| 856 | (let lp ((i loop)) |
| 857 | (unless (= i 1) |
| 858 | (call-thunk thunk) |
| 859 | (lp (1- i)))) |
| 860 | (call-thunk thunk)) |
| 861 | (lambda () |
| 862 | (statprof-stop state) |
| 863 | (statprof-display port state)))))) |
| 864 | |
| 865 | (define-macro (with-statprof . args) |
| 866 | "Profile the expressions in the body, and return the body's return values. |
| 867 | |
| 868 | Keyword arguments: |
| 869 | |
| 870 | @table @code |
| 871 | @item #:loop |
| 872 | Execute the body @var{loop} number of times, or @code{#f} for no looping |
| 873 | |
| 874 | default: @code{#f} |
| 875 | @item #:hz |
| 876 | Sampling rate |
| 877 | |
| 878 | default: @code{20} |
| 879 | @item #:count-calls? |
| 880 | Whether to instrument each function call (expensive) |
| 881 | |
| 882 | default: @code{#f} |
| 883 | @end table" |
| 884 | (define (kw-arg-ref kw args def) |
| 885 | (cond |
| 886 | ((null? args) (error "Invalid macro body")) |
| 887 | ((keyword? (car args)) |
| 888 | (if (eq? (car args) kw) |
| 889 | (cadr args) |
| 890 | (kw-arg-ref kw (cddr args) def))) |
| 891 | ((eq? kw #f def) ;; asking for the body |
| 892 | args) |
| 893 | (else def))) ;; kw not found |
| 894 | `((@ (statprof) statprof) |
| 895 | (lambda () ,@(kw-arg-ref #f args #f)) |
| 896 | #:loop ,(kw-arg-ref #:loop args 1) |
| 897 | #:hz ,(kw-arg-ref #:hz args 100) |
| 898 | #:count-calls? ,(kw-arg-ref #:count-calls? args #f) |
| 899 | #:full-stacks? ,(kw-arg-ref #:full-stacks? args #f))) |
| 900 | |
| 901 | (define* (gcprof thunk #:key (loop 1) full-stacks? (port (current-output-port))) |
| 902 | "Do an allocation profile of the execution of @var{thunk}. |
| 903 | |
| 904 | The stack will be sampled soon after every garbage collection, yielding |
| 905 | an approximate idea of what is causing allocation in your program. |
| 906 | |
| 907 | Since GC does not occur very frequently, you may need to use the |
| 908 | @var{loop} parameter, to cause @var{thunk} to be called @var{loop} |
| 909 | times." |
| 910 | |
| 911 | (let ((state (fresh-profiler-state #:outer-cut |
| 912 | (program-address-range call-thunk)))) |
| 913 | (parameterize ((profiler-state state)) |
| 914 | (define (gc-callback) |
| 915 | (unless (inside-profiler? state) |
| 916 | (set-inside-profiler?! state #t) |
| 917 | |
| 918 | (let ((stop-time (get-internal-run-time)) |
| 919 | ;; Cut down to gc-callback, and then one before (the |
| 920 | ;; after-gc async). See the note in profile-signal-handler |
| 921 | ;; also. |
| 922 | (stack (or (make-stack #t gc-callback (outer-cut state) 1) |
| 923 | (pk 'what! (make-stack #t))))) |
| 924 | (sample-stack-procs state stack) |
| 925 | (accumulate-time state stop-time) |
| 926 | (set-last-start-time! state (get-internal-run-time))) |
| 927 | |
| 928 | (set-inside-profiler?! state #f))) |
| 929 | |
| 930 | (dynamic-wind |
| 931 | (lambda () |
| 932 | (set-profile-level! state 1) |
| 933 | (set-last-start-time! state (get-internal-run-time)) |
| 934 | (set-gc-time-taken! state (assq-ref (gc-stats) 'gc-time-taken)) |
| 935 | (add-hook! after-gc-hook gc-callback)) |
| 936 | (lambda () |
| 937 | (let lp ((i loop)) |
| 938 | (unless (zero? i) |
| 939 | (call-thunk thunk) |
| 940 | (lp (1- i))))) |
| 941 | (lambda () |
| 942 | (remove-hook! after-gc-hook gc-callback) |
| 943 | (set-gc-time-taken! state |
| 944 | (- (assq-ref (gc-stats) 'gc-time-taken) |
| 945 | (gc-time-taken state))) |
| 946 | (accumulate-time state (get-internal-run-time)) |
| 947 | (set-profile-level! state 0) |
| 948 | (statprof-display port state)))))) |