Commit | Line | Data |
---|---|---|
47f3ce52 AW |
1 | ;;;; (statprof) -- a statistical profiler for Guile |
2 | ;;;; -*-scheme-*- | |
3 | ;;;; | |
998f8494 | 4 | ;;;; Copyright (C) 2009, 2010, 2011, 2013, 2014 Free Software Foundation, Inc. |
47f3ce52 AW |
5 | ;;;; Copyright (C) 2004, 2009 Andy Wingo <wingo at pobox dot com> |
6 | ;;;; Copyright (C) 2001 Rob Browning <rlb at defaultvalue dot org> | |
7 | ;;;; | |
8 | ;;;; This library is free software; you can redistribute it and/or | |
9 | ;;;; modify it under the terms of the GNU Lesser General Public | |
10 | ;;;; License as published by the Free Software Foundation; either | |
11 | ;;;; version 3 of the License, or (at your option) any later version. | |
12 | ;;;; | |
13 | ;;;; This library is distributed in the hope that it will be useful, | |
14 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | ;;;; Lesser General Public License for more details. | |
17 | ;;;; | |
18 | ;;;; You should have received a copy of the GNU Lesser General Public | |
19 | ;;;; License along with this library; if not, write to the Free Software | |
20 | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 | ;;;; | |
22 | \f | |
23 | ||
24 | ;;; Commentary: | |
998f8494 | 25 | ;;; |
62fd93e2 | 26 | ;;; @code{(statprof)} is a statistical profiler for Guile. |
998f8494 AW |
27 | ;;; |
28 | ;;; A simple use of statprof would look like this: | |
29 | ;;; | |
30 | ;;; @example | |
31 | ;;; (statprof-reset 0 50000 #t) | |
32 | ;;; (statprof-start) | |
33 | ;;; (do-something) | |
34 | ;;; (statprof-stop) | |
35 | ;;; (statprof-display) | |
36 | ;;; @end example | |
37 | ;;; | |
38 | ;;; This would reset statprof, clearing all accumulated statistics, then | |
39 | ;;; start profiling, run some code, stop profiling, and finally display a | |
40 | ;;; gprof flat-style table of statistics which will look something like | |
41 | ;;; this: | |
42 | ;;; | |
43 | ;;; @example | |
44 | ;;; % cumulative self self total | |
45 | ;;; time seconds seconds calls ms/call ms/call name | |
46 | ;;; 35.29 0.23 0.23 2002 0.11 0.11 - | |
47 | ;;; 23.53 0.15 0.15 2001 0.08 0.08 positive? | |
48 | ;;; 23.53 0.15 0.15 2000 0.08 0.08 + | |
49 | ;;; 11.76 0.23 0.08 2000 0.04 0.11 do-nothing | |
50 | ;;; 5.88 0.64 0.04 2001 0.02 0.32 loop | |
51 | ;;; 0.00 0.15 0.00 1 0.00 150.59 do-something | |
52 | ;;; ... | |
53 | ;;; @end example | |
54 | ;;; | |
55 | ;;; All of the numerical data with the exception of the calls column is | |
56 | ;;; statistically approximate. In the following column descriptions, and | |
57 | ;;; in all of statprof, "time" refers to execution time (both user and | |
58 | ;;; system), not wall clock time. | |
59 | ;;; | |
60 | ;;; @table @asis | |
61 | ;;; @item % time | |
62 | ;;; The percent of the time spent inside the procedure itself | |
63 | ;;; (not counting children). | |
64 | ;;; @item cumulative seconds | |
65 | ;;; The total number of seconds spent in the procedure, including | |
66 | ;;; children. | |
67 | ;;; @item self seconds | |
68 | ;;; The total number of seconds spent in the procedure itself (not counting | |
69 | ;;; children). | |
70 | ;;; @item calls | |
71 | ;;; The total number of times the procedure was called. | |
72 | ;;; @item self ms/call | |
73 | ;;; The average time taken by the procedure itself on each call, in ms. | |
74 | ;;; @item total ms/call | |
75 | ;;; The average time taken by each call to the procedure, including time | |
76 | ;;; spent in child functions. | |
77 | ;;; @item name | |
78 | ;;; The name of the procedure. | |
79 | ;;; @end table | |
80 | ;;; | |
81 | ;;; The profiler uses @code{eq?} and the procedure object itself to | |
82 | ;;; identify the procedures, so it won't confuse different procedures with | |
83 | ;;; the same name. They will show up as two different rows in the output. | |
84 | ;;; | |
85 | ;;; Right now the profiler is quite simplistic. I cannot provide | |
86 | ;;; call-graphs or other higher level information. What you see in the | |
87 | ;;; table is pretty much all there is. Patches are welcome :-) | |
88 | ;;; | |
89 | ;;; @section Implementation notes | |
90 | ;;; | |
91 | ;;; The profiler works by setting the unix profiling signal | |
92 | ;;; @code{ITIMER_PROF} to go off after the interval you define in the call | |
93 | ;;; to @code{statprof-reset}. When the signal fires, a sampling routine is | |
94 | ;;; run which looks at the current procedure that's executing, and then | |
95 | ;;; crawls up the stack, and for each procedure encountered, increments | |
96 | ;;; that procedure's sample count. Note that if a procedure is encountered | |
97 | ;;; multiple times on a given stack, it is only counted once. After the | |
98 | ;;; sampling is complete, the profiler resets profiling timer to fire | |
99 | ;;; again after the appropriate interval. | |
100 | ;;; | |
101 | ;;; Meanwhile, the profiler keeps track, via @code{get-internal-run-time}, | |
102 | ;;; how much CPU time (system and user -- which is also what | |
103 | ;;; @code{ITIMER_PROF} tracks), has elapsed while code has been executing | |
104 | ;;; within a statprof-start/stop block. | |
105 | ;;; | |
106 | ;;; The profiler also tries to avoid counting or timing its own code as | |
107 | ;;; much as possible. | |
108 | ;;; | |
47f3ce52 AW |
109 | ;;; Code: |
110 | ||
47f3ce52 AW |
111 | (define-module (statprof) |
112 | #:use-module (srfi srfi-1) | |
62fd93e2 | 113 | #:use-module (srfi srfi-9) |
e4a8775d | 114 | #:use-module (srfi srfi-9 gnu) |
47f3ce52 | 115 | #:autoload (ice-9 format) (format) |
e1138ba1 AW |
116 | #:use-module (system vm vm) |
117 | #:use-module (system vm frame) | |
118 | #:use-module (system vm program) | |
47f3ce52 AW |
119 | #:export (statprof-active? |
120 | statprof-start | |
121 | statprof-stop | |
122 | statprof-reset | |
123 | ||
124 | statprof-accumulated-time | |
125 | statprof-sample-count | |
126 | statprof-fold-call-data | |
127 | statprof-proc-call-data | |
128 | statprof-call-data-name | |
129 | statprof-call-data-calls | |
130 | statprof-call-data-cum-samples | |
131 | statprof-call-data-self-samples | |
132 | statprof-call-data->stats | |
133 | ||
134 | statprof-stats-proc-name | |
135 | statprof-stats-%-time-in-proc | |
136 | statprof-stats-cum-secs-in-proc | |
137 | statprof-stats-self-secs-in-proc | |
138 | statprof-stats-calls | |
139 | statprof-stats-self-secs-per-call | |
140 | statprof-stats-cum-secs-per-call | |
141 | ||
142 | statprof-display | |
143 | statprof-display-anomolies | |
144 | ||
145 | statprof-fetch-stacks | |
146 | statprof-fetch-call-tree | |
147 | ||
e1138ba1 | 148 | statprof |
2d239a78 AW |
149 | with-statprof |
150 | ||
151 | gcprof)) | |
47f3ce52 AW |
152 | |
153 | ||
154 | ;; This profiler tracks two numbers for every function called while | |
155 | ;; it's active. It tracks the total number of calls, and the number | |
156 | ;; of times the function was active when the sampler fired. | |
157 | ;; | |
158 | ;; Globally the profiler tracks the total time elapsed and the number | |
159 | ;; of times the sampler was fired. | |
160 | ;; | |
161 | ;; Right now, this profiler is not per-thread and is not thread safe. | |
162 | ||
62fd93e2 AW |
163 | (define-record-type <state> |
164 | (make-state accumulated-time last-start-time sample-count | |
165 | sampling-frequency remaining-prof-time profile-level | |
166 | count-calls? gc-time-taken record-full-stacks? | |
56bfce7c | 167 | stacks procedure-data inside-profiler?) |
62fd93e2 AW |
168 | state? |
169 | ;; Total time so far. | |
170 | (accumulated-time accumulated-time set-accumulated-time!) | |
171 | ;; Start-time when timer is active. | |
172 | (last-start-time last-start-time set-last-start-time!) | |
173 | ;; Total count of sampler calls. | |
174 | (sample-count sample-count set-sample-count!) | |
175 | ;; (seconds . microseconds) | |
176 | (sampling-frequency sampling-frequency set-sampling-frequency!) | |
177 | ;; Time remaining when prof suspended. | |
178 | (remaining-prof-time remaining-prof-time set-remaining-prof-time!) | |
179 | ;; For user start/stop nesting. | |
180 | (profile-level profile-level set-profile-level!) | |
181 | ;; Whether to catch apply-frame. | |
182 | (count-calls? count-calls? set-count-calls?!) | |
183 | ;; GC time between statprof-start and statprof-stop. | |
184 | (gc-time-taken gc-time-taken set-gc-time-taken!) | |
185 | ;; If #t, stash away the stacks for future analysis. | |
186 | (record-full-stacks? record-full-stacks? set-record-full-stacks?!) | |
187 | ;; If record-full-stacks?, the stashed full stacks. | |
188 | (stacks stacks set-stacks!) | |
189 | ;; A hash where the key is the function object itself and the value is | |
190 | ;; the data. The data will be a vector like this: | |
191 | ;; #(name call-count cum-sample-count self-sample-count) | |
56bfce7c AW |
192 | (procedure-data procedure-data set-procedure-data!) |
193 | ;; True if we are inside the profiler. | |
194 | (inside-profiler? inside-profiler? set-inside-profiler?!)) | |
62fd93e2 AW |
195 | |
196 | (define profiler-state (make-parameter #f)) | |
197 | ||
4eb1fb9b AW |
198 | (define* (fresh-profiler-state #:key (count-calls? #f) |
199 | (sampling-frequency '(0 . 10000)) | |
200 | (full-stacks? #f)) | |
3476a369 | 201 | (make-state 0 #f 0 sampling-frequency #f 0 count-calls? 0 #f '() |
4eb1fb9b AW |
202 | (make-hash-table) #f)) |
203 | ||
62fd93e2 AW |
204 | (define (ensure-profiler-state) |
205 | (or (profiler-state) | |
4eb1fb9b | 206 | (let ((state (fresh-profiler-state))) |
62fd93e2 AW |
207 | (profiler-state state) |
208 | state))) | |
47f3ce52 | 209 | |
45a7de82 AW |
210 | (define (existing-profiler-state) |
211 | (or (profiler-state) | |
212 | (error "expected there to be a profiler state"))) | |
213 | ||
e70a42d4 AW |
214 | (define-record-type call-data |
215 | (make-call-data proc call-count cum-sample-count self-sample-count) | |
216 | call-data? | |
217 | (proc call-data-proc) | |
218 | (call-count call-data-call-count set-call-data-call-count!) | |
219 | (cum-sample-count call-data-cum-sample-count set-call-data-cum-sample-count!) | |
220 | (self-sample-count call-data-self-sample-count set-call-data-self-sample-count!)) | |
221 | ||
c165c50d AW |
222 | (define (call-data-name cd) (procedure-name (call-data-proc cd))) |
223 | (define (call-data-printable cd) | |
224 | (or (call-data-name cd) | |
225 | (with-output-to-string (lambda () (write (call-data-proc cd)))))) | |
47f3ce52 | 226 | |
47f3ce52 | 227 | (define (inc-call-data-call-count! cd) |
e70a42d4 | 228 | (set-call-data-call-count! cd (1+ (call-data-call-count cd)))) |
47f3ce52 | 229 | (define (inc-call-data-cum-sample-count! cd) |
e70a42d4 | 230 | (set-call-data-cum-sample-count! cd (1+ (call-data-cum-sample-count cd)))) |
47f3ce52 | 231 | (define (inc-call-data-self-sample-count! cd) |
e70a42d4 | 232 | (set-call-data-self-sample-count! cd (1+ (call-data-self-sample-count cd)))) |
47f3ce52 | 233 | |
62fd93e2 AW |
234 | (define (accumulate-time state stop-time) |
235 | (set-accumulated-time! state | |
236 | (+ (accumulated-time state) | |
62fd93e2 | 237 | (- stop-time (last-start-time state))))) |
47f3ce52 | 238 | |
e4a8775d | 239 | (define (get-call-data state proc) |
0bd6b1ca | 240 | (let ((k (cond |
d1100525 | 241 | ((program? proc) (program-code proc)) |
0bd6b1ca | 242 | (else proc)))) |
62fd93e2 | 243 | (or (hashv-ref (procedure-data state) k) |
663212bb | 244 | (let ((call-data (make-call-data proc 0 0 0))) |
62fd93e2 | 245 | (hashv-set! (procedure-data state) k call-data) |
663212bb | 246 | call-data)))) |
47f3ce52 AW |
247 | |
248 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
249 | ;; SIGPROF handler | |
250 | ||
d20dd74e AW |
251 | ;; FIXME: Instead of this messing about with hash tables and |
252 | ;; frame-procedure, just record the stack of return addresses into a | |
253 | ;; growable vector, and resolve them to procedures when analyzing | |
254 | ;; instead of at collection time. | |
255 | ;; | |
e4a8775d | 256 | (define (sample-stack-procs state stack) |
47f3ce52 | 257 | (let ((stacklen (stack-length stack)) |
e4a8775d | 258 | (hit-count-call? #f)) |
47f3ce52 | 259 | |
cad444e3 AW |
260 | (when (record-full-stacks? state) |
261 | (set-stacks! state (cons stack (stacks state)))) | |
47f3ce52 | 262 | |
62fd93e2 | 263 | (set-sample-count! state (+ (sample-count state) 1)) |
47f3ce52 AW |
264 | ;; Now accumulate stats for the whole stack. |
265 | (let loop ((frame (stack-ref stack 0)) | |
266 | (procs-seen (make-hash-table 13)) | |
267 | (self #f)) | |
268 | (cond | |
269 | ((not frame) | |
270 | (hash-fold | |
271 | (lambda (proc val accum) | |
272 | (inc-call-data-cum-sample-count! | |
e4a8775d | 273 | (get-call-data state proc))) |
47f3ce52 AW |
274 | #f |
275 | procs-seen) | |
e4a8775d AW |
276 | (and=> (and=> self (lambda (proc) |
277 | (get-call-data state proc))) | |
47f3ce52 AW |
278 | inc-call-data-self-sample-count!)) |
279 | ((frame-procedure frame) | |
280 | => (lambda (proc) | |
281 | (cond | |
282 | ((eq? proc count-call) | |
283 | ;; We're not supposed to be sampling count-call and | |
284 | ;; its sub-functions, so loop again with a clean | |
285 | ;; slate. | |
286 | (set! hit-count-call? #t) | |
287 | (loop (frame-previous frame) (make-hash-table 13) #f)) | |
c165c50d | 288 | (else |
47f3ce52 AW |
289 | (hashq-set! procs-seen proc #t) |
290 | (loop (frame-previous frame) | |
291 | procs-seen | |
c165c50d | 292 | (or self proc)))))) |
47f3ce52 AW |
293 | (else |
294 | (loop (frame-previous frame) procs-seen self)))) | |
295 | hit-count-call?)) | |
296 | ||
47f3ce52 | 297 | (define (profile-signal-handler sig) |
45a7de82 | 298 | (define state (existing-profiler-state)) |
62fd93e2 | 299 | |
56bfce7c | 300 | (set-inside-profiler?! state #t) |
47f3ce52 AW |
301 | |
302 | ;; FIXME: with-statprof should be able to set an outer frame for the | |
303 | ;; stack cut | |
cad444e3 AW |
304 | (when (positive? (profile-level state)) |
305 | (let* ((stop-time (get-internal-run-time)) | |
306 | ;; cut down to the signal handler. note that this will only | |
307 | ;; work if statprof.scm is compiled; otherwise we get | |
308 | ;; `eval' on the stack instead, because if it's not | |
309 | ;; compiled, profile-signal-handler is a thunk that | |
310 | ;; tail-calls eval. perhaps we should always compile the | |
311 | ;; signal handler instead... | |
312 | (stack (or (make-stack #t profile-signal-handler) | |
546efe25 AW |
313 | (pk 'what! (make-stack #t))))) |
314 | ||
315 | (sample-stack-procs state stack) | |
316 | (accumulate-time state stop-time) | |
317 | (set-last-start-time! state (get-internal-run-time)) | |
318 | ||
cad444e3 AW |
319 | (setitimer ITIMER_PROF |
320 | 0 0 | |
321 | (car (sampling-frequency state)) | |
546efe25 | 322 | (cdr (sampling-frequency state))))) |
e1138ba1 | 323 | |
56bfce7c | 324 | (set-inside-profiler?! state #f)) |
47f3ce52 AW |
325 | |
326 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
327 | ;; Count total calls. | |
328 | ||
e1138ba1 | 329 | (define (count-call frame) |
45a7de82 | 330 | (define state (existing-profiler-state)) |
62fd93e2 | 331 | |
cad444e3 AW |
332 | (unless (inside-profiler? state) |
333 | (accumulate-time state (get-internal-run-time)) | |
47f3ce52 | 334 | |
cad444e3 AW |
335 | (and=> (frame-procedure frame) |
336 | (lambda (proc) | |
337 | (inc-call-data-call-count! | |
e4a8775d | 338 | (get-call-data state proc)))) |
47f3ce52 | 339 | |
cad444e3 | 340 | (set-last-start-time! state (get-internal-run-time)))) |
47f3ce52 AW |
341 | |
342 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
343 | ||
344 | (define (statprof-active?) | |
345 | "Returns @code{#t} if @code{statprof-start} has been called more times | |
346 | than @code{statprof-stop}, @code{#f} otherwise." | |
45a7de82 AW |
347 | (define state (profiler-state)) |
348 | (and state (positive? (profile-level state)))) | |
47f3ce52 AW |
349 | |
350 | ;; Do not call this from statprof internal functions -- user only. | |
351 | (define (statprof-start) | |
352 | "Start the profiler.@code{}" | |
353 | ;; After some head-scratching, I don't *think* I need to mask/unmask | |
354 | ;; signals here, but if I'm wrong, please let me know. | |
62fd93e2 AW |
355 | (define state (ensure-profiler-state)) |
356 | (set-profile-level! state (+ (profile-level state) 1)) | |
cad444e3 AW |
357 | (when (= (profile-level state) 1) |
358 | (let* ((rpt (remaining-prof-time state)) | |
359 | (use-rpt? (and rpt | |
360 | (or (positive? (car rpt)) | |
361 | (positive? (cdr rpt)))))) | |
362 | (set-remaining-prof-time! state #f) | |
363 | ;; FIXME: Use per-thread run time. | |
364 | (set-last-start-time! state (get-internal-run-time)) | |
3476a369 | 365 | (set-gc-time-taken! state (assq-ref (gc-stats) 'gc-time-taken)) |
cad444e3 AW |
366 | (if use-rpt? |
367 | (setitimer ITIMER_PROF 0 0 (car rpt) (cdr rpt)) | |
368 | (setitimer ITIMER_PROF | |
369 | 0 0 | |
370 | (car (sampling-frequency state)) | |
371 | (cdr (sampling-frequency state)))) | |
372 | (when (count-calls? state) | |
373 | (add-hook! (vm-apply-hook) count-call)) | |
374 | (set-vm-trace-level! (1+ (vm-trace-level))) | |
375 | #t))) | |
47f3ce52 AW |
376 | |
377 | ;; Do not call this from statprof internal functions -- user only. | |
378 | (define (statprof-stop) | |
379 | "Stop the profiler.@code{}" | |
380 | ;; After some head-scratching, I don't *think* I need to mask/unmask | |
381 | ;; signals here, but if I'm wrong, please let me know. | |
62fd93e2 AW |
382 | (define state (ensure-profiler-state)) |
383 | (set-profile-level! state (- (profile-level state) 1)) | |
cad444e3 AW |
384 | (when (zero? (profile-level state)) |
385 | (set-gc-time-taken! state | |
3476a369 | 386 | (- (assq-ref (gc-stats) 'gc-time-taken) |
cad444e3 AW |
387 | (gc-time-taken state))) |
388 | (set-vm-trace-level! (1- (vm-trace-level))) | |
389 | (when (count-calls? state) | |
390 | (remove-hook! (vm-apply-hook) count-call)) | |
391 | ;; I believe that we need to do this before getting the time | |
392 | ;; (unless we want to make things even more complicated). | |
393 | (set-remaining-prof-time! state (setitimer ITIMER_PROF 0 0 0 0)) | |
394 | (accumulate-time state (get-internal-run-time)) | |
395 | (set-last-start-time! state #f))) | |
47f3ce52 | 396 | |
e640b440 AW |
397 | (define* (statprof-reset sample-seconds sample-microseconds count-calls? |
398 | #:optional full-stacks?) | |
47f3ce52 AW |
399 | "Reset the statprof sampler interval to @var{sample-seconds} and |
400 | @var{sample-microseconds}. If @var{count-calls?} is true, arrange to | |
401 | instrument procedure calls as well as collecting statistical profiling | |
402 | data. If @var{full-stacks?} is true, collect all sampled stacks into a | |
403 | list for later analysis. | |
404 | ||
405 | Enables traps and debugging as necessary." | |
4d0c358b | 406 | (when (statprof-active?) |
4eb1fb9b AW |
407 | (error "Can't reset profiler while profiler is running.")) |
408 | (let ((state (fresh-profiler-state #:count-calls? count-calls? | |
409 | #:sampling-frequency | |
410 | (cons sample-seconds sample-microseconds) | |
411 | #:full-stacks? full-stacks?))) | |
412 | (profiler-state state) | |
413 | (sigaction SIGPROF profile-signal-handler) | |
414 | #t)) | |
47f3ce52 AW |
415 | |
416 | (define (statprof-fold-call-data proc init) | |
417 | "Fold @var{proc} over the call-data accumulated by statprof. Cannot be | |
418 | called while statprof is active. @var{proc} should take two arguments, | |
419 | @code{(@var{call-data} @var{prior-result})}. | |
420 | ||
421 | Note that a given proc-name may appear multiple times, but if it does, | |
422 | it represents different functions with the same name." | |
4d0c358b AW |
423 | (when (statprof-active?) |
424 | (error "Can't call statprof-fold-call-data while profiler is running.")) | |
47f3ce52 AW |
425 | (hash-fold |
426 | (lambda (key value prior-result) | |
427 | (proc value prior-result)) | |
428 | init | |
4d0c358b | 429 | (procedure-data (existing-profiler-state)))) |
47f3ce52 AW |
430 | |
431 | (define (statprof-proc-call-data proc) | |
432 | "Returns the call-data associated with @var{proc}, or @code{#f} if | |
433 | none is available." | |
4d0c358b AW |
434 | (when (statprof-active?) |
435 | (error "Can't call statprof-proc-call-data while profiler is running.")) | |
e4a8775d | 436 | (get-call-data (existing-profiler-state) proc)) |
47f3ce52 AW |
437 | |
438 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
439 | ;; Stats | |
440 | ||
441 | (define (statprof-call-data->stats call-data) | |
442 | "Returns an object of type @code{statprof-stats}." | |
443 | ;; returns (vector proc-name | |
444 | ;; %-time-in-proc | |
445 | ;; cum-seconds-in-proc | |
446 | ;; self-seconds-in-proc | |
447 | ;; num-calls | |
448 | ;; self-secs-per-call | |
449 | ;; total-secs-per-call) | |
450 | ||
45a7de82 | 451 | (define state (existing-profiler-state)) |
62fd93e2 | 452 | |
c165c50d | 453 | (let* ((proc-name (call-data-printable call-data)) |
47f3ce52 AW |
454 | (self-samples (call-data-self-sample-count call-data)) |
455 | (cum-samples (call-data-cum-sample-count call-data)) | |
456 | (all-samples (statprof-sample-count)) | |
457 | (secs-per-sample (/ (statprof-accumulated-time) | |
458 | (statprof-sample-count))) | |
62fd93e2 | 459 | (num-calls (and (count-calls? state) (statprof-call-data-calls call-data)))) |
47f3ce52 AW |
460 | |
461 | (vector proc-name | |
462 | (* (/ self-samples all-samples) 100.0) | |
463 | (* cum-samples secs-per-sample 1.0) | |
464 | (* self-samples secs-per-sample 1.0) | |
465 | num-calls | |
466 | (and num-calls ;; maybe we only sampled in children | |
467 | (if (zero? self-samples) 0.0 | |
468 | (/ (* self-samples secs-per-sample) 1.0 num-calls))) | |
469 | (and num-calls ;; cum-samples must be positive | |
e1138ba1 AW |
470 | (/ (* cum-samples secs-per-sample) |
471 | 1.0 | |
472 | ;; num-calls might be 0 if we entered statprof during the | |
473 | ;; dynamic extent of the call | |
474 | (max num-calls 1)))))) | |
47f3ce52 AW |
475 | |
476 | (define (statprof-stats-proc-name stats) (vector-ref stats 0)) | |
477 | (define (statprof-stats-%-time-in-proc stats) (vector-ref stats 1)) | |
478 | (define (statprof-stats-cum-secs-in-proc stats) (vector-ref stats 2)) | |
479 | (define (statprof-stats-self-secs-in-proc stats) (vector-ref stats 3)) | |
480 | (define (statprof-stats-calls stats) (vector-ref stats 4)) | |
481 | (define (statprof-stats-self-secs-per-call stats) (vector-ref stats 5)) | |
482 | (define (statprof-stats-cum-secs-per-call stats) (vector-ref stats 6)) | |
483 | ||
484 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
485 | ||
486 | (define (stats-sorter x y) | |
487 | (let ((diff (- (statprof-stats-self-secs-in-proc x) | |
488 | (statprof-stats-self-secs-in-proc y)))) | |
489 | (positive? | |
490 | (if (= diff 0) | |
491 | (- (statprof-stats-cum-secs-in-proc x) | |
492 | (statprof-stats-cum-secs-in-proc y)) | |
493 | diff)))) | |
494 | ||
62fd93e2 | 495 | (define* (statprof-display #:optional (port (current-output-port))) |
47f3ce52 AW |
496 | "Displays a gprof-like summary of the statistics collected. Unless an |
497 | optional @var{port} argument is passed, uses the current output port." | |
45a7de82 | 498 | (define state (existing-profiler-state)) |
47f3ce52 AW |
499 | |
500 | (cond | |
501 | ((zero? (statprof-sample-count)) | |
502 | (format port "No samples recorded.\n")) | |
503 | (else | |
504 | (let* ((stats-list (statprof-fold-call-data | |
505 | (lambda (data prior-value) | |
506 | (cons (statprof-call-data->stats data) | |
507 | prior-value)) | |
508 | '())) | |
509 | (sorted-stats (sort stats-list stats-sorter))) | |
510 | ||
511 | (define (display-stats-line stats) | |
62fd93e2 | 512 | (if (count-calls? state) |
e1138ba1 | 513 | (format port "~6,2f ~9,2f ~9,2f ~7d ~8,2f ~8,2f " |
47f3ce52 AW |
514 | (statprof-stats-%-time-in-proc stats) |
515 | (statprof-stats-cum-secs-in-proc stats) | |
516 | (statprof-stats-self-secs-in-proc stats) | |
517 | (statprof-stats-calls stats) | |
518 | (* 1000 (statprof-stats-self-secs-per-call stats)) | |
519 | (* 1000 (statprof-stats-cum-secs-per-call stats))) | |
520 | (format port "~6,2f ~9,2f ~9,2f " | |
521 | (statprof-stats-%-time-in-proc stats) | |
522 | (statprof-stats-cum-secs-in-proc stats) | |
523 | (statprof-stats-self-secs-in-proc stats))) | |
524 | (display (statprof-stats-proc-name stats) port) | |
525 | (newline port)) | |
526 | ||
62fd93e2 | 527 | (if (count-calls? state) |
47f3ce52 AW |
528 | (begin |
529 | (format port "~5a ~10a ~7a ~8a ~8a ~8a ~8@a\n" | |
530 | "% " "cumulative" "self" "" "self" "total" "") | |
531 | (format port "~5a ~9a ~8a ~8a ~8a ~8a ~8@a\n" | |
532 | "time" "seconds" "seconds" "calls" "ms/call" "ms/call" "name")) | |
533 | (begin | |
534 | (format port "~5a ~10a ~7a ~8@a\n" | |
535 | "%" "cumulative" "self" "") | |
536 | (format port "~5a ~10a ~7a ~8@a\n" | |
537 | "time" "seconds" "seconds" "name"))) | |
538 | ||
539 | (for-each display-stats-line sorted-stats) | |
540 | ||
541 | (display "---\n" port) | |
542 | (simple-format #t "Sample count: ~A\n" (statprof-sample-count)) | |
543 | (simple-format #t "Total time: ~A seconds (~A seconds in GC)\n" | |
544 | (statprof-accumulated-time) | |
3476a369 AW |
545 | (/ (gc-time-taken state) |
546 | 1.0 internal-time-units-per-second)))))) | |
47f3ce52 AW |
547 | |
548 | (define (statprof-display-anomolies) | |
549 | "A sanity check that attempts to detect anomolies in statprof's | |
550 | statistics.@code{}" | |
45a7de82 | 551 | (define state (existing-profiler-state)) |
62fd93e2 | 552 | |
47f3ce52 AW |
553 | (statprof-fold-call-data |
554 | (lambda (data prior-value) | |
cad444e3 AW |
555 | (when (and (count-calls? state) |
556 | (zero? (call-data-call-count data)) | |
557 | (positive? (call-data-cum-sample-count data))) | |
558 | (simple-format #t | |
559 | "==[~A ~A ~A]\n" | |
560 | (call-data-name data) | |
561 | (call-data-call-count data) | |
562 | (call-data-cum-sample-count data)))) | |
47f3ce52 AW |
563 | #f) |
564 | (simple-format #t "Total time: ~A\n" (statprof-accumulated-time)) | |
565 | (simple-format #t "Sample count: ~A\n" (statprof-sample-count))) | |
566 | ||
567 | (define (statprof-accumulated-time) | |
568 | "Returns the time accumulated during the last statprof run.@code{}" | |
4d0c358b AW |
569 | (when (statprof-active?) |
570 | (error "Can't get accumulated time while profiler is running.")) | |
3476a369 | 571 | (/ (accumulated-time (existing-profiler-state)) 1.0 internal-time-units-per-second)) |
47f3ce52 AW |
572 | |
573 | (define (statprof-sample-count) | |
574 | "Returns the number of samples taken during the last statprof run.@code{}" | |
4d0c358b AW |
575 | (when (statprof-active?) |
576 | (error "Can't get sample count while profiler is running.")) | |
577 | (sample-count (existing-profiler-state))) | |
47f3ce52 AW |
578 | |
579 | (define statprof-call-data-name call-data-name) | |
580 | (define statprof-call-data-calls call-data-call-count) | |
581 | (define statprof-call-data-cum-samples call-data-cum-sample-count) | |
582 | (define statprof-call-data-self-samples call-data-self-sample-count) | |
583 | ||
584 | (define (statprof-fetch-stacks) | |
585 | "Returns a list of stacks, as they were captured since the last call | |
586 | to @code{statprof-reset}. | |
587 | ||
588 | Note that stacks are only collected if the @var{full-stacks?} argument | |
589 | to @code{statprof-reset} is true." | |
45a7de82 | 590 | (define state (existing-profiler-state)) |
62fd93e2 | 591 | (stacks state)) |
47f3ce52 AW |
592 | |
593 | (define procedure=? | |
663212bb AW |
594 | (lambda (a b) |
595 | (cond | |
596 | ((eq? a b)) | |
0bd1e9c6 | 597 | ((and (program? a) (program? b)) |
d1100525 | 598 | (eq? (program-code a) (program-code b))) |
663212bb AW |
599 | (else |
600 | #f)))) | |
47f3ce52 AW |
601 | |
602 | ;; tree ::= (car n . tree*) | |
603 | ||
604 | (define (lists->trees lists equal?) | |
605 | (let lp ((in lists) (n-terminal 0) (tails '())) | |
606 | (cond | |
607 | ((null? in) | |
608 | (let ((trees (map (lambda (tail) | |
609 | (cons (car tail) | |
610 | (lists->trees (cdr tail) equal?))) | |
611 | tails))) | |
612 | (cons (apply + n-terminal (map cadr trees)) | |
613 | (sort trees | |
614 | (lambda (a b) (> (cadr a) (cadr b))))))) | |
615 | ((null? (car in)) | |
616 | (lp (cdr in) (1+ n-terminal) tails)) | |
617 | ((find (lambda (x) (equal? (car x) (caar in))) | |
618 | tails) | |
619 | => (lambda (tail) | |
620 | (lp (cdr in) | |
621 | n-terminal | |
622 | (assq-set! tails | |
623 | (car tail) | |
624 | (cons (cdar in) (cdr tail)))))) | |
625 | (else | |
626 | (lp (cdr in) | |
627 | n-terminal | |
628 | (acons (caar in) (list (cdar in)) tails)))))) | |
629 | ||
630 | (define (stack->procedures stack) | |
631 | (filter identity | |
632 | (unfold-right (lambda (x) (not x)) | |
633 | frame-procedure | |
634 | frame-previous | |
635 | (stack-ref stack 0)))) | |
636 | ||
637 | (define (statprof-fetch-call-tree) | |
638 | "Return a call tree for the previous statprof run. | |
639 | ||
640 | The return value is a list of nodes, each of which is of the type: | |
641 | @code | |
642 | node ::= (@var{proc} @var{count} . @var{nodes}) | |
643 | @end code" | |
45a7de82 | 644 | (define state (existing-profiler-state)) |
62fd93e2 | 645 | (cons #t (lists->trees (map stack->procedures (stacks state)) procedure=?))) |
47f3ce52 | 646 | |
e1138ba1 AW |
647 | (define* (statprof thunk #:key (loop 1) (hz 100) (count-calls? #f) |
648 | (full-stacks? #f)) | |
649 | "Profiles the execution of @var{thunk}. | |
650 | ||
651 | The stack will be sampled @var{hz} times per second, and the thunk itself will | |
652 | be called @var{loop} times. | |
653 | ||
654 | If @var{count-calls?} is true, all procedure calls will be recorded. This | |
655 | operation is somewhat expensive. | |
656 | ||
657 | If @var{full-stacks?} is true, at each sample, statprof will store away the | |
658 | whole call tree, for later analysis. Use @code{statprof-fetch-stacks} or | |
659 | @code{statprof-fetch-call-tree} to retrieve the last-stored stacks." | |
660 | ||
fd5dfcce AW |
661 | (let ((state (fresh-profiler-state))) |
662 | (parameterize ((profiler-state state)) | |
663 | (dynamic-wind | |
664 | (lambda () | |
665 | (statprof-reset (inexact->exact (floor (/ 1 hz))) | |
666 | (inexact->exact (* 1e6 (- (/ 1 hz) | |
667 | (floor (/ 1 hz))))) | |
668 | count-calls? | |
669 | full-stacks?) | |
670 | (statprof-start)) | |
671 | (lambda () | |
672 | (let lp ((i loop)) | |
673 | (unless (zero? i) | |
674 | (thunk) | |
675 | (lp (1- i))))) | |
676 | (lambda () | |
677 | (statprof-stop) | |
678 | (statprof-display)))))) | |
e1138ba1 | 679 | |
47f3ce52 AW |
680 | (define-macro (with-statprof . args) |
681 | "Profiles the expressions in its body. | |
682 | ||
683 | Keyword arguments: | |
684 | ||
685 | @table @code | |
686 | @item #:loop | |
687 | Execute the body @var{loop} number of times, or @code{#f} for no looping | |
688 | ||
689 | default: @code{#f} | |
690 | @item #:hz | |
691 | Sampling rate | |
692 | ||
693 | default: @code{20} | |
694 | @item #:count-calls? | |
695 | Whether to instrument each function call (expensive) | |
696 | ||
697 | default: @code{#f} | |
698 | @item #:full-stacks? | |
699 | Whether to collect away all sampled stacks into a list | |
700 | ||
701 | default: @code{#f} | |
702 | @end table" | |
703 | (define (kw-arg-ref kw args def) | |
704 | (cond | |
705 | ((null? args) (error "Invalid macro body")) | |
706 | ((keyword? (car args)) | |
707 | (if (eq? (car args) kw) | |
708 | (cadr args) | |
709 | (kw-arg-ref kw (cddr args) def))) | |
710 | ((eq? kw #f def) ;; asking for the body | |
711 | args) | |
712 | (else def))) ;; kw not found | |
e1138ba1 AW |
713 | `((@ (statprof) statprof) |
714 | (lambda () ,@(kw-arg-ref #f args #f)) | |
715 | #:loop ,(kw-arg-ref #:loop args 1) | |
716 | #:hz ,(kw-arg-ref #:hz args 100) | |
717 | #:count-calls? ,(kw-arg-ref #:count-calls? args #f) | |
718 | #:full-stacks? ,(kw-arg-ref #:full-stacks? args #f))) | |
719 | ||
2d239a78 AW |
720 | (define* (gcprof thunk #:key (loop 1) (full-stacks? #f)) |
721 | "Do an allocation profile of the execution of @var{thunk}. | |
722 | ||
723 | The stack will be sampled soon after every garbage collection, yielding | |
724 | an approximate idea of what is causing allocation in your program. | |
725 | ||
726 | Since GC does not occur very frequently, you may need to use the | |
727 | @var{loop} parameter, to cause @var{thunk} to be called @var{loop} | |
728 | times. | |
729 | ||
730 | If @var{full-stacks?} is true, at each sample, statprof will store away the | |
731 | whole call tree, for later analysis. Use @code{statprof-fetch-stacks} or | |
732 | @code{statprof-fetch-call-tree} to retrieve the last-stored stacks." | |
733 | ||
fd5dfcce AW |
734 | (let ((state (fresh-profiler-state))) |
735 | (parameterize ((profiler-state state)) | |
736 | ||
737 | (define (reset) | |
738 | (when (positive? (profile-level state)) | |
739 | (error "Can't reset profiler while profiler is running.")) | |
740 | (set-accumulated-time! state 0) | |
741 | (set-last-start-time! state #f) | |
742 | (set-sample-count! state 0) | |
743 | (set-count-calls?! state #f) | |
744 | (set-procedure-data! state (make-hash-table 131)) | |
745 | (set-record-full-stacks?! state full-stacks?) | |
746 | (set-stacks! state '())) | |
747 | ||
748 | (define (gc-callback) | |
749 | (cond | |
750 | ((inside-profiler? state)) | |
751 | (else | |
752 | (set-inside-profiler?! state #t) | |
753 | ||
754 | ;; FIXME: should be able to set an outer frame for the stack cut | |
755 | (let ((stop-time (get-internal-run-time)) | |
756 | ;; Cut down to gc-callback, and then one before (the | |
757 | ;; after-gc async). See the note in profile-signal-handler | |
758 | ;; also. | |
759 | (stack (or (make-stack #t gc-callback 0 1) | |
760 | (pk 'what! (make-stack #t))))) | |
761 | (sample-stack-procs state stack) | |
762 | (accumulate-time state stop-time) | |
763 | (set-last-start-time! state (get-internal-run-time))) | |
2d239a78 | 764 | |
fd5dfcce AW |
765 | (set-inside-profiler?! state #f)))) |
766 | ||
767 | (define (start) | |
768 | (set-profile-level! state (+ (profile-level state) 1)) | |
769 | (when (= (profile-level state) 1) | |
770 | (set-remaining-prof-time! state #f) | |
771 | (set-last-start-time! state (get-internal-run-time)) | |
772 | (set-gc-time-taken! state (assq-ref (gc-stats) 'gc-time-taken)) | |
773 | (add-hook! after-gc-hook gc-callback) | |
774 | (set-vm-trace-level! (1+ (vm-trace-level))) | |
775 | #t)) | |
776 | ||
777 | (define (stop) | |
778 | (set-profile-level! state (- (profile-level state) 1)) | |
779 | (when (zero? (profile-level state)) | |
780 | (set-gc-time-taken! state | |
781 | (- (assq-ref (gc-stats) 'gc-time-taken) | |
782 | (gc-time-taken state))) | |
783 | (remove-hook! after-gc-hook gc-callback) | |
784 | (accumulate-time state (get-internal-run-time)) | |
785 | (set-last-start-time! state #f))) | |
786 | ||
787 | (dynamic-wind | |
788 | (lambda () | |
789 | (reset) | |
790 | (start)) | |
791 | (lambda () | |
792 | (let lp ((i loop)) | |
793 | (unless (zero? i) | |
794 | (thunk) | |
795 | (lp (1- i))))) | |
796 | (lambda () | |
797 | (stop) | |
798 | (statprof-display)))))) |