X-Git-Url: http://git.hcoop.net/bpt/guile.git/blobdiff_plain/fd5dfcce807482a8c46e6c47cc6b2fb97c04fd74..d76d80d23cc001c6582fa5ca40e552815311335a:/module/statprof.scm diff --git a/module/statprof.scm b/module/statprof.scm index b43210533..e613aad2d 100644 --- a/module/statprof.scm +++ b/module/statprof.scm @@ -1,7 +1,7 @@ ;;;; (statprof) -- a statistical profiler for Guile ;;;; -*-scheme-*- ;;;; -;;;; Copyright (C) 2009, 2010, 2011, 2013, 2014 Free Software Foundation, Inc. +;;;; Copyright (C) 2009, 2010, 2011, 2013-2015 Free Software Foundation, Inc. ;;;; Copyright (C) 2004, 2009 Andy Wingo ;;;; Copyright (C) 2001 Rob Browning ;;;; @@ -28,17 +28,14 @@ ;;; A simple use of statprof would look like this: ;;; ;;; @example -;;; (statprof-reset 0 50000 #t) -;;; (statprof-start) -;;; (do-something) -;;; (statprof-stop) -;;; (statprof-display) +;;; (statprof (lambda () (do-something)) +;;; #:hz 100 +;;; #:count-calls? #t) ;;; @end example ;;; -;;; This would reset statprof, clearing all accumulated statistics, then -;;; start profiling, run some code, stop profiling, and finally display a -;;; gprof flat-style table of statistics which will look something like -;;; this: +;;; This would run the thunk with statistical profiling, finally +;;; displaying a gprof flat-style table of statistics which could +;;; something like this: ;;; ;;; @example ;;; % cumulative self self total @@ -115,6 +112,7 @@ #:autoload (ice-9 format) (format) #:use-module (system vm vm) #:use-module (system vm frame) + #:use-module (system vm debug) #:use-module (system vm program) #:export (statprof-active? statprof-start @@ -132,6 +130,7 @@ statprof-call-data->stats statprof-stats-proc-name + statprof-stats-proc-source statprof-stats-%-time-in-proc statprof-stats-cum-secs-in-proc statprof-stats-self-secs-in-proc @@ -140,7 +139,8 @@ statprof-stats-cum-secs-per-call statprof-display - statprof-display-anomolies + statprof-display-anomalies + statprof-display-anomolies ; Deprecated spelling. statprof-fetch-stacks statprof-fetch-call-tree @@ -151,20 +151,96 @@ gcprof)) -;; This profiler tracks two numbers for every function called while -;; it's active. It tracks the total number of calls, and the number -;; of times the function was active when the sampler fired. -;; -;; Globally the profiler tracks the total time elapsed and the number -;; of times the sampler was fired. -;; -;; Right now, this profiler is not per-thread and is not thread safe. +;;; ~ Implementation notes ~ +;;; +;;; Statprof can be divided into two pieces: data collection and data +;;; analysis. +;;; +;;; The data collection runs concurrently with the program, and is +;;; designed to be as cheap as possible. The main data collection +;;; instrument is the stack sampler, driven by SIGPROF signals that are +;;; scheduled with periodic setitimer calls. The stack sampler simply +;;; looks at every frame on the stack, and writes a representation of +;;; the frame's procedure into a growable buffer. +;;; +;;; For most frames, this representation is the instruction pointer of +;;; that frame, because it's cheap to get and you can map from +;;; instruction pointer to procedure fairly cheaply. This won't +;;; distinguish between different closures which share the same code, +;;; but that is usually what we want anyway. +;;; +;;; One case in which we do want to distinguish closures is the case of +;;; primitive procedures. If slot 0 in the frame is a primitive +;;; procedure, we record the procedure's name into the buffer instead of +;;; the IP. It's fairly cheap to check whether a value is a primitive +;;; procedure, and then get its name, as its name is stored in the +;;; closure data. Calling procedure-name in the stack sampler isn't +;;; something you want to do for other kinds of procedures, though, as +;;; that involves grovelling the debug information. +;;; +;;; The other part of data collection is the exact call counter, which +;;; uses the VM's "apply" hook to record each procedure call. +;;; Naturally, this is quite expensive, and it is off by default. +;;; Running code at every procedure call effectively penalizes procedure +;;; calls. Still, it's useful sometimes. If the profiler state has a +;;; call-counts table, then calls will be counted. As with the stack +;;; counter, usually the key in the hash table is the code pointer of +;;; the procedure being called, except for primitive procedures, in +;;; which case it is the name of the primitive. The call counter can +;;; also see calls of non-programs, for example in the case of +;;; applicable structs. In that case the key is the procedure itself. +;;; +;;; After collection is finished, the data can be analyzed. The first +;;; step is usually to run over the stack traces, tabulating sample +;;; counts by procedure; the stack-samples->procedure-data does that. +;;; The result of stack-samples->procedure-data is a hash table mapping +;;; procedures to "call data" records. The call data values are exposed +;;; to users via the statprof-fold-call-data procedure. +;;; +;;; Usually all the analysis is triggered by calling statprof-display, +;;; or having the statprof procedure call it for you. +;;; +;;; The other thing we can do is to look at the stacks themselves, for +;;; example via statprof-fetch-call-tree. +;;; + +;;; ~ Threads and state ~ +;;; +;;; The state of the profiler is contained in a record, which is +;;; bound to a thread-local parameter. The accurate call counter uses +;;; the VM apply hook, which is also local to the current thread, so all +;;; is good there. +;;; +;;; The problem comes in the statistical stack sampler's use of +;;; `setitimer' and SIGPROF. The timer manipulated by setitimer is a +;;; whole-process timer, so it decrements as other threads execute, +;;; which is the wrong thing if you want to profile just one thread. On +;;; the other hand, SIGPROF is delivered to the process as a whole, +;;; which is fine given Guile's signal-handling thread, but then only +;;; delivered to the thread running statprof, which isn't the right +;;; thing if you want to profile the whole system. +;;; +;;; The summary is that statprof works more or less well as a per-thread +;;; profiler if no other threads are running on their own when +;;; profiling. If the other threads are running on behalf of the thread +;;; being profiled (as via futures or parallel marking) things still +;;; mostly work as expected. You can run statprof in one thread, +;;; finish, and then run statprof in another thread, and the profile +;;; runs won't affect each other. But if you want true per-thread +;;; profiles when other things are happening in the process, including +;;; other statprof runs, or whole-process profiles with per-thread +;;; breakdowns, the use of setitimer currently prevents that. +;;; +;;; The solution would be to switch to POSIX.1-2001's timer_create(2), +;;; and to add some more threading-related API to statprof. Some other +;;; day. +;;; (define-record-type (make-state accumulated-time last-start-time sample-count - sampling-frequency remaining-prof-time profile-level - count-calls? gc-time-taken record-full-stacks? - stacks procedure-data inside-profiler?) + sampling-period remaining-prof-time profile-level + call-counts gc-time-taken inside-profiler? + prev-sigprof-handler outer-cut buffer buffer-pos) state? ;; Total time so far. (accumulated-time accumulated-time set-accumulated-time!) @@ -172,34 +248,44 @@ (last-start-time last-start-time set-last-start-time!) ;; Total count of sampler calls. (sample-count sample-count set-sample-count!) - ;; (seconds . microseconds) - (sampling-frequency sampling-frequency set-sampling-frequency!) + ;; Microseconds. + (sampling-period sampling-period set-sampling-period!) ;; Time remaining when prof suspended. (remaining-prof-time remaining-prof-time set-remaining-prof-time!) ;; For user start/stop nesting. (profile-level profile-level set-profile-level!) - ;; Whether to catch apply-frame. - (count-calls? count-calls? set-count-calls?!) + ;; Hash table mapping ip -> call count, or #f if not counting calls. + (call-counts call-counts set-call-counts!) ;; GC time between statprof-start and statprof-stop. (gc-time-taken gc-time-taken set-gc-time-taken!) - ;; If #t, stash away the stacks for future analysis. - (record-full-stacks? record-full-stacks? set-record-full-stacks?!) - ;; If record-full-stacks?, the stashed full stacks. - (stacks stacks set-stacks!) - ;; A hash where the key is the function object itself and the value is - ;; the data. The data will be a vector like this: - ;; #(name call-count cum-sample-count self-sample-count) - (procedure-data procedure-data set-procedure-data!) ;; True if we are inside the profiler. - (inside-profiler? inside-profiler? set-inside-profiler?!)) + (inside-profiler? inside-profiler? set-inside-profiler?!) + ;; Previous sigprof handler. + (prev-sigprof-handler prev-sigprof-handler set-prev-sigprof-handler!) + ;; Outer stack cut, or 0. + (outer-cut outer-cut) + ;; Stack samples. + (buffer buffer set-buffer!) + (buffer-pos buffer-pos set-buffer-pos!)) (define profiler-state (make-parameter #f)) +(define (fresh-buffer) + (make-vector 1024 #f)) + +(define (expand-buffer buf) + (let* ((size (vector-length buf)) + (new (make-vector (* size 2) #f))) + (vector-move-left! buf 0 (vector-length buf) new 0) + new)) + (define* (fresh-profiler-state #:key (count-calls? #f) - (sampling-frequency '(0 . 10000)) - (full-stacks? #f)) - (make-state 0 #f 0 sampling-frequency #f 0 count-calls? 0 #f '() - (make-hash-table) #f)) + (sampling-period 10000) + (outer-cut 0)) + (make-state 0 #f 0 + sampling-period 0 0 + (and count-calls? (make-hash-table)) 0 #f + #f outer-cut (fresh-buffer) 0)) (define (ensure-profiler-state) (or (profiler-state) @@ -211,133 +297,94 @@ (or (profiler-state) (error "expected there to be a profiler state"))) -(define-record-type call-data - (make-call-data proc call-count cum-sample-count self-sample-count) - call-data? - (proc call-data-proc) - (call-count call-data-call-count set-call-data-call-count!) - (cum-sample-count call-data-cum-sample-count set-call-data-cum-sample-count!) - (self-sample-count call-data-self-sample-count set-call-data-self-sample-count!)) - -(define (call-data-name cd) (procedure-name (call-data-proc cd))) -(define (call-data-printable cd) - (or (call-data-name cd) - (with-output-to-string (lambda () (write (call-data-proc cd)))))) - -(define (inc-call-data-call-count! cd) - (set-call-data-call-count! cd (1+ (call-data-call-count cd)))) -(define (inc-call-data-cum-sample-count! cd) - (set-call-data-cum-sample-count! cd (1+ (call-data-cum-sample-count cd)))) -(define (inc-call-data-self-sample-count! cd) - (set-call-data-self-sample-count! cd (1+ (call-data-self-sample-count cd)))) - (define (accumulate-time state stop-time) (set-accumulated-time! state (+ (accumulated-time state) (- stop-time (last-start-time state))))) -(define (get-call-data state proc) - (let ((k (cond - ((program? proc) (program-code proc)) - (else proc)))) - (or (hashv-ref (procedure-data state) k) - (let ((call-data (make-call-data proc 0 0 0))) - (hashv-set! (procedure-data state) k call-data) - call-data)))) - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; SIGPROF handler -;; FIXME: Instead of this messing about with hash tables and -;; frame-procedure, just record the stack of return addresses into a -;; growable vector, and resolve them to procedures when analyzing -;; instead of at collection time. -;; (define (sample-stack-procs state stack) - (let ((stacklen (stack-length stack)) - (hit-count-call? #f)) - - (when (record-full-stacks? state) - (set-stacks! state (cons stack (stacks state)))) - - (set-sample-count! state (+ (sample-count state) 1)) - ;; Now accumulate stats for the whole stack. - (let loop ((frame (stack-ref stack 0)) - (procs-seen (make-hash-table 13)) - (self #f)) - (cond - ((not frame) - (hash-fold - (lambda (proc val accum) - (inc-call-data-cum-sample-count! - (get-call-data state proc))) - #f - procs-seen) - (and=> (and=> self (lambda (proc) - (get-call-data state proc))) - inc-call-data-self-sample-count!)) - ((frame-procedure frame) - => (lambda (proc) - (cond - ((eq? proc count-call) - ;; We're not supposed to be sampling count-call and - ;; its sub-functions, so loop again with a clean - ;; slate. - (set! hit-count-call? #t) - (loop (frame-previous frame) (make-hash-table 13) #f)) - (else - (hashq-set! procs-seen proc #t) - (loop (frame-previous frame) - procs-seen - (or self proc)))))) - (else - (loop (frame-previous frame) procs-seen self)))) - hit-count-call?)) - -(define (profile-signal-handler sig) - (define state (existing-profiler-state)) + (set-sample-count! state (+ (sample-count state) 1)) + + (let lp ((frame (stack-ref stack 0)) + (len (stack-length stack)) + (buffer (buffer state)) + (pos (buffer-pos state))) + (define (write-sample sample) + (vector-set! buffer pos sample)) + (define (continue pos) + (lp (frame-previous frame) (1- len) buffer pos)) + (define (write-sample-and-continue sample) + (write-sample sample) + (continue (1+ pos))) + (cond + ((= pos (vector-length buffer)) + (lp frame len (expand-buffer buffer) pos)) + ((or (zero? len) (not frame)) + (write-sample #f) + (set-buffer! state buffer) + (set-buffer-pos! state (1+ pos))) + (else + (let ((proc (frame-procedure frame))) + (write-sample-and-continue (if (primitive? proc) + (procedure-name proc) + (frame-instruction-pointer frame)))))))) + +(define (reset-sigprof-timer usecs) + ;; Guile's setitimer binding is terrible. + (let ((prev (setitimer ITIMER_PROF 0 0 0 usecs))) + (+ (* (caadr prev) #e1e6) (cdadr prev)))) + +(define profile-signal-handler + (let () + (define (profile-signal-handler sig) + (define state (existing-profiler-state)) + + (set-inside-profiler?! state #t) + + (when (positive? (profile-level state)) + (let* ((stop-time (get-internal-run-time)) + ;; Cut down to the signal handler. Note that this will + ;; only work if statprof.scm is compiled; otherwise we + ;; get `eval' on the stack instead, because if it's not + ;; compiled, profile-signal-handler is a thunk that + ;; tail-calls eval. For the same reason we define the + ;; handler in an inner letrec, so that the compiler sees + ;; the inner reference to profile-signal-handler as the + ;; same as the procedure, and therefore keeps slot 0 + ;; alive. Nastiness, that. + (stack + (or (make-stack #t profile-signal-handler (outer-cut state)) + (pk 'what! (make-stack #t))))) + + (sample-stack-procs state stack) + (accumulate-time state stop-time) + (set-last-start-time! state (get-internal-run-time)) - (set-inside-profiler?! state #t) - - ;; FIXME: with-statprof should be able to set an outer frame for the - ;; stack cut - (when (positive? (profile-level state)) - (let* ((stop-time (get-internal-run-time)) - ;; cut down to the signal handler. note that this will only - ;; work if statprof.scm is compiled; otherwise we get - ;; `eval' on the stack instead, because if it's not - ;; compiled, profile-signal-handler is a thunk that - ;; tail-calls eval. perhaps we should always compile the - ;; signal handler instead... - (stack (or (make-stack #t profile-signal-handler) - (pk 'what! (make-stack #t))))) - - (sample-stack-procs state stack) - (accumulate-time state stop-time) - (set-last-start-time! state (get-internal-run-time)) + (reset-sigprof-timer (sampling-period state)))) - (setitimer ITIMER_PROF - 0 0 - (car (sampling-frequency state)) - (cdr (sampling-frequency state))))) - - (set-inside-profiler?! state #f)) + (set-inside-profiler?! state #f)) + profile-signal-handler)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Count total calls. (define (count-call frame) - (define state (existing-profiler-state)) + (let ((state (existing-profiler-state))) + (unless (inside-profiler? state) + (accumulate-time state (get-internal-run-time)) - (unless (inside-profiler? state) - (accumulate-time state (get-internal-run-time)) + (let* ((key (let ((proc (frame-procedure frame))) + (cond + ((primitive? proc) (procedure-name proc)) + ((program? proc) (program-code proc)) + (else proc)))) + (handle (hashv-create-handle! (call-counts state) key 0))) + (set-cdr! handle (1+ (cdr handle)))) - (and=> (frame-procedure frame) - (lambda (proc) - (inc-call-data-call-count! - (get-call-data state proc)))) - - (set-last-start-time! state (get-internal-run-time)))) + (set-last-start-time! state (get-internal-run-time))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -348,50 +395,44 @@ than @code{statprof-stop}, @code{#f} otherwise." (and state (positive? (profile-level state)))) ;; Do not call this from statprof internal functions -- user only. -(define (statprof-start) +(define* (statprof-start #:optional (state (ensure-profiler-state))) "Start the profiler.@code{}" ;; After some head-scratching, I don't *think* I need to mask/unmask ;; signals here, but if I'm wrong, please let me know. - (define state (ensure-profiler-state)) (set-profile-level! state (+ (profile-level state) 1)) (when (= (profile-level state) 1) - (let* ((rpt (remaining-prof-time state)) - (use-rpt? (and rpt - (or (positive? (car rpt)) - (positive? (cdr rpt)))))) - (set-remaining-prof-time! state #f) + (let ((rpt (remaining-prof-time state))) + (set-remaining-prof-time! state 0) ;; FIXME: Use per-thread run time. (set-last-start-time! state (get-internal-run-time)) (set-gc-time-taken! state (assq-ref (gc-stats) 'gc-time-taken)) - (if use-rpt? - (setitimer ITIMER_PROF 0 0 (car rpt) (cdr rpt)) - (setitimer ITIMER_PROF - 0 0 - (car (sampling-frequency state)) - (cdr (sampling-frequency state)))) - (when (count-calls? state) - (add-hook! (vm-apply-hook) count-call)) - (set-vm-trace-level! (1+ (vm-trace-level))) + (let ((prev (sigaction SIGPROF profile-signal-handler))) + (set-prev-sigprof-handler! state (car prev))) + (reset-sigprof-timer (if (zero? rpt) (sampling-period state) rpt)) + (when (call-counts state) + (add-hook! (vm-apply-hook) count-call) + (set-vm-trace-level! (1+ (vm-trace-level)))) #t))) ;; Do not call this from statprof internal functions -- user only. -(define (statprof-stop) +(define* (statprof-stop #:optional (state (ensure-profiler-state))) "Stop the profiler.@code{}" ;; After some head-scratching, I don't *think* I need to mask/unmask ;; signals here, but if I'm wrong, please let me know. - (define state (ensure-profiler-state)) (set-profile-level! state (- (profile-level state) 1)) (when (zero? (profile-level state)) + (when (call-counts state) + (set-vm-trace-level! (1- (vm-trace-level))) + (remove-hook! (vm-apply-hook) count-call)) (set-gc-time-taken! state (- (assq-ref (gc-stats) 'gc-time-taken) (gc-time-taken state))) - (set-vm-trace-level! (1- (vm-trace-level))) - (when (count-calls? state) - (remove-hook! (vm-apply-hook) count-call)) ;; I believe that we need to do this before getting the time ;; (unless we want to make things even more complicated). - (set-remaining-prof-time! state (setitimer ITIMER_PROF 0 0 0 0)) + (set-remaining-prof-time! state (reset-sigprof-timer 0)) (accumulate-time state (get-internal-run-time)) + (sigaction SIGPROF (prev-sigprof-handler state)) + (set-prev-sigprof-handler! state #f) (set-last-start-time! state #f))) (define* (statprof-reset sample-seconds sample-microseconds count-calls? @@ -399,21 +440,140 @@ than @code{statprof-stop}, @code{#f} otherwise." "Reset the statprof sampler interval to @var{sample-seconds} and @var{sample-microseconds}. If @var{count-calls?} is true, arrange to instrument procedure calls as well as collecting statistical profiling -data. If @var{full-stacks?} is true, collect all sampled stacks into a -list for later analysis. - -Enables traps and debugging as necessary." +data. (The optional @var{full-stacks?} argument is deprecated; statprof +always collects full stacks.)" (when (statprof-active?) (error "Can't reset profiler while profiler is running.")) - (let ((state (fresh-profiler-state #:count-calls? count-calls? - #:sampling-frequency - (cons sample-seconds sample-microseconds) - #:full-stacks? full-stacks?))) - (profiler-state state) - (sigaction SIGPROF profile-signal-handler) - #t)) - -(define (statprof-fold-call-data proc init) + (profiler-state + (fresh-profiler-state #:count-calls? count-calls? + #:sampling-period (+ (* sample-seconds #e1e6) + sample-microseconds))) + (values)) + +(define-record-type call-data + (make-call-data name printable source + call-count cum-sample-count self-sample-count) + call-data? + (name call-data-name) + (printable call-data-printable) + (source call-data-source) + (call-count call-data-call-count set-call-data-call-count!) + (cum-sample-count call-data-cum-sample-count set-call-data-cum-sample-count!) + (self-sample-count call-data-self-sample-count set-call-data-self-sample-count!)) + +(define (source->string source) + (format #f "~a:~a:~a" + (or (source-file source) "") + (source-line-for-user source) + (source-column source))) + +(define (program-debug-info-printable pdi) + (let* ((addr (program-debug-info-addr pdi)) + (name (or (and=> (program-debug-info-name pdi) symbol->string) + (string-append "#x" (number->string addr 16)))) + (loc (and=> (find-source-for-addr addr) source->string))) + (if loc + (string-append name " at " loc) + name))) + +(define (addr->pdi addr cache) + (cond + ((hashv-get-handle cache addr) => cdr) + (else + (let ((data (find-program-debug-info addr))) + (hashv-set! cache addr data) + data)))) + +(define (addr->printable addr pdi) + (or (and=> (and=> pdi program-debug-info-name) symbol->string) + (string-append "anon #x" (number->string addr 16)))) + +(define (inc-call-data-cum-sample-count! cd) + (set-call-data-cum-sample-count! cd (1+ (call-data-cum-sample-count cd)))) +(define (inc-call-data-self-sample-count! cd) + (set-call-data-self-sample-count! cd (1+ (call-data-self-sample-count cd)))) + +(define (stack-samples->procedure-data state) + (let ((table (make-hash-table)) + (addr-cache (make-hash-table)) + (call-counts (call-counts state)) + (buffer (buffer state)) + (len (buffer-pos state))) + (define (addr->call-data addr) + (let* ((pdi (addr->pdi addr addr-cache)) + (entry (if pdi (program-debug-info-addr pdi) addr))) + (or (hashv-ref table entry) + (let ((data (make-call-data (and=> pdi program-debug-info-name) + (addr->printable entry pdi) + (find-source-for-addr entry) + (and call-counts + (hashv-ref call-counts entry)) + 0 + 0))) + (hashv-set! table entry data) + data)))) + + (define (callee->call-data callee) + (cond + ((number? callee) (addr->call-data callee)) + ((hashv-ref table callee)) + (else + (let ((data (make-call-data + (cond ((procedure? callee) (procedure-name callee)) + ;; a primitive + ((symbol? callee) callee) + (else #f)) + (with-output-to-string (lambda () (write callee))) + #f + (and call-counts (hashv-ref call-counts callee)) + 0 + 0))) + (hashv-set! table callee data) + data)))) + + (when call-counts + (hash-for-each (lambda (callee count) + (callee->call-data callee)) + call-counts)) + + (let visit-stacks ((pos 0)) + (cond + ((< pos len) + ;; FIXME: if we are counting all procedure calls, and + ;; count-call is on the stack, we need to not count the part + ;; of the stack that is within count-call. + (inc-call-data-self-sample-count! + (callee->call-data (vector-ref buffer pos))) + (let visit-stack ((pos pos)) + (cond + ((vector-ref buffer pos) + => (lambda (callee) + (inc-call-data-cum-sample-count! (callee->call-data callee)) + (visit-stack (1+ pos)))) + (else + (visit-stacks (1+ pos)))))) + (else table))))) + +(define (stack-samples->callee-lists state) + (let ((buffer (buffer state)) + (len (buffer-pos state))) + (let visit-stacks ((pos 0) (out '())) + (cond + ((< pos len) + ;; FIXME: if we are counting all procedure calls, and + ;; count-call is on the stack, we need to not count the part + ;; of the stack that is within count-call. + (let visit-stack ((pos pos) (stack '())) + (cond + ((vector-ref buffer pos) + => (lambda (callee) + (visit-stack (1+ pos) (cons callee stack)))) + (else + (visit-stacks (1+ pos) (cons (reverse stack) out)))))) + (else (reverse out)))))) + +(define* (statprof-fold-call-data proc init #:optional + (state (existing-profiler-state))) "Fold @var{proc} over the call-data accumulated by statprof. Cannot be called while statprof is active. @var{proc} should take two arguments, @code{(@var{call-data} @var{prior-result})}. @@ -426,60 +586,69 @@ it represents different functions with the same name." (lambda (key value prior-result) (proc value prior-result)) init - (procedure-data (existing-profiler-state)))) + (stack-samples->procedure-data state))) -(define (statprof-proc-call-data proc) +(define* (statprof-proc-call-data proc #:optional + (state (existing-profiler-state))) "Returns the call-data associated with @var{proc}, or @code{#f} if none is available." (when (statprof-active?) (error "Can't call statprof-proc-call-data while profiler is running.")) - (get-call-data (existing-profiler-state) proc)) + (hashv-ref (stack-samples->procedure-data state) + (cond + ((primitive? proc) (procedure-name proc)) + ((program? proc) (program-code proc)) + (else (program-code proc))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Stats +(define-record-type stats + (make-stats proc-name proc-source + %-time-in-proc cum-secs-in-proc self-secs-in-proc + calls self-secs-per-call cum-secs-per-call) + stats? + (proc-name statprof-stats-proc-name) + (proc-source statprof-stats-proc-source) + (%-time-in-proc statprof-stats-%-time-in-proc) + (cum-secs-in-proc statprof-stats-cum-secs-in-proc) + (self-secs-in-proc statprof-stats-self-secs-in-proc) + (calls statprof-stats-calls) + (self-secs-per-call statprof-stats-self-secs-per-call) + (cum-secs-per-call statprof-stats-cum-secs-per-call)) + (define (statprof-call-data->stats call-data) "Returns an object of type @code{statprof-stats}." - ;; returns (vector proc-name - ;; %-time-in-proc - ;; cum-seconds-in-proc - ;; self-seconds-in-proc - ;; num-calls - ;; self-secs-per-call - ;; total-secs-per-call) - (define state (existing-profiler-state)) - (let* ((proc-name (call-data-printable call-data)) + (let* ((proc-name (call-data-name call-data)) + (proc-source (and=> (call-data-source call-data) source->string)) (self-samples (call-data-self-sample-count call-data)) (cum-samples (call-data-cum-sample-count call-data)) - (all-samples (statprof-sample-count)) - (secs-per-sample (/ (statprof-accumulated-time) - (statprof-sample-count))) - (num-calls (and (count-calls? state) (statprof-call-data-calls call-data)))) - - (vector proc-name - (* (/ self-samples all-samples) 100.0) - (* cum-samples secs-per-sample 1.0) - (* self-samples secs-per-sample 1.0) - num-calls - (and num-calls ;; maybe we only sampled in children - (if (zero? self-samples) 0.0 - (/ (* self-samples secs-per-sample) 1.0 num-calls))) - (and num-calls ;; cum-samples must be positive - (/ (* cum-samples secs-per-sample) - 1.0 - ;; num-calls might be 0 if we entered statprof during the - ;; dynamic extent of the call - (max num-calls 1)))))) - -(define (statprof-stats-proc-name stats) (vector-ref stats 0)) -(define (statprof-stats-%-time-in-proc stats) (vector-ref stats 1)) -(define (statprof-stats-cum-secs-in-proc stats) (vector-ref stats 2)) -(define (statprof-stats-self-secs-in-proc stats) (vector-ref stats 3)) -(define (statprof-stats-calls stats) (vector-ref stats 4)) -(define (statprof-stats-self-secs-per-call stats) (vector-ref stats 5)) -(define (statprof-stats-cum-secs-per-call stats) (vector-ref stats 6)) + (all-samples (statprof-sample-count state)) + (secs-per-sample (/ (statprof-accumulated-time state) + (statprof-sample-count state))) + (num-calls (and (call-counts state) + (statprof-call-data-calls call-data)))) + + (make-stats (or proc-name + ;; If there is no name and no source, fall back to + ;; printable. + (and (not proc-source) (call-data-printable call-data))) + proc-source + (* (/ self-samples all-samples) 100.0) + (* cum-samples secs-per-sample 1.0) + (* self-samples secs-per-sample 1.0) + num-calls + (and num-calls ;; maybe we only sampled in children + (if (zero? self-samples) 0.0 + (/ (* self-samples secs-per-sample) 1.0 num-calls))) + (and num-calls ;; cum-samples must be positive + (/ (* cum-samples secs-per-sample) + 1.0 + ;; num-calls might be 0 if we entered statprof during the + ;; dynamic extent of the call + (max num-calls 1)))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -492,67 +661,73 @@ none is available." (statprof-stats-cum-secs-in-proc y)) diff)))) -(define* (statprof-display #:optional (port (current-output-port))) +(define* (statprof-display #:optional (port (current-output-port)) + (state (existing-profiler-state))) "Displays a gprof-like summary of the statistics collected. Unless an optional @var{port} argument is passed, uses the current output port." - (define state (existing-profiler-state)) - (cond - ((zero? (statprof-sample-count)) + ((zero? (statprof-sample-count state)) (format port "No samples recorded.\n")) (else (let* ((stats-list (statprof-fold-call-data (lambda (data prior-value) (cons (statprof-call-data->stats data) prior-value)) - '())) + '() + state)) (sorted-stats (sort stats-list stats-sorter))) (define (display-stats-line stats) - (if (count-calls? state) - (format port "~6,2f ~9,2f ~9,2f ~7d ~8,2f ~8,2f " - (statprof-stats-%-time-in-proc stats) - (statprof-stats-cum-secs-in-proc stats) - (statprof-stats-self-secs-in-proc stats) - (statprof-stats-calls stats) - (* 1000 (statprof-stats-self-secs-per-call stats)) - (* 1000 (statprof-stats-cum-secs-per-call stats))) - (format port "~6,2f ~9,2f ~9,2f " - (statprof-stats-%-time-in-proc stats) - (statprof-stats-cum-secs-in-proc stats) - (statprof-stats-self-secs-in-proc stats))) - (display (statprof-stats-proc-name stats) port) - (newline port)) + (format port "~6,2f ~9,2f ~9,2f" + (statprof-stats-%-time-in-proc stats) + (statprof-stats-cum-secs-in-proc stats) + (statprof-stats-self-secs-in-proc stats)) + (if (call-counts state) + (if (statprof-stats-calls stats) + (format port " ~7d ~8,2f ~8,2f " + (statprof-stats-calls stats) + (* 1000 (statprof-stats-self-secs-per-call stats)) + (* 1000 (statprof-stats-cum-secs-per-call stats))) + (format port " ")) + (display " " port)) + (let ((source (statprof-stats-proc-source stats)) + (name (statprof-stats-proc-name stats))) + (when source + (display source port) + (when name + (display ":" port))) + (when name + (display name port)) + (newline port))) - (if (count-calls? state) + (if (call-counts state) (begin (format port "~5a ~10a ~7a ~8a ~8a ~8a ~8@a\n" "% " "cumulative" "self" "" "self" "total" "") - (format port "~5a ~9a ~8a ~8a ~8a ~8a ~8@a\n" - "time" "seconds" "seconds" "calls" "ms/call" "ms/call" "name")) + (format port "~5a ~9a ~8a ~8a ~8a ~8a ~a\n" + "time" "seconds" "seconds" "calls" "ms/call" "ms/call" "procedure")) (begin - (format port "~5a ~10a ~7a ~8@a\n" + (format port "~5a ~10a ~7a ~8a\n" "%" "cumulative" "self" "") - (format port "~5a ~10a ~7a ~8@a\n" - "time" "seconds" "seconds" "name"))) + (format port "~5a ~10a ~7a ~a\n" + "time" "seconds" "seconds" "procedure"))) (for-each display-stats-line sorted-stats) (display "---\n" port) - (simple-format #t "Sample count: ~A\n" (statprof-sample-count)) + (simple-format #t "Sample count: ~A\n" (statprof-sample-count state)) (simple-format #t "Total time: ~A seconds (~A seconds in GC)\n" - (statprof-accumulated-time) + (statprof-accumulated-time state) (/ (gc-time-taken state) 1.0 internal-time-units-per-second)))))) -(define (statprof-display-anomolies) - "A sanity check that attempts to detect anomolies in statprof's +(define* (statprof-display-anomalies #:optional (state + (existing-profiler-state))) + "A sanity check that attempts to detect anomalies in statprof's statistics.@code{}" - (define state (existing-profiler-state)) - (statprof-fold-call-data (lambda (data prior-value) - (when (and (count-calls? state) + (when (and (call-counts state) (zero? (call-data-call-count data)) (positive? (call-data-cum-sample-count data))) (simple-format #t @@ -560,35 +735,34 @@ statistics.@code{}" (call-data-name data) (call-data-call-count data) (call-data-cum-sample-count data)))) - #f) - (simple-format #t "Total time: ~A\n" (statprof-accumulated-time)) - (simple-format #t "Sample count: ~A\n" (statprof-sample-count))) + #f + state) + (simple-format #t "Total time: ~A\n" (statprof-accumulated-time state)) + (simple-format #t "Sample count: ~A\n" (statprof-sample-count state))) + +(define (statprof-display-anomolies) + (issue-deprecation-warning "statprof-display-anomolies is a misspelling. " + "Use statprof-display-anomalies instead.") + (statprof-display-anomalies)) -(define (statprof-accumulated-time) +(define* (statprof-accumulated-time #:optional (state + (existing-profiler-state))) "Returns the time accumulated during the last statprof run.@code{}" - (when (statprof-active?) - (error "Can't get accumulated time while profiler is running.")) - (/ (accumulated-time (existing-profiler-state)) 1.0 internal-time-units-per-second)) + (/ (accumulated-time state) 1.0 internal-time-units-per-second)) -(define (statprof-sample-count) +(define* (statprof-sample-count #:optional (state (existing-profiler-state))) "Returns the number of samples taken during the last statprof run.@code{}" - (when (statprof-active?) - (error "Can't get sample count while profiler is running.")) - (sample-count (existing-profiler-state))) + (sample-count state)) (define statprof-call-data-name call-data-name) (define statprof-call-data-calls call-data-call-count) (define statprof-call-data-cum-samples call-data-cum-sample-count) (define statprof-call-data-self-samples call-data-self-sample-count) -(define (statprof-fetch-stacks) +(define* (statprof-fetch-stacks #:optional (state (existing-profiler-state))) "Returns a list of stacks, as they were captured since the last call -to @code{statprof-reset}. - -Note that stacks are only collected if the @var{full-stacks?} argument -to @code{statprof-reset} is true." - (define state (existing-profiler-state)) - (stacks state)) +to @code{statprof-reset}." + (stack-samples->callee-lists state)) (define procedure=? (lambda (a b) @@ -627,58 +801,69 @@ to @code{statprof-reset} is true." n-terminal (acons (caar in) (list (cdar in)) tails)))))) -(define (stack->procedures stack) - (filter identity - (unfold-right (lambda (x) (not x)) - frame-procedure - frame-previous - (stack-ref stack 0)))) - -(define (statprof-fetch-call-tree) +(define* (statprof-fetch-call-tree #:optional (state (existing-profiler-state))) "Return a call tree for the previous statprof run. The return value is a list of nodes, each of which is of the type: @code node ::= (@var{proc} @var{count} . @var{nodes}) @end code" - (define state (existing-profiler-state)) - (cons #t (lists->trees (map stack->procedures (stacks state)) procedure=?))) + (define (callee->printable callee) + (cond + ((number? callee) + (addr->printable callee (find-program-debug-info callee))) + (else + (with-output-to-string (lambda () (write callee)))))) + (define (memoizev/1 proc table) + (lambda (x) + (cond + ((hashv-get-handle table x) => cdr) + (else + (let ((res (proc x))) + (hashv-set! table x res) + res))))) + (let ((callee->printable (memoizev/1 callee->printable (make-hash-table)))) + (cons #t (lists->trees (map (lambda (callee-list) + (map callee->printable callee-list)) + (stack-samples->callee-lists state)) + equal?)))) + +(define (call-thunk thunk) + (call-with-values (lambda () (thunk)) + (lambda results + (apply values results)))) (define* (statprof thunk #:key (loop 1) (hz 100) (count-calls? #f) - (full-stacks? #f)) - "Profiles the execution of @var{thunk}. + (port (current-output-port)) full-stacks?) + "Profile the execution of @var{thunk}, and return its return values. -The stack will be sampled @var{hz} times per second, and the thunk itself will -be called @var{loop} times. +The stack will be sampled @var{hz} times per second, and the thunk +itself will be called @var{loop} times. If @var{count-calls?} is true, all procedure calls will be recorded. This -operation is somewhat expensive. - -If @var{full-stacks?} is true, at each sample, statprof will store away the -whole call tree, for later analysis. Use @code{statprof-fetch-stacks} or -@code{statprof-fetch-call-tree} to retrieve the last-stored stacks." +operation is somewhat expensive." - (let ((state (fresh-profiler-state))) + (let ((state (fresh-profiler-state #:count-calls? count-calls? + #:sampling-period + (inexact->exact (round (/ 1e6 hz))) + #:outer-cut + (program-address-range call-thunk)))) (parameterize ((profiler-state state)) (dynamic-wind (lambda () - (statprof-reset (inexact->exact (floor (/ 1 hz))) - (inexact->exact (* 1e6 (- (/ 1 hz) - (floor (/ 1 hz))))) - count-calls? - full-stacks?) - (statprof-start)) + (statprof-start state)) (lambda () (let lp ((i loop)) - (unless (zero? i) - (thunk) - (lp (1- i))))) + (unless (= i 1) + (call-thunk thunk) + (lp (1- i)))) + (call-thunk thunk)) (lambda () - (statprof-stop) - (statprof-display)))))) + (statprof-stop state) + (statprof-display port state)))))) (define-macro (with-statprof . args) - "Profiles the expressions in its body. + "Profile the expressions in the body, and return the body's return values. Keyword arguments: @@ -694,10 +879,6 @@ default: @code{20} @item #:count-calls? Whether to instrument each function call (expensive) -default: @code{#f} -@item #:full-stacks? -Whether to collect away all sampled stacks into a list - default: @code{#f} @end table" (define (kw-arg-ref kw args def) @@ -717,7 +898,7 @@ default: @code{#f} #:count-calls? ,(kw-arg-ref #:count-calls? args #f) #:full-stacks? ,(kw-arg-ref #:full-stacks? args #f))) -(define* (gcprof thunk #:key (loop 1) (full-stacks? #f)) +(define* (gcprof thunk #:key (loop 1) full-stacks? (port (current-output-port))) "Do an allocation profile of the execution of @var{thunk}. The stack will be sampled soon after every garbage collection, yielding @@ -725,74 +906,43 @@ an approximate idea of what is causing allocation in your program. Since GC does not occur very frequently, you may need to use the @var{loop} parameter, to cause @var{thunk} to be called @var{loop} -times. - -If @var{full-stacks?} is true, at each sample, statprof will store away the -whole call tree, for later analysis. Use @code{statprof-fetch-stacks} or -@code{statprof-fetch-call-tree} to retrieve the last-stored stacks." +times." - (let ((state (fresh-profiler-state))) + (let ((state (fresh-profiler-state #:outer-cut + (program-address-range call-thunk)))) (parameterize ((profiler-state state)) - - (define (reset) - (when (positive? (profile-level state)) - (error "Can't reset profiler while profiler is running.")) - (set-accumulated-time! state 0) - (set-last-start-time! state #f) - (set-sample-count! state 0) - (set-count-calls?! state #f) - (set-procedure-data! state (make-hash-table 131)) - (set-record-full-stacks?! state full-stacks?) - (set-stacks! state '())) - (define (gc-callback) - (cond - ((inside-profiler? state)) - (else + (unless (inside-profiler? state) (set-inside-profiler?! state #t) - ;; FIXME: should be able to set an outer frame for the stack cut (let ((stop-time (get-internal-run-time)) ;; Cut down to gc-callback, and then one before (the ;; after-gc async). See the note in profile-signal-handler ;; also. - (stack (or (make-stack #t gc-callback 0 1) + (stack (or (make-stack #t gc-callback (outer-cut state) 1) (pk 'what! (make-stack #t))))) (sample-stack-procs state stack) (accumulate-time state stop-time) (set-last-start-time! state (get-internal-run-time))) - - (set-inside-profiler?! state #f)))) - - (define (start) - (set-profile-level! state (+ (profile-level state) 1)) - (when (= (profile-level state) 1) - (set-remaining-prof-time! state #f) - (set-last-start-time! state (get-internal-run-time)) - (set-gc-time-taken! state (assq-ref (gc-stats) 'gc-time-taken)) - (add-hook! after-gc-hook gc-callback) - (set-vm-trace-level! (1+ (vm-trace-level))) - #t)) - (define (stop) - (set-profile-level! state (- (profile-level state) 1)) - (when (zero? (profile-level state)) - (set-gc-time-taken! state - (- (assq-ref (gc-stats) 'gc-time-taken) - (gc-time-taken state))) - (remove-hook! after-gc-hook gc-callback) - (accumulate-time state (get-internal-run-time)) - (set-last-start-time! state #f))) + (set-inside-profiler?! state #f))) (dynamic-wind (lambda () - (reset) - (start)) + (set-profile-level! state 1) + (set-last-start-time! state (get-internal-run-time)) + (set-gc-time-taken! state (assq-ref (gc-stats) 'gc-time-taken)) + (add-hook! after-gc-hook gc-callback)) (lambda () (let lp ((i loop)) (unless (zero? i) - (thunk) + (call-thunk thunk) (lp (1- i))))) (lambda () - (stop) - (statprof-display)))))) + (remove-hook! after-gc-hook gc-callback) + (set-gc-time-taken! state + (- (assq-ref (gc-stats) 'gc-time-taken) + (gc-time-taken state))) + (accumulate-time state (get-internal-run-time)) + (set-profile-level! state 0) + (statprof-display port state))))))