Merge commit 'f6ddf827f8f192af7a8cd255bd8374a0d38bbb74'
[bpt/guile.git] / module / system / vm / coverage.scm
1 ;;; -*- mode: scheme; coding: utf-8; -*-
2 ;;;
3 ;;; Copyright (C) 2010, 2013 Free Software Foundation, Inc.
4 ;;;
5 ;;; This library is free software; you can redistribute it and/or
6 ;;; modify it under the terms of the GNU Lesser General Public
7 ;;; License as published by the Free Software Foundation; either
8 ;;; version 3 of the License, or (at your option) any later version.
9 ;;;
10 ;;; This library is distributed in the hope that it will be useful,
11 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;; Lesser General Public License for more details.
14 ;;;
15 ;;; You should have received a copy of the GNU Lesser General Public
16 ;;; License along with this library; if not, write to the Free Software
17 ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 (define-module (system vm coverage)
20 #:use-module (system vm vm)
21 #:use-module (system vm frame)
22 #:use-module (system vm program)
23 #:use-module (system vm debug)
24 #:use-module (ice-9 format)
25 #:use-module (srfi srfi-1)
26 #:use-module (srfi srfi-9)
27 #:use-module (srfi srfi-9 gnu)
28 #:use-module (srfi srfi-11)
29 #:use-module (srfi srfi-26)
30 #:use-module (ice-9 match)
31 #:export (with-code-coverage
32 coverage-data?
33 instrumented-source-files
34 instrumented/executed-lines
35 line-execution-counts
36 procedure-execution-count
37 coverage-data->lcov))
38
39 ;;; Author: Ludovic Courtès
40 ;;;
41 ;;; Commentary:
42 ;;;
43 ;;; This module provides support to gather code coverage data by instrumenting
44 ;;; the VM.
45 ;;;
46 ;;; Code:
47
48 \f
49 ;;;
50 ;;; Gathering coverage data.
51 ;;;
52
53 (define (with-code-coverage thunk)
54 "Run THUNK, a zero-argument procedure, while instrumenting Guile's VM to
55 collect code coverage data. Return code coverage data and the values returned
56 by THUNK."
57
58 (define ip-counts
59 ;; A table mapping instruction pointers to the number of times they were
60 ;; executed.
61 (make-hash-table 5000))
62
63 (define (collect! frame)
64 ;; Update IP-COUNTS with info from FRAME.
65 (let* ((ip (frame-instruction-pointer frame))
66 (ip-entry (hashv-create-handle! ip-counts ip 0)))
67 (set-cdr! ip-entry (+ (cdr ip-entry) 1))))
68
69 ;; FIXME: It's unclear what the dynamic-wind is for, given that if the
70 ;; VM is different from the current one, continuations will not be
71 ;; resumable.
72 (call-with-values (lambda ()
73 (let ((level (vm-trace-level))
74 (hook (vm-next-hook)))
75 (dynamic-wind
76 (lambda ()
77 (set-vm-trace-level! (+ level 1))
78 (add-hook! hook collect!))
79 (lambda ()
80 (call-with-vm thunk))
81 (lambda ()
82 (set-vm-trace-level! level)
83 (remove-hook! hook collect!)))))
84 (lambda args
85 (apply values (make-coverage-data ip-counts) args))))
86
87
88 \f
89
90 ;;;
91 ;;; Source chunks.
92 ;;;
93
94 (define-record-type <source-chunk>
95 (make-source-chunk base length sources)
96 source-chunk?
97 (base source-chunk-base)
98 (length source-chunk-length)
99 (sources source-chunk-sources))
100
101 (set-record-type-printer!
102 <source-chunk>
103 (lambda (obj port)
104 (format port "<source-chunk #x~x-#x~x>"
105 (source-chunk-base obj)
106 (+ (source-chunk-base obj) (source-chunk-length obj)))))
107
108 (define (compute-source-chunk ctx)
109 "Build a sorted vector of source information for a given debugging
110 context (ELF image). The return value is a @code{<source-chunk>}, which also
111 records the address range to which the source information applies."
112 (make-source-chunk
113 (debug-context-base ctx)
114 (debug-context-length ctx)
115 ;; The source locations are sorted already, but collected in reverse order.
116 (list->vector (reverse! (fold-source-locations cons '() ctx)))))
117
118 (define (all-source-information)
119 "Build and return a vector of source information corresponding to all
120 loaded code. The vector will be sorted by ascending address order."
121 (sort! (list->vector (fold-all-debug-contexts
122 (lambda (ctx seed)
123 (cons (compute-source-chunk ctx) seed))
124 '()))
125 (lambda (x y)
126 (< (source-chunk-base x) (source-chunk-base y)))))
127
128 \f
129 ;;;
130 ;;; Coverage data summary.
131 ;;;
132
133 (define-record-type <coverage-data>
134 (%make-coverage-data ip-counts
135 sources
136 file->procedures
137 file->line-counts)
138 coverage-data?
139
140 ;; Mapping from instruction pointers to the number of times they were
141 ;; executed, as a sorted vector of IP-count pairs.
142 (ip-counts data-ip-counts)
143
144 ;; Complete source census at the time the coverage analysis was run, as a
145 ;; sorted vector of <source-chunk> values.
146 (sources data-sources)
147
148 ;; Mapping from source file names to lists of procedures defined in the file.
149 ;; FIXME.
150 (file->procedures data-file->procedures)
151
152 ;; Mapping from file names to hash tables, which in turn map from line numbers
153 ;; to execution counts.
154 (file->line-counts data-file->line-counts))
155
156 (set-record-type-printer!
157 <coverage-data>
158 (lambda (obj port)
159 (format port "<coverage-data ~x>" (object-address obj))))
160
161 (define (make-coverage-data ip-counts)
162 ;; Return a `coverage-data' object based on the coverage data available in
163 ;; IP-COUNTS. Precompute the other hash tables that make up `coverage-data'
164 ;; objects.
165 (let* ((all-sources (all-source-information))
166 (all-counts (sort! (list->vector (hash-fold acons '() ip-counts))
167 (lambda (x y)
168 (< (car x) (car y)))))
169 (file->procedures (make-hash-table 100))
170 (file->line-counts (make-hash-table 100))
171 (data (%make-coverage-data all-counts
172 all-sources
173 file->procedures
174 file->line-counts)))
175
176 (define (observe-execution-count! file line count)
177 ;; Make the execution count of FILE:LINE the maximum of its current value
178 ;; and COUNT. This is so that LINE's execution count is correct when
179 ;; several instruction pointers map to LINE.
180 (when file
181 (let ((file-entry (hash-create-handle! file->line-counts file #f)))
182 (if (not (cdr file-entry))
183 (set-cdr! file-entry (make-hash-table 500)))
184 (let ((line-entry (hashv-create-handle! (cdr file-entry) line 0)))
185 (set-cdr! line-entry (max (cdr line-entry) count))))))
186
187 ;; First, visit every known source location and mark it as instrumented but
188 ;; unvisited.
189 ;;
190 ;; FIXME: This is not always necessary. It's important to have the ability
191 ;; to know when a source location is not reached, but sometimes all we need
192 ;; to know is that a particular site *was* reached. In that case we
193 ;; wouldn't need to load up all the DWARF sections. As it is, though, we
194 ;; use the complete source census as part of the later phase.
195 (let visit-chunk ((chunk-idx 0))
196 (when (< chunk-idx (vector-length all-sources))
197 (match (vector-ref all-sources chunk-idx)
198 (($ <source-chunk> base chunk-length chunk-sources)
199 (let visit-source ((source-idx 0))
200 (when (< source-idx (vector-length chunk-sources))
201 (let ((s (vector-ref chunk-sources source-idx)))
202 (observe-execution-count! (source-file s) (source-line s) 0)
203 (visit-source (1+ source-idx)))))))
204 (visit-chunk (1+ chunk-idx))))
205
206 ;; Then, visit the measured execution counts, walking the complete source
207 ;; census at the same time. This allows us to map observed addresses to
208 ;; source locations. Record observed execution counts.
209 (let visit-chunk ((chunk-idx 0) (count-idx 0))
210 (when (< chunk-idx (vector-length all-sources))
211 (match (vector-ref all-sources chunk-idx)
212 (($ <source-chunk> base chunk-length chunk-sources)
213 (let visit-count ((count-idx count-idx) (source-idx 0) (source #f))
214 (when (< count-idx (vector-length all-counts))
215 (match (vector-ref all-counts count-idx)
216 ((ip . count)
217 (cond
218 ((< ip base)
219 ;; Address before chunk base; no corresponding source.
220 (visit-count (1+ count-idx) source-idx source))
221 ((< ip (+ base chunk-length))
222 ;; Address in chunk; count it.
223 (let visit-source ((source-idx source-idx) (source source))
224 (define (finish)
225 (when source
226 (observe-execution-count! (source-file source)
227 (source-line source)
228 count))
229 (visit-count (1+ count-idx) source-idx source))
230 (cond
231 ((< source-idx (vector-length chunk-sources))
232 (let ((source* (vector-ref chunk-sources source-idx)))
233 (if (<= (source-pre-pc source*) ip)
234 (visit-source (1+ source-idx) source*)
235 (finish))))
236 (else
237 (finish)))))
238 (else
239 ;; Address past chunk; fetch the next chunk.
240 (visit-chunk (1+ chunk-idx) count-idx)))))))))))
241
242 data))
243
244 (define (procedure-execution-count data proc)
245 "Return the number of times PROC's code was executed, according to DATA. When
246 PROC is a closure, the number of times its code was executed is returned, not
247 the number of times this code associated with this particular closure was
248 executed."
249 (define (binary-search v key val)
250 (let lp ((start 0) (end (vector-length v)))
251 (and (not (eqv? start end))
252 (let* ((idx (floor/ (+ start end) 2))
253 (elt (vector-ref v idx))
254 (val* (key elt)))
255 (cond
256 ((< val val*)
257 (lp start idx))
258 ((< val* val)
259 (lp (1+ idx) end))
260 (else elt))))))
261 (and (program? proc)
262 (match (binary-search (data-ip-counts data) car (program-code proc))
263 (#f 0)
264 ((ip . code) code))))
265
266 (define (instrumented/executed-lines data file)
267 "Return the number of instrumented and the number of executed source lines in
268 FILE according to DATA."
269 (define instr+exec
270 (and=> (hash-ref (data-file->line-counts data) file)
271 (lambda (line-counts)
272 (hash-fold (lambda (line count instr+exec)
273 (let ((instr (car instr+exec))
274 (exec (cdr instr+exec)))
275 (cons (+ 1 instr)
276 (if (> count 0)
277 (+ 1 exec)
278 exec))))
279 '(0 . 0)
280 line-counts))))
281
282 (values (car instr+exec) (cdr instr+exec)))
283
284 (define (line-execution-counts data file)
285 "Return a list of line number/execution count pairs for FILE, or #f if FILE
286 is not among the files covered by DATA."
287 (and=> (hash-ref (data-file->line-counts data) file)
288 (lambda (line-counts)
289 (hash-fold alist-cons '() line-counts))))
290
291 (define (instrumented-source-files data)
292 "Return the list of `instrumented' source files, i.e., source files whose code
293 was loaded at the time DATA was collected."
294 (hash-fold (lambda (file counts files)
295 (cons file files))
296 '()
297 (data-file->line-counts data)))
298
299 \f
300 ;;;
301 ;;; LCOV output.
302 ;;;
303
304 (define* (coverage-data->lcov data port)
305 "Traverse code coverage information DATA, as obtained with
306 `with-code-coverage', and write coverage information in the LCOV format to PORT.
307 The report will include all the modules loaded at the time coverage data was
308 gathered, even if their code was not executed."
309
310 ;; FIXME: Re-enable this code, but using for-each-elf-symbol on each source
311 ;; chunk. Use that to build a map of file -> proc-addr + line + name. Then
312 ;; use something like procedure-execution-count to get the execution count.
313 #;
314 (define (dump-function proc)
315 ;; Dump source location and basic coverage data for PROC.
316 (and (or (program? proc))
317 (let ((sources (program-sources* data proc)))
318 (and (pair? sources)
319 (let* ((line (source:line-for-user (car sources)))
320 (name (or (procedure-name proc)
321 (format #f "anonymous-l~a" line))))
322 (format port "FN:~A,~A~%" line name)
323 (and=> (procedure-execution-count data proc)
324 (lambda (count)
325 (format port "FNDA:~A,~A~%" count name))))))))
326
327 ;; Output per-file coverage data.
328 (format port "TN:~%")
329 (for-each (lambda (file)
330 (let ((path (search-path %load-path file)))
331 (if (string? path)
332 (begin
333 (format port "SF:~A~%" path)
334 #;
335 (for-each dump-function procs)
336 (for-each (lambda (line+count)
337 (let ((line (car line+count))
338 (count (cdr line+count)))
339 (format port "DA:~A,~A~%"
340 (+ 1 line) count)))
341 (line-execution-counts data file))
342 (let-values (((instr exec)
343 (instrumented/executed-lines data file)))
344 (format port "LH: ~A~%" exec)
345 (format port "LF: ~A~%" instr))
346 (format port "end_of_record~%"))
347 (begin
348 (format (current-error-port)
349 "skipping unknown source file: ~a~%"
350 file)))))
351 (instrumented-source-files data)))