Rename objcodes?.{scm,c,h} to loader.{scm,c,h}
[bpt/guile.git] / module / system / vm / disassembler.scm
1 ;;; Guile RTL disassembler
2
3 ;;; Copyright (C) 2001, 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
4 ;;;
5 ;;; This library is free software; you can redistribute it and/or
6 ;;; modify it under the terms of the GNU Lesser General Public
7 ;;; License as published by the Free Software Foundation; either
8 ;;; version 3 of the License, or (at your option) any later version.
9 ;;;
10 ;;; This library is distributed in the hope that it will be useful,
11 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;; Lesser General Public License for more details.
14 ;;;
15 ;;; You should have received a copy of the GNU Lesser General Public
16 ;;; License along with this library; if not, write to the Free Software
17 ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 ;;; Code:
20
21 (define-module (system vm disassembler)
22 #:use-module (language rtl)
23 #:use-module (system vm elf)
24 #:use-module (system vm debug)
25 #:use-module (system vm program)
26 #:use-module (system vm loader)
27 #:use-module (system foreign)
28 #:use-module (rnrs bytevectors)
29 #:use-module (ice-9 format)
30 #:use-module (ice-9 match)
31 #:use-module (ice-9 vlist)
32 #:use-module (srfi srfi-1)
33 #:use-module (srfi srfi-4)
34 #:export (disassemble-program
35 fold-program-code
36 disassemble-image
37 disassemble-file))
38
39 (define-syntax-rule (u32-ref buf n)
40 (bytevector-u32-native-ref buf (* n 4)))
41
42 (define-syntax-rule (s32-ref buf n)
43 (bytevector-s32-native-ref buf (* n 4)))
44
45 (define-syntax visit-opcodes
46 (lambda (x)
47 (syntax-case x ()
48 ((visit-opcodes macro arg ...)
49 (with-syntax (((inst ...)
50 (map (lambda (x) (datum->syntax #'macro x))
51 (instruction-list))))
52 #'(begin
53 (macro arg ... . inst)
54 ...))))))
55
56 (eval-when (expand compile load eval)
57 (define (id-append ctx a b)
58 (datum->syntax ctx (symbol-append (syntax->datum a) (syntax->datum b)))))
59
60 (define (unpack-scm n)
61 (pointer->scm (make-pointer n)))
62
63 (define (unpack-s24 s)
64 (if (zero? (logand s (ash 1 23)))
65 s
66 (- s (ash 1 24))))
67
68 (define (unpack-s32 s)
69 (if (zero? (logand s (ash 1 31)))
70 s
71 (- s (ash 1 32))))
72
73 (define-syntax disassembler
74 (lambda (x)
75 (define (parse-first-word word type)
76 (with-syntax ((word word))
77 (case type
78 ((U8_X24)
79 #'())
80 ((U8_U24)
81 #'((ash word -8)))
82 ((U8_L24)
83 #'((unpack-s24 (ash word -8))))
84 ((U8_U8_I16)
85 #'((logand (ash word -8) #xff)
86 (ash word -16)))
87 ((U8_U12_U12)
88 #'((logand (ash word -8) #xfff)
89 (ash word -20)))
90 ((U8_U8_U8_U8)
91 #'((logand (ash word -8) #xff)
92 (logand (ash word -16) #xff)
93 (ash word -24)))
94 (else
95 (error "bad kind" type)))))
96
97 (define (parse-tail-word word type)
98 (with-syntax ((word word))
99 (case type
100 ((U8_X24)
101 #'((logand word #ff)))
102 ((U8_U24)
103 #'((logand word #xff)
104 (ash word -8)))
105 ((U8_L24)
106 #'((logand word #xff)
107 (unpack-s24 (ash word -8))))
108 ((U8_U8_I16)
109 #'((logand word #xff)
110 (logand (ash word -8) #xff)
111 (ash word -16)))
112 ((U8_U12_U12)
113 #'((logand word #xff)
114 (logand (ash word -8) #xfff)
115 (ash word -20)))
116 ((U8_U8_U8_U8)
117 #'((logand word #xff)
118 (logand (ash word -8) #xff)
119 (logand (ash word -16) #xff)
120 (ash word -24)))
121 ((U32)
122 #'(word))
123 ((I32)
124 #'(word))
125 ((A32)
126 #'(word))
127 ((B32)
128 #'(word))
129 ((N32)
130 #'((unpack-s32 word)))
131 ((S32)
132 #'((unpack-s32 word)))
133 ((L32)
134 #'((unpack-s32 word)))
135 ((LO32)
136 #'((unpack-s32 word)))
137 ((X8_U24)
138 #'((ash word -8)))
139 ((X8_U12_U12)
140 #'((logand (ash word -8) #xfff)
141 (ash word -20)))
142 ((X8_L24)
143 #'((unpack-s24 (ash word -8))))
144 ((B1_X7_L24)
145 #'((not (zero? (logand word #x1)))
146 (unpack-s24 (ash word -8))))
147 ((B1_U7_L24)
148 #'((not (zero? (logand word #x1)))
149 (logand (ash word -1) #x7f)
150 (unpack-s24 (ash word -8))))
151 ((B1_X31)
152 #'((not (zero? (logand word #x1)))))
153 ((B1_X7_U24)
154 #'((not (zero? (logand word #x1)))
155 (ash word -8)))
156 (else
157 (error "bad kind" type)))))
158
159 (syntax-case x ()
160 ((_ name opcode word0 word* ...)
161 (let ((vars (generate-temporaries #'(word* ...))))
162 (with-syntax (((word* ...) vars)
163 ((n ...) (map 1+ (iota (length #'(word* ...)))))
164 ((asm ...)
165 (parse-first-word #'first (syntax->datum #'word0)))
166 (((asm* ...) ...)
167 (map (lambda (word type)
168 (parse-tail-word word type))
169 vars
170 (syntax->datum #'(word* ...)))))
171 #'(lambda (buf offset first)
172 (let ((word* (u32-ref buf (+ offset n)))
173 ...)
174 (values (+ 1 (length '(word* ...)))
175 (list 'name asm ... asm* ... ...))))))))))
176
177 (define (disasm-invalid buf offset first)
178 (error "bad instruction" (logand first #xff) first buf offset))
179
180 (define disassemblers (make-vector 256 disasm-invalid))
181
182 (define-syntax define-disassembler
183 (lambda (x)
184 (syntax-case x ()
185 ((_ name opcode kind arg ...)
186 (with-syntax ((parse (id-append #'name #'parse- #'name)))
187 #'(let ((parse (disassembler name opcode arg ...)))
188 (vector-set! disassemblers opcode parse)))))))
189
190 (visit-opcodes define-disassembler)
191
192 ;; -> len list
193 (define (disassemble-one buf offset)
194 (let ((first (u32-ref buf offset)))
195 ((vector-ref disassemblers (logand first #xff)) buf offset first)))
196
197 (define (u32-offset->addr offset context)
198 "Given an offset into an image in 32-bit units, return the absolute
199 address of that offset."
200 (+ (debug-context-base context) (* offset 4)))
201
202 (define (code-annotation code len offset start labels context)
203 ;; FIXME: Print names for register loads and stores that correspond to
204 ;; access to named locals.
205 (define (reference-scm target)
206 (unpack-scm (u32-offset->addr (+ offset target) context)))
207
208 (define (dereference-scm target)
209 (let ((addr (u32-offset->addr (+ offset target)
210 context)))
211 (pointer->scm
212 (dereference-pointer (make-pointer addr)))))
213
214 (match code
215 (((or 'br
216 'br-if-nargs-ne 'br-if-nargs-lt 'br-if-nargs-gt
217 'br-if-true 'br-if-null 'br-if-nil 'br-if-pair 'br-if-struct
218 'br-if-char 'br-if-eq 'br-if-eqv 'br-if-equal
219 'br-if-= 'br-if-< 'br-if-<= 'br-if-> 'br-if->=) _ ... target)
220 (list "-> ~A" (vector-ref labels (- (+ offset target) start))))
221 (('br-if-tc7 slot invert? tc7 target)
222 (list "~A -> ~A"
223 (let ((tag (case tc7
224 ((5) "symbol?")
225 ((7) "variable?")
226 ((13) "vector?")
227 ((15) "string?")
228 ((77) "bytevector?")
229 ((95) "bitvector?")
230 (else (number->string tc7)))))
231 (if invert? (string-append "not " tag) tag))
232 (vector-ref labels (- (+ offset target) start))))
233 (('prompt tag escape-only? proc-slot handler)
234 ;; The H is for handler.
235 (list "H -> ~A" (vector-ref labels (- (+ offset handler) start))))
236 (((or 'make-short-immediate 'make-long-immediate) _ imm)
237 (list "~S" (unpack-scm imm)))
238 (('make-long-long-immediate _ high low)
239 (list "~S" (unpack-scm (logior (ash high 32) low))))
240 (('assert-nargs-ee/locals nargs locals)
241 ;; The nargs includes the procedure.
242 (list "~a arg~:p, ~a local~:p" (1- nargs) locals))
243 (('tail-call nargs proc)
244 (list "~a arg~:p" nargs))
245 (('make-closure dst target nfree)
246 (let* ((addr (u32-offset->addr (+ offset target) context))
247 (pdi (find-program-debug-info addr context)))
248 ;; FIXME: Disassemble embedded closures as well.
249 (list "~A at 0x~X (~A free var~:p)"
250 (or (and pdi (program-debug-info-name pdi))
251 "(anonymous procedure)")
252 addr
253 nfree)))
254 (('make-non-immediate dst target)
255 (list "~@Y" (reference-scm target)))
256 (('builtin-ref dst idx)
257 (list "~A" (builtin-index->name idx)))
258 (((or 'static-ref 'static-set!) _ target)
259 (list "~@Y" (dereference-scm target)))
260 (('resolve-module dst name public)
261 (list "~a" (if (zero? public) "private" "public")))
262 (('toplevel-box _ var-offset mod-offset sym-offset bound?)
263 (list "`~A'~A" (dereference-scm sym-offset)
264 (if bound? "" " (maybe unbound)")))
265 (('module-box _ var-offset mod-name-offset sym-offset bound?)
266 (let ((mod-name (reference-scm mod-name-offset)))
267 (list "`(~A ~A ~A)'~A" (if (car mod-name) '@ '@@) (cdr mod-name)
268 (dereference-scm sym-offset)
269 (if bound? "" " (maybe unbound)"))))
270 (('load-typed-array dst type shape target len)
271 (let ((addr (u32-offset->addr (+ offset target) context)))
272 (list "~a bytes from #x~X" len addr)))
273 (_ #f)))
274
275 (define (compute-labels bv start end)
276 (let ((labels (make-vector (- end start) #f)))
277 (define (add-label! pos header)
278 (unless (vector-ref labels (- pos start))
279 (vector-set! labels (- pos start) header)))
280
281 (let lp ((offset start))
282 (when (< offset end)
283 (call-with-values (lambda () (disassemble-one bv offset))
284 (lambda (len elt)
285 (match elt
286 ((inst arg ...)
287 (case inst
288 ((br
289 br-if-nargs-ne br-if-nargs-lt br-if-nargs-gt
290 br-if-true br-if-null br-if-nil br-if-pair br-if-struct
291 br-if-char br-if-tc7 br-if-eq br-if-eqv br-if-equal
292 br-if-= br-if-< br-if-<= br-if-> br-if->=)
293 (match arg
294 ((_ ... target)
295 (add-label! (+ offset target) "L"))))
296 ((prompt)
297 (match arg
298 ((_ ... target)
299 (add-label! (+ offset target) "H")))))))
300 (lp (+ offset len))))))
301 (let lp ((offset start) (n 1))
302 (when (< offset end)
303 (let* ((pos (- offset start))
304 (label (vector-ref labels pos)))
305 (if label
306 (begin
307 (vector-set! labels
308 pos
309 (string->symbol
310 (string-append label (number->string n))))
311 (lp (1+ offset) (1+ n)))
312 (lp (1+ offset) n)))))
313 labels))
314
315 (define (print-info port addr label info extra src)
316 (when label
317 (format port "~A:\n" label))
318 (format port "~4@S ~32S~@[;; ~1{~@?~}~]~@[~61t at ~a~]\n"
319 addr info extra src))
320
321 (define (disassemble-buffer port bv start end context)
322 (let ((labels (compute-labels bv start end))
323 (sources (find-program-sources (u32-offset->addr start context)
324 context)))
325 (define (lookup-source addr)
326 (let lp ((sources sources))
327 (match sources
328 (() #f)
329 ((source . sources)
330 (let ((pc (source-pre-pc source)))
331 (cond
332 ((< pc addr) (lp sources))
333 ((= pc addr)
334 (format #f "~a:~a:~a"
335 (source-file source)
336 (source-line-for-user source)
337 (source-column source)))
338 (else #f)))))))
339 (let lp ((offset start))
340 (when (< offset end)
341 (call-with-values (lambda () (disassemble-one bv offset))
342 (lambda (len elt)
343 (let ((pos (- offset start))
344 (addr (u32-offset->addr offset context))
345 (annotation (code-annotation elt len offset start labels
346 context)))
347 (print-info port pos (vector-ref labels pos) elt annotation
348 (lookup-source addr))
349 (lp (+ offset len)))))))))
350
351 (define* (disassemble-program program #:optional (port (current-output-port)))
352 (cond
353 ((find-program-debug-info (program-code program))
354 => (lambda (pdi)
355 (format port "Disassembly of ~S at #x~X:\n\n" program
356 (program-debug-info-addr pdi))
357 (disassemble-buffer port
358 (program-debug-info-image pdi)
359 (program-debug-info-u32-offset pdi)
360 (program-debug-info-u32-offset-end pdi)
361 (program-debug-info-context pdi))))
362 (else
363 (format port "Debugging information unavailable.~%")))
364 (values))
365
366 (define (fold-code-range proc seed bv start end context raw?)
367 (define (cook code offset)
368 (define (reference-scm target)
369 (unpack-scm (u32-offset->addr (+ offset target) context)))
370
371 (define (dereference-scm target)
372 (let ((addr (u32-offset->addr (+ offset target)
373 context)))
374 (pointer->scm
375 (dereference-pointer (make-pointer addr)))))
376 (match code
377 (((or 'make-short-immediate 'make-long-immediate) dst imm)
378 `(,(car code) ,dst ,(unpack-scm imm)))
379 (('make-long-long-immediate dst high low)
380 `(make-long-long-immediate ,dst
381 ,(unpack-scm (logior (ash high 32) low))))
382 (('make-closure dst target nfree)
383 `(make-closure ,dst
384 ,(u32-offset->addr (+ offset target) context)
385 ,nfree))
386 (('make-non-immediate dst target)
387 `(make-non-immediate ,dst ,(reference-scm target)))
388 (('builtin-ref dst idx)
389 `(builtin-ref ,dst ,(builtin-index->name idx)))
390 (((or 'static-ref 'static-set!) dst target)
391 `(,(car code) ,dst ,(dereference-scm target)))
392 (('toplevel-box dst var-offset mod-offset sym-offset bound?)
393 `(toplevel-box ,dst
394 ,(dereference-scm var-offset)
395 ,(dereference-scm mod-offset)
396 ,(dereference-scm sym-offset)
397 ,bound?))
398 (('module-box dst var-offset mod-name-offset sym-offset bound?)
399 (let ((mod-name (reference-scm mod-name-offset)))
400 `(module-box ,dst
401 ,(dereference-scm var-offset)
402 ,(car mod-name)
403 ,(cdr mod-name)
404 ,(dereference-scm sym-offset)
405 ,bound?)))
406 (_ code)))
407 (let lp ((offset start) (seed seed))
408 (cond
409 ((< offset end)
410 (call-with-values (lambda () (disassemble-one bv offset))
411 (lambda (len elt)
412 (lp (+ offset len)
413 (proc (if raw? elt (cook elt offset))
414 seed)))))
415 (else seed))))
416
417 (define* (fold-program-code proc seed program-or-addr #:key raw?)
418 (cond
419 ((find-program-debug-info (if (program? program-or-addr)
420 (program-code program-or-addr)
421 program-or-addr))
422 => (lambda (pdi)
423 (fold-code-range proc seed
424 (program-debug-info-image pdi)
425 (program-debug-info-u32-offset pdi)
426 (program-debug-info-u32-offset-end pdi)
427 (program-debug-info-context pdi)
428 raw?)))
429 (else seed)))
430
431 (define* (disassemble-image bv #:optional (port (current-output-port)))
432 (let* ((ctx (debug-context-from-image bv))
433 (base (debug-context-text-base ctx)))
434 (for-each-elf-symbol
435 ctx
436 (lambda (sym)
437 (let ((name (elf-symbol-name sym))
438 (value (elf-symbol-value sym))
439 (size (elf-symbol-size sym)))
440 (format port "Disassembly of ~A at #x~X:\n\n"
441 (if (and (string? name) (not (string-null? name)))
442 name
443 "<unnamed function>")
444 (+ base value))
445 (disassemble-buffer port
446 bv
447 (/ (+ base value) 4)
448 (/ (+ base value size) 4)
449 ctx)
450 (display "\n\n" port)))))
451 (values))
452
453 (define (disassemble-file file)
454 (let* ((thunk (load-thunk-from-file file))
455 (elf (find-mapped-elf-image (program-code thunk))))
456 (disassemble-image elf)))