VM has "builtins": primitives addressable by emitted RTL code
[bpt/guile.git] / module / system / vm / disassembler.scm
CommitLineData
82e299f3
AW
1;;; Guile RTL disassembler
2
3;;; Copyright (C) 2001, 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
4;;;
5;;; This library is free software; you can redistribute it and/or
6;;; modify it under the terms of the GNU Lesser General Public
7;;; License as published by the Free Software Foundation; either
8;;; version 3 of the License, or (at your option) any later version.
9;;;
10;;; This library is distributed in the hope that it will be useful,
11;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13;;; Lesser General Public License for more details.
14;;;
15;;; You should have received a copy of the GNU Lesser General Public
16;;; License along with this library; if not, write to the Free Software
17;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19;;; Code:
20
21(define-module (system vm disassembler)
486013d6 22 #:use-module (language rtl)
82e299f3
AW
23 #:use-module (system vm elf)
24 #:use-module (system vm debug)
25 #:use-module (system vm program)
26 #:use-module (system vm objcode)
27 #:use-module (system foreign)
28 #:use-module (rnrs bytevectors)
29 #:use-module (ice-9 format)
30 #:use-module (ice-9 match)
31 #:use-module (ice-9 vlist)
32 #:use-module (srfi srfi-1)
33 #:use-module (srfi srfi-4)
610295ec 34 #:export (disassemble-program
93009a7a
AW
35 disassemble-image
36 disassemble-file))
82e299f3
AW
37
38(define-syntax-rule (u32-ref buf n)
39 (bytevector-u32-native-ref buf (* n 4)))
40
41(define-syntax-rule (s32-ref buf n)
42 (bytevector-s32-native-ref buf (* n 4)))
43
44(define-syntax visit-opcodes
45 (lambda (x)
46 (syntax-case x ()
47 ((visit-opcodes macro arg ...)
48 (with-syntax (((inst ...)
49 (map (lambda (x) (datum->syntax #'macro x))
50 (rtl-instruction-list))))
51 #'(begin
52 (macro arg ... . inst)
53 ...))))))
54
55(eval-when (expand compile load eval)
56 (define (id-append ctx a b)
57 (datum->syntax ctx (symbol-append (syntax->datum a) (syntax->datum b)))))
58
59(define (unpack-scm n)
60 (pointer->scm (make-pointer n)))
61
62(define (unpack-s24 s)
63 (if (zero? (logand s (ash 1 23)))
64 s
65 (- s (ash 1 24))))
66
67(define (unpack-s32 s)
68 (if (zero? (logand s (ash 1 31)))
69 s
70 (- s (ash 1 32))))
71
72(define-syntax disassembler
73 (lambda (x)
74 (define (parse-first-word word type)
75 (with-syntax ((word word))
76 (case type
77 ((U8_X24)
78 #'())
79 ((U8_U24)
80 #'((ash word -8)))
81 ((U8_L24)
82 #'((unpack-s24 (ash word -8))))
82e299f3
AW
83 ((U8_U8_I16)
84 #'((logand (ash word -8) #xff)
85 (ash word -16)))
86 ((U8_U12_U12)
87 #'((logand (ash word -8) #xfff)
88 (ash word -20)))
89 ((U8_U8_U8_U8)
90 #'((logand (ash word -8) #xff)
91 (logand (ash word -16) #xff)
92 (ash word -24)))
93 (else
94 (error "bad kind" type)))))
95
96 (define (parse-tail-word word type)
97 (with-syntax ((word word))
98 (case type
99 ((U8_X24)
100 #'((logand word #ff)))
101 ((U8_U24)
102 #'((logand word #xff)
103 (ash word -8)))
104 ((U8_L24)
105 #'((logand word #xff)
106 (unpack-s24 (ash word -8))))
82e299f3
AW
107 ((U8_U8_I16)
108 #'((logand word #xff)
109 (logand (ash word -8) #xff)
110 (ash word -16)))
111 ((U8_U12_U12)
112 #'((logand word #xff)
113 (logand (ash word -8) #xfff)
114 (ash word -20)))
115 ((U8_U8_U8_U8)
116 #'((logand word #xff)
117 (logand (ash word -8) #xff)
118 (logand (ash word -16) #xff)
119 (ash word -24)))
120 ((U32)
121 #'(word))
122 ((I32)
123 #'(word))
124 ((A32)
125 #'(word))
126 ((B32)
127 #'(word))
128 ((N32)
129 #'((unpack-s32 word)))
130 ((S32)
131 #'((unpack-s32 word)))
132 ((L32)
133 #'((unpack-s32 word)))
134 ((LO32)
135 #'((unpack-s32 word)))
136 ((X8_U24)
137 #'((ash word -8)))
138 ((X8_U12_U12)
139 #'((logand (ash word -8) #xfff)
140 (ash word -20)))
82e299f3
AW
141 ((X8_L24)
142 #'((unpack-s24 (ash word -8))))
143 ((B1_X7_L24)
144 #'((not (zero? (logand word #x1)))
145 (unpack-s24 (ash word -8))))
146 ((B1_U7_L24)
147 #'((not (zero? (logand word #x1)))
148 (logand (ash word -1) #x7f)
149 (unpack-s24 (ash word -8))))
af95414f
AW
150 ((B1_X31)
151 #'((not (zero? (logand word #x1)))))
152 ((B1_X7_U24)
153 #'((not (zero? (logand word #x1)))
154 (ash word -8)))
82e299f3
AW
155 (else
156 (error "bad kind" type)))))
157
158 (syntax-case x ()
159 ((_ name opcode word0 word* ...)
160 (let ((vars (generate-temporaries #'(word* ...))))
161 (with-syntax (((word* ...) vars)
162 ((n ...) (map 1+ (iota (length #'(word* ...)))))
163 ((asm ...)
164 (parse-first-word #'first (syntax->datum #'word0)))
165 (((asm* ...) ...)
166 (map (lambda (word type)
167 (parse-tail-word word type))
168 vars
169 (syntax->datum #'(word* ...)))))
170 #'(lambda (buf offset first)
171 (let ((word* (u32-ref buf (+ offset n)))
172 ...)
173 (values (+ 1 (length '(word* ...)))
174 (list 'name asm ... asm* ... ...))))))))))
175
176(define (disasm-invalid buf offset first)
177 (error "bad instruction" (logand first #xff) first buf offset))
178
179(define disassemblers (make-vector 256 disasm-invalid))
180
181(define-syntax define-disassembler
182 (lambda (x)
183 (syntax-case x ()
2a294c7c 184 ((_ name opcode kind arg ...)
82e299f3
AW
185 (with-syntax ((parse (id-append #'name #'parse- #'name)))
186 #'(let ((parse (disassembler name opcode arg ...)))
187 (vector-set! disassemblers opcode parse)))))))
188
189(visit-opcodes define-disassembler)
190
191;; -> len list
192(define (disassemble-one buf offset)
193 (let ((first (u32-ref buf offset)))
78ff7847 194 ((vector-ref disassemblers (logand first #xff)) buf offset first)))
82e299f3
AW
195
196(define (u32-offset->addr offset context)
197 "Given an offset into an image in 32-bit units, return the absolute
198address of that offset."
199 (+ (debug-context-base context) (* offset 4)))
200
201(define (code-annotation code len offset start labels context)
202 ;; FIXME: Print names for register loads and stores that correspond to
203 ;; access to named locals.
204 (define (reference-scm target)
205 (unpack-scm (u32-offset->addr (+ offset target) context)))
206
207 (define (dereference-scm target)
208 (let ((addr (u32-offset->addr (+ offset target)
209 context)))
210 (pointer->scm
211 (dereference-pointer (make-pointer addr)))))
212
213 (match code
214 (((or 'br
215 'br-if-nargs-ne 'br-if-nargs-lt 'br-if-nargs-gt
216 'br-if-true 'br-if-null 'br-if-nil 'br-if-pair 'br-if-struct
be8b62ca 217 'br-if-char 'br-if-eq 'br-if-eqv 'br-if-equal
82e299f3
AW
218 'br-if-= 'br-if-< 'br-if-<= 'br-if-> 'br-if->=) _ ... target)
219 (list "-> ~A" (vector-ref labels (- (+ offset target) start))))
be8b62ca
AW
220 (('br-if-tc7 slot invert? tc7 target)
221 (list "~A -> ~A"
222 (let ((tag (case tc7
223 ((5) "symbol?")
224 ((7) "variable?")
225 ((13) "vector?")
226 ((15) "string?")
227 (else (number->string tc7)))))
228 (if invert? (string-append "not " tag) tag))
229 (vector-ref labels (- (+ offset target) start))))
8d59d55e 230 (('prompt tag escape-only? proc-slot handler)
82e299f3
AW
231 ;; The H is for handler.
232 (list "H -> ~A" (vector-ref labels (- (+ offset handler) start))))
233 (((or 'make-short-immediate 'make-long-immediate) _ imm)
234 (list "~S" (unpack-scm imm)))
235 (('make-long-long-immediate _ high low)
236 (list "~S" (unpack-scm (logior (ash high 32) low))))
237 (('assert-nargs-ee/locals nargs locals)
7396d216
AW
238 ;; The nargs includes the procedure.
239 (list "~a arg~:p, ~a local~:p" (1- nargs) locals))
82e299f3
AW
240 (('tail-call nargs proc)
241 (list "~a arg~:p" nargs))
7396d216 242 (('make-closure dst target nfree)
82e299f3
AW
243 (let* ((addr (u32-offset->addr (+ offset target) context))
244 (pdi (find-program-debug-info addr context)))
245 ;; FIXME: Disassemble embedded closures as well.
7396d216 246 (list "~A at 0x~X (~A free var~:p)"
82e299f3
AW
247 (or (and pdi (program-debug-info-name pdi))
248 "(anonymous procedure)")
7396d216
AW
249 addr
250 nfree)))
82e299f3
AW
251 (('make-non-immediate dst target)
252 (list "~@Y" (reference-scm target)))
486013d6
AW
253 (('builtin-ref dst idx)
254 (list "~A" (builtin-index->name idx)))
82e299f3
AW
255 (((or 'static-ref 'static-set!) _ target)
256 (list "~@Y" (dereference-scm target)))
257 (('link-procedure! src target)
258 (let* ((addr (u32-offset->addr (+ offset target) context))
259 (pdi (find-program-debug-info addr context)))
260 (list "~A at 0x~X"
261 (or (and pdi (program-debug-info-name pdi))
262 "(anonymous procedure)")
263 addr)))
264 (('resolve-module dst name public)
265 (list "~a" (if (zero? public) "private" "public")))
af95414f
AW
266 (('toplevel-box _ var-offset mod-offset sym-offset bound?)
267 (list "`~A'~A" (dereference-scm sym-offset)
268 (if bound? "" " (maybe unbound)")))
269 (('module-box _ var-offset mod-name-offset sym-offset bound?)
82e299f3 270 (let ((mod-name (reference-scm mod-name-offset)))
af95414f
AW
271 (list "`(~A ~A ~A)'~A" (if (car mod-name) '@ '@@) (cdr mod-name)
272 (dereference-scm sym-offset)
273 (if bound? "" " (maybe unbound)"))))
82e299f3
AW
274 (('load-typed-array dst type shape target len)
275 (let ((addr (u32-offset->addr (+ offset target) context)))
276 (list "~a bytes from #x~X" len addr)))
277 (_ #f)))
278
279(define (compute-labels bv start end)
280 (let ((labels (make-vector (- end start) #f)))
281 (define (add-label! pos header)
282 (unless (vector-ref labels (- pos start))
283 (vector-set! labels (- pos start) header)))
284
285 (let lp ((offset start))
286 (when (< offset end)
287 (call-with-values (lambda () (disassemble-one bv offset))
288 (lambda (len elt)
289 (match elt
290 ((inst arg ...)
291 (case inst
292 ((br
293 br-if-nargs-ne br-if-nargs-lt br-if-nargs-gt
294 br-if-true br-if-null br-if-nil br-if-pair br-if-struct
295 br-if-char br-if-tc7 br-if-eq br-if-eqv br-if-equal
296 br-if-= br-if-< br-if-<= br-if-> br-if->=)
297 (match arg
298 ((_ ... target)
299 (add-label! (+ offset target) "L"))))
300 ((prompt)
301 (match arg
302 ((_ ... target)
70a20431 303 (add-label! (+ offset target) "H")))))))
82e299f3
AW
304 (lp (+ offset len))))))
305 (let lp ((offset start) (n 1))
306 (when (< offset end)
307 (let* ((pos (- offset start))
308 (label (vector-ref labels pos)))
309 (if label
310 (begin
311 (vector-set! labels
312 pos
313 (string->symbol
314 (string-append label (number->string n))))
315 (lp (1+ offset) (1+ n)))
316 (lp (1+ offset) n)))))
317 labels))
318
319(define (print-info port addr label info extra src)
320 (when label
321 (format port "~A:\n" label))
322 (format port "~4@S ~32S~@[;; ~1{~@?~}~]~@[~61t at ~a~]\n"
323 addr info extra src))
324
325(define (disassemble-buffer port bv start end context)
e9588e70
AW
326 (let ((labels (compute-labels bv start end))
327 (sources (find-program-sources (u32-offset->addr start context)
328 context)))
329 (define (lookup-source addr)
330 (let lp ((sources sources))
331 (match sources
332 (() #f)
333 ((source . sources)
334 (let ((pc (source-pre-pc source)))
335 (cond
336 ((< pc addr) (lp sources))
337 ((= pc addr)
338 (format #f "~a:~a:~a"
339 (source-file source)
340 (source-line-for-user source)
341 (source-column source)))
342 (else #f)))))))
82e299f3
AW
343 (let lp ((offset start))
344 (when (< offset end)
345 (call-with-values (lambda () (disassemble-one bv offset))
346 (lambda (len elt)
347 (let ((pos (- offset start))
e9588e70 348 (addr (u32-offset->addr offset context))
82e299f3
AW
349 (annotation (code-annotation elt len offset start labels
350 context)))
e9588e70
AW
351 (print-info port pos (vector-ref labels pos) elt annotation
352 (lookup-source addr))
82e299f3
AW
353 (lp (+ offset len)))))))))
354
355(define* (disassemble-program program #:optional (port (current-output-port)))
356 (cond
357 ((find-program-debug-info (rtl-program-code program))
358 => (lambda (pdi)
359 (format port "Disassembly of ~S at #x~X:\n\n" program
360 (program-debug-info-addr pdi))
361 (disassemble-buffer port
362 (program-debug-info-image pdi)
363 (program-debug-info-u32-offset pdi)
364 (program-debug-info-u32-offset-end pdi)
365 (program-debug-info-context pdi))))
366 (else
367 (format port "Debugging information unavailable.~%")))
368 (values))
610295ec
AW
369
370(define* (disassemble-image bv #:optional (port (current-output-port)))
371 (let* ((ctx (debug-context-from-image bv))
372 (base (debug-context-text-base ctx)))
373 (for-each-elf-symbol
374 ctx
375 (lambda (sym)
376 (let ((name (elf-symbol-name sym))
377 (value (elf-symbol-value sym))
378 (size (elf-symbol-size sym)))
379 (format port "Disassembly of ~A at #x~X:\n\n"
380 (if (and (string? name) (not (string-null? name)))
381 name
382 "<unnamed function>")
383 (+ base value))
384 (disassemble-buffer port
385 bv
386 (/ (+ base value) 4)
387 (/ (+ base value size) 4)
388 ctx)
389 (display "\n\n" port)))))
390 (values))
93009a7a
AW
391
392(define (disassemble-file file)
393 (let* ((thunk (load-thunk-from-file file))
394 (elf (find-mapped-elf-image (rtl-program-code thunk))))
395 (disassemble-image elf)))