fix `nil?' type inference
[bpt/guile.git] / module / system / vm / disassembler.scm
CommitLineData
691697de 1;;; Guile bytecode disassembler
82e299f3 2
e2fafeb9 3;;; Copyright (C) 2001, 2009, 2010, 2012, 2013, 2014, 2015 Free Software Foundation, Inc.
82e299f3
AW
4;;;
5;;; This library is free software; you can redistribute it and/or
6;;; modify it under the terms of the GNU Lesser General Public
7;;; License as published by the Free Software Foundation; either
8;;; version 3 of the License, or (at your option) any later version.
9;;;
10;;; This library is distributed in the hope that it will be useful,
11;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13;;; Lesser General Public License for more details.
14;;;
15;;; You should have received a copy of the GNU Lesser General Public
16;;; License along with this library; if not, write to the Free Software
17;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19;;; Code:
20
21(define-module (system vm disassembler)
691697de 22 #:use-module (language bytecode)
82e299f3
AW
23 #:use-module (system vm elf)
24 #:use-module (system vm debug)
25 #:use-module (system vm program)
4cbc95f1 26 #:use-module (system vm loader)
82e299f3
AW
27 #:use-module (system foreign)
28 #:use-module (rnrs bytevectors)
29 #:use-module (ice-9 format)
30 #:use-module (ice-9 match)
31 #:use-module (ice-9 vlist)
32 #:use-module (srfi srfi-1)
33 #:use-module (srfi srfi-4)
610295ec 34 #:export (disassemble-program
850e80da 35 fold-program-code
93009a7a 36 disassemble-image
20d7d682
AW
37 disassemble-file
38
39 instruction-length
40 instruction-has-fallthrough?
41 instruction-relative-jump-targets
42 instruction-slot-clobbers))
82e299f3
AW
43
44(define-syntax-rule (u32-ref buf n)
45 (bytevector-u32-native-ref buf (* n 4)))
46
47(define-syntax-rule (s32-ref buf n)
48 (bytevector-s32-native-ref buf (* n 4)))
49
50(define-syntax visit-opcodes
51 (lambda (x)
52 (syntax-case x ()
53 ((visit-opcodes macro arg ...)
54 (with-syntax (((inst ...)
55 (map (lambda (x) (datum->syntax #'macro x))
1b780c13 56 (instruction-list))))
82e299f3
AW
57 #'(begin
58 (macro arg ... . inst)
59 ...))))))
60
61(eval-when (expand compile load eval)
62 (define (id-append ctx a b)
63 (datum->syntax ctx (symbol-append (syntax->datum a) (syntax->datum b)))))
64
65(define (unpack-scm n)
66 (pointer->scm (make-pointer n)))
67
68(define (unpack-s24 s)
69 (if (zero? (logand s (ash 1 23)))
70 s
71 (- s (ash 1 24))))
72
73(define (unpack-s32 s)
74 (if (zero? (logand s (ash 1 31)))
75 s
76 (- s (ash 1 32))))
77
78(define-syntax disassembler
79 (lambda (x)
80 (define (parse-first-word word type)
81 (with-syntax ((word word))
82 (case type
83 ((U8_X24)
84 #'())
85 ((U8_U24)
86 #'((ash word -8)))
87 ((U8_L24)
88 #'((unpack-s24 (ash word -8))))
82e299f3
AW
89 ((U8_U8_I16)
90 #'((logand (ash word -8) #xff)
91 (ash word -16)))
92 ((U8_U12_U12)
93 #'((logand (ash word -8) #xfff)
94 (ash word -20)))
95 ((U8_U8_U8_U8)
96 #'((logand (ash word -8) #xff)
97 (logand (ash word -16) #xff)
98 (ash word -24)))
99 (else
100 (error "bad kind" type)))))
101
102 (define (parse-tail-word word type)
103 (with-syntax ((word word))
104 (case type
105 ((U8_X24)
106 #'((logand word #ff)))
107 ((U8_U24)
108 #'((logand word #xff)
109 (ash word -8)))
110 ((U8_L24)
111 #'((logand word #xff)
112 (unpack-s24 (ash word -8))))
82e299f3
AW
113 ((U32)
114 #'(word))
115 ((I32)
116 #'(word))
117 ((A32)
118 #'(word))
119 ((B32)
120 #'(word))
121 ((N32)
122 #'((unpack-s32 word)))
123 ((S32)
124 #'((unpack-s32 word)))
125 ((L32)
126 #'((unpack-s32 word)))
127 ((LO32)
128 #'((unpack-s32 word)))
129 ((X8_U24)
130 #'((ash word -8)))
82e299f3
AW
131 ((X8_L24)
132 #'((unpack-s24 (ash word -8))))
133 ((B1_X7_L24)
134 #'((not (zero? (logand word #x1)))
135 (unpack-s24 (ash word -8))))
136 ((B1_U7_L24)
137 #'((not (zero? (logand word #x1)))
138 (logand (ash word -1) #x7f)
139 (unpack-s24 (ash word -8))))
af95414f
AW
140 ((B1_X31)
141 #'((not (zero? (logand word #x1)))))
142 ((B1_X7_U24)
143 #'((not (zero? (logand word #x1)))
144 (ash word -8)))
82e299f3
AW
145 (else
146 (error "bad kind" type)))))
147
148 (syntax-case x ()
149 ((_ name opcode word0 word* ...)
150 (let ((vars (generate-temporaries #'(word* ...))))
151 (with-syntax (((word* ...) vars)
152 ((n ...) (map 1+ (iota (length #'(word* ...)))))
153 ((asm ...)
154 (parse-first-word #'first (syntax->datum #'word0)))
155 (((asm* ...) ...)
156 (map (lambda (word type)
157 (parse-tail-word word type))
158 vars
159 (syntax->datum #'(word* ...)))))
160 #'(lambda (buf offset first)
161 (let ((word* (u32-ref buf (+ offset n)))
162 ...)
163 (values (+ 1 (length '(word* ...)))
164 (list 'name asm ... asm* ... ...))))))))))
165
166(define (disasm-invalid buf offset first)
167 (error "bad instruction" (logand first #xff) first buf offset))
168
169(define disassemblers (make-vector 256 disasm-invalid))
170
171(define-syntax define-disassembler
172 (lambda (x)
173 (syntax-case x ()
2a294c7c 174 ((_ name opcode kind arg ...)
82e299f3
AW
175 (with-syntax ((parse (id-append #'name #'parse- #'name)))
176 #'(let ((parse (disassembler name opcode arg ...)))
177 (vector-set! disassemblers opcode parse)))))))
178
179(visit-opcodes define-disassembler)
180
181;; -> len list
182(define (disassemble-one buf offset)
183 (let ((first (u32-ref buf offset)))
78ff7847 184 ((vector-ref disassemblers (logand first #xff)) buf offset first)))
82e299f3
AW
185
186(define (u32-offset->addr offset context)
187 "Given an offset into an image in 32-bit units, return the absolute
188address of that offset."
189 (+ (debug-context-base context) (* offset 4)))
190
321c32dc 191(define (code-annotation code len offset start labels context push-addr!)
82e299f3
AW
192 ;; FIXME: Print names for register loads and stores that correspond to
193 ;; access to named locals.
194 (define (reference-scm target)
195 (unpack-scm (u32-offset->addr (+ offset target) context)))
196
197 (define (dereference-scm target)
198 (let ((addr (u32-offset->addr (+ offset target)
199 context)))
200 (pointer->scm
201 (dereference-pointer (make-pointer addr)))))
202
203 (match code
204 (((or 'br
205 'br-if-nargs-ne 'br-if-nargs-lt 'br-if-nargs-gt
206 'br-if-true 'br-if-null 'br-if-nil 'br-if-pair 'br-if-struct
be8b62ca 207 'br-if-char 'br-if-eq 'br-if-eqv 'br-if-equal
8c6206f3
AW
208 'br-if-= 'br-if-< 'br-if-<= 'br-if-> 'br-if->=
209 'br-if-logtest) _ ... target)
82e299f3 210 (list "-> ~A" (vector-ref labels (- (+ offset target) start))))
be8b62ca
AW
211 (('br-if-tc7 slot invert? tc7 target)
212 (list "~A -> ~A"
213 (let ((tag (case tc7
214 ((5) "symbol?")
215 ((7) "variable?")
216 ((13) "vector?")
217 ((15) "string?")
e2fafeb9 218 ((53) "keyword?")
becce37b 219 ((77) "bytevector?")
d65514a2 220 ((95) "bitvector?")
be8b62ca
AW
221 (else (number->string tc7)))))
222 (if invert? (string-append "not " tag) tag))
223 (vector-ref labels (- (+ offset target) start))))
8d59d55e 224 (('prompt tag escape-only? proc-slot handler)
82e299f3
AW
225 ;; The H is for handler.
226 (list "H -> ~A" (vector-ref labels (- (+ offset handler) start))))
227 (((or 'make-short-immediate 'make-long-immediate) _ imm)
228 (list "~S" (unpack-scm imm)))
229 (('make-long-long-immediate _ high low)
230 (list "~S" (unpack-scm (logior (ash high 32) low))))
231 (('assert-nargs-ee/locals nargs locals)
7396d216
AW
232 ;; The nargs includes the procedure.
233 (list "~a arg~:p, ~a local~:p" (1- nargs) locals))
82e299f3
AW
234 (('tail-call nargs proc)
235 (list "~a arg~:p" nargs))
7396d216 236 (('make-closure dst target nfree)
82e299f3 237 (let* ((addr (u32-offset->addr (+ offset target) context))
321c32dc
AW
238 (pdi (find-program-debug-info addr context))
239 (name (or (and pdi (program-debug-info-name pdi))
240 "anonymous procedure")))
241 (push-addr! addr name)
242 (list "~A at #x~X (~A free var~:p)" name addr nfree)))
560bfa92
AW
243 (('call-label closure nlocals target)
244 (let* ((addr (u32-offset->addr (+ offset target) context))
245 (pdi (find-program-debug-info addr context))
246 (name (or (and pdi (program-debug-info-name pdi))
247 "anonymous procedure")))
248 (push-addr! addr name)
249 (list "~A at #x~X" name addr)))
250 (('tail-call-label nlocals target)
251 (let* ((addr (u32-offset->addr (+ offset target) context))
252 (pdi (find-program-debug-info addr context))
253 (name (or (and pdi (program-debug-info-name pdi))
254 "anonymous procedure")))
255 (push-addr! addr name)
256 (list "~A at #x~X" name addr)))
82e299f3 257 (('make-non-immediate dst target)
321c32dc
AW
258 (let ((val (reference-scm target)))
259 (when (program? val)
260 (push-addr! (program-code val) val))
261 (list "~@Y" val)))
486013d6
AW
262 (('builtin-ref dst idx)
263 (list "~A" (builtin-index->name idx)))
82e299f3
AW
264 (((or 'static-ref 'static-set!) _ target)
265 (list "~@Y" (dereference-scm target)))
321c32dc
AW
266 (((or 'free-ref 'free-set!) _ _ index)
267 (list "free var ~a" index))
82e299f3
AW
268 (('resolve-module dst name public)
269 (list "~a" (if (zero? public) "private" "public")))
af95414f
AW
270 (('toplevel-box _ var-offset mod-offset sym-offset bound?)
271 (list "`~A'~A" (dereference-scm sym-offset)
272 (if bound? "" " (maybe unbound)")))
273 (('module-box _ var-offset mod-name-offset sym-offset bound?)
82e299f3 274 (let ((mod-name (reference-scm mod-name-offset)))
af95414f
AW
275 (list "`(~A ~A ~A)'~A" (if (car mod-name) '@ '@@) (cdr mod-name)
276 (dereference-scm sym-offset)
277 (if bound? "" " (maybe unbound)"))))
82e299f3
AW
278 (('load-typed-array dst type shape target len)
279 (let ((addr (u32-offset->addr (+ offset target) context)))
280 (list "~a bytes from #x~X" len addr)))
281 (_ #f)))
282
283(define (compute-labels bv start end)
284 (let ((labels (make-vector (- end start) #f)))
285 (define (add-label! pos header)
286 (unless (vector-ref labels (- pos start))
287 (vector-set! labels (- pos start) header)))
288
289 (let lp ((offset start))
290 (when (< offset end)
291 (call-with-values (lambda () (disassemble-one bv offset))
292 (lambda (len elt)
293 (match elt
294 ((inst arg ...)
295 (case inst
296 ((br
297 br-if-nargs-ne br-if-nargs-lt br-if-nargs-gt
298 br-if-true br-if-null br-if-nil br-if-pair br-if-struct
299 br-if-char br-if-tc7 br-if-eq br-if-eqv br-if-equal
d613ccaa 300 br-if-= br-if-< br-if-<= br-if-> br-if->= br-if-logtest)
82e299f3
AW
301 (match arg
302 ((_ ... target)
303 (add-label! (+ offset target) "L"))))
304 ((prompt)
305 (match arg
306 ((_ ... target)
70a20431 307 (add-label! (+ offset target) "H")))))))
82e299f3
AW
308 (lp (+ offset len))))))
309 (let lp ((offset start) (n 1))
310 (when (< offset end)
311 (let* ((pos (- offset start))
312 (label (vector-ref labels pos)))
313 (if label
314 (begin
315 (vector-set! labels
316 pos
317 (string->symbol
318 (string-append label (number->string n))))
319 (lp (1+ offset) (1+ n)))
320 (lp (1+ offset) n)))))
321 labels))
322
323(define (print-info port addr label info extra src)
324 (when label
325 (format port "~A:\n" label))
326 (format port "~4@S ~32S~@[;; ~1{~@?~}~]~@[~61t at ~a~]\n"
327 addr info extra src))
328
321c32dc 329(define (disassemble-buffer port bv start end context push-addr!)
e9588e70
AW
330 (let ((labels (compute-labels bv start end))
331 (sources (find-program-sources (u32-offset->addr start context)
332 context)))
333 (define (lookup-source addr)
334 (let lp ((sources sources))
335 (match sources
336 (() #f)
337 ((source . sources)
338 (let ((pc (source-pre-pc source)))
339 (cond
340 ((< pc addr) (lp sources))
341 ((= pc addr)
342 (format #f "~a:~a:~a"
1b1c9125 343 (or (source-file source) "(unknown file)")
e9588e70
AW
344 (source-line-for-user source)
345 (source-column source)))
346 (else #f)))))))
82e299f3
AW
347 (let lp ((offset start))
348 (when (< offset end)
349 (call-with-values (lambda () (disassemble-one bv offset))
350 (lambda (len elt)
351 (let ((pos (- offset start))
e9588e70 352 (addr (u32-offset->addr offset context))
82e299f3 353 (annotation (code-annotation elt len offset start labels
321c32dc 354 context push-addr!)))
e9588e70
AW
355 (print-info port pos (vector-ref labels pos) elt annotation
356 (lookup-source addr))
82e299f3
AW
357 (lp (+ offset len)))))))))
358
560bfa92 359(define* (disassemble-addr addr label port #:optional (seen (make-hash-table)))
321c32dc 360 (format port "Disassembly of ~A at #x~X:\n\n" label addr)
82e299f3 361 (cond
321c32dc 362 ((find-program-debug-info addr)
82e299f3 363 => (lambda (pdi)
321c32dc
AW
364 (let ((worklist '()))
365 (define (push-addr! addr label)
560bfa92
AW
366 (unless (hashv-ref seen addr)
367 (hashv-set! seen addr #t)
321c32dc
AW
368 (set! worklist (acons addr label worklist))))
369 (disassemble-buffer port
370 (program-debug-info-image pdi)
371 (program-debug-info-u32-offset pdi)
372 (program-debug-info-u32-offset-end pdi)
373 (program-debug-info-context pdi)
374 push-addr!)
375 (for-each (match-lambda
376 ((addr . label)
377 (display "\n----------------------------------------\n"
378 port)
560bfa92 379 (disassemble-addr addr label port seen)))
321c32dc 380 worklist))))
82e299f3
AW
381 (else
382 (format port "Debugging information unavailable.~%")))
383 (values))
610295ec 384
321c32dc
AW
385(define* (disassemble-program program #:optional (port (current-output-port)))
386 (disassemble-addr (program-code program) program port))
387
850e80da
AW
388(define (fold-code-range proc seed bv start end context raw?)
389 (define (cook code offset)
390 (define (reference-scm target)
391 (unpack-scm (u32-offset->addr (+ offset target) context)))
392
393 (define (dereference-scm target)
394 (let ((addr (u32-offset->addr (+ offset target)
395 context)))
396 (pointer->scm
397 (dereference-pointer (make-pointer addr)))))
398 (match code
399 (((or 'make-short-immediate 'make-long-immediate) dst imm)
400 `(,(car code) ,dst ,(unpack-scm imm)))
401 (('make-long-long-immediate dst high low)
402 `(make-long-long-immediate ,dst
403 ,(unpack-scm (logior (ash high 32) low))))
404 (('make-closure dst target nfree)
405 `(make-closure ,dst
406 ,(u32-offset->addr (+ offset target) context)
407 ,nfree))
408 (('make-non-immediate dst target)
409 `(make-non-immediate ,dst ,(reference-scm target)))
410 (('builtin-ref dst idx)
411 `(builtin-ref ,dst ,(builtin-index->name idx)))
412 (((or 'static-ref 'static-set!) dst target)
413 `(,(car code) ,dst ,(dereference-scm target)))
414 (('toplevel-box dst var-offset mod-offset sym-offset bound?)
415 `(toplevel-box ,dst
416 ,(dereference-scm var-offset)
417 ,(dereference-scm mod-offset)
418 ,(dereference-scm sym-offset)
419 ,bound?))
420 (('module-box dst var-offset mod-name-offset sym-offset bound?)
421 (let ((mod-name (reference-scm mod-name-offset)))
422 `(module-box ,dst
423 ,(dereference-scm var-offset)
424 ,(car mod-name)
425 ,(cdr mod-name)
426 ,(dereference-scm sym-offset)
427 ,bound?)))
428 (_ code)))
429 (let lp ((offset start) (seed seed))
430 (cond
431 ((< offset end)
432 (call-with-values (lambda () (disassemble-one bv offset))
433 (lambda (len elt)
434 (lp (+ offset len)
435 (proc (if raw? elt (cook elt offset))
436 seed)))))
437 (else seed))))
438
439(define* (fold-program-code proc seed program-or-addr #:key raw?)
440 (cond
0bd1e9c6 441 ((find-program-debug-info (if (program? program-or-addr)
d1100525 442 (program-code program-or-addr)
850e80da
AW
443 program-or-addr))
444 => (lambda (pdi)
445 (fold-code-range proc seed
446 (program-debug-info-image pdi)
447 (program-debug-info-u32-offset pdi)
448 (program-debug-info-u32-offset-end pdi)
449 (program-debug-info-context pdi)
450 raw?)))
451 (else seed)))
452
610295ec
AW
453(define* (disassemble-image bv #:optional (port (current-output-port)))
454 (let* ((ctx (debug-context-from-image bv))
455 (base (debug-context-text-base ctx)))
456 (for-each-elf-symbol
457 ctx
458 (lambda (sym)
459 (let ((name (elf-symbol-name sym))
460 (value (elf-symbol-value sym))
461 (size (elf-symbol-size sym)))
462 (format port "Disassembly of ~A at #x~X:\n\n"
463 (if (and (string? name) (not (string-null? name)))
464 name
465 "<unnamed function>")
466 (+ base value))
467 (disassemble-buffer port
468 bv
469 (/ (+ base value) 4)
470 (/ (+ base value size) 4)
321c32dc
AW
471 ctx
472 (lambda (addr name) #t))
610295ec
AW
473 (display "\n\n" port)))))
474 (values))
93009a7a
AW
475
476(define (disassemble-file file)
477 (let* ((thunk (load-thunk-from-file file))
d1100525 478 (elf (find-mapped-elf-image (program-code thunk))))
93009a7a 479 (disassemble-image elf)))
20d7d682
AW
480
481(define-syntax instruction-lengths-vector
482 (lambda (x)
483 (syntax-case x ()
484 ((_)
485 (let ((lengths (make-vector 256 #f)))
486 (for-each (match-lambda
487 ((name opcode kind words ...)
488 (vector-set! lengths opcode (* 4 (length words)))))
489 (instruction-list))
490 (datum->syntax x lengths))))))
491
492(define (instruction-length code pos)
493 (let ((opcode (logand (bytevector-u32-native-ref code pos) #xff)))
494 (or (vector-ref (instruction-lengths-vector) opcode)
495 (error "Unknown opcode" opcode))))
496
497(define-syntax static-opcode-set
498 (lambda (x)
499 (define (instruction-opcode inst)
500 (cond
501 ((assq inst (instruction-list))
502 => (match-lambda ((name opcode . _) opcode)))
503 (else
504 (error "unknown instruction" inst))))
505
506 (syntax-case x ()
507 ((static-opcode-set inst ...)
508 (let ((bv (make-bitvector 256 #f)))
509 (for-each (lambda (inst)
510 (bitvector-set! bv (instruction-opcode inst) #t))
511 (syntax->datum #'(inst ...)))
512 (datum->syntax #'static-opcode-set bv))))))
513
514(define (instruction-has-fallthrough? code pos)
515 (define non-fallthrough-set
516 (static-opcode-set halt
517 tail-call tail-call-label tail-call/shuffle
518 return return-values
519 subr-call foreign-call continuation-call
520 tail-apply
521 br))
522 (let ((opcode (logand (bytevector-u32-native-ref code pos) #xff)))
523 (not (bitvector-ref non-fallthrough-set opcode))))
524
525(define-syntax define-jump-parser
526 (lambda (x)
527 (syntax-case x ()
528 ((_ name opcode kind word0 word* ...)
529 (let ((symname (syntax->datum #'name)))
530 (if (or (memq symname '(br prompt))
531 (string-prefix? "br-" (symbol->string symname)))
532 (let ((offset (* 4 (length #'(word* ...)))))
533 #`(vector-set!
534 jump-parsers
535 opcode
536 (lambda (code pos)
537 (let ((target
538 (bytevector-s32-native-ref code (+ pos #,offset))))
539 ;; Assume that the target is in the last word, as
540 ;; an L24 in the high bits.
541 (list (* 4 (ash target -8)))))))
542 #'(begin)))))))
543
544(define jump-parsers (make-vector 256 (lambda (code pos) '())))
545(visit-opcodes define-jump-parser)
546
547(define (instruction-relative-jump-targets code pos)
548 (let ((opcode (logand (bytevector-u32-native-ref code pos) #xff)))
549 ((vector-ref jump-parsers opcode) code pos)))
550
551(define-syntax define-clobber-parser
552 (lambda (x)
553 (syntax-case x ()
554 ((_ name opcode kind arg ...)
555 (case (syntax->datum #'kind)
556 ((!)
557 (case (syntax->datum #'name)
558 ((call call-label)
559 #'(let ((parse (lambda (code pos nslots)
560 (call-with-values
561 (lambda ()
562 (disassemble-one code (/ pos 4)))
563 (lambda (len elt)
564 (match elt
565 ((_ proc . _)
566 (let lp ((slot (- proc 2)))
567 (if (< slot nslots)
568 (cons slot (lp (1+ slot)))
569 '())))))))))
570 (vector-set! clobber-parsers opcode parse)))
571 (else
572 #'(begin))))
573 ((<-)
574 #'(let ((parse (lambda (code pos nslots)
575 (call-with-values
576 (lambda ()
577 (disassemble-one code (/ pos 4)))
578 (lambda (len elt)
579 (match elt
580 ((_ dst . _) (list dst))))))))
581 (vector-set! clobber-parsers opcode parse)))
582 (else (error "unexpected instruction kind" #'kind)))))))
583
584(define clobber-parsers (make-vector 256 (lambda (code pos nslots) '())))
585(visit-opcodes define-clobber-parser)
586
587(define (instruction-slot-clobbers code pos nslots)
588 (let ((opcode (logand (bytevector-u32-native-ref code pos) #xff)))
589 ((vector-ref clobber-parsers opcode) code pos nslots)))