Commit | Line | Data |
---|---|---|
82e299f3 AW |
1 | ;;; Guile RTL disassembler |
2 | ||
3 | ;;; Copyright (C) 2001, 2009, 2010, 2012, 2013 Free Software Foundation, Inc. | |
4 | ;;; | |
5 | ;;; This library is free software; you can redistribute it and/or | |
6 | ;;; modify it under the terms of the GNU Lesser General Public | |
7 | ;;; License as published by the Free Software Foundation; either | |
8 | ;;; version 3 of the License, or (at your option) any later version. | |
9 | ;;; | |
10 | ;;; This library is distributed in the hope that it will be useful, | |
11 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | ;;; Lesser General Public License for more details. | |
14 | ;;; | |
15 | ;;; You should have received a copy of the GNU Lesser General Public | |
16 | ;;; License along with this library; if not, write to the Free Software | |
17 | ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
18 | ||
19 | ;;; Code: | |
20 | ||
21 | (define-module (system vm disassembler) | |
486013d6 | 22 | #:use-module (language rtl) |
82e299f3 AW |
23 | #:use-module (system vm elf) |
24 | #:use-module (system vm debug) | |
25 | #:use-module (system vm program) | |
26 | #:use-module (system vm objcode) | |
27 | #:use-module (system foreign) | |
28 | #:use-module (rnrs bytevectors) | |
29 | #:use-module (ice-9 format) | |
30 | #:use-module (ice-9 match) | |
31 | #:use-module (ice-9 vlist) | |
32 | #:use-module (srfi srfi-1) | |
33 | #:use-module (srfi srfi-4) | |
610295ec | 34 | #:export (disassemble-program |
93009a7a AW |
35 | disassemble-image |
36 | disassemble-file)) | |
82e299f3 AW |
37 | |
38 | (define-syntax-rule (u32-ref buf n) | |
39 | (bytevector-u32-native-ref buf (* n 4))) | |
40 | ||
41 | (define-syntax-rule (s32-ref buf n) | |
42 | (bytevector-s32-native-ref buf (* n 4))) | |
43 | ||
44 | (define-syntax visit-opcodes | |
45 | (lambda (x) | |
46 | (syntax-case x () | |
47 | ((visit-opcodes macro arg ...) | |
48 | (with-syntax (((inst ...) | |
49 | (map (lambda (x) (datum->syntax #'macro x)) | |
50 | (rtl-instruction-list)))) | |
51 | #'(begin | |
52 | (macro arg ... . inst) | |
53 | ...)))))) | |
54 | ||
55 | (eval-when (expand compile load eval) | |
56 | (define (id-append ctx a b) | |
57 | (datum->syntax ctx (symbol-append (syntax->datum a) (syntax->datum b))))) | |
58 | ||
59 | (define (unpack-scm n) | |
60 | (pointer->scm (make-pointer n))) | |
61 | ||
62 | (define (unpack-s24 s) | |
63 | (if (zero? (logand s (ash 1 23))) | |
64 | s | |
65 | (- s (ash 1 24)))) | |
66 | ||
67 | (define (unpack-s32 s) | |
68 | (if (zero? (logand s (ash 1 31))) | |
69 | s | |
70 | (- s (ash 1 32)))) | |
71 | ||
72 | (define-syntax disassembler | |
73 | (lambda (x) | |
74 | (define (parse-first-word word type) | |
75 | (with-syntax ((word word)) | |
76 | (case type | |
77 | ((U8_X24) | |
78 | #'()) | |
79 | ((U8_U24) | |
80 | #'((ash word -8))) | |
81 | ((U8_L24) | |
82 | #'((unpack-s24 (ash word -8)))) | |
82e299f3 AW |
83 | ((U8_U8_I16) |
84 | #'((logand (ash word -8) #xff) | |
85 | (ash word -16))) | |
86 | ((U8_U12_U12) | |
87 | #'((logand (ash word -8) #xfff) | |
88 | (ash word -20))) | |
89 | ((U8_U8_U8_U8) | |
90 | #'((logand (ash word -8) #xff) | |
91 | (logand (ash word -16) #xff) | |
92 | (ash word -24))) | |
93 | (else | |
94 | (error "bad kind" type))))) | |
95 | ||
96 | (define (parse-tail-word word type) | |
97 | (with-syntax ((word word)) | |
98 | (case type | |
99 | ((U8_X24) | |
100 | #'((logand word #ff))) | |
101 | ((U8_U24) | |
102 | #'((logand word #xff) | |
103 | (ash word -8))) | |
104 | ((U8_L24) | |
105 | #'((logand word #xff) | |
106 | (unpack-s24 (ash word -8)))) | |
82e299f3 AW |
107 | ((U8_U8_I16) |
108 | #'((logand word #xff) | |
109 | (logand (ash word -8) #xff) | |
110 | (ash word -16))) | |
111 | ((U8_U12_U12) | |
112 | #'((logand word #xff) | |
113 | (logand (ash word -8) #xfff) | |
114 | (ash word -20))) | |
115 | ((U8_U8_U8_U8) | |
116 | #'((logand word #xff) | |
117 | (logand (ash word -8) #xff) | |
118 | (logand (ash word -16) #xff) | |
119 | (ash word -24))) | |
120 | ((U32) | |
121 | #'(word)) | |
122 | ((I32) | |
123 | #'(word)) | |
124 | ((A32) | |
125 | #'(word)) | |
126 | ((B32) | |
127 | #'(word)) | |
128 | ((N32) | |
129 | #'((unpack-s32 word))) | |
130 | ((S32) | |
131 | #'((unpack-s32 word))) | |
132 | ((L32) | |
133 | #'((unpack-s32 word))) | |
134 | ((LO32) | |
135 | #'((unpack-s32 word))) | |
136 | ((X8_U24) | |
137 | #'((ash word -8))) | |
138 | ((X8_U12_U12) | |
139 | #'((logand (ash word -8) #xfff) | |
140 | (ash word -20))) | |
82e299f3 AW |
141 | ((X8_L24) |
142 | #'((unpack-s24 (ash word -8)))) | |
143 | ((B1_X7_L24) | |
144 | #'((not (zero? (logand word #x1))) | |
145 | (unpack-s24 (ash word -8)))) | |
146 | ((B1_U7_L24) | |
147 | #'((not (zero? (logand word #x1))) | |
148 | (logand (ash word -1) #x7f) | |
149 | (unpack-s24 (ash word -8)))) | |
af95414f AW |
150 | ((B1_X31) |
151 | #'((not (zero? (logand word #x1))))) | |
152 | ((B1_X7_U24) | |
153 | #'((not (zero? (logand word #x1))) | |
154 | (ash word -8))) | |
82e299f3 AW |
155 | (else |
156 | (error "bad kind" type))))) | |
157 | ||
158 | (syntax-case x () | |
159 | ((_ name opcode word0 word* ...) | |
160 | (let ((vars (generate-temporaries #'(word* ...)))) | |
161 | (with-syntax (((word* ...) vars) | |
162 | ((n ...) (map 1+ (iota (length #'(word* ...))))) | |
163 | ((asm ...) | |
164 | (parse-first-word #'first (syntax->datum #'word0))) | |
165 | (((asm* ...) ...) | |
166 | (map (lambda (word type) | |
167 | (parse-tail-word word type)) | |
168 | vars | |
169 | (syntax->datum #'(word* ...))))) | |
170 | #'(lambda (buf offset first) | |
171 | (let ((word* (u32-ref buf (+ offset n))) | |
172 | ...) | |
173 | (values (+ 1 (length '(word* ...))) | |
174 | (list 'name asm ... asm* ... ...)))))))))) | |
175 | ||
176 | (define (disasm-invalid buf offset first) | |
177 | (error "bad instruction" (logand first #xff) first buf offset)) | |
178 | ||
179 | (define disassemblers (make-vector 256 disasm-invalid)) | |
180 | ||
181 | (define-syntax define-disassembler | |
182 | (lambda (x) | |
183 | (syntax-case x () | |
2a294c7c | 184 | ((_ name opcode kind arg ...) |
82e299f3 AW |
185 | (with-syntax ((parse (id-append #'name #'parse- #'name))) |
186 | #'(let ((parse (disassembler name opcode arg ...))) | |
187 | (vector-set! disassemblers opcode parse))))))) | |
188 | ||
189 | (visit-opcodes define-disassembler) | |
190 | ||
191 | ;; -> len list | |
192 | (define (disassemble-one buf offset) | |
193 | (let ((first (u32-ref buf offset))) | |
78ff7847 | 194 | ((vector-ref disassemblers (logand first #xff)) buf offset first))) |
82e299f3 AW |
195 | |
196 | (define (u32-offset->addr offset context) | |
197 | "Given an offset into an image in 32-bit units, return the absolute | |
198 | address of that offset." | |
199 | (+ (debug-context-base context) (* offset 4))) | |
200 | ||
201 | (define (code-annotation code len offset start labels context) | |
202 | ;; FIXME: Print names for register loads and stores that correspond to | |
203 | ;; access to named locals. | |
204 | (define (reference-scm target) | |
205 | (unpack-scm (u32-offset->addr (+ offset target) context))) | |
206 | ||
207 | (define (dereference-scm target) | |
208 | (let ((addr (u32-offset->addr (+ offset target) | |
209 | context))) | |
210 | (pointer->scm | |
211 | (dereference-pointer (make-pointer addr))))) | |
212 | ||
213 | (match code | |
214 | (((or 'br | |
215 | 'br-if-nargs-ne 'br-if-nargs-lt 'br-if-nargs-gt | |
216 | 'br-if-true 'br-if-null 'br-if-nil 'br-if-pair 'br-if-struct | |
be8b62ca | 217 | 'br-if-char 'br-if-eq 'br-if-eqv 'br-if-equal |
82e299f3 AW |
218 | 'br-if-= 'br-if-< 'br-if-<= 'br-if-> 'br-if->=) _ ... target) |
219 | (list "-> ~A" (vector-ref labels (- (+ offset target) start)))) | |
be8b62ca AW |
220 | (('br-if-tc7 slot invert? tc7 target) |
221 | (list "~A -> ~A" | |
222 | (let ((tag (case tc7 | |
223 | ((5) "symbol?") | |
224 | ((7) "variable?") | |
225 | ((13) "vector?") | |
226 | ((15) "string?") | |
227 | (else (number->string tc7))))) | |
228 | (if invert? (string-append "not " tag) tag)) | |
229 | (vector-ref labels (- (+ offset target) start)))) | |
8d59d55e | 230 | (('prompt tag escape-only? proc-slot handler) |
82e299f3 AW |
231 | ;; The H is for handler. |
232 | (list "H -> ~A" (vector-ref labels (- (+ offset handler) start)))) | |
233 | (((or 'make-short-immediate 'make-long-immediate) _ imm) | |
234 | (list "~S" (unpack-scm imm))) | |
235 | (('make-long-long-immediate _ high low) | |
236 | (list "~S" (unpack-scm (logior (ash high 32) low)))) | |
237 | (('assert-nargs-ee/locals nargs locals) | |
7396d216 AW |
238 | ;; The nargs includes the procedure. |
239 | (list "~a arg~:p, ~a local~:p" (1- nargs) locals)) | |
82e299f3 AW |
240 | (('tail-call nargs proc) |
241 | (list "~a arg~:p" nargs)) | |
7396d216 | 242 | (('make-closure dst target nfree) |
82e299f3 AW |
243 | (let* ((addr (u32-offset->addr (+ offset target) context)) |
244 | (pdi (find-program-debug-info addr context))) | |
245 | ;; FIXME: Disassemble embedded closures as well. | |
7396d216 | 246 | (list "~A at 0x~X (~A free var~:p)" |
82e299f3 AW |
247 | (or (and pdi (program-debug-info-name pdi)) |
248 | "(anonymous procedure)") | |
7396d216 AW |
249 | addr |
250 | nfree))) | |
82e299f3 AW |
251 | (('make-non-immediate dst target) |
252 | (list "~@Y" (reference-scm target))) | |
486013d6 AW |
253 | (('builtin-ref dst idx) |
254 | (list "~A" (builtin-index->name idx))) | |
82e299f3 AW |
255 | (((or 'static-ref 'static-set!) _ target) |
256 | (list "~@Y" (dereference-scm target))) | |
257 | (('link-procedure! src target) | |
258 | (let* ((addr (u32-offset->addr (+ offset target) context)) | |
259 | (pdi (find-program-debug-info addr context))) | |
260 | (list "~A at 0x~X" | |
261 | (or (and pdi (program-debug-info-name pdi)) | |
262 | "(anonymous procedure)") | |
263 | addr))) | |
264 | (('resolve-module dst name public) | |
265 | (list "~a" (if (zero? public) "private" "public"))) | |
af95414f AW |
266 | (('toplevel-box _ var-offset mod-offset sym-offset bound?) |
267 | (list "`~A'~A" (dereference-scm sym-offset) | |
268 | (if bound? "" " (maybe unbound)"))) | |
269 | (('module-box _ var-offset mod-name-offset sym-offset bound?) | |
82e299f3 | 270 | (let ((mod-name (reference-scm mod-name-offset))) |
af95414f AW |
271 | (list "`(~A ~A ~A)'~A" (if (car mod-name) '@ '@@) (cdr mod-name) |
272 | (dereference-scm sym-offset) | |
273 | (if bound? "" " (maybe unbound)")))) | |
82e299f3 AW |
274 | (('load-typed-array dst type shape target len) |
275 | (let ((addr (u32-offset->addr (+ offset target) context))) | |
276 | (list "~a bytes from #x~X" len addr))) | |
277 | (_ #f))) | |
278 | ||
279 | (define (compute-labels bv start end) | |
280 | (let ((labels (make-vector (- end start) #f))) | |
281 | (define (add-label! pos header) | |
282 | (unless (vector-ref labels (- pos start)) | |
283 | (vector-set! labels (- pos start) header))) | |
284 | ||
285 | (let lp ((offset start)) | |
286 | (when (< offset end) | |
287 | (call-with-values (lambda () (disassemble-one bv offset)) | |
288 | (lambda (len elt) | |
289 | (match elt | |
290 | ((inst arg ...) | |
291 | (case inst | |
292 | ((br | |
293 | br-if-nargs-ne br-if-nargs-lt br-if-nargs-gt | |
294 | br-if-true br-if-null br-if-nil br-if-pair br-if-struct | |
295 | br-if-char br-if-tc7 br-if-eq br-if-eqv br-if-equal | |
296 | br-if-= br-if-< br-if-<= br-if-> br-if->=) | |
297 | (match arg | |
298 | ((_ ... target) | |
299 | (add-label! (+ offset target) "L")))) | |
300 | ((prompt) | |
301 | (match arg | |
302 | ((_ ... target) | |
70a20431 | 303 | (add-label! (+ offset target) "H"))))))) |
82e299f3 AW |
304 | (lp (+ offset len)))))) |
305 | (let lp ((offset start) (n 1)) | |
306 | (when (< offset end) | |
307 | (let* ((pos (- offset start)) | |
308 | (label (vector-ref labels pos))) | |
309 | (if label | |
310 | (begin | |
311 | (vector-set! labels | |
312 | pos | |
313 | (string->symbol | |
314 | (string-append label (number->string n)))) | |
315 | (lp (1+ offset) (1+ n))) | |
316 | (lp (1+ offset) n))))) | |
317 | labels)) | |
318 | ||
319 | (define (print-info port addr label info extra src) | |
320 | (when label | |
321 | (format port "~A:\n" label)) | |
322 | (format port "~4@S ~32S~@[;; ~1{~@?~}~]~@[~61t at ~a~]\n" | |
323 | addr info extra src)) | |
324 | ||
325 | (define (disassemble-buffer port bv start end context) | |
e9588e70 AW |
326 | (let ((labels (compute-labels bv start end)) |
327 | (sources (find-program-sources (u32-offset->addr start context) | |
328 | context))) | |
329 | (define (lookup-source addr) | |
330 | (let lp ((sources sources)) | |
331 | (match sources | |
332 | (() #f) | |
333 | ((source . sources) | |
334 | (let ((pc (source-pre-pc source))) | |
335 | (cond | |
336 | ((< pc addr) (lp sources)) | |
337 | ((= pc addr) | |
338 | (format #f "~a:~a:~a" | |
339 | (source-file source) | |
340 | (source-line-for-user source) | |
341 | (source-column source))) | |
342 | (else #f))))))) | |
82e299f3 AW |
343 | (let lp ((offset start)) |
344 | (when (< offset end) | |
345 | (call-with-values (lambda () (disassemble-one bv offset)) | |
346 | (lambda (len elt) | |
347 | (let ((pos (- offset start)) | |
e9588e70 | 348 | (addr (u32-offset->addr offset context)) |
82e299f3 AW |
349 | (annotation (code-annotation elt len offset start labels |
350 | context))) | |
e9588e70 AW |
351 | (print-info port pos (vector-ref labels pos) elt annotation |
352 | (lookup-source addr)) | |
82e299f3 AW |
353 | (lp (+ offset len))))))))) |
354 | ||
355 | (define* (disassemble-program program #:optional (port (current-output-port))) | |
356 | (cond | |
357 | ((find-program-debug-info (rtl-program-code program)) | |
358 | => (lambda (pdi) | |
359 | (format port "Disassembly of ~S at #x~X:\n\n" program | |
360 | (program-debug-info-addr pdi)) | |
361 | (disassemble-buffer port | |
362 | (program-debug-info-image pdi) | |
363 | (program-debug-info-u32-offset pdi) | |
364 | (program-debug-info-u32-offset-end pdi) | |
365 | (program-debug-info-context pdi)))) | |
366 | (else | |
367 | (format port "Debugging information unavailable.~%"))) | |
368 | (values)) | |
610295ec AW |
369 | |
370 | (define* (disassemble-image bv #:optional (port (current-output-port))) | |
371 | (let* ((ctx (debug-context-from-image bv)) | |
372 | (base (debug-context-text-base ctx))) | |
373 | (for-each-elf-symbol | |
374 | ctx | |
375 | (lambda (sym) | |
376 | (let ((name (elf-symbol-name sym)) | |
377 | (value (elf-symbol-value sym)) | |
378 | (size (elf-symbol-size sym))) | |
379 | (format port "Disassembly of ~A at #x~X:\n\n" | |
380 | (if (and (string? name) (not (string-null? name))) | |
381 | name | |
382 | "<unnamed function>") | |
383 | (+ base value)) | |
384 | (disassemble-buffer port | |
385 | bv | |
386 | (/ (+ base value) 4) | |
387 | (/ (+ base value size) 4) | |
388 | ctx) | |
389 | (display "\n\n" port))))) | |
390 | (values)) | |
93009a7a AW |
391 | |
392 | (define (disassemble-file file) | |
393 | (let* ((thunk (load-thunk-from-file file)) | |
394 | (elf (find-mapped-elf-image (rtl-program-code thunk)))) | |
395 | (disassemble-image elf))) |