Commit | Line | Data |
---|---|---|
ae4735db | 1 | open Common |
978fd7e5 | 2 | |
ae4735db | 3 | open Oset |
978fd7e5 | 4 | |
ae4735db | 5 | open Parser_c |
978fd7e5 C |
6 | |
7 | (*****************************************************************************) | |
8 | (* Prelude *) | |
9 | (*****************************************************************************) | |
10 | (* | |
ae4735db C |
11 | * Is this module make all the tricks used in parsing_hacks and |
12 | * most definitions in standard.h obsolete ? It depends. In a | |
978fd7e5 C |
13 | * static analysis context we want to be accurate, and so expand |
14 | * all the code that will make our type/callgraph analysis simpler. | |
15 | * So we want to expand many macros, based on heuristics in this file. | |
16 | * In a transformation context, we want to let the programmer | |
ae4735db | 17 | * match over certain constructs such as declarator, iterator, |
978fd7e5 | 18 | * macro_field, etc, and in this case we want to parse as-is. |
ae4735db | 19 | * |
978fd7e5 C |
20 | * What could be done is that some of the analysis performed in this |
21 | * file could then be injected in parsing_hacks, for instance via | |
22 | * hints, to make the parse as-is job easier too. | |
ae4735db C |
23 | * |
24 | * | |
978fd7e5 C |
25 | * |
26 | * todo: right now I find dangerous macro based on ## and go upward | |
27 | * to also include calling macros. But this dangerous macro itself | |
28 | * may use other macros that looks ok but that should also be expanded | |
29 | * because it defines some entities. So also recurse downward ? | |
ae4735db | 30 | * |
978fd7e5 C |
31 | * todo? do analysis a la Astec ? try infer the meaning of the macro |
32 | * from its body but also from its context of use ? Can then | |
33 | * do a taxonomy of macro ? not just foreach or declarator but | |
ae4735db | 34 | * polymorphic function (e.g. MAX), type generator, etc. Cf astec paper |
978fd7e5 | 35 | * or Ernst cpp study paper ? |
ae4735db | 36 | * |
978fd7e5 C |
37 | *) |
38 | ||
39 | (*****************************************************************************) | |
40 | (* Types *) | |
41 | (*****************************************************************************) | |
42 | ||
43 | (* callgraph of macros *) | |
44 | type key = string | |
45 | type node = (Common.filename * Cpp_token_c.define_def) list ref | |
46 | type edge = Direct | |
47 | ||
48 | type callgraph_macros = (key, node, edge) Ograph_simple.ograph_mutable | |
49 | ||
50 | let rootname = "__ROOT__" | |
ae4735db | 51 | |
978fd7e5 C |
52 | (*****************************************************************************) |
53 | (* Helpers *) | |
54 | (*****************************************************************************) | |
55 | let bodytoks_of_body body = | |
56 | match body with | |
ae4735db | 57 | | Cpp_token_c.DefineHint _ -> |
978fd7e5 C |
58 | pr2 "weird, hint in cpp_analysis_c"; |
59 | [] | |
ae4735db | 60 | | Cpp_token_c.DefineBody xs -> |
978fd7e5 C |
61 | xs |
62 | ||
63 | ||
64 | let build_empty_set () = new Osetb.osetb Setb.empty | |
65 | ||
66 | ||
67 | (*****************************************************************************) | |
68 | (* Builder *) | |
69 | (*****************************************************************************) | |
70 | ||
ae4735db | 71 | let build_callgraph_macros xs = |
978fd7e5 C |
72 | let (g: callgraph_macros) = new Ograph_simple.ograph_mutable in |
73 | ||
74 | g#add_node rootname (ref []); | |
75 | ||
76 | (* build nodes *) | |
ae4735db | 77 | xs +> List.iter (fun (file, (x, def)) -> |
978fd7e5 C |
78 | (* todo? if exist already ? *) |
79 | g#add_node x (ref []); | |
80 | g#add_arc (rootname, x) Direct; | |
81 | ); | |
ae4735db | 82 | xs +> List.iter (fun (file, (x, def)) -> |
978fd7e5 C |
83 | let node = g#nodes#find x in |
84 | Common.push2 (file, def) node; | |
85 | ); | |
86 | ||
87 | (* build edges *) | |
ae4735db C |
88 | xs +> List.iter (fun (file, (x, def)) -> |
89 | let (s, params, body) = def in | |
978fd7e5 | 90 | let toks = bodytoks_of_body body in |
ae4735db | 91 | toks +> List.iter (fun tok -> |
978fd7e5 | 92 | match tok with |
ae4735db C |
93 | | TIdent (x2,ii) -> |
94 | (try | |
978fd7e5 C |
95 | let _ = g#nodes#find x2 in |
96 | g#add_arc (x, x2) Direct; | |
ae4735db | 97 | with |
978fd7e5 C |
98 | Not_found -> () |
99 | ) | |
ae4735db | 100 | | _ -> |
978fd7e5 C |
101 | () |
102 | ); | |
ae4735db | 103 | |
978fd7e5 C |
104 | ); |
105 | g | |
106 | ||
107 | ||
108 | (* ---------------------------------------------------------------------- *) | |
ae4735db | 109 | let check_no_loop_graph g = |
978fd7e5 | 110 | |
ae4735db | 111 | let self_referential = ref [] in |
978fd7e5 C |
112 | let macros_in_loop_with_path = ref [] in |
113 | ||
114 | let already = Hashtbl.create 101 in | |
115 | ||
116 | let already_error_msg = Hashtbl.create 101 in | |
117 | ||
ae4735db | 118 | let rec aux_dfs path xi = |
978fd7e5 C |
119 | if Hashtbl.mem already xi && List.mem xi path |
120 | then begin | |
121 | let node = g#nodes#find xi in | |
ae4735db | 122 | let file = |
978fd7e5 C |
123 | match !node with |
124 | | (file, _)::xs -> file | |
abad11c5 | 125 | | [] -> raise (Impossible 74) |
978fd7e5 C |
126 | in |
127 | (* in apache/srclib/apr/include/arch/win32/apr_dbg_win32_handles.h | |
128 | * we get some __ROOT__ -> CreateMutexA -> CreateMutexA because | |
129 | * the macro is self referential. Probably cpp has | |
130 | * some special handling of such case and does not expand | |
ae4735db C |
131 | * recursively. |
132 | * | |
978fd7e5 | 133 | *) |
ae4735db | 134 | let is_self_reference = |
978fd7e5 C |
135 | match xi::path with |
136 | | x::y::z -> x = y | |
137 | | _ -> false | |
138 | in | |
139 | if not is_self_reference && not (Hashtbl.mem already_error_msg xi) | |
140 | then begin | |
141 | Hashtbl.add already_error_msg xi true; | |
142 | pr2 (spf "PB: loop in macro %s of file %s" xi file); | |
143 | pr2 (spf "path is: %s" (Common.join " -> " (List.rev (xi::path)))); | |
144 | Common.push2 (xi, path) macros_in_loop_with_path; | |
145 | end | |
ae4735db | 146 | else begin |
978fd7e5 C |
147 | Common.push2 xi self_referential; |
148 | end | |
149 | end else begin | |
150 | Hashtbl.add already xi true; | |
151 | (* f xi path; *) | |
152 | let succ = g#successors xi in | |
153 | let succ' = succ#tolist +> List.map fst in | |
ae4735db | 154 | succ' +> List.iter (fun yi -> |
978fd7e5 C |
155 | aux_dfs (xi::path) yi |
156 | ); | |
157 | end | |
158 | in | |
159 | aux_dfs [] rootname; | |
160 | !self_referential, !macros_in_loop_with_path | |
ae4735db | 161 | |
978fd7e5 C |
162 | (* ---------------------------------------------------------------------- *) |
163 | let slice_of_callgraph_macros (g: callgraph_macros) goodnodes = | |
164 | ||
165 | let (g': callgraph_macros) = new Ograph_simple.ograph_mutable in | |
166 | ||
ae4735db | 167 | goodnodes#tolist +> List.iter (fun k -> |
978fd7e5 C |
168 | let v = g#nodes#find k in |
169 | g'#add_node k v; | |
170 | ); | |
ae4735db | 171 | goodnodes#tolist +> List.iter (fun k -> |
978fd7e5 C |
172 | let succ = g#successors k in |
173 | let succ = Oset.mapo (fun (k', edge) -> k') (build_empty_set()) succ in | |
174 | let inter = succ $**$ goodnodes in | |
ae4735db | 175 | inter#tolist +> List.iter (fun k' -> |
978fd7e5 C |
176 | g'#add_arc (k, k') Direct; |
177 | ) | |
178 | ); | |
179 | g' | |
180 | ||
181 | (*****************************************************************************) | |
182 | (* Macros expansion *) | |
183 | (*****************************************************************************) | |
184 | ||
ae4735db | 185 | (* get the longuest one ? or the one that contains the dangerous macro ? *) |
978fd7e5 C |
186 | let get_single_file_and_def_of_node k v = |
187 | match !v with | |
abad11c5 | 188 | | [] -> raise (Impossible 75) |
978fd7e5 | 189 | | [file, def] -> file, def |
ae4735db | 190 | | (file, def)::y::ys -> |
978fd7e5 C |
191 | pr2 (spf "multiple def for %s but I kept only one" k); |
192 | file, def | |
193 | ||
194 | module TV = Token_views_c | |
195 | ||
ae4735db C |
196 | let (macro_expand: |
197 | (string, Cpp_token_c.define_def) Hashtbl.t -> | |
978fd7e5 | 198 | Cpp_token_c.define_def -> Cpp_token_c.define_def) = |
ae4735db C |
199 | fun current_def def -> |
200 | let (s, params, body) = def in | |
201 | let body' = | |
978fd7e5 | 202 | match body with |
ae4735db | 203 | | Cpp_token_c.DefineHint _ -> |
978fd7e5 | 204 | body |
ae4735db | 205 | | Cpp_token_c.DefineBody xs -> |
978fd7e5 C |
206 | (* bugfix: we dont want to evalute the x ## b at this moment. |
207 | * so can not use fix_tokens_cpp in the same we use it | |
208 | * to parse C code. | |
ae4735db | 209 | let xs' = |
978fd7e5 C |
210 | Parsing_hacks.fix_tokens_cpp ~macro_defs:current_def xs |
211 | in | |
212 | *) | |
213 | let tokens = xs in | |
214 | let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in | |
215 | let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in | |
216 | let paren_grouped = TV.mk_parenthised cleaner in | |
217 | Cpp_token_c.apply_macro_defs | |
ae4735db | 218 | ~msg_apply_known_macro:(fun s2 -> |
978fd7e5 | 219 | pr2 (spf "APPLYING: %s in definition of %s" s2 s)) |
ae4735db | 220 | ~msg_apply_known_macro_hint:(fun s -> |
978fd7e5 | 221 | pr2 "hint") |
ae4735db | 222 | ~evaluate_concatop:false |
978fd7e5 C |
223 | ~inplace_when_single:false |
224 | current_def paren_grouped; | |
225 | (* because the before field is used by apply_macro_defs *) | |
ae4735db | 226 | tokens2 := TV.rebuild_tokens_extented !tokens2; |
978fd7e5 C |
227 | |
228 | (* bugfix *) | |
229 | let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in | |
230 | ||
ae4735db C |
231 | let xs' = |
232 | Parsing_hacks.insert_virtual_positions | |
978fd7e5 C |
233 | (cleaner +> Common.acc_map (fun x -> x.TV.tok)) |
234 | in | |
235 | ||
236 | Cpp_token_c.DefineBody xs' | |
237 | in | |
238 | (s, params, body') | |
239 | ||
240 | ||
ae4735db | 241 | (* work by side effect as both the binding and callgraph are mutable |
978fd7e5 C |
242 | * data structure |
243 | *) | |
244 | let no_inlining = ref false | |
245 | ||
ae4735db C |
246 | let rec (recurse_expand_macro_topological_order: |
247 | int -> (string, Cpp_token_c.define_def) Hashtbl.t -> | |
978fd7e5 C |
248 | callgraph_macros -> unit) = |
249 | fun depth current_def g -> | |
250 | ||
251 | (* naive: *) | |
ae4735db C |
252 | if !no_inlining then |
253 | g#nodes#tolist +> List.iter (fun (k, v) -> | |
254 | if k =$= rootname then () | |
978fd7e5 C |
255 | else |
256 | let def = get_single_file_and_def_of_node k v +> snd in | |
257 | Hashtbl.add current_def k def | |
258 | ) | |
ae4735db | 259 | else |
978fd7e5 C |
260 | let remaining = g#nodes#tolist in |
261 | (match remaining with | |
abad11c5 C |
262 | | [] -> () (* christia: commented this out: raise (Impossible 76) |
263 | * This seems to be the case when there are no | |
264 | * problematic macros. Which is possible. | |
265 | *) | |
ae4735db | 266 | | [(k,n)] -> |
978fd7e5 C |
267 | assert (k = rootname); |
268 | (* end recursion *) | |
269 | () | |
abad11c5 | 270 | | (k, v)::y::xs -> |
978fd7e5 C |
271 | let leafs = (g#leaf_nodes ())#tolist in |
272 | pr2 (spf "step: %d, %s" depth (leafs +> Common.join " ")); | |
273 | ||
274 | Ograph_simple.print_ograph_generic | |
275 | ~str_of_key:(fun k -> k) | |
276 | ~str_of_node:(fun k node -> k) | |
277 | (spf "/tmp/graph-%d.dot" depth) | |
278 | g; | |
279 | ||
abad11c5 | 280 | assert(not (null leafs)); |
978fd7e5 C |
281 | |
282 | ||
283 | (* little specialisation to avoid useless work *) | |
ae4735db | 284 | if depth = 0 |
978fd7e5 | 285 | then begin |
ae4735db | 286 | leafs +> List.iter (fun k -> |
978fd7e5 C |
287 | let node = g#nodes#find k in |
288 | let def = get_single_file_and_def_of_node k node +> snd in | |
289 | Hashtbl.add current_def k def | |
290 | ) | |
291 | end else begin | |
ae4735db C |
292 | let new_defs = |
293 | leafs +> List.map (fun k -> | |
978fd7e5 C |
294 | let node = g#nodes#find k in |
295 | let def = get_single_file_and_def_of_node k node +> snd in | |
296 | let def' = macro_expand current_def def in | |
297 | k, def' | |
298 | ) | |
299 | in | |
300 | new_defs +> List.iter (fun (k,def) -> Hashtbl.add current_def k def); | |
301 | end; | |
302 | leafs +> List.iter (fun k -> g#del_leaf_node_and_its_edges k); | |
303 | recurse_expand_macro_topological_order (depth+1) current_def g; | |
304 | ) | |
305 | ||
306 | ||
307 | ||
308 | (*****************************************************************************) | |
309 | (* Macros def analysis *) | |
310 | (*****************************************************************************) | |
311 | ||
ae4735db C |
312 | let is_dangerous_macro def = |
313 | let (s, params, body) = def in | |
978fd7e5 C |
314 | let toks = bodytoks_of_body body in |
315 | ||
316 | (match params, body with | |
317 | ||
318 | (* ex: APU_DECLARE_DATA *) | |
ae4735db | 319 | | Cpp_token_c.NoParam, Cpp_token_c.DefineBody [] -> |
978fd7e5 C |
320 | if s =~ ".*_H_*" |
321 | then false | |
322 | else true | |
323 | ||
324 | (* ex: AP_DECLARE(x) x *) | |
abad11c5 C |
325 | | Cpp_token_c.Params([s1]), |
326 | Cpp_token_c.DefineBody [TIdent (s2,i1)] -> | |
327 | (match s1 with | |
328 | Cpp_token_c.FixedArg s1 -> s1 =$= s2 | |
329 | | Cpp_token_c.VariadicArg _ -> false) | |
978fd7e5 C |
330 | |
331 | (* keyword aliases. eg: APR_inline __inline__ *) | |
332 | | Cpp_token_c.NoParam, Cpp_token_c.DefineBody [x] -> | |
333 | (match x with | |
334 | | Tinline _ -> true | |
335 | | Tconst _ -> true | |
336 | | Tstatic _ -> true | |
337 | | Textern _ -> true | |
338 | | _ -> false | |
339 | ) | |
340 | ||
341 | | _ , Cpp_token_c.DefineBody xs -> | |
342 | (match List.rev xs with | |
343 | (* make extract_macros looping on apache, get some infinite "step x" *) | |
344 | | TPtVirg _::_ -> true | |
345 | | _ -> false | |
346 | ) | |
347 | ||
348 | | _ -> false | |
349 | ) || | |
978fd7e5 | 350 | |
ae4735db C |
351 | |
352 | (toks +> List.exists (fun tok -> | |
978fd7e5 C |
353 | match tok with |
354 | | TCppConcatOp _ -> true | |
355 | ||
356 | | Tattribute (ii) -> true | |
ae4735db | 357 | | TattributeNoarg (ii) -> true |
978fd7e5 C |
358 | |
359 | (* FP with local variable. | |
ae4735db C |
360 | | TIdent (s,ii) -> |
361 | s ==~ Parsing_hacks.regexp_annot && not (List.mem s | |
978fd7e5 C |
362 | ["__FILE__";"__LINE__";"__FUNCTION__"]) |
363 | *) | |
364 | | _ -> false | |
365 | )) | |
366 | ||
367 | ||
ae4735db C |
368 | let is_trivial_macro def = |
369 | let (s, params, body) = def in | |
978fd7e5 C |
370 | match params, body with |
371 | | Cpp_token_c.NoParam, Cpp_token_c.DefineBody [Parser_c.TInt _] | |
372 | (* no!!! those are not trivial macro, they are dangerous too. | |
ae4735db | 373 | | Cpp_token_c.NoParam, Cpp_token_c.DefineBody [] -> |
978fd7e5 C |
374 | true |
375 | *) | |
ae4735db | 376 | | _ -> |
978fd7e5 C |
377 | false |
378 | ||
379 | (* | |
380 | | () when s ==~ Parsing_hacks.regexp_annot -> true | |
381 | | () when List.exists (function | |
382 | (*| Parser_c.Tattribute _ -> true*) | |
383 | | Parser_c.TCppConcatOp _ -> true | |
ae4735db | 384 | | _ -> false) bodytoks |
978fd7e5 C |
385 | -> true |
386 | | () -> false | |
387 | in | |
388 | *) | |
389 | ||
ae4735db | 390 | |
978fd7e5 C |
391 | (*****************************************************************************) |
392 | (* Main entry point *) | |
393 | (*****************************************************************************) | |
394 | ||
ae4735db | 395 | let extract_dangerous_macros xs = |
978fd7e5 C |
396 | |
397 | (* prepare initial set of macro definitions to work on *) | |
ae4735db C |
398 | let all_macros = |
399 | xs +> List.map (fun (file, defs) -> | |
978fd7e5 C |
400 | defs +> List.map (fun def -> file, def) |
401 | ) +> List.flatten | |
402 | in | |
ae4735db | 403 | let macros = |
978fd7e5 C |
404 | all_macros +> Common.exclude(fun (file,(x,def)) -> is_trivial_macro def) in |
405 | ||
406 | (* initial set of problematic macros *) | |
ae4735db | 407 | let problematic_macros = |
978fd7e5 C |
408 | macros +> Common.filter (fun (file, (x, def)) -> is_dangerous_macro def) in |
409 | ||
410 | ||
411 | (* include the ancestors of problematic macros *) | |
ae4735db | 412 | let g = |
978fd7e5 | 413 | build_callgraph_macros macros in |
ae4735db | 414 | let self_referiential, macros_in_loop_with_path = |
978fd7e5 C |
415 | check_no_loop_graph g in |
416 | ||
417 | Ograph_simple.print_ograph_generic | |
418 | ~str_of_key:(fun k -> k) | |
419 | ~str_of_node:(fun k node -> k) | |
420 | "/tmp/graph.dot" | |
421 | g; | |
ae4735db | 422 | let start = |
978fd7e5 | 423 | problematic_macros +> List.map (fun (file, (x, def)) -> x) +> Common.nub in |
ae4735db C |
424 | |
425 | let finalset = | |
426 | start +> List.fold_left (fun acc x -> | |
978fd7e5 C |
427 | if List.exists (fun y -> fst y = x) macros_in_loop_with_path |
428 | || List.mem x self_referiential | |
429 | then begin | |
430 | pr2 (spf "PB: ignoring %s macro as it is in a loop" x); | |
431 | acc | |
ae4735db C |
432 | end |
433 | else | |
978fd7e5 C |
434 | let acc = acc#add x in |
435 | let ancestors = g#ancestors x in | |
436 | acc $++$ ancestors | |
437 | ) (build_empty_set ()) | |
438 | in | |
439 | ||
978fd7e5 C |
440 | (* Now prepare for fixpoint expansion of macros to avoid doing |
441 | * the work in cpp_engine. | |
442 | *) | |
ae4735db | 443 | let sliced_g = |
978fd7e5 C |
444 | slice_of_callgraph_macros g finalset |
445 | in | |
446 | Ograph_simple.print_ograph_generic | |
447 | ~str_of_key:(fun k -> k) | |
448 | ~str_of_node:(fun k node -> k) | |
449 | "/tmp/graph2.dot" | |
450 | sliced_g; | |
451 | ||
452 | ||
453 | (* do fixpoint expansion *) | |
ae4735db | 454 | let (binding: (string, Cpp_token_c.define_def) Hashtbl.t) = |
978fd7e5 C |
455 | Hashtbl.create 101 in |
456 | (* work by side effects on the hashtbl and graph *) | |
457 | recurse_expand_macro_topological_order 0 binding sliced_g; | |
458 | ||
459 | ||
ae4735db | 460 | |
978fd7e5 | 461 | (* prepare final result *) |
ae4735db C |
462 | let final_macros = |
463 | binding +> Common.hash_to_list +> List.map (fun (x, def) -> | |
978fd7e5 C |
464 | let node = g#nodes#find x in |
465 | let file = get_single_file_and_def_of_node x node +> fst in | |
466 | (file, (x, def)) | |
467 | ) | |
468 | in | |
469 | ||
470 | pr2 (spf "total macros numbers: %d" | |
471 | (List.length all_macros)); | |
472 | pr2 (spf "problematic macros numbers: %d" | |
473 | (List.length problematic_macros)); | |
474 | pr2 (spf "final (after closure) problematic macros numbers: %d" | |
475 | (List.length final_macros)); | |
476 | ||
477 | let grouped = Common.group_assoc_bykey_eff final_macros in | |
478 | grouped | |
abad11c5 | 479 |