7 (*****************************************************************************)
9 (*****************************************************************************)
11 * Is this module make all the tricks used in parsing_hacks and
12 * most definitions in standard.h obsolete ? It depends. In a
13 * static analysis context we want to be accurate, and so expand
14 * all the code that will make our type/callgraph analysis simpler.
15 * So we want to expand many macros, based on heuristics in this file.
16 * In a transformation context, we want to let the programmer
17 * match over certain constructs such as declarator, iterator,
18 * macro_field, etc, and in this case we want to parse as-is.
20 * What could be done is that some of the analysis performed in this
21 * file could then be injected in parsing_hacks, for instance via
22 * hints, to make the parse as-is job easier too.
26 * todo: right now I find dangerous macro based on ## and go upward
27 * to also include calling macros. But this dangerous macro itself
28 * may use other macros that looks ok but that should also be expanded
29 * because it defines some entities. So also recurse downward ?
31 * todo? do analysis a la Astec ? try infer the meaning of the macro
32 * from its body but also from its context of use ? Can then
33 * do a taxonomy of macro ? not just foreach or declarator but
34 * polymorphic function (e.g. MAX), type generator, etc. Cf astec paper
35 * or Ernst cpp study paper ?
39 (*****************************************************************************)
41 (*****************************************************************************)
43 (* callgraph of macros *)
45 type node
= (Common.filename
* Cpp_token_c.define_def
) list
ref
48 type callgraph_macros
= (key
, node
, edge
) Ograph_simple.ograph_mutable
50 let rootname = "__ROOT__"
52 (*****************************************************************************)
54 (*****************************************************************************)
55 let bodytoks_of_body body
=
57 | Cpp_token_c.DefineHint _
->
58 pr2
"weird, hint in cpp_analysis_c";
60 | Cpp_token_c.DefineBody xs
->
64 let build_empty_set () = new Osetb.osetb
Setb.empty
67 (*****************************************************************************)
69 (*****************************************************************************)
71 let build_callgraph_macros xs
=
72 let (g
: callgraph_macros
) = new Ograph_simple.ograph_mutable
in
74 g#add_node
rootname (ref []);
77 xs
+> List.iter
(fun (file
, (x
, def
)) ->
78 (* todo? if exist already ? *)
79 g#add_node x
(ref []);
80 g#add_arc
(rootname, x
) Direct
;
82 xs
+> List.iter
(fun (file
, (x
, def
)) ->
83 let node = g#nodes#find x
in
84 Common.push2
(file
, def
) node;
88 xs
+> List.iter
(fun (file
, (x
, def
)) ->
89 let (s
, params
, body
) = def
in
90 let toks = bodytoks_of_body body
in
91 toks +> List.iter
(fun tok
->
95 let _ = g#nodes#find x2
in
96 g#add_arc
(x
, x2
) Direct
;
108 (* ---------------------------------------------------------------------- *)
109 let check_no_loop_graph g
=
111 let self_referential = ref [] in
112 let macros_in_loop_with_path = ref [] in
114 let already = Hashtbl.create
101 in
116 let already_error_msg = Hashtbl.create
101 in
118 let rec aux_dfs path xi
=
119 if Hashtbl.mem
already xi
&& List.mem xi path
121 let node = g#nodes#find xi
in
124 | (file, _)::xs
-> file
125 | [] -> raise
(Impossible
74)
127 (* in apache/srclib/apr/include/arch/win32/apr_dbg_win32_handles.h
128 * we get some __ROOT__ -> CreateMutexA -> CreateMutexA because
129 * the macro is self referential. Probably cpp has
130 * some special handling of such case and does not expand
134 let is_self_reference =
139 if not
is_self_reference && not
(Hashtbl.mem
already_error_msg xi
)
141 Hashtbl.add
already_error_msg xi
true;
142 pr2
(spf
"PB: loop in macro %s of file %s" xi
file);
143 pr2
(spf
"path is: %s" (Common.join
" -> " (List.rev
(xi
::path
))));
144 Common.push2
(xi
, path
) macros_in_loop_with_path;
147 Common.push2 xi
self_referential;
150 Hashtbl.add
already xi
true;
152 let succ = g#successors xi
in
153 let succ'
= succ#tolist
+> List.map fst
in
154 succ'
+> List.iter
(fun yi
->
155 aux_dfs (xi
::path
) yi
160 !self_referential, !macros_in_loop_with_path
162 (* ---------------------------------------------------------------------- *)
163 let slice_of_callgraph_macros (g
: callgraph_macros
) goodnodes
=
165 let (g'
: callgraph_macros
) = new Ograph_simple.ograph_mutable
in
167 goodnodes#tolist
+> List.iter
(fun k
->
168 let v = g#nodes#find k
in
171 goodnodes#tolist
+> List.iter
(fun k
->
172 let succ = g#successors k
in
173 let succ = Oset.mapo
(fun (k'
, edge
) -> k'
) (build_empty_set()) succ in
174 let inter = succ $
**$ goodnodes
in
175 inter#tolist
+> List.iter
(fun k'
->
176 g'#add_arc
(k
, k'
) Direct
;
181 (*****************************************************************************)
182 (* Macros expansion *)
183 (*****************************************************************************)
185 (* get the longuest one ? or the one that contains the dangerous macro ? *)
186 let get_single_file_and_def_of_node k
v =
188 | [] -> raise
(Impossible
75)
189 | [file, def
] -> file, def
190 | (file, def
)::y
::ys
->
191 pr2
(spf
"multiple def for %s but I kept only one" k
);
194 module TV
= Token_views_c
197 (string, Cpp_token_c.define_def
) Hashtbl.t
->
198 Cpp_token_c.define_def
-> Cpp_token_c.define_def
) =
199 fun current_def def
->
200 let (s
, params
, body
) = def
in
203 | Cpp_token_c.DefineHint
_ ->
205 | Cpp_token_c.DefineBody xs
->
206 (* bugfix: we dont want to evalute the x ## b at this moment.
207 * so can not use fix_tokens_cpp in the same we use it
210 Parsing_hacks.fix_tokens_cpp ~macro_defs:current_def xs
214 let tokens2 = ref (tokens +> Common.acc_map
TV.mk_token_extended
) in
215 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff
in
216 let paren_grouped = TV.mk_parenthised
cleaner in
217 Cpp_token_c.apply_macro_defs
218 ~msg_apply_known_macro
:(fun s2
->
219 pr2
(spf
"APPLYING: %s in definition of %s" s2 s
))
220 ~msg_apply_known_macro_hint
:(fun s
->
222 ~evaluate_concatop
:false
223 ~inplace_when_single
:false
224 current_def
paren_grouped;
225 (* because the before field is used by apply_macro_defs *)
226 tokens2 := TV.rebuild_tokens_extented
!tokens2;
229 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff
in
232 Parsing_hacks.insert_virtual_positions
233 (cleaner +> Common.acc_map
(fun x
-> x
.TV.tok
))
236 Cpp_token_c.DefineBody
xs'
241 (* work by side effect as both the binding and callgraph are mutable
244 let no_inlining = ref false
246 let rec (recurse_expand_macro_topological_order
:
247 int -> (string, Cpp_token_c.define_def
) Hashtbl.t
->
248 callgraph_macros
-> unit) =
249 fun depth current_def g
->
253 g#nodes#tolist
+> List.iter
(fun (k
, v) ->
254 if k
=$
= rootname then ()
256 let def = get_single_file_and_def_of_node k
v +> snd
in
257 Hashtbl.add current_def k
def
260 let remaining = g#nodes#tolist
in
261 (match remaining with
262 | [] -> () (* christia: commented this out: raise (Impossible 76)
263 * This seems to be the case when there are no
264 * problematic macros. Which is possible.
267 assert (k
= rootname);
271 let leafs = (g#leaf_nodes
())#tolist
in
272 pr2
(spf
"step: %d, %s" depth
(leafs +> Common.join
" "));
274 Ograph_simple.print_ograph_generic
275 ~str_of_key
:(fun k
-> k
)
276 ~str_of_node
:(fun k
node -> k
)
277 (spf
"/tmp/graph-%d.dot" depth
)
280 assert(not
(null
leafs));
283 (* little specialisation to avoid useless work *)
286 leafs +> List.iter
(fun k
->
287 let node = g#nodes#find k
in
288 let def = get_single_file_and_def_of_node k
node +> snd
in
289 Hashtbl.add current_def k
def
293 leafs +> List.map
(fun k
->
294 let node = g#nodes#find k
in
295 let def = get_single_file_and_def_of_node k
node +> snd
in
296 let def'
= macro_expand current_def
def in
300 new_defs +> List.iter
(fun (k
,def) -> Hashtbl.add current_def k
def);
302 leafs +> List.iter
(fun k
-> g#del_leaf_node_and_its_edges k
);
303 recurse_expand_macro_topological_order
(depth
+1) current_def g
;
308 (*****************************************************************************)
309 (* Macros def analysis *)
310 (*****************************************************************************)
312 let is_dangerous_macro def =
313 let (s
, params
, body) = def in
314 let toks = bodytoks_of_body body in
316 (match params
, body with
318 (* ex: APU_DECLARE_DATA *)
319 | Cpp_token_c.NoParam
, Cpp_token_c.DefineBody
[] ->
324 (* ex: AP_DECLARE(x) x *)
325 | Cpp_token_c.Params
([s1
]),
326 Cpp_token_c.DefineBody
[TIdent
(s2
,i1
)] ->
328 Cpp_token_c.FixedArg s1
-> s1
=$
= s2
329 | Cpp_token_c.VariadicArg
_ -> false)
331 (* keyword aliases. eg: APR_inline __inline__ *)
332 | Cpp_token_c.NoParam
, Cpp_token_c.DefineBody
[x
] ->
341 | _ , Cpp_token_c.DefineBody
xs ->
342 (match List.rev
xs with
343 (* make extract_macros looping on apache, get some infinite "step x" *)
344 | TPtVirg
_::_ -> true
352 (toks +> List.exists
(fun tok
->
354 | TCppConcatOp
_ -> true
356 | Tattribute
(ii
) -> true
357 | TattributeNoarg
(ii
) -> true
359 (* FP with local variable.
361 s ==~ Parsing_hacks.regexp_annot && not (List.mem s
362 ["__FILE__";"__LINE__";"__FUNCTION__"])
368 let is_trivial_macro def =
369 let (s
, params
, body) = def in
370 match params
, body with
371 | Cpp_token_c.NoParam
, Cpp_token_c.DefineBody
[Parser_c.TInt
_]
372 (* no!!! those are not trivial macro, they are dangerous too.
373 | Cpp_token_c.NoParam, Cpp_token_c.DefineBody [] ->
380 | () when s ==~ Parsing_hacks.regexp_annot -> true
381 | () when List.exists (function
382 (*| Parser_c.Tattribute _ -> true*)
383 | Parser_c.TCppConcatOp
_ -> true
384 | _ -> false) bodytoks
391 (*****************************************************************************)
392 (* Main entry point *)
393 (*****************************************************************************)
395 let extract_dangerous_macros xs =
397 (* prepare initial set of macro definitions to work on *)
399 xs +> List.map
(fun (file, defs
) ->
400 defs
+> List.map
(fun def -> file, def)
404 all_macros +> Common.exclude
(fun (file,(x
,def)) -> is_trivial_macro def) in
406 (* initial set of problematic macros *)
407 let problematic_macros =
408 macros +> Common.filter
(fun (file, (x
, def)) -> is_dangerous_macro def) in
411 (* include the ancestors of problematic macros *)
413 build_callgraph_macros macros in
414 let self_referiential, macros_in_loop_with_path =
415 check_no_loop_graph g in
417 Ograph_simple.print_ograph_generic
418 ~str_of_key
:(fun k
-> k
)
419 ~str_of_node
:(fun k
node -> k
)
423 problematic_macros +> List.map
(fun (file, (x
, def)) -> x
) +> Common.nub
in
426 start +> List.fold_left
(fun acc x
->
427 if List.exists
(fun y
-> fst y
= x
) macros_in_loop_with_path
428 || List.mem x
self_referiential
430 pr2
(spf
"PB: ignoring %s macro as it is in a loop" x
);
434 let acc = acc#add x
in
435 let ancestors = g#
ancestors x
in
437 ) (build_empty_set ())
440 (* Now prepare for fixpoint expansion of macros to avoid doing
441 * the work in cpp_engine.
444 slice_of_callgraph_macros g finalset
446 Ograph_simple.print_ograph_generic
447 ~str_of_key
:(fun k
-> k
)
448 ~str_of_node
:(fun k
node -> k
)
453 (* do fixpoint expansion *)
454 let (binding
: (string, Cpp_token_c.define_def
) Hashtbl.t
) =
455 Hashtbl.create
101 in
456 (* work by side effects on the hashtbl and graph *)
457 recurse_expand_macro_topological_order
0 binding
sliced_g;
461 (* prepare final result *)
463 binding
+> Common.hash_to_list
+> List.map
(fun (x
, def) ->
464 let node = g#nodes#find x
in
465 let file = get_single_file_and_def_of_node x
node +> fst
in
470 pr2
(spf
"total macros numbers: %d"
471 (List.length
all_macros));
472 pr2
(spf
"problematic macros numbers: %d"
473 (List.length
problematic_macros));
474 pr2
(spf
"final (after closure) problematic macros numbers: %d"
475 (List.length
final_macros));
477 let grouped = Common.group_assoc_bykey_eff
final_macros in