3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2009 University of Urbana Champaign
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
22 (*****************************************************************************)
24 (*****************************************************************************)
26 * Is this module make all the tricks used in parsing_hacks and
27 * most definitions in standard.h obsolete ? It depends. In a
28 * static analysis context we want to be accurate, and so expand
29 * all the code that will make our type/callgraph analysis simpler.
30 * So we want to expand many macros, based on heuristics in this file.
31 * In a transformation context, we want to let the programmer
32 * match over certain constructs such as declarator, iterator,
33 * macro_field, etc, and in this case we want to parse as-is.
35 * What could be done is that some of the analysis performed in this
36 * file could then be injected in parsing_hacks, for instance via
37 * hints, to make the parse as-is job easier too.
41 * todo: right now I find dangerous macro based on ## and go upward
42 * to also include calling macros. But this dangerous macro itself
43 * may use other macros that looks ok but that should also be expanded
44 * because it defines some entities. So also recurse downward ?
46 * todo? do analysis a la Astec ? try infer the meaning of the macro
47 * from its body but also from its context of use ? Can then
48 * do a taxonomy of macro ? not just foreach or declarator but
49 * polymorphic function (e.g. MAX), type generator, etc. Cf astec paper
50 * or Ernst cpp study paper ?
54 (*****************************************************************************)
56 (*****************************************************************************)
58 (* callgraph of macros *)
60 type node
= (Common.filename
* Cpp_token_c.define_def
) list
ref
63 type callgraph_macros
= (key
, node
, edge
) Ograph_simple.ograph_mutable
65 let rootname = "__ROOT__"
67 (*****************************************************************************)
69 (*****************************************************************************)
70 let bodytoks_of_body body
=
72 | Cpp_token_c.DefineHint _
->
73 pr2
"weird, hint in cpp_analysis_c";
75 | Cpp_token_c.DefineBody xs
->
79 let build_empty_set () = new Osetb.osetb
Setb.empty
82 (*****************************************************************************)
84 (*****************************************************************************)
86 let build_callgraph_macros xs
=
87 let (g
: callgraph_macros
) = new Ograph_simple.ograph_mutable
in
89 g#add_node
rootname (ref []);
92 xs
+> List.iter
(fun (file
, (x
, def
)) ->
93 (* todo? if exist already ? *)
94 g#add_node x
(ref []);
95 g#add_arc
(rootname, x
) Direct
;
97 xs
+> List.iter
(fun (file
, (x
, def
)) ->
98 let node = g#nodes#find x
in
99 Common.push2
(file
, def
) node;
103 xs
+> List.iter
(fun (file
, (x
, def
)) ->
104 let (s
, params
, body
) = def
in
105 let toks = bodytoks_of_body body
in
106 toks +> List.iter
(fun tok
->
110 let _ = g#nodes#find x2
in
111 g#add_arc
(x
, x2
) Direct
;
123 (* ---------------------------------------------------------------------- *)
124 let check_no_loop_graph g
=
126 let self_referential = ref [] in
127 let macros_in_loop_with_path = ref [] in
129 let already = Hashtbl.create
101 in
131 let already_error_msg = Hashtbl.create
101 in
133 let rec aux_dfs path xi
=
134 if Hashtbl.mem
already xi
&& List.mem xi path
136 let node = g#nodes#find xi
in
139 | (file, _)::xs
-> file
140 | [] -> raise Impossible
142 (* in apache/srclib/apr/include/arch/win32/apr_dbg_win32_handles.h
143 * we get some __ROOT__ -> CreateMutexA -> CreateMutexA because
144 * the macro is self referential. Probably cpp has
145 * some special handling of such case and does not expand
149 let is_self_reference =
154 if not
is_self_reference && not
(Hashtbl.mem
already_error_msg xi
)
156 Hashtbl.add
already_error_msg xi
true;
157 pr2
(spf
"PB: loop in macro %s of file %s" xi
file);
158 pr2
(spf
"path is: %s" (Common.join
" -> " (List.rev
(xi
::path
))));
159 Common.push2
(xi
, path
) macros_in_loop_with_path;
162 Common.push2 xi
self_referential;
165 Hashtbl.add
already xi
true;
167 let succ = g#successors xi
in
168 let succ'
= succ#tolist
+> List.map fst
in
169 succ'
+> List.iter
(fun yi
->
170 aux_dfs (xi
::path
) yi
175 !self_referential, !macros_in_loop_with_path
177 (* ---------------------------------------------------------------------- *)
178 let slice_of_callgraph_macros (g
: callgraph_macros
) goodnodes
=
180 let (g'
: callgraph_macros
) = new Ograph_simple.ograph_mutable
in
182 goodnodes#tolist
+> List.iter
(fun k
->
183 let v = g#nodes#find k
in
186 goodnodes#tolist
+> List.iter
(fun k
->
187 let succ = g#successors k
in
188 let succ = Oset.mapo
(fun (k'
, edge
) -> k'
) (build_empty_set()) succ in
189 let inter = succ $
**$ goodnodes
in
190 inter#tolist
+> List.iter
(fun k'
->
191 g'#add_arc
(k
, k'
) Direct
;
196 (*****************************************************************************)
197 (* Macros expansion *)
198 (*****************************************************************************)
200 (* get the longuest one ? or the one that contains the dangerous macro ? *)
201 let get_single_file_and_def_of_node k
v =
203 | [] -> raise Impossible
204 | [file, def
] -> file, def
205 | (file, def
)::y
::ys
->
206 pr2
(spf
"multiple def for %s but I kept only one" k
);
209 module TV
= Token_views_c
212 (string, Cpp_token_c.define_def
) Hashtbl.t
->
213 Cpp_token_c.define_def
-> Cpp_token_c.define_def
) =
214 fun current_def def
->
215 let (s
, params
, body
) = def
in
218 | Cpp_token_c.DefineHint
_ ->
220 | Cpp_token_c.DefineBody xs
->
221 (* bugfix: we dont want to evalute the x ## b at this moment.
222 * so can not use fix_tokens_cpp in the same we use it
225 Parsing_hacks.fix_tokens_cpp ~macro_defs:current_def xs
229 let tokens2 = ref (tokens +> Common.acc_map
TV.mk_token_extended
) in
230 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff
in
231 let paren_grouped = TV.mk_parenthised
cleaner in
232 Cpp_token_c.apply_macro_defs
233 ~msg_apply_known_macro
:(fun s2
->
234 pr2
(spf
"APPLYING: %s in definition of %s" s2 s
))
235 ~msg_apply_known_macro_hint
:(fun s
->
237 ~evaluate_concatop
:false
238 ~inplace_when_single
:false
239 current_def
paren_grouped;
240 (* because the before field is used by apply_macro_defs *)
241 tokens2 := TV.rebuild_tokens_extented
!tokens2;
244 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff
in
247 Parsing_hacks.insert_virtual_positions
248 (cleaner +> Common.acc_map
(fun x
-> x
.TV.tok
))
251 Cpp_token_c.DefineBody
xs'
256 (* work by side effect as both the binding and callgraph are mutable
259 let no_inlining = ref false
261 let rec (recurse_expand_macro_topological_order
:
262 int -> (string, Cpp_token_c.define_def
) Hashtbl.t
->
263 callgraph_macros
-> unit) =
264 fun depth current_def g
->
268 g#nodes#tolist
+> List.iter
(fun (k
, v) ->
269 if k
=$
= rootname then ()
271 let def = get_single_file_and_def_of_node k
v +> snd
in
272 Hashtbl.add current_def k
def
275 let remaining = g#nodes#tolist
in
276 (match remaining with
277 | [] -> raise Impossible
279 assert (k
= rootname);
283 let leafs = (g#leaf_nodes
())#tolist
in
284 pr2
(spf
"step: %d, %s" depth
(leafs +> Common.join
" "));
286 Ograph_simple.print_ograph_generic
287 ~str_of_key
:(fun k
-> k
)
288 ~str_of_node
:(fun k
node -> k
)
289 (spf
"/tmp/graph-%d.dot" depth
)
292 assert(not
(null
leafs));
295 (* little specialisation to avoid useless work *)
298 leafs +> List.iter
(fun k
->
299 let node = g#nodes#find k
in
300 let def = get_single_file_and_def_of_node k
node +> snd
in
301 Hashtbl.add current_def k
def
305 leafs +> List.map
(fun k
->
306 let node = g#nodes#find k
in
307 let def = get_single_file_and_def_of_node k
node +> snd
in
308 let def'
= macro_expand current_def
def in
312 new_defs +> List.iter
(fun (k
,def) -> Hashtbl.add current_def k
def);
314 leafs +> List.iter
(fun k
-> g#del_leaf_node_and_its_edges k
);
315 recurse_expand_macro_topological_order
(depth
+1) current_def g
;
320 (*****************************************************************************)
321 (* Macros def analysis *)
322 (*****************************************************************************)
324 let is_dangerous_macro def =
325 let (s
, params
, body) = def in
326 let toks = bodytoks_of_body body in
328 (match params
, body with
330 (* ex: APU_DECLARE_DATA *)
331 | Cpp_token_c.NoParam
, Cpp_token_c.DefineBody
[] ->
336 (* ex: AP_DECLARE(x) x *)
337 | Cpp_token_c.Params
([s1
]), Cpp_token_c.DefineBody
[TIdent
(s2
,i1
)] ->
340 (* keyword aliases. eg: APR_inline __inline__ *)
341 | Cpp_token_c.NoParam
, Cpp_token_c.DefineBody
[x
] ->
350 | _ , Cpp_token_c.DefineBody
xs ->
351 (match List.rev
xs with
352 (* make extract_macros looping on apache, get some infinite "step x" *)
353 | TPtVirg
_::_ -> true
361 (toks +> List.exists
(fun tok
->
363 | TCppConcatOp
_ -> true
365 | Tattribute
(ii
) -> true
366 | TattributeNoarg
(ii
) -> true
368 (* FP with local variable.
370 s ==~ Parsing_hacks.regexp_annot && not (List.mem s
371 ["__FILE__";"__LINE__";"__FUNCTION__"])
377 let is_trivial_macro def =
378 let (s
, params
, body) = def in
379 match params
, body with
380 | Cpp_token_c.NoParam
, Cpp_token_c.DefineBody
[Parser_c.TInt
_]
381 (* no!!! those are not trivial macro, they are dangerous too.
382 | Cpp_token_c.NoParam, Cpp_token_c.DefineBody [] ->
389 | () when s ==~ Parsing_hacks.regexp_annot -> true
390 | () when List.exists (function
391 (*| Parser_c.Tattribute _ -> true*)
392 | Parser_c.TCppConcatOp
_ -> true
393 | _ -> false) bodytoks
400 (*****************************************************************************)
401 (* Main entry point *)
402 (*****************************************************************************)
404 let extract_dangerous_macros xs =
406 (* prepare initial set of macro definitions to work on *)
408 xs +> List.map
(fun (file, defs
) ->
409 defs
+> List.map
(fun def -> file, def)
413 all_macros +> Common.exclude
(fun (file,(x
,def)) -> is_trivial_macro def) in
415 (* initial set of problematic macros *)
416 let problematic_macros =
417 macros +> Common.filter
(fun (file, (x
, def)) -> is_dangerous_macro def) in
420 (* include the ancestors of problematic macros *)
422 build_callgraph_macros macros in
423 let self_referiential, macros_in_loop_with_path =
424 check_no_loop_graph g in
426 Ograph_simple.print_ograph_generic
427 ~str_of_key
:(fun k
-> k
)
428 ~str_of_node
:(fun k
node -> k
)
432 problematic_macros +> List.map
(fun (file, (x
, def)) -> x
) +> Common.nub
in
435 start +> List.fold_left
(fun acc x
->
436 if List.exists
(fun y
-> fst y
= x
) macros_in_loop_with_path
437 || List.mem x
self_referiential
439 pr2
(spf
"PB: ignoring %s macro as it is in a loop" x
);
443 let acc = acc#add x
in
444 let ancestors = g#
ancestors x
in
446 ) (build_empty_set ())
449 (* Now prepare for fixpoint expansion of macros to avoid doing
450 * the work in cpp_engine.
453 slice_of_callgraph_macros g finalset
455 Ograph_simple.print_ograph_generic
456 ~str_of_key
:(fun k
-> k
)
457 ~str_of_node
:(fun k
node -> k
)
462 (* do fixpoint expansion *)
463 let (binding
: (string, Cpp_token_c.define_def
) Hashtbl.t
) =
464 Hashtbl.create
101 in
465 (* work by side effects on the hashtbl and graph *)
466 recurse_expand_macro_topological_order
0 binding
sliced_g;
470 (* prepare final result *)
472 binding
+> Common.hash_to_list
+> List.map
(fun (x
, def) ->
473 let node = g#nodes#find x
in
474 let file = get_single_file_and_def_of_node x
node +> fst
in
479 pr2
(spf
"total macros numbers: %d"
480 (List.length
all_macros));
481 pr2
(spf
"problematic macros numbers: %d"
482 (List.length
problematic_macros));
483 pr2
(spf
"final (after closure) problematic macros numbers: %d"
484 (List.length
final_macros));
486 let grouped = Common.group_assoc_bykey_eff
final_macros in