Release of coccinelle 1.0.0-rc9
[bpt/coccinelle.git] / parsing_c / cpp_analysis_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2009 University of Urbana Champaign
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16 open Common
17
18 open Oset
19
20 open Parser_c
21
22 (*****************************************************************************)
23 (* Prelude *)
24 (*****************************************************************************)
25 (*
26 * Is this module make all the tricks used in parsing_hacks and
27 * most definitions in standard.h obsolete ? It depends. In a
28 * static analysis context we want to be accurate, and so expand
29 * all the code that will make our type/callgraph analysis simpler.
30 * So we want to expand many macros, based on heuristics in this file.
31 * In a transformation context, we want to let the programmer
32 * match over certain constructs such as declarator, iterator,
33 * macro_field, etc, and in this case we want to parse as-is.
34 *
35 * What could be done is that some of the analysis performed in this
36 * file could then be injected in parsing_hacks, for instance via
37 * hints, to make the parse as-is job easier too.
38 *
39 *
40 *
41 * todo: right now I find dangerous macro based on ## and go upward
42 * to also include calling macros. But this dangerous macro itself
43 * may use other macros that looks ok but that should also be expanded
44 * because it defines some entities. So also recurse downward ?
45 *
46 * todo? do analysis a la Astec ? try infer the meaning of the macro
47 * from its body but also from its context of use ? Can then
48 * do a taxonomy of macro ? not just foreach or declarator but
49 * polymorphic function (e.g. MAX), type generator, etc. Cf astec paper
50 * or Ernst cpp study paper ?
51 *
52 *)
53
54 (*****************************************************************************)
55 (* Types *)
56 (*****************************************************************************)
57
58 (* callgraph of macros *)
59 type key = string
60 type node = (Common.filename * Cpp_token_c.define_def) list ref
61 type edge = Direct
62
63 type callgraph_macros = (key, node, edge) Ograph_simple.ograph_mutable
64
65 let rootname = "__ROOT__"
66
67 (*****************************************************************************)
68 (* Helpers *)
69 (*****************************************************************************)
70 let bodytoks_of_body body =
71 match body with
72 | Cpp_token_c.DefineHint _ ->
73 pr2 "weird, hint in cpp_analysis_c";
74 []
75 | Cpp_token_c.DefineBody xs ->
76 xs
77
78
79 let build_empty_set () = new Osetb.osetb Setb.empty
80
81
82 (*****************************************************************************)
83 (* Builder *)
84 (*****************************************************************************)
85
86 let build_callgraph_macros xs =
87 let (g: callgraph_macros) = new Ograph_simple.ograph_mutable in
88
89 g#add_node rootname (ref []);
90
91 (* build nodes *)
92 xs +> List.iter (fun (file, (x, def)) ->
93 (* todo? if exist already ? *)
94 g#add_node x (ref []);
95 g#add_arc (rootname, x) Direct;
96 );
97 xs +> List.iter (fun (file, (x, def)) ->
98 let node = g#nodes#find x in
99 Common.push2 (file, def) node;
100 );
101
102 (* build edges *)
103 xs +> List.iter (fun (file, (x, def)) ->
104 let (s, params, body) = def in
105 let toks = bodytoks_of_body body in
106 toks +> List.iter (fun tok ->
107 match tok with
108 | TIdent (x2,ii) ->
109 (try
110 let _ = g#nodes#find x2 in
111 g#add_arc (x, x2) Direct;
112 with
113 Not_found -> ()
114 )
115 | _ ->
116 ()
117 );
118
119 );
120 g
121
122
123 (* ---------------------------------------------------------------------- *)
124 let check_no_loop_graph g =
125
126 let self_referential = ref [] in
127 let macros_in_loop_with_path = ref [] in
128
129 let already = Hashtbl.create 101 in
130
131 let already_error_msg = Hashtbl.create 101 in
132
133 let rec aux_dfs path xi =
134 if Hashtbl.mem already xi && List.mem xi path
135 then begin
136 let node = g#nodes#find xi in
137 let file =
138 match !node with
139 | (file, _)::xs -> file
140 | [] -> raise Impossible
141 in
142 (* in apache/srclib/apr/include/arch/win32/apr_dbg_win32_handles.h
143 * we get some __ROOT__ -> CreateMutexA -> CreateMutexA because
144 * the macro is self referential. Probably cpp has
145 * some special handling of such case and does not expand
146 * recursively.
147 *
148 *)
149 let is_self_reference =
150 match xi::path with
151 | x::y::z -> x = y
152 | _ -> false
153 in
154 if not is_self_reference && not (Hashtbl.mem already_error_msg xi)
155 then begin
156 Hashtbl.add already_error_msg xi true;
157 pr2 (spf "PB: loop in macro %s of file %s" xi file);
158 pr2 (spf "path is: %s" (Common.join " -> " (List.rev (xi::path))));
159 Common.push2 (xi, path) macros_in_loop_with_path;
160 end
161 else begin
162 Common.push2 xi self_referential;
163 end
164 end else begin
165 Hashtbl.add already xi true;
166 (* f xi path; *)
167 let succ = g#successors xi in
168 let succ' = succ#tolist +> List.map fst in
169 succ' +> List.iter (fun yi ->
170 aux_dfs (xi::path) yi
171 );
172 end
173 in
174 aux_dfs [] rootname;
175 !self_referential, !macros_in_loop_with_path
176
177 (* ---------------------------------------------------------------------- *)
178 let slice_of_callgraph_macros (g: callgraph_macros) goodnodes =
179
180 let (g': callgraph_macros) = new Ograph_simple.ograph_mutable in
181
182 goodnodes#tolist +> List.iter (fun k ->
183 let v = g#nodes#find k in
184 g'#add_node k v;
185 );
186 goodnodes#tolist +> List.iter (fun k ->
187 let succ = g#successors k in
188 let succ = Oset.mapo (fun (k', edge) -> k') (build_empty_set()) succ in
189 let inter = succ $**$ goodnodes in
190 inter#tolist +> List.iter (fun k' ->
191 g'#add_arc (k, k') Direct;
192 )
193 );
194 g'
195
196 (*****************************************************************************)
197 (* Macros expansion *)
198 (*****************************************************************************)
199
200 (* get the longuest one ? or the one that contains the dangerous macro ? *)
201 let get_single_file_and_def_of_node k v =
202 match !v with
203 | [] -> raise Impossible
204 | [file, def] -> file, def
205 | (file, def)::y::ys ->
206 pr2 (spf "multiple def for %s but I kept only one" k);
207 file, def
208
209 module TV = Token_views_c
210
211 let (macro_expand:
212 (string, Cpp_token_c.define_def) Hashtbl.t ->
213 Cpp_token_c.define_def -> Cpp_token_c.define_def) =
214 fun current_def def ->
215 let (s, params, body) = def in
216 let body' =
217 match body with
218 | Cpp_token_c.DefineHint _ ->
219 body
220 | Cpp_token_c.DefineBody xs ->
221 (* bugfix: we dont want to evalute the x ## b at this moment.
222 * so can not use fix_tokens_cpp in the same we use it
223 * to parse C code.
224 let xs' =
225 Parsing_hacks.fix_tokens_cpp ~macro_defs:current_def xs
226 in
227 *)
228 let tokens = xs in
229 let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
230 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
231 let paren_grouped = TV.mk_parenthised cleaner in
232 Cpp_token_c.apply_macro_defs
233 ~msg_apply_known_macro:(fun s2 ->
234 pr2 (spf "APPLYING: %s in definition of %s" s2 s))
235 ~msg_apply_known_macro_hint:(fun s ->
236 pr2 "hint")
237 ~evaluate_concatop:false
238 ~inplace_when_single:false
239 current_def paren_grouped;
240 (* because the before field is used by apply_macro_defs *)
241 tokens2 := TV.rebuild_tokens_extented !tokens2;
242
243 (* bugfix *)
244 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
245
246 let xs' =
247 Parsing_hacks.insert_virtual_positions
248 (cleaner +> Common.acc_map (fun x -> x.TV.tok))
249 in
250
251 Cpp_token_c.DefineBody xs'
252 in
253 (s, params, body')
254
255
256 (* work by side effect as both the binding and callgraph are mutable
257 * data structure
258 *)
259 let no_inlining = ref false
260
261 let rec (recurse_expand_macro_topological_order:
262 int -> (string, Cpp_token_c.define_def) Hashtbl.t ->
263 callgraph_macros -> unit) =
264 fun depth current_def g ->
265
266 (* naive: *)
267 if !no_inlining then
268 g#nodes#tolist +> List.iter (fun (k, v) ->
269 if k =$= rootname then ()
270 else
271 let def = get_single_file_and_def_of_node k v +> snd in
272 Hashtbl.add current_def k def
273 )
274 else
275 let remaining = g#nodes#tolist in
276 (match remaining with
277 | [] -> raise Impossible
278 | [(k,n)] ->
279 assert (k = rootname);
280 (* end recursion *)
281 ()
282 | x::y::xs ->
283 let leafs = (g#leaf_nodes ())#tolist in
284 pr2 (spf "step: %d, %s" depth (leafs +> Common.join " "));
285
286 Ograph_simple.print_ograph_generic
287 ~str_of_key:(fun k -> k)
288 ~str_of_node:(fun k node -> k)
289 (spf "/tmp/graph-%d.dot" depth)
290 g;
291
292 assert(not (null leafs));
293
294
295 (* little specialisation to avoid useless work *)
296 if depth = 0
297 then begin
298 leafs +> List.iter (fun k ->
299 let node = g#nodes#find k in
300 let def = get_single_file_and_def_of_node k node +> snd in
301 Hashtbl.add current_def k def
302 )
303 end else begin
304 let new_defs =
305 leafs +> List.map (fun k ->
306 let node = g#nodes#find k in
307 let def = get_single_file_and_def_of_node k node +> snd in
308 let def' = macro_expand current_def def in
309 k, def'
310 )
311 in
312 new_defs +> List.iter (fun (k,def) -> Hashtbl.add current_def k def);
313 end;
314 leafs +> List.iter (fun k -> g#del_leaf_node_and_its_edges k);
315 recurse_expand_macro_topological_order (depth+1) current_def g;
316 )
317
318
319
320 (*****************************************************************************)
321 (* Macros def analysis *)
322 (*****************************************************************************)
323
324 let is_dangerous_macro def =
325 let (s, params, body) = def in
326 let toks = bodytoks_of_body body in
327
328 (match params, body with
329
330 (* ex: APU_DECLARE_DATA *)
331 | Cpp_token_c.NoParam, Cpp_token_c.DefineBody [] ->
332 if s =~ ".*_H_*"
333 then false
334 else true
335
336 (* ex: AP_DECLARE(x) x *)
337 | Cpp_token_c.Params([s1]), Cpp_token_c.DefineBody [TIdent (s2,i1)] ->
338 s1 =$= s2
339
340 (* keyword aliases. eg: APR_inline __inline__ *)
341 | Cpp_token_c.NoParam, Cpp_token_c.DefineBody [x] ->
342 (match x with
343 | Tinline _ -> true
344 | Tconst _ -> true
345 | Tstatic _ -> true
346 | Textern _ -> true
347 | _ -> false
348 )
349
350 | _ , Cpp_token_c.DefineBody xs ->
351 (match List.rev xs with
352 (* make extract_macros looping on apache, get some infinite "step x" *)
353 | TPtVirg _::_ -> true
354 | _ -> false
355 )
356
357 | _ -> false
358 ) ||
359
360
361 (toks +> List.exists (fun tok ->
362 match tok with
363 | TCppConcatOp _ -> true
364
365 | Tattribute (ii) -> true
366 | TattributeNoarg (ii) -> true
367
368 (* FP with local variable.
369 | TIdent (s,ii) ->
370 s ==~ Parsing_hacks.regexp_annot && not (List.mem s
371 ["__FILE__";"__LINE__";"__FUNCTION__"])
372 *)
373 | _ -> false
374 ))
375
376
377 let is_trivial_macro def =
378 let (s, params, body) = def in
379 match params, body with
380 | Cpp_token_c.NoParam, Cpp_token_c.DefineBody [Parser_c.TInt _]
381 (* no!!! those are not trivial macro, they are dangerous too.
382 | Cpp_token_c.NoParam, Cpp_token_c.DefineBody [] ->
383 true
384 *)
385 | _ ->
386 false
387
388 (*
389 | () when s ==~ Parsing_hacks.regexp_annot -> true
390 | () when List.exists (function
391 (*| Parser_c.Tattribute _ -> true*)
392 | Parser_c.TCppConcatOp _ -> true
393 | _ -> false) bodytoks
394 -> true
395 | () -> false
396 in
397 *)
398
399
400 (*****************************************************************************)
401 (* Main entry point *)
402 (*****************************************************************************)
403
404 let extract_dangerous_macros xs =
405
406 (* prepare initial set of macro definitions to work on *)
407 let all_macros =
408 xs +> List.map (fun (file, defs) ->
409 defs +> List.map (fun def -> file, def)
410 ) +> List.flatten
411 in
412 let macros =
413 all_macros +> Common.exclude(fun (file,(x,def)) -> is_trivial_macro def) in
414
415 (* initial set of problematic macros *)
416 let problematic_macros =
417 macros +> Common.filter (fun (file, (x, def)) -> is_dangerous_macro def) in
418
419
420 (* include the ancestors of problematic macros *)
421 let g =
422 build_callgraph_macros macros in
423 let self_referiential, macros_in_loop_with_path =
424 check_no_loop_graph g in
425
426 Ograph_simple.print_ograph_generic
427 ~str_of_key:(fun k -> k)
428 ~str_of_node:(fun k node -> k)
429 "/tmp/graph.dot"
430 g;
431 let start =
432 problematic_macros +> List.map (fun (file, (x, def)) -> x) +> Common.nub in
433
434 let finalset =
435 start +> List.fold_left (fun acc x ->
436 if List.exists (fun y -> fst y = x) macros_in_loop_with_path
437 || List.mem x self_referiential
438 then begin
439 pr2 (spf "PB: ignoring %s macro as it is in a loop" x);
440 acc
441 end
442 else
443 let acc = acc#add x in
444 let ancestors = g#ancestors x in
445 acc $++$ ancestors
446 ) (build_empty_set ())
447 in
448
449 (* Now prepare for fixpoint expansion of macros to avoid doing
450 * the work in cpp_engine.
451 *)
452 let sliced_g =
453 slice_of_callgraph_macros g finalset
454 in
455 Ograph_simple.print_ograph_generic
456 ~str_of_key:(fun k -> k)
457 ~str_of_node:(fun k node -> k)
458 "/tmp/graph2.dot"
459 sliced_g;
460
461
462 (* do fixpoint expansion *)
463 let (binding: (string, Cpp_token_c.define_def) Hashtbl.t) =
464 Hashtbl.create 101 in
465 (* work by side effects on the hashtbl and graph *)
466 recurse_expand_macro_topological_order 0 binding sliced_g;
467
468
469
470 (* prepare final result *)
471 let final_macros =
472 binding +> Common.hash_to_list +> List.map (fun (x, def) ->
473 let node = g#nodes#find x in
474 let file = get_single_file_and_def_of_node x node +> fst in
475 (file, (x, def))
476 )
477 in
478
479 pr2 (spf "total macros numbers: %d"
480 (List.length all_macros));
481 pr2 (spf "problematic macros numbers: %d"
482 (List.length problematic_macros));
483 pr2 (spf "final (after closure) problematic macros numbers: %d"
484 (List.length final_macros));
485
486 let grouped = Common.group_assoc_bykey_eff final_macros in
487 grouped