permit multiline comments and strings in macros
[bpt/coccinelle.git] / parsing_c / cpp_analysis_c.ml
CommitLineData
ae4735db 1open Common
978fd7e5 2
ae4735db 3open Oset
978fd7e5 4
ae4735db 5open Parser_c
978fd7e5
C
6
7(*****************************************************************************)
8(* Prelude *)
9(*****************************************************************************)
10(*
ae4735db
C
11 * Is this module make all the tricks used in parsing_hacks and
12 * most definitions in standard.h obsolete ? It depends. In a
978fd7e5
C
13 * static analysis context we want to be accurate, and so expand
14 * all the code that will make our type/callgraph analysis simpler.
15 * So we want to expand many macros, based on heuristics in this file.
16 * In a transformation context, we want to let the programmer
ae4735db 17 * match over certain constructs such as declarator, iterator,
978fd7e5 18 * macro_field, etc, and in this case we want to parse as-is.
ae4735db 19 *
978fd7e5
C
20 * What could be done is that some of the analysis performed in this
21 * file could then be injected in parsing_hacks, for instance via
22 * hints, to make the parse as-is job easier too.
ae4735db
C
23 *
24 *
978fd7e5
C
25 *
26 * todo: right now I find dangerous macro based on ## and go upward
27 * to also include calling macros. But this dangerous macro itself
28 * may use other macros that looks ok but that should also be expanded
29 * because it defines some entities. So also recurse downward ?
ae4735db 30 *
978fd7e5
C
31 * todo? do analysis a la Astec ? try infer the meaning of the macro
32 * from its body but also from its context of use ? Can then
33 * do a taxonomy of macro ? not just foreach or declarator but
ae4735db 34 * polymorphic function (e.g. MAX), type generator, etc. Cf astec paper
978fd7e5 35 * or Ernst cpp study paper ?
ae4735db 36 *
978fd7e5
C
37 *)
38
39(*****************************************************************************)
40(* Types *)
41(*****************************************************************************)
42
43(* callgraph of macros *)
44type key = string
45type node = (Common.filename * Cpp_token_c.define_def) list ref
46type edge = Direct
47
48type callgraph_macros = (key, node, edge) Ograph_simple.ograph_mutable
49
50let rootname = "__ROOT__"
ae4735db 51
978fd7e5
C
52(*****************************************************************************)
53(* Helpers *)
54(*****************************************************************************)
55let bodytoks_of_body body =
56 match body with
ae4735db 57 | Cpp_token_c.DefineHint _ ->
978fd7e5
C
58 pr2 "weird, hint in cpp_analysis_c";
59 []
ae4735db 60 | Cpp_token_c.DefineBody xs ->
978fd7e5
C
61 xs
62
63
64let build_empty_set () = new Osetb.osetb Setb.empty
65
66
67(*****************************************************************************)
68(* Builder *)
69(*****************************************************************************)
70
ae4735db 71let build_callgraph_macros xs =
978fd7e5
C
72 let (g: callgraph_macros) = new Ograph_simple.ograph_mutable in
73
74 g#add_node rootname (ref []);
75
76 (* build nodes *)
ae4735db 77 xs +> List.iter (fun (file, (x, def)) ->
978fd7e5
C
78 (* todo? if exist already ? *)
79 g#add_node x (ref []);
80 g#add_arc (rootname, x) Direct;
81 );
ae4735db 82 xs +> List.iter (fun (file, (x, def)) ->
978fd7e5
C
83 let node = g#nodes#find x in
84 Common.push2 (file, def) node;
85 );
86
87 (* build edges *)
ae4735db
C
88 xs +> List.iter (fun (file, (x, def)) ->
89 let (s, params, body) = def in
978fd7e5 90 let toks = bodytoks_of_body body in
ae4735db 91 toks +> List.iter (fun tok ->
978fd7e5 92 match tok with
ae4735db
C
93 | TIdent (x2,ii) ->
94 (try
978fd7e5
C
95 let _ = g#nodes#find x2 in
96 g#add_arc (x, x2) Direct;
ae4735db 97 with
978fd7e5
C
98 Not_found -> ()
99 )
ae4735db 100 | _ ->
978fd7e5
C
101 ()
102 );
ae4735db 103
978fd7e5
C
104 );
105 g
106
107
108(* ---------------------------------------------------------------------- *)
ae4735db 109let check_no_loop_graph g =
978fd7e5 110
ae4735db 111 let self_referential = ref [] in
978fd7e5
C
112 let macros_in_loop_with_path = ref [] in
113
114 let already = Hashtbl.create 101 in
115
116 let already_error_msg = Hashtbl.create 101 in
117
ae4735db 118 let rec aux_dfs path xi =
978fd7e5
C
119 if Hashtbl.mem already xi && List.mem xi path
120 then begin
121 let node = g#nodes#find xi in
ae4735db 122 let file =
978fd7e5
C
123 match !node with
124 | (file, _)::xs -> file
abad11c5 125 | [] -> raise (Impossible 74)
978fd7e5
C
126 in
127 (* in apache/srclib/apr/include/arch/win32/apr_dbg_win32_handles.h
128 * we get some __ROOT__ -> CreateMutexA -> CreateMutexA because
129 * the macro is self referential. Probably cpp has
130 * some special handling of such case and does not expand
ae4735db
C
131 * recursively.
132 *
978fd7e5 133 *)
ae4735db 134 let is_self_reference =
978fd7e5
C
135 match xi::path with
136 | x::y::z -> x = y
137 | _ -> false
138 in
139 if not is_self_reference && not (Hashtbl.mem already_error_msg xi)
140 then begin
141 Hashtbl.add already_error_msg xi true;
142 pr2 (spf "PB: loop in macro %s of file %s" xi file);
143 pr2 (spf "path is: %s" (Common.join " -> " (List.rev (xi::path))));
144 Common.push2 (xi, path) macros_in_loop_with_path;
145 end
ae4735db 146 else begin
978fd7e5
C
147 Common.push2 xi self_referential;
148 end
149 end else begin
150 Hashtbl.add already xi true;
151 (* f xi path; *)
152 let succ = g#successors xi in
153 let succ' = succ#tolist +> List.map fst in
ae4735db 154 succ' +> List.iter (fun yi ->
978fd7e5
C
155 aux_dfs (xi::path) yi
156 );
157 end
158 in
159 aux_dfs [] rootname;
160 !self_referential, !macros_in_loop_with_path
ae4735db 161
978fd7e5
C
162(* ---------------------------------------------------------------------- *)
163let slice_of_callgraph_macros (g: callgraph_macros) goodnodes =
164
165 let (g': callgraph_macros) = new Ograph_simple.ograph_mutable in
166
ae4735db 167 goodnodes#tolist +> List.iter (fun k ->
978fd7e5
C
168 let v = g#nodes#find k in
169 g'#add_node k v;
170 );
ae4735db 171 goodnodes#tolist +> List.iter (fun k ->
978fd7e5
C
172 let succ = g#successors k in
173 let succ = Oset.mapo (fun (k', edge) -> k') (build_empty_set()) succ in
174 let inter = succ $**$ goodnodes in
ae4735db 175 inter#tolist +> List.iter (fun k' ->
978fd7e5
C
176 g'#add_arc (k, k') Direct;
177 )
178 );
179 g'
180
181(*****************************************************************************)
182(* Macros expansion *)
183(*****************************************************************************)
184
ae4735db 185(* get the longuest one ? or the one that contains the dangerous macro ? *)
978fd7e5
C
186let get_single_file_and_def_of_node k v =
187 match !v with
abad11c5 188 | [] -> raise (Impossible 75)
978fd7e5 189 | [file, def] -> file, def
ae4735db 190 | (file, def)::y::ys ->
978fd7e5
C
191 pr2 (spf "multiple def for %s but I kept only one" k);
192 file, def
193
194module TV = Token_views_c
195
ae4735db
C
196let (macro_expand:
197 (string, Cpp_token_c.define_def) Hashtbl.t ->
978fd7e5 198 Cpp_token_c.define_def -> Cpp_token_c.define_def) =
ae4735db
C
199 fun current_def def ->
200 let (s, params, body) = def in
201 let body' =
978fd7e5 202 match body with
ae4735db 203 | Cpp_token_c.DefineHint _ ->
978fd7e5 204 body
ae4735db 205 | Cpp_token_c.DefineBody xs ->
978fd7e5
C
206 (* bugfix: we dont want to evalute the x ## b at this moment.
207 * so can not use fix_tokens_cpp in the same we use it
208 * to parse C code.
ae4735db 209 let xs' =
978fd7e5
C
210 Parsing_hacks.fix_tokens_cpp ~macro_defs:current_def xs
211 in
212 *)
213 let tokens = xs in
214 let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
215 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
216 let paren_grouped = TV.mk_parenthised cleaner in
217 Cpp_token_c.apply_macro_defs
ae4735db 218 ~msg_apply_known_macro:(fun s2 ->
978fd7e5 219 pr2 (spf "APPLYING: %s in definition of %s" s2 s))
ae4735db 220 ~msg_apply_known_macro_hint:(fun s ->
978fd7e5 221 pr2 "hint")
ae4735db 222 ~evaluate_concatop:false
978fd7e5
C
223 ~inplace_when_single:false
224 current_def paren_grouped;
225 (* because the before field is used by apply_macro_defs *)
ae4735db 226 tokens2 := TV.rebuild_tokens_extented !tokens2;
978fd7e5
C
227
228 (* bugfix *)
229 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
230
ae4735db
C
231 let xs' =
232 Parsing_hacks.insert_virtual_positions
978fd7e5
C
233 (cleaner +> Common.acc_map (fun x -> x.TV.tok))
234 in
235
236 Cpp_token_c.DefineBody xs'
237 in
238 (s, params, body')
239
240
ae4735db 241(* work by side effect as both the binding and callgraph are mutable
978fd7e5
C
242 * data structure
243 *)
244let no_inlining = ref false
245
ae4735db
C
246let rec (recurse_expand_macro_topological_order:
247 int -> (string, Cpp_token_c.define_def) Hashtbl.t ->
978fd7e5
C
248 callgraph_macros -> unit) =
249 fun depth current_def g ->
250
251 (* naive: *)
ae4735db
C
252 if !no_inlining then
253 g#nodes#tolist +> List.iter (fun (k, v) ->
254 if k =$= rootname then ()
978fd7e5
C
255 else
256 let def = get_single_file_and_def_of_node k v +> snd in
257 Hashtbl.add current_def k def
258 )
ae4735db 259 else
978fd7e5
C
260 let remaining = g#nodes#tolist in
261 (match remaining with
abad11c5
C
262 | [] -> () (* christia: commented this out: raise (Impossible 76)
263 * This seems to be the case when there are no
264 * problematic macros. Which is possible.
265 *)
ae4735db 266 | [(k,n)] ->
978fd7e5
C
267 assert (k = rootname);
268 (* end recursion *)
269 ()
abad11c5 270 | (k, v)::y::xs ->
978fd7e5
C
271 let leafs = (g#leaf_nodes ())#tolist in
272 pr2 (spf "step: %d, %s" depth (leafs +> Common.join " "));
273
274 Ograph_simple.print_ograph_generic
275 ~str_of_key:(fun k -> k)
276 ~str_of_node:(fun k node -> k)
277 (spf "/tmp/graph-%d.dot" depth)
278 g;
279
abad11c5 280 assert(not (null leafs));
978fd7e5
C
281
282
283 (* little specialisation to avoid useless work *)
ae4735db 284 if depth = 0
978fd7e5 285 then begin
ae4735db 286 leafs +> List.iter (fun k ->
978fd7e5
C
287 let node = g#nodes#find k in
288 let def = get_single_file_and_def_of_node k node +> snd in
289 Hashtbl.add current_def k def
290 )
291 end else begin
ae4735db
C
292 let new_defs =
293 leafs +> List.map (fun k ->
978fd7e5
C
294 let node = g#nodes#find k in
295 let def = get_single_file_and_def_of_node k node +> snd in
296 let def' = macro_expand current_def def in
297 k, def'
298 )
299 in
300 new_defs +> List.iter (fun (k,def) -> Hashtbl.add current_def k def);
301 end;
302 leafs +> List.iter (fun k -> g#del_leaf_node_and_its_edges k);
303 recurse_expand_macro_topological_order (depth+1) current_def g;
304 )
305
306
307
308(*****************************************************************************)
309(* Macros def analysis *)
310(*****************************************************************************)
311
ae4735db
C
312let is_dangerous_macro def =
313 let (s, params, body) = def in
978fd7e5
C
314 let toks = bodytoks_of_body body in
315
316 (match params, body with
317
318 (* ex: APU_DECLARE_DATA *)
ae4735db 319 | Cpp_token_c.NoParam, Cpp_token_c.DefineBody [] ->
978fd7e5
C
320 if s =~ ".*_H_*"
321 then false
322 else true
323
324 (* ex: AP_DECLARE(x) x *)
abad11c5
C
325 | Cpp_token_c.Params([s1]),
326 Cpp_token_c.DefineBody [TIdent (s2,i1)] ->
327 (match s1 with
328 Cpp_token_c.FixedArg s1 -> s1 =$= s2
329 | Cpp_token_c.VariadicArg _ -> false)
978fd7e5
C
330
331 (* keyword aliases. eg: APR_inline __inline__ *)
332 | Cpp_token_c.NoParam, Cpp_token_c.DefineBody [x] ->
333 (match x with
334 | Tinline _ -> true
335 | Tconst _ -> true
336 | Tstatic _ -> true
337 | Textern _ -> true
338 | _ -> false
339 )
340
341 | _ , Cpp_token_c.DefineBody xs ->
342 (match List.rev xs with
343 (* make extract_macros looping on apache, get some infinite "step x" *)
344 | TPtVirg _::_ -> true
345 | _ -> false
346 )
347
348 | _ -> false
349 ) ||
978fd7e5 350
ae4735db
C
351
352 (toks +> List.exists (fun tok ->
978fd7e5
C
353 match tok with
354 | TCppConcatOp _ -> true
355
356 | Tattribute (ii) -> true
ae4735db 357 | TattributeNoarg (ii) -> true
978fd7e5
C
358
359(* FP with local variable.
ae4735db
C
360 | TIdent (s,ii) ->
361 s ==~ Parsing_hacks.regexp_annot && not (List.mem s
978fd7e5
C
362 ["__FILE__";"__LINE__";"__FUNCTION__"])
363*)
364 | _ -> false
365 ))
366
367
ae4735db
C
368let is_trivial_macro def =
369 let (s, params, body) = def in
978fd7e5
C
370 match params, body with
371 | Cpp_token_c.NoParam, Cpp_token_c.DefineBody [Parser_c.TInt _]
372(* no!!! those are not trivial macro, they are dangerous too.
ae4735db 373 | Cpp_token_c.NoParam, Cpp_token_c.DefineBody [] ->
978fd7e5
C
374 true
375*)
ae4735db 376 | _ ->
978fd7e5
C
377 false
378
379(*
380 | () when s ==~ Parsing_hacks.regexp_annot -> true
381 | () when List.exists (function
382 (*| Parser_c.Tattribute _ -> true*)
383 | Parser_c.TCppConcatOp _ -> true
ae4735db 384 | _ -> false) bodytoks
978fd7e5
C
385 -> true
386 | () -> false
387 in
388*)
389
ae4735db 390
978fd7e5
C
391(*****************************************************************************)
392(* Main entry point *)
393(*****************************************************************************)
394
ae4735db 395let extract_dangerous_macros xs =
978fd7e5
C
396
397 (* prepare initial set of macro definitions to work on *)
ae4735db
C
398 let all_macros =
399 xs +> List.map (fun (file, defs) ->
978fd7e5
C
400 defs +> List.map (fun def -> file, def)
401 ) +> List.flatten
402 in
ae4735db 403 let macros =
978fd7e5
C
404 all_macros +> Common.exclude(fun (file,(x,def)) -> is_trivial_macro def) in
405
406 (* initial set of problematic macros *)
ae4735db 407 let problematic_macros =
978fd7e5
C
408 macros +> Common.filter (fun (file, (x, def)) -> is_dangerous_macro def) in
409
410
411 (* include the ancestors of problematic macros *)
ae4735db 412 let g =
978fd7e5 413 build_callgraph_macros macros in
ae4735db 414 let self_referiential, macros_in_loop_with_path =
978fd7e5
C
415 check_no_loop_graph g in
416
417 Ograph_simple.print_ograph_generic
418 ~str_of_key:(fun k -> k)
419 ~str_of_node:(fun k node -> k)
420 "/tmp/graph.dot"
421 g;
ae4735db 422 let start =
978fd7e5 423 problematic_macros +> List.map (fun (file, (x, def)) -> x) +> Common.nub in
ae4735db
C
424
425 let finalset =
426 start +> List.fold_left (fun acc x ->
978fd7e5
C
427 if List.exists (fun y -> fst y = x) macros_in_loop_with_path
428 || List.mem x self_referiential
429 then begin
430 pr2 (spf "PB: ignoring %s macro as it is in a loop" x);
431 acc
ae4735db
C
432 end
433 else
978fd7e5
C
434 let acc = acc#add x in
435 let ancestors = g#ancestors x in
436 acc $++$ ancestors
437 ) (build_empty_set ())
438 in
439
978fd7e5
C
440 (* Now prepare for fixpoint expansion of macros to avoid doing
441 * the work in cpp_engine.
442 *)
ae4735db 443 let sliced_g =
978fd7e5
C
444 slice_of_callgraph_macros g finalset
445 in
446 Ograph_simple.print_ograph_generic
447 ~str_of_key:(fun k -> k)
448 ~str_of_node:(fun k node -> k)
449 "/tmp/graph2.dot"
450 sliced_g;
451
452
453 (* do fixpoint expansion *)
ae4735db 454 let (binding: (string, Cpp_token_c.define_def) Hashtbl.t) =
978fd7e5
C
455 Hashtbl.create 101 in
456 (* work by side effects on the hashtbl and graph *)
457 recurse_expand_macro_topological_order 0 binding sliced_g;
458
459
ae4735db 460
978fd7e5 461 (* prepare final result *)
ae4735db
C
462 let final_macros =
463 binding +> Common.hash_to_list +> List.map (fun (x, def) ->
978fd7e5
C
464 let node = g#nodes#find x in
465 let file = get_single_file_and_def_of_node x node +> fst in
466 (file, (x, def))
467 )
468 in
469
470 pr2 (spf "total macros numbers: %d"
471 (List.length all_macros));
472 pr2 (spf "problematic macros numbers: %d"
473 (List.length problematic_macros));
474 pr2 (spf "final (after closure) problematic macros numbers: %d"
475 (List.length final_macros));
476
477 let grouped = Common.group_assoc_bykey_eff final_macros in
478 grouped
abad11c5 479