permit multiline comments and strings in macros
[bpt/coccinelle.git] / parsing_c / cpp_analysis_c.ml
1 open Common
2
3 open Oset
4
5 open Parser_c
6
7 (*****************************************************************************)
8 (* Prelude *)
9 (*****************************************************************************)
10 (*
11 * Is this module make all the tricks used in parsing_hacks and
12 * most definitions in standard.h obsolete ? It depends. In a
13 * static analysis context we want to be accurate, and so expand
14 * all the code that will make our type/callgraph analysis simpler.
15 * So we want to expand many macros, based on heuristics in this file.
16 * In a transformation context, we want to let the programmer
17 * match over certain constructs such as declarator, iterator,
18 * macro_field, etc, and in this case we want to parse as-is.
19 *
20 * What could be done is that some of the analysis performed in this
21 * file could then be injected in parsing_hacks, for instance via
22 * hints, to make the parse as-is job easier too.
23 *
24 *
25 *
26 * todo: right now I find dangerous macro based on ## and go upward
27 * to also include calling macros. But this dangerous macro itself
28 * may use other macros that looks ok but that should also be expanded
29 * because it defines some entities. So also recurse downward ?
30 *
31 * todo? do analysis a la Astec ? try infer the meaning of the macro
32 * from its body but also from its context of use ? Can then
33 * do a taxonomy of macro ? not just foreach or declarator but
34 * polymorphic function (e.g. MAX), type generator, etc. Cf astec paper
35 * or Ernst cpp study paper ?
36 *
37 *)
38
39 (*****************************************************************************)
40 (* Types *)
41 (*****************************************************************************)
42
43 (* callgraph of macros *)
44 type key = string
45 type node = (Common.filename * Cpp_token_c.define_def) list ref
46 type edge = Direct
47
48 type callgraph_macros = (key, node, edge) Ograph_simple.ograph_mutable
49
50 let rootname = "__ROOT__"
51
52 (*****************************************************************************)
53 (* Helpers *)
54 (*****************************************************************************)
55 let bodytoks_of_body body =
56 match body with
57 | Cpp_token_c.DefineHint _ ->
58 pr2 "weird, hint in cpp_analysis_c";
59 []
60 | Cpp_token_c.DefineBody xs ->
61 xs
62
63
64 let build_empty_set () = new Osetb.osetb Setb.empty
65
66
67 (*****************************************************************************)
68 (* Builder *)
69 (*****************************************************************************)
70
71 let build_callgraph_macros xs =
72 let (g: callgraph_macros) = new Ograph_simple.ograph_mutable in
73
74 g#add_node rootname (ref []);
75
76 (* build nodes *)
77 xs +> List.iter (fun (file, (x, def)) ->
78 (* todo? if exist already ? *)
79 g#add_node x (ref []);
80 g#add_arc (rootname, x) Direct;
81 );
82 xs +> List.iter (fun (file, (x, def)) ->
83 let node = g#nodes#find x in
84 Common.push2 (file, def) node;
85 );
86
87 (* build edges *)
88 xs +> List.iter (fun (file, (x, def)) ->
89 let (s, params, body) = def in
90 let toks = bodytoks_of_body body in
91 toks +> List.iter (fun tok ->
92 match tok with
93 | TIdent (x2,ii) ->
94 (try
95 let _ = g#nodes#find x2 in
96 g#add_arc (x, x2) Direct;
97 with
98 Not_found -> ()
99 )
100 | _ ->
101 ()
102 );
103
104 );
105 g
106
107
108 (* ---------------------------------------------------------------------- *)
109 let check_no_loop_graph g =
110
111 let self_referential = ref [] in
112 let macros_in_loop_with_path = ref [] in
113
114 let already = Hashtbl.create 101 in
115
116 let already_error_msg = Hashtbl.create 101 in
117
118 let rec aux_dfs path xi =
119 if Hashtbl.mem already xi && List.mem xi path
120 then begin
121 let node = g#nodes#find xi in
122 let file =
123 match !node with
124 | (file, _)::xs -> file
125 | [] -> raise (Impossible 74)
126 in
127 (* in apache/srclib/apr/include/arch/win32/apr_dbg_win32_handles.h
128 * we get some __ROOT__ -> CreateMutexA -> CreateMutexA because
129 * the macro is self referential. Probably cpp has
130 * some special handling of such case and does not expand
131 * recursively.
132 *
133 *)
134 let is_self_reference =
135 match xi::path with
136 | x::y::z -> x = y
137 | _ -> false
138 in
139 if not is_self_reference && not (Hashtbl.mem already_error_msg xi)
140 then begin
141 Hashtbl.add already_error_msg xi true;
142 pr2 (spf "PB: loop in macro %s of file %s" xi file);
143 pr2 (spf "path is: %s" (Common.join " -> " (List.rev (xi::path))));
144 Common.push2 (xi, path) macros_in_loop_with_path;
145 end
146 else begin
147 Common.push2 xi self_referential;
148 end
149 end else begin
150 Hashtbl.add already xi true;
151 (* f xi path; *)
152 let succ = g#successors xi in
153 let succ' = succ#tolist +> List.map fst in
154 succ' +> List.iter (fun yi ->
155 aux_dfs (xi::path) yi
156 );
157 end
158 in
159 aux_dfs [] rootname;
160 !self_referential, !macros_in_loop_with_path
161
162 (* ---------------------------------------------------------------------- *)
163 let slice_of_callgraph_macros (g: callgraph_macros) goodnodes =
164
165 let (g': callgraph_macros) = new Ograph_simple.ograph_mutable in
166
167 goodnodes#tolist +> List.iter (fun k ->
168 let v = g#nodes#find k in
169 g'#add_node k v;
170 );
171 goodnodes#tolist +> List.iter (fun k ->
172 let succ = g#successors k in
173 let succ = Oset.mapo (fun (k', edge) -> k') (build_empty_set()) succ in
174 let inter = succ $**$ goodnodes in
175 inter#tolist +> List.iter (fun k' ->
176 g'#add_arc (k, k') Direct;
177 )
178 );
179 g'
180
181 (*****************************************************************************)
182 (* Macros expansion *)
183 (*****************************************************************************)
184
185 (* get the longuest one ? or the one that contains the dangerous macro ? *)
186 let get_single_file_and_def_of_node k v =
187 match !v with
188 | [] -> raise (Impossible 75)
189 | [file, def] -> file, def
190 | (file, def)::y::ys ->
191 pr2 (spf "multiple def for %s but I kept only one" k);
192 file, def
193
194 module TV = Token_views_c
195
196 let (macro_expand:
197 (string, Cpp_token_c.define_def) Hashtbl.t ->
198 Cpp_token_c.define_def -> Cpp_token_c.define_def) =
199 fun current_def def ->
200 let (s, params, body) = def in
201 let body' =
202 match body with
203 | Cpp_token_c.DefineHint _ ->
204 body
205 | Cpp_token_c.DefineBody xs ->
206 (* bugfix: we dont want to evalute the x ## b at this moment.
207 * so can not use fix_tokens_cpp in the same we use it
208 * to parse C code.
209 let xs' =
210 Parsing_hacks.fix_tokens_cpp ~macro_defs:current_def xs
211 in
212 *)
213 let tokens = xs in
214 let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
215 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
216 let paren_grouped = TV.mk_parenthised cleaner in
217 Cpp_token_c.apply_macro_defs
218 ~msg_apply_known_macro:(fun s2 ->
219 pr2 (spf "APPLYING: %s in definition of %s" s2 s))
220 ~msg_apply_known_macro_hint:(fun s ->
221 pr2 "hint")
222 ~evaluate_concatop:false
223 ~inplace_when_single:false
224 current_def paren_grouped;
225 (* because the before field is used by apply_macro_defs *)
226 tokens2 := TV.rebuild_tokens_extented !tokens2;
227
228 (* bugfix *)
229 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
230
231 let xs' =
232 Parsing_hacks.insert_virtual_positions
233 (cleaner +> Common.acc_map (fun x -> x.TV.tok))
234 in
235
236 Cpp_token_c.DefineBody xs'
237 in
238 (s, params, body')
239
240
241 (* work by side effect as both the binding and callgraph are mutable
242 * data structure
243 *)
244 let no_inlining = ref false
245
246 let rec (recurse_expand_macro_topological_order:
247 int -> (string, Cpp_token_c.define_def) Hashtbl.t ->
248 callgraph_macros -> unit) =
249 fun depth current_def g ->
250
251 (* naive: *)
252 if !no_inlining then
253 g#nodes#tolist +> List.iter (fun (k, v) ->
254 if k =$= rootname then ()
255 else
256 let def = get_single_file_and_def_of_node k v +> snd in
257 Hashtbl.add current_def k def
258 )
259 else
260 let remaining = g#nodes#tolist in
261 (match remaining with
262 | [] -> () (* christia: commented this out: raise (Impossible 76)
263 * This seems to be the case when there are no
264 * problematic macros. Which is possible.
265 *)
266 | [(k,n)] ->
267 assert (k = rootname);
268 (* end recursion *)
269 ()
270 | (k, v)::y::xs ->
271 let leafs = (g#leaf_nodes ())#tolist in
272 pr2 (spf "step: %d, %s" depth (leafs +> Common.join " "));
273
274 Ograph_simple.print_ograph_generic
275 ~str_of_key:(fun k -> k)
276 ~str_of_node:(fun k node -> k)
277 (spf "/tmp/graph-%d.dot" depth)
278 g;
279
280 assert(not (null leafs));
281
282
283 (* little specialisation to avoid useless work *)
284 if depth = 0
285 then begin
286 leafs +> List.iter (fun k ->
287 let node = g#nodes#find k in
288 let def = get_single_file_and_def_of_node k node +> snd in
289 Hashtbl.add current_def k def
290 )
291 end else begin
292 let new_defs =
293 leafs +> List.map (fun k ->
294 let node = g#nodes#find k in
295 let def = get_single_file_and_def_of_node k node +> snd in
296 let def' = macro_expand current_def def in
297 k, def'
298 )
299 in
300 new_defs +> List.iter (fun (k,def) -> Hashtbl.add current_def k def);
301 end;
302 leafs +> List.iter (fun k -> g#del_leaf_node_and_its_edges k);
303 recurse_expand_macro_topological_order (depth+1) current_def g;
304 )
305
306
307
308 (*****************************************************************************)
309 (* Macros def analysis *)
310 (*****************************************************************************)
311
312 let is_dangerous_macro def =
313 let (s, params, body) = def in
314 let toks = bodytoks_of_body body in
315
316 (match params, body with
317
318 (* ex: APU_DECLARE_DATA *)
319 | Cpp_token_c.NoParam, Cpp_token_c.DefineBody [] ->
320 if s =~ ".*_H_*"
321 then false
322 else true
323
324 (* ex: AP_DECLARE(x) x *)
325 | Cpp_token_c.Params([s1]),
326 Cpp_token_c.DefineBody [TIdent (s2,i1)] ->
327 (match s1 with
328 Cpp_token_c.FixedArg s1 -> s1 =$= s2
329 | Cpp_token_c.VariadicArg _ -> false)
330
331 (* keyword aliases. eg: APR_inline __inline__ *)
332 | Cpp_token_c.NoParam, Cpp_token_c.DefineBody [x] ->
333 (match x with
334 | Tinline _ -> true
335 | Tconst _ -> true
336 | Tstatic _ -> true
337 | Textern _ -> true
338 | _ -> false
339 )
340
341 | _ , Cpp_token_c.DefineBody xs ->
342 (match List.rev xs with
343 (* make extract_macros looping on apache, get some infinite "step x" *)
344 | TPtVirg _::_ -> true
345 | _ -> false
346 )
347
348 | _ -> false
349 ) ||
350
351
352 (toks +> List.exists (fun tok ->
353 match tok with
354 | TCppConcatOp _ -> true
355
356 | Tattribute (ii) -> true
357 | TattributeNoarg (ii) -> true
358
359 (* FP with local variable.
360 | TIdent (s,ii) ->
361 s ==~ Parsing_hacks.regexp_annot && not (List.mem s
362 ["__FILE__";"__LINE__";"__FUNCTION__"])
363 *)
364 | _ -> false
365 ))
366
367
368 let is_trivial_macro def =
369 let (s, params, body) = def in
370 match params, body with
371 | Cpp_token_c.NoParam, Cpp_token_c.DefineBody [Parser_c.TInt _]
372 (* no!!! those are not trivial macro, they are dangerous too.
373 | Cpp_token_c.NoParam, Cpp_token_c.DefineBody [] ->
374 true
375 *)
376 | _ ->
377 false
378
379 (*
380 | () when s ==~ Parsing_hacks.regexp_annot -> true
381 | () when List.exists (function
382 (*| Parser_c.Tattribute _ -> true*)
383 | Parser_c.TCppConcatOp _ -> true
384 | _ -> false) bodytoks
385 -> true
386 | () -> false
387 in
388 *)
389
390
391 (*****************************************************************************)
392 (* Main entry point *)
393 (*****************************************************************************)
394
395 let extract_dangerous_macros xs =
396
397 (* prepare initial set of macro definitions to work on *)
398 let all_macros =
399 xs +> List.map (fun (file, defs) ->
400 defs +> List.map (fun def -> file, def)
401 ) +> List.flatten
402 in
403 let macros =
404 all_macros +> Common.exclude(fun (file,(x,def)) -> is_trivial_macro def) in
405
406 (* initial set of problematic macros *)
407 let problematic_macros =
408 macros +> Common.filter (fun (file, (x, def)) -> is_dangerous_macro def) in
409
410
411 (* include the ancestors of problematic macros *)
412 let g =
413 build_callgraph_macros macros in
414 let self_referiential, macros_in_loop_with_path =
415 check_no_loop_graph g in
416
417 Ograph_simple.print_ograph_generic
418 ~str_of_key:(fun k -> k)
419 ~str_of_node:(fun k node -> k)
420 "/tmp/graph.dot"
421 g;
422 let start =
423 problematic_macros +> List.map (fun (file, (x, def)) -> x) +> Common.nub in
424
425 let finalset =
426 start +> List.fold_left (fun acc x ->
427 if List.exists (fun y -> fst y = x) macros_in_loop_with_path
428 || List.mem x self_referiential
429 then begin
430 pr2 (spf "PB: ignoring %s macro as it is in a loop" x);
431 acc
432 end
433 else
434 let acc = acc#add x in
435 let ancestors = g#ancestors x in
436 acc $++$ ancestors
437 ) (build_empty_set ())
438 in
439
440 (* Now prepare for fixpoint expansion of macros to avoid doing
441 * the work in cpp_engine.
442 *)
443 let sliced_g =
444 slice_of_callgraph_macros g finalset
445 in
446 Ograph_simple.print_ograph_generic
447 ~str_of_key:(fun k -> k)
448 ~str_of_node:(fun k node -> k)
449 "/tmp/graph2.dot"
450 sliced_g;
451
452
453 (* do fixpoint expansion *)
454 let (binding: (string, Cpp_token_c.define_def) Hashtbl.t) =
455 Hashtbl.create 101 in
456 (* work by side effects on the hashtbl and graph *)
457 recurse_expand_macro_topological_order 0 binding sliced_g;
458
459
460
461 (* prepare final result *)
462 let final_macros =
463 binding +> Common.hash_to_list +> List.map (fun (x, def) ->
464 let node = g#nodes#find x in
465 let file = get_single_file_and_def_of_node x node +> fst in
466 (file, (x, def))
467 )
468 in
469
470 pr2 (spf "total macros numbers: %d"
471 (List.length all_macros));
472 pr2 (spf "problematic macros numbers: %d"
473 (List.length problematic_macros));
474 pr2 (spf "final (after closure) problematic macros numbers: %d"
475 (List.length final_macros));
476
477 let grouped = Common.group_assoc_bykey_eff final_macros in
478 grouped
479