Release coccinelle-0.1.8
[bpt/coccinelle.git] / parsing_c / cpp_ast_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2008, 2009 University of Urbana Champaign
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
13 *)
14 open Common
15
16 open Ast_c
17
18 (*****************************************************************************)
19 (* Wrappers *)
20 (*****************************************************************************)
21 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_cpp_ast
22 let pr2_debug,pr2_debug_once =
23 Common.mk_pr2_wrappers Flag_parsing_c.debug_cpp_ast
24
25 (*****************************************************************************)
26 (* Cpp Ast Manipulations *)
27 (*****************************************************************************)
28
29 (*
30 * cpp-include-expander-builtin.
31 *
32 * alternative1: parse and call cpp tour a tour. So let cpp work at
33 * the token level. That's what most tools do.
34 * alternative2: apply cpp at the very end. Process that go through ast
35 * and do the stuff such as #include, macro expand,
36 * ifdef but on the ast!
37 *
38 * But need keep those info in ast at least, even bad
39 * macro for instance, and for parse error region ? maybe can
40 * get another chance ?
41 * I think it's better to do the cpp-include-expander in a different step
42 * rather than embedding it in the parser. The parser is already too complex.
43 * Also keep with the tradition to try to parse as-is.
44 *
45 * todo? but maybe could discover new info that could help reparse
46 * the ParseError in original file. Try again parsing it by
47 * putting it in a minifile ?
48 *
49 *
50 * todo? maybe can do some pass that work at the ifdef level and for instance
51 * try to paren them, so have in Ast some stuff that are not
52 * present at parsing time but that can then be constructed after
53 * some processing (a little bit like my type for expression filler,
54 * or position info filler, or include relative position filler).
55 *
56 * ??add such info about what was done somewhere ? could build new
57 * ??ast each time but too tedious (maybe need delta-programming!)
58 *
59 * todo? maybe change cpp_ast_c to go deeper on local "" ?
60 *
61 *
62 * TODO: macro expand,
63 * TODO: handle ifdef
64 *
65 *
66 *
67 * cpp_ifdef_statementize: again better to separate concern and in parser
68 * just add the directives in a flat way (IfdefStmt) and later do more
69 * processing and transform them in a tree with some IfdefStmt2.
70 *)
71
72
73
74 (*****************************************************************************)
75 (* Types *)
76 (*****************************************************************************)
77
78 type cpp_option =
79 | I of Common.dirname
80 | D of string * string option
81
82
83
84 let i_of_cpp_options xs =
85 xs +> Common.map_filter (function
86 | I f -> Some f
87 | D _ -> None
88 )
89
90 let cpp_option_of_cmdline (xs, ys) =
91 (xs +> List.map (fun s -> I s)) ++
92 (ys +> List.map (fun s ->
93 if s =~ "\\([A-Z][A-Z0-9_]*\\)=\\(.*\\)"
94 then
95 let (def, value) = matched2 s in
96 D (def, Some value)
97 else
98 D (s, None)
99 ))
100
101 (*****************************************************************************)
102 (* Helpers *)
103 (*****************************************************************************)
104
105 let _hcandidates = Hashtbl.create 101
106
107 let init_adjust_candidate_header_files dir =
108 let ext = "[h]" in
109 let files = Common.files_of_dir_or_files ext [dir] in
110
111 files +> List.iter (fun file ->
112 let base = Filename.basename file in
113 pr2_debug file;
114 Hashtbl.add _hcandidates base file;
115 );
116 ()
117
118
119
120 (* may return a list of match ? *)
121 let find_header_file1 cppopts dirname inc_file =
122 match inc_file with
123 | Local f ->
124 let finalfile =
125 Filename.concat dirname (Ast_c.s_of_inc_file inc_file) in
126 if Sys.file_exists finalfile
127 then [finalfile]
128 else []
129 | NonLocal f ->
130 i_of_cpp_options cppopts +> Common.map_filter (fun dirname ->
131 let finalfile =
132 Filename.concat dirname (Ast_c.s_of_inc_file inc_file) in
133 if Sys.file_exists finalfile
134 then Some finalfile
135 else None
136 )
137 | Weird s ->
138 pr2 ("CPPAST: weird include not handled:" ^ s);
139 []
140
141 (* todo? can try find most precise ? first just use basename but
142 * then maybe look if have also some dir in common ?
143 *)
144 let find_header_file2 inc_file =
145 match inc_file with
146 | Local f
147 | NonLocal f ->
148 let s = (Ast_c.s_of_inc_file inc_file) in
149 let base = Filename.basename s in
150
151 let res = Hashtbl.find_all _hcandidates base in
152 (match res with
153 | [file] ->
154 pr2_debug ("CPPAST: find header in other dir: " ^ file);
155 res
156 | [] ->
157 []
158 | x::y::xs -> res
159 )
160 | Weird s ->
161 []
162
163
164 let find_header_file cppopts dirname inc_file =
165 let res1 = find_header_file1 cppopts dirname inc_file in
166 match res1 with
167 | [file] -> res1
168 | [] -> find_header_file2 inc_file
169 | x::y::xs -> res1
170
171
172
173
174 (* ---------------------------------------------------------------------- *)
175 let trace_cpp_process depth mark inc_file =
176 pr2_debug (spf "%s>%s %s"
177 (Common.repeat "-" depth +> Common.join "")
178 mark
179 (s_of_inc_file_bis inc_file));
180 ()
181
182
183 (* ---------------------------------------------------------------------- *)
184 let _headers_hash = Hashtbl.create 101
185
186 (* On freebsd ocaml is trashing, use up to 1.6Go of memory and then
187 * building the database_c takes ages.
188 *
189 * So just limit with following threshold to avoid this trashing, simple.
190 *
191 * On netbsd, got a Out_of_memory exn on this file;
192 * /home/pad/software-os-src2/netbsd/dev/microcode/cyclades-z/
193 * even if the cache is small. That's because huge single
194 * ast element and probably the ast marshalling fail.
195 *)
196 let threshold_cache_nb_files = ref 200
197
198 let parse_c_and_cpp_cache file =
199 if Hashtbl.length _headers_hash > !threshold_cache_nb_files
200 then Hashtbl.clear _headers_hash;
201
202 Common.memoized _headers_hash file (fun () ->
203 Parse_c.parse_c_and_cpp file
204 )
205
206
207 (* ---------------------------------------------------------------------- *)
208 let (show_cpp_i_opts: string list -> unit) = fun xs ->
209 if not (null xs) then begin
210 pr2 "-I";
211 xs +> List.iter pr2
212 end
213
214
215 let (show_cpp_d_opts: string list -> unit) = fun xs ->
216 if not (null xs) then begin
217 pr2 "-D";
218 xs +> List.iter pr2
219 end
220
221 (*****************************************************************************)
222 (* Main entry *)
223 (*****************************************************************************)
224
225
226 let (cpp_expand_include2:
227 ?depth_limit:int option ->
228 cpp_option list -> Common.dirname -> Ast_c.program -> Ast_c.program) =
229 fun ?(depth_limit=None) iops dirname ast ->
230
231 if !Flag_parsing_c.debug_cpp_ast
232 then pr2_xxxxxxxxxxxxxxxxx();
233
234 let already_included = ref [] in
235
236 let rec aux stack dirname ast =
237 let depth = List.length stack in
238
239 ast +> Visitor_c.vk_program_s { Visitor_c.default_visitor_c_s with
240 Visitor_c.kcppdirective_s = (fun (k, bigf) cpp ->
241 match cpp with
242 | Include {i_include = (inc_file, ii);
243 i_rel_pos = h_rel_pos;
244 i_is_in_ifdef = b;
245 i_content = copt;
246 }
247 ->
248 (match depth_limit with
249 | Some limit when depth >= limit -> cpp
250 | _ ->
251
252 (match find_header_file iops dirname inc_file with
253 | [file] ->
254 if List.mem file !already_included
255 then begin
256 (* pr2 ("already included: " ^ file); *)
257 trace_cpp_process depth "*" inc_file;
258 k cpp
259 end else begin
260 trace_cpp_process depth "" inc_file;
261 Common.push2 file already_included;
262 (* CONFIG *)
263 Flag_parsing_c.verbose_parsing := false;
264 Flag_parsing_c.verbose_lexing := false;
265 let (ast2, _stat) = parse_c_and_cpp_cache file in
266
267 let ast = Parse_c.program_of_program2 ast2 in
268 let dirname' = Filename.dirname file in
269
270 (* recurse *)
271 let ast' = aux (file::stack) dirname' ast in
272
273 Include {i_include = (inc_file, ii);
274 i_rel_pos = h_rel_pos;
275 i_is_in_ifdef = b;
276 i_content = Some (file, ast');
277 }
278 end
279 | [] ->
280 trace_cpp_process depth "!!" inc_file;
281 pr2 "CPPAST: file not found";
282 k cpp
283 | x::y::zs ->
284 trace_cpp_process depth "!!" inc_file;
285 pr2 "CPPAST: too much candidates";
286 k cpp
287 )
288 )
289 | _ -> k cpp
290 );
291 }
292 in
293 aux [] dirname ast
294
295
296 let cpp_expand_include ?depth_limit a b c =
297 Common.profile_code "cpp_expand_include"
298 (fun () -> cpp_expand_include2 ?depth_limit a b c)
299
300 (*
301 let unparse_showing_include_content ?
302 *)
303
304
305 (*****************************************************************************)
306 (* Ifdef-statementize *)
307 (*****************************************************************************)
308
309
310 let is_ifdef_and_same_tag tag x =
311 match x with
312 | IfdefStmt (IfdefDirective ((_, tag2),_)) ->
313 tag =*= tag2
314 | StmtElem _ | CppDirectiveStmt _ -> false
315 | IfdefStmt2 _ -> raise Impossible
316
317
318
319 (* What if I skipped in the parser only some of the ifdef elements
320 * of the same tag. Once I passed one, I should pass all of them and so
321 * at least should detect here that one tag is not "valid". Maybe in the parser
322 * can return or marked some tags as "partially_passed_ifdef_tag".
323 * Maybe could do in ast_c a MatchingTag of int * bool ref (* one_was_passed *)
324 * where the ref will be shared by the ifdefs with the same matching tag
325 * indice. Or simply count the number of directives with the same tag and
326 * put this information in the tag. Hence the total_with_this_tag below.
327 *)
328 let should_ifdefize tag ifdefs_directives xxs =
329 let IfdefTag (_tag, total_with_this_tag) = tag in
330
331 if total_with_this_tag <> List.length ifdefs_directives
332 then begin
333 pr2 "CPPASTC: can not ifdefize, some of its directives were passed";
334 false
335 end else
336 (* todo? put more condition ? dont ifdefize declaration ? *)
337 true
338
339
340
341
342
343 (* return a triple, (ifdefs directive * grouped xs * remaining sequencable)
344 * XXX1 XXX2 elsif YYY1 else ZZZ1 endif WWW1 WWW2
345 * => [elsif, else, endif], [XXX1 XXX2; YYY1; ZZZ1], [WWW1 WWW2]
346 *)
347 let group_ifdef tag xs =
348 let (xxs, xs) = group_by_post (is_ifdef_and_same_tag tag) xs in
349
350 xxs +> List.map snd +> List.map (fun x ->
351 match x with
352 | IfdefStmt y -> y
353 | StmtElem _ | CppDirectiveStmt _ | IfdefStmt2 _ -> raise Impossible
354 ),
355 xxs +> List.map fst,
356 xs
357
358
359 let rec cpp_ifdef_statementize ast =
360 Visitor_c.vk_program_s { Visitor_c.default_visitor_c_s with
361 Visitor_c.kstatementseq_list_s = (fun (k, bigf) xs ->
362 let rec aux xs =
363 match xs with
364 | [] -> []
365 | stseq::xs ->
366 (match stseq with
367 | StmtElem st ->
368 Visitor_c.vk_statement_sequencable_s bigf stseq::aux xs
369 | CppDirectiveStmt directive ->
370 Visitor_c.vk_statement_sequencable_s bigf stseq::aux xs
371 | IfdefStmt ifdef ->
372 (match ifdef with
373 | IfdefDirective ((Ifdef,tag),ii) ->
374
375 let (restifdefs, xxs, xs') = group_ifdef tag xs in
376 if should_ifdefize tag (ifdef::restifdefs) xxs
377 then
378 let res = IfdefStmt2 (ifdef::restifdefs, xxs) in
379 Visitor_c.vk_statement_sequencable_s bigf res::aux xs'
380 else
381 Visitor_c.vk_statement_sequencable_s bigf stseq::aux xs
382
383 | IfdefDirective (((IfdefElseif|IfdefElse|IfdefEndif),b),ii) ->
384 pr2 "weird: first directive is not a ifdef";
385 (* maybe not weird, just that should_ifdefize
386 * returned false *)
387 Visitor_c.vk_statement_sequencable_s bigf stseq::aux xs
388 )
389
390 | IfdefStmt2 (ifdef, xxs) ->
391 failwith "already applied cpp_ifdef_statementize"
392 )
393 in
394 aux xs
395 );
396 } ast
397
398
399 (*****************************************************************************)
400 (* Macro *)
401 (*****************************************************************************)
402
403 let (cpp_expand_macro_expr:
404 Ast_c.define_kind -> Ast_c.argument Ast_c.wrap2 list ->
405 Ast_c.expression option) =
406 fun defkind args ->
407 raise Todo