af246f2c72671bfb82c1f3f189ff3b24daf2400e
[bpt/coccinelle.git] / parsing_c / cpp_ast_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2008, 2009 University of Urbana Champaign
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
13 *)
14 open Common
15
16 open Ast_c
17
18 (*****************************************************************************)
19 (* Cpp Ast Manipulations *)
20 (*****************************************************************************)
21
22 (*
23 * cpp-include-expander-builtin.
24 *
25 * alternative1: parse and call cpp tour a tour. So let cpp work at
26 * the token level. That's what most tools do.
27 * alternative2: apply cpp at the very end. Process that go through ast
28 * and do the stuff such as #include, macro expand,
29 * ifdef but on the ast!
30 *
31 * But need keep those info in ast at least, even bad
32 * macro for instance, and for parse error region ? maybe can
33 * get another chance ?
34 * I think it's better to do the cpp-include-expander in a different step
35 * rather than embedding it in the parser. The parser is already too complex.
36 * Also keep with the tradition to try to parse as-is.
37 *
38 * todo? but maybe could discover new info that could help reparse
39 * the ParseError in original file. Try again parsing it by
40 * putting it in a minifile ?
41 *
42 *
43 * todo? maybe can do some pass that work at the ifdef level and for instance
44 * try to paren them, so have in Ast some stuff that are not
45 * present at parsing time but that can then be constructed after
46 * some processing (a little bit like my type for expression filler,
47 * or position info filler, or include relative position filler).
48 *
49 * ??add such info about what was done somewhere ? could build new
50 * ??ast each time but too tedious (maybe need delta-programming!)
51 *
52 * todo? maybe change cpp_ast_c to go deeper on local "" ?
53 *
54 *
55 * TODO: macro expand,
56 * TODO: handle ifdef
57 *
58 *
59 *
60 * cpp_ifdef_statementize: again better to separate concern and in parser
61 * just add the directives in a flat way (IfdefStmt) and later do more
62 * processing and transform them in a tree with some IfdefStmt2.
63 *)
64
65
66
67 (*****************************************************************************)
68 (* Types *)
69 (*****************************************************************************)
70
71 type cpp_option =
72 | I of Common.dirname
73 | D of string * string option
74
75
76
77 let i_of_cpp_options xs =
78 xs +> Common.map_filter (function
79 | I f -> Some f
80 | D _ -> None
81 )
82
83 let cpp_option_of_cmdline (xs, ys) =
84 (xs +> List.map (fun s -> I s)) ++
85 (ys +> List.map (fun s ->
86 if s =~ "\\([A-Z][A-Z0-9_]*\\)=\\(.*\\)"
87 then
88 let (def, value) = matched2 s in
89 D (def, Some value)
90 else
91 D (s, None)
92 ))
93
94 (*****************************************************************************)
95 (* Helpers *)
96 (*****************************************************************************)
97
98 let _hcandidates = Hashtbl.create 101
99
100 let init_adjust_candidate_header_files dir =
101 let ext = "[h]" in
102 let files = Common.files_of_dir_or_files ext [dir] in
103
104 files +> List.iter (fun file ->
105 let base = Filename.basename file in
106 pr2 file;
107 Hashtbl.add _hcandidates base file;
108 );
109 ()
110
111
112
113 (* may return a list of match ? *)
114 let find_header_file1 cppopts dirname inc_file =
115 match inc_file with
116 | Local f ->
117 let finalfile =
118 Filename.concat dirname (Ast_c.s_of_inc_file inc_file) in
119 if Sys.file_exists finalfile
120 then [finalfile]
121 else []
122 | NonLocal f ->
123 i_of_cpp_options cppopts +> Common.map_filter (fun dirname ->
124 let finalfile =
125 Filename.concat dirname (Ast_c.s_of_inc_file inc_file) in
126 if Sys.file_exists finalfile
127 then Some finalfile
128 else None
129 )
130 | Weird s ->
131 pr2 ("CPPAST: weird include not handled:" ^ s);
132 []
133
134 (* todo? can try find most precise ? first just use basename but
135 * then maybe look if have also some dir in common ?
136 *)
137 let find_header_file2 inc_file =
138 match inc_file with
139 | Local f
140 | NonLocal f ->
141 let s = (Ast_c.s_of_inc_file inc_file) in
142 let base = Filename.basename s in
143
144 let res = Hashtbl.find_all _hcandidates base in
145 (match res with
146 | [file] ->
147 pr2 ("CPPAST: find header in other dir: " ^ file);
148 res
149 | [] ->
150 []
151 | x::y::xs -> res
152 )
153 | Weird s ->
154 []
155
156
157 let find_header_file cppopts dirname inc_file =
158 let res1 = find_header_file1 cppopts dirname inc_file in
159 match res1 with
160 | [file] -> res1
161 | [] -> find_header_file2 inc_file
162 | x::y::xs -> res1
163
164
165
166
167 (* ---------------------------------------------------------------------- *)
168 let trace_cpp_process depth mark inc_file =
169 pr2 (spf "%s>%s %s"
170 (Common.repeat "-" depth +> Common.join "")
171 mark
172 (s_of_inc_file_bis inc_file));
173 ()
174
175
176 (* ---------------------------------------------------------------------- *)
177 let _headers_hash = Hashtbl.create 101
178
179 (* On freebsd ocaml is trashing, use up to 1.6Go of memory and then
180 * building the database_c takes ages.
181 *
182 * So just limit with following threshold to avoid this trashing, simple.
183 *
184 * On netbsd, got a Out_of_memory exn on this file;
185 * /home/pad/software-os-src2/netbsd/dev/microcode/cyclades-z/
186 * even if the cache is small. That's because huge single
187 * ast element and probably the ast marshalling fail.
188 *)
189 let threshold_cache_nb_files = ref 200
190
191 let parse_c_and_cpp_cache file =
192 if Hashtbl.length _headers_hash > !threshold_cache_nb_files
193 then Hashtbl.clear _headers_hash;
194
195 Common.memoized _headers_hash file (fun () ->
196 Parse_c.parse_c_and_cpp file
197 )
198
199
200 (* ---------------------------------------------------------------------- *)
201 let (show_cpp_i_opts: string list -> unit) = fun xs ->
202 if not (null xs) then begin
203 pr2 "-I";
204 xs +> List.iter pr2
205 end
206
207
208 let (show_cpp_d_opts: string list -> unit) = fun xs ->
209 if not (null xs) then begin
210 pr2 "-D";
211 xs +> List.iter pr2
212 end
213
214 (*****************************************************************************)
215 (* Main entry *)
216 (*****************************************************************************)
217
218
219 let (cpp_expand_include2:
220 ?depth_limit:int option ->
221 cpp_option list -> Common.dirname -> Ast_c.program -> Ast_c.program) =
222 fun ?(depth_limit=None) iops dirname ast ->
223
224 pr2_xxxxxxxxxxxxxxxxx();
225 let already_included = ref [] in
226
227 let rec aux stack dirname ast =
228 let depth = List.length stack in
229
230 ast +> Visitor_c.vk_program_s { Visitor_c.default_visitor_c_s with
231 Visitor_c.kcppdirective_s = (fun (k, bigf) cpp ->
232 match cpp with
233 | Include {i_include = (inc_file, ii);
234 i_rel_pos = h_rel_pos;
235 i_is_in_ifdef = b;
236 i_content = copt;
237 }
238 ->
239 (match depth_limit with
240 | Some limit when depth >= limit -> cpp
241 | _ ->
242
243 (match find_header_file iops dirname inc_file with
244 | [file] ->
245 if List.mem file !already_included
246 then begin
247 (* pr2 ("already included: " ^ file); *)
248 trace_cpp_process depth "*" inc_file;
249 k cpp
250 end else begin
251 trace_cpp_process depth "" inc_file;
252 Common.push2 file already_included;
253 (* CONFIG *)
254 Flag_parsing_c.verbose_parsing := false;
255 Flag_parsing_c.verbose_lexing := false;
256 let (ast2, _stat) = parse_c_and_cpp_cache file in
257
258 let ast = Parse_c.program_of_program2 ast2 in
259 let dirname' = Filename.dirname file in
260
261 (* recurse *)
262 let ast' = aux (file::stack) dirname' ast in
263
264 Include {i_include = (inc_file, ii);
265 i_rel_pos = h_rel_pos;
266 i_is_in_ifdef = b;
267 i_content = Some (file, ast');
268 }
269 end
270 | [] ->
271 trace_cpp_process depth "!!" inc_file;
272 pr2 "CPPAST: file not found";
273 k cpp
274 | x::y::zs ->
275 trace_cpp_process depth "!!" inc_file;
276 pr2 "CPPAST: too much candidates";
277 k cpp
278 )
279 )
280 | _ -> k cpp
281 );
282 }
283 in
284 aux [] dirname ast
285
286
287 let cpp_expand_include ?depth_limit a b c =
288 Common.profile_code "cpp_expand_include"
289 (fun () -> cpp_expand_include2 ?depth_limit a b c)
290
291 (*
292 let unparse_showing_include_content ?
293 *)
294
295
296 (*****************************************************************************)
297 (* Ifdef-statementize *)
298 (*****************************************************************************)
299
300
301 let is_ifdef_and_same_tag tag x =
302 match x with
303 | IfdefStmt (IfdefDirective ((_, tag2),_)) ->
304 tag =*= tag2
305 | StmtElem _ | CppDirectiveStmt _ -> false
306 | IfdefStmt2 _ -> raise Impossible
307
308
309
310 (* What if I skipped in the parser only some of the ifdef elements
311 * of the same tag. Once I passed one, I should pass all of them and so
312 * at least should detect here that one tag is not "valid". Maybe in the parser
313 * can return or marked some tags as "partially_passed_ifdef_tag".
314 * Maybe could do in ast_c a MatchingTag of int * bool ref (* one_was_passed *)
315 * where the ref will be shared by the ifdefs with the same matching tag
316 * indice. Or simply count the number of directives with the same tag and
317 * put this information in the tag. Hence the total_with_this_tag below.
318 *)
319 let should_ifdefize tag ifdefs_directives xxs =
320 let IfdefTag (_tag, total_with_this_tag) = tag in
321
322 if total_with_this_tag <> List.length ifdefs_directives
323 then begin
324 pr2 "CPPASTC: can not ifdefize, some of its directives were passed";
325 false
326 end else
327 (* todo? put more condition ? dont ifdefize declaration ? *)
328 true
329
330
331
332
333
334 (* return a triple, (ifdefs directive * grouped xs * remaining sequencable)
335 * XXX1 XXX2 elsif YYY1 else ZZZ1 endif WWW1 WWW2
336 * => [elsif, else, endif], [XXX1 XXX2; YYY1; ZZZ1], [WWW1 WWW2]
337 *)
338 let group_ifdef tag xs =
339 let (xxs, xs) = group_by_post (is_ifdef_and_same_tag tag) xs in
340
341 xxs +> List.map snd +> List.map (fun x ->
342 match x with
343 | IfdefStmt y -> y
344 | StmtElem _ | CppDirectiveStmt _ | IfdefStmt2 _ -> raise Impossible
345 ),
346 xxs +> List.map fst,
347 xs
348
349
350 let rec cpp_ifdef_statementize ast =
351 Visitor_c.vk_program_s { Visitor_c.default_visitor_c_s with
352 Visitor_c.kstatementseq_list_s = (fun (k, bigf) xs ->
353 let rec aux xs =
354 match xs with
355 | [] -> []
356 | stseq::xs ->
357 (match stseq with
358 | StmtElem st ->
359 Visitor_c.vk_statement_sequencable_s bigf stseq::aux xs
360 | CppDirectiveStmt directive ->
361 Visitor_c.vk_statement_sequencable_s bigf stseq::aux xs
362 | IfdefStmt ifdef ->
363 (match ifdef with
364 | IfdefDirective ((Ifdef,tag),ii) ->
365
366 let (restifdefs, xxs, xs') = group_ifdef tag xs in
367 if should_ifdefize tag (ifdef::restifdefs) xxs
368 then
369 let res = IfdefStmt2 (ifdef::restifdefs, xxs) in
370 Visitor_c.vk_statement_sequencable_s bigf res::aux xs'
371 else
372 Visitor_c.vk_statement_sequencable_s bigf stseq::aux xs
373
374 | IfdefDirective (((IfdefElseif|IfdefElse|IfdefEndif),b),ii) ->
375 pr2 "weird: first directive is not a ifdef";
376 (* maybe not weird, just that should_ifdefize
377 * returned false *)
378 Visitor_c.vk_statement_sequencable_s bigf stseq::aux xs
379 )
380
381 | IfdefStmt2 (ifdef, xxs) ->
382 failwith "already applied cpp_ifdef_statementize"
383 )
384 in
385 aux xs
386 );
387 } ast
388
389
390 (*****************************************************************************)
391 (* Macro *)
392 (*****************************************************************************)
393
394 let (cpp_expand_macro_expr:
395 Ast_c.define_kind -> Ast_c.argument Ast_c.wrap2 list ->
396 Ast_c.expression option) =
397 fun defkind args ->
398 raise Todo