permit multiline comments and strings in macros
[bpt/coccinelle.git] / parsing_c / cpp_ast_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2008, 2009 University of Urbana Champaign
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15 open Common
16
17 open Ast_c
18
19 (*****************************************************************************)
20 (* Wrappers *)
21 (*****************************************************************************)
(* Logging helpers local to this module.
 * [pr2]/[pr2_once] are built from [Flag_parsing_c.verbose_cpp_ast] and
 * [pr2_debug]/[pr2_debug_once] from [Flag_parsing_c.debug_cpp_ast]
 * (presumably each flag enables the corresponding output; see
 * [Common.mk_pr2_wrappers]).
 *)
let (pr2, pr2_once) =
  Common.mk_pr2_wrappers Flag_parsing_c.verbose_cpp_ast

let (pr2_debug, pr2_debug_once) =
  Common.mk_pr2_wrappers Flag_parsing_c.debug_cpp_ast
26
27 (*****************************************************************************)
28 (* Cpp Ast Manipulations *)
29 (*****************************************************************************)
30
31 (*
32 * cpp-include-expander-builtin.
33 *
34 * alternative1: parse and call cpp alternately ("tour a tour"), so that
35 * cpp works at the token level. That's what most tools do.
36 * alternative2: apply cpp at the very end. A pass that goes through the
37 * AST and does things such as #include, macro expansion and
38 * ifdef handling, but on the AST!
39 *
40 * But need keep those info in ast at least, even bad
41 * macro for instance, and for parse error region ? maybe can
42 * get another chance ?
43 * I think it's better to do the cpp-include-expander in a different step
44 * rather than embedding it in the parser. The parser is already too complex.
45 * Also keep with the tradition to try to parse as-is.
46 *
47 * todo? but maybe could discover new info that could help reparse
48 * the ParseError in original file. Try again parsing it by
49 * putting it in a minifile ?
50 *
51 *
52 * todo? maybe can do some pass that work at the ifdef level and for instance
53 * try to paren them, so have in Ast some stuff that are not
54 * present at parsing time but that can then be constructed after
55 * some processing (a little bit like my type for expression filler,
56 * or position info filler, or include relative position filler).
57 *
58 * ??add such info about what was done somewhere ? could build new
59 * ??ast each time but too tedious (maybe need delta-programming!)
60 *
61 * todo? maybe change cpp_ast_c to go deeper on local "" ?
62 *
63 *
64 * TODO: macro expand,
65 * TODO: handle ifdef
66 *
67 *
68 *
69 * cpp_ifdef_statementize: again better to separate concern and in parser
70 * just add the directives in a flat way (IfdefStmt) and later do more
71 * processing and transform them in a tree with some IfdefStmt2.
72 *)
73
74
75
76 (*****************************************************************************)
77 (* Types *)
78 (*****************************************************************************)
79
(* A preprocessor option, modelled on the cpp command line. *)
type cpp_option =
  (* -I: a directory searched for non-local includes *)
  | I of Common.dirname
  (* -D: a macro name with an optional value *)
  | D of string * string option
83
84
85
(* Keep only the directories carried by the [I] options of [xs], in order;
 * [D] options are dropped.
 *)
let i_of_cpp_options xs =
  let include_dir opt =
    match opt with
    | D _ -> None
    | I dir -> Some dir
  in
  Common.map_filter include_dir xs
91
(* Build the list of cpp options from the command line.
 *
 * [xs] are the -I arguments (one directory each) and [ys] the -D arguments.
 * A -D argument of the form NAME=VALUE yields [D (NAME, Some VALUE)];
 * anything else is kept verbatim as [D (arg, None)].
 *
 * NAME may be any C identifier (letters, digits, underscore, not starting
 * with a digit). The previous pattern only accepted upper-case names, so
 * "-D foo=1" or "-D _X=1" was wrongly recorded as a macro literally named
 * "foo=1" with no value.
 *)
let cpp_option_of_cmdline (xs, ys) =
  (xs +> List.map (fun s -> I s)) ++
  (ys +> List.map (fun s ->
    if s =~ "\\([A-Za-z_][A-Za-z0-9_]*\\)=\\(.*\\)"
    then
      let (def, value) = matched2 s in
      D (def, Some value)
    else
      D (s, None)
  ))
102
103 (*****************************************************************************)
104 (* Debug *)
105 (*****************************************************************************)
(* Print a "-I" header followed by one directory per line through [pr2].
 * Prints nothing at all when the list is empty.
 *)
let show_cpp_i_opts : string list -> unit = function
  | [] -> ()
  | dirs ->
      pr2 "-I";
      List.iter pr2 dirs
111
112
(* Print a "-D" header followed by one definition per line through [pr2].
 * Prints nothing at all when the list is empty.
 *)
let show_cpp_d_opts : string list -> unit = function
  | [] -> ()
  | defs ->
      pr2 "-D";
      List.iter pr2 defs
118
119 (* ---------------------------------------------------------------------- *)
(* Debug trace of include processing: emits, through [pr2_debug], a line
 * made of [depth] dashes, a '>', the [mark] and the include file name.
 *)
let trace_cpp_process depth mark inc_file =
  let indent = Common.join "" (Common.repeat "-" depth) in
  let shown = s_of_inc_file_bis inc_file in
  pr2_debug (spf "%s>%s %s" indent mark shown)
126
127
128
129 (*****************************************************************************)
130 (* Helpers *)
131 (*****************************************************************************)
132
133
(* Table from a header's base name to its full path. A base name may be
 * bound several times (one binding per file found with that name).
 *)
let _hcandidates = Hashtbl.create 101

(* Register in [_hcandidates] every file that
 * [Common.files_of_dir_or_files "[h]"] reports for [dir], keyed by base name.
 * Each registered path is also traced through [pr2_debug].
 *)
let init_adjust_candidate_header_files dir =
  let register path =
    pr2_debug path;
    Hashtbl.add _hcandidates (Filename.basename path) path
  in
  List.iter register (Common.files_of_dir_or_files "[h]" [dir])
146
147
148
(* Resolve [inc_file] against the file system. Returns every existing
 * candidate, so the result may hold zero, one or several paths
 * (todo? may return a list of matches):
 *  - Local: looked up relative to [dirname] only;
 *  - NonLocal: looked up in each -I directory found in [cppopts];
 *  - Weird: reported through [pr2] and never resolved.
 *)
let find_header_file1 cppopts dirname inc_file =
  (* [Some path] when the include exists under [dir], [None] otherwise *)
  let existing_in dir =
    let candidate = Filename.concat dir (Ast_c.s_of_inc_file inc_file) in
    if Sys.file_exists candidate then Some candidate else None
  in
  match inc_file with
  | Weird s ->
      pr2 ("CPPAST: weird include not handled:" ^ s);
      []
  | Local _ ->
      (match existing_in dirname with
       | Some path -> [path]
       | None -> [])
  | NonLocal _ ->
      Common.map_filter existing_in (i_of_cpp_options cppopts)
169
(* Fallback lookup: find the headers registered in [_hcandidates] under the
 * base name of [inc_file]. All bindings are returned; a unique hit is also
 * traced through [pr2_debug]. Weird includes never match.
 *
 * todo? can try to find the most precise match: first just use the basename
 * but then maybe look whether some directories are also in common?
 *)
let find_header_file2 inc_file =
  match inc_file with
  | Weird _ -> []
  | Local _ | NonLocal _ ->
      let base = Filename.basename (Ast_c.s_of_inc_file inc_file) in
      let found = Hashtbl.find_all _hcandidates base in
      (match found with
       | [file] -> pr2_debug ("CPPAST: find header in other dir: " ^ file)
       | _ -> ());
      found
191
192
(* Locate the file designated by [inc_file]: first through the regular
 * lookup ([find_header_file1]); only when that finds nothing, through the
 * base-name table ([find_header_file2]). The result may contain zero, one
 * or several candidates.
 *)
let find_header_file cppopts dirname inc_file =
  match find_header_file1 cppopts dirname inc_file with
  | [] -> find_header_file2 inc_file
  | found -> found
199
200
201
202
203 (* ---------------------------------------------------------------------- *)
(* Cache of parse results, keyed by file name. *)
let _headers_hash = Hashtbl.create 101

(* On FreeBSD ocaml is thrashing: it uses up to 1.6GB of memory and then
 * building the database_c takes ages.
 *
 * So just bound the cache with the following threshold to avoid this
 * thrashing; simple.
 *
 * On NetBSD, got an Out_of_memory exn on this file:
 * /home/pad/software-os-src2/netbsd/dev/microcode/cyclades-z/
 * even if the cache is small. That's because of a huge single
 * ast element, and probably the ast marshalling fails.
 *)
let default_threshold_cache_nb_files = 200

(* Parse [file] with [Parse_c.parse_c_and_cpp], memoizing the result in
 * [_headers_hash]. The whole cache is emptied beforehand whenever it holds
 * more than [threshold_cache_nb_files] entries.
 *)
let parse_c_and_cpp_cache
    ?(threshold_cache_nb_files = default_threshold_cache_nb_files) file =
  let cache_is_full =
    Hashtbl.length _headers_hash > threshold_cache_nb_files in
  if cache_is_full then Hashtbl.clear _headers_hash;
  Common.memoized _headers_hash file
    (fun () -> Parse_c.parse_c_and_cpp file)
227
228
229
230 (*****************************************************************************)
231 (* Main entry *)
232 (*****************************************************************************)
233
234
(* Expand the #include directives of [ast].
 *
 * Each Include directive whose file can be located unambiguously (see
 * [find_header_file]) and has not been expanded yet during this call gets
 * its [i_content] set to the parsed, recursively expanded, program of that
 * file. Other directives are handed to the default visitor continuation.
 *
 * - [depth_limit]: when [Some n], Include directives met at nesting depth
 *   [>= n] are returned untouched.
 * - [threshold_cache_nb_files]: forwarded to [parse_c_and_cpp_cache].
 * - [iops]: cpp options; their -I directories are used for the lookup.
 * - [dirname]: directory against which local includes of [ast] are resolved.
 *
 * Side effect: [Flag_parsing_c.verbose_parsing] and
 * [Flag_parsing_c.verbose_lexing] are set to false as soon as one header is
 * parsed, and are not restored.
 *)
let (cpp_expand_include2:
  ?depth_limit:int option ->
  ?threshold_cache_nb_files:int ->
  cpp_option list -> Common.dirname -> Ast_c.program -> Ast_c.program) =
 fun ?(depth_limit=None) ?threshold_cache_nb_files iops dirname ast ->

  if !Flag_parsing_c.debug_cpp_ast
  then pr2_xxxxxxxxxxxxxxxxx();

  (* files expanded so far, shared by all recursion levels *)
  let already_included = ref [] in

  (* [stack] holds the chain of files currently being expanded; its length
   * is the nesting depth. [curdir] is the directory of the file whose
   * program [prog] is being visited. *)
  let rec expand stack curdir prog =
    let depth = List.length stack in
    let too_deep =
      match depth_limit with
      | Some limit -> depth >= limit
      | None -> false
    in

    (* Parse [file], expand its own includes and attach the result to the
     * Include record [inc]. *)
    let expand_header inc_file inc file =
      trace_cpp_process depth "" inc_file;
      Common.push2 file already_included;
      (* CONFIG *)
      Flag_parsing_c.verbose_parsing := false;
      Flag_parsing_c.verbose_lexing := false;
      let (parsed, _stat) =
        parse_c_and_cpp_cache ?threshold_cache_nb_files file
      in
      let header_ast = Parse_c.program_of_program2 parsed in
      (* recurse *)
      let expanded =
        expand (file::stack) (Filename.dirname file) header_ast in
      Include { inc with i_content = Some (file, expanded) }
    in

    let on_directive (k, _bigf) cpp =
      match cpp with
      | Include ({i_include = (inc_file, _);
                  i_rel_pos = _;
                  i_is_in_ifdef = _;
                  i_content = _;
                 } as inc)
        ->
          if too_deep
          then cpp
          else
            (match find_header_file iops curdir inc_file with
            | [] ->
                trace_cpp_process depth "!!" inc_file;
                pr2 "CPPAST: file not found";
                k cpp
            | [file] ->
                if List.mem file !already_included
                then begin
                  (* pr2 ("already included: " ^ file); *)
                  trace_cpp_process depth "*" inc_file;
                  k cpp
                end
                else expand_header inc_file inc file
            | _ :: _ :: _ ->
                trace_cpp_process depth "!!" inc_file;
                pr2 "CPPAST: too much candidates";
                k cpp
            )
      | _ -> k cpp
    in

    Visitor_c.vk_program_s
      { Visitor_c.default_visitor_c_s with
        Visitor_c.kcppdirective_s = on_directive;
      }
      prog
  in
  expand [] dirname ast
306
307
(* Same as [cpp_expand_include2], run under [Common.profile_code] with the
 * label "cpp_expand_include".
 *)
let cpp_expand_include ?depth_limit ?threshold_cache_nb_files
    cppopts dirname ast =
  let run () =
    cpp_expand_include2 ?depth_limit ?threshold_cache_nb_files
      cppopts dirname ast
  in
  Common.profile_code "cpp_expand_include" run
311
312 (*
313 let unparse_showing_include_content ?
314 *)
315
316
317 (*****************************************************************************)
318 (* Ifdef-statementize *)
319 (*****************************************************************************)
320
321
(* True when the sequencable element is an ifdef directive carrying [tag].
 * Plain statements and other cpp directives never match.
 * Raises [Impossible 77] on an [IfdefStmt2], which is not expected here.
 *)
let is_ifdef_and_same_tag tag = function
  | IfdefStmt2 _ -> raise (Impossible 77)
  | IfdefStmt (IfdefDirective ((_, tag2), _)) -> tag =*= tag2
  | StmtElem _ | CppDirectiveStmt _ -> false
328
329
330
(* Decide whether a group of ifdef directives can be turned into an
 * [IfdefStmt2]: the number of directives collected must equal the total
 * recorded in the tag. Otherwise a message giving the location of the
 * first directive is printed and the answer is false.
 *
 * What if the parser skipped only some of the ifdef elements of a given
 * tag? Once one was passed, all of them should be passed, so we should at
 * least detect here that such a tag is not "valid". Maybe the parser could
 * return or mark some tags as "partially_passed_ifdef_tag". Maybe ast_c
 * could have a MatchingTag of int * bool ref (* one_was_passed *) where the
 * ref is shared by the ifdefs with the same matching tag index. Or simply
 * count the number of directives with the same tag and put this information
 * in the tag. Hence the total_with_this_tag below.
 *)
let should_ifdefize (IfdefTag (_tag, total_with_this_tag), ii)
    ifdefs_directives xxs =
  let some_were_passed =
    total_with_this_tag <> List.length ifdefs_directives in
  if not some_were_passed
  then
    (* todo? put more condition ? dont ifdefize declaration ? *)
    true
  else begin
    let strloc = Ast_c.strloc_of_info (List.hd ii) in
    pr2 (spf "CPPASTC: can not ifdefize ifdef at %s" strloc);
    pr2 "CPPASTC: some of its directives were passed";
    false
  end
352
353
354
355
356
(* Split [xs] at the ifdef directives carrying [tag].
 *
 * Returns a triple (ifdef directives, grouped elements, remaining
 * sequencables):
 *   XXX1 XXX2 elsif YYY1 else ZZZ1 endif WWW1 WWW2
 *   => [elsif, else, endif], [XXX1 XXX2; YYY1; ZZZ1], [WWW1 WWW2]
 *
 * Raises [Impossible 78] if a separator is not an [IfdefStmt].
 *)
let group_ifdef tag xs =
  let (groups, remaining) = group_by_post (is_ifdef_and_same_tag tag) xs in
  let directive_of_separator = function
    | IfdefStmt y -> y
    | StmtElem _ | CppDirectiveStmt _ | IfdefStmt2 _ -> raise (Impossible 78)
  in
  let directives =
    List.map (fun (_, sep) -> directive_of_separator sep) groups in
  let bodies = List.map fst groups in
  (directives, bodies, remaining)
371
372
(* Turn the flat ifdef directives found in statement sequences into
 * structured [IfdefStmt2] nodes.
 *
 * In every statement list, an #ifdef directive is grouped with the other
 * directives of the same tag (see [group_ifdef]); when [should_ifdefize]
 * accepts the group it is replaced by a single [IfdefStmt2]. All resulting
 * elements are then visited with the same visitor.
 *
 * Fails with "already applied cpp_ifdef_statementize" when an [IfdefStmt2]
 * is already present.
 *)
let cpp_ifdef_statementize ast =
  let regroup (_k, bigf) stmts =
    let visit stseq = Visitor_c.vk_statement_sequencable_s bigf stseq in
    let rec aux = function
      | [] -> []
      | stseq :: rest ->
          (match stseq with
          | StmtElem _ | CppDirectiveStmt _ ->
              visit stseq :: aux rest

          | IfdefStmt (IfdefDirective ((Ifdef, tag), ii) as ifdef) ->
              let (restifdefs, xxs, rest') = group_ifdef tag rest in
              if should_ifdefize (tag, ii) (ifdef :: restifdefs) xxs
              then visit (IfdefStmt2 (ifdef :: restifdefs, xxs)) :: aux rest'
              else visit stseq :: aux rest

          | IfdefStmt
              (IfdefDirective (((IfdefElseif|IfdefElse|IfdefEndif), _), _)) ->
              pr2 "weird: first directive is not a ifdef";
              (* maybe not weird, just that should_ifdefize
               * returned false *)
              visit stseq :: aux rest

          | IfdefStmt2 _ ->
              failwith "already applied cpp_ifdef_statementize"
          )
    in
    aux stmts
  in
  Visitor_c.vk_program_s
    { Visitor_c.default_visitor_c_s with
      Visitor_c.kstatementseq_list_s = regroup;
    }
    ast
411
412
413 (*****************************************************************************)
414 (* Macro *)
415 (*****************************************************************************)
416
(* Expand a macro used as an expression. Not implemented yet: always raises
 * [Todo].
 *)
let cpp_expand_macro_expr
  : Ast_c.define_kind -> Ast_c.argument Ast_c.wrap2 list ->
    Ast_c.expression option =
  fun _defkind _args -> raise Todo