permit multiline comments and strings in macros
[bpt/coccinelle.git] / parsing_c / cpp_ast_c.ml
CommitLineData
0708f913 1(* Yoann Padioleau
ae4735db
C
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
0708f913 4 * Copyright (C) 2008, 2009 University of Urbana Champaign
faf9a90c
C
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
faf9a90c
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
485bce71
C
15open Common
16
17open Ast_c
18
708f4980
C
19(*****************************************************************************)
20(* Wrappers *)
21(*****************************************************************************)
ae4735db 22let pr2, pr2_once =
978fd7e5 23 Common.mk_pr2_wrappers Flag_parsing_c.verbose_cpp_ast
ae4735db 24let pr2_debug,pr2_debug_once =
708f4980
C
25 Common.mk_pr2_wrappers Flag_parsing_c.debug_cpp_ast
26
485bce71
C
27(*****************************************************************************)
28(* Cpp Ast Manipulations *)
29(*****************************************************************************)
30
31(*
32 * cpp-include-expander-builtin.
ae4735db
C
33 *
34 * alternative1: parse and call cpp tour a tour. So let cpp work at
91eba41f 35 * the token level. That's what most tools do.
485bce71 36 * alternative2: apply cpp at the very end. Process that go through ast
ae4735db 37 * and do the stuff such as #include, macro expand,
91eba41f 38 * ifdef but on the ast!
ae4735db 39 *
485bce71 40 * But need keep those info in ast at least, even bad
ae4735db 41 * macro for instance, and for parse error region ? maybe can
485bce71
C
42 * get another chance ?
43 * I think it's better to do the cpp-include-expander in a different step
ae4735db 44 * rather than embedding it in the parser. The parser is already too complex.
485bce71 45 * Also keep with the tradition to try to parse as-is.
ae4735db 46 *
485bce71
C
47 * todo? but maybe could discover new info that could help reparse
48 * the ParseError in original file. Try again parsing it by
ae4735db
C
49 * putting it in a minifile ?
50 *
51 *
485bce71
C
52 * todo? maybe can do some pass that work at the ifdef level and for instance
53 * try to paren them, so have in Ast some stuff that are not
54 * present at parsing time but that can then be constructed after
55 * some processing (a little bit like my type for expression filler,
56 * or position info filler, or include relative position filler).
ae4735db 57 *
485bce71
C
58 * ??add such info about what was done somewhere ? could build new
59 * ??ast each time but too tedious (maybe need delta-programming!)
60 *
91eba41f 61 * todo? maybe change cpp_ast_c to go deeper on local "" ?
ae4735db
C
62 *
63 *
64 * TODO: macro expand,
485bce71 65 * TODO: handle ifdef
ae4735db
C
66 *
67 *
68 *
485bce71
C
69 * cpp_ifdef_statementize: again better to separate concern and in parser
70 * just add the directives in a flat way (IfdefStmt) and later do more
71 * processing and transform them in a tree with some IfdefStmt2.
72 *)
73
74
75
76(*****************************************************************************)
77(* Types *)
78(*****************************************************************************)
79
ae4735db 80type cpp_option =
91eba41f 81 | I of Common.dirname
485bce71
C
82 | D of string * string option
83
84
85
ae4735db 86let i_of_cpp_options xs =
485bce71
C
87 xs +> Common.map_filter (function
88 | I f -> Some f
89 | D _ -> None
90 )
91
ae4735db 92let cpp_option_of_cmdline (xs, ys) =
485bce71 93 (xs +> List.map (fun s -> I s)) ++
ae4735db 94 (ys +> List.map (fun s ->
485bce71
C
95 if s =~ "\\([A-Z][A-Z0-9_]*\\)=\\(.*\\)"
96 then
97 let (def, value) = matched2 s in
98 D (def, Some value)
ae4735db 99 else
485bce71
C
100 D (s, None)
101 ))
102
978fd7e5
C
103(*****************************************************************************)
104(* Debug *)
105(*****************************************************************************)
ae4735db
C
106let (show_cpp_i_opts: string list -> unit) = fun xs ->
107 if not (null xs) then begin
978fd7e5
C
108 pr2 "-I";
109 xs +> List.iter pr2
110 end
111
ae4735db 112
978fd7e5
C
113let (show_cpp_d_opts: string list -> unit) = fun xs ->
114 if not (null xs) then begin
115 pr2 "-D";
116 xs +> List.iter pr2
117 end
118
119(* ---------------------------------------------------------------------- *)
120let trace_cpp_process depth mark inc_file =
ae4735db 121 pr2_debug (spf "%s>%s %s"
978fd7e5
C
122 (Common.repeat "-" depth +> Common.join "")
123 mark
124 (s_of_inc_file_bis inc_file));
125 ()
126
127
128
485bce71
C
129(*****************************************************************************)
130(* Helpers *)
131(*****************************************************************************)
132
978fd7e5 133
91eba41f
C
134let _hcandidates = Hashtbl.create 101
135
ae4735db 136let init_adjust_candidate_header_files dir =
91eba41f
C
137 let ext = "[h]" in
138 let files = Common.files_of_dir_or_files ext [dir] in
139
ae4735db 140 files +> List.iter (fun file ->
91eba41f 141 let base = Filename.basename file in
708f4980 142 pr2_debug file;
91eba41f
C
143 Hashtbl.add _hcandidates base file;
144 );
145 ()
146
147
148
485bce71 149(* may return a list of match ? *)
91eba41f 150let find_header_file1 cppopts dirname inc_file =
485bce71 151 match inc_file with
ae4735db
C
152 | Local f ->
153 let finalfile =
485bce71 154 Filename.concat dirname (Ast_c.s_of_inc_file inc_file) in
ae4735db 155 if Sys.file_exists finalfile
485bce71
C
156 then [finalfile]
157 else []
ae4735db
C
158 | NonLocal f ->
159 i_of_cpp_options cppopts +> Common.map_filter (fun dirname ->
160 let finalfile =
485bce71 161 Filename.concat dirname (Ast_c.s_of_inc_file inc_file) in
ae4735db 162 if Sys.file_exists finalfile
485bce71
C
163 then Some finalfile
164 else None
165 )
ae4735db 166 | Weird s ->
0708f913 167 pr2 ("CPPAST: weird include not handled:" ^ s);
485bce71
C
168 []
169
91eba41f
C
170(* todo? can try find most precise ? first just use basename but
171 * then maybe look if have also some dir in common ?
172 *)
ae4735db 173let find_header_file2 inc_file =
91eba41f 174 match inc_file with
ae4735db
C
175 | Local f
176 | NonLocal f ->
91eba41f
C
177 let s = (Ast_c.s_of_inc_file inc_file) in
178 let base = Filename.basename s in
179
180 let res = Hashtbl.find_all _hcandidates base in
181 (match res with
ae4735db 182 | [file] ->
708f4980 183 pr2_debug ("CPPAST: find header in other dir: " ^ file);
91eba41f 184 res
ae4735db 185 | [] ->
91eba41f
C
186 []
187 | x::y::xs -> res
188 )
ae4735db 189 | Weird s ->
91eba41f
C
190 []
191
192
193let find_header_file cppopts dirname inc_file =
194 let res1 = find_header_file1 cppopts dirname inc_file in
195 match res1 with
ae4735db 196 | [file] -> res1
91eba41f
C
197 | [] -> find_header_file2 inc_file
198 | x::y::xs -> res1
199
200
485bce71 201
91eba41f 202
91eba41f 203(* ---------------------------------------------------------------------- *)
ae4735db 204let _headers_hash = Hashtbl.create 101
91eba41f
C
205
206(* On freebsd ocaml is trashing, use up to 1.6Go of memory and then
ae4735db
C
207 * building the database_c takes ages.
208 *
91eba41f 209 * So just limit with following threshold to avoid this trashing, simple.
ae4735db 210 *
91eba41f
C
211 * On netbsd, got a Out_of_memory exn on this file;
212 * /home/pad/software-os-src2/netbsd/dev/microcode/cyclades-z/
ae4735db 213 * even if the cache is small. That's because huge single
91eba41f
C
214 * ast element and probably the ast marshalling fail.
215 *)
978fd7e5
C
216let default_threshold_cache_nb_files = 200
217
ae4735db 218let parse_c_and_cpp_cache
978fd7e5 219 ?(threshold_cache_nb_files= default_threshold_cache_nb_files) file =
91eba41f 220
978fd7e5 221 if Hashtbl.length _headers_hash > threshold_cache_nb_files
91eba41f
C
222 then Hashtbl.clear _headers_hash;
223
ae4735db 224 Common.memoized _headers_hash file (fun () ->
91eba41f
C
225 Parse_c.parse_c_and_cpp file
226 )
227
228
91eba41f 229
485bce71
C
230(*****************************************************************************)
231(* Main entry *)
232(*****************************************************************************)
233
234
ae4735db 235let (cpp_expand_include2:
91eba41f 236 ?depth_limit:int option ->
978fd7e5 237 ?threshold_cache_nb_files:int ->
485bce71 238 cpp_option list -> Common.dirname -> Ast_c.program -> Ast_c.program) =
ae4735db 239 fun ?(depth_limit=None) ?threshold_cache_nb_files iops dirname ast ->
485bce71 240
708f4980
C
241 if !Flag_parsing_c.debug_cpp_ast
242 then pr2_xxxxxxxxxxxxxxxxx();
243
485bce71
C
244 let already_included = ref [] in
245
ae4735db 246 let rec aux stack dirname ast =
485bce71
C
247 let depth = List.length stack in
248
249 ast +> Visitor_c.vk_program_s { Visitor_c.default_visitor_c_s with
ae4735db
C
250 Visitor_c.kcppdirective_s = (fun (k, bigf) cpp ->
251 match cpp with
485bce71
C
252 | Include {i_include = (inc_file, ii);
253 i_rel_pos = h_rel_pos;
254 i_is_in_ifdef = b;
255 i_content = copt;
ae4735db
C
256 }
257 ->
91eba41f
C
258 (match depth_limit with
259 | Some limit when depth >= limit -> cpp
ae4735db
C
260 | _ ->
261
485bce71 262 (match find_header_file iops dirname inc_file with
ae4735db 263 | [file] ->
485bce71 264 if List.mem file !already_included
ae4735db 265 then begin
485bce71
C
266 (* pr2 ("already included: " ^ file); *)
267 trace_cpp_process depth "*" inc_file;
268 k cpp
269 end else begin
270 trace_cpp_process depth "" inc_file;
271 Common.push2 file already_included;
272 (* CONFIG *)
ae4735db
C
273 Flag_parsing_c.verbose_parsing := false;
274 Flag_parsing_c.verbose_lexing := false;
275 let (ast2, _stat) =
276 parse_c_and_cpp_cache ?threshold_cache_nb_files file
978fd7e5 277 in
485bce71
C
278
279 let ast = Parse_c.program_of_program2 ast2 in
ae4735db 280 let dirname' = Filename.dirname file in
485bce71
C
281
282 (* recurse *)
283 let ast' = aux (file::stack) dirname' ast in
284
285 Include {i_include = (inc_file, ii);
286 i_rel_pos = h_rel_pos;
287 i_is_in_ifdef = b;
288 i_content = Some (file, ast');
289 }
290 end
ae4735db 291 | [] ->
485bce71
C
292 trace_cpp_process depth "!!" inc_file;
293 pr2 "CPPAST: file not found";
294 k cpp
ae4735db 295 | x::y::zs ->
485bce71
C
296 trace_cpp_process depth "!!" inc_file;
297 pr2 "CPPAST: too much candidates";
298 k cpp
299 )
91eba41f 300 )
485bce71
C
301 | _ -> k cpp
302 );
303 }
304 in
305 aux [] dirname ast
485bce71 306
ae4735db
C
307
308let cpp_expand_include ?depth_limit ?threshold_cache_nb_files a b c =
91eba41f 309 Common.profile_code "cpp_expand_include"
978fd7e5 310 (fun () -> cpp_expand_include2 ?depth_limit ?threshold_cache_nb_files a b c)
485bce71 311
ae4735db 312(*
485bce71
C
313let unparse_showing_include_content ?
314*)
315
316
317(*****************************************************************************)
318(* Ifdef-statementize *)
319(*****************************************************************************)
320
321
ae4735db 322let is_ifdef_and_same_tag tag x =
485bce71 323 match x with
ae4735db 324 | IfdefStmt (IfdefDirective ((_, tag2),_)) ->
b1b2de81 325 tag =*= tag2
485bce71 326 | StmtElem _ | CppDirectiveStmt _ -> false
abad11c5 327 | IfdefStmt2 _ -> raise (Impossible 77)
485bce71
C
328
329
330
331(* What if I skipped in the parser only some of the ifdef elements
332 * of the same tag. Once I passed one, I should pass all of them and so
333 * at least should detect here that one tag is not "valid". Maybe in the parser
334 * can return or marked some tags as "partially_passed_ifdef_tag".
335 * Maybe could do in ast_c a MatchingTag of int * bool ref (* one_was_passed *)
336 * where the ref will be shared by the ifdefs with the same matching tag
337 * indice. Or simply count the number of directives with the same tag and
338 * put this information in the tag. Hence the total_with_this_tag below.
339 *)
ae4735db 340let should_ifdefize (tag,ii) ifdefs_directives xxs =
485bce71 341 let IfdefTag (_tag, total_with_this_tag) = tag in
ae4735db 342
485bce71
C
343 if total_with_this_tag <> List.length ifdefs_directives
344 then begin
978fd7e5
C
345 let strloc = Ast_c.strloc_of_info (List.hd ii) in
346 pr2 (spf "CPPASTC: can not ifdefize ifdef at %s" strloc);
347 pr2 "CPPASTC: some of its directives were passed";
ae4735db
C
348 false
349 end else
485bce71
C
350 (* todo? put more condition ? dont ifdefize declaration ? *)
351 true
352
353
354
355
356
ae4735db
C
357(* return a triple, (ifdefs directive * grouped xs * remaining sequencable)
358 * XXX1 XXX2 elsif YYY1 else ZZZ1 endif WWW1 WWW2
485bce71
C
359 * => [elsif, else, endif], [XXX1 XXX2; YYY1; ZZZ1], [WWW1 WWW2]
360 *)
ae4735db 361let group_ifdef tag xs =
485bce71 362 let (xxs, xs) = group_by_post (is_ifdef_and_same_tag tag) xs in
ae4735db
C
363
364 xxs +> List.map snd +> List.map (fun x ->
365 match x with
485bce71 366 | IfdefStmt y -> y
abad11c5 367 | StmtElem _ | CppDirectiveStmt _ | IfdefStmt2 _ -> raise (Impossible 78)
485bce71 368 ),
ae4735db 369 xxs +> List.map fst,
485bce71
C
370 xs
371
372
ae4735db 373let rec cpp_ifdef_statementize ast =
485bce71 374 Visitor_c.vk_program_s { Visitor_c.default_visitor_c_s with
ae4735db
C
375 Visitor_c.kstatementseq_list_s = (fun (k, bigf) xs ->
376 let rec aux xs =
485bce71
C
377 match xs with
378 | [] -> []
b1b2de81 379 | stseq::xs ->
485bce71 380 (match stseq with
ae4735db 381 | StmtElem st ->
485bce71 382 Visitor_c.vk_statement_sequencable_s bigf stseq::aux xs
ae4735db 383 | CppDirectiveStmt directive ->
485bce71 384 Visitor_c.vk_statement_sequencable_s bigf stseq::aux xs
b1b2de81 385 | IfdefStmt ifdef ->
485bce71 386 (match ifdef with
ae4735db 387 | IfdefDirective ((Ifdef,tag),ii) ->
485bce71
C
388
389 let (restifdefs, xxs, xs') = group_ifdef tag xs in
ae4735db 390 if should_ifdefize (tag,ii) (ifdef::restifdefs) xxs
485bce71
C
391 then
392 let res = IfdefStmt2 (ifdef::restifdefs, xxs) in
393 Visitor_c.vk_statement_sequencable_s bigf res::aux xs'
b1b2de81 394 else
485bce71 395 Visitor_c.vk_statement_sequencable_s bigf stseq::aux xs
ae4735db 396
b1b2de81 397 | IfdefDirective (((IfdefElseif|IfdefElse|IfdefEndif),b),ii) ->
0708f913 398 pr2 "weird: first directive is not a ifdef";
ae4735db 399 (* maybe not weird, just that should_ifdefize
485bce71
C
400 * returned false *)
401 Visitor_c.vk_statement_sequencable_s bigf stseq::aux xs
402 )
403
ae4735db 404 | IfdefStmt2 (ifdef, xxs) ->
485bce71
C
405 failwith "already applied cpp_ifdef_statementize"
406 )
407 in
408 aux xs
409 );
410 } ast
91eba41f
C
411
412
413(*****************************************************************************)
414(* Macro *)
415(*****************************************************************************)
416
ae4735db
C
417let (cpp_expand_macro_expr:
418 Ast_c.define_kind -> Ast_c.argument Ast_c.wrap2 list ->
419 Ast_c.expression option) =
420 fun defkind args ->
91eba41f 421 raise Todo