| 1 | (* Copyright (C) 2008, 2009 University of Urbana Champaign |
| 2 | * |
| 3 | * This program is free software; you can redistribute it and/or |
| 4 | * modify it under the terms of the GNU General Public License (GPL) |
| 5 | * version 2 as published by the Free Software Foundation. |
| 6 | * |
| 7 | * This program is distributed in the hope that it will be useful, |
| 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 10 | * file license.txt for more details. |
| 11 | *) |
| 12 | open Common |
| 13 | |
| 14 | open Ast_c |
| 15 | |
| 16 | (*****************************************************************************) |
| 17 | (* Cpp Ast Manipulations *) |
| 18 | (*****************************************************************************) |
| 19 | |
| 20 | (* |
| 21 | * cpp-include-expander-builtin. |
| 22 | * |
| 23 | * alternative1: parse and call cpp tour a tour. So let cpp work at |
| 24 | * the token level. That's what most tools do. |
| 25 | * alternative2: apply cpp at the very end. Process that go through ast |
| 26 | * and do the stuff such as #include, macro expand, |
| 27 | * ifdef but on the ast! |
| 28 | * |
| 29 | * But need keep those info in ast at least, even bad |
| 30 | * macro for instance, and for parse error region ? maybe can |
| 31 | * get another chance ? |
| 32 | * I think it's better to do the cpp-include-expander in a different step |
| 33 | * rather than embedding it in the parser. The parser is already too complex. |
| 34 | * Also keep with the tradition to try to parse as-is. |
| 35 | * |
| 36 | * todo? but maybe could discover new info that could help reparse |
| 37 | * the ParseError in original file. Try again parsing it by |
| 38 | * putting it in a minifile ? |
| 39 | * |
| 40 | * |
| 41 | * todo? maybe can do some pass that work at the ifdef level and for instance |
| 42 | * try to paren them, so have in Ast some stuff that are not |
| 43 | * present at parsing time but that can then be constructed after |
| 44 | * some processing (a little bit like my type for expression filler, |
| 45 | * or position info filler, or include relative position filler). |
| 46 | * |
| 47 | * ??add such info about what was done somewhere ? could build new |
| 48 | * ??ast each time but too tedious (maybe need delta-programming!) |
| 49 | * |
| 50 | * todo? maybe change cpp_ast_c to go deeper on local "" ? |
| 51 | * |
| 52 | * |
| 53 | * TODO: macro expand, |
| 54 | * TODO: handle ifdef |
| 55 | * |
| 56 | * |
| 57 | * |
| 58 | * cpp_ifdef_statementize: again better to separate concern and in parser |
| 59 | * just add the directives in a flat way (IfdefStmt) and later do more |
| 60 | * processing and transform them in a tree with some IfdefStmt2. |
| 61 | *) |
| 62 | |
| 63 | |
| 64 | |
| 65 | (*****************************************************************************) |
| 66 | (* Types *) |
| 67 | (*****************************************************************************) |
| 68 | |
| 69 | type cpp_option = |
| 70 | | I of Common.dirname |
| 71 | | D of string * string option |
| 72 | |
| 73 | |
| 74 | |
| 75 | let i_of_cpp_options xs = |
| 76 | xs +> Common.map_filter (function |
| 77 | | I f -> Some f |
| 78 | | D _ -> None |
| 79 | ) |
| 80 | |
| 81 | let cpp_option_of_cmdline (xs, ys) = |
| 82 | (xs +> List.map (fun s -> I s)) ++ |
| 83 | (ys +> List.map (fun s -> |
| 84 | if s =~ "\\([A-Z][A-Z0-9_]*\\)=\\(.*\\)" |
| 85 | then |
| 86 | let (def, value) = matched2 s in |
| 87 | D (def, Some value) |
| 88 | else |
| 89 | D (s, None) |
| 90 | )) |
| 91 | |
| 92 | (*****************************************************************************) |
| 93 | (* Helpers *) |
| 94 | (*****************************************************************************) |
| 95 | |
| 96 | let _hcandidates = Hashtbl.create 101 |
| 97 | |
| 98 | let init_adjust_candidate_header_files dir = |
| 99 | let ext = "[h]" in |
| 100 | let files = Common.files_of_dir_or_files ext [dir] in |
| 101 | |
| 102 | files +> List.iter (fun file -> |
| 103 | let base = Filename.basename file in |
| 104 | pr2 file; |
| 105 | Hashtbl.add _hcandidates base file; |
| 106 | ); |
| 107 | () |
| 108 | |
| 109 | |
| 110 | |
| 111 | (* may return a list of match ? *) |
| 112 | let find_header_file1 cppopts dirname inc_file = |
| 113 | match inc_file with |
| 114 | | Local f -> |
| 115 | let finalfile = |
| 116 | Filename.concat dirname (Ast_c.s_of_inc_file inc_file) in |
| 117 | if Sys.file_exists finalfile |
| 118 | then [finalfile] |
| 119 | else [] |
| 120 | | NonLocal f -> |
| 121 | i_of_cpp_options cppopts +> Common.map_filter (fun dirname -> |
| 122 | let finalfile = |
| 123 | Filename.concat dirname (Ast_c.s_of_inc_file inc_file) in |
| 124 | if Sys.file_exists finalfile |
| 125 | then Some finalfile |
| 126 | else None |
| 127 | ) |
| 128 | | Wierd s -> |
| 129 | pr2 ("CPPAST: wierd include not handled:" ^ s); |
| 130 | [] |
| 131 | |
| 132 | (* todo? can try find most precise ? first just use basename but |
| 133 | * then maybe look if have also some dir in common ? |
| 134 | *) |
| 135 | let find_header_file2 inc_file = |
| 136 | match inc_file with |
| 137 | | Local f |
| 138 | | NonLocal f -> |
| 139 | let s = (Ast_c.s_of_inc_file inc_file) in |
| 140 | let base = Filename.basename s in |
| 141 | |
| 142 | let res = Hashtbl.find_all _hcandidates base in |
| 143 | (match res with |
| 144 | | [file] -> |
| 145 | pr2 ("CPPAST: find header in other dir: " ^ file); |
| 146 | res |
| 147 | | [] -> |
| 148 | [] |
| 149 | | x::y::xs -> res |
| 150 | ) |
| 151 | | Wierd s -> |
| 152 | [] |
| 153 | |
| 154 | |
| 155 | let find_header_file cppopts dirname inc_file = |
| 156 | let res1 = find_header_file1 cppopts dirname inc_file in |
| 157 | match res1 with |
| 158 | | [file] -> res1 |
| 159 | | [] -> find_header_file2 inc_file |
| 160 | | x::y::xs -> res1 |
| 161 | |
| 162 | |
| 163 | |
| 164 | |
| 165 | (* ---------------------------------------------------------------------- *) |
| 166 | let trace_cpp_process depth mark inc_file = |
| 167 | pr2 (spf "%s>%s %s" |
| 168 | (Common.repeat "-" depth +> Common.join "") |
| 169 | mark |
| 170 | (s_of_inc_file_bis inc_file)); |
| 171 | () |
| 172 | |
| 173 | |
| 174 | (* ---------------------------------------------------------------------- *) |
| 175 | let _headers_hash = Hashtbl.create 101 |
| 176 | |
| 177 | (* On freebsd ocaml is trashing, use up to 1.6Go of memory and then |
| 178 | * building the database_c takes ages. |
| 179 | * |
| 180 | * So just limit with following threshold to avoid this trashing, simple. |
| 181 | * |
| 182 | * On netbsd, got a Out_of_memory exn on this file; |
| 183 | * /home/pad/software-os-src2/netbsd/dev/microcode/cyclades-z/ |
| 184 | * even if the cache is small. That's because huge single |
| 185 | * ast element and probably the ast marshalling fail. |
| 186 | *) |
| 187 | let threshold_cache_nb_files = ref 200 |
| 188 | |
| 189 | let parse_c_and_cpp_cache file = |
| 190 | if Hashtbl.length _headers_hash > !threshold_cache_nb_files |
| 191 | then Hashtbl.clear _headers_hash; |
| 192 | |
| 193 | Common.memoized _headers_hash file (fun () -> |
| 194 | Parse_c.parse_c_and_cpp file |
| 195 | ) |
| 196 | |
| 197 | |
| 198 | (* ---------------------------------------------------------------------- *) |
| 199 | let (show_cpp_i_opts: string list -> unit) = fun xs -> |
| 200 | if not (null xs) then begin |
| 201 | pr2 "-I"; |
| 202 | xs +> List.iter pr2 |
| 203 | end |
| 204 | |
| 205 | |
| 206 | let (show_cpp_d_opts: string list -> unit) = fun xs -> |
| 207 | if not (null xs) then begin |
| 208 | pr2 "-D"; |
| 209 | xs +> List.iter pr2 |
| 210 | end |
| 211 | |
| 212 | (*****************************************************************************) |
| 213 | (* Main entry *) |
| 214 | (*****************************************************************************) |
| 215 | |
| 216 | |
| 217 | let (cpp_expand_include2: |
| 218 | ?depth_limit:int option -> |
| 219 | cpp_option list -> Common.dirname -> Ast_c.program -> Ast_c.program) = |
| 220 | fun ?(depth_limit=None) iops dirname ast -> |
| 221 | |
| 222 | pr2_xxxxxxxxxxxxxxxxx(); |
| 223 | let already_included = ref [] in |
| 224 | |
| 225 | let rec aux stack dirname ast = |
| 226 | let depth = List.length stack in |
| 227 | |
| 228 | ast +> Visitor_c.vk_program_s { Visitor_c.default_visitor_c_s with |
| 229 | Visitor_c.kcppdirective_s = (fun (k, bigf) cpp -> |
| 230 | match cpp with |
| 231 | | Include {i_include = (inc_file, ii); |
| 232 | i_rel_pos = h_rel_pos; |
| 233 | i_is_in_ifdef = b; |
| 234 | i_content = copt; |
| 235 | } |
| 236 | -> |
| 237 | (match depth_limit with |
| 238 | | Some limit when depth >= limit -> cpp |
| 239 | | _ -> |
| 240 | |
| 241 | (match find_header_file iops dirname inc_file with |
| 242 | | [file] -> |
| 243 | if List.mem file !already_included |
| 244 | then begin |
| 245 | (* pr2 ("already included: " ^ file); *) |
| 246 | trace_cpp_process depth "*" inc_file; |
| 247 | k cpp |
| 248 | end else begin |
| 249 | trace_cpp_process depth "" inc_file; |
| 250 | Common.push2 file already_included; |
| 251 | (* CONFIG *) |
| 252 | Flag_parsing_c.verbose_parsing := false; |
| 253 | Flag_parsing_c.verbose_lexing := false; |
| 254 | let (ast2, _stat) = parse_c_and_cpp_cache file in |
| 255 | |
| 256 | let ast = Parse_c.program_of_program2 ast2 in |
| 257 | let dirname' = Filename.dirname file in |
| 258 | |
| 259 | (* recurse *) |
| 260 | let ast' = aux (file::stack) dirname' ast in |
| 261 | |
| 262 | Include {i_include = (inc_file, ii); |
| 263 | i_rel_pos = h_rel_pos; |
| 264 | i_is_in_ifdef = b; |
| 265 | i_content = Some (file, ast'); |
| 266 | } |
| 267 | end |
| 268 | | [] -> |
| 269 | trace_cpp_process depth "!!" inc_file; |
| 270 | pr2 "CPPAST: file not found"; |
| 271 | k cpp |
| 272 | | x::y::zs -> |
| 273 | trace_cpp_process depth "!!" inc_file; |
| 274 | pr2 "CPPAST: too much candidates"; |
| 275 | k cpp |
| 276 | ) |
| 277 | ) |
| 278 | | _ -> k cpp |
| 279 | ); |
| 280 | } |
| 281 | in |
| 282 | aux [] dirname ast |
| 283 | |
| 284 | |
| 285 | let cpp_expand_include ?depth_limit a b c = |
| 286 | Common.profile_code "cpp_expand_include" |
| 287 | (fun () -> cpp_expand_include2 ?depth_limit a b c) |
| 288 | |
| 289 | (* |
| 290 | let unparse_showing_include_content ? |
| 291 | *) |
| 292 | |
| 293 | |
| 294 | (*****************************************************************************) |
| 295 | (* Ifdef-statementize *) |
| 296 | (*****************************************************************************) |
| 297 | |
| 298 | |
| 299 | let is_ifdef_and_same_tag tag x = |
| 300 | match x with |
| 301 | | IfdefStmt (IfdefDirective ((_, tag2),_)) -> |
| 302 | tag = tag2 |
| 303 | | StmtElem _ | CppDirectiveStmt _ -> false |
| 304 | | IfdefStmt2 _ -> raise Impossible |
| 305 | |
| 306 | |
| 307 | |
| 308 | (* What if I skipped in the parser only some of the ifdef elements |
| 309 | * of the same tag. Once I passed one, I should pass all of them and so |
| 310 | * at least should detect here that one tag is not "valid". Maybe in the parser |
| 311 | * can return or marked some tags as "partially_passed_ifdef_tag". |
| 312 | * Maybe could do in ast_c a MatchingTag of int * bool ref (* one_was_passed *) |
| 313 | * where the ref will be shared by the ifdefs with the same matching tag |
| 314 | * indice. Or simply count the number of directives with the same tag and |
| 315 | * put this information in the tag. Hence the total_with_this_tag below. |
| 316 | *) |
| 317 | let should_ifdefize tag ifdefs_directives xxs = |
| 318 | let IfdefTag (_tag, total_with_this_tag) = tag in |
| 319 | |
| 320 | if total_with_this_tag <> List.length ifdefs_directives |
| 321 | then begin |
| 322 | pr2 "CPPASTC: can not ifdefize, some of its directives were passed"; |
| 323 | false |
| 324 | end else |
| 325 | (* todo? put more condition ? dont ifdefize declaration ? *) |
| 326 | true |
| 327 | |
| 328 | |
| 329 | |
| 330 | |
| 331 | |
| 332 | (* return a triple, (ifdefs directive * grouped xs * remaining sequencable) |
| 333 | * XXX1 XXX2 elsif YYY1 else ZZZ1 endif WWW1 WWW2 |
| 334 | * => [elsif, else, endif], [XXX1 XXX2; YYY1; ZZZ1], [WWW1 WWW2] |
| 335 | *) |
| 336 | let group_ifdef tag xs = |
| 337 | let (xxs, xs) = group_by_post (is_ifdef_and_same_tag tag) xs in |
| 338 | |
| 339 | xxs +> List.map snd +> List.map (fun x -> |
| 340 | match x with |
| 341 | | IfdefStmt y -> y |
| 342 | | StmtElem _ | CppDirectiveStmt _ | IfdefStmt2 _ -> raise Impossible |
| 343 | ), |
| 344 | xxs +> List.map fst, |
| 345 | xs |
| 346 | |
| 347 | |
| 348 | let rec cpp_ifdef_statementize ast = |
| 349 | Visitor_c.vk_program_s { Visitor_c.default_visitor_c_s with |
| 350 | Visitor_c.kstatementseq_list_s = (fun (k, bigf) xs -> |
| 351 | |
| 352 | let rec aux xs = |
| 353 | match xs with |
| 354 | | [] -> [] |
| 355 | | stseq::xs -> |
| 356 | (match stseq with |
| 357 | | StmtElem st -> |
| 358 | Visitor_c.vk_statement_sequencable_s bigf stseq::aux xs |
| 359 | | CppDirectiveStmt directive -> |
| 360 | Visitor_c.vk_statement_sequencable_s bigf stseq::aux xs |
| 361 | | IfdefStmt ifdef -> |
| 362 | (match ifdef with |
| 363 | | IfdefDirective ((Ifdef,tag),ii) -> |
| 364 | |
| 365 | let (restifdefs, xxs, xs') = group_ifdef tag xs in |
| 366 | if should_ifdefize tag (ifdef::restifdefs) xxs |
| 367 | then |
| 368 | let res = IfdefStmt2 (ifdef::restifdefs, xxs) in |
| 369 | Visitor_c.vk_statement_sequencable_s bigf res::aux xs' |
| 370 | else |
| 371 | Visitor_c.vk_statement_sequencable_s bigf stseq::aux xs |
| 372 | |
| 373 | | IfdefDirective (((IfdefElseif|IfdefElse|IfdefEndif),b),ii) -> |
| 374 | pr2 "wierd: first directive is not a ifdef"; |
| 375 | (* maybe not wierd, just that should_ifdefize |
| 376 | * returned false *) |
| 377 | Visitor_c.vk_statement_sequencable_s bigf stseq::aux xs |
| 378 | ) |
| 379 | |
| 380 | | IfdefStmt2 (ifdef, xxs) -> |
| 381 | failwith "already applied cpp_ifdef_statementize" |
| 382 | ) |
| 383 | in |
| 384 | aux xs |
| 385 | ); |
| 386 | } ast |
| 387 | |
| 388 | |
| 389 | (*****************************************************************************) |
| 390 | (* Macro *) |
| 391 | (*****************************************************************************) |
| 392 | |
| 393 | let (cpp_expand_macro_expr: |
| 394 | Ast_c.define_kind -> Ast_c.argument Ast_c.wrap2 list -> |
| 395 | Ast_c.expression option) = |
| 396 | fun defkind args -> |
| 397 | raise Todo |