| 1 | (* Yoann Padioleau |
| 2 | * |
| 3 | * Copyright (C) 2010, University of Copenhagen DIKU and INRIA. |
| 4 | * Copyright (C) 2007, 2008 Ecole des Mines de Nantes, |
| 5 | * Copyright (C) 2009 University of Urbana Champaign |
| 6 | * |
| 7 | * This program is free software; you can redistribute it and/or |
| 8 | * modify it under the terms of the GNU General Public License (GPL) |
| 9 | * version 2 as published by the Free Software Foundation. |
| 10 | * |
| 11 | * This program is distributed in the hope that it will be useful, |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | * file license.txt for more details. |
| 15 | *) |
| 16 | |
| 17 | open Common |
| 18 | |
| 19 | open Ast_c |
| 20 | |
| 21 | module Lib = Lib_parsing_c |
| 22 | |
| 23 | (*****************************************************************************) |
| 24 | (* Prelude *) |
| 25 | (*****************************************************************************) |
| 26 | (* History: |
| 27 | * - Done a first type checker in 2002, cf typing-semantic/, but |
| 28 | * was assuming that have all type info, and so was assuming had called |
| 29 | * cpp and everything was right. |
| 30 | * - Wrote this file, in 2006?, as we added pattern matching on type |
| 31 | * in coccinelle. Partial type annotater. |
| 32 | * - Julia extended it in 2008? to have localvar/notlocalvar and |
| 33 | * test/notest information, again used by coccinelle. |
| 34 | * - I extended it in Fall 2008 to have more type information for the |
| 35 | * global analysis. I also added some optimisations to process |
| 36 | * included code faster. |
| 37 | * |
| 38 | * |
| 39 | * Design choices. Can either do: |
| 40 | * - a kind of inferer |
| 41 | * - can first do a simple inferer, that just pass context |
| 42 | * - then a real inferer, managing partial info. |
| 43 | * type context = fullType option |
| 44 | * |
| 45 | * - extract the information from the .h files |
| 46 | * (so no inference at all needed) |
| 47 | * |
| 48 | * Difference with julia's code in parsing_cocci/type_infer.ml: |
| 49 | * - She handles just the variable namespace. She does not type |
| 50 | * field access or enum or macros. This is because cocci programs are |
| 51 | * usually simple and have no structure definition or macro definitions |
| 52 | * that we need to type anyway. |
| 53 | * - She does more propagation. |
| 54 | * - She does not have to handle the typedef isomorphism which force me |
| 55 | * to use those typedef_fix and type_unfold_one_step |
| 56 | * - She does not handle I think the function pointer C isomorphism. |
| 57 | * |
| 58 | * - She has a cleaner type_cocci without any info. In my case |
| 59 | * I need to do those ugly al_type, or generate fake infos. |
| 60 | * - She has more compact code. Perhaps because she does not have to |
| 61 | * handle the extra exp_info that she added on me :) So I need those |
| 62 | * do_with_type, make_info_xxx, etc. |
| 63 | * |
| 64 | * Note: if need to debug this annotater, use -show_trace_profile, it can |
| 65 | * help. You can also set the typedef_debug flag below. |
| 66 | * |
| 67 | * |
| 68 | * |
| 69 | * todo: expression contain types, and statements, which in turn can contain |
| 70 | * expression, so need recurse. Need define an annote_statement and |
| 71 | * annotate_type. |
| 72 | * |
| 73 | * todo: how deal with typedef isomorphisms ? How store them in Ast_c ? |
| 74 | * store all posible variations in ast_c ? a list of type instead of just |
| 75 | * the type ? |
| 76 | * |
| 77 | * todo: how to handle multiple possible definitions for entities like |
| 78 | * struct or typedefs ? Because of ifdef, we should store list of |
| 79 | * possibilities sometimes. |
| 80 | * |
| 81 | * todo: define a new type ? like type_cocci ? where have a bool ? |
| 82 | * |
| 83 | * semi: How handle scope ? When search for type of field, we return |
| 84 | * a type, but this type makes sense only in a certain scope. |
| 85 | * We could add a tag to each typedef, structUnionName to differentiate |
| 86 | * them and also associate in ast_c to the type the scope |
| 87 | * of this type, the env that were used to define this type. |
| 88 | * |
| 89 | * todo: handle better the search in previous env, the env'. Cf the |
| 90 | * termination problem in typedef_fix when I was searching in the same |
| 91 | * env. |
| 92 | * |
| 93 | *) |
| 94 | |
| 95 | (*****************************************************************************) |
| 96 | (* Wrappers *) |
| 97 | (*****************************************************************************) |
| 98 | let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_type |
| 99 | |
| 100 | (*****************************************************************************) |
| 101 | (* Environment *) |
| 102 | (*****************************************************************************) |
| 103 | |
| 104 | (* The different namespaces from stdC manual: |
| 105 | * |
| 106 | * You introduce two new name spaces with every block that you write. |
| 107 | * |
| 108 | * One name space includes all |
| 109 | * - functions, |
| 110 | * - objects, |
| 111 | * - type definitions, |
| 112 | * - and enumeration constants |
| 113 | * that you declare or define within the block. |
| 114 | * |
| 115 | * The other name space includes all |
| 116 | * - enumeration, |
| 117 | * - structure, |
| 118 | * - and union |
| 119 | * *tags* that you define within the block. |
| 120 | * |
| 121 | * You introduce a new member name space with every structure or union |
| 122 | * whose content you define. You identify a member name space by the |
| 123 | * type of left operand that you write for a member selection |
| 124 | * operator, as in x.y or p->y. A member name space ends with the end |
| 125 | * of the block in which you declare it. |
| 126 | * |
| 127 | * You introduce a new goto label name space with every function |
| 128 | * definition you write. Each goto label name space ends with its |
| 129 | * function definition. |
| 130 | *) |
| 131 | |
| 132 | (* But I don't try to do a type-checker, I try to "resolve" type of var |
| 133 | * so don't need make difference between namespaces here. |
| 134 | * |
| 135 | * But, why not make simply a (string, kindstring) assoc ? |
| 136 | * Because we dont want that a variable shadow a struct definition, because |
| 137 | * they are still in 2 different namespace. But could for typedef, |
| 138 | * because VarOrFunc and Typedef are in the same namespace. |
| 139 | * But could do a record as in c_info.ml |
| 140 | *) |
| 141 | |
| 142 | |
| 143 | (* This type contains all "ident" like notion of C. Each time in Ast_c |
| 144 | * you have a string type (as in expression, function name, fields) |
| 145 | * then you need to manage the scope of this ident. |
| 146 | * |
| 147 | * The wrap for StructUnionNameDef contain the whole ii, the i for |
| 148 | * the string, the structUnion and the structType. |
| 149 | * |
| 150 | * Put Macro here ? after all the scoping rules for cpp macros is different |
| 151 | * and so does not vanish after the closing '}'. |
| 152 | * |
| 153 | * todo: EnumDef |
| 154 | *) |
| 155 | type namedef = |
| 156 | | VarOrFunc of string * Ast_c.exp_type |
| 157 | | EnumConstant of string * string option |
| 158 | |
| 159 | (* also used for macro type aliases *) |
| 160 | | TypeDef of string * fullType |
| 161 | (* the structType contains nested "idents" with struct scope *) |
| 162 | | StructUnionNameDef of string * (structUnion * structType) wrap |
| 163 | |
| 164 | (* cppext: *) |
| 165 | | Macro of string * (define_kind * define_val) |
| 166 | |
| 167 | let print_scoped_env e = |
| 168 | List.iter |
| 169 | (function e -> |
| 170 | List.iter |
| 171 | (function |
| 172 | VarOrFunc(s,_) -> Printf.printf "%s " s |
| 173 | | EnumConstant(s,_) -> Printf.printf "%s " s |
| 174 | | TypeDef(s,t) -> Printf.printf "%s" s |
| 175 | | StructUnionNameDef(s,_) -> Printf.printf "%s " s |
| 176 | | Macro(s,_) -> Printf.printf "%s " s) |
| 177 | e; |
| 178 | Printf.printf "\n") |
| 179 | e |
| 180 | |
| 181 | (* Because have nested scope, have nested list, hence the list list. |
| 182 | * |
| 183 | * opti? use a hash to accelerate ? hmm but may have some problems |
| 184 | * with hash to handle recursive lookup. For instance for the typedef |
| 185 | * example where have mutually recursive definition of the type, |
| 186 | * we must take care to not loop by starting the second search |
| 187 | * from the previous environment. With the list scheme in |
| 188 | * lookup_env below it's quite easy to do. With hash it may be |
| 189 | * more complicated. |
| 190 | *) |
| 191 | type environment = namedef list list |
| 192 | |
| 193 | |
| 194 | (* ------------------------------------------------------------ *) |
| 195 | (* can be modified by the init_env function below, by |
| 196 | * the file environment_unix.h |
| 197 | *) |
| 198 | let initial_env = ref [ |
| 199 | [VarOrFunc("NULL", |
| 200 | (Lib.al_type (Parse_c.type_of_string "void *"), |
| 201 | Ast_c.NotLocalVar)); |
| 202 | |
| 203 | (* |
| 204 | VarOrFunc("malloc", |
| 205 | (Lib.al_type(Parse_c.type_of_string "void* ( * )(int size)"), |
| 206 | Ast_c.NotLocalVar)); |
| 207 | VarOrFunc("free", |
| 208 | (Lib.al_type(Parse_c.type_of_string "void ( * )(void *ptr)"), |
| 209 | Ast_c.NotLocalVar)); |
| 210 | *) |
| 211 | ] |
| 212 | ] |
| 213 | |
| 214 | |
| 215 | let typedef_debug = ref false |
| 216 | |
| 217 | |
| 218 | (* ------------------------------------------------------------ *) |
| 219 | (* generic, lookup and also return remaining env for further lookup *) |
| 220 | let rec lookup_env2 f env = |
| 221 | match env with |
| 222 | | [] -> raise Not_found |
| 223 | | []::zs -> lookup_env2 f zs |
| 224 | | (x::xs)::zs -> |
| 225 | (match f x with |
| 226 | | None -> lookup_env2 f (xs::zs) |
| 227 | | Some y -> y, xs::zs |
| 228 | ) |
| 229 | let lookup_env a b = |
| 230 | Common.profile_code "TAC.lookup_env" (fun () -> lookup_env2 a b) |
| 231 | |
| 232 | |
| 233 | |
| 234 | let member_env lookupf env = |
| 235 | try |
| 236 | let _ = lookupf env in |
| 237 | true |
| 238 | with Not_found -> false |
| 239 | |
| 240 | |
| 241 | |
| 242 | |
| 243 | (* ------------------------------------------------------------ *) |
| 244 | |
| 245 | |
| 246 | let lookup_var s env = |
| 247 | let f = function |
| 248 | | VarOrFunc (s2, typ) -> if s2 =$= s then Some typ else None |
| 249 | | _ -> None |
| 250 | in |
| 251 | lookup_env f env |
| 252 | |
| 253 | let lookup_typedef s env = |
| 254 | if !typedef_debug then pr2 ("looking for: " ^ s); |
| 255 | let f = function |
| 256 | | TypeDef (s2, typ) -> if s2 =$= s then Some typ else None |
| 257 | | _ -> None |
| 258 | in |
| 259 | lookup_env f env |
| 260 | |
| 261 | let lookup_structunion (_su, s) env = |
| 262 | let f = function |
| 263 | | StructUnionNameDef (s2, typ) -> if s2 =$= s then Some typ else None |
| 264 | | _ -> None |
| 265 | in |
| 266 | lookup_env f env |
| 267 | |
| 268 | let lookup_macro s env = |
| 269 | let f = function |
| 270 | | Macro (s2, typ) -> if s2 =$= s then Some typ else None |
| 271 | | _ -> None |
| 272 | in |
| 273 | lookup_env f env |
| 274 | |
| 275 | let lookup_enum s env = |
| 276 | let f = function |
| 277 | | EnumConstant (s2, typ) -> if s2 =$= s then Some typ else None |
| 278 | | _ -> None |
| 279 | in |
| 280 | lookup_env f env |
| 281 | |
| 282 | |
| 283 | let lookup_typedef a b = |
| 284 | Common.profile_code "TAC.lookup_typedef" (fun () -> lookup_typedef a b) |
| 285 | |
| 286 | |
| 287 | |
| 288 | (*****************************************************************************) |
| 289 | (* "type-lookup" *) |
| 290 | (*****************************************************************************) |
| 291 | |
| 292 | (* find_final_type is used to know to what type a field correspond in |
| 293 | * x.foo. Sometimes the type of x is a typedef or a structName in which |
| 294 | * case we must look in environment to find the complete type, here |
| 295 | * structUnion that contains the information. |
| 296 | * |
| 297 | * Because in C one can redefine in nested blocks some typedefs, |
| 298 | * struct, or variables, we have a static scoping resolving process. |
| 299 | * So, when we look for the type of a var, if this var is in an |
| 300 | * enclosing block, then maybe its type refer to a typdef of this |
| 301 | * enclosing block, so must restart the "type-resolving" of this |
| 302 | * typedef from this enclosing block, not from the bottom. So our |
| 303 | * "resolving-type functions" take an env and also return an env from |
| 304 | * where the next search must be performed. *) |
| 305 | |
| 306 | (* |
| 307 | let rec find_final_type ty env = |
| 308 | |
| 309 | match Ast_c.unwrap_typeC ty with |
| 310 | | BaseType x -> (BaseType x) +> Ast_c.rewrap_typeC ty |
| 311 | |
| 312 | | Pointer t -> (Pointer (find_final_type t env)) +> Ast_c.rewrap_typeC ty |
| 313 | | Array (e, t) -> Array (e, find_final_type t env) +> Ast_c.rewrap_typeC ty |
| 314 | |
| 315 | | StructUnion (sopt, su) -> StructUnion (sopt, su) +> Ast_c.rewrap_typeC ty |
| 316 | |
| 317 | | FunctionType t -> (FunctionType t) (* todo ? *) +> Ast_c.rewrap_typeC ty |
| 318 | | Enum (s, enumt) -> (Enum (s, enumt)) (* todo? *) +> Ast_c.rewrap_typeC ty |
| 319 | | EnumName s -> (EnumName s) (* todo? *) +> Ast_c.rewrap_typeC ty |
| 320 | |
| 321 | | StructUnionName (su, s) -> |
| 322 | (try |
| 323 | let ((structtyp,ii), env') = lookup_structunion (su, s) env in |
| 324 | Ast_c.nQ, (StructUnion (Some s, structtyp), ii) |
| 325 | (* old: +> Ast_c.rewrap_typeC ty |
| 326 | * but must wrap with good ii, otherwise pretty_print_c |
| 327 | * will be lost and raise some Impossible |
| 328 | *) |
| 329 | with Not_found -> |
| 330 | ty |
| 331 | ) |
| 332 | |
| 333 | | TypeName s -> |
| 334 | (try |
| 335 | let (t', env') = lookup_typedef s env in |
| 336 | find_final_type t' env' |
| 337 | with Not_found -> |
| 338 | ty |
| 339 | ) |
| 340 | |
| 341 | | ParenType t -> find_final_type t env |
| 342 | | Typeof e -> failwith "typeof" |
| 343 | *) |
| 344 | |
| 345 | |
| 346 | |
| 347 | |
| 348 | (* ------------------------------------------------------------ *) |
| 349 | let rec type_unfold_one_step ty env = |
| 350 | let rec loop seen ty env = |
| 351 | |
| 352 | match Ast_c.unwrap_typeC ty with |
| 353 | | NoType -> ty |
| 354 | | BaseType x -> ty |
| 355 | | Pointer t -> ty |
| 356 | | Array (e, t) -> ty |
| 357 | |
| 358 | | StructUnion (sopt, su, fields) -> ty |
| 359 | |
| 360 | | FunctionType t -> ty |
| 361 | | Enum (s, enumt) -> ty |
| 362 | |
| 363 | | EnumName s -> ty (* todo: look in env when will have EnumDef *) |
| 364 | |
| 365 | | StructUnionName (su, s) -> |
| 366 | (try |
| 367 | let (((su,fields),ii), env') = lookup_structunion (su, s) env in |
| 368 | Ast_c.mk_ty (StructUnion (su, Some s, fields)) ii |
| 369 | (* old: +> Ast_c.rewrap_typeC ty |
| 370 | * but must wrap with good ii, otherwise pretty_print_c |
| 371 | * will be lost and raise some Impossible |
| 372 | *) |
| 373 | with Not_found -> |
| 374 | ty |
| 375 | ) |
| 376 | |
| 377 | | TypeName (name, _typ) -> |
| 378 | let s = Ast_c.str_of_name name in |
| 379 | (try |
| 380 | if !typedef_debug then pr2 "type_unfold_one_step: lookup_typedef"; |
| 381 | let (t', env') = lookup_typedef s env in |
| 382 | if List.mem s seen (* avoid pb with recursive typedefs *) |
| 383 | then type_unfold_one_step t' env' |
| 384 | else loop (s::seen) t' env |
| 385 | with Not_found -> |
| 386 | ty |
| 387 | ) |
| 388 | |
| 389 | | ParenType t -> type_unfold_one_step t env |
| 390 | | TypeOfExpr e -> |
| 391 | pr2_once ("Type_annoter: not handling typeof"); |
| 392 | ty |
| 393 | | TypeOfType t -> type_unfold_one_step t env in |
| 394 | loop [] ty env |
| 395 | |
| 396 | |
| 397 | |
| 398 | |
| 399 | |
| 400 | |
| 401 | |
| 402 | |
| 403 | |
| 404 | (* normalizer. can be seen as the opposite of the previous function as |
| 405 | * we "fold" at least for the structUnion. Should return something that |
| 406 | * Type_c.is_completed_fullType likes, something that makes it easier |
| 407 | * for the programmer to work on, that has all the needed information |
| 408 | * for most tasks. |
| 409 | *) |
| 410 | let rec typedef_fix ty env = |
| 411 | let rec loop seen ty env = |
| 412 | match Ast_c.unwrap_typeC ty with |
| 413 | | NoType -> |
| 414 | ty |
| 415 | | BaseType x -> |
| 416 | ty |
| 417 | | Pointer t -> |
| 418 | Pointer (typedef_fix t env) +> Ast_c.rewrap_typeC ty |
| 419 | | Array (e, t) -> |
| 420 | Array (e, typedef_fix t env) +> Ast_c.rewrap_typeC ty |
| 421 | | StructUnion (su, sopt, fields) -> |
| 422 | (* normalize, fold. |
| 423 | * todo? but what if correspond to a nested struct def ? |
| 424 | *) |
| 425 | Type_c.structdef_to_struct_name ty |
| 426 | | FunctionType ft -> |
| 427 | (FunctionType ft) (* todo ? *) +> Ast_c.rewrap_typeC ty |
| 428 | | Enum (s, enumt) -> |
| 429 | (Enum (s, enumt)) (* todo? *) +> Ast_c.rewrap_typeC ty |
| 430 | | EnumName s -> |
| 431 | (EnumName s) (* todo? *) +> Ast_c.rewrap_typeC ty |
| 432 | |
| 433 | (* we prefer StructUnionName to StructUnion when it comes to typed metavar *) |
| 434 | | StructUnionName (su, s) -> |
| 435 | ty |
| 436 | |
| 437 | (* keep the typename but complete with more information *) |
| 438 | | TypeName (name, typ) -> |
| 439 | let s = Ast_c.str_of_name name in |
| 440 | (match typ with |
| 441 | | Some _ -> |
| 442 | pr2 ("typedef value already there:" ^ s); |
| 443 | ty |
| 444 | | None -> |
| 445 | (try |
| 446 | if !typedef_debug then pr2 "typedef_fix: lookup_typedef"; |
| 447 | let (t', env') = lookup_typedef s env in |
| 448 | |
| 449 | (* bugfix: termination bug if use env instead of env' below, because |
| 450 | * can have some weird mutually recursive typedef which |
| 451 | * each new type alias search for its mutual def. |
| 452 | * seen is an attempt to do better. |
| 453 | *) |
| 454 | let fixed = |
| 455 | if List.mem s seen |
| 456 | then loop (s::seen) t' env |
| 457 | else typedef_fix t' env' in |
| 458 | TypeName (name, Some fixed) +> |
| 459 | Ast_c.rewrap_typeC ty |
| 460 | with Not_found -> |
| 461 | ty)) |
| 462 | |
| 463 | (* remove paren for better matching with typed metavar. kind of iso again *) |
| 464 | | ParenType t -> |
| 465 | typedef_fix t env |
| 466 | | TypeOfExpr e -> |
| 467 | pr2_once ("Type_annoter: not handling typeof"); |
| 468 | ty |
| 469 | |
| 470 | | TypeOfType t -> |
| 471 | typedef_fix t env in |
| 472 | loop [] ty env |
| 473 | |
| 474 | |
| 475 | (*****************************************************************************) |
| 476 | (* Helpers, part 1 *) |
| 477 | (*****************************************************************************) |
| 478 | |
| 479 | let type_of_s2 s = |
| 480 | (Lib.al_type (Parse_c.type_of_string s)) |
| 481 | let type_of_s a = |
| 482 | Common.profile_code "Type_c.type_of_s" (fun () -> type_of_s2 a) |
| 483 | |
| 484 | |
| 485 | (* pad: pb on: |
| 486 | * /home/pad/software-os-src2/freebsd/contrib/ipfilter/netinet/ip_fil_freebsd.c |
| 487 | * because in the code there is: |
| 488 | * static iss_seq_off = 0; |
| 489 | * which in the parser was generating a default int without a parse_info. |
| 490 | * I now add a fake parse_info for such default int so no more failwith |
| 491 | * normally. |
| 492 | *) |
| 493 | |
| 494 | let rec is_simple_expr expr = |
| 495 | match Ast_c.unwrap_expr expr with |
| 496 | (* todo? handle more special cases ? *) |
| 497 | |
| 498 | | Ident _ -> |
| 499 | true |
| 500 | | Constant (_) -> |
| 501 | true |
| 502 | | Unary (op, e) -> |
| 503 | true |
| 504 | | Binary (e1, op, e2) -> |
| 505 | true |
| 506 | | Cast (t, e) -> |
| 507 | true |
| 508 | | ParenExpr (e) -> is_simple_expr e |
| 509 | |
| 510 | | _ -> false |
| 511 | |
| 512 | (*****************************************************************************) |
| 513 | (* Typing rules *) |
| 514 | (*****************************************************************************) |
| 515 | (* now in type_c.ml *) |
| 516 | |
| 517 | |
| 518 | |
| 519 | (*****************************************************************************) |
| 520 | (* (Semi) Globals, Julia's style *) |
| 521 | (*****************************************************************************) |
| 522 | |
| 523 | (* opti: cache ? use hash ? *) |
| 524 | let _scoped_env = ref !initial_env |
| 525 | |
| 526 | (* memoise unnanoted var, to avoid too much warning messages *) |
| 527 | let _notyped_var = ref (Hashtbl.create 100) |
| 528 | |
| 529 | let new_scope() = _scoped_env := []::!_scoped_env |
| 530 | let del_scope() = _scoped_env := List.tl !_scoped_env |
| 531 | |
| 532 | let do_in_new_scope f = |
| 533 | begin |
| 534 | new_scope(); |
| 535 | let res = f() in |
| 536 | del_scope(); |
| 537 | res |
| 538 | end |
| 539 | |
| 540 | let add_in_scope namedef = |
| 541 | let (current, older) = Common.uncons !_scoped_env in |
| 542 | _scoped_env := (namedef::current)::older |
| 543 | |
| 544 | (* ------------------------------------------------------------ *) |
| 545 | |
| 546 | (* sort of hackish... *) |
| 547 | let islocal info = |
| 548 | if List.length (!_scoped_env) =|= List.length !initial_env |
| 549 | then Ast_c.NotLocalVar |
| 550 | else Ast_c.LocalVar info |
| 551 | |
| 552 | (* ------------------------------------------------------------ *) |
| 553 | (* the warning argument is here to allow some binding to overwrite an |
| 554 | * existing one. With function, we first have the prototype and then the def, |
| 555 | * and the def binding with the same string is not an error. |
| 556 | * |
| 557 | * todo?: but if we define two times the same function, then we will not |
| 558 | * detect it :( it would require to make a diff between adding a binding |
| 559 | * from a prototype and from a definition. |
| 560 | * |
| 561 | * opti: disabling the check_annotater flag have some important |
| 562 | * performance benefit. |
| 563 | * |
| 564 | *) |
| 565 | let add_binding2 namedef warning = |
| 566 | let (current_scope, _older_scope) = Common.uncons !_scoped_env in |
| 567 | |
| 568 | if !Flag_parsing_c.check_annotater then begin |
| 569 | (match namedef with |
| 570 | | VarOrFunc (s, typ) -> |
| 571 | if Hashtbl.mem !_notyped_var s |
| 572 | then pr2 ("warning: found typing information for a variable that was" ^ |
| 573 | "previously unknown:" ^ s); |
| 574 | | _ -> () |
| 575 | ); |
| 576 | |
| 577 | let (memberf, s) = |
| 578 | (match namedef with |
| 579 | | VarOrFunc (s, typ) -> |
| 580 | member_env (lookup_var s), s |
| 581 | | TypeDef (s, typ) -> |
| 582 | member_env (lookup_typedef s), s |
| 583 | | StructUnionNameDef (s, (su, typ)) -> |
| 584 | member_env (lookup_structunion (su, s)), s |
| 585 | | Macro (s, body) -> |
| 586 | member_env (lookup_macro s), s |
| 587 | | EnumConstant (s, body) -> |
| 588 | member_env (lookup_enum s), s |
| 589 | ) in |
| 590 | |
| 591 | if memberf [current_scope] && warning |
| 592 | then pr2 ("Type_annoter: warning, " ^ s ^ |
| 593 | " is already in current binding" ^ "\n" ^ |
| 594 | " so there is a weird shadowing"); |
| 595 | end; |
| 596 | add_in_scope namedef |
| 597 | |
| 598 | let add_binding namedef warning = |
| 599 | Common.profile_code "TAC.add_binding" (fun () -> add_binding2 namedef warning) |
| 600 | |
| 601 | |
| 602 | |
| 603 | (*****************************************************************************) |
| 604 | (* Helpers, part 2 *) |
| 605 | (*****************************************************************************) |
| 606 | |
| 607 | let lookup_opt_env lookupf s = |
| 608 | Common.optionise (fun () -> |
| 609 | lookupf s !_scoped_env |
| 610 | ) |
| 611 | |
| 612 | let unwrap_unfold_env2 typ = |
| 613 | Ast_c.unwrap_typeC |
| 614 | (type_unfold_one_step typ !_scoped_env) |
| 615 | let unwrap_unfold_env typ = |
| 616 | Common.profile_code "TAC.unwrap_unfold_env" (fun () -> unwrap_unfold_env2 typ) |
| 617 | |
| 618 | let typedef_fix a b = |
| 619 | Common.profile_code "TAC.typedef_fix" (fun () -> typedef_fix a b) |
| 620 | |
| 621 | let make_info_def_fix x = |
| 622 | Type_c.make_info_def (typedef_fix x !_scoped_env) |
| 623 | |
| 624 | let make_info_fix (typ, local) = |
| 625 | Type_c.make_info ((typedef_fix typ !_scoped_env),local) |
| 626 | |
| 627 | |
| 628 | let make_info_def = Type_c.make_info_def |
| 629 | |
| 630 | (*****************************************************************************) |
| 631 | (* Main typer code, put later in a visitor *) |
| 632 | (*****************************************************************************) |
| 633 | |
| 634 | let annotater_expr_visitor_subpart = (fun (k,bigf) expr -> |
| 635 | |
| 636 | let ty = |
| 637 | match Ast_c.unwrap_expr expr with |
| 638 | |
| 639 | (* -------------------------------------------------- *) |
| 640 | (* todo: should analyse the 's' for int to know if unsigned or not *) |
| 641 | | Constant (String (s,kind)) -> make_info_def (type_of_s "char []") |
| 642 | | Constant MultiString _ -> make_info_def (type_of_s "char []") |
| 643 | | Constant (Char (s,kind)) -> make_info_def (type_of_s "char") |
| 644 | | Constant (Int (s,kind)) -> |
| 645 | (* this seems really unpleasant, but perhaps the type needs to be set |
| 646 | up in some way that allows pretty printing *) |
| 647 | make_info_def |
| 648 | (match kind with |
| 649 | (* matches limited by what is generated in lexer_c.mll *) |
| 650 | Si(Signed,CInt) -> type_of_s "int" |
| 651 | | Si(UnSigned,CInt) -> type_of_s "unsigned int" |
| 652 | | Si(Signed,CLong) -> type_of_s "long" |
| 653 | | Si(UnSigned,CLong) -> type_of_s "unsigned long" |
| 654 | | Si(Signed,CLongLong) -> type_of_s "long long" |
| 655 | | Si(UnSigned,CLongLong) -> type_of_s "unsigned long long" |
| 656 | | _ -> failwith "unexpected kind for constant") |
| 657 | | Constant (Float (s,kind)) -> |
| 658 | let fake = Ast_c.fakeInfo (Common.fake_parse_info) in |
| 659 | let fake = Ast_c.rewrap_str "float" fake in |
| 660 | let iinull = [fake] in |
| 661 | make_info_def (Ast_c.mk_ty (BaseType (FloatType kind)) iinull) |
| 662 | |
| 663 | |
| 664 | (* -------------------------------------------------- *) |
| 665 | (* note: could factorize this code with the code for Ident |
| 666 | * and the other code for Funcall below. But as the Ident can be |
| 667 | * a macro-func, I prefer to handle it separately. So |
| 668 | * this rule can handle the macro-func, the Ident-rule can handle |
| 669 | * the macro-var, and the other FunCall-rule the regular |
| 670 | * function calls through fields. |
| 671 | * Also as I don't want a warning on the Ident that are a FunCall, |
| 672 | * easier to have a rule separate from the Ident rule. |
| 673 | *) |
| 674 | | FunCall (e1, args) -> |
| 675 | (match Ast_c.unwrap_expr e1 with |
| 676 | | Ident (ident) -> |
| 677 | (* recurse *) |
| 678 | args +> List.iter (fun (e,ii) -> |
| 679 | (* could typecheck if arguments agree with prototype *) |
| 680 | Visitor_c.vk_argument bigf e |
| 681 | ); |
| 682 | let s = Ast_c.str_of_name ident in |
| 683 | (match lookup_opt_env lookup_var s with |
| 684 | | Some ((typ,local),_nextenv) -> |
| 685 | |
| 686 | (* set type for ident *) |
| 687 | let tyinfo = make_info_fix (typ, local) in |
| 688 | Ast_c.set_type_expr e1 tyinfo; |
| 689 | |
| 690 | (match unwrap_unfold_env typ with |
| 691 | | FunctionType (ret, params) -> make_info_def ret |
| 692 | |
| 693 | (* can be function pointer, C have an iso for that, |
| 694 | * same pfn() syntax than regular function call. |
| 695 | *) |
| 696 | | Pointer (typ2) -> |
| 697 | (match unwrap_unfold_env typ2 with |
| 698 | | FunctionType (ret, params) -> make_info_def ret |
| 699 | | _ -> Type_c.noTypeHere |
| 700 | ) |
| 701 | | _ -> Type_c.noTypeHere |
| 702 | ) |
| 703 | | None -> |
| 704 | |
| 705 | (match lookup_opt_env lookup_macro s with |
| 706 | | Some ((defkind, defval), _nextenv) -> |
| 707 | (match defkind, defval with |
| 708 | | DefineFunc _, DefineExpr e -> |
| 709 | let rettype = Ast_c.get_onlytype_expr e in |
| 710 | |
| 711 | (* todo: could also set type for ident ? |
| 712 | have return type and at least type of concrete |
| 713 | parameters so can generate a fake FunctionType |
| 714 | *) |
| 715 | let macrotype_opt = |
| 716 | Type_c.fake_function_type rettype args |
| 717 | in |
| 718 | |
| 719 | macrotype_opt +> Common.do_option (fun t -> |
| 720 | pr2 ("Type_annotater: generate fake function type" ^ |
| 721 | "for macro: " ^ s); |
| 722 | let tyinfo = make_info_def_fix t in |
| 723 | Ast_c.set_type_expr e1 tyinfo; |
| 724 | ); |
| 725 | |
| 726 | Ast_c.get_type_expr e |
| 727 | | DefineVar, _ -> |
| 728 | pr2 ("Type_annoter: not a macro-func: " ^ s); |
| 729 | Type_c.noTypeHere |
| 730 | | Undef, _ -> |
| 731 | pr2 ("Type_annoter: not a macro-func: " ^ s); |
| 732 | Type_c.noTypeHere |
| 733 | | DefineFunc _, _ -> |
| 734 | (* normally the FunCall case should have caught it *) |
| 735 | pr2 ("Type_annoter: not a macro-func-expr: " ^ s); |
| 736 | Type_c.noTypeHere |
| 737 | ) |
| 738 | | None -> |
| 739 | pr2_once ("type_annotater: no type for function ident: " ^ s); |
| 740 | Type_c.noTypeHere |
| 741 | ) |
| 742 | ) |
| 743 | |
| 744 | |
| 745 | | _e -> |
| 746 | k expr; |
| 747 | |
| 748 | (Ast_c.get_type_expr e1) +> Type_c.do_with_type (fun typ -> |
| 749 | (* copy paste of above *) |
| 750 | (match unwrap_unfold_env typ with |
| 751 | | FunctionType (ret, params) -> make_info_def ret |
| 752 | | Pointer (typ) -> |
| 753 | (match unwrap_unfold_env typ with |
| 754 | | FunctionType (ret, params) -> make_info_def ret |
| 755 | | _ -> Type_c.noTypeHere |
| 756 | ) |
| 757 | | _ -> Type_c.noTypeHere |
| 758 | ) |
| 759 | ) |
| 760 | ) |
| 761 | |
| 762 | |
| 763 | (* -------------------------------------------------- *) |
| 764 | | Ident (ident) -> |
| 765 | let s = Ast_c.str_of_name ident in |
| 766 | (match lookup_opt_env lookup_var s with |
| 767 | | Some ((typ,local),_nextenv) -> make_info_fix (typ,local) |
| 768 | | None -> |
| 769 | (match lookup_opt_env lookup_macro s with |
| 770 | | Some ((defkind, defval), _nextenv) -> |
| 771 | (match defkind, defval with |
| 772 | | DefineVar, DefineExpr e -> |
| 773 | Ast_c.get_type_expr e |
| 774 | | DefineVar, _ -> |
| 775 | pr2 ("Type_annoter: not a expression: " ^ s); |
| 776 | Type_c.noTypeHere |
| 777 | | DefineFunc _, _ -> |
| 778 | (* normally the FunCall case should have catch it *) |
| 779 | pr2 ("Type_annoter: not a macro-var: " ^ s); |
| 780 | Type_c.noTypeHere |
| 781 | | Undef, _ -> |
| 782 | pr2 ("Type_annoter: not a expression: " ^ s); |
| 783 | Type_c.noTypeHere |
| 784 | ) |
| 785 | | None -> |
| 786 | (match lookup_opt_env lookup_enum s with |
| 787 | | Some (_, _nextenv) -> |
| 788 | make_info_def (type_of_s "int") |
| 789 | | None -> |
| 790 | if not (s =~ "[A-Z_]+") (* if macro then no warning *) |
| 791 | then |
| 792 | if !Flag_parsing_c.check_annotater then |
| 793 | if not (Hashtbl.mem !_notyped_var s) |
| 794 | then begin |
| 795 | pr2 ("Type_annoter: no type found for: " ^ s); |
| 796 | Hashtbl.add !_notyped_var s true; |
| 797 | end |
| 798 | else () |
| 799 | else |
| 800 | pr2 ("Type_annoter: no type found for: " ^ s) |
| 801 | ; |
| 802 | Type_c.noTypeHere |
| 803 | ) |
| 804 | ) |
| 805 | ) |
| 806 | |
| 807 | (* -------------------------------------------------- *) |
| 808 | (* C isomorphism on type on array and pointers *) |
| 809 | | Unary (e, DeRef) |
| 810 | | ArrayAccess (e, _) -> |
| 811 | k expr; (* recurse to set the types-ref of sub expressions *) |
| 812 | |
| 813 | (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t -> |
| 814 | (* todo: maybe not good env !! *) |
| 815 | match unwrap_unfold_env t with |
| 816 | | Pointer x |
| 817 | | Array (_, x) -> |
| 818 | make_info_def_fix x |
| 819 | | _ -> Type_c.noTypeHere |
| 820 | |
| 821 | ) |
| 822 | |
| 823 | | Unary (e, GetRef) -> |
| 824 | k expr; (* recurse to set the types-ref of sub expressions *) |
| 825 | |
| 826 | (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t -> |
| 827 | (* must generate an element so that '=' can be used |
| 828 | * to compare type ? |
| 829 | *) |
| 830 | let fake = Ast_c.fakeInfo Common.fake_parse_info in |
| 831 | let fake = Ast_c.rewrap_str "*" fake in |
| 832 | |
| 833 | let ft = Ast_c.mk_ty (Pointer t) [fake] in |
| 834 | make_info_def_fix ft |
| 835 | ) |
| 836 | |
| 837 | (* -------------------------------------------------- *) |
| 838 | (* fields *) |
| 839 | | RecordAccess (e, namefld) |
| 840 | | RecordPtAccess (e, namefld) as x -> |
| 841 | let fld = Ast_c.str_of_name namefld in |
| 842 | |
| 843 | k expr; (* recurse to set the types-ref of sub expressions *) |
| 844 | |
| 845 | (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t -> |
| 846 | |
| 847 | let topt = |
| 848 | match x with |
| 849 | | RecordAccess _ -> Some t |
| 850 | | RecordPtAccess _ -> |
| 851 | (match unwrap_unfold_env t with |
| 852 | | Pointer (t) -> Some t |
| 853 | | _ -> None |
| 854 | ) |
| 855 | | _ -> raise (Impossible 159) |
| 856 | |
| 857 | in |
| 858 | (match topt with |
| 859 | | None -> Type_c.noTypeHere |
| 860 | | Some t -> |
| 861 | match unwrap_unfold_env t with |
| 862 | | StructUnion (su, sopt, fields) -> |
| 863 | (try |
| 864 | (* todo: which env ? *) |
| 865 | make_info_def_fix |
| 866 | (Type_c.type_field fld (su, fields)) |
| 867 | with |
| 868 | | Not_found -> |
| 869 | pr2 (spf |
| 870 | "TYPE-ERROR: field '%s' does not belong in struct %s" |
| 871 | fld (match sopt with Some s -> s |_ -> "<anon>")); |
| 872 | Type_c.noTypeHere |
| 873 | | Multi_found -> |
| 874 | pr2 "TAC:MultiFound"; |
| 875 | Type_c.noTypeHere |
| 876 | ) |
| 877 | | _ -> Type_c.noTypeHere |
| 878 | ) |
| 879 | ) |
| 880 | |
| 881 | |
| 882 | |
| 883 | (* -------------------------------------------------- *) |
| 884 | | Cast (t, e) -> |
| 885 | k expr; |
| 886 | (* todo: if infer, can "push" info ? add_types_expr [t] e ? *) |
| 887 | make_info_def_fix (Lib.al_type t) |
| 888 | |
| 889 | (* todo? lub, hmm maybe not, cos type must be e1 *) |
| 890 | | Assignment (e1, op, e2) -> |
| 891 | k expr; |
| 892 | (* value of an assignment is the value of the RHS expression, but its |
| 893 | type is the type of the lhs expression. Use the rhs exp if no |
| 894 | information is available *) |
| 895 | (match Ast_c.get_type_expr e1 with |
| 896 | (None,_) -> Ast_c.get_type_expr e2 |
| 897 | | (Some ty,t) -> (Some ty,t)) |
| 898 | | Sequence (e1, e2) -> |
| 899 | k expr; |
| 900 | Ast_c.get_type_expr e2 |
| 901 | |
| 902 | | Binary (e1, Logical _, e2) -> |
| 903 | k expr; |
| 904 | make_info_def (type_of_s "int") |
| 905 | |
| 906 | (* todo: lub *) |
| 907 | | Binary (e1, Arith op, e2) -> |
| 908 | k expr; |
| 909 | Type_c.lub op (Type_c.get_opt_type e1) (Type_c.get_opt_type e2) |
| 910 | |
| 911 | | CondExpr (cond, e1opt, e2) -> |
| 912 | k expr; |
| 913 | Ast_c.get_type_expr e2 |
| 914 | |
| 915 | |
| 916 | | ParenExpr e -> |
| 917 | k expr; |
| 918 | Ast_c.get_type_expr e |
| 919 | |
| 920 | | Infix (e, op) | Postfix (e, op) -> |
| 921 | k expr; |
| 922 | Ast_c.get_type_expr e |
| 923 | |
| 924 | (* pad: julia wrote this ? *) |
| 925 | | Unary (e, UnPlus) -> |
| 926 | k expr; (* recurse to set the types-ref of sub expressions *) |
| 927 | make_info_def (type_of_s "int") |
| 928 | (* todo? can convert from unsigned to signed if UnMinus ? *) |
| 929 | | Unary (e, UnMinus) -> |
| 930 | k expr; (* recurse to set the types-ref of sub expressions *) |
| 931 | make_info_def (type_of_s "int") |
| 932 | |
| 933 | | SizeOfType _|SizeOfExpr _ -> |
| 934 | k expr; (* recurse to set the types-ref of sub expressions *) |
| 935 | make_info_def (type_of_s "size_t") |
| 936 | |
| 937 | | Constructor (ft, ini) -> |
| 938 | k expr; (* recurse to set the types-ref of sub expressions *) |
| 939 | make_info_def (Lib.al_type ft) |
| 940 | |
| 941 | | Unary (e, Not) -> |
| 942 | k expr; (* recurse to set the types-ref of sub expressions *) |
| 943 | (* the result of ! is always 0 or 1, not the argument type *) |
| 944 | make_info_def (type_of_s "int") |
| 945 | | Unary (e, Tilde) -> |
| 946 | k expr; (* recurse to set the types-ref of sub expressions *) |
| 947 | Ast_c.get_type_expr e |
| 948 | |
| 949 | (* -------------------------------------------------- *) |
| 950 | (* todo *) |
| 951 | | Unary (_, GetRefLabel) -> |
| 952 | k expr; (* recurse to set the types-ref of sub expressions *) |
| 953 | pr2_once "Type annotater:not handling GetRefLabel"; |
| 954 | Type_c.noTypeHere |
| 955 | (* todo *) |
| 956 | | StatementExpr _ -> |
| 957 | k expr; (* recurse to set the types-ref of sub expressions *) |
| 958 | pr2_once "Type annotater:not handling StatementExpr"; |
| 959 | Type_c.noTypeHere |
| 960 | (* |
| 961 | | _ -> k expr; Type_c.noTypeHere |
| 962 | *) |
| 963 | |
| 964 | | New (_, ty) -> |
| 965 | k expr; |
| 966 | pr2_once "Type annotater:not handling New"; |
| 967 | Type_c.noTypeHere (* TODO *) |
| 968 | |
| 969 | | Delete e -> |
| 970 | k expr; |
| 971 | pr2_once "Type annotater:not handling Delete"; |
| 972 | Type_c.noTypeHere (* TODO *) |
| 973 | |
| 974 | in |
| 975 | Ast_c.set_type_expr expr ty |
| 976 | |
| 977 | ) |
| 978 | |
| 979 | |
| 980 | (*****************************************************************************) |
| 981 | (* Visitor *) |
| 982 | (*****************************************************************************) |
| 983 | |
| 984 | (* Processing includes that were added after a cpp_ast_c makes the |
| 985 | * type annotater quite slow, especially when the depth of cpp_ast_c is |
| 986 | * big. But for such includes the only thing we really want is to modify |
| 987 | * the environment to have enough type information. We don't need |
| 988 | * to type the expressions inside those includes (they will be typed |
| 989 | * when we process the include file directly). Here the goal is |
| 990 | * to not recurse. |
| 991 | * |
| 992 | * Note that as usually header files contain mostly structure |
| 993 | * definitions and defines, that means we still have to do lots of work. |
| 994 | * We only win on function definition bodies, but usually header files |
| 995 | * have just prototypes, or inline function definitions which anyway have |
| 996 | * usually a small body. But still, we win. It also makes clearer |
| 997 | * that when processing include as we just need the environment, the caller |
| 998 | * of this module can do further optimisations such as memorising the |
| 999 | * state of the environment after each header files. |
| 1000 | * |
| 1001 | * |
| 1002 | * For sparse its makes the annotating speed goes from 9s to 4s |
| 1003 | * For Linux the speedup is even better, from ??? to ???. |
| 1004 | * |
| 1005 | * Because There would be some copy paste with annotate_program, it is |
| 1006 | * better to factorize code hence the just_add_in_env parameter below. |
| 1007 | * |
| 1008 | * todo? alternative optimisation for the include problem: |
| 1009 | * - processing all headers files one time and construct big env |
| 1010 | * - use hashtbl for env (but apparently not biggest problem) |
| 1011 | *) |
| 1012 | |
| 1013 | let rec visit_toplevel ~just_add_in_env ~depth elem = |
| 1014 | let need_annotate_body = not just_add_in_env in |
| 1015 | |
| 1016 | let bigf = { Visitor_c.default_visitor_c with |
| 1017 | |
| 1018 | (* ------------------------------------------------------------ *) |
| 1019 | Visitor_c.kcppdirective = (fun (k, bigf) directive -> |
| 1020 | match directive with |
| 1021 | (* do error messages for type annotater only for the real body of the |
| 1022 | * file, not inside include. |
| 1023 | *) |
| 1024 | | Include {i_content = opt} -> |
| 1025 | opt +> Common.do_option (fun (filename, program) -> |
| 1026 | Common.save_excursion Flag_parsing_c.verbose_type (fun () -> |
| 1027 | Flag_parsing_c.verbose_type := false; |
| 1028 | |
| 1029 | (* old: Visitor_c.vk_program bigf program; |
| 1030 | * opti: set the just_add_in_env |
| 1031 | *) |
| 1032 | program +> List.iter (fun elem -> |
| 1033 | visit_toplevel ~just_add_in_env:true ~depth:(depth+1) elem |
| 1034 | ) |
| 1035 | ) |
| 1036 | ) |
| 1037 | |
| 1038 | | Define ((s,ii), (defkind, defval)) -> |
| 1039 | |
| 1040 | |
| 1041 | (* even if we are in a just_add_in_env phase, such as when |
| 1042 | * we process include, as opposed to the body of functions, |
| 1043 | * with macros we still to type the body of the macro as |
| 1044 | * the macro has no type and so we infer its type from its |
| 1045 | * body (and one day later maybe from its use). |
| 1046 | *) |
| 1047 | (match defval with |
| 1048 | (* can try to optimize and recurse only when the define body |
| 1049 | * is simple ? |
| 1050 | *) |
| 1051 | |
| 1052 | | DefineExpr expr -> |
| 1053 | (* prevent macro-declared variables from leaking out *) |
| 1054 | do_in_new_scope (fun () -> |
| 1055 | if is_simple_expr expr |
| 1056 | (* even if not need_annotate_body, still recurse*) |
| 1057 | then k directive |
| 1058 | else |
| 1059 | if need_annotate_body |
| 1060 | then k directive) |
| 1061 | | _ -> |
| 1062 | do_in_new_scope (fun () -> |
| 1063 | if need_annotate_body |
| 1064 | then k directive) |
| 1065 | ); |
| 1066 | |
| 1067 | add_binding (Macro (s, (defkind, defval) )) true; |
| 1068 | |
| 1069 | | PragmaAndCo _ -> () |
| 1070 | ); |
| 1071 | |
| 1072 | (* ------------------------------------------------------------ *) |
| 1073 | (* main typer code *) |
| 1074 | (* ------------------------------------------------------------ *) |
| 1075 | Visitor_c.kexpr = annotater_expr_visitor_subpart; |
| 1076 | |
| 1077 | (* ------------------------------------------------------------ *) |
| 1078 | Visitor_c.kstatement = (fun (k, bigf) st -> |
| 1079 | match Ast_c.unwrap_st st with |
| 1080 | | Compound statxs -> do_in_new_scope (fun () -> k st); |
| 1081 | | _ -> k st |
| 1082 | ); |
| 1083 | (* ------------------------------------------------------------ *) |
| 1084 | Visitor_c.kdecl = (fun (k, bigf) d -> |
| 1085 | (match d with |
| 1086 | | (DeclList (xs, ii)) -> |
| 1087 | xs +> List.iter (fun ({v_namei = var; v_type = t; |
| 1088 | v_storage = sto; v_local = local} as x |
| 1089 | , iicomma) -> |
| 1090 | |
| 1091 | (* to add possible definition in type found in Decl *) |
| 1092 | Visitor_c.vk_type bigf t; |
| 1093 | |
| 1094 | |
| 1095 | let local = |
| 1096 | match (sto,local) with |
| 1097 | | (_,Ast_c.NotLocalDecl) -> Ast_c.NotLocalVar |
| 1098 | | ((Ast_c.Sto Ast_c.Static, _), Ast_c.LocalDecl) -> |
| 1099 | (match Ast_c.info_of_type t with |
| 1100 | (* if there is no info about the type it must not be |
| 1101 | present, so we don't know what the variable is *) |
| 1102 | None -> Ast_c.NotLocalVar |
| 1103 | | Some ii -> Ast_c.StaticLocalVar ii) |
| 1104 | | (_,Ast_c.LocalDecl) -> |
| 1105 | (match Ast_c.info_of_type t with |
| 1106 | (* if there is no info about the type it must not be |
| 1107 | present, so we don't know what the variable is *) |
| 1108 | None -> Ast_c.NotLocalVar |
| 1109 | | Some ii -> Ast_c.LocalVar ii) |
| 1110 | in |
| 1111 | var +> Common.do_option (fun (name, iniopt) -> |
| 1112 | let s = Ast_c.str_of_name name in |
| 1113 | |
| 1114 | match sto with |
| 1115 | | StoTypedef, _inline -> |
| 1116 | add_binding (TypeDef (s,Lib.al_type t)) true; |
| 1117 | | _ -> |
| 1118 | add_binding (VarOrFunc (s, (Lib.al_type t, local))) true; |
| 1119 | |
| 1120 | x.v_type_bis := |
| 1121 | Some (typedef_fix (Lib.al_type t) !_scoped_env); |
| 1122 | |
| 1123 | if need_annotate_body then begin |
| 1124 | (* int x = sizeof(x) is legal so need process ini *) |
| 1125 | match iniopt with |
| 1126 | Ast_c.NoInit -> () |
| 1127 | | Ast_c.ValInit(iini,init) -> Visitor_c.vk_ini bigf init |
| 1128 | | Ast_c.ConstrInit((args,_)) -> |
| 1129 | args +> List.iter (fun (e,ii) -> |
| 1130 | Visitor_c.vk_argument bigf e |
| 1131 | ) |
| 1132 | end |
| 1133 | ); |
| 1134 | ); |
| 1135 | | MacroDecl _ | MacroDeclInit _ -> |
| 1136 | if need_annotate_body |
| 1137 | then k d |
| 1138 | ); |
| 1139 | |
| 1140 | ); |
| 1141 | |
| 1142 | (* ------------------------------------------------------------ *) |
| 1143 | Visitor_c.ktype = (fun (k, bigf) typ -> |
| 1144 | (* bugfix: have a 'Lib.al_type typ' before, but because we can |
| 1145 | * have enum with possible expression, we don't want to change |
| 1146 | * the ref of abstract-lined types, but the real one, so |
| 1147 | * don't al_type here |
| 1148 | *) |
| 1149 | let (_q, tbis) = typ in |
| 1150 | match Ast_c.unwrap_typeC typ with |
| 1151 | | StructUnion (su, Some s, structType) -> |
| 1152 | let structType' = Lib.al_fields structType in |
| 1153 | let ii = Ast_c.get_ii_typeC_take_care tbis in |
| 1154 | let ii' = Lib.al_ii ii in |
| 1155 | add_binding (StructUnionNameDef (s, ((su, structType'),ii'))) true; |
| 1156 | |
| 1157 | if need_annotate_body |
| 1158 | then k typ (* todo: restrict ? new scope so use do_in_scope ? *) |
| 1159 | |
| 1160 | | Enum (sopt, enums) -> |
| 1161 | |
| 1162 | enums +> List.iter (fun ((name, eopt), iicomma) -> |
| 1163 | |
| 1164 | let s = Ast_c.str_of_name name in |
| 1165 | |
| 1166 | if need_annotate_body |
| 1167 | then eopt +> Common.do_option (fun (ieq, e) -> |
| 1168 | Visitor_c.vk_expr bigf e |
| 1169 | ); |
| 1170 | add_binding (EnumConstant (s, sopt)) true; |
| 1171 | ); |
| 1172 | |
| 1173 | |
| 1174 | (* TODO: if have a TypeName, then maybe can fill the option |
| 1175 | * information. |
| 1176 | *) |
| 1177 | | _ -> |
| 1178 | if need_annotate_body |
| 1179 | then k typ |
| 1180 | |
| 1181 | ); |
| 1182 | |
| 1183 | (* ------------------------------------------------------------ *) |
| 1184 | Visitor_c.ktoplevel = (fun (k, bigf) elem -> |
| 1185 | _notyped_var := Hashtbl.create 100; |
| 1186 | match elem with |
| 1187 | | Definition def -> |
| 1188 | let {f_name = name; |
| 1189 | f_type = ((returnt, (paramst, b)) as ftyp); |
| 1190 | f_storage = sto; |
| 1191 | f_body = statxs; |
| 1192 | f_old_c_style = oldstyle; |
| 1193 | },ii |
| 1194 | = def |
| 1195 | in |
| 1196 | let (i1, i2) = |
| 1197 | match ii with |
| 1198 | (* what is iifunc1? it should be a type. jll |
| 1199 | * pad: it's the '(' in the function definition. The |
| 1200 | * return type is part of f_type. |
| 1201 | *) |
| 1202 | | iifunc1::iifunc2::ibrace1::ibrace2::ifakestart::isto -> |
| 1203 | iifunc1, iifunc2 |
| 1204 | | _ -> raise (Impossible 160) |
| 1205 | in |
| 1206 | let funcs = Ast_c.str_of_name name in |
| 1207 | |
| 1208 | (match oldstyle with |
| 1209 | | None -> |
| 1210 | let typ' = |
| 1211 | Lib.al_type (Ast_c.mk_ty (FunctionType ftyp) [i1;i2]) in |
| 1212 | |
| 1213 | add_binding (VarOrFunc (funcs, (typ',islocal i1.Ast_c.pinfo))) |
| 1214 | false; |
| 1215 | |
| 1216 | if need_annotate_body then |
| 1217 | do_in_new_scope (fun () -> |
| 1218 | paramst +> List.iter (fun ({p_namei= nameopt; p_type= t},_)-> |
| 1219 | match nameopt with |
| 1220 | | Some name -> |
| 1221 | let s = Ast_c.str_of_name name in |
| 1222 | let local = |
| 1223 | (match Ast_c.info_of_type t with |
| 1224 | (* if there is no info about the type it must |
| 1225 | not be present, so we don't know what the |
| 1226 | variable is *) |
| 1227 | None -> Ast_c.NotLocalVar |
| 1228 | | Some ii -> Ast_c.LocalVar ii) |
| 1229 | in |
| 1230 | add_binding (VarOrFunc (s,(Lib.al_type t,local))) true |
| 1231 | | None -> |
| 1232 | pr2 "no type, certainly because Void type ?" |
| 1233 | ); |
| 1234 | (* recurse *) |
| 1235 | k elem |
| 1236 | ); |
| 1237 | | Some oldstyle -> |
| 1238 | (* generate regular function type *) |
| 1239 | |
| 1240 | pr2 "TODO generate type for function"; |
| 1241 | (* add bindings *) |
| 1242 | if need_annotate_body then |
| 1243 | do_in_new_scope (fun () -> |
| 1244 | (* recurse. should naturally call the kdecl visitor and |
| 1245 | * add binding |
| 1246 | *) |
| 1247 | k elem; |
| 1248 | ); |
| 1249 | |
| 1250 | ); |
| 1251 | | CppTop x -> |
| 1252 | (match x with |
| 1253 | | Define ((s,ii), (DefineVar, DefineType t)) -> |
| 1254 | add_binding (TypeDef (s,Lib.al_type t)) true; |
| 1255 | | _ -> k elem |
| 1256 | ) |
| 1257 | |
| 1258 | | Declaration _ |
| 1259 | |
| 1260 | |
| 1261 | |
| 1262 | | IfdefTop _ |
| 1263 | | MacroTop _ |
| 1264 | | EmptyDef _ |
| 1265 | | NotParsedCorrectly _ |
| 1266 | | FinalDef _ |
| 1267 | | Namespace _ |
| 1268 | -> |
| 1269 | k elem |
| 1270 | ); |
| 1271 | } |
| 1272 | in |
| 1273 | if just_add_in_env |
| 1274 | then |
| 1275 | if depth > 1 |
| 1276 | then Visitor_c.vk_toplevel bigf elem |
| 1277 | else |
| 1278 | Common.profile_code "TAC.annotate_only_included" (fun () -> |
| 1279 | Visitor_c.vk_toplevel bigf elem |
| 1280 | ) |
| 1281 | else Visitor_c.vk_toplevel bigf elem |
| 1282 | |
| 1283 | (*****************************************************************************) |
| 1284 | (* Entry point *) |
| 1285 | (*****************************************************************************) |
| 1286 | (* catch all the decl to grow the environment *) |
| 1287 | |
| 1288 | |
| 1289 | let rec (annotate_program2 : |
| 1290 | environment -> toplevel list -> (toplevel * environment Common.pair) list) = |
| 1291 | fun env prog -> |
| 1292 | |
| 1293 | (* globals (re)initialialisation *) |
| 1294 | _scoped_env := env; |
| 1295 | _notyped_var := (Hashtbl.create 100); |
| 1296 | |
| 1297 | prog +> List.map (fun elem -> |
| 1298 | let beforeenv = !_scoped_env in |
| 1299 | visit_toplevel ~just_add_in_env:false ~depth:0 elem; |
| 1300 | let afterenv = !_scoped_env in |
| 1301 | (elem, (beforeenv, afterenv)) |
| 1302 | ) |
| 1303 | |
| 1304 | |
| 1305 | |
| 1306 | |
| 1307 | (*****************************************************************************) |
| 1308 | (* Annotate test *) |
| 1309 | (*****************************************************************************) |
| 1310 | |
| 1311 | (* julia: for coccinelle *) |
| 1312 | let annotate_test_expressions prog = |
| 1313 | let rec propagate_test e = |
| 1314 | let ((e_term,info),_) = e in |
| 1315 | let (ty,_) = !info in |
| 1316 | info := (ty,Test); |
| 1317 | match e_term with |
| 1318 | Binary(e1,Logical(AndLog),e2) |
| 1319 | | Binary(e1,Logical(OrLog),e2) -> propagate_test e1; propagate_test e2 |
| 1320 | | Unary(e1,Not) -> propagate_test e1 |
| 1321 | | ParenExpr(e) -> propagate_test e |
| 1322 | | FunCall(e,args) -> (* not very nice, but so painful otherwise *) |
| 1323 | (match (unwrap e,args) with |
| 1324 | ((Ident(i),_),[(Left a,_)]) -> |
| 1325 | let nm = str_of_name i in |
| 1326 | if List.mem nm ["likely";"unlikely"] |
| 1327 | then propagate_test a |
| 1328 | else () |
| 1329 | | _ -> ()) |
| 1330 | | _ -> () in |
| 1331 | |
| 1332 | let bigf = { Visitor_c.default_visitor_c with |
| 1333 | Visitor_c.kexpr = (fun (k,bigf) expr -> |
| 1334 | (match unwrap_expr expr with |
| 1335 | CondExpr(e,_,_) -> propagate_test e |
| 1336 | | Binary(e1,Logical(AndLog),e2) |
| 1337 | | Binary(e1,Logical(OrLog),e2) -> propagate_test e1; propagate_test e2 |
| 1338 | | Unary(e1,Not) -> propagate_test e1 |
| 1339 | | _ -> () |
| 1340 | ); |
| 1341 | k expr |
| 1342 | ); |
| 1343 | Visitor_c.kstatement = (fun (k, bigf) st -> |
| 1344 | match unwrap_st st with |
| 1345 | Selection(s) -> |
| 1346 | (match s with If(e1,s1,s2) -> propagate_test e1 | _ -> ()); |
| 1347 | k st; |
| 1348 | | Iteration(i) -> |
| 1349 | (match i with |
| 1350 | While(e,s) -> propagate_test e |
| 1351 | | DoWhile(s,e) -> propagate_test e |
| 1352 | | For(_,es,_,_) -> |
| 1353 | (match unwrap es with Some e -> propagate_test e | None -> ()) |
| 1354 | | _ -> ()); |
| 1355 | k st |
| 1356 | | _ -> k st |
| 1357 | ) |
| 1358 | } in |
| 1359 | (prog +> List.iter (fun elem -> |
| 1360 | Visitor_c.vk_toplevel bigf elem |
| 1361 | )) |
| 1362 | |
| 1363 | |
| 1364 | |
| 1365 | (*****************************************************************************) |
| 1366 | (* Annotate types *) |
| 1367 | (*****************************************************************************) |
| 1368 | let annotate_program env prog = |
| 1369 | Common.profile_code "TAC.annotate_program" |
| 1370 | (fun () -> |
| 1371 | let res = annotate_program2 env prog in |
| 1372 | annotate_test_expressions prog; |
| 1373 | res |
| 1374 | ) |
| 1375 | |
| 1376 | let annotate_type_and_localvar env prog = |
| 1377 | Common.profile_code "TAC.annotate_type" |
| 1378 | (fun () -> annotate_program2 env prog) |
| 1379 | |
| 1380 | |
| 1381 | (*****************************************************************************) |
| 1382 | (* changing default typing environment, do concatenation *) |
| 1383 | let init_env filename = |
| 1384 | pr2 ("init_env: " ^ filename); |
| 1385 | let (ast2, _stat) = Parse_c.parse_c_and_cpp filename in |
| 1386 | let ast = Parse_c.program_of_program2 ast2 in |
| 1387 | |
| 1388 | let res = annotate_type_and_localvar !initial_env ast in |
| 1389 | match List.rev res with |
| 1390 | | [] -> pr2 "empty environment" |
| 1391 | | (_top,(env1,env2))::xs -> |
| 1392 | initial_env := !initial_env ++ env2; |
| 1393 | () |
| 1394 | |