Commit | Line | Data |
---|---|---|
0708f913 | 1 | (* Yoann Padioleau |
ae4735db C |
2 | * |
3 | * Copyright (C) 2010, University of Copenhagen DIKU and INRIA. | |
4 | * Copyright (C) 2007, 2008 Ecole des Mines de Nantes, | |
0708f913 | 5 | * Copyright (C) 2009 University of Urbana Champaign |
34e49164 C |
6 | * |
7 | * This program is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License (GPL) | |
9 | * version 2 as published by the Free Software Foundation. | |
ae4735db | 10 | * |
34e49164 C |
11 | * This program is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * file license.txt for more details. | |
15 | *) | |
91eba41f | 16 | |
34e49164 C |
17 | open Common |
18 | ||
19 | open Ast_c | |
20 | ||
21 | module Lib = Lib_parsing_c | |
22 | ||
23 | (*****************************************************************************) | |
91eba41f C |
24 | (* Prelude *) |
25 | (*****************************************************************************) | |
ae4735db | 26 | (* History: |
91eba41f C |
27 | * - Done a first type checker in 2002, cf typing-semantic/, but |
28 | * was assuming that have all type info, and so was assuming had called | |
29 | * cpp and everything was right. | |
ae4735db | 30 | * - Wrote this file, in 2006?, as we added pattern matching on type |
91eba41f | 31 | * in coccinelle. Partial type annotater. |
ae4735db | 32 | * - Julia extended it in 2008? to have localvar/notlocalvar and |
91eba41f | 33 | * test/notest information, again used by coccinelle. |
ae4735db | 34 | * - I extended it in Fall 2008 to have more type information for the |
91eba41f C |
35 | * global analysis. I also added some optimisations to process |
36 | * included code faster. | |
ae4735db C |
37 | * |
38 | * | |
91eba41f C |
39 | * Design choices. Can either do: |
40 | * - a kind of inferer | |
41 | * - can first do a simple inferer, that just pass context | |
42 | * - then a real inferer, managing partial info. | |
34e49164 C |
43 | * type context = fullType option |
44 | * | |
45 | * - extract the information from the .h files | |
91eba41f | 46 | * (so no inference at all needed) |
ae4735db | 47 | * |
91eba41f C |
48 | * Difference with julia's code in parsing_cocci/type_infer.ml: |
49 | * - She handles just the variable namespace. She does not type | |
50 | * field access or enum or macros. This is because cocci programs are | |
51 | * usually simple and have no structure definition or macro definitions | |
52 | * that we need to type anyway. | |
53 | * - She does more propagation. | |
ae4735db | 54 | * - She does not have to handle the typedef isomorphism which force me |
91eba41f C |
55 | * to use those typedef_fix and type_unfold_one_step |
56 | * - She does not handle I think the function pointer C isomorphism. | |
ae4735db C |
57 | * |
58 | * - She has a cleaner type_cocci without any info. In my case | |
91eba41f | 59 | * I need to do those ugly al_type, or generate fake infos. |
ae4735db | 60 | * - She has more compact code. Perhaps because she does not have to |
91eba41f C |
61 | * handle the extra exp_info that she added on me :) So I need those |
62 | * do_with_type, make_info_xxx, etc. | |
ae4735db | 63 | * |
91eba41f C |
64 | * Note: if need to debug this annotater, use -show_trace_profile, it can |
65 | * help. You can also set the typedef_debug flag below. | |
ae4735db C |
66 | * |
67 | * | |
68 | * | |
34e49164 | 69 | * todo: expression contain types, and statements, which in turn can contain |
ae4735db | 70 | * expression, so need recurse. Need define an annote_statement and |
34e49164 | 71 | * annotate_type. |
ae4735db | 72 | * |
34e49164 C |
73 | * todo: how deal with typedef isomorphisms ? How store them in Ast_c ? |
74 | * store all posible variations in ast_c ? a list of type instead of just | |
75 | * the type ? | |
ae4735db | 76 | * |
0708f913 | 77 | * todo: how to handle multiple possible definitions for entities like |
ae4735db | 78 | * struct or typedefs ? Because of ifdef, we should store list of |
0708f913 | 79 | * possibilities sometimes. |
ae4735db | 80 | * |
34e49164 | 81 | * todo: define a new type ? like type_cocci ? where have a bool ? |
ae4735db C |
82 | * |
83 | * semi: How handle scope ? When search for type of field, we return | |
34e49164 | 84 | * a type, but this type makes sense only in a certain scope. |
ae4735db | 85 | * We could add a tag to each typedef, structUnionName to differentiate |
34e49164 C |
86 | * them and also associate in ast_c to the type the scope |
87 | * of this type, the env that were used to define this type. | |
ae4735db C |
88 | * |
89 | * todo: handle better the search in previous env, the env'. Cf the | |
91eba41f C |
90 | * termination problem in typedef_fix when I was searching in the same |
91 | * env. | |
ae4735db | 92 | * |
34e49164 C |
93 | *) |
94 | ||
95 | (*****************************************************************************) | |
96 | (* Wrappers *) | |
97 | (*****************************************************************************) | |
708f4980 | 98 | let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_type |
0708f913 | 99 | |
34e49164 C |
100 | (*****************************************************************************) |
101 | (* Environment *) | |
102 | (*****************************************************************************) | |
103 | ||
91eba41f | 104 | (* The different namespaces from stdC manual: |
ae4735db | 105 | * |
34e49164 | 106 | * You introduce two new name spaces with every block that you write. |
ae4735db C |
107 | * |
108 | * One name space includes all | |
109 | * - functions, | |
110 | * - objects, | |
91eba41f | 111 | * - type definitions, |
ae4735db C |
112 | * - and enumeration constants |
113 | * that you declare or define within the block. | |
114 | * | |
115 | * The other name space includes all | |
116 | * - enumeration, | |
117 | * - structure, | |
118 | * - and union | |
91eba41f | 119 | * *tags* that you define within the block. |
ae4735db | 120 | * |
34e49164 C |
121 | * You introduce a new member name space with every structure or union |
122 | * whose content you define. You identify a member name space by the | |
123 | * type of left operand that you write for a member selection | |
124 | * operator, as in x.y or p->y. A member name space ends with the end | |
125 | * of the block in which you declare it. | |
ae4735db | 126 | * |
34e49164 C |
127 | * You introduce a new goto label name space with every function |
128 | * definition you write. Each goto label name space ends with its | |
129 | * function definition. | |
130 | *) | |
131 | ||
132 | (* But I don't try to do a type-checker, I try to "resolve" type of var | |
133 | * so don't need make difference between namespaces here. | |
ae4735db C |
134 | * |
135 | * But, why not make simply a (string, kindstring) assoc ? | |
34e49164 C |
136 | * Because we dont want that a variable shadow a struct definition, because |
137 | * they are still in 2 different namespace. But could for typedef, | |
138 | * because VarOrFunc and Typedef are in the same namespace. | |
139 | * But could do a record as in c_info.ml | |
140 | *) | |
141 | ||
142 | ||
91eba41f | 143 | (* This type contains all "ident" like notion of C. Each time in Ast_c |
ae4735db | 144 | * you have a string type (as in expression, function name, fields) |
91eba41f | 145 | * then you need to manage the scope of this ident. |
ae4735db | 146 | * |
91eba41f C |
147 | * The wrap for StructUnionNameDef contain the whole ii, the i for |
148 | * the string, the structUnion and the structType. | |
ae4735db | 149 | * |
91eba41f C |
150 | * Put Macro here ? after all the scoping rules for cpp macros is different |
151 | * and so does not vanish after the closing '}'. | |
ae4735db | 152 | * |
91eba41f | 153 | * todo: EnumDef |
34e49164 | 154 | *) |
ae4735db | 155 | type namedef = |
34e49164 | 156 | | VarOrFunc of string * Ast_c.exp_type |
91eba41f C |
157 | | EnumConstant of string * string option |
158 | ||
708f4980 | 159 | (* also used for macro type aliases *) |
91eba41f C |
160 | | TypeDef of string * fullType |
161 | (* the structType contains nested "idents" with struct scope *) | |
34e49164 | 162 | | StructUnionNameDef of string * (structUnion * structType) wrap |
34e49164 | 163 | |
91eba41f | 164 | (* cppext: *) |
b1b2de81 | 165 | | Macro of string * (define_kind * define_val) |
91eba41f | 166 | |
993936c0 C |
167 | let print_scoped_env e = |
168 | List.iter | |
169 | (function e -> | |
170 | List.iter | |
171 | (function | |
172 | VarOrFunc(s,_) -> Printf.printf "%s " s | |
173 | | EnumConstant(s,_) -> Printf.printf "%s " s | |
174 | | TypeDef(s,t) -> Printf.printf "%s" s | |
175 | | StructUnionNameDef(s,_) -> Printf.printf "%s " s | |
176 | | Macro(s,_) -> Printf.printf "%s " s) | |
177 | e; | |
178 | Printf.printf "\n") | |
179 | e | |
91eba41f C |
180 | |
181 | (* Because have nested scope, have nested list, hence the list list. | |
182 | * | |
183 | * opti? use a hash to accelerate ? hmm but may have some problems | |
184 | * with hash to handle recursive lookup. For instance for the typedef | |
ae4735db C |
185 | * example where have mutually recursive definition of the type, |
186 | * we must take care to not loop by starting the second search | |
187 | * from the previous environment. With the list scheme in | |
91eba41f C |
188 | * lookup_env below it's quite easy to do. With hash it may be |
189 | * more complicated. | |
190 | *) | |
ae4735db | 191 | type environment = namedef list list |
34e49164 | 192 | |
91eba41f C |
193 | |
194 | (* ------------------------------------------------------------ *) | |
ae4735db C |
195 | (* can be modified by the init_env function below, by |
196 | * the file environment_unix.h | |
91eba41f C |
197 | *) |
198 | let initial_env = ref [ | |
199 | [VarOrFunc("NULL", | |
200 | (Lib.al_type (Parse_c.type_of_string "void *"), | |
201 | Ast_c.NotLocalVar)); | |
202 | ||
ae4735db | 203 | (* |
91eba41f C |
204 | VarOrFunc("malloc", |
205 | (Lib.al_type(Parse_c.type_of_string "void* (*)(int size)"), | |
206 | Ast_c.NotLocalVar)); | |
207 | VarOrFunc("free", | |
208 | (Lib.al_type(Parse_c.type_of_string "void (*)(void *ptr)"), | |
209 | Ast_c.NotLocalVar)); | |
210 | *) | |
211 | ] | |
34e49164 C |
212 | ] |
213 | ||
214 | ||
ae4735db | 215 | let typedef_debug = ref false |
91eba41f C |
216 | |
217 | ||
218 | (* ------------------------------------------------------------ *) | |
219 | (* generic, lookup and also return remaining env for further lookup *) | |
ae4735db C |
220 | let rec lookup_env2 f env = |
221 | match env with | |
34e49164 | 222 | | [] -> raise Not_found |
91eba41f | 223 | | []::zs -> lookup_env2 f zs |
ae4735db | 224 | | (x::xs)::zs -> |
91eba41f C |
225 | (match f x with |
226 | | None -> lookup_env2 f (xs::zs) | |
34e49164 | 227 | | Some y -> y, xs::zs |
91eba41f | 228 | ) |
ae4735db | 229 | let lookup_env a b = |
91eba41f C |
230 | Common.profile_code "TAC.lookup_env" (fun () -> lookup_env2 a b) |
231 | ||
232 | ||
233 | ||
ae4735db C |
234 | let member_env lookupf env = |
235 | try | |
91eba41f C |
236 | let _ = lookupf env in |
237 | true | |
238 | with Not_found -> false | |
239 | ||
240 | ||
241 | ||
242 | ||
243 | (* ------------------------------------------------------------ *) | |
34e49164 | 244 | |
34e49164 | 245 | |
ae4735db | 246 | let lookup_var s env = |
34e49164 | 247 | let f = function |
b1b2de81 | 248 | | VarOrFunc (s2, typ) -> if s2 =$= s then Some typ else None |
34e49164 C |
249 | | _ -> None |
250 | in | |
251 | lookup_env f env | |
252 | ||
ae4735db | 253 | let lookup_typedef s env = |
91eba41f | 254 | if !typedef_debug then pr2 ("looking for: " ^ s); |
34e49164 | 255 | let f = function |
b1b2de81 | 256 | | TypeDef (s2, typ) -> if s2 =$= s then Some typ else None |
34e49164 C |
257 | | _ -> None |
258 | in | |
259 | lookup_env f env | |
260 | ||
261 | let lookup_structunion (_su, s) env = | |
262 | let f = function | |
b1b2de81 | 263 | | StructUnionNameDef (s2, typ) -> if s2 =$= s then Some typ else None |
34e49164 C |
264 | | _ -> None |
265 | in | |
266 | lookup_env f env | |
267 | ||
ae4735db | 268 | let lookup_macro s env = |
91eba41f | 269 | let f = function |
b1b2de81 | 270 | | Macro (s2, typ) -> if s2 =$= s then Some typ else None |
91eba41f C |
271 | | _ -> None |
272 | in | |
273 | lookup_env f env | |
274 | ||
ae4735db | 275 | let lookup_enum s env = |
91eba41f | 276 | let f = function |
b1b2de81 | 277 | | EnumConstant (s2, typ) -> if s2 =$= s then Some typ else None |
91eba41f C |
278 | | _ -> None |
279 | in | |
280 | lookup_env f env | |
281 | ||
282 | ||
ae4735db | 283 | let lookup_typedef a b = |
91eba41f C |
284 | Common.profile_code "TAC.lookup_typedef" (fun () -> lookup_typedef a b) |
285 | ||
286 | ||
34e49164 C |
287 | |
288 | (*****************************************************************************) | |
289 | (* "type-lookup" *) | |
290 | (*****************************************************************************) | |
291 | ||
292 | (* find_final_type is used to know to what type a field correspond in | |
293 | * x.foo. Sometimes the type of x is a typedef or a structName in which | |
294 | * case we must look in environment to find the complete type, here | |
295 | * structUnion that contains the information. | |
ae4735db | 296 | * |
34e49164 C |
297 | * Because in C one can redefine in nested blocks some typedefs, |
298 | * struct, or variables, we have a static scoping resolving process. | |
299 | * So, when we look for the type of a var, if this var is in an | |
300 | * enclosing block, then maybe its type refer to a typdef of this | |
301 | * enclosing block, so must restart the "type-resolving" of this | |
302 | * typedef from this enclosing block, not from the bottom. So our | |
303 | * "resolving-type functions" take an env and also return an env from | |
304 | * where the next search must be performed. *) | |
305 | ||
306 | (* | |
ae4735db | 307 | let rec find_final_type ty env = |
34e49164 | 308 | |
ae4735db | 309 | match Ast_c.unwrap_typeC ty with |
34e49164 | 310 | | BaseType x -> (BaseType x) +> Ast_c.rewrap_typeC ty |
ae4735db | 311 | |
34e49164 C |
312 | | Pointer t -> (Pointer (find_final_type t env)) +> Ast_c.rewrap_typeC ty |
313 | | Array (e, t) -> Array (e, find_final_type t env) +> Ast_c.rewrap_typeC ty | |
ae4735db | 314 | |
34e49164 | 315 | | StructUnion (sopt, su) -> StructUnion (sopt, su) +> Ast_c.rewrap_typeC ty |
ae4735db | 316 | |
34e49164 C |
317 | | FunctionType t -> (FunctionType t) (* todo ? *) +> Ast_c.rewrap_typeC ty |
318 | | Enum (s, enumt) -> (Enum (s, enumt)) (* todo? *) +> Ast_c.rewrap_typeC ty | |
319 | | EnumName s -> (EnumName s) (* todo? *) +> Ast_c.rewrap_typeC ty | |
ae4735db C |
320 | |
321 | | StructUnionName (su, s) -> | |
322 | (try | |
34e49164 C |
323 | let ((structtyp,ii), env') = lookup_structunion (su, s) env in |
324 | Ast_c.nQ, (StructUnion (Some s, structtyp), ii) | |
ae4735db | 325 | (* old: +> Ast_c.rewrap_typeC ty |
34e49164 C |
326 | * but must wrap with good ii, otherwise pretty_print_c |
327 | * will be lost and raise some Impossible | |
328 | *) | |
ae4735db | 329 | with Not_found -> |
34e49164 C |
330 | ty |
331 | ) | |
ae4735db C |
332 | |
333 | | TypeName s -> | |
334 | (try | |
34e49164 C |
335 | let (t', env') = lookup_typedef s env in |
336 | find_final_type t' env' | |
ae4735db | 337 | with Not_found -> |
34e49164 C |
338 | ty |
339 | ) | |
ae4735db | 340 | |
34e49164 C |
341 | | ParenType t -> find_final_type t env |
342 | | Typeof e -> failwith "typeof" | |
ae4735db | 343 | *) |
34e49164 C |
344 | |
345 | ||
346 | ||
347 | ||
91eba41f | 348 | (* ------------------------------------------------------------ *) |
ae4735db | 349 | let rec type_unfold_one_step ty env = |
993936c0 | 350 | let rec loop seen ty env = |
34e49164 | 351 | |
ae4735db | 352 | match Ast_c.unwrap_typeC ty with |
f59c9fb7 | 353 | | NoType -> ty |
91eba41f C |
354 | | BaseType x -> ty |
355 | | Pointer t -> ty | |
356 | | Array (e, t) -> ty | |
357 | ||
34e49164 | 358 | | StructUnion (sopt, su, fields) -> ty |
ae4735db | 359 | |
91eba41f | 360 | | FunctionType t -> ty |
34e49164 | 361 | | Enum (s, enumt) -> ty |
91eba41f C |
362 | |
363 | | EnumName s -> ty (* todo: look in env when will have EnumDef *) | |
ae4735db C |
364 | |
365 | | StructUnionName (su, s) -> | |
366 | (try | |
34e49164 | 367 | let (((su,fields),ii), env') = lookup_structunion (su, s) env in |
708f4980 | 368 | Ast_c.mk_ty (StructUnion (su, Some s, fields)) ii |
ae4735db | 369 | (* old: +> Ast_c.rewrap_typeC ty |
34e49164 C |
370 | * but must wrap with good ii, otherwise pretty_print_c |
371 | * will be lost and raise some Impossible | |
372 | *) | |
ae4735db | 373 | with Not_found -> |
34e49164 C |
374 | ty |
375 | ) | |
ae4735db C |
376 | |
377 | | TypeName (name, _typ) -> | |
b1b2de81 | 378 | let s = Ast_c.str_of_name name in |
ae4735db | 379 | (try |
91eba41f | 380 | if !typedef_debug then pr2 "type_unfold_one_step: lookup_typedef"; |
34e49164 | 381 | let (t', env') = lookup_typedef s env in |
993936c0 C |
382 | if List.mem s seen (* avoid pb with recursive typedefs *) |
383 | then type_unfold_one_step t' env' | |
384 | else loop (s::seen) t' env | |
ae4735db | 385 | with Not_found -> |
34e49164 C |
386 | ty |
387 | ) | |
ae4735db | 388 | |
34e49164 | 389 | | ParenType t -> type_unfold_one_step t env |
ae4735db | 390 | | TypeOfExpr e -> |
34e49164 C |
391 | pr2_once ("Type_annoter: not handling typeof"); |
392 | ty | |
993936c0 C |
393 | | TypeOfType t -> type_unfold_one_step t env in |
394 | loop [] ty env | |
34e49164 C |
395 | |
396 | ||
397 | ||
34e49164 C |
398 | |
399 | ||
400 | ||
34e49164 C |
401 | |
402 | ||
403 | ||
ae4735db | 404 | (* normalizer. can be seen as the opposite of the previous function as |
0708f913 C |
405 | * we "fold" at least for the structUnion. Should return something that |
406 | * Type_c.is_completed_fullType likes, something that makes it easier | |
407 | * for the programmer to work on, that has all the needed information | |
408 | * for most tasks. | |
91eba41f | 409 | *) |
ae4735db | 410 | let rec typedef_fix ty env = |
993936c0 C |
411 | let rec loop seen ty env = |
412 | match Ast_c.unwrap_typeC ty with | |
413 | | NoType -> | |
414 | ty | |
415 | | BaseType x -> | |
416 | ty | |
417 | | Pointer t -> | |
418 | Pointer (typedef_fix t env) +> Ast_c.rewrap_typeC ty | |
419 | | Array (e, t) -> | |
420 | Array (e, typedef_fix t env) +> Ast_c.rewrap_typeC ty | |
421 | | StructUnion (su, sopt, fields) -> | |
ae4735db | 422 | (* normalize, fold. |
993936c0 C |
423 | * todo? but what if correspond to a nested struct def ? |
424 | *) | |
425 | Type_c.structdef_to_struct_name ty | |
426 | | FunctionType ft -> | |
427 | (FunctionType ft) (* todo ? *) +> Ast_c.rewrap_typeC ty | |
428 | | Enum (s, enumt) -> | |
429 | (Enum (s, enumt)) (* todo? *) +> Ast_c.rewrap_typeC ty | |
430 | | EnumName s -> | |
431 | (EnumName s) (* todo? *) +> Ast_c.rewrap_typeC ty | |
432 | ||
34e49164 | 433 | (* we prefer StructUnionName to StructUnion when it comes to typed metavar *) |
993936c0 C |
434 | | StructUnionName (su, s) -> |
435 | ty | |
436 | ||
91eba41f | 437 | (* keep the typename but complete with more information *) |
993936c0 C |
438 | | TypeName (name, typ) -> |
439 | let s = Ast_c.str_of_name name in | |
440 | (match typ with | |
441 | | Some _ -> | |
442 | pr2 ("typedef value already there:" ^ s); | |
443 | ty | |
444 | | None -> | |
445 | (try | |
446 | if !typedef_debug then pr2 "typedef_fix: lookup_typedef"; | |
447 | let (t', env') = lookup_typedef s env in | |
448 | ||
91eba41f | 449 | (* bugfix: termination bug if use env instead of env' below, because |
993936c0 C |
450 | * can have some weird mutually recursive typedef which |
451 | * each new type alias search for its mutual def. | |
452 | * seen is an attempt to do better. | |
453 | *) | |
454 | let fixed = | |
455 | if List.mem s seen | |
456 | then loop (s::seen) t' env | |
457 | else typedef_fix t' env' in | |
458 | TypeName (name, Some fixed) +> | |
459 | Ast_c.rewrap_typeC ty | |
460 | with Not_found -> | |
461 | ty)) | |
0708f913 | 462 | |
91eba41f | 463 | (* remove paren for better matching with typed metavar. kind of iso again *) |
993936c0 C |
464 | | ParenType t -> |
465 | typedef_fix t env | |
466 | | TypeOfExpr e -> | |
467 | pr2_once ("Type_annoter: not handling typeof"); | |
468 | ty | |
34e49164 | 469 | |
993936c0 C |
470 | | TypeOfType t -> |
471 | typedef_fix t env in | |
472 | loop [] ty env | |
91eba41f C |
473 | |
474 | ||
475 | (*****************************************************************************) | |
476 | (* Helpers, part 1 *) | |
477 | (*****************************************************************************) | |
478 | ||
ae4735db | 479 | let type_of_s2 s = |
91eba41f | 480 | (Lib.al_type (Parse_c.type_of_string s)) |
ae4735db | 481 | let type_of_s a = |
91eba41f C |
482 | Common.profile_code "Type_c.type_of_s" (fun () -> type_of_s2 a) |
483 | ||
484 | ||
485 | (* pad: pb on: | |
486 | * /home/pad/software-os-src2/freebsd/contrib/ipfilter/netinet/ip_fil_freebsd.c | |
487 | * because in the code there is: | |
488 | * static iss_seq_off = 0; | |
489 | * which in the parser was generating a default int without a parse_info. | |
490 | * I now add a fake parse_info for such default int so no more failwith | |
491 | * normally. | |
492 | *) | |
91eba41f | 493 | |
ae4735db | 494 | let rec is_simple_expr expr = |
91eba41f | 495 | match Ast_c.unwrap_expr expr with |
ae4735db C |
496 | (* todo? handle more special cases ? *) |
497 | ||
498 | | Ident _ -> | |
91eba41f | 499 | true |
ae4735db | 500 | | Constant (_) -> |
91eba41f | 501 | true |
ae4735db | 502 | | Unary (op, e) -> |
91eba41f | 503 | true |
ae4735db | 504 | | Binary (e1, op, e2) -> |
91eba41f | 505 | true |
ae4735db | 506 | | Cast (t, e) -> |
91eba41f C |
507 | true |
508 | | ParenExpr (e) -> is_simple_expr e | |
509 | ||
510 | | _ -> false | |
511 | ||
512 | (*****************************************************************************) | |
513 | (* Typing rules *) | |
514 | (*****************************************************************************) | |
515 | (* now in type_c.ml *) | |
516 | ||
517 | ||
34e49164 C |
518 | |
519 | (*****************************************************************************) | |
520 | (* (Semi) Globals, Julia's style *) | |
521 | (*****************************************************************************) | |
522 | ||
523 | (* opti: cache ? use hash ? *) | |
91eba41f | 524 | let _scoped_env = ref !initial_env |
34e49164 C |
525 | |
526 | (* memoise unnanoted var, to avoid too much warning messages *) | |
527 | let _notyped_var = ref (Hashtbl.create 100) | |
528 | ||
ae4735db | 529 | let new_scope() = _scoped_env := []::!_scoped_env |
34e49164 C |
530 | let del_scope() = _scoped_env := List.tl !_scoped_env |
531 | ||
ae4735db | 532 | let do_in_new_scope f = |
34e49164 C |
533 | begin |
534 | new_scope(); | |
535 | let res = f() in | |
536 | del_scope(); | |
537 | res | |
538 | end | |
539 | ||
540 | let add_in_scope namedef = | |
541 | let (current, older) = Common.uncons !_scoped_env in | |
542 | _scoped_env := (namedef::current)::older | |
543 | ||
91eba41f C |
544 | (* ------------------------------------------------------------ *) |
545 | ||
34e49164 C |
546 | (* sort of hackish... *) |
547 | let islocal info = | |
b1b2de81 | 548 | if List.length (!_scoped_env) =|= List.length !initial_env |
34e49164 C |
549 | then Ast_c.NotLocalVar |
550 | else Ast_c.LocalVar info | |
551 | ||
91eba41f | 552 | (* ------------------------------------------------------------ *) |
ae4735db | 553 | (* the warning argument is here to allow some binding to overwrite an |
91eba41f C |
554 | * existing one. With function, we first have the prototype and then the def, |
555 | * and the def binding with the same string is not an error. | |
ae4735db | 556 | * |
34e49164 | 557 | * todo?: but if we define two times the same function, then we will not |
ae4735db | 558 | * detect it :( it would require to make a diff between adding a binding |
34e49164 | 559 | * from a prototype and from a definition. |
ae4735db C |
560 | * |
561 | * opti: disabling the check_annotater flag have some important | |
91eba41f | 562 | * performance benefit. |
ae4735db | 563 | * |
34e49164 | 564 | *) |
ae4735db | 565 | let add_binding2 namedef warning = |
34e49164 C |
566 | let (current_scope, _older_scope) = Common.uncons !_scoped_env in |
567 | ||
91eba41f | 568 | if !Flag_parsing_c.check_annotater then begin |
34e49164 | 569 | (match namedef with |
ae4735db | 570 | | VarOrFunc (s, typ) -> |
91eba41f C |
571 | if Hashtbl.mem !_notyped_var s |
572 | then pr2 ("warning: found typing information for a variable that was" ^ | |
573 | "previously unknown:" ^ s); | |
574 | | _ -> () | |
575 | ); | |
ae4735db C |
576 | |
577 | let (memberf, s) = | |
91eba41f | 578 | (match namedef with |
ae4735db | 579 | | VarOrFunc (s, typ) -> |
91eba41f | 580 | member_env (lookup_var s), s |
ae4735db | 581 | | TypeDef (s, typ) -> |
91eba41f | 582 | member_env (lookup_typedef s), s |
ae4735db | 583 | | StructUnionNameDef (s, (su, typ)) -> |
91eba41f | 584 | member_env (lookup_structunion (su, s)), s |
ae4735db | 585 | | Macro (s, body) -> |
91eba41f | 586 | member_env (lookup_macro s), s |
ae4735db | 587 | | EnumConstant (s, body) -> |
91eba41f C |
588 | member_env (lookup_enum s), s |
589 | ) in | |
ae4735db | 590 | |
91eba41f | 591 | if memberf [current_scope] && warning |
ae4735db | 592 | then pr2 ("Type_annoter: warning, " ^ s ^ |
91eba41f | 593 | " is already in current binding" ^ "\n" ^ |
0708f913 | 594 | " so there is a weird shadowing"); |
91eba41f | 595 | end; |
34e49164 | 596 | add_in_scope namedef |
91eba41f | 597 | |
ae4735db | 598 | let add_binding namedef warning = |
91eba41f | 599 | Common.profile_code "TAC.add_binding" (fun () -> add_binding2 namedef warning) |
ae4735db | 600 | |
34e49164 | 601 | |
91eba41f | 602 | |
34e49164 | 603 | (*****************************************************************************) |
91eba41f | 604 | (* Helpers, part 2 *) |
34e49164 C |
605 | (*****************************************************************************) |
606 | ||
ae4735db | 607 | let lookup_opt_env lookupf s = |
91eba41f C |
608 | Common.optionise (fun () -> |
609 | lookupf s !_scoped_env | |
610 | ) | |
611 | ||
ae4735db C |
612 | let unwrap_unfold_env2 typ = |
613 | Ast_c.unwrap_typeC | |
614 | (type_unfold_one_step typ !_scoped_env) | |
615 | let unwrap_unfold_env typ = | |
91eba41f | 616 | Common.profile_code "TAC.unwrap_unfold_env" (fun () -> unwrap_unfold_env2 typ) |
34e49164 | 617 | |
ae4735db | 618 | let typedef_fix a b = |
91eba41f | 619 | Common.profile_code "TAC.typedef_fix" (fun () -> typedef_fix a b) |
34e49164 | 620 | |
ae4735db C |
621 | let make_info_def_fix x = |
622 | Type_c.make_info_def (typedef_fix x !_scoped_env) | |
34e49164 | 623 | |
91eba41f C |
624 | let make_info_fix (typ, local) = |
625 | Type_c.make_info ((typedef_fix typ !_scoped_env),local) | |
626 | ||
ae4735db C |
627 | |
628 | let make_info_def = Type_c.make_info_def | |
34e49164 | 629 | |
34e49164 | 630 | (*****************************************************************************) |
91eba41f | 631 | (* Main typer code, put later in a visitor *) |
34e49164 | 632 | (*****************************************************************************) |
34e49164 | 633 | |
ae4735db | 634 | let annotater_expr_visitor_subpart = (fun (k,bigf) expr -> |
34e49164 | 635 | |
ae4735db | 636 | let ty = |
91eba41f | 637 | match Ast_c.unwrap_expr expr with |
ae4735db | 638 | |
91eba41f C |
639 | (* -------------------------------------------------- *) |
640 | (* todo: should analyse the 's' for int to know if unsigned or not *) | |
5626f154 C |
641 | | Constant (String (s,kind)) -> make_info_def (type_of_s "char []") |
642 | | Constant MultiString _ -> make_info_def (type_of_s "char []") | |
91eba41f | 643 | | Constant (Char (s,kind)) -> make_info_def (type_of_s "char") |
708f4980 C |
644 | | Constant (Int (s,kind)) -> |
645 | (* this seems really unpleasant, but perhaps the type needs to be set | |
646 | up in some way that allows pretty printing *) | |
647 | make_info_def | |
648 | (match kind with | |
649 | (* matches limited by what is generated in lexer_c.mll *) | |
650 | Si(Signed,CInt) -> type_of_s "int" | |
651 | | Si(UnSigned,CInt) -> type_of_s "unsigned int" | |
652 | | Si(Signed,CLong) -> type_of_s "long" | |
653 | | Si(UnSigned,CLong) -> type_of_s "unsigned long" | |
654 | | Si(Signed,CLongLong) -> type_of_s "long long" | |
655 | | Si(UnSigned,CLongLong) -> type_of_s "unsigned long long" | |
656 | | _ -> failwith "unexpected kind for constant") | |
ae4735db | 657 | | Constant (Float (s,kind)) -> |
91eba41f C |
658 | let fake = Ast_c.fakeInfo (Common.fake_parse_info) in |
659 | let fake = Ast_c.rewrap_str "float" fake in | |
660 | let iinull = [fake] in | |
708f4980 | 661 | make_info_def (Ast_c.mk_ty (BaseType (FloatType kind)) iinull) |
ae4735db C |
662 | |
663 | ||
91eba41f | 664 | (* -------------------------------------------------- *) |
ae4735db C |
665 | (* note: could factorize this code with the code for Ident |
666 | * and the other code for Funcall below. But as the Ident can be | |
91eba41f C |
667 | * a macro-func, I prefer to handle it separately. So |
668 | * this rule can handle the macro-func, the Ident-rule can handle | |
669 | * the macro-var, and the other FunCall-rule the regular | |
670 | * function calls through fields. | |
671 | * Also as I don't want a warning on the Ident that are a FunCall, | |
672 | * easier to have a rule separate from the Ident rule. | |
673 | *) | |
ae4735db | 674 | | FunCall (e1, args) -> |
708f4980 | 675 | (match Ast_c.unwrap_expr e1 with |
ae4735db | 676 | | Ident (ident) -> |
91eba41f | 677 | (* recurse *) |
ae4735db | 678 | args +> List.iter (fun (e,ii) -> |
91eba41f C |
679 | (* could typecheck if arguments agree with prototype *) |
680 | Visitor_c.vk_argument bigf e | |
681 | ); | |
b1b2de81 | 682 | let s = Ast_c.str_of_name ident in |
91eba41f | 683 | (match lookup_opt_env lookup_var s with |
ae4735db C |
684 | | Some ((typ,local),_nextenv) -> |
685 | ||
91eba41f C |
686 | (* set type for ident *) |
687 | let tyinfo = make_info_fix (typ, local) in | |
688 | Ast_c.set_type_expr e1 tyinfo; | |
689 | ||
690 | (match unwrap_unfold_env typ with | |
691 | | FunctionType (ret, params) -> make_info_def ret | |
692 | ||
ae4735db | 693 | (* can be function pointer, C have an iso for that, |
91eba41f C |
694 | * same pfn() syntax than regular function call. |
695 | *) | |
ae4735db | 696 | | Pointer (typ2) -> |
91eba41f C |
697 | (match unwrap_unfold_env typ2 with |
698 | | FunctionType (ret, params) -> make_info_def ret | |
699 | | _ -> Type_c.noTypeHere | |
700 | ) | |
701 | | _ -> Type_c.noTypeHere | |
702 | ) | |
ae4735db | 703 | | None -> |
91eba41f C |
704 | |
705 | (match lookup_opt_env lookup_macro s with | |
ae4735db | 706 | | Some ((defkind, defval), _nextenv) -> |
91eba41f | 707 | (match defkind, defval with |
ae4735db | 708 | | DefineFunc _, DefineExpr e -> |
91eba41f C |
709 | let rettype = Ast_c.get_onlytype_expr e in |
710 | ||
711 | (* todo: could also set type for ident ? | |
712 | have return type and at least type of concrete | |
713 | parameters so can generate a fake FunctionType | |
714 | *) | |
ae4735db | 715 | let macrotype_opt = |
91eba41f C |
716 | Type_c.fake_function_type rettype args |
717 | in | |
718 | ||
ae4735db | 719 | macrotype_opt +> Common.do_option (fun t -> |
91eba41f C |
720 | pr2 ("Type_annotater: generate fake function type" ^ |
721 | "for macro: " ^ s); | |
722 | let tyinfo = make_info_def_fix t in | |
723 | Ast_c.set_type_expr e1 tyinfo; | |
724 | ); | |
725 | ||
726 | Ast_c.get_type_expr e | |
ae4735db | 727 | | DefineVar, _ -> |
91eba41f C |
728 | pr2 ("Type_annoter: not a macro-func: " ^ s); |
729 | Type_c.noTypeHere | |
3a314143 C |
730 | | Undef, _ -> |
731 | pr2 ("Type_annoter: not a macro-func: " ^ s); | |
732 | Type_c.noTypeHere | |
ae4735db | 733 | | DefineFunc _, _ -> |
3a314143 | 734 | (* normally the FunCall case should have caught it *) |
91eba41f C |
735 | pr2 ("Type_annoter: not a macro-func-expr: " ^ s); |
736 | Type_c.noTypeHere | |
737 | ) | |
ae4735db | 738 | | None -> |
0708f913 | 739 | pr2_once ("type_annotater: no type for function ident: " ^ s); |
91eba41f C |
740 | Type_c.noTypeHere |
741 | ) | |
742 | ) | |
34e49164 | 743 | |
34e49164 | 744 | |
ae4735db | 745 | | _e -> |
91eba41f | 746 | k expr; |
ae4735db C |
747 | |
748 | (Ast_c.get_type_expr e1) +> Type_c.do_with_type (fun typ -> | |
749 | (* copy paste of above *) | |
91eba41f C |
750 | (match unwrap_unfold_env typ with |
751 | | FunctionType (ret, params) -> make_info_def ret | |
ae4735db | 752 | | Pointer (typ) -> |
91eba41f C |
753 | (match unwrap_unfold_env typ with |
754 | | FunctionType (ret, params) -> make_info_def ret | |
755 | | _ -> Type_c.noTypeHere | |
756 | ) | |
757 | | _ -> Type_c.noTypeHere | |
34e49164 | 758 | ) |
91eba41f | 759 | ) |
708f4980 | 760 | ) |
91eba41f C |
761 | |
762 | ||
763 | (* -------------------------------------------------- *) | |
ae4735db | 764 | | Ident (ident) -> |
b1b2de81 | 765 | let s = Ast_c.str_of_name ident in |
91eba41f | 766 | (match lookup_opt_env lookup_var s with |
993936c0 | 767 | | Some ((typ,local),_nextenv) -> make_info_fix (typ,local) |
ae4735db | 768 | | None -> |
91eba41f | 769 | (match lookup_opt_env lookup_macro s with |
ae4735db | 770 | | Some ((defkind, defval), _nextenv) -> |
91eba41f | 771 | (match defkind, defval with |
ae4735db | 772 | | DefineVar, DefineExpr e -> |
91eba41f | 773 | Ast_c.get_type_expr e |
ae4735db | 774 | | DefineVar, _ -> |
91eba41f C |
775 | pr2 ("Type_annoter: not a expression: " ^ s); |
776 | Type_c.noTypeHere | |
ae4735db | 777 | | DefineFunc _, _ -> |
91eba41f C |
778 | (* normally the FunCall case should have catch it *) |
779 | pr2 ("Type_annoter: not a macro-var: " ^ s); | |
780 | Type_c.noTypeHere | |
3a314143 C |
781 | | Undef, _ -> |
782 | pr2 ("Type_annoter: not a expression: " ^ s); | |
783 | Type_c.noTypeHere | |
91eba41f | 784 | ) |
ae4735db | 785 | | None -> |
91eba41f | 786 | (match lookup_opt_env lookup_enum s with |
ae4735db C |
787 | | Some (_, _nextenv) -> |
788 | make_info_def (type_of_s "int") | |
789 | | None -> | |
91eba41f | 790 | if not (s =~ "[A-Z_]+") (* if macro then no warning *) |
ae4735db C |
791 | then |
792 | if !Flag_parsing_c.check_annotater then | |
91eba41f | 793 | if not (Hashtbl.mem !_notyped_var s) |
ae4735db | 794 | then begin |
002099fc | 795 | pr2 ("Type_annoter: no type found for: " ^ s); |
91eba41f C |
796 | Hashtbl.add !_notyped_var s true; |
797 | end | |
798 | else () | |
ae4735db | 799 | else |
002099fc | 800 | pr2 ("Type_annoter: no type found for: " ^ s) |
91eba41f C |
801 | ; |
802 | Type_c.noTypeHere | |
803 | ) | |
804 | ) | |
805 | ) | |
ae4735db | 806 | |
91eba41f C |
807 | (* -------------------------------------------------- *) |
808 | (* C isomorphism on type on array and pointers *) | |
809 | | Unary (e, DeRef) | |
810 | | ArrayAccess (e, _) -> | |
811 | k expr; (* recurse to set the types-ref of sub expressions *) | |
812 | ||
ae4735db | 813 | (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t -> |
91eba41f C |
814 | (* todo: maybe not good env !! *) |
815 | match unwrap_unfold_env t with | |
ae4735db C |
816 | | Pointer x |
817 | | Array (_, x) -> | |
91eba41f C |
818 | make_info_def_fix x |
819 | | _ -> Type_c.noTypeHere | |
820 | ||
821 | ) | |
822 | ||
ae4735db | 823 | | Unary (e, GetRef) -> |
91eba41f C |
824 | k expr; (* recurse to set the types-ref of sub expressions *) |
825 | ||
ae4735db C |
826 | (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t -> |
827 | (* must generate an element so that '=' can be used | |
91eba41f C |
828 | * to compare type ? |
829 | *) | |
830 | let fake = Ast_c.fakeInfo Common.fake_parse_info in | |
831 | let fake = Ast_c.rewrap_str "*" fake in | |
ae4735db | 832 | |
708f4980 | 833 | let ft = Ast_c.mk_ty (Pointer t) [fake] in |
91eba41f C |
834 | make_info_def_fix ft |
835 | ) | |
836 | ||
91eba41f C |
837 | (* -------------------------------------------------- *) |
838 | (* fields *) | |
ae4735db C |
839 | | RecordAccess (e, namefld) |
840 | | RecordPtAccess (e, namefld) as x -> | |
b1b2de81 | 841 | let fld = Ast_c.str_of_name namefld in |
34e49164 | 842 | |
91eba41f | 843 | k expr; (* recurse to set the types-ref of sub expressions *) |
91eba41f | 844 | |
ae4735db C |
845 | (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t -> |
846 | ||
847 | let topt = | |
91eba41f C |
848 | match x with |
849 | | RecordAccess _ -> Some t | |
ae4735db C |
850 | | RecordPtAccess _ -> |
851 | (match unwrap_unfold_env t with | |
91eba41f C |
852 | | Pointer (t) -> Some t |
853 | | _ -> None | |
34e49164 | 854 | ) |
91eba41f | 855 | | _ -> raise Impossible |
ae4735db | 856 | |
91eba41f C |
857 | in |
858 | (match topt with | |
859 | | None -> Type_c.noTypeHere | |
ae4735db | 860 | | Some t -> |
91eba41f | 861 | match unwrap_unfold_env t with |
ae4735db C |
862 | | StructUnion (su, sopt, fields) -> |
863 | (try | |
91eba41f | 864 | (* todo: which env ? *) |
ae4735db | 865 | make_info_def_fix |
91eba41f | 866 | (Type_c.type_field fld (su, fields)) |
ae4735db C |
867 | with |
868 | | Not_found -> | |
869 | pr2 (spf | |
91eba41f C |
870 | "TYPE-ERROR: field '%s' does not belong in struct %s" |
871 | fld (match sopt with Some s -> s |_ -> "<anon>")); | |
872 | Type_c.noTypeHere | |
ae4735db | 873 | | Multi_found -> |
91eba41f C |
874 | pr2 "TAC:MultiFound"; |
875 | Type_c.noTypeHere | |
876 | ) | |
877 | | _ -> Type_c.noTypeHere | |
34e49164 | 878 | ) |
91eba41f | 879 | ) |
ae4735db | 880 | |
91eba41f C |
881 | |
882 | ||
883 | (* -------------------------------------------------- *) | |
ae4735db | 884 | | Cast (t, e) -> |
91eba41f C |
885 | k expr; |
886 | (* todo: if infer, can "push" info ? add_types_expr [t] e ? *) | |
887 | make_info_def_fix (Lib.al_type t) | |
888 | ||
889 | (* todo? lub, hmm maybe not, cos type must be e1 *) | |
ae4735db C |
890 | | Assignment (e1, op, e2) -> |
891 | k expr; | |
c491d8ee C |
892 | (* value of an assignment is the value of the RHS expression, but its |
893 | type is the type of the lhs expression. Use the rhs exp if no | |
894 | information is available *) | |
895 | (match Ast_c.get_type_expr e1 with | |
896 | (None,_) -> Ast_c.get_type_expr e2 | |
897 | | (Some ty,t) -> (Some ty,t)) | |
ae4735db C |
898 | | Sequence (e1, e2) -> |
899 | k expr; | |
91eba41f | 900 | Ast_c.get_type_expr e2 |
ae4735db C |
901 | |
902 | | Binary (e1, Logical _, e2) -> | |
708f4980 C |
903 | k expr; |
904 | make_info_def (type_of_s "int") | |
905 | ||
91eba41f | 906 | (* todo: lub *) |
ae4735db | 907 | | Binary (e1, Arith op, e2) -> |
91eba41f | 908 | k expr; |
708f4980 | 909 | Type_c.lub op (Type_c.get_opt_type e1) (Type_c.get_opt_type e2) |
91eba41f | 910 | |
ae4735db | 911 | | CondExpr (cond, e1opt, e2) -> |
91eba41f C |
912 | k expr; |
913 | Ast_c.get_type_expr e2 | |
34e49164 | 914 | |
34e49164 | 915 | |
ae4735db | 916 | | ParenExpr e -> |
91eba41f C |
917 | k expr; |
918 | Ast_c.get_type_expr e | |
919 | ||
ae4735db | 920 | | Infix (e, op) | Postfix (e, op) -> |
91eba41f C |
921 | k expr; |
922 | Ast_c.get_type_expr e | |
ae4735db | 923 | |
91eba41f | 924 | (* pad: julia wrote this ? *) |
ae4735db | 925 | | Unary (e, UnPlus) -> |
91eba41f C |
926 | k expr; (* recurse to set the types-ref of sub expressions *) |
927 | make_info_def (type_of_s "int") | |
928 | (* todo? can convert from unsigned to signed if UnMinus ? *) | |
ae4735db | 929 | | Unary (e, UnMinus) -> |
91eba41f C |
930 | k expr; (* recurse to set the types-ref of sub expressions *) |
931 | make_info_def (type_of_s "int") | |
ae4735db C |
932 | |
933 | | SizeOfType _|SizeOfExpr _ -> | |
91eba41f | 934 | k expr; (* recurse to set the types-ref of sub expressions *) |
1eddfd50 | 935 | make_info_def (type_of_s "size_t") |
ae4735db C |
936 | |
937 | | Constructor (ft, ini) -> | |
91eba41f C |
938 | k expr; (* recurse to set the types-ref of sub expressions *) |
939 | make_info_def (Lib.al_type ft) | |
ae4735db C |
940 | |
941 | | Unary (e, Not) -> | |
91eba41f | 942 | k expr; (* recurse to set the types-ref of sub expressions *) |
5636bb2c C |
943 | (* the result of ! is always 0 or 1, not the argument type *) |
944 | make_info_def (type_of_s "int") | |
ae4735db | 945 | | Unary (e, Tilde) -> |
91eba41f C |
946 | k expr; (* recurse to set the types-ref of sub expressions *) |
947 | Ast_c.get_type_expr e | |
ae4735db | 948 | |
91eba41f C |
949 | (* -------------------------------------------------- *) |
950 | (* todo *) | |
ae4735db | 951 | | Unary (_, GetRefLabel) -> |
91eba41f C |
952 | k expr; (* recurse to set the types-ref of sub expressions *) |
953 | pr2_once "Type annotater:not handling GetRefLabel"; | |
954 | Type_c.noTypeHere | |
955 | (* todo *) | |
ae4735db | 956 | | StatementExpr _ -> |
91eba41f | 957 | k expr; (* recurse to set the types-ref of sub expressions *) |
951c7801 | 958 | pr2_once "Type annotater:not handling StatementExpr"; |
91eba41f C |
959 | Type_c.noTypeHere |
960 | (* | |
961 | | _ -> k expr; Type_c.noTypeHere | |
962 | *) | |
ae4735db | 963 | |
f59c9fb7 C |
964 | | New ty -> |
965 | k expr; | |
966 | pr2_once "Type annotater:not handling New"; | |
967 | Type_c.noTypeHere (* TODO *) | |
968 | ||
4dfbc1c2 C |
969 | | Delete e -> |
970 | k expr; | |
971 | pr2_once "Type annotater:not handling Delete"; | |
972 | Type_c.noTypeHere (* TODO *) | |
973 | ||
91eba41f C |
974 | in |
975 | Ast_c.set_type_expr expr ty | |
976 | ||
977 | ) | |
978 | ||
ae4735db | 979 | |
91eba41f C |
980 | (*****************************************************************************) |
981 | (* Visitor *) | |
982 | (*****************************************************************************) | |
983 | ||
984 | (* Processing includes that were added after a cpp_ast_c makes the | |
985 | * type annotater quite slow, especially when the depth of cpp_ast_c is | |
986 | * big. But for such includes the only thing we really want is to modify | |
987 | * the environment to have enough type information. We don't need | |
988 | * to type the expressions inside those includes (they will be typed | |
ae4735db | 989 | * when we process the include file directly). Here the goal is |
91eba41f | 990 | * to not recurse. |
ae4735db | 991 | * |
91eba41f C |
992 | * Note that as usually header files contain mostly structure |
993 | * definitions and defines, that means we still have to do lots of work. | |
ae4735db | 994 | * We only win on function definition bodies, but usually header files |
91eba41f C |
995 | * have just prototypes, or inline function definitions which anyway have |
996 | * usually a small body. But still, we win. It also makes clearer | |
997 | * that when processing include as we just need the environment, the caller | |
ae4735db | 998 | * of this module can do further optimisations such as memorising the |
91eba41f | 999 | * state of the environment after each header files. |
ae4735db C |
1000 | * |
1001 | * | |
91eba41f C |
1002 | * For sparse its makes the annotating speed goes from 9s to 4s |
1003 | * For Linux the speedup is even better, from ??? to ???. | |
ae4735db | 1004 | * |
91eba41f C |
1005 | * Because There would be some copy paste with annotate_program, it is |
1006 | * better to factorize code hence the just_add_in_env parameter below. | |
ae4735db | 1007 | * |
91eba41f C |
1008 | * todo? alternative optimisation for the include problem: |
1009 | * - processing all headers files one time and construct big env | |
1010 | * - use hashtbl for env (but apparently not biggest problem) | |
1011 | *) | |
ae4735db C |
1012 | |
1013 | let rec visit_toplevel ~just_add_in_env ~depth elem = | |
91eba41f C |
1014 | let need_annotate_body = not just_add_in_env in |
1015 | ||
ae4735db | 1016 | let bigf = { Visitor_c.default_visitor_c with |
91eba41f C |
1017 | |
1018 | (* ------------------------------------------------------------ *) | |
ae4735db | 1019 | Visitor_c.kcppdirective = (fun (k, bigf) directive -> |
91eba41f C |
1020 | match directive with |
1021 | (* do error messages for type annotater only for the real body of the | |
1022 | * file, not inside include. | |
1023 | *) | |
ae4735db C |
1024 | | Include {i_content = opt} -> |
1025 | opt +> Common.do_option (fun (filename, program) -> | |
1026 | Common.save_excursion Flag_parsing_c.verbose_type (fun () -> | |
91eba41f C |
1027 | Flag_parsing_c.verbose_type := false; |
1028 | ||
ae4735db | 1029 | (* old: Visitor_c.vk_program bigf program; |
91eba41f C |
1030 | * opti: set the just_add_in_env |
1031 | *) | |
ae4735db | 1032 | program +> List.iter (fun elem -> |
91eba41f | 1033 | visit_toplevel ~just_add_in_env:true ~depth:(depth+1) elem |
34e49164 | 1034 | ) |
91eba41f | 1035 | ) |
34e49164 | 1036 | ) |
91eba41f | 1037 | |
ae4735db | 1038 | | Define ((s,ii), (defkind, defval)) -> |
91eba41f C |
1039 | |
1040 | ||
1041 | (* even if we are in a just_add_in_env phase, such as when | |
ae4735db C |
1042 | * we process include, as opposed to the body of functions, |
1043 | * with macros we still to type the body of the macro as | |
91eba41f C |
1044 | * the macro has no type and so we infer its type from its |
1045 | * body (and one day later maybe from its use). | |
1046 | *) | |
1047 | (match defval with | |
1048 | (* can try to optimize and recurse only when the define body | |
ae4735db | 1049 | * is simple ? |
91eba41f C |
1050 | *) |
1051 | ||
ae4735db | 1052 | | DefineExpr expr -> |
951c7801 C |
1053 | (* prevent macro-declared variables from leaking out *) |
1054 | do_in_new_scope (fun () -> | |
1055 | if is_simple_expr expr | |
91eba41f | 1056 | (* even if not need_annotate_body, still recurse*) |
ae4735db C |
1057 | then k directive |
1058 | else | |
951c7801 C |
1059 | if need_annotate_body |
1060 | then k directive) | |
ae4735db | 1061 | | _ -> |
951c7801 C |
1062 | do_in_new_scope (fun () -> |
1063 | if need_annotate_body | |
1064 | then k directive) | |
91eba41f C |
1065 | ); |
1066 | ||
1067 | add_binding (Macro (s, (defkind, defval) )) true; | |
1068 | ||
91eba41f | 1069 | | PragmaAndCo _ -> () |
34e49164 C |
1070 | ); |
1071 | ||
91eba41f C |
1072 | (* ------------------------------------------------------------ *) |
1073 | (* main typer code *) | |
1074 | (* ------------------------------------------------------------ *) | |
1075 | Visitor_c.kexpr = annotater_expr_visitor_subpart; | |
1076 | ||
1077 | (* ------------------------------------------------------------ *) | |
ae4735db C |
1078 | Visitor_c.kstatement = (fun (k, bigf) st -> |
1079 | match Ast_c.unwrap_st st with | |
708f4980 | 1080 | | Compound statxs -> do_in_new_scope (fun () -> k st); |
34e49164 | 1081 | | _ -> k st |
34e49164 | 1082 | ); |
91eba41f | 1083 | (* ------------------------------------------------------------ *) |
ae4735db | 1084 | Visitor_c.kdecl = (fun (k, bigf) d -> |
34e49164 | 1085 | (match d with |
ae4735db | 1086 | | (DeclList (xs, ii)) -> |
485bce71 | 1087 | xs +> List.iter (fun ({v_namei = var; v_type = t; |
978fd7e5 C |
1088 | v_storage = sto; v_local = local} as x |
1089 | , iicomma) -> | |
485bce71 | 1090 | |
34e49164 C |
1091 | (* to add possible definition in type found in Decl *) |
1092 | Visitor_c.vk_type bigf t; | |
91eba41f C |
1093 | |
1094 | ||
1095 | let local = | |
8babbc8f C |
1096 | match (sto,local) with |
1097 | | (_,Ast_c.NotLocalDecl) -> Ast_c.NotLocalVar | |
1098 | | ((Ast_c.Sto Ast_c.Static, _), Ast_c.LocalDecl) -> | |
97111a47 C |
1099 | (match Ast_c.info_of_type t with |
1100 | (* if there is no info about the type it must not be | |
1101 | present, so we don't know what the variable is *) | |
1102 | None -> Ast_c.NotLocalVar | |
1103 | | Some ii -> Ast_c.StaticLocalVar ii) | |
1104 | | (_,Ast_c.LocalDecl) -> | |
1105 | (match Ast_c.info_of_type t with | |
1106 | (* if there is no info about the type it must not be | |
1107 | present, so we don't know what the variable is *) | |
1108 | None -> Ast_c.NotLocalVar | |
1109 | | Some ii -> Ast_c.LocalVar ii) | |
91eba41f | 1110 | in |
ae4735db | 1111 | var +> Common.do_option (fun (name, iniopt) -> |
b1b2de81 C |
1112 | let s = Ast_c.str_of_name name in |
1113 | ||
ae4735db C |
1114 | match sto with |
1115 | | StoTypedef, _inline -> | |
34e49164 | 1116 | add_binding (TypeDef (s,Lib.al_type t)) true; |
ae4735db | 1117 | | _ -> |
34e49164 | 1118 | add_binding (VarOrFunc (s, (Lib.al_type t, local))) true; |
91eba41f | 1119 | |
ae4735db | 1120 | x.v_type_bis := |
978fd7e5 | 1121 | Some (typedef_fix (Lib.al_type t) !_scoped_env); |
91eba41f C |
1122 | |
1123 | if need_annotate_body then begin | |
1124 | (* int x = sizeof(x) is legal so need process ini *) | |
4dfbc1c2 C |
1125 | match iniopt with |
1126 | Ast_c.NoInit -> () | |
1127 | | Ast_c.ValInit(iini,init) -> Visitor_c.vk_ini bigf init | |
1128 | | Ast_c.ConstrInit((args,_)) -> | |
1129 | args +> List.iter (fun (e,ii) -> | |
1130 | Visitor_c.vk_argument bigf e | |
1131 | ) | |
91eba41f | 1132 | end |
34e49164 C |
1133 | ); |
1134 | ); | |
17ba0788 | 1135 | | MacroDecl _ | MacroDeclInit _ -> |
91eba41f C |
1136 | if need_annotate_body |
1137 | then k d | |
34e49164 | 1138 | ); |
ae4735db | 1139 | |
34e49164 C |
1140 | ); |
1141 | ||
91eba41f | 1142 | (* ------------------------------------------------------------ *) |
ae4735db C |
1143 | Visitor_c.ktype = (fun (k, bigf) typ -> |
1144 | (* bugfix: have a 'Lib.al_type typ' before, but because we can | |
91eba41f | 1145 | * have enum with possible expression, we don't want to change |
ae4735db | 1146 | * the ref of abstract-lined types, but the real one, so |
91eba41f C |
1147 | * don't al_type here |
1148 | *) | |
708f4980 | 1149 | let (_q, tbis) = typ in |
ae4735db C |
1150 | match Ast_c.unwrap_typeC typ with |
1151 | | StructUnion (su, Some s, structType) -> | |
1152 | let structType' = Lib.al_fields structType in | |
708f4980 | 1153 | let ii = Ast_c.get_ii_typeC_take_care tbis in |
91eba41f C |
1154 | let ii' = Lib.al_ii ii in |
1155 | add_binding (StructUnionNameDef (s, ((su, structType'),ii'))) true; | |
1156 | ||
1157 | if need_annotate_body | |
1158 | then k typ (* todo: restrict ? new scope so use do_in_scope ? *) | |
1159 | ||
ae4735db | 1160 | | Enum (sopt, enums) -> |
91eba41f | 1161 | |
ae4735db | 1162 | enums +> List.iter (fun ((name, eopt), iicomma) -> |
b1b2de81 C |
1163 | |
1164 | let s = Ast_c.str_of_name name in | |
91eba41f C |
1165 | |
1166 | if need_annotate_body | |
ae4735db | 1167 | then eopt +> Common.do_option (fun (ieq, e) -> |
91eba41f C |
1168 | Visitor_c.vk_expr bigf e |
1169 | ); | |
1170 | add_binding (EnumConstant (s, sopt)) true; | |
1171 | ); | |
34e49164 C |
1172 | |
1173 | ||
1174 | (* TODO: if have a TypeName, then maybe can fill the option | |
1175 | * information. | |
1176 | *) | |
ae4735db | 1177 | | _ -> |
91eba41f C |
1178 | if need_annotate_body |
1179 | then k typ | |
ae4735db C |
1180 | |
1181 | ); | |
34e49164 | 1182 | |
91eba41f | 1183 | (* ------------------------------------------------------------ *) |
ae4735db | 1184 | Visitor_c.ktoplevel = (fun (k, bigf) elem -> |
34e49164 C |
1185 | _notyped_var := Hashtbl.create 100; |
1186 | match elem with | |
ae4735db | 1187 | | Definition def -> |
b1b2de81 | 1188 | let {f_name = name; |
485bce71 C |
1189 | f_type = ((returnt, (paramst, b)) as ftyp); |
1190 | f_storage = sto; | |
91eba41f C |
1191 | f_body = statxs; |
1192 | f_old_c_style = oldstyle; | |
ae4735db | 1193 | },ii |
91eba41f | 1194 | = def |
34e49164 | 1195 | in |
ae4735db C |
1196 | let (i1, i2) = |
1197 | match ii with | |
1198 | (* what is iifunc1? it should be a type. jll | |
002099fc C |
1199 | * pad: it's the '(' in the function definition. The |
1200 | * return type is part of f_type. | |
1201 | *) | |
ae4735db | 1202 | | iifunc1::iifunc2::ibrace1::ibrace2::ifakestart::isto -> |
34e49164 C |
1203 | iifunc1, iifunc2 |
1204 | | _ -> raise Impossible | |
1205 | in | |
b1b2de81 | 1206 | let funcs = Ast_c.str_of_name name in |
91eba41f | 1207 | |
ae4735db C |
1208 | (match oldstyle with |
1209 | | None -> | |
1210 | let typ' = | |
708f4980 | 1211 | Lib.al_type (Ast_c.mk_ty (FunctionType ftyp) [i1;i2]) in |
91eba41f | 1212 | |
ae4735db | 1213 | add_binding (VarOrFunc (funcs, (typ',islocal i1.Ast_c.pinfo))) |
91eba41f C |
1214 | false; |
1215 | ||
1216 | if need_annotate_body then | |
ae4735db C |
1217 | do_in_new_scope (fun () -> |
1218 | paramst +> List.iter (fun ({p_namei= nameopt; p_type= t},_)-> | |
1219 | match nameopt with | |
b1b2de81 C |
1220 | | Some name -> |
1221 | let s = Ast_c.str_of_name name in | |
97111a47 C |
1222 | let local = |
1223 | (match Ast_c.info_of_type t with | |
1224 | (* if there is no info about the type it must | |
1225 | not be present, so we don't know what the | |
1226 | variable is *) | |
1227 | None -> Ast_c.NotLocalVar | |
1228 | | Some ii -> Ast_c.LocalVar ii) | |
1229 | in | |
91eba41f | 1230 | add_binding (VarOrFunc (s,(Lib.al_type t,local))) true |
ae4735db | 1231 | | None -> |
91eba41f C |
1232 | pr2 "no type, certainly because Void type ?" |
1233 | ); | |
1234 | (* recurse *) | |
1235 | k elem | |
1236 | ); | |
ae4735db | 1237 | | Some oldstyle -> |
91eba41f C |
1238 | (* generate regular function type *) |
1239 | ||
ae4735db | 1240 | pr2 "TODO generate type for function"; |
91eba41f C |
1241 | (* add bindings *) |
1242 | if need_annotate_body then | |
ae4735db | 1243 | do_in_new_scope (fun () -> |
91eba41f | 1244 | (* recurse. should naturally call the kdecl visitor and |
ae4735db | 1245 | * add binding |
91eba41f C |
1246 | *) |
1247 | k elem; | |
1248 | ); | |
1249 | ||
34e49164 | 1250 | ); |
ae4735db | 1251 | | CppTop x -> |
708f4980 | 1252 | (match x with |
ae4735db | 1253 | | Define ((s,ii), (DefineVar, DefineType t)) -> |
708f4980 C |
1254 | add_binding (TypeDef (s,Lib.al_type t)) true; |
1255 | | _ -> k elem | |
ae4735db | 1256 | ) |
708f4980 | 1257 | |
91eba41f C |
1258 | | Declaration _ |
1259 | ||
708f4980 C |
1260 | |
1261 | ||
91eba41f | 1262 | | IfdefTop _ |
ae4735db | 1263 | | MacroTop _ |
91eba41f C |
1264 | | EmptyDef _ |
1265 | | NotParsedCorrectly _ | |
1266 | | FinalDef _ | |
ae4735db | 1267 | -> |
91eba41f | 1268 | k elem |
34e49164 | 1269 | ); |
ae4735db | 1270 | } |
34e49164 | 1271 | in |
91eba41f | 1272 | if just_add_in_env |
ae4735db C |
1273 | then |
1274 | if depth > 1 | |
91eba41f | 1275 | then Visitor_c.vk_toplevel bigf elem |
ae4735db C |
1276 | else |
1277 | Common.profile_code "TAC.annotate_only_included" (fun () -> | |
91eba41f C |
1278 | Visitor_c.vk_toplevel bigf elem |
1279 | ) | |
1280 | else Visitor_c.vk_toplevel bigf elem | |
1281 | ||
1282 | (*****************************************************************************) | |
1283 | (* Entry point *) | |
1284 | (*****************************************************************************) | |
1285 | (* catch all the decl to grow the environment *) | |
1286 | ||
1287 | ||
ae4735db | 1288 | let rec (annotate_program2 : |
91eba41f C |
1289 | environment -> toplevel list -> (toplevel * environment Common.pair) list) = |
1290 | fun env prog -> | |
1291 | ||
ae4735db | 1292 | (* globals (re)initialialisation *) |
91eba41f C |
1293 | _scoped_env := env; |
1294 | _notyped_var := (Hashtbl.create 100); | |
34e49164 | 1295 | |
ae4735db | 1296 | prog +> List.map (fun elem -> |
34e49164 | 1297 | let beforeenv = !_scoped_env in |
91eba41f | 1298 | visit_toplevel ~just_add_in_env:false ~depth:0 elem; |
34e49164 C |
1299 | let afterenv = !_scoped_env in |
1300 | (elem, (beforeenv, afterenv)) | |
1301 | ) | |
1302 | ||
91eba41f C |
1303 | |
1304 | ||
1305 | ||
1306 | (*****************************************************************************) | |
1307 | (* Annotate test *) | |
1308 | (*****************************************************************************) | |
1309 | ||
ae4735db | 1310 | (* julia: for coccinelle *) |
34e49164 C |
1311 | let annotate_test_expressions prog = |
1312 | let rec propagate_test e = | |
1313 | let ((e_term,info),_) = e in | |
1314 | let (ty,_) = !info in | |
1315 | info := (ty,Test); | |
1316 | match e_term with | |
1317 | Binary(e1,Logical(AndLog),e2) | |
1318 | | Binary(e1,Logical(OrLog),e2) -> propagate_test e1; propagate_test e2 | |
1319 | | Unary(e1,Not) -> propagate_test e1 | |
1320 | | ParenExpr(e) -> propagate_test e | |
3a314143 C |
1321 | | FunCall(e,args) -> (* not very nice, but so painful otherwise *) |
1322 | (match (unwrap e,args) with | |
1323 | ((Ident(i),_),[(Left a,_)]) -> | |
1324 | let nm = str_of_name i in | |
1325 | if List.mem nm ["likely";"unlikely"] | |
1326 | then propagate_test a | |
1327 | else () | |
1328 | | _ -> ()) | |
34e49164 C |
1329 | | _ -> () in |
1330 | ||
1331 | let bigf = { Visitor_c.default_visitor_c with | |
1332 | Visitor_c.kexpr = (fun (k,bigf) expr -> | |
708f4980 C |
1333 | (match unwrap_expr expr with |
1334 | CondExpr(e,_,_) -> propagate_test e | |
d6a55602 C |
1335 | | Binary(e1,Logical(AndLog),e2) |
1336 | | Binary(e1,Logical(OrLog),e2) -> propagate_test e1; propagate_test e2 | |
1337 | | Unary(e1,Not) -> propagate_test e1 | |
1338 | | _ -> () | |
91eba41f C |
1339 | ); |
1340 | k expr | |
1341 | ); | |
34e49164 | 1342 | Visitor_c.kstatement = (fun (k, bigf) st -> |
ae4735db | 1343 | match unwrap_st st with |
34e49164 C |
1344 | Selection(s) -> |
1345 | (match s with If(e1,s1,s2) -> propagate_test e1 | _ -> ()); | |
1346 | k st; | |
1347 | | Iteration(i) -> | |
1348 | (match i with | |
1349 | While(e,s) -> propagate_test e | |
1350 | | DoWhile(s,e) -> propagate_test e | |
1351 | | For(_,es,_,_) -> | |
1352 | (match unwrap es with Some e -> propagate_test e | None -> ()) | |
1353 | | _ -> ()); | |
1354 | k st | |
91eba41f | 1355 | | _ -> k st |
ae4735db | 1356 | ) |
91eba41f | 1357 | } in |
ae4735db | 1358 | (prog +> List.iter (fun elem -> |
34e49164 C |
1359 | Visitor_c.vk_toplevel bigf elem |
1360 | )) | |
1361 | ||
91eba41f C |
1362 | |
1363 | ||
1364 | (*****************************************************************************) | |
1365 | (* Annotate types *) | |
1366 | (*****************************************************************************) | |
ae4735db | 1367 | let annotate_program env prog = |
91eba41f C |
1368 | Common.profile_code "TAC.annotate_program" |
1369 | (fun () -> | |
1370 | let res = annotate_program2 env prog in | |
34e49164 | 1371 | annotate_test_expressions prog; |
91eba41f C |
1372 | res |
1373 | ) | |
1374 | ||
ae4735db | 1375 | let annotate_type_and_localvar env prog = |
91eba41f C |
1376 | Common.profile_code "TAC.annotate_type" |
1377 | (fun () -> annotate_program2 env prog) | |
1378 | ||
1379 | ||
1380 | (*****************************************************************************) | |
1381 | (* changing default typing environment, do concatenation *) | |
ae4735db | 1382 | let init_env filename = |
91eba41f C |
1383 | pr2 ("init_env: " ^ filename); |
1384 | let (ast2, _stat) = Parse_c.parse_c_and_cpp filename in | |
1385 | let ast = Parse_c.program_of_program2 ast2 in | |
1386 | ||
1387 | let res = annotate_type_and_localvar !initial_env ast in | |
1388 | match List.rev res with | |
1389 | | [] -> pr2 "empty environment" | |
ae4735db | 1390 | | (_top,(env1,env2))::xs -> |
91eba41f C |
1391 | initial_env := !initial_env ++ env2; |
1392 | () | |
1393 |