Release coccinelle-0.1.8
[bpt/coccinelle.git] / parsing_c / type_annoter_c.ml
CommitLineData
0708f913
C
1(* Yoann Padioleau
2 *
3 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes,
4 * Copyright (C) 2009 University of Urbana Champaign
34e49164
C
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
91eba41f 15
34e49164
C
16open Common
17
18open Ast_c
19
20module Lib = Lib_parsing_c
21
22(*****************************************************************************)
91eba41f
C
23(* Prelude *)
24(*****************************************************************************)
25(* History:
26 * - Done a first type checker in 2002, cf typing-semantic/, but
27 * was assuming that have all type info, and so was assuming had called
28 * cpp and everything was right.
29 * - Wrote this file, in 2006?, as we added pattern matching on type
30 * in coccinelle. Partial type annotater.
31 * - Julia extended it in 2008? to have localvar/notlocalvar and
32 * test/notest information, again used by coccinelle.
0708f913 33 * - I extended it in Fall 2008 to have more type information for the
91eba41f
C
34 * global analysis. I also added some optimisations to process
35 * included code faster.
36 *
37 *
38 * Design choices. Can either do:
39 * - a kind of inferer
40 * - can first do a simple inferer, that just pass context
41 * - then a real inferer, managing partial info.
34e49164
C
42 * type context = fullType option
43 *
44 * - extract the information from the .h files
91eba41f
C
45 * (so no inference at all needed)
46 *
47 * Difference with julia's code in parsing_cocci/type_infer.ml:
48 * - She handles just the variable namespace. She does not type
49 * field access or enum or macros. This is because cocci programs are
50 * usually simple and have no structure definition or macro definitions
51 * that we need to type anyway.
52 * - She does more propagation.
53 * - She does not have to handle the typedef isomorphism which force me
54 * to use those typedef_fix and type_unfold_one_step
55 * - She does not handle I think the function pointer C isomorphism.
56 *
57 * - She has a cleaner type_cocci without any info. In my case
58 * I need to do those ugly al_type, or generate fake infos.
59 * - She has more compact code. Perhaps because she does not have to
60 * handle the extra exp_info that she added on me :) So I need those
61 * do_with_type, make_info_xxx, etc.
62 *
63 * Note: if need to debug this annotater, use -show_trace_profile, it can
64 * help. You can also set the typedef_debug flag below.
65 *
66 *
34e49164
C
67 *
68 * todo: expression contain types, and statements, which in turn can contain
69 * expression, so need recurse. Need define an annote_statement and
70 * annotate_type.
91eba41f 71 *
34e49164
C
72 * todo: how deal with typedef isomorphisms ? How store them in Ast_c ?
73 * store all posible variations in ast_c ? a list of type instead of just
74 * the type ?
75 *
0708f913
C
76 * todo: how to handle multiple possible definitions for entities like
77 * struct or typedefs ? Because of ifdef, we should store list of
78 * possibilities sometimes.
79 *
34e49164
C
80 * todo: define a new type ? like type_cocci ? where have a bool ?
81 *
91eba41f 82 * semi: How handle scope ? When search for type of field, we return
34e49164
C
83 * a type, but this type makes sense only in a certain scope.
84 * We could add a tag to each typedef, structUnionName to differentiate
85 * them and also associate in ast_c to the type the scope
86 * of this type, the env that were used to define this type.
91eba41f
C
87 *
88 * todo: handle better the search in previous env, the env'. Cf the
89 * termination problem in typedef_fix when I was searching in the same
90 * env.
91 *
34e49164
C
92 *)
93
94(*****************************************************************************)
95(* Wrappers *)
96(*****************************************************************************)
708f4980 97let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_type
0708f913 98
34e49164
C
99(*****************************************************************************)
100(* Environment *)
101(*****************************************************************************)
102
91eba41f 103(* The different namespaces from stdC manual:
34e49164
C
104 *
105 * You introduce two new name spaces with every block that you write.
91eba41f
C
106 *
107 * One name space includes all
108 * - functions,
109 * - objects,
110 * - type definitions,
111 * - and enumeration constants
112 * that you declare or define within the block.
113 *
114 * The other name space includes all
115 * - enumeration,
116 * - structure,
117 * - and union
118 * *tags* that you define within the block.
34e49164
C
119 *
120 * You introduce a new member name space with every structure or union
121 * whose content you define. You identify a member name space by the
122 * type of left operand that you write for a member selection
123 * operator, as in x.y or p->y. A member name space ends with the end
124 * of the block in which you declare it.
125 *
126 * You introduce a new goto label name space with every function
127 * definition you write. Each goto label name space ends with its
128 * function definition.
129 *)
130
131(* But I don't try to do a type-checker, I try to "resolve" type of var
132 * so don't need make difference between namespaces here.
133 *
134 * But, why not make simply a (string, kindstring) assoc ?
135 * Because we dont want that a variable shadow a struct definition, because
136 * they are still in 2 different namespace. But could for typedef,
137 * because VarOrFunc and Typedef are in the same namespace.
138 * But could do a record as in c_info.ml
139 *)
140
141
91eba41f
C
142(* This type contains all "ident" like notion of C. Each time in Ast_c
143 * you have a string type (as in expression, function name, fields)
144 * then you need to manage the scope of this ident.
145 *
146 * The wrap for StructUnionNameDef contain the whole ii, the i for
147 * the string, the structUnion and the structType.
148 *
149 * Put Macro here ? after all the scoping rules for cpp macros is different
150 * and so does not vanish after the closing '}'.
151 *
152 * todo: EnumDef
34e49164
C
153 *)
154type namedef =
155 | VarOrFunc of string * Ast_c.exp_type
91eba41f
C
156 | EnumConstant of string * string option
157
708f4980 158 (* also used for macro type aliases *)
91eba41f
C
159 | TypeDef of string * fullType
160 (* the structType contains nested "idents" with struct scope *)
34e49164 161 | StructUnionNameDef of string * (structUnion * structType) wrap
34e49164 162
91eba41f 163 (* cppext: *)
b1b2de81 164 | Macro of string * (define_kind * define_val)
91eba41f
C
165
166
167(* Because have nested scope, have nested list, hence the list list.
168 *
169 * opti? use a hash to accelerate ? hmm but may have some problems
170 * with hash to handle recursive lookup. For instance for the typedef
171 * example where have mutually recursive definition of the type,
172 * we must take care to not loop by starting the second search
173 * from the previous environment. With the list scheme in
174 * lookup_env below it's quite easy to do. With hash it may be
175 * more complicated.
176*)
34e49164
C
177type environment = namedef list list
178
91eba41f
C
179
180(* ------------------------------------------------------------ *)
181(* can be modified by the init_env function below, by
182 * the file environment_unix.h
183 *)
184let initial_env = ref [
185 [VarOrFunc("NULL",
186 (Lib.al_type (Parse_c.type_of_string "void *"),
187 Ast_c.NotLocalVar));
188
189 (*
190 VarOrFunc("malloc",
191 (Lib.al_type(Parse_c.type_of_string "void* (*)(int size)"),
192 Ast_c.NotLocalVar));
193 VarOrFunc("free",
194 (Lib.al_type(Parse_c.type_of_string "void (*)(void *ptr)"),
195 Ast_c.NotLocalVar));
196 *)
197 ]
34e49164
C
198]
199
200
91eba41f
C
201let typedef_debug = ref false
202
203
204(* ------------------------------------------------------------ *)
205(* generic, lookup and also return remaining env for further lookup *)
206let rec lookup_env2 f env =
34e49164
C
207 match env with
208 | [] -> raise Not_found
91eba41f 209 | []::zs -> lookup_env2 f zs
34e49164 210 | (x::xs)::zs ->
91eba41f
C
211 (match f x with
212 | None -> lookup_env2 f (xs::zs)
34e49164 213 | Some y -> y, xs::zs
91eba41f
C
214 )
215let lookup_env a b =
216 Common.profile_code "TAC.lookup_env" (fun () -> lookup_env2 a b)
217
218
219
220let member_env lookupf env =
221 try
222 let _ = lookupf env in
223 true
224 with Not_found -> false
225
226
227
228
229(* ------------------------------------------------------------ *)
34e49164 230
34e49164
C
231
232let lookup_var s env =
233 let f = function
b1b2de81 234 | VarOrFunc (s2, typ) -> if s2 =$= s then Some typ else None
34e49164
C
235 | _ -> None
236 in
237 lookup_env f env
238
239let lookup_typedef s env =
91eba41f 240 if !typedef_debug then pr2 ("looking for: " ^ s);
34e49164 241 let f = function
b1b2de81 242 | TypeDef (s2, typ) -> if s2 =$= s then Some typ else None
34e49164
C
243 | _ -> None
244 in
245 lookup_env f env
246
247let lookup_structunion (_su, s) env =
248 let f = function
b1b2de81 249 | StructUnionNameDef (s2, typ) -> if s2 =$= s then Some typ else None
34e49164
C
250 | _ -> None
251 in
252 lookup_env f env
253
91eba41f
C
254let lookup_macro s env =
255 let f = function
b1b2de81 256 | Macro (s2, typ) -> if s2 =$= s then Some typ else None
91eba41f
C
257 | _ -> None
258 in
259 lookup_env f env
260
261let lookup_enum s env =
262 let f = function
b1b2de81 263 | EnumConstant (s2, typ) -> if s2 =$= s then Some typ else None
91eba41f
C
264 | _ -> None
265 in
266 lookup_env f env
267
268
269let lookup_typedef a b =
270 Common.profile_code "TAC.lookup_typedef" (fun () -> lookup_typedef a b)
271
272
34e49164
C
273
274(*****************************************************************************)
275(* "type-lookup" *)
276(*****************************************************************************)
277
278(* find_final_type is used to know to what type a field correspond in
279 * x.foo. Sometimes the type of x is a typedef or a structName in which
280 * case we must look in environment to find the complete type, here
281 * structUnion that contains the information.
282 *
283 * Because in C one can redefine in nested blocks some typedefs,
284 * struct, or variables, we have a static scoping resolving process.
285 * So, when we look for the type of a var, if this var is in an
286 * enclosing block, then maybe its type refer to a typdef of this
287 * enclosing block, so must restart the "type-resolving" of this
288 * typedef from this enclosing block, not from the bottom. So our
289 * "resolving-type functions" take an env and also return an env from
290 * where the next search must be performed. *)
291
292(*
293let rec find_final_type ty env =
294
295 match Ast_c.unwrap_typeC ty with
296 | BaseType x -> (BaseType x) +> Ast_c.rewrap_typeC ty
297
298 | Pointer t -> (Pointer (find_final_type t env)) +> Ast_c.rewrap_typeC ty
299 | Array (e, t) -> Array (e, find_final_type t env) +> Ast_c.rewrap_typeC ty
300
301 | StructUnion (sopt, su) -> StructUnion (sopt, su) +> Ast_c.rewrap_typeC ty
302
303 | FunctionType t -> (FunctionType t) (* todo ? *) +> Ast_c.rewrap_typeC ty
304 | Enum (s, enumt) -> (Enum (s, enumt)) (* todo? *) +> Ast_c.rewrap_typeC ty
305 | EnumName s -> (EnumName s) (* todo? *) +> Ast_c.rewrap_typeC ty
306
307 | StructUnionName (su, s) ->
308 (try
309 let ((structtyp,ii), env') = lookup_structunion (su, s) env in
310 Ast_c.nQ, (StructUnion (Some s, structtyp), ii)
311 (* old: +> Ast_c.rewrap_typeC ty
312 * but must wrap with good ii, otherwise pretty_print_c
313 * will be lost and raise some Impossible
314 *)
315 with Not_found ->
316 ty
317 )
318
319 | TypeName s ->
320 (try
321 let (t', env') = lookup_typedef s env in
322 find_final_type t' env'
323 with Not_found ->
324 ty
325 )
326
327 | ParenType t -> find_final_type t env
328 | Typeof e -> failwith "typeof"
329*)
330
331
332
333
91eba41f 334(* ------------------------------------------------------------ *)
34e49164
C
335let rec type_unfold_one_step ty env =
336
337 match Ast_c.unwrap_typeC ty with
91eba41f
C
338 | BaseType x -> ty
339 | Pointer t -> ty
340 | Array (e, t) -> ty
341
34e49164
C
342 | StructUnion (sopt, su, fields) -> ty
343
91eba41f 344 | FunctionType t -> ty
34e49164 345 | Enum (s, enumt) -> ty
91eba41f
C
346
347 | EnumName s -> ty (* todo: look in env when will have EnumDef *)
34e49164
C
348
349 | StructUnionName (su, s) ->
350 (try
351 let (((su,fields),ii), env') = lookup_structunion (su, s) env in
708f4980 352 Ast_c.mk_ty (StructUnion (su, Some s, fields)) ii
34e49164
C
353 (* old: +> Ast_c.rewrap_typeC ty
354 * but must wrap with good ii, otherwise pretty_print_c
355 * will be lost and raise some Impossible
356 *)
357 with Not_found ->
358 ty
359 )
360
b1b2de81
C
361 | TypeName (name, _typ) ->
362 let s = Ast_c.str_of_name name in
34e49164 363 (try
91eba41f 364 if !typedef_debug then pr2 "type_unfold_one_step: lookup_typedef";
34e49164
C
365 let (t', env') = lookup_typedef s env in
366 type_unfold_one_step t' env'
91eba41f 367 with Not_found ->
34e49164
C
368 ty
369 )
370
371 | ParenType t -> type_unfold_one_step t env
372 | TypeOfExpr e ->
373 pr2_once ("Type_annoter: not handling typeof");
374 ty
375 | TypeOfType t -> type_unfold_one_step t env
376
377
378
34e49164
C
379
380
381
34e49164
C
382
383
384
91eba41f 385(* normalizer. can be seen as the opposite of the previous function as
0708f913
C
386 * we "fold" at least for the structUnion. Should return something that
387 * Type_c.is_completed_fullType likes, something that makes it easier
388 * for the programmer to work on, that has all the needed information
389 * for most tasks.
91eba41f 390 *)
34e49164 391let rec typedef_fix ty env =
34e49164 392 match Ast_c.unwrap_typeC ty with
91eba41f
C
393 | BaseType x ->
394 ty
395 | Pointer t ->
396 Pointer (typedef_fix t env) +> Ast_c.rewrap_typeC ty
397 | Array (e, t) ->
398 Array (e, typedef_fix t env) +> Ast_c.rewrap_typeC ty
399 | StructUnion (su, sopt, fields) ->
400 (* normalize, fold.
401 * todo? but what if correspond to a nested struct def ?
402 *)
403 Type_c.structdef_to_struct_name ty
34e49164
C
404 | FunctionType ft ->
405 (FunctionType ft) (* todo ? *) +> Ast_c.rewrap_typeC ty
406 | Enum (s, enumt) ->
407 (Enum (s, enumt)) (* todo? *) +> Ast_c.rewrap_typeC ty
408 | EnumName s ->
409 (EnumName s) (* todo? *) +> Ast_c.rewrap_typeC ty
410
411 (* we prefer StructUnionName to StructUnion when it comes to typed metavar *)
412 | StructUnionName (su, s) -> ty
91eba41f
C
413
414 (* keep the typename but complete with more information *)
b1b2de81
C
415 | TypeName (name, typ) ->
416 let s = Ast_c.str_of_name name in
0708f913
C
417 (match typ with
418 | Some _ ->
419 pr2 ("typedef value already there:" ^ s);
420 ty
421 | None ->
422 (try
91eba41f 423 if !typedef_debug then pr2 "typedef_fix: lookup_typedef";
34e49164 424 let (t', env') = lookup_typedef s env in
91eba41f
C
425
426 (* bugfix: termination bug if use env instead of env' below, because
0708f913 427 * can have some weird mutually recursive typedef which
91eba41f
C
428 * each new type alias search for its mutual def.
429 *)
b1b2de81 430 TypeName (name, Some (typedef_fix t' env')) +> Ast_c.rewrap_typeC ty
91eba41f 431 with Not_found ->
34e49164 432 ty
0708f913
C
433 ))
434
91eba41f
C
435 (* remove paren for better matching with typed metavar. kind of iso again *)
436 | ParenType t ->
437 typedef_fix t env
34e49164
C
438 | TypeOfExpr e ->
439 pr2_once ("Type_annoter: not handling typeof");
440 ty
441
91eba41f
C
442 | TypeOfType t ->
443 typedef_fix t env
444
445
446(*****************************************************************************)
447(* Helpers, part 1 *)
448(*****************************************************************************)
449
450let type_of_s2 s =
451 (Lib.al_type (Parse_c.type_of_string s))
452let type_of_s a =
453 Common.profile_code "Type_c.type_of_s" (fun () -> type_of_s2 a)
454
455
456(* pad: pb on:
457 * /home/pad/software-os-src2/freebsd/contrib/ipfilter/netinet/ip_fil_freebsd.c
458 * because in the code there is:
459 * static iss_seq_off = 0;
460 * which in the parser was generating a default int without a parse_info.
461 * I now add a fake parse_info for such default int so no more failwith
462 * normally.
463 *)
708f4980
C
464let offset ft =
465 let (qu, ty) = ft in
466 (* bugfix: because of string->name, the ii can be deeper *)
467 let ii = Ast_c.get_local_ii_of_tybis_inlining_ii_of_name ty in
468 match ii with
469 | ii::_ -> ii.Ast_c.pinfo
470 | [] -> failwith "type has no text; need to think again"
91eba41f
C
471
472
473
474let rec is_simple_expr expr =
475 match Ast_c.unwrap_expr expr with
476 (* todo? handle more special cases ? *)
477
478 | Ident _ ->
479 true
480 | Constant (_) ->
481 true
482 | Unary (op, e) ->
483 true
484 | Binary (e1, op, e2) ->
485 true
486 | Cast (t, e) ->
487 true
488 | ParenExpr (e) -> is_simple_expr e
489
490 | _ -> false
491
492(*****************************************************************************)
493(* Typing rules *)
494(*****************************************************************************)
495(* now in type_c.ml *)
496
497
34e49164
C
498
499(*****************************************************************************)
500(* (Semi) Globals, Julia's style *)
501(*****************************************************************************)
502
503(* opti: cache ? use hash ? *)
91eba41f 504let _scoped_env = ref !initial_env
34e49164
C
505
506(* memoise unnanoted var, to avoid too much warning messages *)
507let _notyped_var = ref (Hashtbl.create 100)
508
509let new_scope() = _scoped_env := []::!_scoped_env
510let del_scope() = _scoped_env := List.tl !_scoped_env
511
512let do_in_new_scope f =
513 begin
514 new_scope();
515 let res = f() in
516 del_scope();
517 res
518 end
519
520let add_in_scope namedef =
521 let (current, older) = Common.uncons !_scoped_env in
522 _scoped_env := (namedef::current)::older
523
91eba41f
C
524
525(* ------------------------------------------------------------ *)
526
34e49164
C
527(* sort of hackish... *)
528let islocal info =
b1b2de81 529 if List.length (!_scoped_env) =|= List.length !initial_env
34e49164
C
530 then Ast_c.NotLocalVar
531 else Ast_c.LocalVar info
532
91eba41f 533(* ------------------------------------------------------------ *)
34e49164 534(* the warning argument is here to allow some binding to overwrite an
91eba41f
C
535 * existing one. With function, we first have the prototype and then the def,
536 * and the def binding with the same string is not an error.
537 *
34e49164 538 * todo?: but if we define two times the same function, then we will not
91eba41f 539 * detect it :( it would require to make a diff between adding a binding
34e49164 540 * from a prototype and from a definition.
91eba41f
C
541 *
542 * opti: disabling the check_annotater flag have some important
543 * performance benefit.
544 *
34e49164 545 *)
91eba41f 546let add_binding2 namedef warning =
34e49164
C
547 let (current_scope, _older_scope) = Common.uncons !_scoped_env in
548
91eba41f 549 if !Flag_parsing_c.check_annotater then begin
34e49164 550 (match namedef with
91eba41f
C
551 | VarOrFunc (s, typ) ->
552 if Hashtbl.mem !_notyped_var s
553 then pr2 ("warning: found typing information for a variable that was" ^
554 "previously unknown:" ^ s);
555 | _ -> ()
556 );
557
558 let (memberf, s) =
559 (match namedef with
560 | VarOrFunc (s, typ) ->
561 member_env (lookup_var s), s
562 | TypeDef (s, typ) ->
563 member_env (lookup_typedef s), s
564 | StructUnionNameDef (s, (su, typ)) ->
565 member_env (lookup_structunion (su, s)), s
566 | Macro (s, body) ->
567 member_env (lookup_macro s), s
568 | EnumConstant (s, body) ->
569 member_env (lookup_enum s), s
570 ) in
571
572 if memberf [current_scope] && warning
573 then pr2 ("Type_annoter: warning, " ^ s ^
574 " is already in current binding" ^ "\n" ^
0708f913 575 " so there is a weird shadowing");
91eba41f 576 end;
34e49164 577 add_in_scope namedef
91eba41f
C
578
579let add_binding namedef warning =
580 Common.profile_code "TAC.add_binding" (fun () -> add_binding2 namedef warning)
34e49164
C
581
582
91eba41f 583
34e49164 584(*****************************************************************************)
91eba41f 585(* Helpers, part 2 *)
34e49164
C
586(*****************************************************************************)
587
91eba41f
C
588let lookup_opt_env lookupf s =
589 Common.optionise (fun () ->
590 lookupf s !_scoped_env
591 )
592
593let unwrap_unfold_env2 typ =
594 Ast_c.unwrap_typeC
595 (type_unfold_one_step typ !_scoped_env)
596let unwrap_unfold_env typ =
597 Common.profile_code "TAC.unwrap_unfold_env" (fun () -> unwrap_unfold_env2 typ)
34e49164 598
91eba41f
C
599let typedef_fix a b =
600 Common.profile_code "TAC.typedef_fix" (fun () -> typedef_fix a b)
34e49164 601
91eba41f
C
602let make_info_def_fix x =
603 Type_c.make_info_def (typedef_fix x !_scoped_env)
34e49164 604
91eba41f
C
605let make_info_fix (typ, local) =
606 Type_c.make_info ((typedef_fix typ !_scoped_env),local)
607
608
609let make_info_def = Type_c.make_info_def
34e49164 610
34e49164 611(*****************************************************************************)
91eba41f 612(* Main typer code, put later in a visitor *)
34e49164 613(*****************************************************************************)
34e49164 614
91eba41f 615let annotater_expr_visitor_subpart = (fun (k,bigf) expr ->
34e49164 616
91eba41f
C
617 let ty =
618 match Ast_c.unwrap_expr expr with
619
620 (* -------------------------------------------------- *)
621 (* todo: should analyse the 's' for int to know if unsigned or not *)
622 | Constant (String (s,kind)) -> make_info_def (type_of_s "char *")
0708f913 623 | Constant MultiString _ -> make_info_def (type_of_s "char *")
91eba41f 624 | Constant (Char (s,kind)) -> make_info_def (type_of_s "char")
708f4980
C
625 | Constant (Int (s,kind)) ->
626 (* this seems really unpleasant, but perhaps the type needs to be set
627 up in some way that allows pretty printing *)
628 make_info_def
629 (match kind with
630 (* matches limited by what is generated in lexer_c.mll *)
631 Si(Signed,CInt) -> type_of_s "int"
632 | Si(UnSigned,CInt) -> type_of_s "unsigned int"
633 | Si(Signed,CLong) -> type_of_s "long"
634 | Si(UnSigned,CLong) -> type_of_s "unsigned long"
635 | Si(Signed,CLongLong) -> type_of_s "long long"
636 | Si(UnSigned,CLongLong) -> type_of_s "unsigned long long"
637 | _ -> failwith "unexpected kind for constant")
91eba41f
C
638 | Constant (Float (s,kind)) ->
639 let fake = Ast_c.fakeInfo (Common.fake_parse_info) in
640 let fake = Ast_c.rewrap_str "float" fake in
641 let iinull = [fake] in
708f4980 642 make_info_def (Ast_c.mk_ty (BaseType (FloatType kind)) iinull)
91eba41f
C
643
644
645 (* -------------------------------------------------- *)
646 (* note: could factorize this code with the code for Ident
647 * and the other code for Funcall below. But as the Ident can be
648 * a macro-func, I prefer to handle it separately. So
649 * this rule can handle the macro-func, the Ident-rule can handle
650 * the macro-var, and the other FunCall-rule the regular
651 * function calls through fields.
652 * Also as I don't want a warning on the Ident that are a FunCall,
653 * easier to have a rule separate from the Ident rule.
654 *)
708f4980
C
655 | FunCall (e1, args) ->
656 (match Ast_c.unwrap_expr e1 with
657 | Ident (ident) ->
91eba41f
C
658
659 (* recurse *)
660 args +> List.iter (fun (e,ii) ->
661 (* could typecheck if arguments agree with prototype *)
662 Visitor_c.vk_argument bigf e
663 );
b1b2de81 664 let s = Ast_c.str_of_name ident in
91eba41f
C
665 (match lookup_opt_env lookup_var s with
666 | Some ((typ,local),_nextenv) ->
667
668 (* set type for ident *)
669 let tyinfo = make_info_fix (typ, local) in
670 Ast_c.set_type_expr e1 tyinfo;
671
672 (match unwrap_unfold_env typ with
673 | FunctionType (ret, params) -> make_info_def ret
674
675 (* can be function pointer, C have an iso for that,
676 * same pfn() syntax than regular function call.
677 *)
678 | Pointer (typ2) ->
679 (match unwrap_unfold_env typ2 with
680 | FunctionType (ret, params) -> make_info_def ret
681 | _ -> Type_c.noTypeHere
682 )
683 | _ -> Type_c.noTypeHere
684 )
685 | None ->
686
687 (match lookup_opt_env lookup_macro s with
688 | Some ((defkind, defval), _nextenv) ->
689 (match defkind, defval with
690 | DefineFunc _, DefineExpr e ->
691 let rettype = Ast_c.get_onlytype_expr e in
692
693 (* todo: could also set type for ident ?
694 have return type and at least type of concrete
695 parameters so can generate a fake FunctionType
696 *)
697 let macrotype_opt =
698 Type_c.fake_function_type rettype args
699 in
700
701 macrotype_opt +> Common.do_option (fun t ->
702 pr2 ("Type_annotater: generate fake function type" ^
703 "for macro: " ^ s);
704 let tyinfo = make_info_def_fix t in
705 Ast_c.set_type_expr e1 tyinfo;
706 );
707
708 Ast_c.get_type_expr e
709 | DefineVar, _ ->
710 pr2 ("Type_annoter: not a macro-func: " ^ s);
711 Type_c.noTypeHere
712 | DefineFunc _, _ ->
713 (* normally the FunCall case should have catch it *)
714 pr2 ("Type_annoter: not a macro-func-expr: " ^ s);
715 Type_c.noTypeHere
716 )
717 | None ->
0708f913 718 pr2_once ("type_annotater: no type for function ident: " ^ s);
91eba41f
C
719 Type_c.noTypeHere
720 )
721 )
34e49164 722
34e49164 723
708f4980 724 | _e ->
91eba41f
C
725 k expr;
726
708f4980 727 (Ast_c.get_type_expr e1) +> Type_c.do_with_type (fun typ ->
91eba41f
C
728 (* copy paste of above *)
729 (match unwrap_unfold_env typ with
730 | FunctionType (ret, params) -> make_info_def ret
731 | Pointer (typ) ->
732 (match unwrap_unfold_env typ with
733 | FunctionType (ret, params) -> make_info_def ret
734 | _ -> Type_c.noTypeHere
735 )
736 | _ -> Type_c.noTypeHere
34e49164 737 )
91eba41f 738 )
708f4980 739 )
91eba41f
C
740
741
742 (* -------------------------------------------------- *)
b1b2de81
C
743 | Ident (ident) ->
744 let s = Ast_c.str_of_name ident in
91eba41f
C
745 (match lookup_opt_env lookup_var s with
746 | Some ((typ,local),_nextenv) ->
747 make_info_fix (typ,local)
748 | None ->
749 (match lookup_opt_env lookup_macro s with
750 | Some ((defkind, defval), _nextenv) ->
751 (match defkind, defval with
752 | DefineVar, DefineExpr e ->
753 Ast_c.get_type_expr e
754 | DefineVar, _ ->
755 pr2 ("Type_annoter: not a expression: " ^ s);
756 Type_c.noTypeHere
757 | DefineFunc _, _ ->
758 (* normally the FunCall case should have catch it *)
759 pr2 ("Type_annoter: not a macro-var: " ^ s);
760 Type_c.noTypeHere
761 )
762 | None ->
763 (match lookup_opt_env lookup_enum s with
764 | Some (_, _nextenv) ->
765 make_info_def (type_of_s "int")
766 | None ->
767 if not (s =~ "[A-Z_]+") (* if macro then no warning *)
768 then
769 if !Flag_parsing_c.check_annotater then
770 if not (Hashtbl.mem !_notyped_var s)
771 then begin
772 pr2 ("Type_annoter: not finding type for: " ^ s);
773 Hashtbl.add !_notyped_var s true;
774 end
775 else ()
776 else
777 pr2 ("Type_annoter: not finding type for: " ^ s)
778 ;
779 Type_c.noTypeHere
780 )
781 )
782 )
783
784 (* -------------------------------------------------- *)
785 (* C isomorphism on type on array and pointers *)
786 | Unary (e, DeRef)
787 | ArrayAccess (e, _) ->
788 k expr; (* recurse to set the types-ref of sub expressions *)
789
790 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t ->
791 (* todo: maybe not good env !! *)
792 match unwrap_unfold_env t with
793 | Pointer x
794 | Array (_, x) ->
795 make_info_def_fix x
796 | _ -> Type_c.noTypeHere
797
798 )
799
800 | Unary (e, GetRef) ->
801 k expr; (* recurse to set the types-ref of sub expressions *)
802
803 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t ->
804 (* must generate an element so that '=' can be used
805 * to compare type ?
806 *)
807 let fake = Ast_c.fakeInfo Common.fake_parse_info in
808 let fake = Ast_c.rewrap_str "*" fake in
809
708f4980 810 let ft = Ast_c.mk_ty (Pointer t) [fake] in
91eba41f
C
811 make_info_def_fix ft
812 )
813
814
815 (* -------------------------------------------------- *)
816 (* fields *)
b1b2de81
C
817 | RecordAccess (e, namefld)
818 | RecordPtAccess (e, namefld) as x ->
819
820 let fld = Ast_c.str_of_name namefld in
34e49164 821
91eba41f
C
822 k expr; (* recurse to set the types-ref of sub expressions *)
823
824 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t ->
825
826 let topt =
827 match x with
828 | RecordAccess _ -> Some t
829 | RecordPtAccess _ ->
830 (match unwrap_unfold_env t with
831 | Pointer (t) -> Some t
832 | _ -> None
34e49164 833 )
91eba41f
C
834 | _ -> raise Impossible
835
836 in
837 (match topt with
838 | None -> Type_c.noTypeHere
839 | Some t ->
840 match unwrap_unfold_env t with
841 | StructUnion (su, sopt, fields) ->
842 (try
843 (* todo: which env ? *)
844 make_info_def_fix
845 (Type_c.type_field fld (su, fields))
846 with
847 | Not_found ->
848 pr2 (spf
849 "TYPE-ERROR: field '%s' does not belong in struct %s"
850 fld (match sopt with Some s -> s |_ -> "<anon>"));
851 Type_c.noTypeHere
0708f913 852 | Multi_found ->
91eba41f
C
853 pr2 "TAC:MultiFound";
854 Type_c.noTypeHere
855 )
856 | _ -> Type_c.noTypeHere
34e49164 857 )
91eba41f
C
858 )
859
860
861
862 (* -------------------------------------------------- *)
863 | Cast (t, e) ->
864 k expr;
865 (* todo: if infer, can "push" info ? add_types_expr [t] e ? *)
866 make_info_def_fix (Lib.al_type t)
867
868 (* todo? lub, hmm maybe not, cos type must be e1 *)
869 | Assignment (e1, op, e2) ->
870 k expr;
b1b2de81
C
871 (* value of an assignment is the value of the RHS expression *)
872 Ast_c.get_type_expr e2
91eba41f
C
873 | Sequence (e1, e2) ->
874 k expr;
875 Ast_c.get_type_expr e2
876
708f4980
C
877 | Binary (e1, Logical _, e2) ->
878 k expr;
879 make_info_def (type_of_s "int")
880
91eba41f 881 (* todo: lub *)
708f4980 882 | Binary (e1, Arith op, e2) ->
91eba41f 883 k expr;
708f4980 884 Type_c.lub op (Type_c.get_opt_type e1) (Type_c.get_opt_type e2)
91eba41f
C
885
886 | CondExpr (cond, e1opt, e2) ->
887 k expr;
888 Ast_c.get_type_expr e2
34e49164 889
34e49164 890
91eba41f
C
891 | ParenExpr e ->
892 k expr;
893 Ast_c.get_type_expr e
894
895 | Infix (e, op) | Postfix (e, op) ->
896 k expr;
897 Ast_c.get_type_expr e
898
899 (* pad: julia wrote this ? *)
900 | Unary (e, UnPlus) ->
901 k expr; (* recurse to set the types-ref of sub expressions *)
902 make_info_def (type_of_s "int")
903 (* todo? can convert from unsigned to signed if UnMinus ? *)
904 | Unary (e, UnMinus) ->
905 k expr; (* recurse to set the types-ref of sub expressions *)
906 make_info_def (type_of_s "int")
907
908 | SizeOfType _|SizeOfExpr _ ->
909 k expr; (* recurse to set the types-ref of sub expressions *)
910 make_info_def (type_of_s "int")
911
912 | Constructor (ft, ini) ->
913 k expr; (* recurse to set the types-ref of sub expressions *)
914 make_info_def (Lib.al_type ft)
915
916 | Unary (e, Not) ->
917 k expr; (* recurse to set the types-ref of sub expressions *)
918 Ast_c.get_type_expr e
919 | Unary (e, Tilde) ->
920 k expr; (* recurse to set the types-ref of sub expressions *)
921 Ast_c.get_type_expr e
922
923 (* -------------------------------------------------- *)
924 (* todo *)
925 | Unary (_, GetRefLabel) ->
926 k expr; (* recurse to set the types-ref of sub expressions *)
927 pr2_once "Type annotater:not handling GetRefLabel";
928 Type_c.noTypeHere
929 (* todo *)
930 | StatementExpr _ ->
931 k expr; (* recurse to set the types-ref of sub expressions *)
932 pr2_once "Type annotater:not handling GetRefLabel";
933 Type_c.noTypeHere
934 (*
935 | _ -> k expr; Type_c.noTypeHere
936 *)
937
938 in
939 Ast_c.set_type_expr expr ty
940
941)
942
943
944(*****************************************************************************)
945(* Visitor *)
946(*****************************************************************************)
947
948(* Processing includes that were added after a cpp_ast_c makes the
949 * type annotater quite slow, especially when the depth of cpp_ast_c is
950 * big. But for such includes the only thing we really want is to modify
951 * the environment to have enough type information. We don't need
952 * to type the expressions inside those includes (they will be typed
953 * when we process the include file directly). Here the goal is
954 * to not recurse.
955 *
956 * Note that as usually header files contain mostly structure
957 * definitions and defines, that means we still have to do lots of work.
958 * We only win on function definition bodies, but usually header files
959 * have just prototypes, or inline function definitions which anyway have
960 * usually a small body. But still, we win. It also makes clearer
961 * that when processing include as we just need the environment, the caller
962 * of this module can do further optimisations such as memorising the
963 * state of the environment after each header files.
964 *
965 *
966 * For sparse its makes the annotating speed goes from 9s to 4s
967 * For Linux the speedup is even better, from ??? to ???.
968 *
969 * Because There would be some copy paste with annotate_program, it is
970 * better to factorize code hence the just_add_in_env parameter below.
971 *
972 * todo? alternative optimisation for the include problem:
973 * - processing all headers files one time and construct big env
974 * - use hashtbl for env (but apparently not biggest problem)
975 *)
976
977let rec visit_toplevel ~just_add_in_env ~depth elem =
978 let need_annotate_body = not just_add_in_env in
979
980 let bigf = { Visitor_c.default_visitor_c with
981
982 (* ------------------------------------------------------------ *)
983 Visitor_c.kcppdirective = (fun (k, bigf) directive ->
984 match directive with
985 (* do error messages for type annotater only for the real body of the
986 * file, not inside include.
987 *)
988 | Include {i_content = opt} ->
989 opt +> Common.do_option (fun (filename, program) ->
990 Common.save_excursion Flag_parsing_c.verbose_type (fun () ->
991 Flag_parsing_c.verbose_type := false;
992
993 (* old: Visitor_c.vk_program bigf program;
994 * opti: set the just_add_in_env
995 *)
996 program +> List.iter (fun elem ->
997 visit_toplevel ~just_add_in_env:true ~depth:(depth+1) elem
34e49164 998 )
91eba41f 999 )
34e49164 1000 )
91eba41f
C
1001
1002 | Define ((s,ii), (defkind, defval)) ->
1003
1004
1005 (* even if we are in a just_add_in_env phase, such as when
1006 * we process include, as opposed to the body of functions,
1007 * with macros we still to type the body of the macro as
1008 * the macro has no type and so we infer its type from its
1009 * body (and one day later maybe from its use).
1010 *)
1011 (match defval with
1012 (* can try to optimize and recurse only when the define body
1013 * is simple ?
1014 *)
1015
1016 | DefineExpr expr ->
1017 if is_simple_expr expr
1018 (* even if not need_annotate_body, still recurse*)
1019 then k directive
1020 else
1021 if need_annotate_body
1022 then k directive;
1023 | _ ->
1024 if need_annotate_body
1025 then k directive;
1026 );
1027
1028 add_binding (Macro (s, (defkind, defval) )) true;
1029
1030 | Undef _
1031 | PragmaAndCo _ -> ()
34e49164
C
1032 );
1033
91eba41f
C
1034 (* ------------------------------------------------------------ *)
1035 (* main typer code *)
1036 (* ------------------------------------------------------------ *)
1037 Visitor_c.kexpr = annotater_expr_visitor_subpart;
1038
1039 (* ------------------------------------------------------------ *)
34e49164 1040 Visitor_c.kstatement = (fun (k, bigf) st ->
708f4980
C
1041 match Ast_c.unwrap_st st with
1042 | Compound statxs -> do_in_new_scope (fun () -> k st);
34e49164 1043 | _ -> k st
34e49164 1044 );
91eba41f 1045 (* ------------------------------------------------------------ *)
34e49164
C
1046 Visitor_c.kdecl = (fun (k, bigf) d ->
1047 (match d with
1048 | (DeclList (xs, ii)) ->
485bce71
C
1049 xs +> List.iter (fun ({v_namei = var; v_type = t;
1050 v_storage = sto; v_local = local}, iicomma) ->
1051
34e49164
C
1052 (* to add possible definition in type found in Decl *)
1053 Visitor_c.vk_type bigf t;
91eba41f
C
1054
1055
1056 let local =
1057 match local with
1058 | Ast_c.NotLocalDecl -> Ast_c.NotLocalVar
708f4980 1059 | Ast_c.LocalDecl -> Ast_c.LocalVar (offset t)
91eba41f 1060 in
34e49164 1061
b1b2de81
C
1062 var +> Common.do_option (fun (name, iniopt) ->
1063 let s = Ast_c.str_of_name name in
1064
34e49164
C
1065 match sto with
1066 | StoTypedef, _inline ->
1067 add_binding (TypeDef (s,Lib.al_type t)) true;
1068 | _ ->
1069 add_binding (VarOrFunc (s, (Lib.al_type t, local))) true;
91eba41f
C
1070
1071
1072 if need_annotate_body then begin
1073 (* int x = sizeof(x) is legal so need process ini *)
b1b2de81 1074 iniopt +> Common.do_option (fun (info, ini) ->
91eba41f
C
1075 Visitor_c.vk_ini bigf ini
1076 );
1077 end
34e49164
C
1078 );
1079 );
91eba41f
C
1080 | MacroDecl _ ->
1081 if need_annotate_body
1082 then k d
34e49164
C
1083 );
1084
1085 );
1086
91eba41f 1087 (* ------------------------------------------------------------ *)
34e49164 1088 Visitor_c.ktype = (fun (k, bigf) typ ->
91eba41f
C
1089 (* bugfix: have a 'Lib.al_type typ' before, but because we can
1090 * have enum with possible expression, we don't want to change
1091 * the ref of abstract-lined types, but the real one, so
1092 * don't al_type here
1093 *)
708f4980
C
1094 let (_q, tbis) = typ in
1095 match Ast_c.unwrap_typeC typ with
1096 | StructUnion (su, Some s, structType) ->
91eba41f 1097 let structType' = Lib.al_fields structType in
708f4980 1098 let ii = Ast_c.get_ii_typeC_take_care tbis in
91eba41f
C
1099 let ii' = Lib.al_ii ii in
1100 add_binding (StructUnionNameDef (s, ((su, structType'),ii'))) true;
1101
1102 if need_annotate_body
1103 then k typ (* todo: restrict ? new scope so use do_in_scope ? *)
1104
708f4980 1105 | Enum (sopt, enums) ->
91eba41f 1106
b1b2de81
C
1107 enums +> List.iter (fun ((name, eopt), iicomma) ->
1108
1109 let s = Ast_c.str_of_name name in
91eba41f
C
1110
1111 if need_annotate_body
b1b2de81 1112 then eopt +> Common.do_option (fun (ieq, e) ->
91eba41f
C
1113 Visitor_c.vk_expr bigf e
1114 );
1115 add_binding (EnumConstant (s, sopt)) true;
1116 );
34e49164
C
1117
1118
1119 (* TODO: if have a TypeName, then maybe can fill the option
1120 * information.
1121 *)
91eba41f
C
1122 | _ ->
1123 if need_annotate_body
1124 then k typ
34e49164
C
1125
1126 );
1127
91eba41f 1128 (* ------------------------------------------------------------ *)
34e49164
C
1129 Visitor_c.ktoplevel = (fun (k, bigf) elem ->
1130 _notyped_var := Hashtbl.create 100;
1131 match elem with
1132 | Definition def ->
b1b2de81 1133 let {f_name = name;
485bce71
C
1134 f_type = ((returnt, (paramst, b)) as ftyp);
1135 f_storage = sto;
91eba41f
C
1136 f_body = statxs;
1137 f_old_c_style = oldstyle;
1138 },ii
1139 = def
34e49164
C
1140 in
1141 let (i1, i2) =
1142 match ii with
b1b2de81 1143 | iifunc1::iifunc2::ibrace1::ibrace2::ifakestart::isto ->
34e49164
C
1144 iifunc1, iifunc2
1145 | _ -> raise Impossible
1146 in
b1b2de81 1147 let funcs = Ast_c.str_of_name name in
91eba41f
C
1148
1149 (match oldstyle with
1150 | None ->
1151 let typ' =
708f4980 1152 Lib.al_type (Ast_c.mk_ty (FunctionType ftyp) [i1;i2]) in
91eba41f
C
1153
1154 add_binding (VarOrFunc (funcs, (typ',islocal i1.Ast_c.pinfo)))
1155 false;
1156
1157 if need_annotate_body then
1158 do_in_new_scope (fun () ->
b1b2de81
C
1159 paramst +> List.iter (fun ({p_namei= nameopt; p_type= t},_)->
1160 match nameopt with
1161 | Some name ->
1162 let s = Ast_c.str_of_name name in
91eba41f
C
1163 let local = Ast_c.LocalVar (offset t) in
1164 add_binding (VarOrFunc (s,(Lib.al_type t,local))) true
1165 | None ->
1166 pr2 "no type, certainly because Void type ?"
1167 );
1168 (* recurse *)
1169 k elem
1170 );
1171 | Some oldstyle ->
1172 (* generate regular function type *)
1173
1174 pr2 "TODO generate type for function";
1175 (* add bindings *)
1176 if need_annotate_body then
1177 do_in_new_scope (fun () ->
1178 (* recurse. should naturally call the kdecl visitor and
1179 * add binding
1180 *)
1181 k elem;
1182 );
1183
34e49164 1184 );
708f4980
C
1185 | CppTop x ->
1186 (match x with
1187 | Define ((s,ii), (DefineVar, DefineType t)) ->
1188 add_binding (TypeDef (s,Lib.al_type t)) true;
1189 | _ -> k elem
1190 )
1191
91eba41f
C
1192 | Declaration _
1193
708f4980
C
1194
1195
91eba41f
C
1196 | IfdefTop _
1197 | MacroTop _
1198 | EmptyDef _
1199 | NotParsedCorrectly _
1200 | FinalDef _
1201 ->
1202 k elem
34e49164
C
1203 );
1204 }
1205 in
91eba41f
C
1206 if just_add_in_env
1207 then
1208 if depth > 1
1209 then Visitor_c.vk_toplevel bigf elem
1210 else
1211 Common.profile_code "TAC.annotate_only_included" (fun () ->
1212 Visitor_c.vk_toplevel bigf elem
1213 )
1214 else Visitor_c.vk_toplevel bigf elem
1215
1216(*****************************************************************************)
1217(* Entry point *)
1218(*****************************************************************************)
1219(* catch all the decl to grow the environment *)
1220
1221
1222let rec (annotate_program2 :
1223 environment -> toplevel list -> (toplevel * environment Common.pair) list) =
1224 fun env prog ->
1225
1226 (* globals (re)initialialisation *)
1227 _scoped_env := env;
1228 _notyped_var := (Hashtbl.create 100);
34e49164
C
1229
1230 prog +> List.map (fun elem ->
1231 let beforeenv = !_scoped_env in
91eba41f 1232 visit_toplevel ~just_add_in_env:false ~depth:0 elem;
34e49164
C
1233 let afterenv = !_scoped_env in
1234 (elem, (beforeenv, afterenv))
1235 )
1236
91eba41f
C
1237
1238
1239
1240(*****************************************************************************)
1241(* Annotate test *)
1242(*****************************************************************************)
1243
1244(* julia: for coccinelle *)
34e49164
C
1245let annotate_test_expressions prog =
1246 let rec propagate_test e =
1247 let ((e_term,info),_) = e in
1248 let (ty,_) = !info in
1249 info := (ty,Test);
1250 match e_term with
1251 Binary(e1,Logical(AndLog),e2)
1252 | Binary(e1,Logical(OrLog),e2) -> propagate_test e1; propagate_test e2
1253 | Unary(e1,Not) -> propagate_test e1
1254 | ParenExpr(e) -> propagate_test e
1255 | _ -> () in
1256
1257 let bigf = { Visitor_c.default_visitor_c with
1258 Visitor_c.kexpr = (fun (k,bigf) expr ->
708f4980
C
1259 (match unwrap_expr expr with
1260 CondExpr(e,_,_) -> propagate_test e
91eba41f
C
1261 | _ -> ()
1262 );
1263 k expr
1264 );
34e49164 1265 Visitor_c.kstatement = (fun (k, bigf) st ->
708f4980 1266 match unwrap_st st with
34e49164
C
1267 Selection(s) ->
1268 (match s with If(e1,s1,s2) -> propagate_test e1 | _ -> ());
1269 k st;
1270 | Iteration(i) ->
1271 (match i with
1272 While(e,s) -> propagate_test e
1273 | DoWhile(s,e) -> propagate_test e
1274 | For(_,es,_,_) ->
1275 (match unwrap es with Some e -> propagate_test e | None -> ())
1276 | _ -> ());
1277 k st
91eba41f
C
1278 | _ -> k st
1279 )
1280 } in
34e49164
C
1281 (prog +> List.iter (fun elem ->
1282 Visitor_c.vk_toplevel bigf elem
1283 ))
1284
91eba41f
C
1285
1286
1287(*****************************************************************************)
1288(* Annotate types *)
1289(*****************************************************************************)
1290let annotate_program env prog =
1291 Common.profile_code "TAC.annotate_program"
1292 (fun () ->
1293 let res = annotate_program2 env prog in
34e49164 1294 annotate_test_expressions prog;
91eba41f
C
1295 res
1296 )
1297
1298let annotate_type_and_localvar env prog =
1299 Common.profile_code "TAC.annotate_type"
1300 (fun () -> annotate_program2 env prog)
1301
1302
1303(*****************************************************************************)
1304(* changing default typing environment, do concatenation *)
1305let init_env filename =
1306 pr2 ("init_env: " ^ filename);
1307 let (ast2, _stat) = Parse_c.parse_c_and_cpp filename in
1308 let ast = Parse_c.program_of_program2 ast2 in
1309
1310 let res = annotate_type_and_localvar !initial_env ast in
1311 match List.rev res with
1312 | [] -> pr2 "empty environment"
1313 | (_top,(env1,env2))::xs ->
1314 initial_env := !initial_env ++ env2;
1315 ()
1316