3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes,
5 * Copyright (C) 2009 University of Urbana Champaign
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License (GPL)
9 * version 2 as published by the Free Software Foundation.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * file license.txt for more details.
21 module Lib
= Lib_parsing_c
23 (*****************************************************************************)
25 (*****************************************************************************)
27 * - Done a first type checker in 2002, cf typing-semantic/, but
28 * was assuming that have all type info, and so was assuming had called
29 * cpp and everything was right.
30 * - Wrote this file, in 2006?, as we added pattern matching on type
31 * in coccinelle. Partial type annotater.
32 * - Julia extended it in 2008? to have localvar/notlocalvar and
33 * test/notest information, again used by coccinelle.
34 * - I extended it in Fall 2008 to have more type information for the
35 * global analysis. I also added some optimisations to process
36 * included code faster.
39 * Design choices. Can either do:
41 * - can first do a simple inferer, that just pass context
42 * - then a real inferer, managing partial info.
43 * type context = fullType option
45 * - extract the information from the .h files
46 * (so no inference at all needed)
48 * Difference with julia's code in parsing_cocci/type_infer.ml:
49 * - She handles just the variable namespace. She does not type
50 * field access or enum or macros. This is because cocci programs are
51 * usually simple and have no structure definition or macro definitions
52 * that we need to type anyway.
53 * - She does more propagation.
54 * - She does not have to handle the typedef isomorphism which force me
55 * to use those typedef_fix and type_unfold_one_step
56 * - She does not handle I think the function pointer C isomorphism.
58 * - She has a cleaner type_cocci without any info. In my case
59 * I need to do those ugly al_type, or generate fake infos.
60 * - She has more compact code. Perhaps because she does not have to
61 * handle the extra exp_info that she added on me :) So I need those
62 * do_with_type, make_info_xxx, etc.
64 * Note: if need to debug this annotater, use -show_trace_profile, it can
65 * help. You can also set the typedef_debug flag below.
69 * todo: expression contain types, and statements, which in turn can contain
70 * expression, so need recurse. Need define an annote_statement and
73 * todo: how deal with typedef isomorphisms ? How store them in Ast_c ?
74 * store all posible variations in ast_c ? a list of type instead of just
77 * todo: how to handle multiple possible definitions for entities like
78 * struct or typedefs ? Because of ifdef, we should store list of
79 * possibilities sometimes.
81 * todo: define a new type ? like type_cocci ? where have a bool ?
83 * semi: How handle scope ? When search for type of field, we return
84 * a type, but this type makes sense only in a certain scope.
85 * We could add a tag to each typedef, structUnionName to differentiate
86 * them and also associate in ast_c to the type the scope
87 * of this type, the env that were used to define this type.
89 * todo: handle better the search in previous env, the env'. Cf the
90 * termination problem in typedef_fix when I was searching in the same
95 (*****************************************************************************)
97 (*****************************************************************************)
98 let pr2, pr2_once
= Common.mk_pr2_wrappers
Flag_parsing_c.verbose_type
100 (*****************************************************************************)
102 (*****************************************************************************)
104 (* The different namespaces from stdC manual:
106 * You introduce two new name spaces with every block that you write.
108 * One name space includes all
111 * - type definitions,
112 * - and enumeration constants
113 * that you declare or define within the block.
115 * The other name space includes all
119 * *tags* that you define within the block.
121 * You introduce a new member name space with every structure or union
122 * whose content you define. You identify a member name space by the
123 * type of left operand that you write for a member selection
124 * operator, as in x.y or p->y. A member name space ends with the end
125 * of the block in which you declare it.
127 * You introduce a new goto label name space with every function
128 * definition you write. Each goto label name space ends with its
129 * function definition.
132 (* But I don't try to do a type-checker, I try to "resolve" type of var
133 * so don't need make difference between namespaces here.
135 * But, why not make simply a (string, kindstring) assoc ?
136 * Because we dont want that a variable shadow a struct definition, because
137 * they are still in 2 different namespace. But could for typedef,
138 * because VarOrFunc and Typedef are in the same namespace.
139 * But could do a record as in c_info.ml
143 (* This type contains all "ident" like notion of C. Each time in Ast_c
144 * you have a string type (as in expression, function name, fields)
145 * then you need to manage the scope of this ident.
147 * The wrap for StructUnionNameDef contain the whole ii, the i for
148 * the string, the structUnion and the structType.
150 * Put Macro here ? after all the scoping rules for cpp macros is different
151 * and so does not vanish after the closing '}'.
156 | VarOrFunc
of string * Ast_c.exp_type
157 | EnumConstant
of string * string option
159 (* also used for macro type aliases *)
160 | TypeDef
of string * fullType
161 (* the structType contains nested "idents" with struct scope *)
162 | StructUnionNameDef
of string * (structUnion
* structType
) wrap
165 | Macro
of string * (define_kind
* define_val
)
168 (* Because have nested scope, have nested list, hence the list list.
170 * opti? use a hash to accelerate ? hmm but may have some problems
171 * with hash to handle recursive lookup. For instance for the typedef
172 * example where have mutually recursive definition of the type,
173 * we must take care to not loop by starting the second search
174 * from the previous environment. With the list scheme in
175 * lookup_env below it's quite easy to do. With hash it may be
178 type environment
= namedef list list
181 (* ------------------------------------------------------------ *)
182 (* can be modified by the init_env function below, by
183 * the file environment_unix.h
185 let initial_env = ref [
187 (Lib.al_type
(Parse_c.type_of_string
"void *"),
192 (Lib.al_type(Parse_c.type_of_string "void* (*)(int size)"),
195 (Lib.al_type(Parse_c.type_of_string "void (*)(void *ptr)"),
202 let typedef_debug = ref false
205 (* ------------------------------------------------------------ *)
206 (* generic, lookup and also return remaining env for further lookup *)
207 let rec lookup_env2 f env
=
209 | [] -> raise Not_found
210 | []::zs
-> lookup_env2 f zs
213 | None
-> lookup_env2 f
(xs
::zs
)
214 | Some y
-> y
, xs
::zs
217 Common.profile_code
"TAC.lookup_env" (fun () -> lookup_env2 a b
)
221 let member_env lookupf env
=
223 let _ = lookupf env
in
225 with Not_found
-> false
230 (* ------------------------------------------------------------ *)
233 let lookup_var s env
=
235 | VarOrFunc
(s2
, typ
) -> if s2
=$
= s
then Some typ
else None
240 let lookup_typedef s env
=
241 if !typedef_debug then pr2 ("looking for: " ^ s
);
243 | TypeDef
(s2
, typ
) -> if s2
=$
= s
then Some typ
else None
248 let lookup_structunion (_su
, s
) env
=
250 | StructUnionNameDef
(s2
, typ
) -> if s2
=$
= s
then Some typ
else None
255 let lookup_macro s env
=
257 | Macro
(s2
, typ
) -> if s2
=$
= s
then Some typ
else None
262 let lookup_enum s env
=
264 | EnumConstant
(s2
, typ
) -> if s2
=$
= s
then Some typ
else None
270 let lookup_typedef a b
=
271 Common.profile_code
"TAC.lookup_typedef" (fun () -> lookup_typedef a b
)
275 (*****************************************************************************)
277 (*****************************************************************************)
279 (* find_final_type is used to know to what type a field correspond in
280 * x.foo. Sometimes the type of x is a typedef or a structName in which
281 * case we must look in environment to find the complete type, here
282 * structUnion that contains the information.
284 * Because in C one can redefine in nested blocks some typedefs,
285 * struct, or variables, we have a static scoping resolving process.
286 * So, when we look for the type of a var, if this var is in an
287 * enclosing block, then maybe its type refer to a typdef of this
288 * enclosing block, so must restart the "type-resolving" of this
289 * typedef from this enclosing block, not from the bottom. So our
290 * "resolving-type functions" take an env and also return an env from
291 * where the next search must be performed. *)
294 let rec find_final_type ty env =
296 match Ast_c.unwrap_typeC ty with
297 | BaseType x -> (BaseType x) +> Ast_c.rewrap_typeC ty
299 | Pointer t -> (Pointer (find_final_type t env)) +> Ast_c.rewrap_typeC ty
300 | Array (e, t) -> Array (e, find_final_type t env) +> Ast_c.rewrap_typeC ty
302 | StructUnion (sopt, su) -> StructUnion (sopt, su) +> Ast_c.rewrap_typeC ty
304 | FunctionType t -> (FunctionType t) (* todo ? *) +> Ast_c.rewrap_typeC ty
305 | Enum
(s
, enumt
) -> (Enum
(s
, enumt
)) (* todo? *) +> Ast_c.rewrap_typeC ty
306 | EnumName s
-> (EnumName s
) (* todo? *) +> Ast_c.rewrap_typeC ty
308 | StructUnionName
(su
, s
) ->
310 let ((structtyp
,ii
), env'
) = lookup_structunion (su
, s
) env
in
311 Ast_c.nQ
, (StructUnion
(Some s
, structtyp
), ii
)
312 (* old: +> Ast_c.rewrap_typeC ty
313 * but must wrap with good ii, otherwise pretty_print_c
314 * will be lost and raise some Impossible
322 let (t'
, env'
) = lookup_typedef s env
in
323 find_final_type t' env'
328 | ParenType t
-> find_final_type t env
329 | Typeof e
-> failwith
"typeof"
335 (* ------------------------------------------------------------ *)
336 let rec type_unfold_one_step ty env
=
338 match Ast_c.unwrap_typeC ty
with
343 | StructUnion
(sopt
, su
, fields
) -> ty
345 | FunctionType t
-> ty
346 | Enum
(s
, enumt
) -> ty
348 | EnumName s
-> ty
(* todo: look in env when will have EnumDef *)
350 | StructUnionName
(su
, s
) ->
352 let (((su
,fields
),ii
), env'
) = lookup_structunion (su
, s
) env
in
353 Ast_c.mk_ty
(StructUnion
(su
, Some s
, fields
)) ii
354 (* old: +> Ast_c.rewrap_typeC ty
355 * but must wrap with good ii, otherwise pretty_print_c
356 * will be lost and raise some Impossible
362 | TypeName
(name
, _typ
) ->
363 let s = Ast_c.str_of_name name
in
365 if !typedef_debug then pr2 "type_unfold_one_step: lookup_typedef";
366 let (t'
, env'
) = lookup_typedef s env
in
367 type_unfold_one_step t' env'
372 | ParenType t
-> type_unfold_one_step t env
374 pr2_once
("Type_annoter: not handling typeof");
376 | TypeOfType t
-> type_unfold_one_step t env
386 (* normalizer. can be seen as the opposite of the previous function as
387 * we "fold" at least for the structUnion. Should return something that
388 * Type_c.is_completed_fullType likes, something that makes it easier
389 * for the programmer to work on, that has all the needed information
392 let rec typedef_fix ty env
=
393 match Ast_c.unwrap_typeC ty
with
397 Pointer
(typedef_fix t env
) +> Ast_c.rewrap_typeC ty
399 Array
(e
, typedef_fix t env
) +> Ast_c.rewrap_typeC ty
400 | StructUnion
(su
, sopt
, fields
) ->
402 * todo? but what if correspond to a nested struct def ?
404 Type_c.structdef_to_struct_name ty
406 (FunctionType ft
) (* todo ? *) +> Ast_c.rewrap_typeC ty
408 (Enum
(s, enumt
)) (* todo? *) +> Ast_c.rewrap_typeC ty
410 (EnumName
s) (* todo? *) +> Ast_c.rewrap_typeC ty
412 (* we prefer StructUnionName to StructUnion when it comes to typed metavar *)
413 | StructUnionName
(su
, s) -> ty
415 (* keep the typename but complete with more information *)
416 | TypeName
(name
, typ
) ->
417 let s = Ast_c.str_of_name name
in
420 pr2 ("typedef value already there:" ^
s);
424 if !typedef_debug then pr2 "typedef_fix: lookup_typedef";
425 let (t'
, env'
) = lookup_typedef s env
in
427 (* bugfix: termination bug if use env instead of env' below, because
428 * can have some weird mutually recursive typedef which
429 * each new type alias search for its mutual def.
431 TypeName
(name
, Some
(typedef_fix t' env'
)) +> Ast_c.rewrap_typeC ty
436 (* remove paren for better matching with typed metavar. kind of iso again *)
440 pr2_once
("Type_annoter: not handling typeof");
447 (*****************************************************************************)
448 (* Helpers, part 1 *)
449 (*****************************************************************************)
452 (Lib.al_type
(Parse_c.type_of_string
s))
454 Common.profile_code
"Type_c.type_of_s" (fun () -> type_of_s2 a
)
458 * /home/pad/software-os-src2/freebsd/contrib/ipfilter/netinet/ip_fil_freebsd.c
459 * because in the code there is:
460 * static iss_seq_off = 0;
461 * which in the parser was generating a default int without a parse_info.
462 * I now add a fake parse_info for such default int so no more failwith
466 let rec is_simple_expr expr
=
467 match Ast_c.unwrap_expr expr
with
468 (* todo? handle more special cases ? *)
476 | Binary
(e1
, op
, e2
) ->
480 | ParenExpr
(e
) -> is_simple_expr e
484 (*****************************************************************************)
486 (*****************************************************************************)
487 (* now in type_c.ml *)
491 (*****************************************************************************)
492 (* (Semi) Globals, Julia's style *)
493 (*****************************************************************************)
495 (* opti: cache ? use hash ? *)
496 let _scoped_env = ref !initial_env
498 (* memoise unnanoted var, to avoid too much warning messages *)
499 let _notyped_var = ref (Hashtbl.create
100)
501 let new_scope() = _scoped_env := []::!_scoped_env
502 let del_scope() = _scoped_env := List.tl
!_scoped_env
504 let do_in_new_scope f =
512 let add_in_scope namedef
=
513 let (current
, older
) = Common.uncons
!_scoped_env in
514 _scoped_env := (namedef
::current
)::older
517 (* ------------------------------------------------------------ *)
519 (* sort of hackish... *)
521 if List.length
(!_scoped_env) =|= List.length
!initial_env
522 then Ast_c.NotLocalVar
523 else Ast_c.LocalVar info
525 (* ------------------------------------------------------------ *)
526 (* the warning argument is here to allow some binding to overwrite an
527 * existing one. With function, we first have the prototype and then the def,
528 * and the def binding with the same string is not an error.
530 * todo?: but if we define two times the same function, then we will not
531 * detect it :( it would require to make a diff between adding a binding
532 * from a prototype and from a definition.
534 * opti: disabling the check_annotater flag have some important
535 * performance benefit.
538 let add_binding2 namedef warning
=
539 let (current_scope
, _older_scope
) = Common.uncons
!_scoped_env in
541 if !Flag_parsing_c.check_annotater
then begin
543 | VarOrFunc
(s, typ
) ->
544 if Hashtbl.mem
!_notyped_var s
545 then pr2 ("warning: found typing information for a variable that was" ^
546 "previously unknown:" ^
s);
552 | VarOrFunc
(s, typ
) ->
553 member_env (lookup_var s), s
554 | TypeDef
(s, typ
) ->
555 member_env (lookup_typedef s), s
556 | StructUnionNameDef
(s, (su
, typ
)) ->
557 member_env (lookup_structunion (su
, s)), s
559 member_env (lookup_macro s), s
560 | EnumConstant
(s, body
) ->
561 member_env (lookup_enum s), s
564 if memberf
[current_scope
] && warning
565 then pr2 ("Type_annoter: warning, " ^
s ^
566 " is already in current binding" ^
"\n" ^
567 " so there is a weird shadowing");
571 let add_binding namedef warning
=
572 Common.profile_code
"TAC.add_binding" (fun () -> add_binding2 namedef warning
)
576 (*****************************************************************************)
577 (* Helpers, part 2 *)
578 (*****************************************************************************)
580 let lookup_opt_env lookupf
s =
581 Common.optionise
(fun () ->
582 lookupf
s !_scoped_env
585 let unwrap_unfold_env2 typ
=
587 (type_unfold_one_step typ
!_scoped_env)
588 let unwrap_unfold_env typ
=
589 Common.profile_code
"TAC.unwrap_unfold_env" (fun () -> unwrap_unfold_env2 typ
)
591 let typedef_fix a b
=
592 Common.profile_code
"TAC.typedef_fix" (fun () -> typedef_fix a b
)
594 let make_info_def_fix x
=
595 Type_c.make_info_def
(typedef_fix x
!_scoped_env)
597 let make_info_fix (typ
, local
) =
598 Type_c.make_info
((typedef_fix typ
!_scoped_env),local
)
601 let make_info_def = Type_c.make_info_def
603 (*****************************************************************************)
604 (* Main typer code, put later in a visitor *)
605 (*****************************************************************************)
607 let annotater_expr_visitor_subpart = (fun (k
,bigf
) expr
->
610 match Ast_c.unwrap_expr expr
with
612 (* -------------------------------------------------- *)
613 (* todo: should analyse the 's' for int to know if unsigned or not *)
614 | Constant
(String
(s,kind
)) -> make_info_def (type_of_s "char *")
615 | Constant MultiString
_ -> make_info_def (type_of_s "char *")
616 | Constant
(Char
(s,kind
)) -> make_info_def (type_of_s "char")
617 | Constant
(Int
(s,kind
)) ->
618 (* this seems really unpleasant, but perhaps the type needs to be set
619 up in some way that allows pretty printing *)
622 (* matches limited by what is generated in lexer_c.mll *)
623 Si
(Signed
,CInt
) -> type_of_s "int"
624 | Si
(UnSigned
,CInt
) -> type_of_s "unsigned int"
625 | Si
(Signed
,CLong
) -> type_of_s "long"
626 | Si
(UnSigned
,CLong
) -> type_of_s "unsigned long"
627 | Si
(Signed
,CLongLong
) -> type_of_s "long long"
628 | Si
(UnSigned
,CLongLong
) -> type_of_s "unsigned long long"
629 | _ -> failwith
"unexpected kind for constant")
630 | Constant
(Float
(s,kind
)) ->
631 let fake = Ast_c.fakeInfo
(Common.fake_parse_info
) in
632 let fake = Ast_c.rewrap_str
"float" fake in
633 let iinull = [fake] in
634 make_info_def (Ast_c.mk_ty
(BaseType
(FloatType kind
)) iinull)
637 (* -------------------------------------------------- *)
638 (* note: could factorize this code with the code for Ident
639 * and the other code for Funcall below. But as the Ident can be
640 * a macro-func, I prefer to handle it separately. So
641 * this rule can handle the macro-func, the Ident-rule can handle
642 * the macro-var, and the other FunCall-rule the regular
643 * function calls through fields.
644 * Also as I don't want a warning on the Ident that are a FunCall,
645 * easier to have a rule separate from the Ident rule.
647 | FunCall
(e1
, args
) ->
648 (match Ast_c.unwrap_expr e1
with
652 args
+> List.iter
(fun (e
,ii
) ->
653 (* could typecheck if arguments agree with prototype *)
654 Visitor_c.vk_argument bigf e
656 let s = Ast_c.str_of_name ident
in
657 (match lookup_opt_env lookup_var s with
658 | Some
((typ
,local
),_nextenv
) ->
660 (* set type for ident *)
661 let tyinfo = make_info_fix (typ
, local
) in
662 Ast_c.set_type_expr e1
tyinfo;
664 (match unwrap_unfold_env typ
with
665 | FunctionType
(ret
, params
) -> make_info_def ret
667 (* can be function pointer, C have an iso for that,
668 * same pfn() syntax than regular function call.
671 (match unwrap_unfold_env typ2
with
672 | FunctionType
(ret
, params
) -> make_info_def ret
673 | _ -> Type_c.noTypeHere
675 | _ -> Type_c.noTypeHere
679 (match lookup_opt_env lookup_macro s with
680 | Some
((defkind
, defval
), _nextenv
) ->
681 (match defkind
, defval
with
682 | DefineFunc
_, DefineExpr e
->
683 let rettype = Ast_c.get_onlytype_expr e
in
685 (* todo: could also set type for ident ?
686 have return type and at least type of concrete
687 parameters so can generate a fake FunctionType
690 Type_c.fake_function_type
rettype args
693 macrotype_opt +> Common.do_option
(fun t
->
694 pr2 ("Type_annotater: generate fake function type" ^
696 let tyinfo = make_info_def_fix t
in
697 Ast_c.set_type_expr e1
tyinfo;
700 Ast_c.get_type_expr e
702 pr2 ("Type_annoter: not a macro-func: " ^
s);
705 (* normally the FunCall case should have catch it *)
706 pr2 ("Type_annoter: not a macro-func-expr: " ^
s);
710 pr2_once
("type_annotater: no type for function ident: " ^
s);
719 (Ast_c.get_type_expr e1
) +> Type_c.do_with_type
(fun typ
->
720 (* copy paste of above *)
721 (match unwrap_unfold_env typ
with
722 | FunctionType
(ret
, params
) -> make_info_def ret
724 (match unwrap_unfold_env typ
with
725 | FunctionType
(ret
, params
) -> make_info_def ret
726 | _ -> Type_c.noTypeHere
728 | _ -> Type_c.noTypeHere
734 (* -------------------------------------------------- *)
736 let s = Ast_c.str_of_name ident
in
737 (match lookup_opt_env lookup_var s with
738 | Some
((typ
,local
),_nextenv
) ->
739 make_info_fix (typ
,local
)
741 (match lookup_opt_env lookup_macro s with
742 | Some
((defkind
, defval
), _nextenv
) ->
743 (match defkind
, defval
with
744 | DefineVar
, DefineExpr e
->
745 Ast_c.get_type_expr e
747 pr2 ("Type_annoter: not a expression: " ^
s);
750 (* normally the FunCall case should have catch it *)
751 pr2 ("Type_annoter: not a macro-var: " ^
s);
755 (match lookup_opt_env lookup_enum s with
756 | Some
(_, _nextenv
) ->
757 make_info_def (type_of_s "int")
759 if not
(s =~
"[A-Z_]+") (* if macro then no warning *)
761 if !Flag_parsing_c.check_annotater
then
762 if not
(Hashtbl.mem
!_notyped_var s)
764 pr2 ("Type_annoter: no type found for: " ^
s);
765 Hashtbl.add
!_notyped_var s true;
769 pr2 ("Type_annoter: no type found for: " ^
s)
776 (* -------------------------------------------------- *)
777 (* C isomorphism on type on array and pointers *)
779 | ArrayAccess
(e
, _) ->
780 k expr
; (* recurse to set the types-ref of sub expressions *)
782 (Ast_c.get_type_expr e
) +> Type_c.do_with_type
(fun t
->
783 (* todo: maybe not good env !! *)
784 match unwrap_unfold_env t
with
788 | _ -> Type_c.noTypeHere
792 | Unary
(e
, GetRef
) ->
793 k expr
; (* recurse to set the types-ref of sub expressions *)
795 (Ast_c.get_type_expr e
) +> Type_c.do_with_type
(fun t
->
796 (* must generate an element so that '=' can be used
799 let fake = Ast_c.fakeInfo
Common.fake_parse_info
in
800 let fake = Ast_c.rewrap_str
"*" fake in
802 let ft = Ast_c.mk_ty
(Pointer t
) [fake] in
807 (* -------------------------------------------------- *)
809 | RecordAccess
(e
, namefld
)
810 | RecordPtAccess
(e
, namefld
) as x
->
812 let fld = Ast_c.str_of_name namefld
in
814 k expr
; (* recurse to set the types-ref of sub expressions *)
816 (Ast_c.get_type_expr e
) +> Type_c.do_with_type
(fun t
->
820 | RecordAccess
_ -> Some t
821 | RecordPtAccess
_ ->
822 (match unwrap_unfold_env t
with
823 | Pointer
(t
) -> Some t
826 | _ -> raise Impossible
830 | None
-> Type_c.noTypeHere
832 match unwrap_unfold_env t
with
833 | StructUnion
(su
, sopt
, fields
) ->
835 (* todo: which env ? *)
837 (Type_c.type_field
fld (su
, fields
))
841 "TYPE-ERROR: field '%s' does not belong in struct %s"
842 fld (match sopt
with Some
s -> s |_ -> "<anon>"));
845 pr2 "TAC:MultiFound";
848 | _ -> Type_c.noTypeHere
854 (* -------------------------------------------------- *)
857 (* todo: if infer, can "push" info ? add_types_expr [t] e ? *)
858 make_info_def_fix (Lib.al_type t
)
860 (* todo? lub, hmm maybe not, cos type must be e1 *)
861 | Assignment
(e1
, op
, e2
) ->
863 (* value of an assignment is the value of the RHS expression *)
864 Ast_c.get_type_expr e2
865 | Sequence
(e1
, e2
) ->
867 Ast_c.get_type_expr e2
869 | Binary
(e1
, Logical
_, e2
) ->
871 make_info_def (type_of_s "int")
874 | Binary
(e1
, Arith op
, e2
) ->
876 Type_c.lub op
(Type_c.get_opt_type e1
) (Type_c.get_opt_type e2
)
878 | CondExpr
(cond
, e1opt
, e2
) ->
880 Ast_c.get_type_expr e2
885 Ast_c.get_type_expr e
887 | Infix
(e
, op
) | Postfix
(e
, op
) ->
889 Ast_c.get_type_expr e
891 (* pad: julia wrote this ? *)
892 | Unary
(e
, UnPlus
) ->
893 k expr
; (* recurse to set the types-ref of sub expressions *)
894 make_info_def (type_of_s "int")
895 (* todo? can convert from unsigned to signed if UnMinus ? *)
896 | Unary
(e
, UnMinus
) ->
897 k expr
; (* recurse to set the types-ref of sub expressions *)
898 make_info_def (type_of_s "int")
900 | SizeOfType
_|SizeOfExpr
_ ->
901 k expr
; (* recurse to set the types-ref of sub expressions *)
902 make_info_def (type_of_s "int")
904 | Constructor
(ft, ini
) ->
905 k expr
; (* recurse to set the types-ref of sub expressions *)
906 make_info_def (Lib.al_type
ft)
909 k expr
; (* recurse to set the types-ref of sub expressions *)
910 Ast_c.get_type_expr e
911 | Unary
(e
, Tilde
) ->
912 k expr
; (* recurse to set the types-ref of sub expressions *)
913 Ast_c.get_type_expr e
915 (* -------------------------------------------------- *)
917 | Unary
(_, GetRefLabel
) ->
918 k expr
; (* recurse to set the types-ref of sub expressions *)
919 pr2_once
"Type annotater:not handling GetRefLabel";
923 k expr
; (* recurse to set the types-ref of sub expressions *)
924 pr2_once
"Type annotater:not handling StatementExpr";
927 | _ -> k expr; Type_c.noTypeHere
931 Ast_c.set_type_expr expr
ty
936 (*****************************************************************************)
938 (*****************************************************************************)
940 (* Processing includes that were added after a cpp_ast_c makes the
941 * type annotater quite slow, especially when the depth of cpp_ast_c is
942 * big. But for such includes the only thing we really want is to modify
943 * the environment to have enough type information. We don't need
944 * to type the expressions inside those includes (they will be typed
945 * when we process the include file directly). Here the goal is
948 * Note that as usually header files contain mostly structure
949 * definitions and defines, that means we still have to do lots of work.
950 * We only win on function definition bodies, but usually header files
951 * have just prototypes, or inline function definitions which anyway have
952 * usually a small body. But still, we win. It also makes clearer
953 * that when processing include as we just need the environment, the caller
954 * of this module can do further optimisations such as memorising the
955 * state of the environment after each header files.
958 * For sparse its makes the annotating speed goes from 9s to 4s
959 * For Linux the speedup is even better, from ??? to ???.
961 * Because There would be some copy paste with annotate_program, it is
962 * better to factorize code hence the just_add_in_env parameter below.
964 * todo? alternative optimisation for the include problem:
965 * - processing all headers files one time and construct big env
966 * - use hashtbl for env (but apparently not biggest problem)
969 let rec visit_toplevel ~just_add_in_env ~depth elem
=
970 let need_annotate_body = not just_add_in_env
in
972 let bigf = { Visitor_c.default_visitor_c
with
974 (* ------------------------------------------------------------ *)
975 Visitor_c.kcppdirective
= (fun (k
, bigf) directive
->
977 (* do error messages for type annotater only for the real body of the
978 * file, not inside include.
980 | Include
{i_content
= opt
} ->
981 opt
+> Common.do_option
(fun (filename
, program
) ->
982 Common.save_excursion
Flag_parsing_c.verbose_type
(fun () ->
983 Flag_parsing_c.verbose_type
:= false;
985 (* old: Visitor_c.vk_program bigf program;
986 * opti: set the just_add_in_env
988 program
+> List.iter
(fun elem
->
989 visit_toplevel ~just_add_in_env
:true ~depth
:(depth
+1) elem
994 | Define
((s,ii
), (defkind
, defval
)) ->
997 (* even if we are in a just_add_in_env phase, such as when
998 * we process include, as opposed to the body of functions,
999 * with macros we still to type the body of the macro as
1000 * the macro has no type and so we infer its type from its
1001 * body (and one day later maybe from its use).
1004 (* can try to optimize and recurse only when the define body
1008 | DefineExpr expr
->
1009 (* prevent macro-declared variables from leaking out *)
1010 do_in_new_scope (fun () ->
1011 if is_simple_expr expr
1012 (* even if not need_annotate_body, still recurse*)
1015 if need_annotate_body
1018 do_in_new_scope (fun () ->
1019 if need_annotate_body
1023 add_binding (Macro
(s, (defkind
, defval
) )) true;
1026 | PragmaAndCo
_ -> ()
1029 (* ------------------------------------------------------------ *)
1030 (* main typer code *)
1031 (* ------------------------------------------------------------ *)
1032 Visitor_c.kexpr
= annotater_expr_visitor_subpart;
1034 (* ------------------------------------------------------------ *)
1035 Visitor_c.kstatement
= (fun (k
, bigf) st
->
1036 match Ast_c.unwrap_st st
with
1037 | Compound statxs
-> do_in_new_scope (fun () -> k st
);
1040 (* ------------------------------------------------------------ *)
1041 Visitor_c.kdecl
= (fun (k
, bigf) d
->
1043 | (DeclList
(xs
, ii
)) ->
1044 xs
+> List.iter
(fun ({v_namei
= var
; v_type
= t
;
1045 v_storage
= sto
; v_local
= local
} as x
1048 (* to add possible definition in type found in Decl *)
1049 Visitor_c.vk_type
bigf t
;
1054 | Ast_c.NotLocalDecl
-> Ast_c.NotLocalVar
1055 | Ast_c.LocalDecl
-> Ast_c.LocalVar
(Ast_c.info_of_type t
)
1057 var
+> Common.do_option
(fun (name
, iniopt
) ->
1058 let s = Ast_c.str_of_name name
in
1061 | StoTypedef
, _inline
->
1062 add_binding (TypeDef
(s,Lib.al_type t
)) true;
1064 add_binding (VarOrFunc
(s, (Lib.al_type t
, local))) true;
1067 Some
(typedef_fix (Lib.al_type t
) !_scoped_env);
1069 if need_annotate_body then begin
1070 (* int x = sizeof(x) is legal so need process ini *)
1071 iniopt
+> Common.do_option
(fun (info
, ini
) ->
1072 Visitor_c.vk_ini
bigf ini
1078 if need_annotate_body
1084 (* ------------------------------------------------------------ *)
1085 Visitor_c.ktype
= (fun (k
, bigf) typ
->
1086 (* bugfix: have a 'Lib.al_type typ' before, but because we can
1087 * have enum with possible expression, we don't want to change
1088 * the ref of abstract-lined types, but the real one, so
1089 * don't al_type here
1091 let (_q
, tbis
) = typ
in
1092 match Ast_c.unwrap_typeC typ
with
1093 | StructUnion
(su
, Some
s, structType
) ->
1094 let structType'
= Lib.al_fields
structType in
1095 let ii = Ast_c.get_ii_typeC_take_care tbis
in
1096 let ii'
= Lib.al_ii
ii in
1097 add_binding (StructUnionNameDef
(s, ((su
, structType'
),ii'
))) true;
1099 if need_annotate_body
1100 then k typ
(* todo: restrict ? new scope so use do_in_scope ? *)
1102 | Enum
(sopt
, enums
) ->
1104 enums
+> List.iter
(fun ((name
, eopt
), iicomma
) ->
1106 let s = Ast_c.str_of_name name
in
1108 if need_annotate_body
1109 then eopt
+> Common.do_option
(fun (ieq
, e
) ->
1110 Visitor_c.vk_expr
bigf e
1112 add_binding (EnumConstant
(s, sopt
)) true;
1116 (* TODO: if have a TypeName, then maybe can fill the option
1120 if need_annotate_body
1125 (* ------------------------------------------------------------ *)
1126 Visitor_c.ktoplevel
= (fun (k
, bigf) elem
->
1127 _notyped_var := Hashtbl.create
100;
1131 f_type
= ((returnt
, (paramst
, b
)) as ftyp
);
1134 f_old_c_style
= oldstyle
;
1140 (* what is iifunc1? it should be a type. jll
1141 * pad: it's the '(' in the function definition. The
1142 * return type is part of f_type.
1144 | iifunc1
::iifunc2
::ibrace1
::ibrace2
::ifakestart
::isto
->
1146 | _ -> raise Impossible
1148 let funcs = Ast_c.str_of_name name
in
1150 (match oldstyle
with
1153 Lib.al_type
(Ast_c.mk_ty
(FunctionType ftyp
) [i1
;i2
]) in
1155 add_binding (VarOrFunc
(funcs, (typ'
,islocal i1
.Ast_c.pinfo
)))
1158 if need_annotate_body then
1159 do_in_new_scope (fun () ->
1160 paramst
+> List.iter
(fun ({p_namei
= nameopt
; p_type
= t
},_)->
1163 let s = Ast_c.str_of_name name
in
1164 let local = Ast_c.LocalVar
(Ast_c.info_of_type t
) in
1165 add_binding (VarOrFunc
(s,(Lib.al_type t
,local))) true
1167 pr2 "no type, certainly because Void type ?"
1173 (* generate regular function type *)
1175 pr2 "TODO generate type for function";
1177 if need_annotate_body then
1178 do_in_new_scope (fun () ->
1179 (* recurse. should naturally call the kdecl visitor and
1188 | Define
((s,ii), (DefineVar
, DefineType t
)) ->
1189 add_binding (TypeDef
(s,Lib.al_type t
)) true;
1200 | NotParsedCorrectly
_
1210 then Visitor_c.vk_toplevel
bigf elem
1212 Common.profile_code
"TAC.annotate_only_included" (fun () ->
1213 Visitor_c.vk_toplevel
bigf elem
1215 else Visitor_c.vk_toplevel
bigf elem
1217 (*****************************************************************************)
1219 (*****************************************************************************)
1220 (* catch all the decl to grow the environment *)
1223 let rec (annotate_program2
:
1224 environment
-> toplevel list
-> (toplevel
* environment
Common.pair
) list
) =
1227 (* globals (re)initialialisation *)
1229 _notyped_var := (Hashtbl.create
100);
1231 prog
+> List.map
(fun elem
->
1232 let beforeenv = !_scoped_env in
1233 visit_toplevel ~just_add_in_env
:false ~depth
:0 elem
;
1234 let afterenv = !_scoped_env in
1235 (elem
, (beforeenv, afterenv))
1241 (*****************************************************************************)
1243 (*****************************************************************************)
1245 (* julia: for coccinelle *)
1246 let annotate_test_expressions prog
=
1247 let rec propagate_test e
=
1248 let ((e_term
,info
),_) = e
in
1249 let (ty,_) = !info
in
1252 Binary
(e1
,Logical
(AndLog
),e2
)
1253 | Binary
(e1
,Logical
(OrLog
),e2
) -> propagate_test e1
; propagate_test e2
1254 | Unary
(e1
,Not
) -> propagate_test e1
1255 | ParenExpr
(e
) -> propagate_test e
1258 let bigf = { Visitor_c.default_visitor_c
with
1259 Visitor_c.kexpr
= (fun (k
,bigf) expr
->
1260 (match unwrap_expr expr
with
1261 CondExpr
(e
,_,_) -> propagate_test e
1262 | Binary
(e1
,Logical
(AndLog
),e2
)
1263 | Binary
(e1
,Logical
(OrLog
),e2
) -> propagate_test e1
; propagate_test e2
1264 | Unary
(e1
,Not
) -> propagate_test e1
1269 Visitor_c.kstatement
= (fun (k
, bigf) st
->
1270 match unwrap_st st
with
1272 (match s with If
(e1
,s1
,s2
) -> propagate_test e1
| _ -> ());
1276 While
(e
,s) -> propagate_test e
1277 | DoWhile
(s,e
) -> propagate_test e
1279 (match unwrap es
with Some e
-> propagate_test e
| None
-> ())
1285 (prog
+> List.iter
(fun elem
->
1286 Visitor_c.vk_toplevel
bigf elem
1291 (*****************************************************************************)
1292 (* Annotate types *)
1293 (*****************************************************************************)
1294 let annotate_program env prog
=
1295 Common.profile_code
"TAC.annotate_program"
1297 let res = annotate_program2 env prog
in
1298 annotate_test_expressions prog
;
1302 let annotate_type_and_localvar env prog
=
1303 Common.profile_code
"TAC.annotate_type"
1304 (fun () -> annotate_program2 env prog
)
1307 (*****************************************************************************)
1308 (* changing default typing environment, do concatenation *)
1309 let init_env filename
=
1310 pr2 ("init_env: " ^ filename
);
1311 let (ast2
, _stat
) = Parse_c.parse_c_and_cpp filename
in
1312 let ast = Parse_c.program_of_program2 ast2
in
1314 let res = annotate_type_and_localvar !initial_env ast in
1315 match List.rev
res with
1316 | [] -> pr2 "empty environment"
1317 | (_top
,(env1
,env2
))::xs
->
1318 initial_env := !initial_env ++ env2
;