3 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes,
4 * Copyright (C) 2009 University of Urbana Champaign
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
20 module Lib
= Lib_parsing_c
22 (*****************************************************************************)
24 (*****************************************************************************)
26 * - Done a first type checker in 2002, cf typing-semantic/, but
27 * was assuming that have all type info, and so was assuming had called
28 * cpp and everything was right.
29 * - Wrote this file, in 2006?, as we added pattern matching on type
30 * in coccinelle. Partial type annotater.
31 * - Julia extended it in 2008? to have localvar/notlocalvar and
32 * test/notest information, again used by coccinelle.
33 * - I extended it in Fall 2008 to have more type information for the
34 * global analysis. I also added some optimisations to process
35 * included code faster.
38 * Design choices. Can either do:
40 * - can first do a simple inferer, that just pass context
41 * - then a real inferer, managing partial info.
42 * type context = fullType option
44 * - extract the information from the .h files
45 * (so no inference at all needed)
47 * Difference with julia's code in parsing_cocci/type_infer.ml:
48 * - She handles just the variable namespace. She does not type
49 * field access or enum or macros. This is because cocci programs are
50 * usually simple and have no structure definition or macro definitions
51 * that we need to type anyway.
52 * - She does more propagation.
53 * - She does not have to handle the typedef isomorphism which force me
54 * to use those typedef_fix and type_unfold_one_step
55 * - She does not handle I think the function pointer C isomorphism.
57 * - She has a cleaner type_cocci without any info. In my case
58 * I need to do those ugly al_type, or generate fake infos.
59 * - She has more compact code. Perhaps because she does not have to
60 * handle the extra exp_info that she added on me :) So I need those
61 * do_with_type, make_info_xxx, etc.
63 * Note: if need to debug this annotater, use -show_trace_profile, it can
64 * help. You can also set the typedef_debug flag below.
68 * todo: expression contain types, and statements, which in turn can contain
69 * expression, so need recurse. Need define an annote_statement and
72 * todo: how deal with typedef isomorphisms ? How store them in Ast_c ?
73 * store all posible variations in ast_c ? a list of type instead of just
76 * todo: how to handle multiple possible definitions for entities like
77 * struct or typedefs ? Because of ifdef, we should store list of
78 * possibilities sometimes.
80 * todo: define a new type ? like type_cocci ? where have a bool ?
82 * semi: How handle scope ? When search for type of field, we return
83 * a type, but this type makes sense only in a certain scope.
84 * We could add a tag to each typedef, structUnionName to differentiate
85 * them and also associate in ast_c to the type the scope
86 * of this type, the env that were used to define this type.
88 * todo: handle better the search in previous env, the env'. Cf the
89 * termination problem in typedef_fix when I was searching in the same
94 (*****************************************************************************)
96 (*****************************************************************************)
97 let pr2, pr2_once
= Common.mk_pr2_wrappers
Flag_parsing_c.verbose_type
99 (*****************************************************************************)
101 (*****************************************************************************)
103 (* The different namespaces from stdC manual:
105 * You introduce two new name spaces with every block that you write.
107 * One name space includes all
110 * - type definitions,
111 * - and enumeration constants
112 * that you declare or define within the block.
114 * The other name space includes all
118 * *tags* that you define within the block.
120 * You introduce a new member name space with every structure or union
121 * whose content you define. You identify a member name space by the
122 * type of left operand that you write for a member selection
123 * operator, as in x.y or p->y. A member name space ends with the end
124 * of the block in which you declare it.
126 * You introduce a new goto label name space with every function
127 * definition you write. Each goto label name space ends with its
128 * function definition.
131 (* But I don't try to do a type-checker, I try to "resolve" type of var
132 * so don't need make difference between namespaces here.
134 * But, why not make simply a (string, kindstring) assoc ?
135 * Because we dont want that a variable shadow a struct definition, because
136 * they are still in 2 different namespace. But could for typedef,
137 * because VarOrFunc and Typedef are in the same namespace.
138 * But could do a record as in c_info.ml
142 (* This type contains all "ident" like notion of C. Each time in Ast_c
143 * you have a string type (as in expression, function name, fields)
144 * then you need to manage the scope of this ident.
146 * The wrap for StructUnionNameDef contain the whole ii, the i for
147 * the string, the structUnion and the structType.
149 * Put Macro here ? after all the scoping rules for cpp macros is different
150 * and so does not vanish after the closing '}'.
155 | VarOrFunc
of string * Ast_c.exp_type
156 | EnumConstant
of string * string option
158 (* also used for macro type aliases *)
159 | TypeDef
of string * fullType
160 (* the structType contains nested "idents" with struct scope *)
161 | StructUnionNameDef
of string * (structUnion
* structType
) wrap
164 | Macro
of string * (define_kind
* define_val
)
167 (* Because have nested scope, have nested list, hence the list list.
169 * opti? use a hash to accelerate ? hmm but may have some problems
170 * with hash to handle recursive lookup. For instance for the typedef
171 * example where have mutually recursive definition of the type,
172 * we must take care to not loop by starting the second search
173 * from the previous environment. With the list scheme in
174 * lookup_env below it's quite easy to do. With hash it may be
177 type environment
= namedef list list
180 (* ------------------------------------------------------------ *)
181 (* can be modified by the init_env function below, by
182 * the file environment_unix.h
184 let initial_env = ref [
186 (Lib.al_type
(Parse_c.type_of_string
"void *"),
191 (Lib.al_type(Parse_c.type_of_string "void* (*)(int size)"),
194 (Lib.al_type(Parse_c.type_of_string "void (*)(void *ptr)"),
201 let typedef_debug = ref false
204 (* ------------------------------------------------------------ *)
205 (* generic, lookup and also return remaining env for further lookup *)
206 let rec lookup_env2 f env
=
208 | [] -> raise Not_found
209 | []::zs
-> lookup_env2 f zs
212 | None
-> lookup_env2 f
(xs
::zs
)
213 | Some y
-> y
, xs
::zs
216 Common.profile_code
"TAC.lookup_env" (fun () -> lookup_env2 a b
)
220 let member_env lookupf env
=
222 let _ = lookupf env
in
224 with Not_found
-> false
229 (* ------------------------------------------------------------ *)
232 let lookup_var s env
=
234 | VarOrFunc
(s2
, typ
) -> if s2
=$
= s
then Some typ
else None
239 let lookup_typedef s env
=
240 if !typedef_debug then pr2 ("looking for: " ^ s
);
242 | TypeDef
(s2
, typ
) -> if s2
=$
= s
then Some typ
else None
247 let lookup_structunion (_su
, s
) env
=
249 | StructUnionNameDef
(s2
, typ
) -> if s2
=$
= s
then Some typ
else None
254 let lookup_macro s env
=
256 | Macro
(s2
, typ
) -> if s2
=$
= s
then Some typ
else None
261 let lookup_enum s env
=
263 | EnumConstant
(s2
, typ
) -> if s2
=$
= s
then Some typ
else None
269 let lookup_typedef a b
=
270 Common.profile_code
"TAC.lookup_typedef" (fun () -> lookup_typedef a b
)
274 (*****************************************************************************)
276 (*****************************************************************************)
278 (* find_final_type is used to know to what type a field correspond in
279 * x.foo. Sometimes the type of x is a typedef or a structName in which
280 * case we must look in environment to find the complete type, here
281 * structUnion that contains the information.
283 * Because in C one can redefine in nested blocks some typedefs,
284 * struct, or variables, we have a static scoping resolving process.
285 * So, when we look for the type of a var, if this var is in an
286 * enclosing block, then maybe its type refer to a typdef of this
287 * enclosing block, so must restart the "type-resolving" of this
288 * typedef from this enclosing block, not from the bottom. So our
289 * "resolving-type functions" take an env and also return an env from
290 * where the next search must be performed. *)
293 let rec find_final_type ty env =
295 match Ast_c.unwrap_typeC ty with
296 | BaseType x -> (BaseType x) +> Ast_c.rewrap_typeC ty
298 | Pointer t -> (Pointer (find_final_type t env)) +> Ast_c.rewrap_typeC ty
299 | Array (e, t) -> Array (e, find_final_type t env) +> Ast_c.rewrap_typeC ty
301 | StructUnion (sopt, su) -> StructUnion (sopt, su) +> Ast_c.rewrap_typeC ty
303 | FunctionType t -> (FunctionType t) (* todo ? *) +> Ast_c.rewrap_typeC ty
304 | Enum
(s
, enumt
) -> (Enum
(s
, enumt
)) (* todo? *) +> Ast_c.rewrap_typeC ty
305 | EnumName s
-> (EnumName s
) (* todo? *) +> Ast_c.rewrap_typeC ty
307 | StructUnionName
(su
, s
) ->
309 let ((structtyp
,ii
), env'
) = lookup_structunion (su
, s
) env
in
310 Ast_c.nQ
, (StructUnion
(Some s
, structtyp
), ii
)
311 (* old: +> Ast_c.rewrap_typeC ty
312 * but must wrap with good ii, otherwise pretty_print_c
313 * will be lost and raise some Impossible
321 let (t'
, env'
) = lookup_typedef s env
in
322 find_final_type t' env'
327 | ParenType t
-> find_final_type t env
328 | Typeof e
-> failwith
"typeof"
334 (* ------------------------------------------------------------ *)
335 let rec type_unfold_one_step ty env
=
337 match Ast_c.unwrap_typeC ty
with
342 | StructUnion
(sopt
, su
, fields
) -> ty
344 | FunctionType t
-> ty
345 | Enum
(s
, enumt
) -> ty
347 | EnumName s
-> ty
(* todo: look in env when will have EnumDef *)
349 | StructUnionName
(su
, s
) ->
351 let (((su
,fields
),ii
), env'
) = lookup_structunion (su
, s
) env
in
352 Ast_c.mk_ty
(StructUnion
(su
, Some s
, fields
)) ii
353 (* old: +> Ast_c.rewrap_typeC ty
354 * but must wrap with good ii, otherwise pretty_print_c
355 * will be lost and raise some Impossible
361 | TypeName
(name
, _typ
) ->
362 let s = Ast_c.str_of_name name
in
364 if !typedef_debug then pr2 "type_unfold_one_step: lookup_typedef";
365 let (t'
, env'
) = lookup_typedef s env
in
366 type_unfold_one_step t' env'
371 | ParenType t
-> type_unfold_one_step t env
373 pr2_once
("Type_annoter: not handling typeof");
375 | TypeOfType t
-> type_unfold_one_step t env
385 (* normalizer. can be seen as the opposite of the previous function as
386 * we "fold" at least for the structUnion. Should return something that
387 * Type_c.is_completed_fullType likes, something that makes it easier
388 * for the programmer to work on, that has all the needed information
391 let rec typedef_fix ty env
=
392 match Ast_c.unwrap_typeC ty
with
396 Pointer
(typedef_fix t env
) +> Ast_c.rewrap_typeC ty
398 Array
(e
, typedef_fix t env
) +> Ast_c.rewrap_typeC ty
399 | StructUnion
(su
, sopt
, fields
) ->
401 * todo? but what if correspond to a nested struct def ?
403 Type_c.structdef_to_struct_name ty
405 (FunctionType ft
) (* todo ? *) +> Ast_c.rewrap_typeC ty
407 (Enum
(s, enumt
)) (* todo? *) +> Ast_c.rewrap_typeC ty
409 (EnumName
s) (* todo? *) +> Ast_c.rewrap_typeC ty
411 (* we prefer StructUnionName to StructUnion when it comes to typed metavar *)
412 | StructUnionName
(su
, s) -> ty
414 (* keep the typename but complete with more information *)
415 | TypeName
(name
, typ
) ->
416 let s = Ast_c.str_of_name name
in
419 pr2 ("typedef value already there:" ^
s);
423 if !typedef_debug then pr2 "typedef_fix: lookup_typedef";
424 let (t'
, env'
) = lookup_typedef s env
in
426 (* bugfix: termination bug if use env instead of env' below, because
427 * can have some weird mutually recursive typedef which
428 * each new type alias search for its mutual def.
430 TypeName
(name
, Some
(typedef_fix t' env'
)) +> Ast_c.rewrap_typeC ty
435 (* remove paren for better matching with typed metavar. kind of iso again *)
439 pr2_once
("Type_annoter: not handling typeof");
446 (*****************************************************************************)
447 (* Helpers, part 1 *)
448 (*****************************************************************************)
451 (Lib.al_type
(Parse_c.type_of_string
s))
453 Common.profile_code
"Type_c.type_of_s" (fun () -> type_of_s2 a
)
457 * /home/pad/software-os-src2/freebsd/contrib/ipfilter/netinet/ip_fil_freebsd.c
458 * because in the code there is:
459 * static iss_seq_off = 0;
460 * which in the parser was generating a default int without a parse_info.
461 * I now add a fake parse_info for such default int so no more failwith
466 (* bugfix: because of string->name, the ii can be deeper *)
467 let ii = Ast_c.get_local_ii_of_tybis_inlining_ii_of_name ty
in
469 | ii::_ -> ii.Ast_c.pinfo
470 | [] -> failwith
"type has no text; need to think again"
474 let rec is_simple_expr expr
=
475 match Ast_c.unwrap_expr expr
with
476 (* todo? handle more special cases ? *)
484 | Binary
(e1
, op
, e2
) ->
488 | ParenExpr
(e
) -> is_simple_expr e
492 (*****************************************************************************)
494 (*****************************************************************************)
495 (* now in type_c.ml *)
499 (*****************************************************************************)
500 (* (Semi) Globals, Julia's style *)
501 (*****************************************************************************)
503 (* opti: cache ? use hash ? *)
504 let _scoped_env = ref !initial_env
506 (* memoise unnanoted var, to avoid too much warning messages *)
507 let _notyped_var = ref (Hashtbl.create
100)
509 let new_scope() = _scoped_env := []::!_scoped_env
510 let del_scope() = _scoped_env := List.tl
!_scoped_env
512 let do_in_new_scope f =
520 let add_in_scope namedef
=
521 let (current
, older
) = Common.uncons
!_scoped_env in
522 _scoped_env := (namedef
::current
)::older
525 (* ------------------------------------------------------------ *)
527 (* sort of hackish... *)
529 if List.length
(!_scoped_env) =|= List.length
!initial_env
530 then Ast_c.NotLocalVar
531 else Ast_c.LocalVar info
533 (* ------------------------------------------------------------ *)
534 (* the warning argument is here to allow some binding to overwrite an
535 * existing one. With function, we first have the prototype and then the def,
536 * and the def binding with the same string is not an error.
538 * todo?: but if we define two times the same function, then we will not
539 * detect it :( it would require to make a diff between adding a binding
540 * from a prototype and from a definition.
542 * opti: disabling the check_annotater flag have some important
543 * performance benefit.
546 let add_binding2 namedef warning
=
547 let (current_scope
, _older_scope
) = Common.uncons
!_scoped_env in
549 if !Flag_parsing_c.check_annotater
then begin
551 | VarOrFunc
(s, typ
) ->
552 if Hashtbl.mem
!_notyped_var s
553 then pr2 ("warning: found typing information for a variable that was" ^
554 "previously unknown:" ^
s);
560 | VarOrFunc
(s, typ
) ->
561 member_env (lookup_var s), s
562 | TypeDef
(s, typ
) ->
563 member_env (lookup_typedef s), s
564 | StructUnionNameDef
(s, (su
, typ
)) ->
565 member_env (lookup_structunion (su
, s)), s
567 member_env (lookup_macro s), s
568 | EnumConstant
(s, body
) ->
569 member_env (lookup_enum s), s
572 if memberf
[current_scope
] && warning
573 then pr2 ("Type_annoter: warning, " ^
s ^
574 " is already in current binding" ^
"\n" ^
575 " so there is a weird shadowing");
579 let add_binding namedef warning
=
580 Common.profile_code
"TAC.add_binding" (fun () -> add_binding2 namedef warning
)
584 (*****************************************************************************)
585 (* Helpers, part 2 *)
586 (*****************************************************************************)
588 let lookup_opt_env lookupf
s =
589 Common.optionise
(fun () ->
590 lookupf
s !_scoped_env
593 let unwrap_unfold_env2 typ
=
595 (type_unfold_one_step typ
!_scoped_env)
596 let unwrap_unfold_env typ
=
597 Common.profile_code
"TAC.unwrap_unfold_env" (fun () -> unwrap_unfold_env2 typ
)
599 let typedef_fix a b
=
600 Common.profile_code
"TAC.typedef_fix" (fun () -> typedef_fix a b
)
602 let make_info_def_fix x
=
603 Type_c.make_info_def
(typedef_fix x
!_scoped_env)
605 let make_info_fix (typ
, local
) =
606 Type_c.make_info
((typedef_fix typ
!_scoped_env),local
)
609 let make_info_def = Type_c.make_info_def
611 (*****************************************************************************)
612 (* Main typer code, put later in a visitor *)
613 (*****************************************************************************)
615 let annotater_expr_visitor_subpart = (fun (k
,bigf
) expr
->
618 match Ast_c.unwrap_expr expr
with
620 (* -------------------------------------------------- *)
621 (* todo: should analyse the 's' for int to know if unsigned or not *)
622 | Constant
(String
(s,kind
)) -> make_info_def (type_of_s "char *")
623 | Constant MultiString
_ -> make_info_def (type_of_s "char *")
624 | Constant
(Char
(s,kind
)) -> make_info_def (type_of_s "char")
625 | Constant
(Int
(s,kind
)) ->
626 (* this seems really unpleasant, but perhaps the type needs to be set
627 up in some way that allows pretty printing *)
630 (* matches limited by what is generated in lexer_c.mll *)
631 Si
(Signed
,CInt
) -> type_of_s "int"
632 | Si
(UnSigned
,CInt
) -> type_of_s "unsigned int"
633 | Si
(Signed
,CLong
) -> type_of_s "long"
634 | Si
(UnSigned
,CLong
) -> type_of_s "unsigned long"
635 | Si
(Signed
,CLongLong
) -> type_of_s "long long"
636 | Si
(UnSigned
,CLongLong
) -> type_of_s "unsigned long long"
637 | _ -> failwith
"unexpected kind for constant")
638 | Constant
(Float
(s,kind
)) ->
639 let fake = Ast_c.fakeInfo
(Common.fake_parse_info
) in
640 let fake = Ast_c.rewrap_str
"float" fake in
641 let iinull = [fake] in
642 make_info_def (Ast_c.mk_ty
(BaseType
(FloatType kind
)) iinull)
645 (* -------------------------------------------------- *)
646 (* note: could factorize this code with the code for Ident
647 * and the other code for Funcall below. But as the Ident can be
648 * a macro-func, I prefer to handle it separately. So
649 * this rule can handle the macro-func, the Ident-rule can handle
650 * the macro-var, and the other FunCall-rule the regular
651 * function calls through fields.
652 * Also as I don't want a warning on the Ident that are a FunCall,
653 * easier to have a rule separate from the Ident rule.
655 | FunCall
(e1
, args
) ->
656 (match Ast_c.unwrap_expr e1
with
660 args
+> List.iter
(fun (e
,ii) ->
661 (* could typecheck if arguments agree with prototype *)
662 Visitor_c.vk_argument bigf e
664 let s = Ast_c.str_of_name ident
in
665 (match lookup_opt_env lookup_var s with
666 | Some
((typ
,local
),_nextenv
) ->
668 (* set type for ident *)
669 let tyinfo = make_info_fix (typ
, local
) in
670 Ast_c.set_type_expr e1
tyinfo;
672 (match unwrap_unfold_env typ
with
673 | FunctionType
(ret
, params
) -> make_info_def ret
675 (* can be function pointer, C have an iso for that,
676 * same pfn() syntax than regular function call.
679 (match unwrap_unfold_env typ2
with
680 | FunctionType
(ret
, params
) -> make_info_def ret
681 | _ -> Type_c.noTypeHere
683 | _ -> Type_c.noTypeHere
687 (match lookup_opt_env lookup_macro s with
688 | Some
((defkind
, defval
), _nextenv
) ->
689 (match defkind
, defval
with
690 | DefineFunc
_, DefineExpr e
->
691 let rettype = Ast_c.get_onlytype_expr e
in
693 (* todo: could also set type for ident ?
694 have return type and at least type of concrete
695 parameters so can generate a fake FunctionType
698 Type_c.fake_function_type
rettype args
701 macrotype_opt +> Common.do_option
(fun t
->
702 pr2 ("Type_annotater: generate fake function type" ^
704 let tyinfo = make_info_def_fix t
in
705 Ast_c.set_type_expr e1
tyinfo;
708 Ast_c.get_type_expr e
710 pr2 ("Type_annoter: not a macro-func: " ^
s);
713 (* normally the FunCall case should have catch it *)
714 pr2 ("Type_annoter: not a macro-func-expr: " ^
s);
718 pr2_once
("type_annotater: no type for function ident: " ^
s);
727 (Ast_c.get_type_expr e1
) +> Type_c.do_with_type
(fun typ
->
728 (* copy paste of above *)
729 (match unwrap_unfold_env typ
with
730 | FunctionType
(ret
, params
) -> make_info_def ret
732 (match unwrap_unfold_env typ
with
733 | FunctionType
(ret
, params
) -> make_info_def ret
734 | _ -> Type_c.noTypeHere
736 | _ -> Type_c.noTypeHere
742 (* -------------------------------------------------- *)
744 let s = Ast_c.str_of_name ident
in
745 (match lookup_opt_env lookup_var s with
746 | Some
((typ
,local
),_nextenv
) ->
747 make_info_fix (typ
,local
)
749 (match lookup_opt_env lookup_macro s with
750 | Some
((defkind
, defval
), _nextenv
) ->
751 (match defkind
, defval
with
752 | DefineVar
, DefineExpr e
->
753 Ast_c.get_type_expr e
755 pr2 ("Type_annoter: not a expression: " ^
s);
758 (* normally the FunCall case should have catch it *)
759 pr2 ("Type_annoter: not a macro-var: " ^
s);
763 (match lookup_opt_env lookup_enum s with
764 | Some
(_, _nextenv
) ->
765 make_info_def (type_of_s "int")
767 if not
(s =~
"[A-Z_]+") (* if macro then no warning *)
769 if !Flag_parsing_c.check_annotater
then
770 if not
(Hashtbl.mem
!_notyped_var s)
772 pr2 ("Type_annoter: not finding type for: " ^
s);
773 Hashtbl.add
!_notyped_var s true;
777 pr2 ("Type_annoter: not finding type for: " ^
s)
784 (* -------------------------------------------------- *)
785 (* C isomorphism on type on array and pointers *)
787 | ArrayAccess
(e
, _) ->
788 k expr
; (* recurse to set the types-ref of sub expressions *)
790 (Ast_c.get_type_expr e
) +> Type_c.do_with_type
(fun t
->
791 (* todo: maybe not good env !! *)
792 match unwrap_unfold_env t
with
796 | _ -> Type_c.noTypeHere
800 | Unary
(e
, GetRef
) ->
801 k expr
; (* recurse to set the types-ref of sub expressions *)
803 (Ast_c.get_type_expr e
) +> Type_c.do_with_type
(fun t
->
804 (* must generate an element so that '=' can be used
807 let fake = Ast_c.fakeInfo
Common.fake_parse_info
in
808 let fake = Ast_c.rewrap_str
"*" fake in
810 let ft = Ast_c.mk_ty
(Pointer t
) [fake] in
815 (* -------------------------------------------------- *)
817 | RecordAccess
(e
, namefld
)
818 | RecordPtAccess
(e
, namefld
) as x
->
820 let fld = Ast_c.str_of_name namefld
in
822 k expr
; (* recurse to set the types-ref of sub expressions *)
824 (Ast_c.get_type_expr e
) +> Type_c.do_with_type
(fun t
->
828 | RecordAccess
_ -> Some t
829 | RecordPtAccess
_ ->
830 (match unwrap_unfold_env t
with
831 | Pointer
(t
) -> Some t
834 | _ -> raise Impossible
838 | None
-> Type_c.noTypeHere
840 match unwrap_unfold_env t
with
841 | StructUnion
(su
, sopt
, fields
) ->
843 (* todo: which env ? *)
845 (Type_c.type_field
fld (su
, fields
))
849 "TYPE-ERROR: field '%s' does not belong in struct %s"
850 fld (match sopt
with Some
s -> s |_ -> "<anon>"));
853 pr2 "TAC:MultiFound";
856 | _ -> Type_c.noTypeHere
862 (* -------------------------------------------------- *)
865 (* todo: if infer, can "push" info ? add_types_expr [t] e ? *)
866 make_info_def_fix (Lib.al_type t
)
868 (* todo? lub, hmm maybe not, cos type must be e1 *)
869 | Assignment
(e1
, op
, e2
) ->
871 (* value of an assignment is the value of the RHS expression *)
872 Ast_c.get_type_expr e2
873 | Sequence
(e1
, e2
) ->
875 Ast_c.get_type_expr e2
877 | Binary
(e1
, Logical
_, e2
) ->
879 make_info_def (type_of_s "int")
882 | Binary
(e1
, Arith op
, e2
) ->
884 Type_c.lub op
(Type_c.get_opt_type e1
) (Type_c.get_opt_type e2
)
886 | CondExpr
(cond
, e1opt
, e2
) ->
888 Ast_c.get_type_expr e2
893 Ast_c.get_type_expr e
895 | Infix
(e
, op
) | Postfix
(e
, op
) ->
897 Ast_c.get_type_expr e
899 (* pad: julia wrote this ? *)
900 | Unary
(e
, UnPlus
) ->
901 k expr
; (* recurse to set the types-ref of sub expressions *)
902 make_info_def (type_of_s "int")
903 (* todo? can convert from unsigned to signed if UnMinus ? *)
904 | Unary
(e
, UnMinus
) ->
905 k expr
; (* recurse to set the types-ref of sub expressions *)
906 make_info_def (type_of_s "int")
908 | SizeOfType
_|SizeOfExpr
_ ->
909 k expr
; (* recurse to set the types-ref of sub expressions *)
910 make_info_def (type_of_s "int")
912 | Constructor
(ft, ini
) ->
913 k expr
; (* recurse to set the types-ref of sub expressions *)
914 make_info_def (Lib.al_type
ft)
917 k expr
; (* recurse to set the types-ref of sub expressions *)
918 Ast_c.get_type_expr e
919 | Unary
(e
, Tilde
) ->
920 k expr
; (* recurse to set the types-ref of sub expressions *)
921 Ast_c.get_type_expr e
923 (* -------------------------------------------------- *)
925 | Unary
(_, GetRefLabel
) ->
926 k expr
; (* recurse to set the types-ref of sub expressions *)
927 pr2_once
"Type annotater:not handling GetRefLabel";
931 k expr
; (* recurse to set the types-ref of sub expressions *)
932 pr2_once
"Type annotater:not handling GetRefLabel";
935 | _ -> k expr; Type_c.noTypeHere
939 Ast_c.set_type_expr expr
ty
944 (*****************************************************************************)
946 (*****************************************************************************)
948 (* Processing includes that were added after a cpp_ast_c makes the
949 * type annotater quite slow, especially when the depth of cpp_ast_c is
950 * big. But for such includes the only thing we really want is to modify
951 * the environment to have enough type information. We don't need
952 * to type the expressions inside those includes (they will be typed
953 * when we process the include file directly). Here the goal is
956 * Note that as usually header files contain mostly structure
957 * definitions and defines, that means we still have to do lots of work.
958 * We only win on function definition bodies, but usually header files
959 * have just prototypes, or inline function definitions which anyway have
960 * usually a small body. But still, we win. It also makes clearer
961 * that when processing include as we just need the environment, the caller
962 * of this module can do further optimisations such as memorising the
963 * state of the environment after each header files.
966 * For sparse its makes the annotating speed goes from 9s to 4s
967 * For Linux the speedup is even better, from ??? to ???.
969 * Because There would be some copy paste with annotate_program, it is
970 * better to factorize code hence the just_add_in_env parameter below.
972 * todo? alternative optimisation for the include problem:
973 * - processing all headers files one time and construct big env
974 * - use hashtbl for env (but apparently not biggest problem)
977 let rec visit_toplevel ~just_add_in_env ~depth elem
=
978 let need_annotate_body = not just_add_in_env
in
980 let bigf = { Visitor_c.default_visitor_c
with
982 (* ------------------------------------------------------------ *)
983 Visitor_c.kcppdirective
= (fun (k
, bigf) directive
->
985 (* do error messages for type annotater only for the real body of the
986 * file, not inside include.
988 | Include
{i_content
= opt
} ->
989 opt
+> Common.do_option
(fun (filename
, program
) ->
990 Common.save_excursion
Flag_parsing_c.verbose_type
(fun () ->
991 Flag_parsing_c.verbose_type
:= false;
993 (* old: Visitor_c.vk_program bigf program;
994 * opti: set the just_add_in_env
996 program
+> List.iter
(fun elem
->
997 visit_toplevel ~just_add_in_env
:true ~depth
:(depth
+1) elem
1002 | Define
((s,ii), (defkind
, defval
)) ->
1005 (* even if we are in a just_add_in_env phase, such as when
1006 * we process include, as opposed to the body of functions,
1007 * with macros we still to type the body of the macro as
1008 * the macro has no type and so we infer its type from its
1009 * body (and one day later maybe from its use).
1012 (* can try to optimize and recurse only when the define body
1016 | DefineExpr expr
->
1017 if is_simple_expr expr
1018 (* even if not need_annotate_body, still recurse*)
1021 if need_annotate_body
1024 if need_annotate_body
1028 add_binding (Macro
(s, (defkind
, defval
) )) true;
1031 | PragmaAndCo
_ -> ()
1034 (* ------------------------------------------------------------ *)
1035 (* main typer code *)
1036 (* ------------------------------------------------------------ *)
1037 Visitor_c.kexpr
= annotater_expr_visitor_subpart;
1039 (* ------------------------------------------------------------ *)
1040 Visitor_c.kstatement
= (fun (k
, bigf) st
->
1041 match Ast_c.unwrap_st st
with
1042 | Compound statxs
-> do_in_new_scope (fun () -> k st
);
1045 (* ------------------------------------------------------------ *)
1046 Visitor_c.kdecl
= (fun (k
, bigf) d
->
1048 | (DeclList
(xs
, ii)) ->
1049 xs
+> List.iter
(fun ({v_namei
= var
; v_type
= t
;
1050 v_storage
= sto
; v_local
= local
}, iicomma
) ->
1052 (* to add possible definition in type found in Decl *)
1053 Visitor_c.vk_type
bigf t
;
1058 | Ast_c.NotLocalDecl
-> Ast_c.NotLocalVar
1059 | Ast_c.LocalDecl
-> Ast_c.LocalVar
(offset t
)
1062 var
+> Common.do_option
(fun (name
, iniopt
) ->
1063 let s = Ast_c.str_of_name name
in
1066 | StoTypedef
, _inline
->
1067 add_binding (TypeDef
(s,Lib.al_type t
)) true;
1069 add_binding (VarOrFunc
(s, (Lib.al_type t
, local))) true;
1072 if need_annotate_body then begin
1073 (* int x = sizeof(x) is legal so need process ini *)
1074 iniopt
+> Common.do_option
(fun (info
, ini
) ->
1075 Visitor_c.vk_ini
bigf ini
1081 if need_annotate_body
1087 (* ------------------------------------------------------------ *)
1088 Visitor_c.ktype
= (fun (k
, bigf) typ
->
1089 (* bugfix: have a 'Lib.al_type typ' before, but because we can
1090 * have enum with possible expression, we don't want to change
1091 * the ref of abstract-lined types, but the real one, so
1092 * don't al_type here
1094 let (_q
, tbis
) = typ
in
1095 match Ast_c.unwrap_typeC typ
with
1096 | StructUnion
(su
, Some
s, structType
) ->
1097 let structType'
= Lib.al_fields
structType in
1098 let ii = Ast_c.get_ii_typeC_take_care tbis
in
1099 let ii'
= Lib.al_ii
ii in
1100 add_binding (StructUnionNameDef
(s, ((su
, structType'
),ii'
))) true;
1102 if need_annotate_body
1103 then k typ
(* todo: restrict ? new scope so use do_in_scope ? *)
1105 | Enum
(sopt
, enums
) ->
1107 enums
+> List.iter
(fun ((name
, eopt
), iicomma
) ->
1109 let s = Ast_c.str_of_name name
in
1111 if need_annotate_body
1112 then eopt
+> Common.do_option
(fun (ieq
, e
) ->
1113 Visitor_c.vk_expr
bigf e
1115 add_binding (EnumConstant
(s, sopt
)) true;
1119 (* TODO: if have a TypeName, then maybe can fill the option
1123 if need_annotate_body
1128 (* ------------------------------------------------------------ *)
1129 Visitor_c.ktoplevel
= (fun (k
, bigf) elem
->
1130 _notyped_var := Hashtbl.create
100;
1134 f_type
= ((returnt
, (paramst
, b
)) as ftyp
);
1137 f_old_c_style
= oldstyle
;
1143 | iifunc1
::iifunc2
::ibrace1
::ibrace2
::ifakestart
::isto
->
1145 | _ -> raise Impossible
1147 let funcs = Ast_c.str_of_name name
in
1149 (match oldstyle
with
1152 Lib.al_type
(Ast_c.mk_ty
(FunctionType ftyp
) [i1
;i2
]) in
1154 add_binding (VarOrFunc
(funcs, (typ'
,islocal i1
.Ast_c.pinfo
)))
1157 if need_annotate_body then
1158 do_in_new_scope (fun () ->
1159 paramst
+> List.iter
(fun ({p_namei
= nameopt
; p_type
= t
},_)->
1162 let s = Ast_c.str_of_name name
in
1163 let local = Ast_c.LocalVar
(offset t
) in
1164 add_binding (VarOrFunc
(s,(Lib.al_type t
,local))) true
1166 pr2 "no type, certainly because Void type ?"
1172 (* generate regular function type *)
1174 pr2 "TODO generate type for function";
1176 if need_annotate_body then
1177 do_in_new_scope (fun () ->
1178 (* recurse. should naturally call the kdecl visitor and
1187 | Define
((s,ii), (DefineVar
, DefineType t
)) ->
1188 add_binding (TypeDef
(s,Lib.al_type t
)) true;
1199 | NotParsedCorrectly
_
1209 then Visitor_c.vk_toplevel
bigf elem
1211 Common.profile_code
"TAC.annotate_only_included" (fun () ->
1212 Visitor_c.vk_toplevel
bigf elem
1214 else Visitor_c.vk_toplevel
bigf elem
1216 (*****************************************************************************)
1218 (*****************************************************************************)
1219 (* catch all the decl to grow the environment *)
1222 let rec (annotate_program2
:
1223 environment
-> toplevel list
-> (toplevel
* environment
Common.pair
) list
) =
1226 (* globals (re)initialialisation *)
1228 _notyped_var := (Hashtbl.create
100);
1230 prog
+> List.map
(fun elem
->
1231 let beforeenv = !_scoped_env in
1232 visit_toplevel ~just_add_in_env
:false ~depth
:0 elem
;
1233 let afterenv = !_scoped_env in
1234 (elem
, (beforeenv, afterenv))
1240 (*****************************************************************************)
1242 (*****************************************************************************)
1244 (* julia: for coccinelle *)
1245 let annotate_test_expressions prog
=
1246 let rec propagate_test e
=
1247 let ((e_term
,info
),_) = e
in
1248 let (ty,_) = !info
in
1251 Binary
(e1
,Logical
(AndLog
),e2
)
1252 | Binary
(e1
,Logical
(OrLog
),e2
) -> propagate_test e1
; propagate_test e2
1253 | Unary
(e1
,Not
) -> propagate_test e1
1254 | ParenExpr
(e
) -> propagate_test e
1257 let bigf = { Visitor_c.default_visitor_c
with
1258 Visitor_c.kexpr
= (fun (k
,bigf) expr
->
1259 (match unwrap_expr expr
with
1260 CondExpr
(e
,_,_) -> propagate_test e
1265 Visitor_c.kstatement
= (fun (k
, bigf) st
->
1266 match unwrap_st st
with
1268 (match s with If
(e1
,s1
,s2
) -> propagate_test e1
| _ -> ());
1272 While
(e
,s) -> propagate_test e
1273 | DoWhile
(s,e
) -> propagate_test e
1275 (match unwrap es
with Some e
-> propagate_test e
| None
-> ())
1281 (prog
+> List.iter
(fun elem
->
1282 Visitor_c.vk_toplevel
bigf elem
1287 (*****************************************************************************)
1288 (* Annotate types *)
1289 (*****************************************************************************)
1290 let annotate_program env prog
=
1291 Common.profile_code
"TAC.annotate_program"
1293 let res = annotate_program2 env prog
in
1294 annotate_test_expressions prog
;
1298 let annotate_type_and_localvar env prog
=
1299 Common.profile_code
"TAC.annotate_type"
1300 (fun () -> annotate_program2 env prog
)
1303 (*****************************************************************************)
1304 (* changing default typing environment, do concatenation *)
1305 let init_env filename
=
1306 pr2 ("init_env: " ^ filename
);
1307 let (ast2
, _stat
) = Parse_c.parse_c_and_cpp filename
in
1308 let ast = Parse_c.program_of_program2 ast2
in
1310 let res = annotate_type_and_localvar !initial_env ast in
1311 match List.rev
res with
1312 | [] -> pr2 "empty environment"
1313 | (_top
,(env1
,env2
))::xs
->
1314 initial_env := !initial_env ++ env2
;