3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes,
5 * Copyright (C) 2009 University of Urbana Champaign
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License (GPL)
9 * version 2 as published by the Free Software Foundation.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * file license.txt for more details.
21 module Lib
= Lib_parsing_c
23 (*****************************************************************************)
25 (*****************************************************************************)
27 * - Done a first type checker in 2002, cf typing-semantic/, but
28 * was assuming that have all type info, and so was assuming had called
29 * cpp and everything was right.
30 * - Wrote this file, in 2006?, as we added pattern matching on type
31 * in coccinelle. Partial type annotater.
32 * - Julia extended it in 2008? to have localvar/notlocalvar and
33 * test/notest information, again used by coccinelle.
34 * - I extended it in Fall 2008 to have more type information for the
35 * global analysis. I also added some optimisations to process
36 * included code faster.
39 * Design choices. Can either do:
41 * - can first do a simple inferer, that just pass context
42 * - then a real inferer, managing partial info.
43 * type context = fullType option
45 * - extract the information from the .h files
46 * (so no inference at all needed)
48 * Difference with julia's code in parsing_cocci/type_infer.ml:
49 * - She handles just the variable namespace. She does not type
50 * field access or enum or macros. This is because cocci programs are
51 * usually simple and have no structure definition or macro definitions
52 * that we need to type anyway.
53 * - She does more propagation.
54 * - She does not have to handle the typedef isomorphism which force me
55 * to use those typedef_fix and type_unfold_one_step
56 * - She does not handle I think the function pointer C isomorphism.
58 * - She has a cleaner type_cocci without any info. In my case
59 * I need to do those ugly al_type, or generate fake infos.
60 * - She has more compact code. Perhaps because she does not have to
61 * handle the extra exp_info that she added on me :) So I need those
62 * do_with_type, make_info_xxx, etc.
64 * Note: if need to debug this annotater, use -show_trace_profile, it can
65 * help. You can also set the typedef_debug flag below.
69 * todo: expression contain types, and statements, which in turn can contain
70 * expression, so need recurse. Need define an annote_statement and
73 * todo: how deal with typedef isomorphisms ? How store them in Ast_c ?
74 * store all posible variations in ast_c ? a list of type instead of just
77 * todo: how to handle multiple possible definitions for entities like
78 * struct or typedefs ? Because of ifdef, we should store list of
79 * possibilities sometimes.
81 * todo: define a new type ? like type_cocci ? where have a bool ?
83 * semi: How handle scope ? When search for type of field, we return
84 * a type, but this type makes sense only in a certain scope.
85 * We could add a tag to each typedef, structUnionName to differentiate
86 * them and also associate in ast_c to the type the scope
87 * of this type, the env that were used to define this type.
89 * todo: handle better the search in previous env, the env'. Cf the
90 * termination problem in typedef_fix when I was searching in the same
95 (*****************************************************************************)
97 (*****************************************************************************)
98 let pr2, pr2_once
= Common.mk_pr2_wrappers
Flag_parsing_c.verbose_type
100 (*****************************************************************************)
102 (*****************************************************************************)
104 (* The different namespaces from stdC manual:
106 * You introduce two new name spaces with every block that you write.
108 * One name space includes all
111 * - type definitions,
112 * - and enumeration constants
113 * that you declare or define within the block.
115 * The other name space includes all
119 * *tags* that you define within the block.
121 * You introduce a new member name space with every structure or union
122 * whose content you define. You identify a member name space by the
123 * type of left operand that you write for a member selection
124 * operator, as in x.y or p->y. A member name space ends with the end
125 * of the block in which you declare it.
127 * You introduce a new goto label name space with every function
128 * definition you write. Each goto label name space ends with its
129 * function definition.
132 (* But I don't try to do a type-checker, I try to "resolve" type of var
133 * so don't need make difference between namespaces here.
135 * But, why not make simply a (string, kindstring) assoc ?
136 * Because we dont want that a variable shadow a struct definition, because
137 * they are still in 2 different namespace. But could for typedef,
138 * because VarOrFunc and Typedef are in the same namespace.
139 * But could do a record as in c_info.ml
143 (* This type contains all "ident" like notion of C. Each time in Ast_c
144 * you have a string type (as in expression, function name, fields)
145 * then you need to manage the scope of this ident.
147 * The wrap for StructUnionNameDef contain the whole ii, the i for
148 * the string, the structUnion and the structType.
150 * Put Macro here ? after all the scoping rules for cpp macros is different
151 * and so does not vanish after the closing '}'.
156 | VarOrFunc
of string * Ast_c.exp_type
157 | EnumConstant
of string * string option
159 (* also used for macro type aliases *)
160 | TypeDef
of string * fullType
161 (* the structType contains nested "idents" with struct scope *)
162 | StructUnionNameDef
of string * (structUnion
* structType
) wrap
165 | Macro
of string * (define_kind
* define_val
)
168 (* Because have nested scope, have nested list, hence the list list.
170 * opti? use a hash to accelerate ? hmm but may have some problems
171 * with hash to handle recursive lookup. For instance for the typedef
172 * example where have mutually recursive definition of the type,
173 * we must take care to not loop by starting the second search
174 * from the previous environment. With the list scheme in
175 * lookup_env below it's quite easy to do. With hash it may be
178 type environment
= namedef list list
181 (* ------------------------------------------------------------ *)
182 (* can be modified by the init_env function below, by
183 * the file environment_unix.h
185 let initial_env = ref [
187 (Lib.al_type
(Parse_c.type_of_string
"void *"),
192 (Lib.al_type(Parse_c.type_of_string "void* (*)(int size)"),
195 (Lib.al_type(Parse_c.type_of_string "void (*)(void *ptr)"),
202 let typedef_debug = ref false
205 (* ------------------------------------------------------------ *)
206 (* generic, lookup and also return remaining env for further lookup *)
207 let rec lookup_env2 f env
=
209 | [] -> raise Not_found
210 | []::zs
-> lookup_env2 f zs
213 | None
-> lookup_env2 f
(xs
::zs
)
214 | Some y
-> y
, xs
::zs
217 Common.profile_code
"TAC.lookup_env" (fun () -> lookup_env2 a b
)
221 let member_env lookupf env
=
223 let _ = lookupf env
in
225 with Not_found
-> false
230 (* ------------------------------------------------------------ *)
233 let lookup_var s env
=
235 | VarOrFunc
(s2
, typ
) -> if s2
=$
= s
then Some typ
else None
240 let lookup_typedef s env
=
241 if !typedef_debug then pr2 ("looking for: " ^ s
);
243 | TypeDef
(s2
, typ
) -> if s2
=$
= s
then Some typ
else None
248 let lookup_structunion (_su
, s
) env
=
250 | StructUnionNameDef
(s2
, typ
) -> if s2
=$
= s
then Some typ
else None
255 let lookup_macro s env
=
257 | Macro
(s2
, typ
) -> if s2
=$
= s
then Some typ
else None
262 let lookup_enum s env
=
264 | EnumConstant
(s2
, typ
) -> if s2
=$
= s
then Some typ
else None
270 let lookup_typedef a b
=
271 Common.profile_code
"TAC.lookup_typedef" (fun () -> lookup_typedef a b
)
275 (*****************************************************************************)
277 (*****************************************************************************)
279 (* find_final_type is used to know to what type a field correspond in
280 * x.foo. Sometimes the type of x is a typedef or a structName in which
281 * case we must look in environment to find the complete type, here
282 * structUnion that contains the information.
284 * Because in C one can redefine in nested blocks some typedefs,
285 * struct, or variables, we have a static scoping resolving process.
286 * So, when we look for the type of a var, if this var is in an
287 * enclosing block, then maybe its type refer to a typdef of this
288 * enclosing block, so must restart the "type-resolving" of this
289 * typedef from this enclosing block, not from the bottom. So our
290 * "resolving-type functions" take an env and also return an env from
291 * where the next search must be performed. *)
294 let rec find_final_type ty env =
296 match Ast_c.unwrap_typeC ty with
297 | BaseType x -> (BaseType x) +> Ast_c.rewrap_typeC ty
299 | Pointer t -> (Pointer (find_final_type t env)) +> Ast_c.rewrap_typeC ty
300 | Array (e, t) -> Array (e, find_final_type t env) +> Ast_c.rewrap_typeC ty
302 | StructUnion (sopt, su) -> StructUnion (sopt, su) +> Ast_c.rewrap_typeC ty
304 | FunctionType t -> (FunctionType t) (* todo ? *) +> Ast_c.rewrap_typeC ty
305 | Enum
(s
, enumt
) -> (Enum
(s
, enumt
)) (* todo? *) +> Ast_c.rewrap_typeC ty
306 | EnumName s
-> (EnumName s
) (* todo? *) +> Ast_c.rewrap_typeC ty
308 | StructUnionName
(su
, s
) ->
310 let ((structtyp
,ii
), env'
) = lookup_structunion (su
, s
) env
in
311 Ast_c.nQ
, (StructUnion
(Some s
, structtyp
), ii
)
312 (* old: +> Ast_c.rewrap_typeC ty
313 * but must wrap with good ii, otherwise pretty_print_c
314 * will be lost and raise some Impossible
322 let (t'
, env'
) = lookup_typedef s env
in
323 find_final_type t' env'
328 | ParenType t
-> find_final_type t env
329 | Typeof e
-> failwith
"typeof"
335 (* ------------------------------------------------------------ *)
336 let rec type_unfold_one_step ty env
=
338 match Ast_c.unwrap_typeC ty
with
343 | StructUnion
(sopt
, su
, fields
) -> ty
345 | FunctionType t
-> ty
346 | Enum
(s
, enumt
) -> ty
348 | EnumName s
-> ty
(* todo: look in env when will have EnumDef *)
350 | StructUnionName
(su
, s
) ->
352 let (((su
,fields
),ii
), env'
) = lookup_structunion (su
, s
) env
in
353 Ast_c.mk_ty
(StructUnion
(su
, Some s
, fields
)) ii
354 (* old: +> Ast_c.rewrap_typeC ty
355 * but must wrap with good ii, otherwise pretty_print_c
356 * will be lost and raise some Impossible
362 | TypeName
(name
, _typ
) ->
363 let s = Ast_c.str_of_name name
in
365 if !typedef_debug then pr2 "type_unfold_one_step: lookup_typedef";
366 let (t'
, env'
) = lookup_typedef s env
in
367 type_unfold_one_step t' env'
372 | ParenType t
-> type_unfold_one_step t env
374 pr2_once
("Type_annoter: not handling typeof");
376 | TypeOfType t
-> type_unfold_one_step t env
386 (* normalizer. can be seen as the opposite of the previous function as
387 * we "fold" at least for the structUnion. Should return something that
388 * Type_c.is_completed_fullType likes, something that makes it easier
389 * for the programmer to work on, that has all the needed information
392 let rec typedef_fix ty env
=
393 match Ast_c.unwrap_typeC ty
with
397 Pointer
(typedef_fix t env
) +> Ast_c.rewrap_typeC ty
399 Array
(e
, typedef_fix t env
) +> Ast_c.rewrap_typeC ty
400 | StructUnion
(su
, sopt
, fields
) ->
402 * todo? but what if correspond to a nested struct def ?
404 Type_c.structdef_to_struct_name ty
406 (FunctionType ft
) (* todo ? *) +> Ast_c.rewrap_typeC ty
408 (Enum
(s, enumt
)) (* todo? *) +> Ast_c.rewrap_typeC ty
410 (EnumName
s) (* todo? *) +> Ast_c.rewrap_typeC ty
412 (* we prefer StructUnionName to StructUnion when it comes to typed metavar *)
413 | StructUnionName
(su
, s) -> ty
415 (* keep the typename but complete with more information *)
416 | TypeName
(name
, typ
) ->
417 let s = Ast_c.str_of_name name
in
420 pr2 ("typedef value already there:" ^
s);
424 if !typedef_debug then pr2 "typedef_fix: lookup_typedef";
425 let (t'
, env'
) = lookup_typedef s env
in
427 (* bugfix: termination bug if use env instead of env' below, because
428 * can have some weird mutually recursive typedef which
429 * each new type alias search for its mutual def.
431 TypeName
(name
, Some
(typedef_fix t' env'
)) +> Ast_c.rewrap_typeC ty
436 (* remove paren for better matching with typed metavar. kind of iso again *)
440 pr2_once
("Type_annoter: not handling typeof");
447 (*****************************************************************************)
448 (* Helpers, part 1 *)
449 (*****************************************************************************)
452 (Lib.al_type
(Parse_c.type_of_string
s))
454 Common.profile_code
"Type_c.type_of_s" (fun () -> type_of_s2 a
)
458 * /home/pad/software-os-src2/freebsd/contrib/ipfilter/netinet/ip_fil_freebsd.c
459 * because in the code there is:
460 * static iss_seq_off = 0;
461 * which in the parser was generating a default int without a parse_info.
462 * I now add a fake parse_info for such default int so no more failwith
466 let rec is_simple_expr expr
=
467 match Ast_c.unwrap_expr expr
with
468 (* todo? handle more special cases ? *)
476 | Binary
(e1
, op
, e2
) ->
480 | ParenExpr
(e
) -> is_simple_expr e
484 (*****************************************************************************)
486 (*****************************************************************************)
487 (* now in type_c.ml *)
491 (*****************************************************************************)
492 (* (Semi) Globals, Julia's style *)
493 (*****************************************************************************)
495 (* opti: cache ? use hash ? *)
496 let _scoped_env = ref !initial_env
498 (* memoise unnanoted var, to avoid too much warning messages *)
499 let _notyped_var = ref (Hashtbl.create
100)
501 let new_scope() = _scoped_env := []::!_scoped_env
502 let del_scope() = _scoped_env := List.tl
!_scoped_env
504 let do_in_new_scope f =
512 let add_in_scope namedef
=
513 let (current
, older
) = Common.uncons
!_scoped_env in
514 _scoped_env := (namedef
::current
)::older
517 (* ------------------------------------------------------------ *)
519 (* sort of hackish... *)
521 if List.length
(!_scoped_env) =|= List.length
!initial_env
522 then Ast_c.NotLocalVar
523 else Ast_c.LocalVar info
525 (* ------------------------------------------------------------ *)
526 (* the warning argument is here to allow some binding to overwrite an
527 * existing one. With function, we first have the prototype and then the def,
528 * and the def binding with the same string is not an error.
530 * todo?: but if we define two times the same function, then we will not
531 * detect it :( it would require to make a diff between adding a binding
532 * from a prototype and from a definition.
534 * opti: disabling the check_annotater flag have some important
535 * performance benefit.
538 let add_binding2 namedef warning
=
539 let (current_scope
, _older_scope
) = Common.uncons
!_scoped_env in
541 if !Flag_parsing_c.check_annotater
then begin
543 | VarOrFunc
(s, typ
) ->
544 if Hashtbl.mem
!_notyped_var s
545 then pr2 ("warning: found typing information for a variable that was" ^
546 "previously unknown:" ^
s);
552 | VarOrFunc
(s, typ
) ->
553 member_env (lookup_var s), s
554 | TypeDef
(s, typ
) ->
555 member_env (lookup_typedef s), s
556 | StructUnionNameDef
(s, (su
, typ
)) ->
557 member_env (lookup_structunion (su
, s)), s
559 member_env (lookup_macro s), s
560 | EnumConstant
(s, body
) ->
561 member_env (lookup_enum s), s
564 if memberf
[current_scope
] && warning
565 then pr2 ("Type_annoter: warning, " ^
s ^
566 " is already in current binding" ^
"\n" ^
567 " so there is a weird shadowing");
571 let add_binding namedef warning
=
572 Common.profile_code
"TAC.add_binding" (fun () -> add_binding2 namedef warning
)
576 (*****************************************************************************)
577 (* Helpers, part 2 *)
578 (*****************************************************************************)
580 let lookup_opt_env lookupf
s =
581 Common.optionise
(fun () ->
582 lookupf
s !_scoped_env
585 let unwrap_unfold_env2 typ
=
587 (type_unfold_one_step typ
!_scoped_env)
588 let unwrap_unfold_env typ
=
589 Common.profile_code
"TAC.unwrap_unfold_env" (fun () -> unwrap_unfold_env2 typ
)
591 let typedef_fix a b
=
592 Common.profile_code
"TAC.typedef_fix" (fun () -> typedef_fix a b
)
594 let make_info_def_fix x
=
595 Type_c.make_info_def
(typedef_fix x
!_scoped_env)
597 let make_info_fix (typ
, local
) =
598 Type_c.make_info
((typedef_fix typ
!_scoped_env),local
)
601 let make_info_def = Type_c.make_info_def
603 (*****************************************************************************)
604 (* Main typer code, put later in a visitor *)
605 (*****************************************************************************)
607 let annotater_expr_visitor_subpart = (fun (k
,bigf
) expr
->
610 match Ast_c.unwrap_expr expr
with
612 (* -------------------------------------------------- *)
613 (* todo: should analyse the 's' for int to know if unsigned or not *)
614 | Constant
(String
(s,kind
)) -> make_info_def (type_of_s "char []")
615 | Constant MultiString
_ -> make_info_def (type_of_s "char []")
616 | Constant
(Char
(s,kind
)) -> make_info_def (type_of_s "char")
617 | Constant
(Int
(s,kind
)) ->
618 (* this seems really unpleasant, but perhaps the type needs to be set
619 up in some way that allows pretty printing *)
622 (* matches limited by what is generated in lexer_c.mll *)
623 Si
(Signed
,CInt
) -> type_of_s "int"
624 | Si
(UnSigned
,CInt
) -> type_of_s "unsigned int"
625 | Si
(Signed
,CLong
) -> type_of_s "long"
626 | Si
(UnSigned
,CLong
) -> type_of_s "unsigned long"
627 | Si
(Signed
,CLongLong
) -> type_of_s "long long"
628 | Si
(UnSigned
,CLongLong
) -> type_of_s "unsigned long long"
629 | _ -> failwith
"unexpected kind for constant")
630 | Constant
(Float
(s,kind
)) ->
631 let fake = Ast_c.fakeInfo
(Common.fake_parse_info
) in
632 let fake = Ast_c.rewrap_str
"float" fake in
633 let iinull = [fake] in
634 make_info_def (Ast_c.mk_ty
(BaseType
(FloatType kind
)) iinull)
637 (* -------------------------------------------------- *)
638 (* note: could factorize this code with the code for Ident
639 * and the other code for Funcall below. But as the Ident can be
640 * a macro-func, I prefer to handle it separately. So
641 * this rule can handle the macro-func, the Ident-rule can handle
642 * the macro-var, and the other FunCall-rule the regular
643 * function calls through fields.
644 * Also as I don't want a warning on the Ident that are a FunCall,
645 * easier to have a rule separate from the Ident rule.
647 | FunCall
(e1
, args
) ->
648 (match Ast_c.unwrap_expr e1
with
651 args
+> List.iter
(fun (e
,ii
) ->
652 (* could typecheck if arguments agree with prototype *)
653 Visitor_c.vk_argument bigf e
655 let s = Ast_c.str_of_name ident
in
656 (match lookup_opt_env lookup_var s with
657 | Some
((typ
,local
),_nextenv
) ->
659 (* set type for ident *)
660 let tyinfo = make_info_fix (typ
, local
) in
661 Ast_c.set_type_expr e1
tyinfo;
663 (match unwrap_unfold_env typ
with
664 | FunctionType
(ret
, params
) -> make_info_def ret
666 (* can be function pointer, C have an iso for that,
667 * same pfn() syntax than regular function call.
670 (match unwrap_unfold_env typ2
with
671 | FunctionType
(ret
, params
) -> make_info_def ret
672 | _ -> Type_c.noTypeHere
674 | _ -> Type_c.noTypeHere
678 (match lookup_opt_env lookup_macro s with
679 | Some
((defkind
, defval
), _nextenv
) ->
680 (match defkind
, defval
with
681 | DefineFunc
_, DefineExpr e
->
682 let rettype = Ast_c.get_onlytype_expr e
in
684 (* todo: could also set type for ident ?
685 have return type and at least type of concrete
686 parameters so can generate a fake FunctionType
689 Type_c.fake_function_type
rettype args
692 macrotype_opt +> Common.do_option
(fun t
->
693 pr2 ("Type_annotater: generate fake function type" ^
695 let tyinfo = make_info_def_fix t
in
696 Ast_c.set_type_expr e1
tyinfo;
699 Ast_c.get_type_expr e
701 pr2 ("Type_annoter: not a macro-func: " ^
s);
704 pr2 ("Type_annoter: not a macro-func: " ^
s);
707 (* normally the FunCall case should have caught it *)
708 pr2 ("Type_annoter: not a macro-func-expr: " ^
s);
712 pr2_once
("type_annotater: no type for function ident: " ^
s);
721 (Ast_c.get_type_expr e1
) +> Type_c.do_with_type
(fun typ
->
722 (* copy paste of above *)
723 (match unwrap_unfold_env typ
with
724 | FunctionType
(ret
, params
) -> make_info_def ret
726 (match unwrap_unfold_env typ
with
727 | FunctionType
(ret
, params
) -> make_info_def ret
728 | _ -> Type_c.noTypeHere
730 | _ -> Type_c.noTypeHere
736 (* -------------------------------------------------- *)
738 let s = Ast_c.str_of_name ident
in
739 (match lookup_opt_env lookup_var s with
740 | Some
((typ
,local
),_nextenv
) ->
741 make_info_fix (typ
,local
)
743 (match lookup_opt_env lookup_macro s with
744 | Some
((defkind
, defval
), _nextenv
) ->
745 (match defkind
, defval
with
746 | DefineVar
, DefineExpr e
->
747 Ast_c.get_type_expr e
749 pr2 ("Type_annoter: not a expression: " ^
s);
752 (* normally the FunCall case should have catch it *)
753 pr2 ("Type_annoter: not a macro-var: " ^
s);
756 pr2 ("Type_annoter: not a expression: " ^
s);
760 (match lookup_opt_env lookup_enum s with
761 | Some
(_, _nextenv
) ->
762 make_info_def (type_of_s "int")
764 if not
(s =~
"[A-Z_]+") (* if macro then no warning *)
766 if !Flag_parsing_c.check_annotater
then
767 if not
(Hashtbl.mem
!_notyped_var s)
769 pr2 ("Type_annoter: no type found for: " ^
s);
770 Hashtbl.add
!_notyped_var s true;
774 pr2 ("Type_annoter: no type found for: " ^
s)
781 (* -------------------------------------------------- *)
782 (* C isomorphism on type on array and pointers *)
784 | ArrayAccess
(e
, _) ->
785 k expr
; (* recurse to set the types-ref of sub expressions *)
787 (Ast_c.get_type_expr e
) +> Type_c.do_with_type
(fun t
->
788 (* todo: maybe not good env !! *)
789 match unwrap_unfold_env t
with
793 | _ -> Type_c.noTypeHere
797 | Unary
(e
, GetRef
) ->
798 k expr
; (* recurse to set the types-ref of sub expressions *)
800 (Ast_c.get_type_expr e
) +> Type_c.do_with_type
(fun t
->
801 (* must generate an element so that '=' can be used
804 let fake = Ast_c.fakeInfo
Common.fake_parse_info
in
805 let fake = Ast_c.rewrap_str
"*" fake in
807 let ft = Ast_c.mk_ty
(Pointer t
) [fake] in
812 (* -------------------------------------------------- *)
814 | RecordAccess
(e
, namefld
)
815 | RecordPtAccess
(e
, namefld
) as x
->
817 let fld = Ast_c.str_of_name namefld
in
819 k expr
; (* recurse to set the types-ref of sub expressions *)
821 (Ast_c.get_type_expr e
) +> Type_c.do_with_type
(fun t
->
825 | RecordAccess
_ -> Some t
826 | RecordPtAccess
_ ->
827 (match unwrap_unfold_env t
with
828 | Pointer
(t
) -> Some t
831 | _ -> raise Impossible
835 | None
-> Type_c.noTypeHere
837 match unwrap_unfold_env t
with
838 | StructUnion
(su
, sopt
, fields
) ->
840 (* todo: which env ? *)
842 (Type_c.type_field
fld (su
, fields
))
846 "TYPE-ERROR: field '%s' does not belong in struct %s"
847 fld (match sopt
with Some
s -> s |_ -> "<anon>"));
850 pr2 "TAC:MultiFound";
853 | _ -> Type_c.noTypeHere
859 (* -------------------------------------------------- *)
862 (* todo: if infer, can "push" info ? add_types_expr [t] e ? *)
863 make_info_def_fix (Lib.al_type t
)
865 (* todo? lub, hmm maybe not, cos type must be e1 *)
866 | Assignment
(e1
, op
, e2
) ->
868 (* value of an assignment is the value of the RHS expression, but its
869 type is the type of the lhs expression. Use the rhs exp if no
870 information is available *)
871 (match Ast_c.get_type_expr e1
with
872 (None
,_) -> Ast_c.get_type_expr e2
873 | (Some
ty,t
) -> (Some
ty,t
))
874 | Sequence
(e1
, e2
) ->
876 Ast_c.get_type_expr e2
878 | Binary
(e1
, Logical
_, e2
) ->
880 make_info_def (type_of_s "int")
883 | Binary
(e1
, Arith op
, e2
) ->
885 Type_c.lub op
(Type_c.get_opt_type e1
) (Type_c.get_opt_type e2
)
887 | CondExpr
(cond
, e1opt
, e2
) ->
889 Ast_c.get_type_expr e2
894 Ast_c.get_type_expr e
896 | Infix
(e
, op
) | Postfix
(e
, op
) ->
898 Ast_c.get_type_expr e
900 (* pad: julia wrote this ? *)
901 | Unary
(e
, UnPlus
) ->
902 k expr
; (* recurse to set the types-ref of sub expressions *)
903 make_info_def (type_of_s "int")
904 (* todo? can convert from unsigned to signed if UnMinus ? *)
905 | Unary
(e
, UnMinus
) ->
906 k expr
; (* recurse to set the types-ref of sub expressions *)
907 make_info_def (type_of_s "int")
909 | SizeOfType
_|SizeOfExpr
_ ->
910 k expr
; (* recurse to set the types-ref of sub expressions *)
911 make_info_def (type_of_s "size_t")
913 | Constructor
(ft, ini
) ->
914 k expr
; (* recurse to set the types-ref of sub expressions *)
915 make_info_def (Lib.al_type
ft)
918 k expr
; (* recurse to set the types-ref of sub expressions *)
919 (* the result of ! is always 0 or 1, not the argument type *)
920 make_info_def (type_of_s "int")
921 | Unary
(e
, Tilde
) ->
922 k expr
; (* recurse to set the types-ref of sub expressions *)
923 Ast_c.get_type_expr e
925 (* -------------------------------------------------- *)
927 | Unary
(_, GetRefLabel
) ->
928 k expr
; (* recurse to set the types-ref of sub expressions *)
929 pr2_once
"Type annotater:not handling GetRefLabel";
933 k expr
; (* recurse to set the types-ref of sub expressions *)
934 pr2_once
"Type annotater:not handling StatementExpr";
937 | _ -> k expr; Type_c.noTypeHere
941 Ast_c.set_type_expr expr
ty
946 (*****************************************************************************)
948 (*****************************************************************************)
950 (* Processing includes that were added after a cpp_ast_c makes the
951 * type annotater quite slow, especially when the depth of cpp_ast_c is
952 * big. But for such includes the only thing we really want is to modify
953 * the environment to have enough type information. We don't need
954 * to type the expressions inside those includes (they will be typed
955 * when we process the include file directly). Here the goal is
958 * Note that as usually header files contain mostly structure
959 * definitions and defines, that means we still have to do lots of work.
960 * We only win on function definition bodies, but usually header files
961 * have just prototypes, or inline function definitions which anyway have
962 * usually a small body. But still, we win. It also makes clearer
963 * that when processing include as we just need the environment, the caller
964 * of this module can do further optimisations such as memorising the
965 * state of the environment after each header files.
968 * For sparse its makes the annotating speed goes from 9s to 4s
969 * For Linux the speedup is even better, from ??? to ???.
971 * Because There would be some copy paste with annotate_program, it is
972 * better to factorize code hence the just_add_in_env parameter below.
974 * todo? alternative optimisation for the include problem:
975 * - processing all headers files one time and construct big env
976 * - use hashtbl for env (but apparently not biggest problem)
979 let rec visit_toplevel ~just_add_in_env ~depth elem
=
980 let need_annotate_body = not just_add_in_env
in
982 let bigf = { Visitor_c.default_visitor_c
with
984 (* ------------------------------------------------------------ *)
985 Visitor_c.kcppdirective
= (fun (k
, bigf) directive
->
987 (* do error messages for type annotater only for the real body of the
988 * file, not inside include.
990 | Include
{i_content
= opt
} ->
991 opt
+> Common.do_option
(fun (filename
, program
) ->
992 Common.save_excursion
Flag_parsing_c.verbose_type
(fun () ->
993 Flag_parsing_c.verbose_type
:= false;
995 (* old: Visitor_c.vk_program bigf program;
996 * opti: set the just_add_in_env
998 program
+> List.iter
(fun elem
->
999 visit_toplevel ~just_add_in_env
:true ~depth
:(depth
+1) elem
1004 | Define
((s,ii
), (defkind
, defval
)) ->
1007 (* even if we are in a just_add_in_env phase, such as when
1008 * we process include, as opposed to the body of functions,
1009 * with macros we still to type the body of the macro as
1010 * the macro has no type and so we infer its type from its
1011 * body (and one day later maybe from its use).
1014 (* can try to optimize and recurse only when the define body
1018 | DefineExpr expr
->
1019 (* prevent macro-declared variables from leaking out *)
1020 do_in_new_scope (fun () ->
1021 if is_simple_expr expr
1022 (* even if not need_annotate_body, still recurse*)
1025 if need_annotate_body
1028 do_in_new_scope (fun () ->
1029 if need_annotate_body
1033 add_binding (Macro
(s, (defkind
, defval
) )) true;
1035 | PragmaAndCo
_ -> ()
1038 (* ------------------------------------------------------------ *)
1039 (* main typer code *)
1040 (* ------------------------------------------------------------ *)
1041 Visitor_c.kexpr
= annotater_expr_visitor_subpart;
1043 (* ------------------------------------------------------------ *)
1044 Visitor_c.kstatement
= (fun (k
, bigf) st
->
1045 match Ast_c.unwrap_st st
with
1046 | Compound statxs
-> do_in_new_scope (fun () -> k st
);
1049 (* ------------------------------------------------------------ *)
1050 Visitor_c.kdecl
= (fun (k
, bigf) d
->
1052 | (DeclList
(xs
, ii
)) ->
1053 xs
+> List.iter
(fun ({v_namei
= var
; v_type
= t
;
1054 v_storage
= sto
; v_local
= local
} as x
1057 (* to add possible definition in type found in Decl *)
1058 Visitor_c.vk_type
bigf t
;
1063 | Ast_c.NotLocalDecl
-> Ast_c.NotLocalVar
1064 | Ast_c.LocalDecl
-> Ast_c.LocalVar
(Ast_c.info_of_type t
)
1066 var
+> Common.do_option
(fun (name
, iniopt
) ->
1067 let s = Ast_c.str_of_name name
in
1070 | StoTypedef
, _inline
->
1071 add_binding (TypeDef
(s,Lib.al_type t
)) true;
1073 add_binding (VarOrFunc
(s, (Lib.al_type t
, local))) true;
1076 Some
(typedef_fix (Lib.al_type t
) !_scoped_env);
1078 if need_annotate_body then begin
1079 (* int x = sizeof(x) is legal so need process ini *)
1080 iniopt
+> Common.do_option
(fun (info
, ini
) ->
1081 Visitor_c.vk_ini
bigf ini
1087 if need_annotate_body
1093 (* ------------------------------------------------------------ *)
1094 Visitor_c.ktype
= (fun (k
, bigf) typ
->
1095 (* bugfix: have a 'Lib.al_type typ' before, but because we can
1096 * have enum with possible expression, we don't want to change
1097 * the ref of abstract-lined types, but the real one, so
1098 * don't al_type here
1100 let (_q
, tbis
) = typ
in
1101 match Ast_c.unwrap_typeC typ
with
1102 | StructUnion
(su
, Some
s, structType
) ->
1103 let structType'
= Lib.al_fields
structType in
1104 let ii = Ast_c.get_ii_typeC_take_care tbis
in
1105 let ii'
= Lib.al_ii
ii in
1106 add_binding (StructUnionNameDef
(s, ((su
, structType'
),ii'
))) true;
1108 if need_annotate_body
1109 then k typ
(* todo: restrict ? new scope so use do_in_scope ? *)
1111 | Enum
(sopt
, enums
) ->
1113 enums
+> List.iter
(fun ((name
, eopt
), iicomma
) ->
1115 let s = Ast_c.str_of_name name
in
1117 if need_annotate_body
1118 then eopt
+> Common.do_option
(fun (ieq
, e
) ->
1119 Visitor_c.vk_expr
bigf e
1121 add_binding (EnumConstant
(s, sopt
)) true;
1125 (* TODO: if have a TypeName, then maybe can fill the option
1129 if need_annotate_body
1134 (* ------------------------------------------------------------ *)
1135 Visitor_c.ktoplevel
= (fun (k
, bigf) elem
->
1136 _notyped_var := Hashtbl.create
100;
1140 f_type
= ((returnt
, (paramst
, b
)) as ftyp
);
1143 f_old_c_style
= oldstyle
;
1149 (* what is iifunc1? it should be a type. jll
1150 * pad: it's the '(' in the function definition. The
1151 * return type is part of f_type.
1153 | iifunc1
::iifunc2
::ibrace1
::ibrace2
::ifakestart
::isto
->
1155 | _ -> raise Impossible
1157 let funcs = Ast_c.str_of_name name
in
1159 (match oldstyle
with
1162 Lib.al_type
(Ast_c.mk_ty
(FunctionType ftyp
) [i1
;i2
]) in
1164 add_binding (VarOrFunc
(funcs, (typ'
,islocal i1
.Ast_c.pinfo
)))
1167 if need_annotate_body then
1168 do_in_new_scope (fun () ->
1169 paramst
+> List.iter
(fun ({p_namei
= nameopt
; p_type
= t
},_)->
1172 let s = Ast_c.str_of_name name
in
1173 let local = Ast_c.LocalVar
(Ast_c.info_of_type t
) in
1174 add_binding (VarOrFunc
(s,(Lib.al_type t
,local))) true
1176 pr2 "no type, certainly because Void type ?"
1182 (* generate regular function type *)
1184 pr2 "TODO generate type for function";
1186 if need_annotate_body then
1187 do_in_new_scope (fun () ->
1188 (* recurse. should naturally call the kdecl visitor and
1197 | Define
((s,ii), (DefineVar
, DefineType t
)) ->
1198 add_binding (TypeDef
(s,Lib.al_type t
)) true;
1209 | NotParsedCorrectly
_
1219 then Visitor_c.vk_toplevel
bigf elem
1221 Common.profile_code
"TAC.annotate_only_included" (fun () ->
1222 Visitor_c.vk_toplevel
bigf elem
1224 else Visitor_c.vk_toplevel
bigf elem
1226 (*****************************************************************************)
1228 (*****************************************************************************)
1229 (* catch all the decl to grow the environment *)
1232 let rec (annotate_program2
:
1233 environment
-> toplevel list
-> (toplevel
* environment
Common.pair
) list
) =
1236 (* globals (re)initialialisation *)
1238 _notyped_var := (Hashtbl.create
100);
1240 prog
+> List.map
(fun elem
->
1241 let beforeenv = !_scoped_env in
1242 visit_toplevel ~just_add_in_env
:false ~depth
:0 elem
;
1243 let afterenv = !_scoped_env in
1244 (elem
, (beforeenv, afterenv))
1250 (*****************************************************************************)
1252 (*****************************************************************************)
1254 (* julia: for coccinelle *)
1255 let annotate_test_expressions prog
=
1256 let rec propagate_test e
=
1257 let ((e_term
,info
),_) = e
in
1258 let (ty,_) = !info
in
1261 Binary
(e1
,Logical
(AndLog
),e2
)
1262 | Binary
(e1
,Logical
(OrLog
),e2
) -> propagate_test e1
; propagate_test e2
1263 | Unary
(e1
,Not
) -> propagate_test e1
1264 | ParenExpr
(e
) -> propagate_test e
1265 | FunCall
(e
,args
) -> (* not very nice, but so painful otherwise *)
1266 (match (unwrap e
,args
) with
1267 ((Ident
(i
),_),[(Left a
,_)]) ->
1268 let nm = str_of_name i
in
1269 if List.mem
nm ["likely";"unlikely"]
1270 then propagate_test a
1275 let bigf = { Visitor_c.default_visitor_c
with
1276 Visitor_c.kexpr
= (fun (k
,bigf) expr
->
1277 (match unwrap_expr expr
with
1278 CondExpr
(e
,_,_) -> propagate_test e
1279 | Binary
(e1
,Logical
(AndLog
),e2
)
1280 | Binary
(e1
,Logical
(OrLog
),e2
) -> propagate_test e1
; propagate_test e2
1281 | Unary
(e1
,Not
) -> propagate_test e1
1286 Visitor_c.kstatement
= (fun (k
, bigf) st
->
1287 match unwrap_st st
with
1289 (match s with If
(e1
,s1
,s2
) -> propagate_test e1
| _ -> ());
1293 While
(e
,s) -> propagate_test e
1294 | DoWhile
(s,e
) -> propagate_test e
1296 (match unwrap es
with Some e
-> propagate_test e
| None
-> ())
1302 (prog
+> List.iter
(fun elem
->
1303 Visitor_c.vk_toplevel
bigf elem
1308 (*****************************************************************************)
1309 (* Annotate types *)
1310 (*****************************************************************************)
1311 let annotate_program env prog
=
1312 Common.profile_code
"TAC.annotate_program"
1314 let res = annotate_program2 env prog
in
1315 annotate_test_expressions prog
;
1319 let annotate_type_and_localvar env prog
=
1320 Common.profile_code
"TAC.annotate_type"
1321 (fun () -> annotate_program2 env prog
)
1324 (*****************************************************************************)
1325 (* changing default typing environment, do concatenation *)
1326 let init_env filename
=
1327 pr2 ("init_env: " ^ filename
);
1328 let (ast2
, _stat
) = Parse_c.parse_c_and_cpp filename
in
1329 let ast = Parse_c.program_of_program2 ast2
in
1331 let res = annotate_type_and_localvar !initial_env ast in
1332 match List.rev
res with
1333 | [] -> pr2 "empty environment"
1334 | (_top
,(env1
,env2
))::xs
->
1335 initial_env := !initial_env ++ env2
;