3 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes,
4 * Copyright (C) 2009 University of Urbana Champaign
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
20 module Lib
= Lib_parsing_c
22 (*****************************************************************************)
24 (*****************************************************************************)
26 * - Done a first type checker in 2002, cf typing-semantic/, but
27 * was assuming that have all type info, and so was assuming had called
28 * cpp and everything was right.
29 * - Wrote this file, in 2006?, as we added pattern matching on type
30 * in coccinelle. Partial type annotater.
31 * - Julia extended it in 2008? to have localvar/notlocalvar and
32 * test/notest information, again used by coccinelle.
33 * - I extended it in Fall 2008 to have more type information for the
34 * global analysis. I also added some optimisations to process
35 * included code faster.
38 * Design choices. Can either do:
40 * - can first do a simple inferer, that just pass context
41 * - then a real inferer, managing partial info.
42 * type context = fullType option
44 * - extract the information from the .h files
45 * (so no inference at all needed)
47 * Difference with julia's code in parsing_cocci/type_infer.ml:
48 * - She handles just the variable namespace. She does not type
49 * field access or enum or macros. This is because cocci programs are
50 * usually simple and have no structure definition or macro definitions
51 * that we need to type anyway.
52 * - She does more propagation.
53 * - She does not have to handle the typedef isomorphism which force me
54 * to use those typedef_fix and type_unfold_one_step
55 * - She does not handle I think the function pointer C isomorphism.
57 * - She has a cleaner type_cocci without any info. In my case
58 * I need to do those ugly al_type, or generate fake infos.
59 * - She has more compact code. Perhaps because she does not have to
60 * handle the extra exp_info that she added on me :) So I need those
61 * do_with_type, make_info_xxx, etc.
63 * Note: if need to debug this annotater, use -show_trace_profile, it can
64 * help. You can also set the typedef_debug flag below.
68 * todo: expression contain types, and statements, which in turn can contain
69 * expression, so need recurse. Need define an annote_statement and
72 * todo: how deal with typedef isomorphisms ? How store them in Ast_c ?
73 * store all posible variations in ast_c ? a list of type instead of just
76 * todo: how to handle multiple possible definitions for entities like
77 * struct or typedefs ? Because of ifdef, we should store list of
78 * possibilities sometimes.
80 * todo: define a new type ? like type_cocci ? where have a bool ?
82 * semi: How handle scope ? When search for type of field, we return
83 * a type, but this type makes sense only in a certain scope.
84 * We could add a tag to each typedef, structUnionName to differentiate
85 * them and also associate in ast_c to the type the scope
86 * of this type, the env that were used to define this type.
88 * todo: handle better the search in previous env, the env'. Cf the
89 * termination problem in typedef_fix when I was searching in the same
94 (*****************************************************************************)
96 (*****************************************************************************)
98 if !Flag_parsing_c.verbose_type
102 if !Flag_parsing_c.verbose_type
103 then Common.pr2_once s
105 (*****************************************************************************)
107 (*****************************************************************************)
109 (* The different namespaces from stdC manual:
111 * You introduce two new name spaces with every block that you write.
113 * One name space includes all
116 * - type definitions,
117 * - and enumeration constants
118 * that you declare or define within the block.
120 * The other name space includes all
124 * *tags* that you define within the block.
126 * You introduce a new member name space with every structure or union
127 * whose content you define. You identify a member name space by the
128 * type of left operand that you write for a member selection
129 * operator, as in x.y or p->y. A member name space ends with the end
130 * of the block in which you declare it.
132 * You introduce a new goto label name space with every function
133 * definition you write. Each goto label name space ends with its
134 * function definition.
137 (* But I don't try to do a type-checker, I try to "resolve" type of var
138 * so don't need make difference between namespaces here.
140 * But, why not make simply a (string, kindstring) assoc ?
141 * Because we dont want that a variable shadow a struct definition, because
142 * they are still in 2 different namespace. But could for typedef,
143 * because VarOrFunc and Typedef are in the same namespace.
144 * But could do a record as in c_info.ml
148 (* This type contains all "ident" like notion of C. Each time in Ast_c
149 * you have a string type (as in expression, function name, fields)
150 * then you need to manage the scope of this ident.
152 * The wrap for StructUnionNameDef contain the whole ii, the i for
153 * the string, the structUnion and the structType.
155 * Put Macro here ? after all the scoping rules for cpp macros is different
156 * and so does not vanish after the closing '}'.
161 | VarOrFunc
of string * Ast_c.exp_type
162 | EnumConstant
of string * string option
164 | TypeDef
of string * fullType
165 (* the structType contains nested "idents" with struct scope *)
166 | StructUnionNameDef
of string * (structUnion
* structType
) wrap
169 | Macro
of string * define_body
172 (* Because have nested scope, have nested list, hence the list list.
174 * opti? use a hash to accelerate ? hmm but may have some problems
175 * with hash to handle recursive lookup. For instance for the typedef
176 * example where have mutually recursive definition of the type,
177 * we must take care to not loop by starting the second search
178 * from the previous environment. With the list scheme in
179 * lookup_env below it's quite easy to do. With hash it may be
182 type environment
= namedef list list
185 (* ------------------------------------------------------------ *)
186 (* can be modified by the init_env function below, by
187 * the file environment_unix.h
189 let initial_env = ref [
191 (Lib.al_type
(Parse_c.type_of_string
"void *"),
196 (Lib.al_type(Parse_c.type_of_string "void* (*)(int size)"),
199 (Lib.al_type(Parse_c.type_of_string "void (*)(void *ptr)"),
206 let typedef_debug = ref false
209 (* ------------------------------------------------------------ *)
210 (* generic, lookup and also return remaining env for further lookup *)
211 let rec lookup_env2 f env
=
213 | [] -> raise Not_found
214 | []::zs
-> lookup_env2 f zs
217 | None
-> lookup_env2 f
(xs
::zs
)
218 | Some y
-> y
, xs
::zs
221 Common.profile_code
"TAC.lookup_env" (fun () -> lookup_env2 a b
)
225 let member_env lookupf env
=
227 let _ = lookupf env
in
229 with Not_found
-> false
234 (* ------------------------------------------------------------ *)
237 let lookup_var s env
=
239 | VarOrFunc
(s2
, typ
) -> if s2
= s
then Some typ
else None
244 let lookup_typedef s env
=
245 if !typedef_debug then pr2 ("looking for: " ^ s
);
247 | TypeDef
(s2
, typ
) -> if s2
= s
then Some typ
else None
252 let lookup_structunion (_su
, s
) env
=
254 | StructUnionNameDef
(s2
, typ
) -> if s2
= s
then Some typ
else None
259 let lookup_macro s env
=
261 | Macro
(s2
, typ
) -> if s2
= s
then Some typ
else None
266 let lookup_enum s env
=
268 | EnumConstant
(s2
, typ
) -> if s2
= s
then Some typ
else None
274 let lookup_typedef a b
=
275 Common.profile_code
"TAC.lookup_typedef" (fun () -> lookup_typedef a b
)
279 (*****************************************************************************)
281 (*****************************************************************************)
283 (* find_final_type is used to know to what type a field correspond in
284 * x.foo. Sometimes the type of x is a typedef or a structName in which
285 * case we must look in environment to find the complete type, here
286 * structUnion that contains the information.
288 * Because in C one can redefine in nested blocks some typedefs,
289 * struct, or variables, we have a static scoping resolving process.
290 * So, when we look for the type of a var, if this var is in an
291 * enclosing block, then maybe its type refer to a typdef of this
292 * enclosing block, so must restart the "type-resolving" of this
293 * typedef from this enclosing block, not from the bottom. So our
294 * "resolving-type functions" take an env and also return an env from
295 * where the next search must be performed. *)
298 let rec find_final_type ty env =
300 match Ast_c.unwrap_typeC ty with
301 | BaseType x -> (BaseType x) +> Ast_c.rewrap_typeC ty
303 | Pointer t -> (Pointer (find_final_type t env)) +> Ast_c.rewrap_typeC ty
304 | Array (e, t) -> Array (e, find_final_type t env) +> Ast_c.rewrap_typeC ty
306 | StructUnion (sopt, su) -> StructUnion (sopt, su) +> Ast_c.rewrap_typeC ty
308 | FunctionType t -> (FunctionType t) (* todo ? *) +> Ast_c.rewrap_typeC ty
309 | Enum
(s
, enumt
) -> (Enum
(s
, enumt
)) (* todo? *) +> Ast_c.rewrap_typeC ty
310 | EnumName s
-> (EnumName s
) (* todo? *) +> Ast_c.rewrap_typeC ty
312 | StructUnionName
(su
, s
) ->
314 let ((structtyp
,ii
), env'
) = lookup_structunion (su
, s
) env
in
315 Ast_c.nQ
, (StructUnion
(Some s
, structtyp
), ii
)
316 (* old: +> Ast_c.rewrap_typeC ty
317 * but must wrap with good ii, otherwise pretty_print_c
318 * will be lost and raise some Impossible
326 let (t'
, env'
) = lookup_typedef s env
in
327 find_final_type t' env'
332 | ParenType t
-> find_final_type t env
333 | Typeof e
-> failwith
"typeof"
339 (* ------------------------------------------------------------ *)
340 let rec type_unfold_one_step ty env
=
342 match Ast_c.unwrap_typeC ty
with
347 | StructUnion
(sopt
, su
, fields
) -> ty
349 | FunctionType t
-> ty
350 | Enum
(s
, enumt
) -> ty
352 | EnumName s
-> ty
(* todo: look in env when will have EnumDef *)
354 | StructUnionName
(su
, s
) ->
356 let (((su
,fields
),ii
), env'
) = lookup_structunion (su
, s
) env
in
357 Ast_c.nQ
, (StructUnion
(su
, Some s
, fields
), ii
)
358 (* old: +> Ast_c.rewrap_typeC ty
359 * but must wrap with good ii, otherwise pretty_print_c
360 * will be lost and raise some Impossible
366 | TypeName
(s
,_typ
) ->
368 if !typedef_debug then pr2 "type_unfold_one_step: lookup_typedef";
369 let (t'
, env'
) = lookup_typedef s env
in
370 type_unfold_one_step t' env'
375 | ParenType t
-> type_unfold_one_step t env
377 pr2_once ("Type_annoter: not handling typeof");
379 | TypeOfType t
-> type_unfold_one_step t env
389 (* normalizer. can be seen as the opposite of the previous function as
390 * we "fold" at least for the structUnion. Should return something that
391 * Type_c.is_completed_fullType likes, something that makes it easier
392 * for the programmer to work on, that has all the needed information
395 let rec typedef_fix ty env
=
396 match Ast_c.unwrap_typeC ty
with
400 Pointer
(typedef_fix t env
) +> Ast_c.rewrap_typeC ty
402 Array
(e
, typedef_fix t env
) +> Ast_c.rewrap_typeC ty
403 | StructUnion
(su
, sopt
, fields
) ->
405 * todo? but what if correspond to a nested struct def ?
407 Type_c.structdef_to_struct_name ty
409 (FunctionType ft
) (* todo ? *) +> Ast_c.rewrap_typeC ty
411 (Enum
(s
, enumt
)) (* todo? *) +> Ast_c.rewrap_typeC ty
413 (EnumName s
) (* todo? *) +> Ast_c.rewrap_typeC ty
415 (* we prefer StructUnionName to StructUnion when it comes to typed metavar *)
416 | StructUnionName
(su
, s
) -> ty
418 (* keep the typename but complete with more information *)
419 | TypeName
(s
, typ
) ->
422 pr2 ("typedef value already there:" ^ s
);
426 if !typedef_debug then pr2 "typedef_fix: lookup_typedef";
427 let (t'
, env'
) = lookup_typedef s env
in
429 (* bugfix: termination bug if use env instead of env' below, because
430 * can have some weird mutually recursive typedef which
431 * each new type alias search for its mutual def.
433 TypeName
(s
, Some
(typedef_fix t' env'
)) +> Ast_c.rewrap_typeC ty
438 (* remove paren for better matching with typed metavar. kind of iso again *)
442 pr2_once ("Type_annoter: not handling typeof");
449 (*****************************************************************************)
450 (* Helpers, part 1 *)
451 (*****************************************************************************)
454 (Lib.al_type
(Parse_c.type_of_string s
))
456 Common.profile_code
"Type_c.type_of_s" (fun () -> type_of_s2 a
)
460 * /home/pad/software-os-src2/freebsd/contrib/ipfilter/netinet/ip_fil_freebsd.c
461 * because in the code there is:
462 * static iss_seq_off = 0;
463 * which in the parser was generating a default int without a parse_info.
464 * I now add a fake parse_info for such default int so no more failwith
467 let offset (_,(ty
,iis
)) =
469 ii
::_ -> ii
.Ast_c.pinfo
470 | _ -> failwith
"type has no text; need to think again"
474 let rec is_simple_expr expr
=
475 match Ast_c.unwrap_expr expr
with
476 (* todo? handle more special cases ? *)
484 | Binary
(e1
, op
, e2
) ->
488 | ParenExpr
(e
) -> is_simple_expr e
492 (*****************************************************************************)
494 (*****************************************************************************)
495 (* now in type_c.ml *)
499 (*****************************************************************************)
500 (* (Semi) Globals, Julia's style *)
501 (*****************************************************************************)
503 (* opti: cache ? use hash ? *)
504 let _scoped_env = ref !initial_env
506 (* memoise unnanoted var, to avoid too much warning messages *)
507 let _notyped_var = ref (Hashtbl.create
100)
509 let new_scope() = _scoped_env := []::!_scoped_env
510 let del_scope() = _scoped_env := List.tl
!_scoped_env
512 let do_in_new_scope f =
520 let add_in_scope namedef
=
521 let (current
, older
) = Common.uncons
!_scoped_env in
522 _scoped_env := (namedef
::current
)::older
525 (* ------------------------------------------------------------ *)
527 (* sort of hackish... *)
529 if List.length
(!_scoped_env) = List.length
!initial_env
530 then Ast_c.NotLocalVar
531 else Ast_c.LocalVar info
533 (* ------------------------------------------------------------ *)
534 (* the warning argument is here to allow some binding to overwrite an
535 * existing one. With function, we first have the prototype and then the def,
536 * and the def binding with the same string is not an error.
538 * todo?: but if we define two times the same function, then we will not
539 * detect it :( it would require to make a diff between adding a binding
540 * from a prototype and from a definition.
542 * opti: disabling the check_annotater flag have some important
543 * performance benefit.
546 let add_binding2 namedef warning
=
547 let (current_scope
, _older_scope
) = Common.uncons
!_scoped_env in
549 if !Flag_parsing_c.check_annotater
then begin
551 | VarOrFunc
(s
, typ
) ->
552 if Hashtbl.mem
!_notyped_var s
553 then pr2 ("warning: found typing information for a variable that was" ^
554 "previously unknown:" ^ s
);
560 | VarOrFunc
(s
, typ
) ->
561 member_env (lookup_var s
), s
562 | TypeDef
(s
, typ
) ->
563 member_env (lookup_typedef s
), s
564 | StructUnionNameDef
(s
, (su
, typ
)) ->
565 member_env (lookup_structunion (su
, s
)), s
567 member_env (lookup_macro s
), s
568 | EnumConstant
(s
, body
) ->
569 member_env (lookup_enum s
), s
572 if memberf
[current_scope
] && warning
573 then pr2 ("Type_annoter: warning, " ^ s ^
574 " is already in current binding" ^
"\n" ^
575 " so there is a weird shadowing");
579 let add_binding namedef warning
=
580 Common.profile_code
"TAC.add_binding" (fun () -> add_binding2 namedef warning
)
584 (*****************************************************************************)
585 (* Helpers, part 2 *)
586 (*****************************************************************************)
588 let lookup_opt_env lookupf s
=
589 Common.optionise
(fun () ->
590 lookupf s
!_scoped_env
593 let unwrap_unfold_env2 typ
=
595 (type_unfold_one_step typ
!_scoped_env)
596 let unwrap_unfold_env typ
=
597 Common.profile_code
"TAC.unwrap_unfold_env" (fun () -> unwrap_unfold_env2 typ
)
599 let typedef_fix a b
=
600 Common.profile_code
"TAC.typedef_fix" (fun () -> typedef_fix a b
)
602 let make_info_def_fix x
=
603 Type_c.make_info_def
(typedef_fix x
!_scoped_env)
605 let make_info_fix (typ
, local
) =
606 Type_c.make_info
((typedef_fix typ
!_scoped_env),local
)
609 let make_info_def = Type_c.make_info_def
611 (*****************************************************************************)
612 (* Main typer code, put later in a visitor *)
613 (*****************************************************************************)
615 let annotater_expr_visitor_subpart = (fun (k
,bigf
) expr
->
618 match Ast_c.unwrap_expr expr
with
620 (* -------------------------------------------------- *)
621 (* todo: should analyse the 's' for int to know if unsigned or not *)
622 | Constant
(String
(s
,kind
)) -> make_info_def (type_of_s "char *")
623 | Constant MultiString
_ -> make_info_def (type_of_s "char *")
624 | Constant
(Char
(s
,kind
)) -> make_info_def (type_of_s "char")
625 | Constant
(Int
(s
)) -> make_info_def (type_of_s "int")
626 | Constant
(Float
(s
,kind
)) ->
627 let fake = Ast_c.fakeInfo
(Common.fake_parse_info
) in
628 let fake = Ast_c.rewrap_str
"float" fake in
629 let iinull = [fake] in
631 (Ast_c.nQ
, (BaseType
(FloatType kind
), iinull))
634 (* -------------------------------------------------- *)
635 (* note: could factorize this code with the code for Ident
636 * and the other code for Funcall below. But as the Ident can be
637 * a macro-func, I prefer to handle it separately. So
638 * this rule can handle the macro-func, the Ident-rule can handle
639 * the macro-var, and the other FunCall-rule the regular
640 * function calls through fields.
641 * Also as I don't want a warning on the Ident that are a FunCall,
642 * easier to have a rule separate from the Ident rule.
644 | FunCall
(((Ident s
, typ
), ii
) as e1
, args
) ->
647 args
+> List.iter
(fun (e
,ii
) ->
648 (* could typecheck if arguments agree with prototype *)
649 Visitor_c.vk_argument bigf e
652 (match lookup_opt_env lookup_var s
with
653 | Some
((typ
,local
),_nextenv
) ->
655 (* set type for ident *)
656 let tyinfo = make_info_fix (typ
, local
) in
657 Ast_c.set_type_expr e1
tyinfo;
659 (match unwrap_unfold_env typ
with
660 | FunctionType
(ret
, params
) -> make_info_def ret
662 (* can be function pointer, C have an iso for that,
663 * same pfn() syntax than regular function call.
666 (match unwrap_unfold_env typ2
with
667 | FunctionType
(ret
, params
) -> make_info_def ret
668 | _ -> Type_c.noTypeHere
670 | _ -> Type_c.noTypeHere
674 (match lookup_opt_env lookup_macro s
with
675 | Some
((defkind
, defval
), _nextenv
) ->
676 (match defkind
, defval
with
677 | DefineFunc
_, DefineExpr e
->
678 let rettype = Ast_c.get_onlytype_expr e
in
680 (* todo: could also set type for ident ?
681 have return type and at least type of concrete
682 parameters so can generate a fake FunctionType
685 Type_c.fake_function_type
rettype args
688 macrotype_opt +> Common.do_option
(fun t
->
689 pr2 ("Type_annotater: generate fake function type" ^
691 let tyinfo = make_info_def_fix t
in
692 Ast_c.set_type_expr e1
tyinfo;
695 Ast_c.get_type_expr e
697 pr2 ("Type_annoter: not a macro-func: " ^ s
);
700 (* normally the FunCall case should have catch it *)
701 pr2 ("Type_annoter: not a macro-func-expr: " ^ s
);
705 pr2_once ("type_annotater: no type for function ident: " ^ s
);
711 | FunCall
(e
, args
) ->
714 (Ast_c.get_type_expr e
) +> Type_c.do_with_type
(fun typ
->
715 (* copy paste of above *)
716 (match unwrap_unfold_env typ
with
717 | FunctionType
(ret
, params
) -> make_info_def ret
719 (match unwrap_unfold_env typ
with
720 | FunctionType
(ret
, params
) -> make_info_def ret
721 | _ -> Type_c.noTypeHere
723 | _ -> Type_c.noTypeHere
728 (* -------------------------------------------------- *)
730 (match lookup_opt_env lookup_var s
with
731 | Some
((typ
,local
),_nextenv
) ->
732 make_info_fix (typ
,local
)
734 (match lookup_opt_env lookup_macro s
with
735 | Some
((defkind
, defval
), _nextenv
) ->
736 (match defkind
, defval
with
737 | DefineVar
, DefineExpr e
->
738 Ast_c.get_type_expr e
740 pr2 ("Type_annoter: not a expression: " ^ s
);
743 (* normally the FunCall case should have catch it *)
744 pr2 ("Type_annoter: not a macro-var: " ^ s
);
748 (match lookup_opt_env lookup_enum s
with
749 | Some
(_, _nextenv
) ->
750 make_info_def (type_of_s "int")
752 if not
(s
=~
"[A-Z_]+") (* if macro then no warning *)
754 if !Flag_parsing_c.check_annotater
then
755 if not
(Hashtbl.mem
!_notyped_var s
)
757 pr2 ("Type_annoter: not finding type for: " ^ s
);
758 Hashtbl.add
!_notyped_var s
true;
762 pr2 ("Type_annoter: not finding type for: " ^ s
)
769 (* -------------------------------------------------- *)
770 (* C isomorphism on type on array and pointers *)
772 | ArrayAccess
(e
, _) ->
773 k expr
; (* recurse to set the types-ref of sub expressions *)
775 (Ast_c.get_type_expr e
) +> Type_c.do_with_type
(fun t
->
776 (* todo: maybe not good env !! *)
777 match unwrap_unfold_env t
with
781 | _ -> Type_c.noTypeHere
785 | Unary
(e
, GetRef
) ->
786 k expr
; (* recurse to set the types-ref of sub expressions *)
788 (Ast_c.get_type_expr e
) +> Type_c.do_with_type
(fun t
->
789 (* must generate an element so that '=' can be used
792 let fake = Ast_c.fakeInfo
Common.fake_parse_info
in
793 let fake = Ast_c.rewrap_str
"*" fake in
795 let ft = (Ast_c.nQ
, (Pointer t
, [fake])) in
800 (* -------------------------------------------------- *)
802 | RecordAccess
(e
, fld
)
803 | RecordPtAccess
(e
, fld
) as x
->
805 k expr
; (* recurse to set the types-ref of sub expressions *)
807 (Ast_c.get_type_expr e
) +> Type_c.do_with_type
(fun t
->
811 | RecordAccess
_ -> Some t
812 | RecordPtAccess
_ ->
813 (match unwrap_unfold_env t
with
814 | Pointer
(t
) -> Some t
817 | _ -> raise Impossible
821 | None
-> Type_c.noTypeHere
823 match unwrap_unfold_env t
with
824 | StructUnion
(su
, sopt
, fields
) ->
826 (* todo: which env ? *)
828 (Type_c.type_field fld
(su
, fields
))
832 "TYPE-ERROR: field '%s' does not belong in struct %s"
833 fld
(match sopt
with Some s
-> s
|_ -> "<anon>"));
836 pr2 "TAC:MultiFound";
839 | _ -> Type_c.noTypeHere
845 (* -------------------------------------------------- *)
848 (* todo: if infer, can "push" info ? add_types_expr [t] e ? *)
849 make_info_def_fix (Lib.al_type t
)
851 (* todo? lub, hmm maybe not, cos type must be e1 *)
852 | Assignment
(e1
, op
, e2
) ->
854 Ast_c.get_type_expr e1
855 | Sequence
(e1
, e2
) ->
857 Ast_c.get_type_expr e2
860 | Binary
(e1
, op
, e2
) ->
862 Type_c.lub
(Type_c.get_opt_type e1
) (Type_c.get_opt_type e2
)
864 | CondExpr
(cond
, e1opt
, e2
) ->
866 Ast_c.get_type_expr e2
871 Ast_c.get_type_expr e
873 | Infix
(e
, op
) | Postfix
(e
, op
) ->
875 Ast_c.get_type_expr e
877 (* pad: julia wrote this ? *)
878 | Unary
(e
, UnPlus
) ->
879 k expr
; (* recurse to set the types-ref of sub expressions *)
880 make_info_def (type_of_s "int")
881 (* todo? can convert from unsigned to signed if UnMinus ? *)
882 | Unary
(e
, UnMinus
) ->
883 k expr
; (* recurse to set the types-ref of sub expressions *)
884 make_info_def (type_of_s "int")
886 | SizeOfType
_|SizeOfExpr
_ ->
887 k expr
; (* recurse to set the types-ref of sub expressions *)
888 make_info_def (type_of_s "int")
890 | Constructor
(ft, ini
) ->
891 k expr
; (* recurse to set the types-ref of sub expressions *)
892 make_info_def (Lib.al_type
ft)
895 k expr
; (* recurse to set the types-ref of sub expressions *)
896 Ast_c.get_type_expr e
897 | Unary
(e
, Tilde
) ->
898 k expr
; (* recurse to set the types-ref of sub expressions *)
899 Ast_c.get_type_expr e
901 (* -------------------------------------------------- *)
903 | Unary
(_, GetRefLabel
) ->
904 k expr
; (* recurse to set the types-ref of sub expressions *)
905 pr2_once "Type annotater:not handling GetRefLabel";
909 k expr
; (* recurse to set the types-ref of sub expressions *)
910 pr2_once "Type annotater:not handling GetRefLabel";
913 | _ -> k expr; Type_c.noTypeHere
917 Ast_c.set_type_expr expr
ty
922 (*****************************************************************************)
924 (*****************************************************************************)
926 (* Processing includes that were added after a cpp_ast_c makes the
927 * type annotater quite slow, especially when the depth of cpp_ast_c is
928 * big. But for such includes the only thing we really want is to modify
929 * the environment to have enough type information. We don't need
930 * to type the expressions inside those includes (they will be typed
931 * when we process the include file directly). Here the goal is
934 * Note that as usually header files contain mostly structure
935 * definitions and defines, that means we still have to do lots of work.
936 * We only win on function definition bodies, but usually header files
937 * have just prototypes, or inline function definitions which anyway have
938 * usually a small body. But still, we win. It also makes clearer
939 * that when processing include as we just need the environment, the caller
940 * of this module can do further optimisations such as memorising the
941 * state of the environment after each header files.
944 * For sparse its makes the annotating speed goes from 9s to 4s
945 * For Linux the speedup is even better, from ??? to ???.
947 * Because There would be some copy paste with annotate_program, it is
948 * better to factorize code hence the just_add_in_env parameter below.
950 * todo? alternative optimisation for the include problem:
951 * - processing all headers files one time and construct big env
952 * - use hashtbl for env (but apparently not biggest problem)
955 let rec visit_toplevel ~just_add_in_env ~depth elem
=
956 let need_annotate_body = not just_add_in_env
in
958 let bigf = { Visitor_c.default_visitor_c
with
960 (* ------------------------------------------------------------ *)
961 Visitor_c.kcppdirective
= (fun (k
, bigf) directive
->
963 (* do error messages for type annotater only for the real body of the
964 * file, not inside include.
966 | Include
{i_content
= opt
} ->
967 opt
+> Common.do_option
(fun (filename
, program
) ->
968 Common.save_excursion
Flag_parsing_c.verbose_type
(fun () ->
969 Flag_parsing_c.verbose_type
:= false;
971 (* old: Visitor_c.vk_program bigf program;
972 * opti: set the just_add_in_env
974 program
+> List.iter
(fun elem
->
975 visit_toplevel ~just_add_in_env
:true ~depth
:(depth
+1) elem
980 | Define
((s
,ii
), (defkind
, defval
)) ->
983 (* even if we are in a just_add_in_env phase, such as when
984 * we process include, as opposed to the body of functions,
985 * with macros we still to type the body of the macro as
986 * the macro has no type and so we infer its type from its
987 * body (and one day later maybe from its use).
990 (* can try to optimize and recurse only when the define body
995 if is_simple_expr expr
996 (* even if not need_annotate_body, still recurse*)
999 if need_annotate_body
1002 if need_annotate_body
1006 add_binding (Macro
(s
, (defkind
, defval
) )) true;
1009 | PragmaAndCo
_ -> ()
1012 (* ------------------------------------------------------------ *)
1013 (* main typer code *)
1014 (* ------------------------------------------------------------ *)
1015 Visitor_c.kexpr
= annotater_expr_visitor_subpart;
1017 (* ------------------------------------------------------------ *)
1018 Visitor_c.kstatement
= (fun (k
, bigf) st
->
1020 | Compound statxs
, ii
-> do_in_new_scope (fun () -> k st
);
1023 (* ------------------------------------------------------------ *)
1024 Visitor_c.kdecl
= (fun (k
, bigf) d
->
1026 | (DeclList
(xs
, ii
)) ->
1027 xs
+> List.iter
(fun ({v_namei
= var
; v_type
= t
;
1028 v_storage
= sto
; v_local
= local
}, iicomma
) ->
1030 (* to add possible definition in type found in Decl *)
1031 Visitor_c.vk_type
bigf t
;
1036 | Ast_c.NotLocalDecl
-> Ast_c.NotLocalVar
1037 | Ast_c.LocalDecl
-> Ast_c.LocalVar
(offset t
)
1040 var
+> Common.do_option
(fun ((s
, ini
), ii_s_ini
) ->
1042 | StoTypedef
, _inline
->
1043 add_binding (TypeDef
(s
,Lib.al_type t
)) true;
1045 add_binding (VarOrFunc
(s
, (Lib.al_type t
, local))) true;
1048 if need_annotate_body then begin
1049 (* int x = sizeof(x) is legal so need process ini *)
1050 ini
+> Common.do_option
(fun ini
->
1051 Visitor_c.vk_ini
bigf ini
1057 if need_annotate_body
1063 (* ------------------------------------------------------------ *)
1064 Visitor_c.ktype
= (fun (k
, bigf) typ
->
1065 (* bugfix: have a 'Lib.al_type typ' before, but because we can
1066 * have enum with possible expression, we don't want to change
1067 * the ref of abstract-lined types, but the real one, so
1068 * don't al_type here
1070 let (_q
, t
) = typ
in
1072 | StructUnion
(su
, Some s
, structType
),ii
->
1073 let structType'
= Lib.al_fields
structType in
1074 let ii'
= Lib.al_ii
ii in
1075 add_binding (StructUnionNameDef
(s
, ((su
, structType'
),ii'
))) true;
1077 if need_annotate_body
1078 then k typ
(* todo: restrict ? new scope so use do_in_scope ? *)
1080 | Enum
(sopt
, enums
), ii ->
1082 enums
+> List.iter
(fun (((s
, eopt
),ii_s_eq
), iicomma
) ->
1084 if need_annotate_body
1085 then eopt
+> Common.do_option
(fun e
->
1086 Visitor_c.vk_expr
bigf e
1088 add_binding (EnumConstant
(s
, sopt
)) true;
1092 (* TODO: if have a TypeName, then maybe can fill the option
1096 if need_annotate_body
1101 (* ------------------------------------------------------------ *)
1102 Visitor_c.ktoplevel
= (fun (k
, bigf) elem
->
1103 _notyped_var := Hashtbl.create
100;
1106 let {f_name
= funcs
;
1107 f_type
= ((returnt
, (paramst
, b
)) as ftyp
);
1110 f_old_c_style
= oldstyle
;
1116 | is
::iifunc1
::iifunc2
::ibrace1
::ibrace2
::ifakestart
::isto
->
1118 | _ -> raise Impossible
1121 (match oldstyle
with
1124 Lib.al_type
(Ast_c.nQ
, (FunctionType ftyp
, [i1
;i2
])) in
1126 add_binding (VarOrFunc
(funcs
, (typ'
,islocal i1
.Ast_c.pinfo
)))
1129 if need_annotate_body then
1130 do_in_new_scope (fun () ->
1131 paramst
+> List.iter
(fun (((b
, s
, t
), _),_) ->
1134 let local = Ast_c.LocalVar
(offset t
) in
1135 add_binding (VarOrFunc
(s
,(Lib.al_type t
,local))) true
1137 pr2 "no type, certainly because Void type ?"
1143 (* generate regular function type *)
1145 pr2 "TODO generate type for function";
1147 if need_annotate_body then
1148 do_in_new_scope (fun () ->
1149 (* recurse. should naturally call the kdecl visitor and
1162 | NotParsedCorrectly
_
1172 then Visitor_c.vk_toplevel
bigf elem
1174 Common.profile_code
"TAC.annotate_only_included" (fun () ->
1175 Visitor_c.vk_toplevel
bigf elem
1177 else Visitor_c.vk_toplevel
bigf elem
1179 (*****************************************************************************)
1181 (*****************************************************************************)
1182 (* catch all the decl to grow the environment *)
1185 let rec (annotate_program2
:
1186 environment
-> toplevel list
-> (toplevel
* environment
Common.pair
) list
) =
1189 (* globals (re)initialialisation *)
1191 _notyped_var := (Hashtbl.create
100);
1193 prog
+> List.map
(fun elem
->
1194 let beforeenv = !_scoped_env in
1195 visit_toplevel ~just_add_in_env
:false ~depth
:0 elem
;
1196 let afterenv = !_scoped_env in
1197 (elem
, (beforeenv, afterenv))
1203 (*****************************************************************************)
1205 (*****************************************************************************)
1207 (* julia: for coccinelle *)
1208 let annotate_test_expressions prog
=
1209 let rec propagate_test e
=
1210 let ((e_term
,info
),_) = e
in
1211 let (ty,_) = !info
in
1214 Binary
(e1
,Logical
(AndLog
),e2
)
1215 | Binary
(e1
,Logical
(OrLog
),e2
) -> propagate_test e1
; propagate_test e2
1216 | Unary
(e1
,Not
) -> propagate_test e1
1217 | ParenExpr
(e
) -> propagate_test e
1220 let bigf = { Visitor_c.default_visitor_c
with
1221 Visitor_c.kexpr
= (fun (k
,bigf) expr
->
1222 (match unwrap expr
with
1223 (CondExpr
(e
,_,_),_) -> propagate_test e
1228 Visitor_c.kstatement
= (fun (k
, bigf) st
->
1229 match unwrap st
with
1231 (match s
with If
(e1
,s1
,s2
) -> propagate_test e1
| _ -> ());
1235 While
(e
,s
) -> propagate_test e
1236 | DoWhile
(s
,e
) -> propagate_test e
1238 (match unwrap es
with Some e
-> propagate_test e
| None
-> ())
1244 (prog
+> List.iter
(fun elem
->
1245 Visitor_c.vk_toplevel
bigf elem
1250 (*****************************************************************************)
1251 (* Annotate types *)
1252 (*****************************************************************************)
1253 let annotate_program env prog
=
1254 Common.profile_code
"TAC.annotate_program"
1256 let res = annotate_program2 env prog
in
1257 annotate_test_expressions prog
;
1261 let annotate_type_and_localvar env prog
=
1262 Common.profile_code
"TAC.annotate_type"
1263 (fun () -> annotate_program2 env prog
)
1266 (*****************************************************************************)
1267 (* changing default typing environment, do concatenation *)
1268 let init_env filename
=
1269 pr2 ("init_env: " ^ filename
);
1270 let (ast2
, _stat
) = Parse_c.parse_c_and_cpp filename
in
1271 let ast = Parse_c.program_of_program2 ast2
in
1273 let res = annotate_type_and_localvar !initial_env ast in
1274 match List.rev
res with
1275 | [] -> pr2 "empty environment"
1276 | (_top
,(env1
,env2
))::xs
->
1277 initial_env := !initial_env ++ env2
;