3 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes,
4 * Copyright (C) 2009 University of Urbana Champaign
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
20 module Lib
= Lib_parsing_c
22 (*****************************************************************************)
24 (*****************************************************************************)
26 * - Done a first type checker in 2002, cf typing-semantic/, but
27 * was assuming that have all type info, and so was assuming had called
28 * cpp and everything was right.
29 * - Wrote this file, in 2006?, as we added pattern matching on type
30 * in coccinelle. Partial type annotater.
31 * - Julia extended it in 2008? to have localvar/notlocalvar and
32 * test/notest information, again used by coccinelle.
33 * - I extended it in Fall 2008 to have more type information for the
34 * global analysis. I also added some optimisations to process
35 * included code faster.
38 * Design choices. Can either do:
40 * - can first do a simple inferer, that just pass context
41 * - then a real inferer, managing partial info.
42 * type context = fullType option
44 * - extract the information from the .h files
45 * (so no inference at all needed)
47 * Difference with julia's code in parsing_cocci/type_infer.ml:
48 * - She handles just the variable namespace. She does not type
49 * field access or enum or macros. This is because cocci programs are
50 * usually simple and have no structure definition or macro definitions
51 * that we need to type anyway.
52 * - She does more propagation.
53 * - She does not have to handle the typedef isomorphism which force me
54 * to use those typedef_fix and type_unfold_one_step
55 * - She does not handle I think the function pointer C isomorphism.
57 * - She has a cleaner type_cocci without any info. In my case
58 * I need to do those ugly al_type, or generate fake infos.
59 * - She has more compact code. Perhaps because she does not have to
60 * handle the extra exp_info that she added on me :) So I need those
61 * do_with_type, make_info_xxx, etc.
63 * Note: if need to debug this annotater, use -show_trace_profile, it can
64 * help. You can also set the typedef_debug flag below.
68 * todo: expression contain types, and statements, which in turn can contain
69 * expression, so need recurse. Need define an annote_statement and
72 * todo: how deal with typedef isomorphisms ? How store them in Ast_c ?
73 * store all posible variations in ast_c ? a list of type instead of just
76 * todo: how to handle multiple possible definitions for entities like
77 * struct or typedefs ? Because of ifdef, we should store list of
78 * possibilities sometimes.
80 * todo: define a new type ? like type_cocci ? where have a bool ?
82 * semi: How handle scope ? When search for type of field, we return
83 * a type, but this type makes sense only in a certain scope.
84 * We could add a tag to each typedef, structUnionName to differentiate
85 * them and also associate in ast_c to the type the scope
86 * of this type, the env that were used to define this type.
88 * todo: handle better the search in previous env, the env'. Cf the
89 * termination problem in typedef_fix when I was searching in the same
94 (*****************************************************************************)
96 (*****************************************************************************)
98 if !Flag_parsing_c.verbose_type
102 if !Flag_parsing_c.verbose_type
103 then Common.pr2_once s
105 (*****************************************************************************)
107 (*****************************************************************************)
109 (* The different namespaces from stdC manual:
111 * You introduce two new name spaces with every block that you write.
113 * One name space includes all
116 * - type definitions,
117 * - and enumeration constants
118 * that you declare or define within the block.
120 * The other name space includes all
124 * *tags* that you define within the block.
126 * You introduce a new member name space with every structure or union
127 * whose content you define. You identify a member name space by the
128 * type of left operand that you write for a member selection
129 * operator, as in x.y or p->y. A member name space ends with the end
130 * of the block in which you declare it.
132 * You introduce a new goto label name space with every function
133 * definition you write. Each goto label name space ends with its
134 * function definition.
137 (* But I don't try to do a type-checker, I try to "resolve" type of var
138 * so don't need make difference between namespaces here.
140 * But, why not make simply a (string, kindstring) assoc ?
141 * Because we dont want that a variable shadow a struct definition, because
142 * they are still in 2 different namespace. But could for typedef,
143 * because VarOrFunc and Typedef are in the same namespace.
144 * But could do a record as in c_info.ml
148 (* This type contains all "ident" like notion of C. Each time in Ast_c
149 * you have a string type (as in expression, function name, fields)
150 * then you need to manage the scope of this ident.
152 * The wrap for StructUnionNameDef contain the whole ii, the i for
153 * the string, the structUnion and the structType.
155 * Put Macro here ? after all the scoping rules for cpp macros is different
156 * and so does not vanish after the closing '}'.
161 | VarOrFunc
of string * Ast_c.exp_type
162 | EnumConstant
of string * string option
164 | TypeDef
of string * fullType
165 (* the structType contains nested "idents" with struct scope *)
166 | StructUnionNameDef
of string * (structUnion
* structType
) wrap
169 | Macro
of string * (define_kind
* define_val
)
172 (* Because have nested scope, have nested list, hence the list list.
174 * opti? use a hash to accelerate ? hmm but may have some problems
175 * with hash to handle recursive lookup. For instance for the typedef
176 * example where have mutually recursive definition of the type,
177 * we must take care to not loop by starting the second search
178 * from the previous environment. With the list scheme in
179 * lookup_env below it's quite easy to do. With hash it may be
182 type environment
= namedef list list
185 (* ------------------------------------------------------------ *)
186 (* can be modified by the init_env function below, by
187 * the file environment_unix.h
189 let initial_env = ref [
191 (Lib.al_type
(Parse_c.type_of_string
"void *"),
196 (Lib.al_type(Parse_c.type_of_string "void* (*)(int size)"),
199 (Lib.al_type(Parse_c.type_of_string "void (*)(void *ptr)"),
206 let typedef_debug = ref false
209 (* ------------------------------------------------------------ *)
210 (* generic, lookup and also return remaining env for further lookup *)
211 let rec lookup_env2 f env
=
213 | [] -> raise Not_found
214 | []::zs
-> lookup_env2 f zs
217 | None
-> lookup_env2 f
(xs
::zs
)
218 | Some y
-> y
, xs
::zs
221 Common.profile_code
"TAC.lookup_env" (fun () -> lookup_env2 a b
)
225 let member_env lookupf env
=
227 let _ = lookupf env
in
229 with Not_found
-> false
234 (* ------------------------------------------------------------ *)
237 let lookup_var s env
=
239 | VarOrFunc
(s2
, typ
) -> if s2
=$
= s
then Some typ
else None
244 let lookup_typedef s env
=
245 if !typedef_debug then pr2 ("looking for: " ^ s
);
247 | TypeDef
(s2
, typ
) -> if s2
=$
= s
then Some typ
else None
252 let lookup_structunion (_su
, s
) env
=
254 | StructUnionNameDef
(s2
, typ
) -> if s2
=$
= s
then Some typ
else None
259 let lookup_macro s env
=
261 | Macro
(s2
, typ
) -> if s2
=$
= s
then Some typ
else None
266 let lookup_enum s env
=
268 | EnumConstant
(s2
, typ
) -> if s2
=$
= s
then Some typ
else None
274 let lookup_typedef a b
=
275 Common.profile_code
"TAC.lookup_typedef" (fun () -> lookup_typedef a b
)
279 (*****************************************************************************)
281 (*****************************************************************************)
283 (* find_final_type is used to know to what type a field correspond in
284 * x.foo. Sometimes the type of x is a typedef or a structName in which
285 * case we must look in environment to find the complete type, here
286 * structUnion that contains the information.
288 * Because in C one can redefine in nested blocks some typedefs,
289 * struct, or variables, we have a static scoping resolving process.
290 * So, when we look for the type of a var, if this var is in an
291 * enclosing block, then maybe its type refer to a typdef of this
292 * enclosing block, so must restart the "type-resolving" of this
293 * typedef from this enclosing block, not from the bottom. So our
294 * "resolving-type functions" take an env and also return an env from
295 * where the next search must be performed. *)
298 let rec find_final_type ty env =
300 match Ast_c.unwrap_typeC ty with
301 | BaseType x -> (BaseType x) +> Ast_c.rewrap_typeC ty
303 | Pointer t -> (Pointer (find_final_type t env)) +> Ast_c.rewrap_typeC ty
304 | Array (e, t) -> Array (e, find_final_type t env) +> Ast_c.rewrap_typeC ty
306 | StructUnion (sopt, su) -> StructUnion (sopt, su) +> Ast_c.rewrap_typeC ty
308 | FunctionType t -> (FunctionType t) (* todo ? *) +> Ast_c.rewrap_typeC ty
309 | Enum
(s
, enumt
) -> (Enum
(s
, enumt
)) (* todo? *) +> Ast_c.rewrap_typeC ty
310 | EnumName s
-> (EnumName s
) (* todo? *) +> Ast_c.rewrap_typeC ty
312 | StructUnionName
(su
, s
) ->
314 let ((structtyp
,ii
), env'
) = lookup_structunion (su
, s
) env
in
315 Ast_c.nQ
, (StructUnion
(Some s
, structtyp
), ii
)
316 (* old: +> Ast_c.rewrap_typeC ty
317 * but must wrap with good ii, otherwise pretty_print_c
318 * will be lost and raise some Impossible
326 let (t'
, env'
) = lookup_typedef s env
in
327 find_final_type t' env'
332 | ParenType t
-> find_final_type t env
333 | Typeof e
-> failwith
"typeof"
339 (* ------------------------------------------------------------ *)
340 let rec type_unfold_one_step ty env
=
342 match Ast_c.unwrap_typeC ty
with
347 | StructUnion
(sopt
, su
, fields
) -> ty
349 | FunctionType t
-> ty
350 | Enum
(s
, enumt
) -> ty
352 | EnumName s
-> ty
(* todo: look in env when will have EnumDef *)
354 | StructUnionName
(su
, s
) ->
356 let (((su
,fields
),ii
), env'
) = lookup_structunion (su
, s
) env
in
357 Ast_c.nQ
, (StructUnion
(su
, Some s
, fields
), ii
)
358 (* old: +> Ast_c.rewrap_typeC ty
359 * but must wrap with good ii, otherwise pretty_print_c
360 * will be lost and raise some Impossible
366 | TypeName
(name
, _typ
) ->
367 let s = Ast_c.str_of_name name
in
369 if !typedef_debug then pr2 "type_unfold_one_step: lookup_typedef";
370 let (t'
, env'
) = lookup_typedef s env
in
371 type_unfold_one_step t' env'
376 | ParenType t
-> type_unfold_one_step t env
378 pr2_once ("Type_annoter: not handling typeof");
380 | TypeOfType t
-> type_unfold_one_step t env
390 (* normalizer. can be seen as the opposite of the previous function as
391 * we "fold" at least for the structUnion. Should return something that
392 * Type_c.is_completed_fullType likes, something that makes it easier
393 * for the programmer to work on, that has all the needed information
396 let rec typedef_fix ty env
=
397 match Ast_c.unwrap_typeC ty
with
401 Pointer
(typedef_fix t env
) +> Ast_c.rewrap_typeC ty
403 Array
(e
, typedef_fix t env
) +> Ast_c.rewrap_typeC ty
404 | StructUnion
(su
, sopt
, fields
) ->
406 * todo? but what if correspond to a nested struct def ?
408 Type_c.structdef_to_struct_name ty
410 (FunctionType ft
) (* todo ? *) +> Ast_c.rewrap_typeC ty
412 (Enum
(s, enumt
)) (* todo? *) +> Ast_c.rewrap_typeC ty
414 (EnumName
s) (* todo? *) +> Ast_c.rewrap_typeC ty
416 (* we prefer StructUnionName to StructUnion when it comes to typed metavar *)
417 | StructUnionName
(su
, s) -> ty
419 (* keep the typename but complete with more information *)
420 | TypeName
(name
, typ
) ->
421 let s = Ast_c.str_of_name name
in
424 pr2 ("typedef value already there:" ^
s);
428 if !typedef_debug then pr2 "typedef_fix: lookup_typedef";
429 let (t'
, env'
) = lookup_typedef s env
in
431 (* bugfix: termination bug if use env instead of env' below, because
432 * can have some weird mutually recursive typedef which
433 * each new type alias search for its mutual def.
435 TypeName
(name
, Some
(typedef_fix t' env'
)) +> Ast_c.rewrap_typeC ty
440 (* remove paren for better matching with typed metavar. kind of iso again *)
444 pr2_once ("Type_annoter: not handling typeof");
451 (*****************************************************************************)
452 (* Helpers, part 1 *)
453 (*****************************************************************************)
456 (Lib.al_type
(Parse_c.type_of_string
s))
458 Common.profile_code
"Type_c.type_of_s" (fun () -> type_of_s2 a
)
462 * /home/pad/software-os-src2/freebsd/contrib/ipfilter/netinet/ip_fil_freebsd.c
463 * because in the code there is:
464 * static iss_seq_off = 0;
465 * which in the parser was generating a default int without a parse_info.
466 * I now add a fake parse_info for such default int so no more failwith
469 let offset (_,(ty
,iis
)) =
471 | TypeName
(name
, _typ
), [] ->
473 | RegularName
(s, [ii
]) -> ii
.Ast_c.pinfo
476 | _, ii
::_ -> ii
.Ast_c.pinfo
477 | _ -> failwith
"type has no text; need to think again"
481 let rec is_simple_expr expr
=
482 match Ast_c.unwrap_expr expr
with
483 (* todo? handle more special cases ? *)
491 | Binary
(e1
, op
, e2
) ->
495 | ParenExpr
(e
) -> is_simple_expr e
499 (*****************************************************************************)
501 (*****************************************************************************)
502 (* now in type_c.ml *)
506 (*****************************************************************************)
507 (* (Semi) Globals, Julia's style *)
508 (*****************************************************************************)
510 (* opti: cache ? use hash ? *)
511 let _scoped_env = ref !initial_env
513 (* memoise unnanoted var, to avoid too much warning messages *)
514 let _notyped_var = ref (Hashtbl.create
100)
516 let new_scope() = _scoped_env := []::!_scoped_env
517 let del_scope() = _scoped_env := List.tl
!_scoped_env
519 let do_in_new_scope f =
527 let add_in_scope namedef
=
528 let (current
, older
) = Common.uncons
!_scoped_env in
529 _scoped_env := (namedef
::current
)::older
532 (* ------------------------------------------------------------ *)
534 (* sort of hackish... *)
536 if List.length
(!_scoped_env) =|= List.length
!initial_env
537 then Ast_c.NotLocalVar
538 else Ast_c.LocalVar info
540 (* ------------------------------------------------------------ *)
541 (* the warning argument is here to allow some binding to overwrite an
542 * existing one. With function, we first have the prototype and then the def,
543 * and the def binding with the same string is not an error.
545 * todo?: but if we define two times the same function, then we will not
546 * detect it :( it would require to make a diff between adding a binding
547 * from a prototype and from a definition.
549 * opti: disabling the check_annotater flag have some important
550 * performance benefit.
553 let add_binding2 namedef warning
=
554 let (current_scope
, _older_scope
) = Common.uncons
!_scoped_env in
556 if !Flag_parsing_c.check_annotater
then begin
558 | VarOrFunc
(s, typ
) ->
559 if Hashtbl.mem
!_notyped_var s
560 then pr2 ("warning: found typing information for a variable that was" ^
561 "previously unknown:" ^
s);
567 | VarOrFunc
(s, typ
) ->
568 member_env (lookup_var s), s
569 | TypeDef
(s, typ
) ->
570 member_env (lookup_typedef s), s
571 | StructUnionNameDef
(s, (su
, typ
)) ->
572 member_env (lookup_structunion (su
, s)), s
574 member_env (lookup_macro s), s
575 | EnumConstant
(s, body
) ->
576 member_env (lookup_enum s), s
579 if memberf
[current_scope
] && warning
580 then pr2 ("Type_annoter: warning, " ^
s ^
581 " is already in current binding" ^
"\n" ^
582 " so there is a weird shadowing");
586 let add_binding namedef warning
=
587 Common.profile_code
"TAC.add_binding" (fun () -> add_binding2 namedef warning
)
591 (*****************************************************************************)
592 (* Helpers, part 2 *)
593 (*****************************************************************************)
595 let lookup_opt_env lookupf
s =
596 Common.optionise
(fun () ->
597 lookupf
s !_scoped_env
600 let unwrap_unfold_env2 typ
=
602 (type_unfold_one_step typ
!_scoped_env)
603 let unwrap_unfold_env typ
=
604 Common.profile_code
"TAC.unwrap_unfold_env" (fun () -> unwrap_unfold_env2 typ
)
606 let typedef_fix a b
=
607 Common.profile_code
"TAC.typedef_fix" (fun () -> typedef_fix a b
)
609 let make_info_def_fix x
=
610 Type_c.make_info_def
(typedef_fix x
!_scoped_env)
612 let make_info_fix (typ
, local
) =
613 Type_c.make_info
((typedef_fix typ
!_scoped_env),local
)
616 let make_info_def = Type_c.make_info_def
618 (*****************************************************************************)
619 (* Main typer code, put later in a visitor *)
620 (*****************************************************************************)
622 let annotater_expr_visitor_subpart = (fun (k
,bigf
) expr
->
625 match Ast_c.unwrap_expr expr
with
627 (* -------------------------------------------------- *)
628 (* todo: should analyse the 's' for int to know if unsigned or not *)
629 | Constant
(String
(s,kind
)) -> make_info_def (type_of_s "char *")
630 | Constant MultiString
_ -> make_info_def (type_of_s "char *")
631 | Constant
(Char
(s,kind
)) -> make_info_def (type_of_s "char")
632 | Constant
(Int
(s)) -> make_info_def (type_of_s "int")
633 | Constant
(Float
(s,kind
)) ->
634 let fake = Ast_c.fakeInfo
(Common.fake_parse_info
) in
635 let fake = Ast_c.rewrap_str
"float" fake in
636 let iinull = [fake] in
638 (Ast_c.nQ
, (BaseType
(FloatType kind
), iinull))
641 (* -------------------------------------------------- *)
642 (* note: could factorize this code with the code for Ident
643 * and the other code for Funcall below. But as the Ident can be
644 * a macro-func, I prefer to handle it separately. So
645 * this rule can handle the macro-func, the Ident-rule can handle
646 * the macro-var, and the other FunCall-rule the regular
647 * function calls through fields.
648 * Also as I don't want a warning on the Ident that are a FunCall,
649 * easier to have a rule separate from the Ident rule.
651 | FunCall
(((Ident
(ident
), typ
), _ii
) as e1
, args
) ->
654 args
+> List.iter
(fun (e
,ii
) ->
655 (* could typecheck if arguments agree with prototype *)
656 Visitor_c.vk_argument bigf e
658 let s = Ast_c.str_of_name ident
in
659 (match lookup_opt_env lookup_var s with
660 | Some
((typ
,local
),_nextenv
) ->
662 (* set type for ident *)
663 let tyinfo = make_info_fix (typ
, local
) in
664 Ast_c.set_type_expr e1
tyinfo;
666 (match unwrap_unfold_env typ
with
667 | FunctionType
(ret
, params
) -> make_info_def ret
669 (* can be function pointer, C have an iso for that,
670 * same pfn() syntax than regular function call.
673 (match unwrap_unfold_env typ2
with
674 | FunctionType
(ret
, params
) -> make_info_def ret
675 | _ -> Type_c.noTypeHere
677 | _ -> Type_c.noTypeHere
681 (match lookup_opt_env lookup_macro s with
682 | Some
((defkind
, defval
), _nextenv
) ->
683 (match defkind
, defval
with
684 | DefineFunc
_, DefineExpr e
->
685 let rettype = Ast_c.get_onlytype_expr e
in
687 (* todo: could also set type for ident ?
688 have return type and at least type of concrete
689 parameters so can generate a fake FunctionType
692 Type_c.fake_function_type
rettype args
695 macrotype_opt +> Common.do_option
(fun t
->
696 pr2 ("Type_annotater: generate fake function type" ^
698 let tyinfo = make_info_def_fix t
in
699 Ast_c.set_type_expr e1
tyinfo;
702 Ast_c.get_type_expr e
704 pr2 ("Type_annoter: not a macro-func: " ^
s);
707 (* normally the FunCall case should have catch it *)
708 pr2 ("Type_annoter: not a macro-func-expr: " ^
s);
712 pr2_once ("type_annotater: no type for function ident: " ^
s);
718 | FunCall
(e
, args
) ->
721 (Ast_c.get_type_expr e
) +> Type_c.do_with_type
(fun typ
->
722 (* copy paste of above *)
723 (match unwrap_unfold_env typ
with
724 | FunctionType
(ret
, params
) -> make_info_def ret
726 (match unwrap_unfold_env typ
with
727 | FunctionType
(ret
, params
) -> make_info_def ret
728 | _ -> Type_c.noTypeHere
730 | _ -> Type_c.noTypeHere
735 (* -------------------------------------------------- *)
737 let s = Ast_c.str_of_name ident
in
738 (match lookup_opt_env lookup_var s with
739 | Some
((typ
,local
),_nextenv
) ->
740 make_info_fix (typ
,local
)
742 (match lookup_opt_env lookup_macro s with
743 | Some
((defkind
, defval
), _nextenv
) ->
744 (match defkind
, defval
with
745 | DefineVar
, DefineExpr e
->
746 Ast_c.get_type_expr e
748 pr2 ("Type_annoter: not a expression: " ^
s);
751 (* normally the FunCall case should have catch it *)
752 pr2 ("Type_annoter: not a macro-var: " ^
s);
756 (match lookup_opt_env lookup_enum s with
757 | Some
(_, _nextenv
) ->
758 make_info_def (type_of_s "int")
760 if not
(s =~
"[A-Z_]+") (* if macro then no warning *)
762 if !Flag_parsing_c.check_annotater
then
763 if not
(Hashtbl.mem
!_notyped_var s)
765 pr2 ("Type_annoter: not finding type for: " ^
s);
766 Hashtbl.add
!_notyped_var s true;
770 pr2 ("Type_annoter: not finding type for: " ^
s)
777 (* -------------------------------------------------- *)
778 (* C isomorphism on type on array and pointers *)
780 | ArrayAccess
(e
, _) ->
781 k expr
; (* recurse to set the types-ref of sub expressions *)
783 (Ast_c.get_type_expr e
) +> Type_c.do_with_type
(fun t
->
784 (* todo: maybe not good env !! *)
785 match unwrap_unfold_env t
with
789 | _ -> Type_c.noTypeHere
793 | Unary
(e
, GetRef
) ->
794 k expr
; (* recurse to set the types-ref of sub expressions *)
796 (Ast_c.get_type_expr e
) +> Type_c.do_with_type
(fun t
->
797 (* must generate an element so that '=' can be used
800 let fake = Ast_c.fakeInfo
Common.fake_parse_info
in
801 let fake = Ast_c.rewrap_str
"*" fake in
803 let ft = (Ast_c.nQ
, (Pointer t
, [fake])) in
808 (* -------------------------------------------------- *)
810 | RecordAccess
(e
, namefld
)
811 | RecordPtAccess
(e
, namefld
) as x
->
813 let fld = Ast_c.str_of_name namefld
in
815 k expr
; (* recurse to set the types-ref of sub expressions *)
817 (Ast_c.get_type_expr e
) +> Type_c.do_with_type
(fun t
->
821 | RecordAccess
_ -> Some t
822 | RecordPtAccess
_ ->
823 (match unwrap_unfold_env t
with
824 | Pointer
(t
) -> Some t
827 | _ -> raise Impossible
831 | None
-> Type_c.noTypeHere
833 match unwrap_unfold_env t
with
834 | StructUnion
(su
, sopt
, fields
) ->
836 (* todo: which env ? *)
838 (Type_c.type_field
fld (su
, fields
))
842 "TYPE-ERROR: field '%s' does not belong in struct %s"
843 fld (match sopt
with Some
s -> s |_ -> "<anon>"));
846 pr2 "TAC:MultiFound";
849 | _ -> Type_c.noTypeHere
855 (* -------------------------------------------------- *)
858 (* todo: if infer, can "push" info ? add_types_expr [t] e ? *)
859 make_info_def_fix (Lib.al_type t
)
861 (* todo? lub, hmm maybe not, cos type must be e1 *)
862 | Assignment
(e1
, op
, e2
) ->
864 (* value of an assignment is the value of the RHS expression *)
865 Ast_c.get_type_expr e2
866 | Sequence
(e1
, e2
) ->
868 Ast_c.get_type_expr e2
871 | Binary
(e1
, op
, e2
) ->
873 Type_c.lub
(Type_c.get_opt_type e1
) (Type_c.get_opt_type e2
)
875 | CondExpr
(cond
, e1opt
, e2
) ->
877 Ast_c.get_type_expr e2
882 Ast_c.get_type_expr e
884 | Infix
(e
, op
) | Postfix
(e
, op
) ->
886 Ast_c.get_type_expr e
888 (* pad: julia wrote this ? *)
889 | Unary
(e
, UnPlus
) ->
890 k expr
; (* recurse to set the types-ref of sub expressions *)
891 make_info_def (type_of_s "int")
892 (* todo? can convert from unsigned to signed if UnMinus ? *)
893 | Unary
(e
, UnMinus
) ->
894 k expr
; (* recurse to set the types-ref of sub expressions *)
895 make_info_def (type_of_s "int")
897 | SizeOfType
_|SizeOfExpr
_ ->
898 k expr
; (* recurse to set the types-ref of sub expressions *)
899 make_info_def (type_of_s "int")
901 | Constructor
(ft, ini
) ->
902 k expr
; (* recurse to set the types-ref of sub expressions *)
903 make_info_def (Lib.al_type
ft)
906 k expr
; (* recurse to set the types-ref of sub expressions *)
907 Ast_c.get_type_expr e
908 | Unary
(e
, Tilde
) ->
909 k expr
; (* recurse to set the types-ref of sub expressions *)
910 Ast_c.get_type_expr e
912 (* -------------------------------------------------- *)
914 | Unary
(_, GetRefLabel
) ->
915 k expr
; (* recurse to set the types-ref of sub expressions *)
916 pr2_once "Type annotater:not handling GetRefLabel";
920 k expr
; (* recurse to set the types-ref of sub expressions *)
921 pr2_once "Type annotater:not handling GetRefLabel";
924 | _ -> k expr; Type_c.noTypeHere
928 Ast_c.set_type_expr expr
ty
933 (*****************************************************************************)
935 (*****************************************************************************)
937 (* Processing includes that were added after a cpp_ast_c makes the
938 * type annotater quite slow, especially when the depth of cpp_ast_c is
939 * big. But for such includes the only thing we really want is to modify
940 * the environment to have enough type information. We don't need
941 * to type the expressions inside those includes (they will be typed
942 * when we process the include file directly). Here the goal is
945 * Note that as usually header files contain mostly structure
946 * definitions and defines, that means we still have to do lots of work.
947 * We only win on function definition bodies, but usually header files
948 * have just prototypes, or inline function definitions which anyway have
949 * usually a small body. But still, we win. It also makes clearer
950 * that when processing include as we just need the environment, the caller
951 * of this module can do further optimisations such as memorising the
952 * state of the environment after each header files.
955 * For sparse its makes the annotating speed goes from 9s to 4s
956 * For Linux the speedup is even better, from ??? to ???.
958 * Because There would be some copy paste with annotate_program, it is
959 * better to factorize code hence the just_add_in_env parameter below.
961 * todo? alternative optimisation for the include problem:
962 * - processing all headers files one time and construct big env
963 * - use hashtbl for env (but apparently not biggest problem)
966 let rec visit_toplevel ~just_add_in_env ~depth elem
=
967 let need_annotate_body = not just_add_in_env
in
969 let bigf = { Visitor_c.default_visitor_c
with
971 (* ------------------------------------------------------------ *)
972 Visitor_c.kcppdirective
= (fun (k
, bigf) directive
->
974 (* do error messages for type annotater only for the real body of the
975 * file, not inside include.
977 | Include
{i_content
= opt
} ->
978 opt
+> Common.do_option
(fun (filename
, program
) ->
979 Common.save_excursion
Flag_parsing_c.verbose_type
(fun () ->
980 Flag_parsing_c.verbose_type
:= false;
982 (* old: Visitor_c.vk_program bigf program;
983 * opti: set the just_add_in_env
985 program
+> List.iter
(fun elem
->
986 visit_toplevel ~just_add_in_env
:true ~depth
:(depth
+1) elem
991 | Define
((s,ii
), (defkind
, defval
)) ->
994 (* even if we are in a just_add_in_env phase, such as when
995 * we process include, as opposed to the body of functions,
996 * with macros we still to type the body of the macro as
997 * the macro has no type and so we infer its type from its
998 * body (and one day later maybe from its use).
1001 (* can try to optimize and recurse only when the define body
1005 | DefineExpr expr
->
1006 if is_simple_expr expr
1007 (* even if not need_annotate_body, still recurse*)
1010 if need_annotate_body
1013 if need_annotate_body
1017 add_binding (Macro
(s, (defkind
, defval
) )) true;
1020 | PragmaAndCo
_ -> ()
1023 (* ------------------------------------------------------------ *)
1024 (* main typer code *)
1025 (* ------------------------------------------------------------ *)
1026 Visitor_c.kexpr
= annotater_expr_visitor_subpart;
1028 (* ------------------------------------------------------------ *)
1029 Visitor_c.kstatement
= (fun (k
, bigf) st
->
1031 | Compound statxs
, ii
-> do_in_new_scope (fun () -> k st
);
1034 (* ------------------------------------------------------------ *)
1035 Visitor_c.kdecl
= (fun (k
, bigf) d
->
1037 | (DeclList
(xs
, ii
)) ->
1038 xs
+> List.iter
(fun ({v_namei
= var
; v_type
= t
;
1039 v_storage
= sto
; v_local
= local
}, iicomma
) ->
1041 (* to add possible definition in type found in Decl *)
1042 Visitor_c.vk_type
bigf t
;
1047 | Ast_c.NotLocalDecl
-> Ast_c.NotLocalVar
1048 | Ast_c.LocalDecl
-> Ast_c.LocalVar
(offset t
)
1051 var
+> Common.do_option
(fun (name
, iniopt
) ->
1052 let s = Ast_c.str_of_name name
in
1055 | StoTypedef
, _inline
->
1056 add_binding (TypeDef
(s,Lib.al_type t
)) true;
1058 add_binding (VarOrFunc
(s, (Lib.al_type t
, local))) true;
1061 if need_annotate_body then begin
1062 (* int x = sizeof(x) is legal so need process ini *)
1063 iniopt
+> Common.do_option
(fun (info
, ini
) ->
1064 Visitor_c.vk_ini
bigf ini
1070 if need_annotate_body
1076 (* ------------------------------------------------------------ *)
1077 Visitor_c.ktype
= (fun (k
, bigf) typ
->
1078 (* bugfix: have a 'Lib.al_type typ' before, but because we can
1079 * have enum with possible expression, we don't want to change
1080 * the ref of abstract-lined types, but the real one, so
1081 * don't al_type here
1083 let (_q
, t
) = typ
in
1085 | StructUnion
(su
, Some
s, structType
),ii
->
1086 let structType'
= Lib.al_fields
structType in
1087 let ii'
= Lib.al_ii
ii in
1088 add_binding (StructUnionNameDef
(s, ((su
, structType'
),ii'
))) true;
1090 if need_annotate_body
1091 then k typ
(* todo: restrict ? new scope so use do_in_scope ? *)
1093 | Enum
(sopt
, enums
), ii ->
1095 enums
+> List.iter
(fun ((name
, eopt
), iicomma
) ->
1097 let s = Ast_c.str_of_name name
in
1099 if need_annotate_body
1100 then eopt
+> Common.do_option
(fun (ieq
, e
) ->
1101 Visitor_c.vk_expr
bigf e
1103 add_binding (EnumConstant
(s, sopt
)) true;
1107 (* TODO: if have a TypeName, then maybe can fill the option
1111 if need_annotate_body
1116 (* ------------------------------------------------------------ *)
1117 Visitor_c.ktoplevel
= (fun (k
, bigf) elem
->
1118 _notyped_var := Hashtbl.create
100;
1122 f_type
= ((returnt
, (paramst
, b
)) as ftyp
);
1125 f_old_c_style
= oldstyle
;
1131 | iifunc1
::iifunc2
::ibrace1
::ibrace2
::ifakestart
::isto
->
1133 | _ -> raise Impossible
1135 let funcs = Ast_c.str_of_name name
in
1137 (match oldstyle
with
1140 Lib.al_type
(Ast_c.nQ
, (FunctionType ftyp
, [i1
;i2
])) in
1142 add_binding (VarOrFunc
(funcs, (typ'
,islocal i1
.Ast_c.pinfo
)))
1145 if need_annotate_body then
1146 do_in_new_scope (fun () ->
1147 paramst
+> List.iter
(fun ({p_namei
= nameopt
; p_type
= t
},_)->
1150 let s = Ast_c.str_of_name name
in
1151 let local = Ast_c.LocalVar
(offset t
) in
1152 add_binding (VarOrFunc
(s,(Lib.al_type t
,local))) true
1154 pr2 "no type, certainly because Void type ?"
1160 (* generate regular function type *)
1162 pr2 "TODO generate type for function";
1164 if need_annotate_body then
1165 do_in_new_scope (fun () ->
1166 (* recurse. should naturally call the kdecl visitor and
1179 | NotParsedCorrectly
_
1189 then Visitor_c.vk_toplevel
bigf elem
1191 Common.profile_code
"TAC.annotate_only_included" (fun () ->
1192 Visitor_c.vk_toplevel
bigf elem
1194 else Visitor_c.vk_toplevel
bigf elem
1196 (*****************************************************************************)
1198 (*****************************************************************************)
1199 (* catch all the decl to grow the environment *)
1202 let rec (annotate_program2
:
1203 environment
-> toplevel list
-> (toplevel
* environment
Common.pair
) list
) =
1206 (* globals (re)initialialisation *)
1208 _notyped_var := (Hashtbl.create
100);
1210 prog
+> List.map
(fun elem
->
1211 let beforeenv = !_scoped_env in
1212 visit_toplevel ~just_add_in_env
:false ~depth
:0 elem
;
1213 let afterenv = !_scoped_env in
1214 (elem
, (beforeenv, afterenv))
1220 (*****************************************************************************)
1222 (*****************************************************************************)
1224 (* julia: for coccinelle *)
1225 let annotate_test_expressions prog
=
1226 let rec propagate_test e
=
1227 let ((e_term
,info
),_) = e
in
1228 let (ty,_) = !info
in
1231 Binary
(e1
,Logical
(AndLog
),e2
)
1232 | Binary
(e1
,Logical
(OrLog
),e2
) -> propagate_test e1
; propagate_test e2
1233 | Unary
(e1
,Not
) -> propagate_test e1
1234 | ParenExpr
(e
) -> propagate_test e
1237 let bigf = { Visitor_c.default_visitor_c
with
1238 Visitor_c.kexpr
= (fun (k
,bigf) expr
->
1239 (match unwrap expr
with
1240 (CondExpr
(e
,_,_),_) -> propagate_test e
1245 Visitor_c.kstatement
= (fun (k
, bigf) st
->
1246 match unwrap st
with
1248 (match s with If
(e1
,s1
,s2
) -> propagate_test e1
| _ -> ());
1252 While
(e
,s) -> propagate_test e
1253 | DoWhile
(s,e
) -> propagate_test e
1255 (match unwrap es
with Some e
-> propagate_test e
| None
-> ())
1261 (prog
+> List.iter
(fun elem
->
1262 Visitor_c.vk_toplevel
bigf elem
1267 (*****************************************************************************)
1268 (* Annotate types *)
1269 (*****************************************************************************)
1270 let annotate_program env prog
=
1271 Common.profile_code
"TAC.annotate_program"
1273 let res = annotate_program2 env prog
in
1274 annotate_test_expressions prog
;
1278 let annotate_type_and_localvar env prog
=
1279 Common.profile_code
"TAC.annotate_type"
1280 (fun () -> annotate_program2 env prog
)
1283 (*****************************************************************************)
1284 (* changing default typing environment, do concatenation *)
1285 let init_env filename
=
1286 pr2 ("init_env: " ^ filename
);
1287 let (ast2
, _stat
) = Parse_c.parse_c_and_cpp filename
in
1288 let ast = Parse_c.program_of_program2 ast2
in
1290 let res = annotate_type_and_localvar !initial_env ast in
1291 match List.rev
res with
1292 | [] -> pr2 "empty environment"
1293 | (_top
,(env1
,env2
))::xs
->
1294 initial_env := !initial_env ++ env2
;