Release coccinelle-0.2.0rc1
[bpt/coccinelle.git] / parsing_cocci / type_infer.ml
CommitLineData
34e49164
C
1module T = Type_cocci
2module Ast = Ast_cocci
3module Ast0 = Ast0_cocci
4module V0 = Visitor_ast0
b1b2de81 5module VT0 = Visitor_ast0_types
34e49164
C
6
7(* Type inference:
8Just propagates information based on declarations. Could try to infer
9more precise information about expression metavariables, but not sure it is
10worth it. The most obvious goal is to distinguish between test expressions
11that have pointer, integer, and boolean type when matching isomorphisms,
12but perhaps other needs will become apparent. *)
13
14(* "functions" that return a boolean value *)
15let bool_functions = ["likely";"unlikely"]
16
17let err wrapped ty s =
18 T.typeC ty; Format.print_newline();
19 failwith (Printf.sprintf "line %d: %s" (Ast0.get_line wrapped) s)
20
21type id = Id of string | Meta of (string * string)
22
faf9a90c
C
23let int_type = T.BaseType(T.IntType)
24let bool_type = T.BaseType(T.BoolType)
25let char_type = T.BaseType(T.CharType)
26let float_type = T.BaseType(T.FloatType)
27
34e49164
C
28let rec lub_type t1 t2 =
29 match (t1,t2) with
30 (None,None) -> None
31 | (None,Some t) -> t2
32 | (Some t,None) -> t1
33 | (Some t1,Some t2) ->
34 let rec loop = function
35 (T.Unknown,t2) -> t2
36 | (t1,T.Unknown) -> t1
37 | (T.ConstVol(cv1,ty1),T.ConstVol(cv2,ty2)) when cv1 = cv2 ->
38 T.ConstVol(cv1,loop(ty1,ty2))
91eba41f
C
39
40 (* pad: in pointer arithmetic, as in ptr+1, the lub must be ptr *)
34e49164
C
41 | (T.Pointer(ty1),T.Pointer(ty2)) ->
42 T.Pointer(loop(ty1,ty2))
43 | (ty1,T.Pointer(ty2)) -> T.Pointer(ty2)
44 | (T.Pointer(ty1),ty2) -> T.Pointer(ty1)
91eba41f 45
34e49164
C
46 | (T.Array(ty1),T.Array(ty2)) -> T.Array(loop(ty1,ty2))
47 | (T.TypeName(s1),t2) -> t2
48 | (t1,T.TypeName(s1)) -> t1
49 | (t1,_) -> t1 in (* arbitrarily pick the first, assume type correct *)
50 Some (loop (t1,t2))
51
52let lub_envs envs =
53 List.fold_left
54 (function acc ->
55 function env ->
56 List.fold_left
57 (function acc ->
58 function (var,ty) ->
59 let (relevant,irrelevant) =
60 List.partition (function (x,_) -> x = var) acc in
61 match relevant with
62 [] -> (var,ty)::acc
63 | [(x,ty1)] ->
64 (match lub_type (Some ty) (Some ty1) with
65 Some new_ty -> (var,new_ty)::irrelevant
66 | None -> irrelevant)
67 | _ -> failwith "bad type environment")
68 acc env)
69 [] envs
70
71let rec propagate_types env =
72 let option_default = None in
73 let bind x y = option_default in (* no generic way of combining types *)
74
34e49164
C
75 let ident r k i =
76 match Ast0.unwrap i with
77 Ast0.Id(id) ->
78 (try Some(List.assoc (Id(Ast0.unwrap_mcode id)) env)
79 with Not_found -> None)
80 | Ast0.MetaId(id,_,_) ->
81 (try Some(List.assoc (Meta(Ast0.unwrap_mcode id)) env)
82 with Not_found -> None)
83 | _ -> k i in
84
85 let strip_cv = function
86 Some (T.ConstVol(_,t)) -> Some t
87 | t -> t in
88
faf9a90c
C
89 (* types that might be integer types. should char be allowed? *)
90 let rec is_int_type = function
91 T.BaseType(T.IntType)
92 | T.BaseType(T.LongType)
93 | T.BaseType(T.ShortType)
94 | T.MetaType(_,_,_)
95 | T.TypeName _
0708f913 96 | T.EnumName _
faf9a90c
C
97 | T.SignedT(_,None) -> true
98 | T.SignedT(_,Some ty) -> is_int_type ty
99 | _ -> false in
100
34e49164
C
101 let expression r k e =
102 let res = k e in
103 let ty =
104 match Ast0.unwrap e with
951c7801
C
105 (* pad: the type of id is set in the ident visitor *)
106 Ast0.Ident(id) -> Ast0.set_type e res; res
107 | Ast0.Constant(const) ->
108 (match Ast0.unwrap_mcode const with
109 Ast.String(_) -> Some (T.Pointer(char_type))
110 | Ast.Char(_) -> Some (char_type)
111 | Ast.Int(_) -> Some (int_type)
112 | Ast.Float(_) -> Some (float_type))
113 (* pad: note that in C can do either ptr(...) or ( *ptr)(...)
114 * so I am not sure this code is enough.
115 *)
116 | Ast0.FunCall(fn,lp,args,rp) ->
117 (match Ast0.get_type fn with
118 Some (T.FunctionPointer(ty)) -> Some ty
119 | _ ->
120 (match Ast0.unwrap fn with
121 Ast0.Ident(id) ->
122 (match Ast0.unwrap id with
123 Ast0.Id(id) ->
124 if List.mem (Ast0.unwrap_mcode id) bool_functions
125 then Some(bool_type)
126 else None
127 | _ -> None)
128 | _ -> None))
129 | Ast0.Assignment(exp1,op,exp2,_) ->
130 let ty = lub_type (Ast0.get_type exp1) (Ast0.get_type exp2) in
131 Ast0.set_type exp1 ty; Ast0.set_type exp2 ty; ty
132 | Ast0.CondExpr(exp1,why,Some exp2,colon,exp3) ->
133 let ty = lub_type (Ast0.get_type exp2) (Ast0.get_type exp3) in
134 Ast0.set_type exp2 ty; Ast0.set_type exp3 ty; ty
135 | Ast0.CondExpr(exp1,why,None,colon,exp3) -> Ast0.get_type exp3
136 | Ast0.Postfix(exp,op) | Ast0.Infix(exp,op) -> (* op is dec or inc *)
137 Ast0.get_type exp
138 | Ast0.Unary(exp,op) ->
139 (match Ast0.unwrap_mcode op with
140 Ast.GetRef ->
141 (match Ast0.get_type exp with
142 None -> Some (T.Pointer(T.Unknown))
143 | Some t -> Some (T.Pointer(t)))
144 | Ast.DeRef ->
145 (match Ast0.get_type exp with
146 Some (T.Pointer(t)) -> Some t
147 | _ -> None)
148 | Ast.UnPlus -> Ast0.get_type exp
149 | Ast.UnMinus -> Ast0.get_type exp
150 | Ast.Tilde -> Ast0.get_type exp
151 | Ast.Not -> Some(bool_type))
152 | Ast0.Nested(exp1,op,exp2) -> failwith "nested in type inf not possible"
153 | Ast0.Binary(exp1,op,exp2) ->
154 let ty1 = Ast0.get_type exp1 in
155 let ty2 = Ast0.get_type exp2 in
156 let same_type = function
157 (None,None) -> Some (int_type)
91eba41f 158
951c7801
C
159 (* pad: pointer arithmetic handling as in ptr+1 *)
160 | (Some (T.Pointer ty1),Some ty2) when is_int_type ty2 ->
161 Some (T.Pointer ty1)
162 | (Some ty1,Some (T.Pointer ty2)) when is_int_type ty1 ->
163 Some (T.Pointer ty2)
91eba41f 164
951c7801
C
165 | (t1,t2) ->
166 let ty = lub_type t1 t2 in
167 Ast0.set_type exp1 ty; Ast0.set_type exp2 ty; ty in
168 (match Ast0.unwrap_mcode op with
169 Ast.Arith(op) -> same_type (ty1, ty2)
170 | Ast.Logical(op) ->
171 let ty = lub_type ty1 ty2 in
172 Ast0.set_type exp1 ty; Ast0.set_type exp2 ty;
173 Some(bool_type))
174 | Ast0.Paren(lp,exp,rp) -> Ast0.get_type exp
175 | Ast0.ArrayAccess(exp1,lb,exp2,rb) ->
176 (match strip_cv (Ast0.get_type exp2) with
177 None -> Ast0.set_type exp2 (Some(int_type))
178 | Some(ty) when is_int_type ty -> ()
179 | Some ty -> err exp2 ty "bad type for an array index");
180 (match strip_cv (Ast0.get_type exp1) with
181 None -> None
182 | Some (T.Array(ty)) -> Some ty
183 | Some (T.Pointer(ty)) -> Some ty
184 | Some (T.MetaType(_,_,_)) -> None
185 | Some x -> err exp1 x "ill-typed array reference")
186 (* pad: should handle structure one day and look 'field' in environment *)
187 | Ast0.RecordAccess(exp,pt,field) ->
188 (match strip_cv (Ast0.get_type exp) with
189 None -> None
190 | Some (T.StructUnionName(_,_,_)) -> None
191 | Some (T.TypeName(_)) -> None
192 | Some (T.MetaType(_,_,_)) -> None
193 | Some x -> err exp x "non-structure type in field ref")
194 | Ast0.RecordPtAccess(exp,ar,field) ->
195 (match strip_cv (Ast0.get_type exp) with
196 None -> None
197 | Some (T.Pointer(t)) ->
198 (match strip_cv (Some t) with
199 | Some (T.Unknown) -> None
200 | Some (T.MetaType(_,_,_)) -> None
201 | Some (T.TypeName(_)) -> None
202 | Some (T.StructUnionName(_,_,_)) -> None
203 | Some x ->
204 err exp (T.Pointer(t))
205 "non-structure pointer type in field ref"
206 | _ -> failwith "not possible")
207 | Some (T.MetaType(_,_,_)) -> None
208 | Some (T.TypeName(_)) -> None
209 | Some x -> err exp x "non-structure pointer type in field ref")
210 | Ast0.Cast(lp,ty,rp,exp) -> Some(Ast0.ast0_type_to_type ty)
211 | Ast0.SizeOfExpr(szf,exp) -> Some(int_type)
212 | Ast0.SizeOfType(szf,lp,ty,rp) -> Some(int_type)
213 | Ast0.TypeExp(ty) -> None
214 | Ast0.MetaErr(name,_,_) -> None
215 | Ast0.MetaExpr(name,_,Some [ty],_,_) -> Some ty
216 | Ast0.MetaExpr(name,_,ty,_,_) -> None
217 | Ast0.MetaExprList(name,_,_) -> None
218 | Ast0.EComma(cm) -> None
219 | Ast0.DisjExpr(_,exp_list,_,_) ->
220 let types = List.map Ast0.get_type exp_list in
221 let combined = List.fold_left lub_type None types in
222 (match combined with
223 None -> None
224 | Some t ->
225 List.iter (function e -> Ast0.set_type e (Some t)) exp_list;
226 Some t)
227 | Ast0.NestExpr(starter,expr_dots,ender,None,multi) ->
228 let _ = r.VT0.combiner_rec_expression_dots expr_dots in None
229 | Ast0.NestExpr(starter,expr_dots,ender,Some e,multi) ->
230 let _ = r.VT0.combiner_rec_expression_dots expr_dots in
231 let _ = r.VT0.combiner_rec_expression e in None
232 | Ast0.Edots(_,None) | Ast0.Ecircles(_,None) | Ast0.Estars(_,None) ->
233 None
234 | Ast0.Edots(_,Some e) | Ast0.Ecircles(_,Some e)
235 | Ast0.Estars(_,Some e) ->
236 let _ = r.VT0.combiner_rec_expression e in None
237 | Ast0.OptExp(exp) -> Ast0.get_type exp
238 | Ast0.UniqueExp(exp) -> Ast0.get_type exp in
239 Ast0.set_type e ty;
240 ty in
34e49164 241
34e49164
C
242 let rec strip id =
243 match Ast0.unwrap id with
951c7801 244 Ast0.Id(name) -> Id(Ast0.unwrap_mcode name)
34e49164
C
245 | Ast0.MetaId(name,_,_) -> Meta(Ast0.unwrap_mcode name)
246 | Ast0.MetaFunc(name,_,_) -> Meta(Ast0.unwrap_mcode name)
247 | Ast0.MetaLocalFunc(name,_,_) -> Meta(Ast0.unwrap_mcode name)
951c7801
C
248 | Ast0.OptIdent(id) -> strip id
249 | Ast0.UniqueIdent(id) -> strip id in
34e49164 250
1be43e12 251 let process_whencode notfn allfn exp = function
34e49164
C
252 Ast0.WhenNot(x) -> let _ = notfn x in ()
253 | Ast0.WhenAlways(x) -> let _ = allfn x in ()
1be43e12
C
254 | Ast0.WhenModifier(_) -> ()
255 | Ast0.WhenNotTrue(x) -> let _ = exp x in ()
256 | Ast0.WhenNotFalse(x) -> let _ = exp x in () in
34e49164
C
257
258 (* assume that all of the declarations are at the beginning of a statement
259 list, which is required by C, but not actually required by the cocci
260 parser *)
261 let rec process_statement_list r acc = function
262 [] -> acc
263 | (s::ss) ->
264 (match Ast0.unwrap s with
265 Ast0.Decl(_,decl) ->
fc1ad971 266 let new_acc = (process_decl acc decl)@acc in
34e49164
C
267 process_statement_list r new_acc ss
268 | Ast0.Dots(_,wc) ->
1be43e12 269 (* why is this case here? why is there none for nests? *)
34e49164 270 List.iter
b1b2de81
C
271 (process_whencode r.VT0.combiner_rec_statement_dots
272 r.VT0.combiner_rec_statement r.VT0.combiner_rec_expression)
34e49164
C
273 wc;
274 process_statement_list r acc ss
275 | Ast0.Disj(_,statement_dots_list,_,_) ->
276 let new_acc =
277 lub_envs
278 (List.map
279 (function x -> process_statement_list r acc (Ast0.undots x))
280 statement_dots_list) in
281 process_statement_list r new_acc ss
282 | _ ->
b1b2de81 283 let _ = (propagate_types acc).VT0.combiner_rec_statement s in
fc1ad971
C
284 process_statement_list r acc ss)
285
286 and process_decl env decl =
287 match Ast0.unwrap decl with
288 Ast0.Init(_,ty,id,_,exp,_) ->
289 let _ =
290 (propagate_types env).VT0.combiner_rec_initialiser exp in
291 [(strip id,Ast0.ast0_type_to_type ty)]
292 | Ast0.UnInit(_,ty,id,_) ->
293 [(strip id,Ast0.ast0_type_to_type ty)]
294 | Ast0.MacroDecl(_,_,_,_,_) -> []
295 | Ast0.TyDecl(ty,_) -> []
296 (* pad: should handle typedef one day and add a binding *)
297 | Ast0.Typedef(_,_,_,_) -> []
298 | Ast0.DisjDecl(_,disjs,_,_) ->
299 List.concat(List.map (process_decl env) disjs)
300 | Ast0.Ddots(_,_) -> [] (* not in a statement list anyway *)
301 | Ast0.OptDecl(decl) -> process_decl env decl
302 | Ast0.UniqueDecl(decl) -> process_decl env decl in
34e49164
C
303
304 let statement_dots r k d =
305 match Ast0.unwrap d with
306 Ast0.DOTS(l) | Ast0.CIRCLES(l) | Ast0.STARS(l) ->
307 let _ = process_statement_list r env l in option_default in
fc1ad971
C
308
309 let post_bool exp =
310 let rec process_test exp =
311 match (Ast0.unwrap exp,Ast0.get_type exp) with
312 (Ast0.Edots(_,_),_) -> None
313 | (Ast0.NestExpr(_,_,_,_,_),_) -> None
314 | (Ast0.MetaExpr(_,_,_,_,_),_) ->
315 (* if a type is known, it is specified in the decl *)
316 None
317 | (Ast0.Paren(lp,exp,rp),None) -> process_test exp
318 | (_,None) -> Some (int_type)
319 | _ -> None in
320 let new_expty = process_test exp in
321 (match new_expty with
322 None -> () (* leave things as they are *)
323 | Some ty -> Ast0.set_type exp new_expty) in
324
34e49164
C
325 let statement r k s =
326 match Ast0.unwrap s with
327 Ast0.FunDecl(_,fninfo,name,lp,params,rp,lbrace,body,rbrace) ->
328 let rec get_binding p =
329 match Ast0.unwrap p with
330 Ast0.Param(ty,Some id) ->
331 [(strip id,Ast0.ast0_type_to_type ty)]
332 | Ast0.OptParam(param) -> get_binding param
333 | _ -> [] in
334 let fenv = List.concat (List.map get_binding (Ast0.undots params)) in
b1b2de81 335 (propagate_types (fenv@env)).VT0.combiner_rec_statement_dots body
34e49164
C
336 | Ast0.IfThen(_,_,exp,_,_,_) | Ast0.IfThenElse(_,_,exp,_,_,_,_,_)
337 | Ast0.While(_,_,exp,_,_,_) | Ast0.Do(_,_,_,_,exp,_,_)
fc1ad971 338 | Ast0.For(_,_,_,_,Some exp,_,_,_,_,_) ->
34e49164 339 let _ = k s in
fc1ad971 340 post_bool exp;
34e49164 341 None
fc1ad971
C
342 | Ast0.Switch(_,_,exp,_,_,decls,cases,_) ->
343 let senv = process_statement_list r env (Ast0.undots decls) in
344 let res =
345 (propagate_types (senv@env)).VT0.combiner_rec_case_line_dots cases in
346 post_bool exp;
347 res
34e49164
C
348 | _ -> k s
349
350 and case_line r k c =
351 match Ast0.unwrap c with
fc1ad971 352 Ast0.Case(case,exp,colon,code) ->
34e49164
C
353 let _ = k c in
354 (match Ast0.get_type exp with
faf9a90c 355 None -> Ast0.set_type exp (Some (int_type))
34e49164
C
356 | _ -> ());
357 None
fc1ad971 358 | _ -> k c in
34e49164
C
359
360 V0.combiner bind option_default
b1b2de81
C
361 {V0.combiner_functions with
362 VT0.combiner_dotsstmtfn = statement_dots;
363 VT0.combiner_identfn = ident;
364 VT0.combiner_exprfn = expression;
365 VT0.combiner_stmtfn = statement;
366 VT0.combiner_casefn = case_line}
34e49164
C
367
368let type_infer code =
369 let prop = propagate_types [(Id("NULL"),T.Pointer(T.Unknown))] in
b1b2de81 370 let fn = prop.VT0.combiner_rec_top_level in
34e49164
C
371 let _ = List.map fn code in
372 ()