Release coccinelle-0.2.4rc6
[bpt/coccinelle.git] / parsing_cocci / type_infer.ml
1 (*
2 * Copyright 2010, INRIA, University of Copenhagen
3 * Julia Lawall, Rene Rydhof Hansen, Gilles Muller, Nicolas Palix
4 * Copyright 2005-2009, Ecole des Mines de Nantes, University of Copenhagen
5 * Yoann Padioleau, Julia Lawall, Rene Rydhof Hansen, Henrik Stuart, Gilles Muller, Nicolas Palix
6 * This file is part of Coccinelle.
7 *
8 * Coccinelle is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, according to version 2 of the License.
11 *
12 * Coccinelle is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with Coccinelle. If not, see <http://www.gnu.org/licenses/>.
19 *
20 * The authors reserve the right to distribute this or future versions of
21 * Coccinelle under other licenses.
22 *)
23
24
25 module T = Type_cocci
26 module Ast = Ast_cocci
27 module Ast0 = Ast0_cocci
28 module V0 = Visitor_ast0
29 module VT0 = Visitor_ast0_types
30
31 (* Type inference:
32 Just propagates information based on declarations. Could try to infer
33 more precise information about expression metavariables, but not sure it is
34 worth it. The most obvious goal is to distinguish between test expressions
35 that have pointer, integer, and boolean type when matching isomorphisms,
36 but perhaps other needs will become apparent. *)
37
38 (* "functions" that return a boolean value *)
39 let bool_functions = ["likely";"unlikely"]
40
41 let err wrapped ty s =
42 T.typeC ty; Format.print_newline();
43 failwith (Printf.sprintf "line %d: %s" (Ast0.get_line wrapped) s)
44
45 type id = Id of string | Meta of Ast.meta_name
46
47 let int_type = T.BaseType(T.IntType)
48 let bool_type = T.BaseType(T.BoolType)
49 let char_type = T.BaseType(T.CharType)
50 let float_type = T.BaseType(T.FloatType)
51 let size_type = T.BaseType(T.SizeType)
52 let ssize_type = T.BaseType(T.SSizeType)
53 let ptrdiff_type = T.BaseType(T.PtrDiffType)
54
55 let rec lub_type t1 t2 =
56 match (t1,t2) with
57 (None,None) -> None
58 | (None,Some t) -> t2
59 | (Some t,None) -> t1
60 | (Some t1,Some t2) ->
61 let rec loop = function
62 (T.Unknown,t2) -> t2
63 | (t1,T.Unknown) -> t1
64 | (T.ConstVol(cv1,ty1),T.ConstVol(cv2,ty2)) when cv1 = cv2 ->
65 T.ConstVol(cv1,loop(ty1,ty2))
66
67 (* pad: in pointer arithmetic, as in ptr+1, the lub must be ptr *)
68 | (T.Pointer(ty1),T.Pointer(ty2)) ->
69 T.Pointer(loop(ty1,ty2))
70 | (ty1,T.Pointer(ty2)) -> T.Pointer(ty2)
71 | (T.Pointer(ty1),ty2) -> T.Pointer(ty1)
72
73 | (T.Array(ty1),T.Array(ty2)) -> T.Array(loop(ty1,ty2))
74 | (T.TypeName(s1),t2) -> t2
75 | (t1,T.TypeName(s1)) -> t1
76 | (t1,_) -> t1 in (* arbitrarily pick the first, assume type correct *)
77 Some (loop (t1,t2))
78
79 let lub_envs envs =
80 List.fold_left
81 (function acc ->
82 function env ->
83 List.fold_left
84 (function acc ->
85 function (var,ty) ->
86 let (relevant,irrelevant) =
87 List.partition (function (x,_) -> x = var) acc in
88 match relevant with
89 [] -> (var,ty)::acc
90 | [(x,ty1)] ->
91 (match lub_type (Some ty) (Some ty1) with
92 Some new_ty -> (var,new_ty)::irrelevant
93 | None -> irrelevant)
94 | _ -> failwith "bad type environment")
95 acc env)
96 [] envs
97
98 let rec propagate_types env =
99 let option_default = None in
100 let bind x y = option_default in (* no generic way of combining types *)
101
102 let ident r k i =
103 match Ast0.unwrap i with
104 Ast0.Id(id) ->
105 (try Some(List.assoc (Id(Ast0.unwrap_mcode id)) env)
106 with Not_found -> None)
107 | Ast0.MetaId(id,_,_) ->
108 (try Some(List.assoc (Meta(Ast0.unwrap_mcode id)) env)
109 with Not_found -> None)
110 | _ -> k i in
111
112 let strip_cv = function
113 Some (T.ConstVol(_,t)) -> Some t
114 | t -> t in
115
116 (* types that might be integer types. should char be allowed? *)
117 let rec is_int_type = function
118 T.BaseType(T.IntType)
119 | T.BaseType(T.LongType)
120 | T.BaseType(T.ShortType)
121 | T.MetaType(_,_,_)
122 | T.TypeName _
123 | T.EnumName _
124 | T.SignedT(_,None) -> true
125 | T.SignedT(_,Some ty) -> is_int_type ty
126 | _ -> false in
127
128 let expression r k e =
129 let res = k e in
130 let ty =
131 match Ast0.unwrap e with
132 (* pad: the type of id is set in the ident visitor *)
133 Ast0.Ident(id) -> Ast0.set_type e res; res
134 | Ast0.Constant(const) ->
135 (match Ast0.unwrap_mcode const with
136 Ast.String(_) -> Some (T.Pointer(char_type))
137 | Ast.Char(_) -> Some (char_type)
138 | Ast.Int(_) -> Some (int_type)
139 | Ast.Float(_) -> Some (float_type))
140 (* pad: note that in C can do either ptr(...) or ( *ptr)(...)
141 * so I am not sure this code is enough.
142 *)
143 | Ast0.FunCall(fn,lp,args,rp) ->
144 (match Ast0.get_type fn with
145 Some (T.FunctionPointer(ty)) -> Some ty
146 | _ ->
147 (match Ast0.unwrap fn with
148 Ast0.Ident(id) ->
149 (match Ast0.unwrap id with
150 Ast0.Id(id) ->
151 if List.mem (Ast0.unwrap_mcode id) bool_functions
152 then Some(bool_type)
153 else None
154 | _ -> None)
155 | _ -> None))
156 | Ast0.Assignment(exp1,op,exp2,_) ->
157 let ty = lub_type (Ast0.get_type exp1) (Ast0.get_type exp2) in
158 Ast0.set_type exp1 ty; Ast0.set_type exp2 ty; ty
159 | Ast0.CondExpr(exp1,why,Some exp2,colon,exp3) ->
160 let ty = lub_type (Ast0.get_type exp2) (Ast0.get_type exp3) in
161 Ast0.set_type exp2 ty; Ast0.set_type exp3 ty; ty
162 | Ast0.CondExpr(exp1,why,None,colon,exp3) -> Ast0.get_type exp3
163 | Ast0.Postfix(exp,op) | Ast0.Infix(exp,op) -> (* op is dec or inc *)
164 Ast0.get_type exp
165 | Ast0.Unary(exp,op) ->
166 (match Ast0.unwrap_mcode op with
167 Ast.GetRef ->
168 (match Ast0.get_type exp with
169 None -> Some (T.Pointer(T.Unknown))
170 | Some t -> Some (T.Pointer(t)))
171 | Ast.DeRef ->
172 (match Ast0.get_type exp with
173 Some (T.Pointer(t)) -> Some t
174 | _ -> None)
175 | Ast.UnPlus -> Ast0.get_type exp
176 | Ast.UnMinus -> Ast0.get_type exp
177 | Ast.Tilde -> Ast0.get_type exp
178 | Ast.Not -> Some(bool_type))
179 | Ast0.Nested(exp1,op,exp2) -> failwith "nested in type inf not possible"
180 | Ast0.Binary(exp1,op,exp2) ->
181 let ty1 = Ast0.get_type exp1 in
182 let ty2 = Ast0.get_type exp2 in
183 let same_type = function
184 (None,None) -> Some (int_type)
185
186 (* pad: pointer arithmetic handling as in ptr+1 *)
187 | (Some (T.Pointer ty1),Some ty2) when is_int_type ty2 ->
188 Some (T.Pointer ty1)
189 | (Some ty1,Some (T.Pointer ty2)) when is_int_type ty1 ->
190 Some (T.Pointer ty2)
191
192 | (t1,t2) ->
193 let ty = lub_type t1 t2 in
194 Ast0.set_type exp1 ty; Ast0.set_type exp2 ty; ty in
195 (match Ast0.unwrap_mcode op with
196 Ast.Arith(op) -> same_type (ty1, ty2)
197 | Ast.Logical(Ast.AndLog) | Ast.Logical(Ast.OrLog) ->
198 Some(bool_type)
199 | Ast.Logical(op) ->
200 let ty = lub_type ty1 ty2 in
201 Ast0.set_type exp1 ty; Ast0.set_type exp2 ty;
202 Some(bool_type))
203 | Ast0.Paren(lp,exp,rp) -> Ast0.get_type exp
204 | Ast0.ArrayAccess(exp1,lb,exp2,rb) ->
205 (match strip_cv (Ast0.get_type exp2) with
206 None -> Ast0.set_type exp2 (Some(int_type))
207 | Some(ty) when is_int_type ty -> ()
208 | Some(Type_cocci.Unknown) ->
209 (* unknown comes from param types, not sure why this
210 is not just None... *)
211 Ast0.set_type exp2 (Some(int_type))
212 | Some ty -> err exp2 ty "bad type for an array index");
213 (match strip_cv (Ast0.get_type exp1) with
214 None -> None
215 | Some (T.Array(ty)) -> Some ty
216 | Some (T.Pointer(ty)) -> Some ty
217 | Some (T.MetaType(_,_,_)) -> None
218 | Some x -> err exp1 x "ill-typed array reference")
219 (* pad: should handle structure one day and look 'field' in environment *)
220 | Ast0.RecordAccess(exp,pt,field) ->
221 (match strip_cv (Ast0.get_type exp) with
222 None -> None
223 | Some (T.StructUnionName(_,_)) -> None
224 | Some (T.TypeName(_)) -> None
225 | Some (T.MetaType(_,_,_)) -> None
226 | Some x -> err exp x "non-structure type in field ref")
227 | Ast0.RecordPtAccess(exp,ar,field) ->
228 (match strip_cv (Ast0.get_type exp) with
229 None -> None
230 | Some (T.Pointer(t)) ->
231 (match strip_cv (Some t) with
232 | Some (T.Unknown) -> None
233 | Some (T.MetaType(_,_,_)) -> None
234 | Some (T.TypeName(_)) -> None
235 | Some (T.StructUnionName(_,_)) -> None
236 | Some x ->
237 err exp (T.Pointer(t))
238 "non-structure pointer type in field ref"
239 | _ -> failwith "not possible")
240 | Some (T.MetaType(_,_,_)) -> None
241 | Some (T.TypeName(_)) -> None
242 | Some x -> err exp x "non-structure pointer type in field ref")
243 | Ast0.Cast(lp,ty,rp,exp) -> Some(Ast0.ast0_type_to_type ty)
244 | Ast0.SizeOfExpr(szf,exp) -> Some(int_type)
245 | Ast0.SizeOfType(szf,lp,ty,rp) -> Some(int_type)
246 | Ast0.TypeExp(ty) -> None
247 | Ast0.MetaErr(name,_,_) -> None
248 | Ast0.MetaExpr(name,_,Some [ty],_,_) -> Some ty
249 | Ast0.MetaExpr(name,_,ty,_,_) -> None
250 | Ast0.MetaExprList(name,_,_) -> None
251 | Ast0.EComma(cm) -> None
252 | Ast0.DisjExpr(_,exp_list,_,_) ->
253 let types = List.map Ast0.get_type exp_list in
254 let combined = List.fold_left lub_type None types in
255 (match combined with
256 None -> None
257 | Some t ->
258 List.iter (function e -> Ast0.set_type e (Some t)) exp_list;
259 Some t)
260 | Ast0.NestExpr(starter,expr_dots,ender,None,multi) ->
261 let _ = r.VT0.combiner_rec_expression_dots expr_dots in None
262 | Ast0.NestExpr(starter,expr_dots,ender,Some e,multi) ->
263 let _ = r.VT0.combiner_rec_expression_dots expr_dots in
264 let _ = r.VT0.combiner_rec_expression e in None
265 | Ast0.Edots(_,None) | Ast0.Ecircles(_,None) | Ast0.Estars(_,None) ->
266 None
267 | Ast0.Edots(_,Some e) | Ast0.Ecircles(_,Some e)
268 | Ast0.Estars(_,Some e) ->
269 let _ = r.VT0.combiner_rec_expression e in None
270 | Ast0.OptExp(exp) -> Ast0.get_type exp
271 | Ast0.UniqueExp(exp) -> Ast0.get_type exp in
272 Ast0.set_type e ty;
273 ty in
274
275 let rec strip id =
276 match Ast0.unwrap id with
277 Ast0.Id(name) -> Id(Ast0.unwrap_mcode name)
278 | Ast0.MetaId(name,_,_) -> Meta(Ast0.unwrap_mcode name)
279 | Ast0.MetaFunc(name,_,_) -> Meta(Ast0.unwrap_mcode name)
280 | Ast0.MetaLocalFunc(name,_,_) -> Meta(Ast0.unwrap_mcode name)
281 | Ast0.OptIdent(id) -> strip id
282 | Ast0.UniqueIdent(id) -> strip id in
283
284 let process_whencode notfn allfn exp = function
285 Ast0.WhenNot(x) -> let _ = notfn x in ()
286 | Ast0.WhenAlways(x) -> let _ = allfn x in ()
287 | Ast0.WhenModifier(_) -> ()
288 | Ast0.WhenNotTrue(x) -> let _ = exp x in ()
289 | Ast0.WhenNotFalse(x) -> let _ = exp x in () in
290
291 (* assume that all of the declarations are at the beginning of a statement
292 list, which is required by C, but not actually required by the cocci
293 parser *)
294 let rec process_statement_list r acc = function
295 [] -> acc
296 | (s::ss) ->
297 (match Ast0.unwrap s with
298 Ast0.Decl(_,decl) ->
299 let new_acc = (process_decl acc decl)@acc in
300 process_statement_list r new_acc ss
301 | Ast0.Dots(_,wc) ->
302 (* why is this case here? why is there none for nests? *)
303 List.iter
304 (process_whencode r.VT0.combiner_rec_statement_dots
305 r.VT0.combiner_rec_statement r.VT0.combiner_rec_expression)
306 wc;
307 process_statement_list r acc ss
308 | Ast0.Disj(_,statement_dots_list,_,_) ->
309 let new_acc =
310 lub_envs
311 (List.map
312 (function x -> process_statement_list r acc (Ast0.undots x))
313 statement_dots_list) in
314 process_statement_list r new_acc ss
315 | _ ->
316 let _ = (propagate_types acc).VT0.combiner_rec_statement s in
317 process_statement_list r acc ss)
318
319 and process_decl env decl =
320 match Ast0.unwrap decl with
321 Ast0.MetaDecl(_,_) | Ast0.MetaField(_,_) -> []
322 | Ast0.Init(_,ty,id,_,exp,_) ->
323 let _ =
324 (propagate_types env).VT0.combiner_rec_initialiser exp in
325 [(strip id,Ast0.ast0_type_to_type ty)]
326 | Ast0.UnInit(_,ty,id,_) ->
327 [(strip id,Ast0.ast0_type_to_type ty)]
328 | Ast0.MacroDecl(_,_,_,_,_) -> []
329 | Ast0.TyDecl(ty,_) -> []
330 (* pad: should handle typedef one day and add a binding *)
331 | Ast0.Typedef(_,_,_,_) -> []
332 | Ast0.DisjDecl(_,disjs,_,_) ->
333 List.concat(List.map (process_decl env) disjs)
334 | Ast0.Ddots(_,_) -> [] (* not in a statement list anyway *)
335 | Ast0.OptDecl(decl) -> process_decl env decl
336 | Ast0.UniqueDecl(decl) -> process_decl env decl in
337
338 let statement_dots r k d =
339 match Ast0.unwrap d with
340 Ast0.DOTS(l) | Ast0.CIRCLES(l) | Ast0.STARS(l) ->
341 let _ = process_statement_list r env l in option_default in
342
343 let post_bool exp =
344 let rec process_test exp =
345 match (Ast0.unwrap exp,Ast0.get_type exp) with
346 (Ast0.Edots(_,_),_) -> None
347 | (Ast0.NestExpr(_,_,_,_,_),_) -> None
348 | (Ast0.MetaExpr(_,_,_,_,_),_) ->
349 (* if a type is known, it is specified in the decl *)
350 None
351 | (Ast0.Paren(lp,exp,rp),None) -> process_test exp
352 (* the following doesn't seem like a good idea - triggers int isos
353 on all test expressions *)
354 (*| (_,None) -> Some (int_type) *)
355 | _ -> None in
356 let new_expty = process_test exp in
357 (match new_expty with
358 None -> () (* leave things as they are *)
359 | Some ty -> Ast0.set_type exp new_expty) in
360
361 let statement r k s =
362 match Ast0.unwrap s with
363 Ast0.FunDecl(_,fninfo,name,lp,params,rp,lbrace,body,rbrace) ->
364 let rec get_binding p =
365 match Ast0.unwrap p with
366 Ast0.Param(ty,Some id) ->
367 [(strip id,Ast0.ast0_type_to_type ty)]
368 | Ast0.OptParam(param) -> get_binding param
369 | _ -> [] in
370 let fenv = List.concat (List.map get_binding (Ast0.undots params)) in
371 (propagate_types (fenv@env)).VT0.combiner_rec_statement_dots body
372 | Ast0.IfThen(_,_,exp,_,_,_) | Ast0.IfThenElse(_,_,exp,_,_,_,_,_)
373 | Ast0.While(_,_,exp,_,_,_) | Ast0.Do(_,_,_,_,exp,_,_)
374 | Ast0.For(_,_,_,_,Some exp,_,_,_,_,_) ->
375 let _ = k s in
376 post_bool exp;
377 None
378 | Ast0.Switch(_,_,exp,_,_,decls,cases,_) ->
379 let senv = process_statement_list r env (Ast0.undots decls) in
380 let res =
381 (propagate_types (senv@env)).VT0.combiner_rec_case_line_dots cases in
382 post_bool exp;
383 res
384 | _ -> k s
385
386 and case_line r k c =
387 match Ast0.unwrap c with
388 Ast0.Case(case,exp,colon,code) ->
389 let _ = k c in
390 (match Ast0.get_type exp with
391 None -> Ast0.set_type exp (Some (int_type))
392 | _ -> ());
393 None
394 | _ -> k c in
395
396 V0.combiner bind option_default
397 {V0.combiner_functions with
398 VT0.combiner_dotsstmtfn = statement_dots;
399 VT0.combiner_identfn = ident;
400 VT0.combiner_exprfn = expression;
401 VT0.combiner_stmtfn = statement;
402 VT0.combiner_casefn = case_line}
403
404 let type_infer code =
405 let prop = propagate_types [(Id("NULL"),T.Pointer(T.Unknown))] in
406 let fn = prop.VT0.combiner_rec_top_level in
407 let _ = List.map fn code in
408 ()