Release coccinelle-0.2.0
[bpt/coccinelle.git] / parsing_cocci / type_infer.ml
1 (*
2 * Copyright 2005-2009, Ecole des Mines de Nantes, University of Copenhagen
3 * Yoann Padioleau, Julia Lawall, Rene Rydhof Hansen, Henrik Stuart, Gilles Muller, Nicolas Palix
4 * This file is part of Coccinelle.
5 *
6 * Coccinelle is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, according to version 2 of the License.
9 *
10 * Coccinelle is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with Coccinelle. If not, see <http://www.gnu.org/licenses/>.
17 *
18 * The authors reserve the right to distribute this or future versions of
19 * Coccinelle under other licenses.
20 *)
21
22
23 module T = Type_cocci
24 module Ast = Ast_cocci
25 module Ast0 = Ast0_cocci
26 module V0 = Visitor_ast0
27 module VT0 = Visitor_ast0_types
28
29 (* Type inference:
30 Just propagates information based on declarations. Could try to infer
31 more precise information about expression metavariables, but not sure it is
32 worth it. The most obvious goal is to distinguish between test expressions
33 that have pointer, integer, and boolean type when matching isomorphisms,
34 but perhaps other needs will become apparent. *)
35
36 (* "functions" that return a boolean value *)
37 let bool_functions = ["likely";"unlikely"]
38
39 let err wrapped ty s =
40 T.typeC ty; Format.print_newline();
41 failwith (Printf.sprintf "line %d: %s" (Ast0.get_line wrapped) s)
42
43 type id = Id of string | Meta of (string * string)
44
45 let int_type = T.BaseType(T.IntType)
46 let bool_type = T.BaseType(T.BoolType)
47 let char_type = T.BaseType(T.CharType)
48 let float_type = T.BaseType(T.FloatType)
49
50 let rec lub_type t1 t2 =
51 match (t1,t2) with
52 (None,None) -> None
53 | (None,Some t) -> t2
54 | (Some t,None) -> t1
55 | (Some t1,Some t2) ->
56 let rec loop = function
57 (T.Unknown,t2) -> t2
58 | (t1,T.Unknown) -> t1
59 | (T.ConstVol(cv1,ty1),T.ConstVol(cv2,ty2)) when cv1 = cv2 ->
60 T.ConstVol(cv1,loop(ty1,ty2))
61
62 (* pad: in pointer arithmetic, as in ptr+1, the lub must be ptr *)
63 | (T.Pointer(ty1),T.Pointer(ty2)) ->
64 T.Pointer(loop(ty1,ty2))
65 | (ty1,T.Pointer(ty2)) -> T.Pointer(ty2)
66 | (T.Pointer(ty1),ty2) -> T.Pointer(ty1)
67
68 | (T.Array(ty1),T.Array(ty2)) -> T.Array(loop(ty1,ty2))
69 | (T.TypeName(s1),t2) -> t2
70 | (t1,T.TypeName(s1)) -> t1
71 | (t1,_) -> t1 in (* arbitrarily pick the first, assume type correct *)
72 Some (loop (t1,t2))
73
74 let lub_envs envs =
75 List.fold_left
76 (function acc ->
77 function env ->
78 List.fold_left
79 (function acc ->
80 function (var,ty) ->
81 let (relevant,irrelevant) =
82 List.partition (function (x,_) -> x = var) acc in
83 match relevant with
84 [] -> (var,ty)::acc
85 | [(x,ty1)] ->
86 (match lub_type (Some ty) (Some ty1) with
87 Some new_ty -> (var,new_ty)::irrelevant
88 | None -> irrelevant)
89 | _ -> failwith "bad type environment")
90 acc env)
91 [] envs
92
93 let rec propagate_types env =
94 let option_default = None in
95 let bind x y = option_default in (* no generic way of combining types *)
96
97 let ident r k i =
98 match Ast0.unwrap i with
99 Ast0.Id(id) ->
100 (try Some(List.assoc (Id(Ast0.unwrap_mcode id)) env)
101 with Not_found -> None)
102 | Ast0.MetaId(id,_,_) ->
103 (try Some(List.assoc (Meta(Ast0.unwrap_mcode id)) env)
104 with Not_found -> None)
105 | _ -> k i in
106
107 let strip_cv = function
108 Some (T.ConstVol(_,t)) -> Some t
109 | t -> t in
110
111 (* types that might be integer types. should char be allowed? *)
112 let rec is_int_type = function
113 T.BaseType(T.IntType)
114 | T.BaseType(T.LongType)
115 | T.BaseType(T.ShortType)
116 | T.MetaType(_,_,_)
117 | T.TypeName _
118 | T.EnumName _
119 | T.SignedT(_,None) -> true
120 | T.SignedT(_,Some ty) -> is_int_type ty
121 | _ -> false in
122
123 let expression r k e =
124 let res = k e in
125 let ty =
126 match Ast0.unwrap e with
127 (* pad: the type of id is set in the ident visitor *)
128 Ast0.Ident(id) -> Ast0.set_type e res; res
129 | Ast0.Constant(const) ->
130 (match Ast0.unwrap_mcode const with
131 Ast.String(_) -> Some (T.Pointer(char_type))
132 | Ast.Char(_) -> Some (char_type)
133 | Ast.Int(_) -> Some (int_type)
134 | Ast.Float(_) -> Some (float_type))
135 (* pad: note that in C can do either ptr(...) or ( *ptr)(...)
136 * so I am not sure this code is enough.
137 *)
138 | Ast0.FunCall(fn,lp,args,rp) ->
139 (match Ast0.get_type fn with
140 Some (T.FunctionPointer(ty)) -> Some ty
141 | _ ->
142 (match Ast0.unwrap fn with
143 Ast0.Ident(id) ->
144 (match Ast0.unwrap id with
145 Ast0.Id(id) ->
146 if List.mem (Ast0.unwrap_mcode id) bool_functions
147 then Some(bool_type)
148 else None
149 | _ -> None)
150 | _ -> None))
151 | Ast0.Assignment(exp1,op,exp2,_) ->
152 let ty = lub_type (Ast0.get_type exp1) (Ast0.get_type exp2) in
153 Ast0.set_type exp1 ty; Ast0.set_type exp2 ty; ty
154 | Ast0.CondExpr(exp1,why,Some exp2,colon,exp3) ->
155 let ty = lub_type (Ast0.get_type exp2) (Ast0.get_type exp3) in
156 Ast0.set_type exp2 ty; Ast0.set_type exp3 ty; ty
157 | Ast0.CondExpr(exp1,why,None,colon,exp3) -> Ast0.get_type exp3
158 | Ast0.Postfix(exp,op) | Ast0.Infix(exp,op) -> (* op is dec or inc *)
159 Ast0.get_type exp
160 | Ast0.Unary(exp,op) ->
161 (match Ast0.unwrap_mcode op with
162 Ast.GetRef ->
163 (match Ast0.get_type exp with
164 None -> Some (T.Pointer(T.Unknown))
165 | Some t -> Some (T.Pointer(t)))
166 | Ast.DeRef ->
167 (match Ast0.get_type exp with
168 Some (T.Pointer(t)) -> Some t
169 | _ -> None)
170 | Ast.UnPlus -> Ast0.get_type exp
171 | Ast.UnMinus -> Ast0.get_type exp
172 | Ast.Tilde -> Ast0.get_type exp
173 | Ast.Not -> Some(bool_type))
174 | Ast0.Nested(exp1,op,exp2) -> failwith "nested in type inf not possible"
175 | Ast0.Binary(exp1,op,exp2) ->
176 let ty1 = Ast0.get_type exp1 in
177 let ty2 = Ast0.get_type exp2 in
178 let same_type = function
179 (None,None) -> Some (int_type)
180
181 (* pad: pointer arithmetic handling as in ptr+1 *)
182 | (Some (T.Pointer ty1),Some ty2) when is_int_type ty2 ->
183 Some (T.Pointer ty1)
184 | (Some ty1,Some (T.Pointer ty2)) when is_int_type ty1 ->
185 Some (T.Pointer ty2)
186
187 | (t1,t2) ->
188 let ty = lub_type t1 t2 in
189 Ast0.set_type exp1 ty; Ast0.set_type exp2 ty; ty in
190 (match Ast0.unwrap_mcode op with
191 Ast.Arith(op) -> same_type (ty1, ty2)
192 | Ast.Logical(op) ->
193 let ty = lub_type ty1 ty2 in
194 Ast0.set_type exp1 ty; Ast0.set_type exp2 ty;
195 Some(bool_type))
196 | Ast0.Paren(lp,exp,rp) -> Ast0.get_type exp
197 | Ast0.ArrayAccess(exp1,lb,exp2,rb) ->
198 (match strip_cv (Ast0.get_type exp2) with
199 None -> Ast0.set_type exp2 (Some(int_type))
200 | Some(ty) when is_int_type ty -> ()
201 | Some ty -> err exp2 ty "bad type for an array index");
202 (match strip_cv (Ast0.get_type exp1) with
203 None -> None
204 | Some (T.Array(ty)) -> Some ty
205 | Some (T.Pointer(ty)) -> Some ty
206 | Some (T.MetaType(_,_,_)) -> None
207 | Some x -> err exp1 x "ill-typed array reference")
208 (* pad: should handle structure one day and look 'field' in environment *)
209 | Ast0.RecordAccess(exp,pt,field) ->
210 (match strip_cv (Ast0.get_type exp) with
211 None -> None
212 | Some (T.StructUnionName(_,_,_)) -> None
213 | Some (T.TypeName(_)) -> None
214 | Some (T.MetaType(_,_,_)) -> None
215 | Some x -> err exp x "non-structure type in field ref")
216 | Ast0.RecordPtAccess(exp,ar,field) ->
217 (match strip_cv (Ast0.get_type exp) with
218 None -> None
219 | Some (T.Pointer(t)) ->
220 (match strip_cv (Some t) with
221 | Some (T.Unknown) -> None
222 | Some (T.MetaType(_,_,_)) -> None
223 | Some (T.TypeName(_)) -> None
224 | Some (T.StructUnionName(_,_,_)) -> None
225 | Some x ->
226 err exp (T.Pointer(t))
227 "non-structure pointer type in field ref"
228 | _ -> failwith "not possible")
229 | Some (T.MetaType(_,_,_)) -> None
230 | Some (T.TypeName(_)) -> None
231 | Some x -> err exp x "non-structure pointer type in field ref")
232 | Ast0.Cast(lp,ty,rp,exp) -> Some(Ast0.ast0_type_to_type ty)
233 | Ast0.SizeOfExpr(szf,exp) -> Some(int_type)
234 | Ast0.SizeOfType(szf,lp,ty,rp) -> Some(int_type)
235 | Ast0.TypeExp(ty) -> None
236 | Ast0.MetaErr(name,_,_) -> None
237 | Ast0.MetaExpr(name,_,Some [ty],_,_) -> Some ty
238 | Ast0.MetaExpr(name,_,ty,_,_) -> None
239 | Ast0.MetaExprList(name,_,_) -> None
240 | Ast0.EComma(cm) -> None
241 | Ast0.DisjExpr(_,exp_list,_,_) ->
242 let types = List.map Ast0.get_type exp_list in
243 let combined = List.fold_left lub_type None types in
244 (match combined with
245 None -> None
246 | Some t ->
247 List.iter (function e -> Ast0.set_type e (Some t)) exp_list;
248 Some t)
249 | Ast0.NestExpr(starter,expr_dots,ender,None,multi) ->
250 let _ = r.VT0.combiner_rec_expression_dots expr_dots in None
251 | Ast0.NestExpr(starter,expr_dots,ender,Some e,multi) ->
252 let _ = r.VT0.combiner_rec_expression_dots expr_dots in
253 let _ = r.VT0.combiner_rec_expression e in None
254 | Ast0.Edots(_,None) | Ast0.Ecircles(_,None) | Ast0.Estars(_,None) ->
255 None
256 | Ast0.Edots(_,Some e) | Ast0.Ecircles(_,Some e)
257 | Ast0.Estars(_,Some e) ->
258 let _ = r.VT0.combiner_rec_expression e in None
259 | Ast0.OptExp(exp) -> Ast0.get_type exp
260 | Ast0.UniqueExp(exp) -> Ast0.get_type exp in
261 Ast0.set_type e ty;
262 ty in
263
264 let rec strip id =
265 match Ast0.unwrap id with
266 Ast0.Id(name) -> Id(Ast0.unwrap_mcode name)
267 | Ast0.MetaId(name,_,_) -> Meta(Ast0.unwrap_mcode name)
268 | Ast0.MetaFunc(name,_,_) -> Meta(Ast0.unwrap_mcode name)
269 | Ast0.MetaLocalFunc(name,_,_) -> Meta(Ast0.unwrap_mcode name)
270 | Ast0.OptIdent(id) -> strip id
271 | Ast0.UniqueIdent(id) -> strip id in
272
273 let process_whencode notfn allfn exp = function
274 Ast0.WhenNot(x) -> let _ = notfn x in ()
275 | Ast0.WhenAlways(x) -> let _ = allfn x in ()
276 | Ast0.WhenModifier(_) -> ()
277 | Ast0.WhenNotTrue(x) -> let _ = exp x in ()
278 | Ast0.WhenNotFalse(x) -> let _ = exp x in () in
279
280 (* assume that all of the declarations are at the beginning of a statement
281 list, which is required by C, but not actually required by the cocci
282 parser *)
283 let rec process_statement_list r acc = function
284 [] -> acc
285 | (s::ss) ->
286 (match Ast0.unwrap s with
287 Ast0.Decl(_,decl) ->
288 let new_acc = (process_decl acc decl)@acc in
289 process_statement_list r new_acc ss
290 | Ast0.Dots(_,wc) ->
291 (* why is this case here? why is there none for nests? *)
292 List.iter
293 (process_whencode r.VT0.combiner_rec_statement_dots
294 r.VT0.combiner_rec_statement r.VT0.combiner_rec_expression)
295 wc;
296 process_statement_list r acc ss
297 | Ast0.Disj(_,statement_dots_list,_,_) ->
298 let new_acc =
299 lub_envs
300 (List.map
301 (function x -> process_statement_list r acc (Ast0.undots x))
302 statement_dots_list) in
303 process_statement_list r new_acc ss
304 | _ ->
305 let _ = (propagate_types acc).VT0.combiner_rec_statement s in
306 process_statement_list r acc ss)
307
308 and process_decl env decl =
309 match Ast0.unwrap decl with
310 Ast0.Init(_,ty,id,_,exp,_) ->
311 let _ =
312 (propagate_types env).VT0.combiner_rec_initialiser exp in
313 [(strip id,Ast0.ast0_type_to_type ty)]
314 | Ast0.UnInit(_,ty,id,_) ->
315 [(strip id,Ast0.ast0_type_to_type ty)]
316 | Ast0.MacroDecl(_,_,_,_,_) -> []
317 | Ast0.TyDecl(ty,_) -> []
318 (* pad: should handle typedef one day and add a binding *)
319 | Ast0.Typedef(_,_,_,_) -> []
320 | Ast0.DisjDecl(_,disjs,_,_) ->
321 List.concat(List.map (process_decl env) disjs)
322 | Ast0.Ddots(_,_) -> [] (* not in a statement list anyway *)
323 | Ast0.OptDecl(decl) -> process_decl env decl
324 | Ast0.UniqueDecl(decl) -> process_decl env decl in
325
326 let statement_dots r k d =
327 match Ast0.unwrap d with
328 Ast0.DOTS(l) | Ast0.CIRCLES(l) | Ast0.STARS(l) ->
329 let _ = process_statement_list r env l in option_default in
330
331 let post_bool exp =
332 let rec process_test exp =
333 match (Ast0.unwrap exp,Ast0.get_type exp) with
334 (Ast0.Edots(_,_),_) -> None
335 | (Ast0.NestExpr(_,_,_,_,_),_) -> None
336 | (Ast0.MetaExpr(_,_,_,_,_),_) ->
337 (* if a type is known, it is specified in the decl *)
338 None
339 | (Ast0.Paren(lp,exp,rp),None) -> process_test exp
340 | (_,None) -> Some (int_type)
341 | _ -> None in
342 let new_expty = process_test exp in
343 (match new_expty with
344 None -> () (* leave things as they are *)
345 | Some ty -> Ast0.set_type exp new_expty) in
346
347 let statement r k s =
348 match Ast0.unwrap s with
349 Ast0.FunDecl(_,fninfo,name,lp,params,rp,lbrace,body,rbrace) ->
350 let rec get_binding p =
351 match Ast0.unwrap p with
352 Ast0.Param(ty,Some id) ->
353 [(strip id,Ast0.ast0_type_to_type ty)]
354 | Ast0.OptParam(param) -> get_binding param
355 | _ -> [] in
356 let fenv = List.concat (List.map get_binding (Ast0.undots params)) in
357 (propagate_types (fenv@env)).VT0.combiner_rec_statement_dots body
358 | Ast0.IfThen(_,_,exp,_,_,_) | Ast0.IfThenElse(_,_,exp,_,_,_,_,_)
359 | Ast0.While(_,_,exp,_,_,_) | Ast0.Do(_,_,_,_,exp,_,_)
360 | Ast0.For(_,_,_,_,Some exp,_,_,_,_,_) ->
361 let _ = k s in
362 post_bool exp;
363 None
364 | Ast0.Switch(_,_,exp,_,_,decls,cases,_) ->
365 let senv = process_statement_list r env (Ast0.undots decls) in
366 let res =
367 (propagate_types (senv@env)).VT0.combiner_rec_case_line_dots cases in
368 post_bool exp;
369 res
370 | _ -> k s
371
372 and case_line r k c =
373 match Ast0.unwrap c with
374 Ast0.Case(case,exp,colon,code) ->
375 let _ = k c in
376 (match Ast0.get_type exp with
377 None -> Ast0.set_type exp (Some (int_type))
378 | _ -> ());
379 None
380 | _ -> k c in
381
382 V0.combiner bind option_default
383 {V0.combiner_functions with
384 VT0.combiner_dotsstmtfn = statement_dots;
385 VT0.combiner_identfn = ident;
386 VT0.combiner_exprfn = expression;
387 VT0.combiner_stmtfn = statement;
388 VT0.combiner_casefn = case_line}
389
390 let type_infer code =
391 let prop = propagate_types [(Id("NULL"),T.Pointer(T.Unknown))] in
392 let fn = prop.VT0.combiner_rec_top_level in
393 let _ = List.map fn code in
394 ()