Release coccinelle-0.2.2-rc2
[bpt/coccinelle.git] / parsing_cocci / type_infer.ml
1 (*
2 * Copyright 2005-2010, Ecole des Mines de Nantes, University of Copenhagen
3 * Yoann Padioleau, Julia Lawall, Rene Rydhof Hansen, Henrik Stuart, Gilles Muller, Nicolas Palix
4 * This file is part of Coccinelle.
5 *
6 * Coccinelle is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, according to version 2 of the License.
9 *
10 * Coccinelle is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with Coccinelle. If not, see <http://www.gnu.org/licenses/>.
17 *
18 * The authors reserve the right to distribute this or future versions of
19 * Coccinelle under other licenses.
20 *)
21
22
23 module T = Type_cocci
24 module Ast = Ast_cocci
25 module Ast0 = Ast0_cocci
26 module V0 = Visitor_ast0
27 module VT0 = Visitor_ast0_types
28
29 (* Type inference:
30 Just propagates information based on declarations. Could try to infer
31 more precise information about expression metavariables, but not sure it is
32 worth it. The most obvious goal is to distinguish between test expressions
33 that have pointer, integer, and boolean type when matching isomorphisms,
34 but perhaps other needs will become apparent. *)
35
36 (* "functions" that return a boolean value *)
37 let bool_functions = ["likely";"unlikely"]
38
39 let err wrapped ty s =
40 T.typeC ty; Format.print_newline();
41 failwith (Printf.sprintf "line %d: %s" (Ast0.get_line wrapped) s)
42
43 type id = Id of string | Meta of Ast.meta_name
44
45 let int_type = T.BaseType(T.IntType)
46 let bool_type = T.BaseType(T.BoolType)
47 let char_type = T.BaseType(T.CharType)
48 let float_type = T.BaseType(T.FloatType)
49
50 let rec lub_type t1 t2 =
51 match (t1,t2) with
52 (None,None) -> None
53 | (None,Some t) -> t2
54 | (Some t,None) -> t1
55 | (Some t1,Some t2) ->
56 let rec loop = function
57 (T.Unknown,t2) -> t2
58 | (t1,T.Unknown) -> t1
59 | (T.ConstVol(cv1,ty1),T.ConstVol(cv2,ty2)) when cv1 = cv2 ->
60 T.ConstVol(cv1,loop(ty1,ty2))
61
62 (* pad: in pointer arithmetic, as in ptr+1, the lub must be ptr *)
63 | (T.Pointer(ty1),T.Pointer(ty2)) ->
64 T.Pointer(loop(ty1,ty2))
65 | (ty1,T.Pointer(ty2)) -> T.Pointer(ty2)
66 | (T.Pointer(ty1),ty2) -> T.Pointer(ty1)
67
68 | (T.Array(ty1),T.Array(ty2)) -> T.Array(loop(ty1,ty2))
69 | (T.TypeName(s1),t2) -> t2
70 | (t1,T.TypeName(s1)) -> t1
71 | (t1,_) -> t1 in (* arbitrarily pick the first, assume type correct *)
72 Some (loop (t1,t2))
73
74 let lub_envs envs =
75 List.fold_left
76 (function acc ->
77 function env ->
78 List.fold_left
79 (function acc ->
80 function (var,ty) ->
81 let (relevant,irrelevant) =
82 List.partition (function (x,_) -> x = var) acc in
83 match relevant with
84 [] -> (var,ty)::acc
85 | [(x,ty1)] ->
86 (match lub_type (Some ty) (Some ty1) with
87 Some new_ty -> (var,new_ty)::irrelevant
88 | None -> irrelevant)
89 | _ -> failwith "bad type environment")
90 acc env)
91 [] envs
92
93 let rec propagate_types env =
94 let option_default = None in
95 let bind x y = option_default in (* no generic way of combining types *)
96
97 let ident r k i =
98 match Ast0.unwrap i with
99 Ast0.Id(id) ->
100 (try Some(List.assoc (Id(Ast0.unwrap_mcode id)) env)
101 with Not_found -> None)
102 | Ast0.MetaId(id,_,_) ->
103 (try Some(List.assoc (Meta(Ast0.unwrap_mcode id)) env)
104 with Not_found -> None)
105 | _ -> k i in
106
107 let strip_cv = function
108 Some (T.ConstVol(_,t)) -> Some t
109 | t -> t in
110
111 (* types that might be integer types. should char be allowed? *)
112 let rec is_int_type = function
113 T.BaseType(T.IntType)
114 | T.BaseType(T.LongType)
115 | T.BaseType(T.ShortType)
116 | T.MetaType(_,_,_)
117 | T.TypeName _
118 | T.EnumName _
119 | T.SignedT(_,None) -> true
120 | T.SignedT(_,Some ty) -> is_int_type ty
121 | _ -> false in
122
123 let expression r k e =
124 let res = k e in
125 let ty =
126 match Ast0.unwrap e with
127 (* pad: the type of id is set in the ident visitor *)
128 Ast0.Ident(id) -> Ast0.set_type e res; res
129 | Ast0.Constant(const) ->
130 (match Ast0.unwrap_mcode const with
131 Ast.String(_) -> Some (T.Pointer(char_type))
132 | Ast.Char(_) -> Some (char_type)
133 | Ast.Int(_) -> Some (int_type)
134 | Ast.Float(_) -> Some (float_type))
135 (* pad: note that in C can do either ptr(...) or ( *ptr)(...)
136 * so I am not sure this code is enough.
137 *)
138 | Ast0.FunCall(fn,lp,args,rp) ->
139 (match Ast0.get_type fn with
140 Some (T.FunctionPointer(ty)) -> Some ty
141 | _ ->
142 (match Ast0.unwrap fn with
143 Ast0.Ident(id) ->
144 (match Ast0.unwrap id with
145 Ast0.Id(id) ->
146 if List.mem (Ast0.unwrap_mcode id) bool_functions
147 then Some(bool_type)
148 else None
149 | _ -> None)
150 | _ -> None))
151 | Ast0.Assignment(exp1,op,exp2,_) ->
152 let ty = lub_type (Ast0.get_type exp1) (Ast0.get_type exp2) in
153 Ast0.set_type exp1 ty; Ast0.set_type exp2 ty; ty
154 | Ast0.CondExpr(exp1,why,Some exp2,colon,exp3) ->
155 let ty = lub_type (Ast0.get_type exp2) (Ast0.get_type exp3) in
156 Ast0.set_type exp2 ty; Ast0.set_type exp3 ty; ty
157 | Ast0.CondExpr(exp1,why,None,colon,exp3) -> Ast0.get_type exp3
158 | Ast0.Postfix(exp,op) | Ast0.Infix(exp,op) -> (* op is dec or inc *)
159 Ast0.get_type exp
160 | Ast0.Unary(exp,op) ->
161 (match Ast0.unwrap_mcode op with
162 Ast.GetRef ->
163 (match Ast0.get_type exp with
164 None -> Some (T.Pointer(T.Unknown))
165 | Some t -> Some (T.Pointer(t)))
166 | Ast.DeRef ->
167 (match Ast0.get_type exp with
168 Some (T.Pointer(t)) -> Some t
169 | _ -> None)
170 | Ast.UnPlus -> Ast0.get_type exp
171 | Ast.UnMinus -> Ast0.get_type exp
172 | Ast.Tilde -> Ast0.get_type exp
173 | Ast.Not -> Some(bool_type))
174 | Ast0.Nested(exp1,op,exp2) -> failwith "nested in type inf not possible"
175 | Ast0.Binary(exp1,op,exp2) ->
176 let ty1 = Ast0.get_type exp1 in
177 let ty2 = Ast0.get_type exp2 in
178 let same_type = function
179 (None,None) -> Some (int_type)
180
181 (* pad: pointer arithmetic handling as in ptr+1 *)
182 | (Some (T.Pointer ty1),Some ty2) when is_int_type ty2 ->
183 Some (T.Pointer ty1)
184 | (Some ty1,Some (T.Pointer ty2)) when is_int_type ty1 ->
185 Some (T.Pointer ty2)
186
187 | (t1,t2) ->
188 let ty = lub_type t1 t2 in
189 Ast0.set_type exp1 ty; Ast0.set_type exp2 ty; ty in
190 (match Ast0.unwrap_mcode op with
191 Ast.Arith(op) -> same_type (ty1, ty2)
192 | Ast.Logical(Ast.AndLog) | Ast.Logical(Ast.OrLog) ->
193 Some(bool_type)
194 | Ast.Logical(op) ->
195 let ty = lub_type ty1 ty2 in
196 Ast0.set_type exp1 ty; Ast0.set_type exp2 ty;
197 Some(bool_type))
198 | Ast0.Paren(lp,exp,rp) -> Ast0.get_type exp
199 | Ast0.ArrayAccess(exp1,lb,exp2,rb) ->
200 (match strip_cv (Ast0.get_type exp2) with
201 None -> Ast0.set_type exp2 (Some(int_type))
202 | Some(ty) when is_int_type ty -> ()
203 | Some(Type_cocci.Unknown) ->
204 (* unknown comes from param types, not sure why this
205 is not just None... *)
206 Ast0.set_type exp2 (Some(int_type))
207 | Some ty -> err exp2 ty "bad type for an array index");
208 (match strip_cv (Ast0.get_type exp1) with
209 None -> None
210 | Some (T.Array(ty)) -> Some ty
211 | Some (T.Pointer(ty)) -> Some ty
212 | Some (T.MetaType(_,_,_)) -> None
213 | Some x -> err exp1 x "ill-typed array reference")
214 (* pad: should handle structure one day and look 'field' in environment *)
215 | Ast0.RecordAccess(exp,pt,field) ->
216 (match strip_cv (Ast0.get_type exp) with
217 None -> None
218 | Some (T.StructUnionName(_,_,_)) -> None
219 | Some (T.TypeName(_)) -> None
220 | Some (T.MetaType(_,_,_)) -> None
221 | Some x -> err exp x "non-structure type in field ref")
222 | Ast0.RecordPtAccess(exp,ar,field) ->
223 (match strip_cv (Ast0.get_type exp) with
224 None -> None
225 | Some (T.Pointer(t)) ->
226 (match strip_cv (Some t) with
227 | Some (T.Unknown) -> None
228 | Some (T.MetaType(_,_,_)) -> None
229 | Some (T.TypeName(_)) -> None
230 | Some (T.StructUnionName(_,_,_)) -> None
231 | Some x ->
232 err exp (T.Pointer(t))
233 "non-structure pointer type in field ref"
234 | _ -> failwith "not possible")
235 | Some (T.MetaType(_,_,_)) -> None
236 | Some (T.TypeName(_)) -> None
237 | Some x -> err exp x "non-structure pointer type in field ref")
238 | Ast0.Cast(lp,ty,rp,exp) -> Some(Ast0.ast0_type_to_type ty)
239 | Ast0.SizeOfExpr(szf,exp) -> Some(int_type)
240 | Ast0.SizeOfType(szf,lp,ty,rp) -> Some(int_type)
241 | Ast0.TypeExp(ty) -> None
242 | Ast0.MetaErr(name,_,_) -> None
243 | Ast0.MetaExpr(name,_,Some [ty],_,_) -> Some ty
244 | Ast0.MetaExpr(name,_,ty,_,_) -> None
245 | Ast0.MetaExprList(name,_,_) -> None
246 | Ast0.EComma(cm) -> None
247 | Ast0.DisjExpr(_,exp_list,_,_) ->
248 let types = List.map Ast0.get_type exp_list in
249 let combined = List.fold_left lub_type None types in
250 (match combined with
251 None -> None
252 | Some t ->
253 List.iter (function e -> Ast0.set_type e (Some t)) exp_list;
254 Some t)
255 | Ast0.NestExpr(starter,expr_dots,ender,None,multi) ->
256 let _ = r.VT0.combiner_rec_expression_dots expr_dots in None
257 | Ast0.NestExpr(starter,expr_dots,ender,Some e,multi) ->
258 let _ = r.VT0.combiner_rec_expression_dots expr_dots in
259 let _ = r.VT0.combiner_rec_expression e in None
260 | Ast0.Edots(_,None) | Ast0.Ecircles(_,None) | Ast0.Estars(_,None) ->
261 None
262 | Ast0.Edots(_,Some e) | Ast0.Ecircles(_,Some e)
263 | Ast0.Estars(_,Some e) ->
264 let _ = r.VT0.combiner_rec_expression e in None
265 | Ast0.OptExp(exp) -> Ast0.get_type exp
266 | Ast0.UniqueExp(exp) -> Ast0.get_type exp in
267 Ast0.set_type e ty;
268 ty in
269
270 let rec strip id =
271 match Ast0.unwrap id with
272 Ast0.Id(name) -> Id(Ast0.unwrap_mcode name)
273 | Ast0.MetaId(name,_,_) -> Meta(Ast0.unwrap_mcode name)
274 | Ast0.MetaFunc(name,_,_) -> Meta(Ast0.unwrap_mcode name)
275 | Ast0.MetaLocalFunc(name,_,_) -> Meta(Ast0.unwrap_mcode name)
276 | Ast0.OptIdent(id) -> strip id
277 | Ast0.UniqueIdent(id) -> strip id in
278
279 let process_whencode notfn allfn exp = function
280 Ast0.WhenNot(x) -> let _ = notfn x in ()
281 | Ast0.WhenAlways(x) -> let _ = allfn x in ()
282 | Ast0.WhenModifier(_) -> ()
283 | Ast0.WhenNotTrue(x) -> let _ = exp x in ()
284 | Ast0.WhenNotFalse(x) -> let _ = exp x in () in
285
286 (* assume that all of the declarations are at the beginning of a statement
287 list, which is required by C, but not actually required by the cocci
288 parser *)
289 let rec process_statement_list r acc = function
290 [] -> acc
291 | (s::ss) ->
292 (match Ast0.unwrap s with
293 Ast0.Decl(_,decl) ->
294 let new_acc = (process_decl acc decl)@acc in
295 process_statement_list r new_acc ss
296 | Ast0.Dots(_,wc) ->
297 (* why is this case here? why is there none for nests? *)
298 List.iter
299 (process_whencode r.VT0.combiner_rec_statement_dots
300 r.VT0.combiner_rec_statement r.VT0.combiner_rec_expression)
301 wc;
302 process_statement_list r acc ss
303 | Ast0.Disj(_,statement_dots_list,_,_) ->
304 let new_acc =
305 lub_envs
306 (List.map
307 (function x -> process_statement_list r acc (Ast0.undots x))
308 statement_dots_list) in
309 process_statement_list r new_acc ss
310 | _ ->
311 let _ = (propagate_types acc).VT0.combiner_rec_statement s in
312 process_statement_list r acc ss)
313
314 and process_decl env decl =
315 match Ast0.unwrap decl with
316 Ast0.Init(_,ty,id,_,exp,_) ->
317 let _ =
318 (propagate_types env).VT0.combiner_rec_initialiser exp in
319 [(strip id,Ast0.ast0_type_to_type ty)]
320 | Ast0.UnInit(_,ty,id,_) ->
321 [(strip id,Ast0.ast0_type_to_type ty)]
322 | Ast0.MacroDecl(_,_,_,_,_) -> []
323 | Ast0.TyDecl(ty,_) -> []
324 (* pad: should handle typedef one day and add a binding *)
325 | Ast0.Typedef(_,_,_,_) -> []
326 | Ast0.DisjDecl(_,disjs,_,_) ->
327 List.concat(List.map (process_decl env) disjs)
328 | Ast0.Ddots(_,_) -> [] (* not in a statement list anyway *)
329 | Ast0.OptDecl(decl) -> process_decl env decl
330 | Ast0.UniqueDecl(decl) -> process_decl env decl in
331
332 let statement_dots r k d =
333 match Ast0.unwrap d with
334 Ast0.DOTS(l) | Ast0.CIRCLES(l) | Ast0.STARS(l) ->
335 let _ = process_statement_list r env l in option_default in
336
337 let post_bool exp =
338 let rec process_test exp =
339 match (Ast0.unwrap exp,Ast0.get_type exp) with
340 (Ast0.Edots(_,_),_) -> None
341 | (Ast0.NestExpr(_,_,_,_,_),_) -> None
342 | (Ast0.MetaExpr(_,_,_,_,_),_) ->
343 (* if a type is known, it is specified in the decl *)
344 None
345 | (Ast0.Paren(lp,exp,rp),None) -> process_test exp
346 (* the following doesn't seem like a good idea - triggers int isos
347 on all test expressions *)
348 (*| (_,None) -> Some (int_type) *)
349 | _ -> None in
350 let new_expty = process_test exp in
351 (match new_expty with
352 None -> () (* leave things as they are *)
353 | Some ty -> Ast0.set_type exp new_expty) in
354
355 let statement r k s =
356 match Ast0.unwrap s with
357 Ast0.FunDecl(_,fninfo,name,lp,params,rp,lbrace,body,rbrace) ->
358 let rec get_binding p =
359 match Ast0.unwrap p with
360 Ast0.Param(ty,Some id) ->
361 [(strip id,Ast0.ast0_type_to_type ty)]
362 | Ast0.OptParam(param) -> get_binding param
363 | _ -> [] in
364 let fenv = List.concat (List.map get_binding (Ast0.undots params)) in
365 (propagate_types (fenv@env)).VT0.combiner_rec_statement_dots body
366 | Ast0.IfThen(_,_,exp,_,_,_) | Ast0.IfThenElse(_,_,exp,_,_,_,_,_)
367 | Ast0.While(_,_,exp,_,_,_) | Ast0.Do(_,_,_,_,exp,_,_)
368 | Ast0.For(_,_,_,_,Some exp,_,_,_,_,_) ->
369 let _ = k s in
370 post_bool exp;
371 None
372 | Ast0.Switch(_,_,exp,_,_,decls,cases,_) ->
373 let senv = process_statement_list r env (Ast0.undots decls) in
374 let res =
375 (propagate_types (senv@env)).VT0.combiner_rec_case_line_dots cases in
376 post_bool exp;
377 res
378 | _ -> k s
379
380 and case_line r k c =
381 match Ast0.unwrap c with
382 Ast0.Case(case,exp,colon,code) ->
383 let _ = k c in
384 (match Ast0.get_type exp with
385 None -> Ast0.set_type exp (Some (int_type))
386 | _ -> ());
387 None
388 | _ -> k c in
389
390 V0.combiner bind option_default
391 {V0.combiner_functions with
392 VT0.combiner_dotsstmtfn = statement_dots;
393 VT0.combiner_identfn = ident;
394 VT0.combiner_exprfn = expression;
395 VT0.combiner_stmtfn = statement;
396 VT0.combiner_casefn = case_line}
397
398 let type_infer code =
399 let prop = propagate_types [(Id("NULL"),T.Pointer(T.Unknown))] in
400 let fn = prop.VT0.combiner_rec_top_level in
401 let _ = List.map fn code in
402 ()