Release coccinelle-0.2.3rc1
[bpt/coccinelle.git] / parsing_cocci / type_infer.ml
1 (*
2 * Copyright 2005-2010, Ecole des Mines de Nantes, University of Copenhagen
3 * Yoann Padioleau, Julia Lawall, Rene Rydhof Hansen, Henrik Stuart, Gilles Muller, Nicolas Palix
4 * This file is part of Coccinelle.
5 *
6 * Coccinelle is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, according to version 2 of the License.
9 *
10 * Coccinelle is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with Coccinelle. If not, see <http://www.gnu.org/licenses/>.
17 *
18 * The authors reserve the right to distribute this or future versions of
19 * Coccinelle under other licenses.
20 *)
21
22
23 (*
24 * Copyright 2005-2010, Ecole des Mines de Nantes, University of Copenhagen
25 * Yoann Padioleau, Julia Lawall, Rene Rydhof Hansen, Henrik Stuart, Gilles Muller, Nicolas Palix
26 * This file is part of Coccinelle.
27 *
28 * Coccinelle is free software: you can redistribute it and/or modify
29 * it under the terms of the GNU General Public License as published by
30 * the Free Software Foundation, according to version 2 of the License.
31 *
32 * Coccinelle is distributed in the hope that it will be useful,
33 * but WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35 * GNU General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with Coccinelle. If not, see <http://www.gnu.org/licenses/>.
39 *
40 * The authors reserve the right to distribute this or future versions of
41 * Coccinelle under other licenses.
42 *)
43
44
45 module T = Type_cocci
46 module Ast = Ast_cocci
47 module Ast0 = Ast0_cocci
48 module V0 = Visitor_ast0
49 module VT0 = Visitor_ast0_types
50
51 (* Type inference:
52 Just propagates information based on declarations. Could try to infer
53 more precise information about expression metavariables, but not sure it is
54 worth it. The most obvious goal is to distinguish between test expressions
55 that have pointer, integer, and boolean type when matching isomorphisms,
56 but perhaps other needs will become apparent. *)
57
58 (* "functions" that return a boolean value *)
59 let bool_functions = ["likely";"unlikely"]
60
61 let err wrapped ty s =
62 T.typeC ty; Format.print_newline();
63 failwith (Printf.sprintf "line %d: %s" (Ast0.get_line wrapped) s)
64
65 type id = Id of string | Meta of Ast.meta_name
66
67 let int_type = T.BaseType(T.IntType)
68 let bool_type = T.BaseType(T.BoolType)
69 let char_type = T.BaseType(T.CharType)
70 let float_type = T.BaseType(T.FloatType)
71
72 let rec lub_type t1 t2 =
73 match (t1,t2) with
74 (None,None) -> None
75 | (None,Some t) -> t2
76 | (Some t,None) -> t1
77 | (Some t1,Some t2) ->
78 let rec loop = function
79 (T.Unknown,t2) -> t2
80 | (t1,T.Unknown) -> t1
81 | (T.ConstVol(cv1,ty1),T.ConstVol(cv2,ty2)) when cv1 = cv2 ->
82 T.ConstVol(cv1,loop(ty1,ty2))
83
84 (* pad: in pointer arithmetic, as in ptr+1, the lub must be ptr *)
85 | (T.Pointer(ty1),T.Pointer(ty2)) ->
86 T.Pointer(loop(ty1,ty2))
87 | (ty1,T.Pointer(ty2)) -> T.Pointer(ty2)
88 | (T.Pointer(ty1),ty2) -> T.Pointer(ty1)
89
90 | (T.Array(ty1),T.Array(ty2)) -> T.Array(loop(ty1,ty2))
91 | (T.TypeName(s1),t2) -> t2
92 | (t1,T.TypeName(s1)) -> t1
93 | (t1,_) -> t1 in (* arbitrarily pick the first, assume type correct *)
94 Some (loop (t1,t2))
95
96 let lub_envs envs =
97 List.fold_left
98 (function acc ->
99 function env ->
100 List.fold_left
101 (function acc ->
102 function (var,ty) ->
103 let (relevant,irrelevant) =
104 List.partition (function (x,_) -> x = var) acc in
105 match relevant with
106 [] -> (var,ty)::acc
107 | [(x,ty1)] ->
108 (match lub_type (Some ty) (Some ty1) with
109 Some new_ty -> (var,new_ty)::irrelevant
110 | None -> irrelevant)
111 | _ -> failwith "bad type environment")
112 acc env)
113 [] envs
114
115 let rec propagate_types env =
116 let option_default = None in
117 let bind x y = option_default in (* no generic way of combining types *)
118
119 let ident r k i =
120 match Ast0.unwrap i with
121 Ast0.Id(id) ->
122 (try Some(List.assoc (Id(Ast0.unwrap_mcode id)) env)
123 with Not_found -> None)
124 | Ast0.MetaId(id,_,_) ->
125 (try Some(List.assoc (Meta(Ast0.unwrap_mcode id)) env)
126 with Not_found -> None)
127 | _ -> k i in
128
129 let strip_cv = function
130 Some (T.ConstVol(_,t)) -> Some t
131 | t -> t in
132
133 (* types that might be integer types. should char be allowed? *)
134 let rec is_int_type = function
135 T.BaseType(T.IntType)
136 | T.BaseType(T.LongType)
137 | T.BaseType(T.ShortType)
138 | T.MetaType(_,_,_)
139 | T.TypeName _
140 | T.EnumName _
141 | T.SignedT(_,None) -> true
142 | T.SignedT(_,Some ty) -> is_int_type ty
143 | _ -> false in
144
145 let expression r k e =
146 let res = k e in
147 let ty =
148 match Ast0.unwrap e with
149 (* pad: the type of id is set in the ident visitor *)
150 Ast0.Ident(id) -> Ast0.set_type e res; res
151 | Ast0.Constant(const) ->
152 (match Ast0.unwrap_mcode const with
153 Ast.String(_) -> Some (T.Pointer(char_type))
154 | Ast.Char(_) -> Some (char_type)
155 | Ast.Int(_) -> Some (int_type)
156 | Ast.Float(_) -> Some (float_type))
157 (* pad: note that in C can do either ptr(...) or ( *ptr)(...)
158 * so I am not sure this code is enough.
159 *)
160 | Ast0.FunCall(fn,lp,args,rp) ->
161 (match Ast0.get_type fn with
162 Some (T.FunctionPointer(ty)) -> Some ty
163 | _ ->
164 (match Ast0.unwrap fn with
165 Ast0.Ident(id) ->
166 (match Ast0.unwrap id with
167 Ast0.Id(id) ->
168 if List.mem (Ast0.unwrap_mcode id) bool_functions
169 then Some(bool_type)
170 else None
171 | _ -> None)
172 | _ -> None))
173 | Ast0.Assignment(exp1,op,exp2,_) ->
174 let ty = lub_type (Ast0.get_type exp1) (Ast0.get_type exp2) in
175 Ast0.set_type exp1 ty; Ast0.set_type exp2 ty; ty
176 | Ast0.CondExpr(exp1,why,Some exp2,colon,exp3) ->
177 let ty = lub_type (Ast0.get_type exp2) (Ast0.get_type exp3) in
178 Ast0.set_type exp2 ty; Ast0.set_type exp3 ty; ty
179 | Ast0.CondExpr(exp1,why,None,colon,exp3) -> Ast0.get_type exp3
180 | Ast0.Postfix(exp,op) | Ast0.Infix(exp,op) -> (* op is dec or inc *)
181 Ast0.get_type exp
182 | Ast0.Unary(exp,op) ->
183 (match Ast0.unwrap_mcode op with
184 Ast.GetRef ->
185 (match Ast0.get_type exp with
186 None -> Some (T.Pointer(T.Unknown))
187 | Some t -> Some (T.Pointer(t)))
188 | Ast.DeRef ->
189 (match Ast0.get_type exp with
190 Some (T.Pointer(t)) -> Some t
191 | _ -> None)
192 | Ast.UnPlus -> Ast0.get_type exp
193 | Ast.UnMinus -> Ast0.get_type exp
194 | Ast.Tilde -> Ast0.get_type exp
195 | Ast.Not -> Some(bool_type))
196 | Ast0.Nested(exp1,op,exp2) -> failwith "nested in type inf not possible"
197 | Ast0.Binary(exp1,op,exp2) ->
198 let ty1 = Ast0.get_type exp1 in
199 let ty2 = Ast0.get_type exp2 in
200 let same_type = function
201 (None,None) -> Some (int_type)
202
203 (* pad: pointer arithmetic handling as in ptr+1 *)
204 | (Some (T.Pointer ty1),Some ty2) when is_int_type ty2 ->
205 Some (T.Pointer ty1)
206 | (Some ty1,Some (T.Pointer ty2)) when is_int_type ty1 ->
207 Some (T.Pointer ty2)
208
209 | (t1,t2) ->
210 let ty = lub_type t1 t2 in
211 Ast0.set_type exp1 ty; Ast0.set_type exp2 ty; ty in
212 (match Ast0.unwrap_mcode op with
213 Ast.Arith(op) -> same_type (ty1, ty2)
214 | Ast.Logical(Ast.AndLog) | Ast.Logical(Ast.OrLog) ->
215 Some(bool_type)
216 | Ast.Logical(op) ->
217 let ty = lub_type ty1 ty2 in
218 Ast0.set_type exp1 ty; Ast0.set_type exp2 ty;
219 Some(bool_type))
220 | Ast0.Paren(lp,exp,rp) -> Ast0.get_type exp
221 | Ast0.ArrayAccess(exp1,lb,exp2,rb) ->
222 (match strip_cv (Ast0.get_type exp2) with
223 None -> Ast0.set_type exp2 (Some(int_type))
224 | Some(ty) when is_int_type ty -> ()
225 | Some(Type_cocci.Unknown) ->
226 (* unknown comes from param types, not sure why this
227 is not just None... *)
228 Ast0.set_type exp2 (Some(int_type))
229 | Some ty -> err exp2 ty "bad type for an array index");
230 (match strip_cv (Ast0.get_type exp1) with
231 None -> None
232 | Some (T.Array(ty)) -> Some ty
233 | Some (T.Pointer(ty)) -> Some ty
234 | Some (T.MetaType(_,_,_)) -> None
235 | Some x -> err exp1 x "ill-typed array reference")
236 (* pad: should handle structure one day and look 'field' in environment *)
237 | Ast0.RecordAccess(exp,pt,field) ->
238 (match strip_cv (Ast0.get_type exp) with
239 None -> None
240 | Some (T.StructUnionName(_,_,_)) -> None
241 | Some (T.TypeName(_)) -> None
242 | Some (T.MetaType(_,_,_)) -> None
243 | Some x -> err exp x "non-structure type in field ref")
244 | Ast0.RecordPtAccess(exp,ar,field) ->
245 (match strip_cv (Ast0.get_type exp) with
246 None -> None
247 | Some (T.Pointer(t)) ->
248 (match strip_cv (Some t) with
249 | Some (T.Unknown) -> None
250 | Some (T.MetaType(_,_,_)) -> None
251 | Some (T.TypeName(_)) -> None
252 | Some (T.StructUnionName(_,_,_)) -> None
253 | Some x ->
254 err exp (T.Pointer(t))
255 "non-structure pointer type in field ref"
256 | _ -> failwith "not possible")
257 | Some (T.MetaType(_,_,_)) -> None
258 | Some (T.TypeName(_)) -> None
259 | Some x -> err exp x "non-structure pointer type in field ref")
260 | Ast0.Cast(lp,ty,rp,exp) -> Some(Ast0.ast0_type_to_type ty)
261 | Ast0.SizeOfExpr(szf,exp) -> Some(int_type)
262 | Ast0.SizeOfType(szf,lp,ty,rp) -> Some(int_type)
263 | Ast0.TypeExp(ty) -> None
264 | Ast0.MetaErr(name,_,_) -> None
265 | Ast0.MetaExpr(name,_,Some [ty],_,_) -> Some ty
266 | Ast0.MetaExpr(name,_,ty,_,_) -> None
267 | Ast0.MetaExprList(name,_,_) -> None
268 | Ast0.EComma(cm) -> None
269 | Ast0.DisjExpr(_,exp_list,_,_) ->
270 let types = List.map Ast0.get_type exp_list in
271 let combined = List.fold_left lub_type None types in
272 (match combined with
273 None -> None
274 | Some t ->
275 List.iter (function e -> Ast0.set_type e (Some t)) exp_list;
276 Some t)
277 | Ast0.NestExpr(starter,expr_dots,ender,None,multi) ->
278 let _ = r.VT0.combiner_rec_expression_dots expr_dots in None
279 | Ast0.NestExpr(starter,expr_dots,ender,Some e,multi) ->
280 let _ = r.VT0.combiner_rec_expression_dots expr_dots in
281 let _ = r.VT0.combiner_rec_expression e in None
282 | Ast0.Edots(_,None) | Ast0.Ecircles(_,None) | Ast0.Estars(_,None) ->
283 None
284 | Ast0.Edots(_,Some e) | Ast0.Ecircles(_,Some e)
285 | Ast0.Estars(_,Some e) ->
286 let _ = r.VT0.combiner_rec_expression e in None
287 | Ast0.OptExp(exp) -> Ast0.get_type exp
288 | Ast0.UniqueExp(exp) -> Ast0.get_type exp in
289 Ast0.set_type e ty;
290 ty in
291
292 let rec strip id =
293 match Ast0.unwrap id with
294 Ast0.Id(name) -> Id(Ast0.unwrap_mcode name)
295 | Ast0.MetaId(name,_,_) -> Meta(Ast0.unwrap_mcode name)
296 | Ast0.MetaFunc(name,_,_) -> Meta(Ast0.unwrap_mcode name)
297 | Ast0.MetaLocalFunc(name,_,_) -> Meta(Ast0.unwrap_mcode name)
298 | Ast0.OptIdent(id) -> strip id
299 | Ast0.UniqueIdent(id) -> strip id in
300
301 let process_whencode notfn allfn exp = function
302 Ast0.WhenNot(x) -> let _ = notfn x in ()
303 | Ast0.WhenAlways(x) -> let _ = allfn x in ()
304 | Ast0.WhenModifier(_) -> ()
305 | Ast0.WhenNotTrue(x) -> let _ = exp x in ()
306 | Ast0.WhenNotFalse(x) -> let _ = exp x in () in
307
308 (* assume that all of the declarations are at the beginning of a statement
309 list, which is required by C, but not actually required by the cocci
310 parser *)
311 let rec process_statement_list r acc = function
312 [] -> acc
313 | (s::ss) ->
314 (match Ast0.unwrap s with
315 Ast0.Decl(_,decl) ->
316 let new_acc = (process_decl acc decl)@acc in
317 process_statement_list r new_acc ss
318 | Ast0.Dots(_,wc) ->
319 (* why is this case here? why is there none for nests? *)
320 List.iter
321 (process_whencode r.VT0.combiner_rec_statement_dots
322 r.VT0.combiner_rec_statement r.VT0.combiner_rec_expression)
323 wc;
324 process_statement_list r acc ss
325 | Ast0.Disj(_,statement_dots_list,_,_) ->
326 let new_acc =
327 lub_envs
328 (List.map
329 (function x -> process_statement_list r acc (Ast0.undots x))
330 statement_dots_list) in
331 process_statement_list r new_acc ss
332 | _ ->
333 let _ = (propagate_types acc).VT0.combiner_rec_statement s in
334 process_statement_list r acc ss)
335
336 and process_decl env decl =
337 match Ast0.unwrap decl with
338 Ast0.Init(_,ty,id,_,exp,_) ->
339 let _ =
340 (propagate_types env).VT0.combiner_rec_initialiser exp in
341 [(strip id,Ast0.ast0_type_to_type ty)]
342 | Ast0.UnInit(_,ty,id,_) ->
343 [(strip id,Ast0.ast0_type_to_type ty)]
344 | Ast0.MacroDecl(_,_,_,_,_) -> []
345 | Ast0.TyDecl(ty,_) -> []
346 (* pad: should handle typedef one day and add a binding *)
347 | Ast0.Typedef(_,_,_,_) -> []
348 | Ast0.DisjDecl(_,disjs,_,_) ->
349 List.concat(List.map (process_decl env) disjs)
350 | Ast0.Ddots(_,_) -> [] (* not in a statement list anyway *)
351 | Ast0.OptDecl(decl) -> process_decl env decl
352 | Ast0.UniqueDecl(decl) -> process_decl env decl in
353
354 let statement_dots r k d =
355 match Ast0.unwrap d with
356 Ast0.DOTS(l) | Ast0.CIRCLES(l) | Ast0.STARS(l) ->
357 let _ = process_statement_list r env l in option_default in
358
359 let post_bool exp =
360 let rec process_test exp =
361 match (Ast0.unwrap exp,Ast0.get_type exp) with
362 (Ast0.Edots(_,_),_) -> None
363 | (Ast0.NestExpr(_,_,_,_,_),_) -> None
364 | (Ast0.MetaExpr(_,_,_,_,_),_) ->
365 (* if a type is known, it is specified in the decl *)
366 None
367 | (Ast0.Paren(lp,exp,rp),None) -> process_test exp
368 (* the following doesn't seem like a good idea - triggers int isos
369 on all test expressions *)
370 (*| (_,None) -> Some (int_type) *)
371 | _ -> None in
372 let new_expty = process_test exp in
373 (match new_expty with
374 None -> () (* leave things as they are *)
375 | Some ty -> Ast0.set_type exp new_expty) in
376
377 let statement r k s =
378 match Ast0.unwrap s with
379 Ast0.FunDecl(_,fninfo,name,lp,params,rp,lbrace,body,rbrace) ->
380 let rec get_binding p =
381 match Ast0.unwrap p with
382 Ast0.Param(ty,Some id) ->
383 [(strip id,Ast0.ast0_type_to_type ty)]
384 | Ast0.OptParam(param) -> get_binding param
385 | _ -> [] in
386 let fenv = List.concat (List.map get_binding (Ast0.undots params)) in
387 (propagate_types (fenv@env)).VT0.combiner_rec_statement_dots body
388 | Ast0.IfThen(_,_,exp,_,_,_) | Ast0.IfThenElse(_,_,exp,_,_,_,_,_)
389 | Ast0.While(_,_,exp,_,_,_) | Ast0.Do(_,_,_,_,exp,_,_)
390 | Ast0.For(_,_,_,_,Some exp,_,_,_,_,_) ->
391 let _ = k s in
392 post_bool exp;
393 None
394 | Ast0.Switch(_,_,exp,_,_,decls,cases,_) ->
395 let senv = process_statement_list r env (Ast0.undots decls) in
396 let res =
397 (propagate_types (senv@env)).VT0.combiner_rec_case_line_dots cases in
398 post_bool exp;
399 res
400 | _ -> k s
401
402 and case_line r k c =
403 match Ast0.unwrap c with
404 Ast0.Case(case,exp,colon,code) ->
405 let _ = k c in
406 (match Ast0.get_type exp with
407 None -> Ast0.set_type exp (Some (int_type))
408 | _ -> ());
409 None
410 | _ -> k c in
411
412 V0.combiner bind option_default
413 {V0.combiner_functions with
414 VT0.combiner_dotsstmtfn = statement_dots;
415 VT0.combiner_identfn = ident;
416 VT0.combiner_exprfn = expression;
417 VT0.combiner_stmtfn = statement;
418 VT0.combiner_casefn = case_line}
419
420 let type_infer code =
421 let prop = propagate_types [(Id("NULL"),T.Pointer(T.Unknown))] in
422 let fn = prop.VT0.combiner_rec_top_level in
423 let _ = List.map fn code in
424 ()