Release coccinelle-0.1.2
[bpt/coccinelle.git] / parsing_c / type_annoter_c.ml
1 (* Copyright (C) 2007, 2008 Yoann Padioleau
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU General Public License (GPL)
5 * version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * file license.txt for more details.
11 *)
12 open Common
13
14 open Ast_c
15
16 module Lib = Lib_parsing_c
17
18 (*****************************************************************************)
19 (* can either:
20 *
21 * - do a kind of inferer
22 * * can first do a simple inferer, that just pass context
23 * * then a real inferer, managing partial info.
24 * type context = fullType option
25 *
26 * - extract the information from the .h files
27 * (so no inference at all needed)
28 *
29 * todo: expression contain types, and statements, which in turn can contain
30 * expression, so need recurse. Need define an annote_statement and
31 * annotate_type.
32
33 * todo: how deal with typedef isomorphisms ? How store them in Ast_c ?
34 * store all posible variations in ast_c ? a list of type instead of just
35 * the type ?
36 *
37 * todo: define a new type ? like type_cocci ? where have a bool ?
38 *
39 * How handle scope ? When search for type of field, we return
40 * a type, but this type makes sense only in a certain scope.
41 * We could add a tag to each typedef, structUnionName to differentiate
42 * them and also associate in ast_c to the type the scope
43 * of this type, the env that were used to define this type.
44 *)
45
46 (*****************************************************************************)
47 (* Wrappers *)
48 (*****************************************************************************)
49 let pr2 s =
50 if !Flag_parsing_c.verbose_type
51 then Common.pr2 s
52
53 (*****************************************************************************)
54 (* Environment *)
55 (*****************************************************************************)
56
57 (* the different namespaces from stdC manual:
58 *
59 * You introduce two new name spaces with every block that you write.
60 * One name space includes all functions, objects, type definitions,
61 * and enumeration constants that you declare or define within the
62 * block. The other name space includes all enumeration, structure, and
63 * union tags that you define within the block.
64 *
65 * You introduce a new member name space with every structure or union
66 * whose content you define. You identify a member name space by the
67 * type of left operand that you write for a member selection
68 * operator, as in x.y or p->y. A member name space ends with the end
69 * of the block in which you declare it.
70 *
71 * You introduce a new goto label name space with every function
72 * definition you write. Each goto label name space ends with its
73 * function definition.
74 *)
75
76 (* But I don't try to do a type-checker, I try to "resolve" type of var
77 * so don't need make difference between namespaces here.
78 *
79 * But, why not make simply a (string, kindstring) assoc ?
80 * Because we dont want that a variable shadow a struct definition, because
81 * they are still in 2 different namespace. But could for typedef,
82 * because VarOrFunc and Typedef are in the same namespace.
83 * But could do a record as in c_info.ml
84 *)
85
86
87 (* the wrap for StructUnionNameDef contain the whole ii, the i for
88 * the string, the structUnion and the structType
89 *)
90 type namedef =
91 | VarOrFunc of string * Ast_c.exp_type
92 | TypeDef of string * fullType
93 | StructUnionNameDef of string * (structUnion * structType) wrap
94 (* todo: EnumConstant *)
95 (* todo: EnumDef *)
96
97 (* because have nested scope, have nested list, hence the list list *)
98 type environment = namedef list list
99
100 let initial_env = [
101 [VarOrFunc("NULL",(Lib.al_type (Parse_c.type_of_string "void *"),
102 Ast_c.NotLocalVar))]
103 ]
104
105
106 let rec lookup_env f env =
107 match env with
108 | [] -> raise Not_found
109 | []::zs -> lookup_env f zs
110 | (x::xs)::zs ->
111 match f x with
112 | None -> lookup_env f (xs::zs)
113 | Some y -> y, xs::zs
114
115
116
117 let lookup_var s env =
118 let f = function
119 | VarOrFunc (s2, typ) -> if s2 = s then Some typ else None
120 | _ -> None
121 in
122 lookup_env f env
123
124 let lookup_typedef s env =
125 let f = function
126 | TypeDef (s2, typ) -> if s2 = s then Some typ else None
127 | _ -> None
128 in
129 lookup_env f env
130
131 let lookup_structunion (_su, s) env =
132 let f = function
133 | StructUnionNameDef (s2, typ) -> if s2 = s then Some typ else None
134 | _ -> None
135 in
136 lookup_env f env
137
138 let member_env lookupf env =
139 try
140 let _ = lookupf env in
141 true
142 with Not_found -> false
143
144 (*****************************************************************************)
145 (* "type-lookup" *)
146 (*****************************************************************************)
147
148 (* find_final_type is used to know to what type a field correspond in
149 * x.foo. Sometimes the type of x is a typedef or a structName in which
150 * case we must look in environment to find the complete type, here
151 * structUnion that contains the information.
152 *
153 * Because in C one can redefine in nested blocks some typedefs,
154 * struct, or variables, we have a static scoping resolving process.
155 * So, when we look for the type of a var, if this var is in an
156 * enclosing block, then maybe its type refer to a typdef of this
157 * enclosing block, so must restart the "type-resolving" of this
158 * typedef from this enclosing block, not from the bottom. So our
159 * "resolving-type functions" take an env and also return an env from
160 * where the next search must be performed. *)
161
162 (*
163 let rec find_final_type ty env =
164
165 match Ast_c.unwrap_typeC ty with
166 | BaseType x -> (BaseType x) +> Ast_c.rewrap_typeC ty
167
168 | Pointer t -> (Pointer (find_final_type t env)) +> Ast_c.rewrap_typeC ty
169 | Array (e, t) -> Array (e, find_final_type t env) +> Ast_c.rewrap_typeC ty
170
171 | StructUnion (sopt, su) -> StructUnion (sopt, su) +> Ast_c.rewrap_typeC ty
172
173 | FunctionType t -> (FunctionType t) (* todo ? *) +> Ast_c.rewrap_typeC ty
174 | Enum (s, enumt) -> (Enum (s, enumt)) (* todo? *) +> Ast_c.rewrap_typeC ty
175 | EnumName s -> (EnumName s) (* todo? *) +> Ast_c.rewrap_typeC ty
176
177 | StructUnionName (su, s) ->
178 (try
179 let ((structtyp,ii), env') = lookup_structunion (su, s) env in
180 Ast_c.nQ, (StructUnion (Some s, structtyp), ii)
181 (* old: +> Ast_c.rewrap_typeC ty
182 * but must wrap with good ii, otherwise pretty_print_c
183 * will be lost and raise some Impossible
184 *)
185 with Not_found ->
186 ty
187 )
188
189 | TypeName s ->
190 (try
191 let (t', env') = lookup_typedef s env in
192 find_final_type t' env'
193 with Not_found ->
194 ty
195 )
196
197 | ParenType t -> find_final_type t env
198 | Typeof e -> failwith "typeof"
199 *)
200
201
202
203
204 let rec type_unfold_one_step ty env =
205
206 match Ast_c.unwrap_typeC ty with
207 | BaseType x -> ty
208 | Pointer t -> ty
209 | Array (e, t) -> ty
210 | StructUnion (sopt, su, fields) -> ty
211
212 | FunctionType t -> ty
213 | Enum (s, enumt) -> ty
214 | EnumName s -> ty
215
216 | StructUnionName (su, s) ->
217 (try
218 let (((su,fields),ii), env') = lookup_structunion (su, s) env in
219 Ast_c.nQ, (StructUnion (su, Some s, fields), ii)
220 (* old: +> Ast_c.rewrap_typeC ty
221 * but must wrap with good ii, otherwise pretty_print_c
222 * will be lost and raise some Impossible
223 *)
224 with Not_found ->
225 ty
226 )
227
228 | TypeName (s,_typ) ->
229 (try
230 let (t', env') = lookup_typedef s env in
231 type_unfold_one_step t' env'
232 with Not_found ->
233 ty
234 )
235
236 | ParenType t -> type_unfold_one_step t env
237 | TypeOfExpr e ->
238 pr2_once ("Type_annoter: not handling typeof");
239 ty
240 | TypeOfType t -> type_unfold_one_step t env
241
242
243
244 let (type_field:
245 string -> (Ast_c.structUnion * Ast_c.structType) -> Ast_c.fullType) =
246 fun fld (su, fields) ->
247 fields +> Common.find_some (fun x ->
248 match Ast_c.unwrap x with
249 | DeclarationField (FieldDeclList (onefield_multivars, iiptvirg)) ->
250 Common.optionise (fun () ->
251 onefield_multivars +> Common.find_some (fun fieldkind ->
252
253 match Ast_c.unwrap (Ast_c.unwrap fieldkind) with
254 | Simple (Some s, t) | BitField (Some s, t, _) ->
255 if s = fld then Some t else None
256 | _ -> None
257 )
258 )
259 | EmptyField -> None
260 | MacroStructDeclTodo -> pr2 "DeclTodo"; None
261 | CppDirectiveStruct _
262 | IfdefStruct _ -> pr2 "StructCpp"; None
263 )
264
265
266
267
268 let structdef_to_struct_name ty =
269 match ty with
270 | qu, (StructUnion (su, sopt, fields), iis) ->
271 (match sopt,iis with
272 | Some s , [i1;i2;i3;i4] ->
273 qu, (StructUnionName (su, s), [i1;i2])
274 | None, _ ->
275 ty
276
277 | x -> raise Impossible
278 )
279 | _ -> raise Impossible
280
281
282
283 let rec typedef_fix ty env =
284
285 match Ast_c.unwrap_typeC ty with
286 | BaseType x -> ty
287 | Pointer t -> Pointer (typedef_fix t env) +> Ast_c.rewrap_typeC ty
288 | Array (e, t) -> Array (e, typedef_fix t env) +> Ast_c.rewrap_typeC ty
289 | StructUnion (su, sopt, fields) -> structdef_to_struct_name ty
290 | FunctionType ft ->
291 (FunctionType ft) (* todo ? *) +> Ast_c.rewrap_typeC ty
292 | Enum (s, enumt) ->
293 (Enum (s, enumt)) (* todo? *) +> Ast_c.rewrap_typeC ty
294 | EnumName s ->
295 (EnumName s) (* todo? *) +> Ast_c.rewrap_typeC ty
296
297 (* we prefer StructUnionName to StructUnion when it comes to typed metavar *)
298 | StructUnionName (su, s) -> ty
299
300 | TypeName (s, _typ) ->
301 (try
302 let (t', env') = lookup_typedef s env in
303 TypeName (s, Some (typedef_fix t' env)) +> Ast_c.rewrap_typeC ty
304 with Not_found ->
305 ty
306 )
307
308 | ParenType t -> typedef_fix t env
309 | TypeOfExpr e ->
310 pr2_once ("Type_annoter: not handling typeof");
311 ty
312
313 | TypeOfType t -> typedef_fix t env
314
315 (*****************************************************************************)
316 (* (Semi) Globals, Julia's style *)
317 (*****************************************************************************)
318
319 (* opti: cache ? use hash ? *)
320 let _scoped_env = ref initial_env
321
322 (* memoise unnanoted var, to avoid too much warning messages *)
323 let _notyped_var = ref (Hashtbl.create 100)
324
325 let new_scope() = _scoped_env := []::!_scoped_env
326 let del_scope() = _scoped_env := List.tl !_scoped_env
327
328 let do_in_new_scope f =
329 begin
330 new_scope();
331 let res = f() in
332 del_scope();
333 res
334 end
335
336 let add_in_scope namedef =
337 let (current, older) = Common.uncons !_scoped_env in
338 _scoped_env := (namedef::current)::older
339
340 (* sort of hackish... *)
341 let islocal info =
342 if List.length (!_scoped_env) = List.length initial_env
343 then Ast_c.NotLocalVar
344 else Ast_c.LocalVar info
345
346 (* the warning argument is here to allow some binding to overwrite an
347 * existing one. With function, we first have the protype and then the def
348 * and the def binding the same string is not an error.
349 * todo?: but if we define two times the same function, then we will not
350 * detect it :( would require to make a diff between adding a binding
351 * from a prototype and from a definition.
352 *)
353 let add_binding namedef warning =
354 let (current_scope, _older_scope) = Common.uncons !_scoped_env in
355
356 (match namedef with
357 | VarOrFunc (s, typ) ->
358 if Hashtbl.mem !_notyped_var s
359 then pr2 ("warning: found typing information for a variable that was" ^
360 "previously unknown:" ^ s);
361 | _ -> ()
362 );
363
364 let (memberf, s) =
365 (match namedef with
366 | VarOrFunc (s, typ) -> member_env (lookup_var s), s
367 | TypeDef (s, typ) -> member_env (lookup_typedef s), s
368 | StructUnionNameDef (s, (su, typ)) ->
369 member_env (lookup_structunion (su, s)), s
370 ) in
371
372 if memberf [current_scope] && warning
373 then pr2 ("Type_annoter: warning, " ^ s ^
374 " is already in current binding" ^ "\n" ^
375 " so there is a wierd shadowing");
376 add_in_scope namedef
377
378
379 (*****************************************************************************)
380 (* Helpers *)
381 (*****************************************************************************)
382
383 let make_info t = (Some t,Ast_c.NotTest)
384
385 let type_of_s s =
386 (Lib.al_type (Parse_c.type_of_string s), Ast_c.NotLocalVar)
387
388 let noTypeHere = (None,Ast_c.NotTest)
389
390 let do_with_type f (t,_test) =
391 match t with
392 | None -> noTypeHere
393 | Some (t,_local) -> f t
394
395
396 (*****************************************************************************)
397 (* Entry point *)
398 (*****************************************************************************)
399 (* catch all the decl to grow the environment *)
400
401 let rec (annotate_program2 :
402 environment -> toplevel list -> (toplevel * environment Common.pair) list
403 ) = fun env prog ->
404
405 (* globals (re)initialialisation *)
406 _scoped_env := env;
407 _notyped_var := (Hashtbl.create 100);
408
409
410 let bigf = { Visitor_c.default_visitor_c with
411
412 Visitor_c.kexpr = (fun (k,bigf) expr ->
413 k expr; (* recurse to set the types-ref of sub expressions *)
414 let ty =
415 match Ast_c.unwrap_expr expr with
416 (* todo: should analyse the 's' for int to know if unsigned or not *)
417 | Constant (String (s,kind)) -> make_info (type_of_s "char *")
418 | Constant (Char (s,kind)) -> make_info (type_of_s "char")
419 | Constant (Int (s)) -> make_info (type_of_s "int")
420 | Constant (Float (s,kind)) ->
421 let iinull = [] in
422 make_info
423 ((Ast_c.nQ, (BaseType (FloatType kind), iinull)),
424 Ast_c.NotLocalVar)
425
426 (* don't want a warning on the Ident that are a FunCall *)
427 | FunCall (((Ident f, typ), ii), args) ->
428 args +> List.iter (fun (e,ii) ->
429 Visitor_c.vk_argument bigf e
430 );
431 noTypeHere
432
433 | Ident (s) ->
434 (match (Common.optionise (fun () -> lookup_var s !_scoped_env)) with
435 | Some ((typ,local),_nextenv) ->
436 make_info ((typedef_fix typ !_scoped_env),local)
437 | None ->
438 if not (s =~ "[A-Z_]+") (* if macro then no warning *)
439 then
440 if not (Hashtbl.mem !_notyped_var s)
441 then begin
442 pr2 ("Type_annoter: not finding type for " ^ s);
443 Hashtbl.add !_notyped_var s true;
444 end;
445 noTypeHere
446 )
447 | Unary (e, UnMinus) | Unary (e, UnPlus) -> make_info (type_of_s "int")
448 | Unary (e, DeRef)
449 | ArrayAccess (e, _) ->
450 (Ast_c.get_type_expr e) +> do_with_type (fun t ->
451 (* todo: maybe not good env !! *)
452 match Ast_c.unwrap_typeC (type_unfold_one_step t !_scoped_env) with
453 | Pointer x
454 | Array (_, x) ->
455 make_info ((typedef_fix x !_scoped_env),Ast_c.NotLocalVar)
456 | _ -> noTypeHere
457 )
458
459 | RecordAccess (e, fld) ->
460 (Ast_c.get_type_expr e) +> do_with_type (fun t ->
461 match Ast_c.unwrap_typeC (type_unfold_one_step t !_scoped_env) with
462 | StructUnion (su, sopt, fields) ->
463 (try
464 (* todo: which env ? *)
465 make_info
466 ((typedef_fix (type_field fld (su, fields)) !_scoped_env),
467 Ast_c.NotLocalVar)
468 with Not_found ->
469 pr2
470 ("TYPE-ERROR: field '" ^ fld ^ "' does not belong in" ^
471 " struct '"^(match sopt with Some s -> s |_ -> "<anon>")^
472 "'");
473 noTypeHere
474 )
475 | _ -> noTypeHere
476 )
477
478 | RecordPtAccess (e, fld) ->
479 (Ast_c.get_type_expr e) +> do_with_type (fun t ->
480 match Ast_c.unwrap_typeC (type_unfold_one_step t !_scoped_env) with
481 | Pointer (t) ->
482 (match Ast_c.unwrap_typeC (type_unfold_one_step t !_scoped_env)
483 with
484 | StructUnion (su, sopt, fields) ->
485 (try
486 (* todo: which env ? *)
487 make_info
488 ((typedef_fix (type_field fld (su, fields)) !_scoped_env),
489 Ast_c.NotLocalVar)
490 with Not_found ->
491 pr2
492 ("TYPE-ERROR: field '" ^ fld ^ "' does not belong in" ^
493 " struct '"^(match sopt with Some s -> s |_ -> "<anon>")^
494 "'");
495 noTypeHere
496 )
497
498 | _ -> noTypeHere
499 )
500 | _ -> noTypeHere
501 )
502 | Cast (t, e) ->
503 (* todo: add_types_expr [t] e ? *)
504 make_info
505 ((typedef_fix (Lib.al_type t) !_scoped_env),Ast_c.NotLocalVar)
506
507 (* todo: check e2 ? *)
508 | Assignment (e1, op, e2) ->
509 Ast_c.get_type_expr e1
510 | ParenExpr e ->
511 Ast_c.get_type_expr e
512
513 | _ -> noTypeHere
514 in
515 Ast_c.set_type_expr expr ty
516
517 );
518
519 Visitor_c.kstatement = (fun (k, bigf) st ->
520 match st with
521 | Compound statxs, ii -> do_in_new_scope (fun () -> k st);
522 | _ -> k st
523
524 );
525 Visitor_c.kdecl = (fun (k, bigf) d ->
526 (match d with
527 | (DeclList (xs, ii)) ->
528 xs +> List.iter (fun ({v_namei = var; v_type = t;
529 v_storage = sto; v_local = local}, iicomma) ->
530
531 let local =
532 match local with
533 Ast_c.NotLocalDecl -> Ast_c.NotLocalVar
534 | Ast_c.LocalDecl -> Ast_c.LocalVar (offset t) in
535
536 (* to add possible definition in type found in Decl *)
537 Visitor_c.vk_type bigf t;
538
539 var +> do_option (fun ((s, ini), ii_s_ini) ->
540 match sto with
541 | StoTypedef, _inline ->
542 add_binding (TypeDef (s,Lib.al_type t)) true;
543 | _ ->
544 add_binding (VarOrFunc (s, (Lib.al_type t, local))) true;
545 (* int x = sizeof(x) is legal so need process ini *)
546 ini +> Common.do_option (fun ini ->
547 Visitor_c.vk_ini bigf ini);
548 );
549 );
550 | _ -> k d
551 );
552
553 );
554
555 Visitor_c.ktype = (fun (k, bigf) typ ->
556 let (q, t) = Lib.al_type typ in
557 match t with
558 | StructUnion (su, Some s, structType),ii ->
559 add_binding (StructUnionNameDef (s, ((su, structType),ii))) true;
560 k typ (* todo: restrict ? new scope so use do_in_scope ? *)
561
562
563 (* TODO: if have a TypeName, then maybe can fill the option
564 * information.
565 *)
566 | _ -> k typ
567
568 );
569
570 Visitor_c.ktoplevel = (fun (k, bigf) elem ->
571 _notyped_var := Hashtbl.create 100;
572 match elem with
573 | Definition def ->
574 let {f_name = funcs;
575 f_type = ((returnt, (paramst, b)) as ftyp);
576 f_storage = sto;
577 f_body = statxs},ii = def
578 in
579 let (i1, i2) =
580 match ii with
581 | is::iifunc1::iifunc2::ibrace1::ibrace2::ifakestart::isto ->
582 iifunc1, iifunc2
583 | _ -> raise Impossible
584 in
585 let typ' = Lib.al_type (Ast_c.nQ, (FunctionType ftyp, [i1;i2])) in
586
587 add_binding (VarOrFunc (funcs, (typ',islocal i1.Ast_c.pinfo))) false;
588 do_in_new_scope (fun () ->
589 paramst +> List.iter (fun (((b, s, t), _),_) ->
590 match s with
591 | Some s ->
592 let local = Ast_c.LocalVar (offset t) in
593 add_binding (VarOrFunc (s,(Lib.al_type t,local))) true
594 | None -> pr2 "no type, certainly because Void type ?"
595 );
596 k elem
597 );
598 | _ -> k elem
599 );
600 }
601 in
602
603 prog +> List.map (fun elem ->
604 let beforeenv = !_scoped_env in
605 Visitor_c.vk_toplevel bigf elem;
606 let afterenv = !_scoped_env in
607 (elem, (beforeenv, afterenv))
608 )
609
610 and offset (_,(ty,iis)) =
611 match iis with
612 ii::_ -> ii.Ast_c.pinfo
613 | _ -> failwith "type has no text; need to think again"
614
615
616 let annotate_test_expressions prog =
617 let rec propagate_test e =
618 let ((e_term,info),_) = e in
619 let (ty,_) = !info in
620 info := (ty,Test);
621 match e_term with
622 Binary(e1,Logical(AndLog),e2)
623 | Binary(e1,Logical(OrLog),e2) -> propagate_test e1; propagate_test e2
624 | Unary(e1,Not) -> propagate_test e1
625 | ParenExpr(e) -> propagate_test e
626 | _ -> () in
627
628 let bigf = { Visitor_c.default_visitor_c with
629 Visitor_c.kexpr = (fun (k,bigf) expr ->
630 (match unwrap expr with
631 (CondExpr(e,_,_),_) -> propagate_test e
632 | _ -> ());
633 k expr);
634 Visitor_c.kstatement = (fun (k, bigf) st ->
635 match unwrap st with
636 Selection(s) ->
637 (match s with If(e1,s1,s2) -> propagate_test e1 | _ -> ());
638 k st;
639 | Iteration(i) ->
640 (match i with
641 While(e,s) -> propagate_test e
642 | DoWhile(s,e) -> propagate_test e
643 | For(_,es,_,_) ->
644 (match unwrap es with Some e -> propagate_test e | None -> ())
645 | _ -> ());
646 k st
647 | _ -> k st) } in
648 (prog +> List.iter (fun elem ->
649 Visitor_c.vk_toplevel bigf elem
650 ))
651
652 let annotate_program a types_needed =
653 Common.profile_code "annotate_type"
654 (fun () prog ->
655 let res =
656 if true (*types_needed*)
657 then annotate_program2 a prog
658 else prog +> List.map (fun c -> c, (initial_env, initial_env)) in
659 annotate_test_expressions prog;
660 res)