Release coccinelle-0.2.5-rc2
[bpt/coccinelle.git] / parsing_c / type_annoter_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes,
5 * Copyright (C) 2009 University of Urbana Champaign
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License (GPL)
9 * version 2 as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * file license.txt for more details.
15 *)
16
17 open Common
18
19 open Ast_c
20
21 module Lib = Lib_parsing_c
22
23 (*****************************************************************************)
24 (* Prelude *)
25 (*****************************************************************************)
26 (* History:
27 * - Done a first type checker in 2002, cf typing-semantic/, but
28 * was assuming that have all type info, and so was assuming had called
29 * cpp and everything was right.
30 * - Wrote this file, in 2006?, as we added pattern matching on type
31 * in coccinelle. Partial type annotater.
32 * - Julia extended it in 2008? to have localvar/notlocalvar and
33 * test/notest information, again used by coccinelle.
34 * - I extended it in Fall 2008 to have more type information for the
35 * global analysis. I also added some optimisations to process
36 * included code faster.
37 *
38 *
39 * Design choices. Can either do:
40 * - a kind of inferer
41 * - can first do a simple inferer, that just pass context
42 * - then a real inferer, managing partial info.
43 * type context = fullType option
44 *
45 * - extract the information from the .h files
46 * (so no inference at all needed)
47 *
48 * Difference with julia's code in parsing_cocci/type_infer.ml:
49 * - She handles just the variable namespace. She does not type
50 * field access or enum or macros. This is because cocci programs are
51 * usually simple and have no structure definition or macro definitions
52 * that we need to type anyway.
53 * - She does more propagation.
54 * - She does not have to handle the typedef isomorphism which force me
55 * to use those typedef_fix and type_unfold_one_step
56 * - She does not handle I think the function pointer C isomorphism.
57 *
58 * - She has a cleaner type_cocci without any info. In my case
59 * I need to do those ugly al_type, or generate fake infos.
60 * - She has more compact code. Perhaps because she does not have to
61 * handle the extra exp_info that she added on me :) So I need those
62 * do_with_type, make_info_xxx, etc.
63 *
64 * Note: if need to debug this annotater, use -show_trace_profile, it can
65 * help. You can also set the typedef_debug flag below.
66 *
67 *
68 *
69 * todo: expression contain types, and statements, which in turn can contain
70 * expression, so need recurse. Need define an annote_statement and
71 * annotate_type.
72 *
73 * todo: how deal with typedef isomorphisms ? How store them in Ast_c ?
74 * store all posible variations in ast_c ? a list of type instead of just
75 * the type ?
76 *
77 * todo: how to handle multiple possible definitions for entities like
78 * struct or typedefs ? Because of ifdef, we should store list of
79 * possibilities sometimes.
80 *
81 * todo: define a new type ? like type_cocci ? where have a bool ?
82 *
83 * semi: How handle scope ? When search for type of field, we return
84 * a type, but this type makes sense only in a certain scope.
85 * We could add a tag to each typedef, structUnionName to differentiate
86 * them and also associate in ast_c to the type the scope
87 * of this type, the env that were used to define this type.
88 *
89 * todo: handle better the search in previous env, the env'. Cf the
90 * termination problem in typedef_fix when I was searching in the same
91 * env.
92 *
93 *)
94
95 (*****************************************************************************)
96 (* Wrappers *)
97 (*****************************************************************************)
98 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_type
99
100 (*****************************************************************************)
101 (* Environment *)
102 (*****************************************************************************)
103
104 (* The different namespaces from stdC manual:
105 *
106 * You introduce two new name spaces with every block that you write.
107 *
108 * One name space includes all
109 * - functions,
110 * - objects,
111 * - type definitions,
112 * - and enumeration constants
113 * that you declare or define within the block.
114 *
115 * The other name space includes all
116 * - enumeration,
117 * - structure,
118 * - and union
119 * *tags* that you define within the block.
120 *
121 * You introduce a new member name space with every structure or union
122 * whose content you define. You identify a member name space by the
123 * type of left operand that you write for a member selection
124 * operator, as in x.y or p->y. A member name space ends with the end
125 * of the block in which you declare it.
126 *
127 * You introduce a new goto label name space with every function
128 * definition you write. Each goto label name space ends with its
129 * function definition.
130 *)
131
132 (* But I don't try to do a type-checker, I try to "resolve" type of var
133 * so don't need make difference between namespaces here.
134 *
135 * But, why not make simply a (string, kindstring) assoc ?
136 * Because we dont want that a variable shadow a struct definition, because
137 * they are still in 2 different namespace. But could for typedef,
138 * because VarOrFunc and Typedef are in the same namespace.
139 * But could do a record as in c_info.ml
140 *)
141
142
143 (* This type contains all "ident" like notion of C. Each time in Ast_c
144 * you have a string type (as in expression, function name, fields)
145 * then you need to manage the scope of this ident.
146 *
147 * The wrap for StructUnionNameDef contain the whole ii, the i for
148 * the string, the structUnion and the structType.
149 *
150 * Put Macro here ? after all the scoping rules for cpp macros is different
151 * and so does not vanish after the closing '}'.
152 *
153 * todo: EnumDef
154 *)
155 type namedef =
156 | VarOrFunc of string * Ast_c.exp_type
157 | EnumConstant of string * string option
158
159 (* also used for macro type aliases *)
160 | TypeDef of string * fullType
161 (* the structType contains nested "idents" with struct scope *)
162 | StructUnionNameDef of string * (structUnion * structType) wrap
163
164 (* cppext: *)
165 | Macro of string * (define_kind * define_val)
166
167
168 (* Because have nested scope, have nested list, hence the list list.
169 *
170 * opti? use a hash to accelerate ? hmm but may have some problems
171 * with hash to handle recursive lookup. For instance for the typedef
172 * example where have mutually recursive definition of the type,
173 * we must take care to not loop by starting the second search
174 * from the previous environment. With the list scheme in
175 * lookup_env below it's quite easy to do. With hash it may be
176 * more complicated.
177 *)
178 type environment = namedef list list
179
180
181 (* ------------------------------------------------------------ *)
182 (* can be modified by the init_env function below, by
183 * the file environment_unix.h
184 *)
185 let initial_env = ref [
186 [VarOrFunc("NULL",
187 (Lib.al_type (Parse_c.type_of_string "void *"),
188 Ast_c.NotLocalVar));
189
190 (*
191 VarOrFunc("malloc",
192 (Lib.al_type(Parse_c.type_of_string "void* (*)(int size)"),
193 Ast_c.NotLocalVar));
194 VarOrFunc("free",
195 (Lib.al_type(Parse_c.type_of_string "void (*)(void *ptr)"),
196 Ast_c.NotLocalVar));
197 *)
198 ]
199 ]
200
201
202 let typedef_debug = ref false
203
204
205 (* ------------------------------------------------------------ *)
206 (* generic, lookup and also return remaining env for further lookup *)
207 let rec lookup_env2 f env =
208 match env with
209 | [] -> raise Not_found
210 | []::zs -> lookup_env2 f zs
211 | (x::xs)::zs ->
212 (match f x with
213 | None -> lookup_env2 f (xs::zs)
214 | Some y -> y, xs::zs
215 )
216 let lookup_env a b =
217 Common.profile_code "TAC.lookup_env" (fun () -> lookup_env2 a b)
218
219
220
221 let member_env lookupf env =
222 try
223 let _ = lookupf env in
224 true
225 with Not_found -> false
226
227
228
229
230 (* ------------------------------------------------------------ *)
231
232
233 let lookup_var s env =
234 let f = function
235 | VarOrFunc (s2, typ) -> if s2 =$= s then Some typ else None
236 | _ -> None
237 in
238 lookup_env f env
239
240 let lookup_typedef s env =
241 if !typedef_debug then pr2 ("looking for: " ^ s);
242 let f = function
243 | TypeDef (s2, typ) -> if s2 =$= s then Some typ else None
244 | _ -> None
245 in
246 lookup_env f env
247
248 let lookup_structunion (_su, s) env =
249 let f = function
250 | StructUnionNameDef (s2, typ) -> if s2 =$= s then Some typ else None
251 | _ -> None
252 in
253 lookup_env f env
254
255 let lookup_macro s env =
256 let f = function
257 | Macro (s2, typ) -> if s2 =$= s then Some typ else None
258 | _ -> None
259 in
260 lookup_env f env
261
262 let lookup_enum s env =
263 let f = function
264 | EnumConstant (s2, typ) -> if s2 =$= s then Some typ else None
265 | _ -> None
266 in
267 lookup_env f env
268
269
270 let lookup_typedef a b =
271 Common.profile_code "TAC.lookup_typedef" (fun () -> lookup_typedef a b)
272
273
274
275 (*****************************************************************************)
276 (* "type-lookup" *)
277 (*****************************************************************************)
278
279 (* find_final_type is used to know to what type a field correspond in
280 * x.foo. Sometimes the type of x is a typedef or a structName in which
281 * case we must look in environment to find the complete type, here
282 * structUnion that contains the information.
283 *
284 * Because in C one can redefine in nested blocks some typedefs,
285 * struct, or variables, we have a static scoping resolving process.
286 * So, when we look for the type of a var, if this var is in an
287 * enclosing block, then maybe its type refer to a typdef of this
288 * enclosing block, so must restart the "type-resolving" of this
289 * typedef from this enclosing block, not from the bottom. So our
290 * "resolving-type functions" take an env and also return an env from
291 * where the next search must be performed. *)
292
293 (*
294 let rec find_final_type ty env =
295
296 match Ast_c.unwrap_typeC ty with
297 | BaseType x -> (BaseType x) +> Ast_c.rewrap_typeC ty
298
299 | Pointer t -> (Pointer (find_final_type t env)) +> Ast_c.rewrap_typeC ty
300 | Array (e, t) -> Array (e, find_final_type t env) +> Ast_c.rewrap_typeC ty
301
302 | StructUnion (sopt, su) -> StructUnion (sopt, su) +> Ast_c.rewrap_typeC ty
303
304 | FunctionType t -> (FunctionType t) (* todo ? *) +> Ast_c.rewrap_typeC ty
305 | Enum (s, enumt) -> (Enum (s, enumt)) (* todo? *) +> Ast_c.rewrap_typeC ty
306 | EnumName s -> (EnumName s) (* todo? *) +> Ast_c.rewrap_typeC ty
307
308 | StructUnionName (su, s) ->
309 (try
310 let ((structtyp,ii), env') = lookup_structunion (su, s) env in
311 Ast_c.nQ, (StructUnion (Some s, structtyp), ii)
312 (* old: +> Ast_c.rewrap_typeC ty
313 * but must wrap with good ii, otherwise pretty_print_c
314 * will be lost and raise some Impossible
315 *)
316 with Not_found ->
317 ty
318 )
319
320 | TypeName s ->
321 (try
322 let (t', env') = lookup_typedef s env in
323 find_final_type t' env'
324 with Not_found ->
325 ty
326 )
327
328 | ParenType t -> find_final_type t env
329 | Typeof e -> failwith "typeof"
330 *)
331
332
333
334
335 (* ------------------------------------------------------------ *)
336 let rec type_unfold_one_step ty env =
337
338 match Ast_c.unwrap_typeC ty with
339 | BaseType x -> ty
340 | Pointer t -> ty
341 | Array (e, t) -> ty
342
343 | StructUnion (sopt, su, fields) -> ty
344
345 | FunctionType t -> ty
346 | Enum (s, enumt) -> ty
347
348 | EnumName s -> ty (* todo: look in env when will have EnumDef *)
349
350 | StructUnionName (su, s) ->
351 (try
352 let (((su,fields),ii), env') = lookup_structunion (su, s) env in
353 Ast_c.mk_ty (StructUnion (su, Some s, fields)) ii
354 (* old: +> Ast_c.rewrap_typeC ty
355 * but must wrap with good ii, otherwise pretty_print_c
356 * will be lost and raise some Impossible
357 *)
358 with Not_found ->
359 ty
360 )
361
362 | TypeName (name, _typ) ->
363 let s = Ast_c.str_of_name name in
364 (try
365 if !typedef_debug then pr2 "type_unfold_one_step: lookup_typedef";
366 let (t', env') = lookup_typedef s env in
367 type_unfold_one_step t' env'
368 with Not_found ->
369 ty
370 )
371
372 | ParenType t -> type_unfold_one_step t env
373 | TypeOfExpr e ->
374 pr2_once ("Type_annoter: not handling typeof");
375 ty
376 | TypeOfType t -> type_unfold_one_step t env
377
378
379
380
381
382
383
384
385
386 (* normalizer. can be seen as the opposite of the previous function as
387 * we "fold" at least for the structUnion. Should return something that
388 * Type_c.is_completed_fullType likes, something that makes it easier
389 * for the programmer to work on, that has all the needed information
390 * for most tasks.
391 *)
392 let rec typedef_fix ty env =
393 match Ast_c.unwrap_typeC ty with
394 | BaseType x ->
395 ty
396 | Pointer t ->
397 Pointer (typedef_fix t env) +> Ast_c.rewrap_typeC ty
398 | Array (e, t) ->
399 Array (e, typedef_fix t env) +> Ast_c.rewrap_typeC ty
400 | StructUnion (su, sopt, fields) ->
401 (* normalize, fold.
402 * todo? but what if correspond to a nested struct def ?
403 *)
404 Type_c.structdef_to_struct_name ty
405 | FunctionType ft ->
406 (FunctionType ft) (* todo ? *) +> Ast_c.rewrap_typeC ty
407 | Enum (s, enumt) ->
408 (Enum (s, enumt)) (* todo? *) +> Ast_c.rewrap_typeC ty
409 | EnumName s ->
410 (EnumName s) (* todo? *) +> Ast_c.rewrap_typeC ty
411
412 (* we prefer StructUnionName to StructUnion when it comes to typed metavar *)
413 | StructUnionName (su, s) -> ty
414
415 (* keep the typename but complete with more information *)
416 | TypeName (name, typ) ->
417 let s = Ast_c.str_of_name name in
418 (match typ with
419 | Some _ ->
420 pr2 ("typedef value already there:" ^ s);
421 ty
422 | None ->
423 (try
424 if !typedef_debug then pr2 "typedef_fix: lookup_typedef";
425 let (t', env') = lookup_typedef s env in
426
427 (* bugfix: termination bug if use env instead of env' below, because
428 * can have some weird mutually recursive typedef which
429 * each new type alias search for its mutual def.
430 *)
431 TypeName (name, Some (typedef_fix t' env')) +> Ast_c.rewrap_typeC ty
432 with Not_found ->
433 ty
434 ))
435
436 (* remove paren for better matching with typed metavar. kind of iso again *)
437 | ParenType t ->
438 typedef_fix t env
439 | TypeOfExpr e ->
440 pr2_once ("Type_annoter: not handling typeof");
441 ty
442
443 | TypeOfType t ->
444 typedef_fix t env
445
446
447 (*****************************************************************************)
448 (* Helpers, part 1 *)
449 (*****************************************************************************)
450
451 let type_of_s2 s =
452 (Lib.al_type (Parse_c.type_of_string s))
453 let type_of_s a =
454 Common.profile_code "Type_c.type_of_s" (fun () -> type_of_s2 a)
455
456
457 (* pad: pb on:
458 * /home/pad/software-os-src2/freebsd/contrib/ipfilter/netinet/ip_fil_freebsd.c
459 * because in the code there is:
460 * static iss_seq_off = 0;
461 * which in the parser was generating a default int without a parse_info.
462 * I now add a fake parse_info for such default int so no more failwith
463 * normally.
464 *)
465
466 let rec is_simple_expr expr =
467 match Ast_c.unwrap_expr expr with
468 (* todo? handle more special cases ? *)
469
470 | Ident _ ->
471 true
472 | Constant (_) ->
473 true
474 | Unary (op, e) ->
475 true
476 | Binary (e1, op, e2) ->
477 true
478 | Cast (t, e) ->
479 true
480 | ParenExpr (e) -> is_simple_expr e
481
482 | _ -> false
483
484 (*****************************************************************************)
485 (* Typing rules *)
486 (*****************************************************************************)
487 (* now in type_c.ml *)
488
489
490
491 (*****************************************************************************)
492 (* (Semi) Globals, Julia's style *)
493 (*****************************************************************************)
494
495 (* opti: cache ? use hash ? *)
496 let _scoped_env = ref !initial_env
497
498 (* memoise unnanoted var, to avoid too much warning messages *)
499 let _notyped_var = ref (Hashtbl.create 100)
500
501 let new_scope() = _scoped_env := []::!_scoped_env
502 let del_scope() = _scoped_env := List.tl !_scoped_env
503
504 let do_in_new_scope f =
505 begin
506 new_scope();
507 let res = f() in
508 del_scope();
509 res
510 end
511
512 let add_in_scope namedef =
513 let (current, older) = Common.uncons !_scoped_env in
514 _scoped_env := (namedef::current)::older
515
516
517 (* ------------------------------------------------------------ *)
518
519 (* sort of hackish... *)
520 let islocal info =
521 if List.length (!_scoped_env) =|= List.length !initial_env
522 then Ast_c.NotLocalVar
523 else Ast_c.LocalVar info
524
525 (* ------------------------------------------------------------ *)
526 (* the warning argument is here to allow some binding to overwrite an
527 * existing one. With function, we first have the prototype and then the def,
528 * and the def binding with the same string is not an error.
529 *
530 * todo?: but if we define two times the same function, then we will not
531 * detect it :( it would require to make a diff between adding a binding
532 * from a prototype and from a definition.
533 *
534 * opti: disabling the check_annotater flag have some important
535 * performance benefit.
536 *
537 *)
538 let add_binding2 namedef warning =
539 let (current_scope, _older_scope) = Common.uncons !_scoped_env in
540
541 if !Flag_parsing_c.check_annotater then begin
542 (match namedef with
543 | VarOrFunc (s, typ) ->
544 if Hashtbl.mem !_notyped_var s
545 then pr2 ("warning: found typing information for a variable that was" ^
546 "previously unknown:" ^ s);
547 | _ -> ()
548 );
549
550 let (memberf, s) =
551 (match namedef with
552 | VarOrFunc (s, typ) ->
553 member_env (lookup_var s), s
554 | TypeDef (s, typ) ->
555 member_env (lookup_typedef s), s
556 | StructUnionNameDef (s, (su, typ)) ->
557 member_env (lookup_structunion (su, s)), s
558 | Macro (s, body) ->
559 member_env (lookup_macro s), s
560 | EnumConstant (s, body) ->
561 member_env (lookup_enum s), s
562 ) in
563
564 if memberf [current_scope] && warning
565 then pr2 ("Type_annoter: warning, " ^ s ^
566 " is already in current binding" ^ "\n" ^
567 " so there is a weird shadowing");
568 end;
569 add_in_scope namedef
570
571 let add_binding namedef warning =
572 Common.profile_code "TAC.add_binding" (fun () -> add_binding2 namedef warning)
573
574
575
576 (*****************************************************************************)
577 (* Helpers, part 2 *)
578 (*****************************************************************************)
579
580 let lookup_opt_env lookupf s =
581 Common.optionise (fun () ->
582 lookupf s !_scoped_env
583 )
584
585 let unwrap_unfold_env2 typ =
586 Ast_c.unwrap_typeC
587 (type_unfold_one_step typ !_scoped_env)
588 let unwrap_unfold_env typ =
589 Common.profile_code "TAC.unwrap_unfold_env" (fun () -> unwrap_unfold_env2 typ)
590
591 let typedef_fix a b =
592 Common.profile_code "TAC.typedef_fix" (fun () -> typedef_fix a b)
593
594 let make_info_def_fix x =
595 Type_c.make_info_def (typedef_fix x !_scoped_env)
596
597 let make_info_fix (typ, local) =
598 Type_c.make_info ((typedef_fix typ !_scoped_env),local)
599
600
601 let make_info_def = Type_c.make_info_def
602
603 (*****************************************************************************)
604 (* Main typer code, put later in a visitor *)
605 (*****************************************************************************)
606
607 let annotater_expr_visitor_subpart = (fun (k,bigf) expr ->
608
609 let ty =
610 match Ast_c.unwrap_expr expr with
611
612 (* -------------------------------------------------- *)
613 (* todo: should analyse the 's' for int to know if unsigned or not *)
614 | Constant (String (s,kind)) -> make_info_def (type_of_s "char []")
615 | Constant MultiString _ -> make_info_def (type_of_s "char []")
616 | Constant (Char (s,kind)) -> make_info_def (type_of_s "char")
617 | Constant (Int (s,kind)) ->
618 (* this seems really unpleasant, but perhaps the type needs to be set
619 up in some way that allows pretty printing *)
620 make_info_def
621 (match kind with
622 (* matches limited by what is generated in lexer_c.mll *)
623 Si(Signed,CInt) -> type_of_s "int"
624 | Si(UnSigned,CInt) -> type_of_s "unsigned int"
625 | Si(Signed,CLong) -> type_of_s "long"
626 | Si(UnSigned,CLong) -> type_of_s "unsigned long"
627 | Si(Signed,CLongLong) -> type_of_s "long long"
628 | Si(UnSigned,CLongLong) -> type_of_s "unsigned long long"
629 | _ -> failwith "unexpected kind for constant")
630 | Constant (Float (s,kind)) ->
631 let fake = Ast_c.fakeInfo (Common.fake_parse_info) in
632 let fake = Ast_c.rewrap_str "float" fake in
633 let iinull = [fake] in
634 make_info_def (Ast_c.mk_ty (BaseType (FloatType kind)) iinull)
635
636
637 (* -------------------------------------------------- *)
638 (* note: could factorize this code with the code for Ident
639 * and the other code for Funcall below. But as the Ident can be
640 * a macro-func, I prefer to handle it separately. So
641 * this rule can handle the macro-func, the Ident-rule can handle
642 * the macro-var, and the other FunCall-rule the regular
643 * function calls through fields.
644 * Also as I don't want a warning on the Ident that are a FunCall,
645 * easier to have a rule separate from the Ident rule.
646 *)
647 | FunCall (e1, args) ->
648 (match Ast_c.unwrap_expr e1 with
649 | Ident (ident) ->
650 (* recurse *)
651 args +> List.iter (fun (e,ii) ->
652 (* could typecheck if arguments agree with prototype *)
653 Visitor_c.vk_argument bigf e
654 );
655 let s = Ast_c.str_of_name ident in
656 (match lookup_opt_env lookup_var s with
657 | Some ((typ,local),_nextenv) ->
658
659 (* set type for ident *)
660 let tyinfo = make_info_fix (typ, local) in
661 Ast_c.set_type_expr e1 tyinfo;
662
663 (match unwrap_unfold_env typ with
664 | FunctionType (ret, params) -> make_info_def ret
665
666 (* can be function pointer, C have an iso for that,
667 * same pfn() syntax than regular function call.
668 *)
669 | Pointer (typ2) ->
670 (match unwrap_unfold_env typ2 with
671 | FunctionType (ret, params) -> make_info_def ret
672 | _ -> Type_c.noTypeHere
673 )
674 | _ -> Type_c.noTypeHere
675 )
676 | None ->
677
678 (match lookup_opt_env lookup_macro s with
679 | Some ((defkind, defval), _nextenv) ->
680 (match defkind, defval with
681 | DefineFunc _, DefineExpr e ->
682 let rettype = Ast_c.get_onlytype_expr e in
683
684 (* todo: could also set type for ident ?
685 have return type and at least type of concrete
686 parameters so can generate a fake FunctionType
687 *)
688 let macrotype_opt =
689 Type_c.fake_function_type rettype args
690 in
691
692 macrotype_opt +> Common.do_option (fun t ->
693 pr2 ("Type_annotater: generate fake function type" ^
694 "for macro: " ^ s);
695 let tyinfo = make_info_def_fix t in
696 Ast_c.set_type_expr e1 tyinfo;
697 );
698
699 Ast_c.get_type_expr e
700 | DefineVar, _ ->
701 pr2 ("Type_annoter: not a macro-func: " ^ s);
702 Type_c.noTypeHere
703 | Undef, _ ->
704 pr2 ("Type_annoter: not a macro-func: " ^ s);
705 Type_c.noTypeHere
706 | DefineFunc _, _ ->
707 (* normally the FunCall case should have caught it *)
708 pr2 ("Type_annoter: not a macro-func-expr: " ^ s);
709 Type_c.noTypeHere
710 )
711 | None ->
712 pr2_once ("type_annotater: no type for function ident: " ^ s);
713 Type_c.noTypeHere
714 )
715 )
716
717
718 | _e ->
719 k expr;
720
721 (Ast_c.get_type_expr e1) +> Type_c.do_with_type (fun typ ->
722 (* copy paste of above *)
723 (match unwrap_unfold_env typ with
724 | FunctionType (ret, params) -> make_info_def ret
725 | Pointer (typ) ->
726 (match unwrap_unfold_env typ with
727 | FunctionType (ret, params) -> make_info_def ret
728 | _ -> Type_c.noTypeHere
729 )
730 | _ -> Type_c.noTypeHere
731 )
732 )
733 )
734
735
736 (* -------------------------------------------------- *)
737 | Ident (ident) ->
738 let s = Ast_c.str_of_name ident in
739 (match lookup_opt_env lookup_var s with
740 | Some ((typ,local),_nextenv) ->
741 make_info_fix (typ,local)
742 | None ->
743 (match lookup_opt_env lookup_macro s with
744 | Some ((defkind, defval), _nextenv) ->
745 (match defkind, defval with
746 | DefineVar, DefineExpr e ->
747 Ast_c.get_type_expr e
748 | DefineVar, _ ->
749 pr2 ("Type_annoter: not a expression: " ^ s);
750 Type_c.noTypeHere
751 | DefineFunc _, _ ->
752 (* normally the FunCall case should have catch it *)
753 pr2 ("Type_annoter: not a macro-var: " ^ s);
754 Type_c.noTypeHere
755 | Undef, _ ->
756 pr2 ("Type_annoter: not a expression: " ^ s);
757 Type_c.noTypeHere
758 )
759 | None ->
760 (match lookup_opt_env lookup_enum s with
761 | Some (_, _nextenv) ->
762 make_info_def (type_of_s "int")
763 | None ->
764 if not (s =~ "[A-Z_]+") (* if macro then no warning *)
765 then
766 if !Flag_parsing_c.check_annotater then
767 if not (Hashtbl.mem !_notyped_var s)
768 then begin
769 pr2 ("Type_annoter: no type found for: " ^ s);
770 Hashtbl.add !_notyped_var s true;
771 end
772 else ()
773 else
774 pr2 ("Type_annoter: no type found for: " ^ s)
775 ;
776 Type_c.noTypeHere
777 )
778 )
779 )
780
781 (* -------------------------------------------------- *)
782 (* C isomorphism on type on array and pointers *)
783 | Unary (e, DeRef)
784 | ArrayAccess (e, _) ->
785 k expr; (* recurse to set the types-ref of sub expressions *)
786
787 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t ->
788 (* todo: maybe not good env !! *)
789 match unwrap_unfold_env t with
790 | Pointer x
791 | Array (_, x) ->
792 make_info_def_fix x
793 | _ -> Type_c.noTypeHere
794
795 )
796
797 | Unary (e, GetRef) ->
798 k expr; (* recurse to set the types-ref of sub expressions *)
799
800 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t ->
801 (* must generate an element so that '=' can be used
802 * to compare type ?
803 *)
804 let fake = Ast_c.fakeInfo Common.fake_parse_info in
805 let fake = Ast_c.rewrap_str "*" fake in
806
807 let ft = Ast_c.mk_ty (Pointer t) [fake] in
808 make_info_def_fix ft
809 )
810
811
812 (* -------------------------------------------------- *)
813 (* fields *)
814 | RecordAccess (e, namefld)
815 | RecordPtAccess (e, namefld) as x ->
816
817 let fld = Ast_c.str_of_name namefld in
818
819 k expr; (* recurse to set the types-ref of sub expressions *)
820
821 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t ->
822
823 let topt =
824 match x with
825 | RecordAccess _ -> Some t
826 | RecordPtAccess _ ->
827 (match unwrap_unfold_env t with
828 | Pointer (t) -> Some t
829 | _ -> None
830 )
831 | _ -> raise Impossible
832
833 in
834 (match topt with
835 | None -> Type_c.noTypeHere
836 | Some t ->
837 match unwrap_unfold_env t with
838 | StructUnion (su, sopt, fields) ->
839 (try
840 (* todo: which env ? *)
841 make_info_def_fix
842 (Type_c.type_field fld (su, fields))
843 with
844 | Not_found ->
845 pr2 (spf
846 "TYPE-ERROR: field '%s' does not belong in struct %s"
847 fld (match sopt with Some s -> s |_ -> "<anon>"));
848 Type_c.noTypeHere
849 | Multi_found ->
850 pr2 "TAC:MultiFound";
851 Type_c.noTypeHere
852 )
853 | _ -> Type_c.noTypeHere
854 )
855 )
856
857
858
859 (* -------------------------------------------------- *)
860 | Cast (t, e) ->
861 k expr;
862 (* todo: if infer, can "push" info ? add_types_expr [t] e ? *)
863 make_info_def_fix (Lib.al_type t)
864
865 (* todo? lub, hmm maybe not, cos type must be e1 *)
866 | Assignment (e1, op, e2) ->
867 k expr;
868 (* value of an assignment is the value of the RHS expression, but its
869 type is the type of the lhs expression. Use the rhs exp if no
870 information is available *)
871 (match Ast_c.get_type_expr e1 with
872 (None,_) -> Ast_c.get_type_expr e2
873 | (Some ty,t) -> (Some ty,t))
874 | Sequence (e1, e2) ->
875 k expr;
876 Ast_c.get_type_expr e2
877
878 | Binary (e1, Logical _, e2) ->
879 k expr;
880 make_info_def (type_of_s "int")
881
882 (* todo: lub *)
883 | Binary (e1, Arith op, e2) ->
884 k expr;
885 Type_c.lub op (Type_c.get_opt_type e1) (Type_c.get_opt_type e2)
886
887 | CondExpr (cond, e1opt, e2) ->
888 k expr;
889 Ast_c.get_type_expr e2
890
891
892 | ParenExpr e ->
893 k expr;
894 Ast_c.get_type_expr e
895
896 | Infix (e, op) | Postfix (e, op) ->
897 k expr;
898 Ast_c.get_type_expr e
899
900 (* pad: julia wrote this ? *)
901 | Unary (e, UnPlus) ->
902 k expr; (* recurse to set the types-ref of sub expressions *)
903 make_info_def (type_of_s "int")
904 (* todo? can convert from unsigned to signed if UnMinus ? *)
905 | Unary (e, UnMinus) ->
906 k expr; (* recurse to set the types-ref of sub expressions *)
907 make_info_def (type_of_s "int")
908
909 | SizeOfType _|SizeOfExpr _ ->
910 k expr; (* recurse to set the types-ref of sub expressions *)
911 make_info_def (type_of_s "size_t")
912
913 | Constructor (ft, ini) ->
914 k expr; (* recurse to set the types-ref of sub expressions *)
915 make_info_def (Lib.al_type ft)
916
917 | Unary (e, Not) ->
918 k expr; (* recurse to set the types-ref of sub expressions *)
919 (* the result of ! is always 0 or 1, not the argument type *)
920 make_info_def (type_of_s "int")
921 | Unary (e, Tilde) ->
922 k expr; (* recurse to set the types-ref of sub expressions *)
923 Ast_c.get_type_expr e
924
925 (* -------------------------------------------------- *)
926 (* todo *)
927 | Unary (_, GetRefLabel) ->
928 k expr; (* recurse to set the types-ref of sub expressions *)
929 pr2_once "Type annotater:not handling GetRefLabel";
930 Type_c.noTypeHere
931 (* todo *)
932 | StatementExpr _ ->
933 k expr; (* recurse to set the types-ref of sub expressions *)
934 pr2_once "Type annotater:not handling StatementExpr";
935 Type_c.noTypeHere
936 (*
937 | _ -> k expr; Type_c.noTypeHere
938 *)
939
940 in
941 Ast_c.set_type_expr expr ty
942
943 )
944
945
946 (*****************************************************************************)
947 (* Visitor *)
948 (*****************************************************************************)
949
950 (* Processing includes that were added after a cpp_ast_c makes the
951 * type annotater quite slow, especially when the depth of cpp_ast_c is
952 * big. But for such includes the only thing we really want is to modify
953 * the environment to have enough type information. We don't need
954 * to type the expressions inside those includes (they will be typed
955 * when we process the include file directly). Here the goal is
956 * to not recurse.
957 *
958 * Note that as usually header files contain mostly structure
959 * definitions and defines, that means we still have to do lots of work.
960 * We only win on function definition bodies, but usually header files
961 * have just prototypes, or inline function definitions which anyway have
962 * usually a small body. But still, we win. It also makes clearer
963 * that when processing include as we just need the environment, the caller
964 * of this module can do further optimisations such as memorising the
965 * state of the environment after each header files.
966 *
967 *
968 * For sparse its makes the annotating speed goes from 9s to 4s
969 * For Linux the speedup is even better, from ??? to ???.
970 *
971 * Because There would be some copy paste with annotate_program, it is
972 * better to factorize code hence the just_add_in_env parameter below.
973 *
974 * todo? alternative optimisation for the include problem:
975 * - processing all headers files one time and construct big env
976 * - use hashtbl for env (but apparently not biggest problem)
977 *)
978
979 let rec visit_toplevel ~just_add_in_env ~depth elem =
980 let need_annotate_body = not just_add_in_env in
981
982 let bigf = { Visitor_c.default_visitor_c with
983
984 (* ------------------------------------------------------------ *)
985 Visitor_c.kcppdirective = (fun (k, bigf) directive ->
986 match directive with
987 (* do error messages for type annotater only for the real body of the
988 * file, not inside include.
989 *)
990 | Include {i_content = opt} ->
991 opt +> Common.do_option (fun (filename, program) ->
992 Common.save_excursion Flag_parsing_c.verbose_type (fun () ->
993 Flag_parsing_c.verbose_type := false;
994
995 (* old: Visitor_c.vk_program bigf program;
996 * opti: set the just_add_in_env
997 *)
998 program +> List.iter (fun elem ->
999 visit_toplevel ~just_add_in_env:true ~depth:(depth+1) elem
1000 )
1001 )
1002 )
1003
1004 | Define ((s,ii), (defkind, defval)) ->
1005
1006
1007 (* even if we are in a just_add_in_env phase, such as when
1008 * we process include, as opposed to the body of functions,
1009 * with macros we still to type the body of the macro as
1010 * the macro has no type and so we infer its type from its
1011 * body (and one day later maybe from its use).
1012 *)
1013 (match defval with
1014 (* can try to optimize and recurse only when the define body
1015 * is simple ?
1016 *)
1017
1018 | DefineExpr expr ->
1019 (* prevent macro-declared variables from leaking out *)
1020 do_in_new_scope (fun () ->
1021 if is_simple_expr expr
1022 (* even if not need_annotate_body, still recurse*)
1023 then k directive
1024 else
1025 if need_annotate_body
1026 then k directive)
1027 | _ ->
1028 do_in_new_scope (fun () ->
1029 if need_annotate_body
1030 then k directive)
1031 );
1032
1033 add_binding (Macro (s, (defkind, defval) )) true;
1034
1035 | PragmaAndCo _ -> ()
1036 );
1037
1038 (* ------------------------------------------------------------ *)
1039 (* main typer code *)
1040 (* ------------------------------------------------------------ *)
1041 Visitor_c.kexpr = annotater_expr_visitor_subpart;
1042
1043 (* ------------------------------------------------------------ *)
1044 Visitor_c.kstatement = (fun (k, bigf) st ->
1045 match Ast_c.unwrap_st st with
1046 | Compound statxs -> do_in_new_scope (fun () -> k st);
1047 | _ -> k st
1048 );
1049 (* ------------------------------------------------------------ *)
1050 Visitor_c.kdecl = (fun (k, bigf) d ->
1051 (match d with
1052 | (DeclList (xs, ii)) ->
1053 xs +> List.iter (fun ({v_namei = var; v_type = t;
1054 v_storage = sto; v_local = local} as x
1055 , iicomma) ->
1056
1057 (* to add possible definition in type found in Decl *)
1058 Visitor_c.vk_type bigf t;
1059
1060
1061 let local =
1062 match local with
1063 | Ast_c.NotLocalDecl -> Ast_c.NotLocalVar
1064 | Ast_c.LocalDecl -> Ast_c.LocalVar (Ast_c.info_of_type t)
1065 in
1066 var +> Common.do_option (fun (name, iniopt) ->
1067 let s = Ast_c.str_of_name name in
1068
1069 match sto with
1070 | StoTypedef, _inline ->
1071 add_binding (TypeDef (s,Lib.al_type t)) true;
1072 | _ ->
1073 add_binding (VarOrFunc (s, (Lib.al_type t, local))) true;
1074
1075 x.v_type_bis :=
1076 Some (typedef_fix (Lib.al_type t) !_scoped_env);
1077
1078 if need_annotate_body then begin
1079 (* int x = sizeof(x) is legal so need process ini *)
1080 iniopt +> Common.do_option (fun (info, ini) ->
1081 Visitor_c.vk_ini bigf ini
1082 );
1083 end
1084 );
1085 );
1086 | MacroDecl _ ->
1087 if need_annotate_body
1088 then k d
1089 );
1090
1091 );
1092
1093 (* ------------------------------------------------------------ *)
1094 Visitor_c.ktype = (fun (k, bigf) typ ->
1095 (* bugfix: have a 'Lib.al_type typ' before, but because we can
1096 * have enum with possible expression, we don't want to change
1097 * the ref of abstract-lined types, but the real one, so
1098 * don't al_type here
1099 *)
1100 let (_q, tbis) = typ in
1101 match Ast_c.unwrap_typeC typ with
1102 | StructUnion (su, Some s, structType) ->
1103 let structType' = Lib.al_fields structType in
1104 let ii = Ast_c.get_ii_typeC_take_care tbis in
1105 let ii' = Lib.al_ii ii in
1106 add_binding (StructUnionNameDef (s, ((su, structType'),ii'))) true;
1107
1108 if need_annotate_body
1109 then k typ (* todo: restrict ? new scope so use do_in_scope ? *)
1110
1111 | Enum (sopt, enums) ->
1112
1113 enums +> List.iter (fun ((name, eopt), iicomma) ->
1114
1115 let s = Ast_c.str_of_name name in
1116
1117 if need_annotate_body
1118 then eopt +> Common.do_option (fun (ieq, e) ->
1119 Visitor_c.vk_expr bigf e
1120 );
1121 add_binding (EnumConstant (s, sopt)) true;
1122 );
1123
1124
1125 (* TODO: if have a TypeName, then maybe can fill the option
1126 * information.
1127 *)
1128 | _ ->
1129 if need_annotate_body
1130 then k typ
1131
1132 );
1133
1134 (* ------------------------------------------------------------ *)
1135 Visitor_c.ktoplevel = (fun (k, bigf) elem ->
1136 _notyped_var := Hashtbl.create 100;
1137 match elem with
1138 | Definition def ->
1139 let {f_name = name;
1140 f_type = ((returnt, (paramst, b)) as ftyp);
1141 f_storage = sto;
1142 f_body = statxs;
1143 f_old_c_style = oldstyle;
1144 },ii
1145 = def
1146 in
1147 let (i1, i2) =
1148 match ii with
1149 (* what is iifunc1? it should be a type. jll
1150 * pad: it's the '(' in the function definition. The
1151 * return type is part of f_type.
1152 *)
1153 | iifunc1::iifunc2::ibrace1::ibrace2::ifakestart::isto ->
1154 iifunc1, iifunc2
1155 | _ -> raise Impossible
1156 in
1157 let funcs = Ast_c.str_of_name name in
1158
1159 (match oldstyle with
1160 | None ->
1161 let typ' =
1162 Lib.al_type (Ast_c.mk_ty (FunctionType ftyp) [i1;i2]) in
1163
1164 add_binding (VarOrFunc (funcs, (typ',islocal i1.Ast_c.pinfo)))
1165 false;
1166
1167 if need_annotate_body then
1168 do_in_new_scope (fun () ->
1169 paramst +> List.iter (fun ({p_namei= nameopt; p_type= t},_)->
1170 match nameopt with
1171 | Some name ->
1172 let s = Ast_c.str_of_name name in
1173 let local = Ast_c.LocalVar (Ast_c.info_of_type t) in
1174 add_binding (VarOrFunc (s,(Lib.al_type t,local))) true
1175 | None ->
1176 pr2 "no type, certainly because Void type ?"
1177 );
1178 (* recurse *)
1179 k elem
1180 );
1181 | Some oldstyle ->
1182 (* generate regular function type *)
1183
1184 pr2 "TODO generate type for function";
1185 (* add bindings *)
1186 if need_annotate_body then
1187 do_in_new_scope (fun () ->
1188 (* recurse. should naturally call the kdecl visitor and
1189 * add binding
1190 *)
1191 k elem;
1192 );
1193
1194 );
1195 | CppTop x ->
1196 (match x with
1197 | Define ((s,ii), (DefineVar, DefineType t)) ->
1198 add_binding (TypeDef (s,Lib.al_type t)) true;
1199 | _ -> k elem
1200 )
1201
1202 | Declaration _
1203
1204
1205
1206 | IfdefTop _
1207 | MacroTop _
1208 | EmptyDef _
1209 | NotParsedCorrectly _
1210 | FinalDef _
1211 ->
1212 k elem
1213 );
1214 }
1215 in
1216 if just_add_in_env
1217 then
1218 if depth > 1
1219 then Visitor_c.vk_toplevel bigf elem
1220 else
1221 Common.profile_code "TAC.annotate_only_included" (fun () ->
1222 Visitor_c.vk_toplevel bigf elem
1223 )
1224 else Visitor_c.vk_toplevel bigf elem
1225
1226 (*****************************************************************************)
1227 (* Entry point *)
1228 (*****************************************************************************)
1229 (* catch all the decl to grow the environment *)
1230
1231
1232 let rec (annotate_program2 :
1233 environment -> toplevel list -> (toplevel * environment Common.pair) list) =
1234 fun env prog ->
1235
1236 (* globals (re)initialialisation *)
1237 _scoped_env := env;
1238 _notyped_var := (Hashtbl.create 100);
1239
1240 prog +> List.map (fun elem ->
1241 let beforeenv = !_scoped_env in
1242 visit_toplevel ~just_add_in_env:false ~depth:0 elem;
1243 let afterenv = !_scoped_env in
1244 (elem, (beforeenv, afterenv))
1245 )
1246
1247
1248
1249
1250 (*****************************************************************************)
1251 (* Annotate test *)
1252 (*****************************************************************************)
1253
1254 (* julia: for coccinelle *)
1255 let annotate_test_expressions prog =
1256 let rec propagate_test e =
1257 let ((e_term,info),_) = e in
1258 let (ty,_) = !info in
1259 info := (ty,Test);
1260 match e_term with
1261 Binary(e1,Logical(AndLog),e2)
1262 | Binary(e1,Logical(OrLog),e2) -> propagate_test e1; propagate_test e2
1263 | Unary(e1,Not) -> propagate_test e1
1264 | ParenExpr(e) -> propagate_test e
1265 | FunCall(e,args) -> (* not very nice, but so painful otherwise *)
1266 (match (unwrap e,args) with
1267 ((Ident(i),_),[(Left a,_)]) ->
1268 let nm = str_of_name i in
1269 if List.mem nm ["likely";"unlikely"]
1270 then propagate_test a
1271 else ()
1272 | _ -> ())
1273 | _ -> () in
1274
1275 let bigf = { Visitor_c.default_visitor_c with
1276 Visitor_c.kexpr = (fun (k,bigf) expr ->
1277 (match unwrap_expr expr with
1278 CondExpr(e,_,_) -> propagate_test e
1279 | Binary(e1,Logical(AndLog),e2)
1280 | Binary(e1,Logical(OrLog),e2) -> propagate_test e1; propagate_test e2
1281 | Unary(e1,Not) -> propagate_test e1
1282 | _ -> ()
1283 );
1284 k expr
1285 );
1286 Visitor_c.kstatement = (fun (k, bigf) st ->
1287 match unwrap_st st with
1288 Selection(s) ->
1289 (match s with If(e1,s1,s2) -> propagate_test e1 | _ -> ());
1290 k st;
1291 | Iteration(i) ->
1292 (match i with
1293 While(e,s) -> propagate_test e
1294 | DoWhile(s,e) -> propagate_test e
1295 | For(_,es,_,_) ->
1296 (match unwrap es with Some e -> propagate_test e | None -> ())
1297 | _ -> ());
1298 k st
1299 | _ -> k st
1300 )
1301 } in
1302 (prog +> List.iter (fun elem ->
1303 Visitor_c.vk_toplevel bigf elem
1304 ))
1305
1306
1307
1308 (*****************************************************************************)
1309 (* Annotate types *)
1310 (*****************************************************************************)
1311 let annotate_program env prog =
1312 Common.profile_code "TAC.annotate_program"
1313 (fun () ->
1314 let res = annotate_program2 env prog in
1315 annotate_test_expressions prog;
1316 res
1317 )
1318
1319 let annotate_type_and_localvar env prog =
1320 Common.profile_code "TAC.annotate_type"
1321 (fun () -> annotate_program2 env prog)
1322
1323
1324 (*****************************************************************************)
1325 (* changing default typing environment, do concatenation *)
1326 let init_env filename =
1327 pr2 ("init_env: " ^ filename);
1328 let (ast2, _stat) = Parse_c.parse_c_and_cpp filename in
1329 let ast = Parse_c.program_of_program2 ast2 in
1330
1331 let res = annotate_type_and_localvar !initial_env ast in
1332 match List.rev res with
1333 | [] -> pr2 "empty environment"
1334 | (_top,(env1,env2))::xs ->
1335 initial_env := !initial_env ++ env2;
1336 ()
1337