Release coccinelle-0.2.3rc1
[bpt/coccinelle.git] / parsing_c / type_annoter_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes,
5 * Copyright (C) 2009 University of Urbana Champaign
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License (GPL)
9 * version 2 as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * file license.txt for more details.
15 *)
16
17 open Common
18
19 open Ast_c
20
21 module Lib = Lib_parsing_c
22
23 (*****************************************************************************)
24 (* Prelude *)
25 (*****************************************************************************)
26 (* History:
27 * - Done a first type checker in 2002, cf typing-semantic/, but
28 * was assuming that have all type info, and so was assuming had called
29 * cpp and everything was right.
30 * - Wrote this file, in 2006?, as we added pattern matching on type
31 * in coccinelle. Partial type annotater.
32 * - Julia extended it in 2008? to have localvar/notlocalvar and
33 * test/notest information, again used by coccinelle.
34 * - I extended it in Fall 2008 to have more type information for the
35 * global analysis. I also added some optimisations to process
36 * included code faster.
37 *
38 *
39 * Design choices. Can either do:
40 * - a kind of inferer
41 * - can first do a simple inferer, that just pass context
42 * - then a real inferer, managing partial info.
43 * type context = fullType option
44 *
45 * - extract the information from the .h files
46 * (so no inference at all needed)
47 *
48 * Difference with julia's code in parsing_cocci/type_infer.ml:
49 * - She handles just the variable namespace. She does not type
50 * field access or enum or macros. This is because cocci programs are
51 * usually simple and have no structure definition or macro definitions
52 * that we need to type anyway.
53 * - She does more propagation.
54 * - She does not have to handle the typedef isomorphism which force me
55 * to use those typedef_fix and type_unfold_one_step
56 * - She does not handle I think the function pointer C isomorphism.
57 *
58 * - She has a cleaner type_cocci without any info. In my case
59 * I need to do those ugly al_type, or generate fake infos.
60 * - She has more compact code. Perhaps because she does not have to
61 * handle the extra exp_info that she added on me :) So I need those
62 * do_with_type, make_info_xxx, etc.
63 *
64 * Note: if need to debug this annotater, use -show_trace_profile, it can
65 * help. You can also set the typedef_debug flag below.
66 *
67 *
68 *
69 * todo: expression contain types, and statements, which in turn can contain
70 * expression, so need recurse. Need define an annote_statement and
71 * annotate_type.
72 *
73 * todo: how deal with typedef isomorphisms ? How store them in Ast_c ?
74 * store all posible variations in ast_c ? a list of type instead of just
75 * the type ?
76 *
77 * todo: how to handle multiple possible definitions for entities like
78 * struct or typedefs ? Because of ifdef, we should store list of
79 * possibilities sometimes.
80 *
81 * todo: define a new type ? like type_cocci ? where have a bool ?
82 *
83 * semi: How handle scope ? When search for type of field, we return
84 * a type, but this type makes sense only in a certain scope.
85 * We could add a tag to each typedef, structUnionName to differentiate
86 * them and also associate in ast_c to the type the scope
87 * of this type, the env that were used to define this type.
88 *
89 * todo: handle better the search in previous env, the env'. Cf the
90 * termination problem in typedef_fix when I was searching in the same
91 * env.
92 *
93 *)
94
95 (*****************************************************************************)
96 (* Wrappers *)
97 (*****************************************************************************)
98 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_type
99
100 (*****************************************************************************)
101 (* Environment *)
102 (*****************************************************************************)
103
104 (* The different namespaces from stdC manual:
105 *
106 * You introduce two new name spaces with every block that you write.
107 *
108 * One name space includes all
109 * - functions,
110 * - objects,
111 * - type definitions,
112 * - and enumeration constants
113 * that you declare or define within the block.
114 *
115 * The other name space includes all
116 * - enumeration,
117 * - structure,
118 * - and union
119 * *tags* that you define within the block.
120 *
121 * You introduce a new member name space with every structure or union
122 * whose content you define. You identify a member name space by the
123 * type of left operand that you write for a member selection
124 * operator, as in x.y or p->y. A member name space ends with the end
125 * of the block in which you declare it.
126 *
127 * You introduce a new goto label name space with every function
128 * definition you write. Each goto label name space ends with its
129 * function definition.
130 *)
131
132 (* But I don't try to do a type-checker, I try to "resolve" type of var
133 * so don't need make difference between namespaces here.
134 *
135 * But, why not make simply a (string, kindstring) assoc ?
136 * Because we dont want that a variable shadow a struct definition, because
137 * they are still in 2 different namespace. But could for typedef,
138 * because VarOrFunc and Typedef are in the same namespace.
139 * But could do a record as in c_info.ml
140 *)
141
142
143 (* This type contains all "ident" like notion of C. Each time in Ast_c
144 * you have a string type (as in expression, function name, fields)
145 * then you need to manage the scope of this ident.
146 *
147 * The wrap for StructUnionNameDef contain the whole ii, the i for
148 * the string, the structUnion and the structType.
149 *
150 * Put Macro here ? after all the scoping rules for cpp macros is different
151 * and so does not vanish after the closing '}'.
152 *
153 * todo: EnumDef
154 *)
155 type namedef =
156 | VarOrFunc of string * Ast_c.exp_type
157 | EnumConstant of string * string option
158
159 (* also used for macro type aliases *)
160 | TypeDef of string * fullType
161 (* the structType contains nested "idents" with struct scope *)
162 | StructUnionNameDef of string * (structUnion * structType) wrap
163
164 (* cppext: *)
165 | Macro of string * (define_kind * define_val)
166
167
168 (* Because have nested scope, have nested list, hence the list list.
169 *
170 * opti? use a hash to accelerate ? hmm but may have some problems
171 * with hash to handle recursive lookup. For instance for the typedef
172 * example where have mutually recursive definition of the type,
173 * we must take care to not loop by starting the second search
174 * from the previous environment. With the list scheme in
175 * lookup_env below it's quite easy to do. With hash it may be
176 * more complicated.
177 *)
178 type environment = namedef list list
179
180
181 (* ------------------------------------------------------------ *)
182 (* can be modified by the init_env function below, by
183 * the file environment_unix.h
184 *)
185 let initial_env = ref [
186 [VarOrFunc("NULL",
187 (Lib.al_type (Parse_c.type_of_string "void *"),
188 Ast_c.NotLocalVar));
189
190 (*
191 VarOrFunc("malloc",
192 (Lib.al_type(Parse_c.type_of_string "void* (*)(int size)"),
193 Ast_c.NotLocalVar));
194 VarOrFunc("free",
195 (Lib.al_type(Parse_c.type_of_string "void (*)(void *ptr)"),
196 Ast_c.NotLocalVar));
197 *)
198 ]
199 ]
200
201
202 let typedef_debug = ref false
203
204
205 (* ------------------------------------------------------------ *)
206 (* generic, lookup and also return remaining env for further lookup *)
207 let rec lookup_env2 f env =
208 match env with
209 | [] -> raise Not_found
210 | []::zs -> lookup_env2 f zs
211 | (x::xs)::zs ->
212 (match f x with
213 | None -> lookup_env2 f (xs::zs)
214 | Some y -> y, xs::zs
215 )
216 let lookup_env a b =
217 Common.profile_code "TAC.lookup_env" (fun () -> lookup_env2 a b)
218
219
220
221 let member_env lookupf env =
222 try
223 let _ = lookupf env in
224 true
225 with Not_found -> false
226
227
228
229
230 (* ------------------------------------------------------------ *)
231
232
233 let lookup_var s env =
234 let f = function
235 | VarOrFunc (s2, typ) -> if s2 =$= s then Some typ else None
236 | _ -> None
237 in
238 lookup_env f env
239
240 let lookup_typedef s env =
241 if !typedef_debug then pr2 ("looking for: " ^ s);
242 let f = function
243 | TypeDef (s2, typ) -> if s2 =$= s then Some typ else None
244 | _ -> None
245 in
246 lookup_env f env
247
248 let lookup_structunion (_su, s) env =
249 let f = function
250 | StructUnionNameDef (s2, typ) -> if s2 =$= s then Some typ else None
251 | _ -> None
252 in
253 lookup_env f env
254
255 let lookup_macro s env =
256 let f = function
257 | Macro (s2, typ) -> if s2 =$= s then Some typ else None
258 | _ -> None
259 in
260 lookup_env f env
261
262 let lookup_enum s env =
263 let f = function
264 | EnumConstant (s2, typ) -> if s2 =$= s then Some typ else None
265 | _ -> None
266 in
267 lookup_env f env
268
269
270 let lookup_typedef a b =
271 Common.profile_code "TAC.lookup_typedef" (fun () -> lookup_typedef a b)
272
273
274
275 (*****************************************************************************)
276 (* "type-lookup" *)
277 (*****************************************************************************)
278
279 (* find_final_type is used to know to what type a field correspond in
280 * x.foo. Sometimes the type of x is a typedef or a structName in which
281 * case we must look in environment to find the complete type, here
282 * structUnion that contains the information.
283 *
284 * Because in C one can redefine in nested blocks some typedefs,
285 * struct, or variables, we have a static scoping resolving process.
286 * So, when we look for the type of a var, if this var is in an
287 * enclosing block, then maybe its type refer to a typdef of this
288 * enclosing block, so must restart the "type-resolving" of this
289 * typedef from this enclosing block, not from the bottom. So our
290 * "resolving-type functions" take an env and also return an env from
291 * where the next search must be performed. *)
292
293 (*
294 let rec find_final_type ty env =
295
296 match Ast_c.unwrap_typeC ty with
297 | BaseType x -> (BaseType x) +> Ast_c.rewrap_typeC ty
298
299 | Pointer t -> (Pointer (find_final_type t env)) +> Ast_c.rewrap_typeC ty
300 | Array (e, t) -> Array (e, find_final_type t env) +> Ast_c.rewrap_typeC ty
301
302 | StructUnion (sopt, su) -> StructUnion (sopt, su) +> Ast_c.rewrap_typeC ty
303
304 | FunctionType t -> (FunctionType t) (* todo ? *) +> Ast_c.rewrap_typeC ty
305 | Enum (s, enumt) -> (Enum (s, enumt)) (* todo? *) +> Ast_c.rewrap_typeC ty
306 | EnumName s -> (EnumName s) (* todo? *) +> Ast_c.rewrap_typeC ty
307
308 | StructUnionName (su, s) ->
309 (try
310 let ((structtyp,ii), env') = lookup_structunion (su, s) env in
311 Ast_c.nQ, (StructUnion (Some s, structtyp), ii)
312 (* old: +> Ast_c.rewrap_typeC ty
313 * but must wrap with good ii, otherwise pretty_print_c
314 * will be lost and raise some Impossible
315 *)
316 with Not_found ->
317 ty
318 )
319
320 | TypeName s ->
321 (try
322 let (t', env') = lookup_typedef s env in
323 find_final_type t' env'
324 with Not_found ->
325 ty
326 )
327
328 | ParenType t -> find_final_type t env
329 | Typeof e -> failwith "typeof"
330 *)
331
332
333
334
335 (* ------------------------------------------------------------ *)
336 let rec type_unfold_one_step ty env =
337
338 match Ast_c.unwrap_typeC ty with
339 | BaseType x -> ty
340 | Pointer t -> ty
341 | Array (e, t) -> ty
342
343 | StructUnion (sopt, su, fields) -> ty
344
345 | FunctionType t -> ty
346 | Enum (s, enumt) -> ty
347
348 | EnumName s -> ty (* todo: look in env when will have EnumDef *)
349
350 | StructUnionName (su, s) ->
351 (try
352 let (((su,fields),ii), env') = lookup_structunion (su, s) env in
353 Ast_c.mk_ty (StructUnion (su, Some s, fields)) ii
354 (* old: +> Ast_c.rewrap_typeC ty
355 * but must wrap with good ii, otherwise pretty_print_c
356 * will be lost and raise some Impossible
357 *)
358 with Not_found ->
359 ty
360 )
361
362 | TypeName (name, _typ) ->
363 let s = Ast_c.str_of_name name in
364 (try
365 if !typedef_debug then pr2 "type_unfold_one_step: lookup_typedef";
366 let (t', env') = lookup_typedef s env in
367 type_unfold_one_step t' env'
368 with Not_found ->
369 ty
370 )
371
372 | ParenType t -> type_unfold_one_step t env
373 | TypeOfExpr e ->
374 pr2_once ("Type_annoter: not handling typeof");
375 ty
376 | TypeOfType t -> type_unfold_one_step t env
377
378
379
380
381
382
383
384
385
386 (* normalizer. can be seen as the opposite of the previous function as
387 * we "fold" at least for the structUnion. Should return something that
388 * Type_c.is_completed_fullType likes, something that makes it easier
389 * for the programmer to work on, that has all the needed information
390 * for most tasks.
391 *)
392 let rec typedef_fix ty env =
393 match Ast_c.unwrap_typeC ty with
394 | BaseType x ->
395 ty
396 | Pointer t ->
397 Pointer (typedef_fix t env) +> Ast_c.rewrap_typeC ty
398 | Array (e, t) ->
399 Array (e, typedef_fix t env) +> Ast_c.rewrap_typeC ty
400 | StructUnion (su, sopt, fields) ->
401 (* normalize, fold.
402 * todo? but what if correspond to a nested struct def ?
403 *)
404 Type_c.structdef_to_struct_name ty
405 | FunctionType ft ->
406 (FunctionType ft) (* todo ? *) +> Ast_c.rewrap_typeC ty
407 | Enum (s, enumt) ->
408 (Enum (s, enumt)) (* todo? *) +> Ast_c.rewrap_typeC ty
409 | EnumName s ->
410 (EnumName s) (* todo? *) +> Ast_c.rewrap_typeC ty
411
412 (* we prefer StructUnionName to StructUnion when it comes to typed metavar *)
413 | StructUnionName (su, s) -> ty
414
415 (* keep the typename but complete with more information *)
416 | TypeName (name, typ) ->
417 let s = Ast_c.str_of_name name in
418 (match typ with
419 | Some _ ->
420 pr2 ("typedef value already there:" ^ s);
421 ty
422 | None ->
423 (try
424 if !typedef_debug then pr2 "typedef_fix: lookup_typedef";
425 let (t', env') = lookup_typedef s env in
426
427 (* bugfix: termination bug if use env instead of env' below, because
428 * can have some weird mutually recursive typedef which
429 * each new type alias search for its mutual def.
430 *)
431 TypeName (name, Some (typedef_fix t' env')) +> Ast_c.rewrap_typeC ty
432 with Not_found ->
433 ty
434 ))
435
436 (* remove paren for better matching with typed metavar. kind of iso again *)
437 | ParenType t ->
438 typedef_fix t env
439 | TypeOfExpr e ->
440 pr2_once ("Type_annoter: not handling typeof");
441 ty
442
443 | TypeOfType t ->
444 typedef_fix t env
445
446
447 (*****************************************************************************)
448 (* Helpers, part 1 *)
449 (*****************************************************************************)
450
451 let type_of_s2 s =
452 (Lib.al_type (Parse_c.type_of_string s))
453 let type_of_s a =
454 Common.profile_code "Type_c.type_of_s" (fun () -> type_of_s2 a)
455
456
457 (* pad: pb on:
458 * /home/pad/software-os-src2/freebsd/contrib/ipfilter/netinet/ip_fil_freebsd.c
459 * because in the code there is:
460 * static iss_seq_off = 0;
461 * which in the parser was generating a default int without a parse_info.
462 * I now add a fake parse_info for such default int so no more failwith
463 * normally.
464 *)
465
466 let rec is_simple_expr expr =
467 match Ast_c.unwrap_expr expr with
468 (* todo? handle more special cases ? *)
469
470 | Ident _ ->
471 true
472 | Constant (_) ->
473 true
474 | Unary (op, e) ->
475 true
476 | Binary (e1, op, e2) ->
477 true
478 | Cast (t, e) ->
479 true
480 | ParenExpr (e) -> is_simple_expr e
481
482 | _ -> false
483
484 (*****************************************************************************)
485 (* Typing rules *)
486 (*****************************************************************************)
487 (* now in type_c.ml *)
488
489
490
491 (*****************************************************************************)
492 (* (Semi) Globals, Julia's style *)
493 (*****************************************************************************)
494
495 (* opti: cache ? use hash ? *)
496 let _scoped_env = ref !initial_env
497
498 (* memoise unnanoted var, to avoid too much warning messages *)
499 let _notyped_var = ref (Hashtbl.create 100)
500
501 let new_scope() = _scoped_env := []::!_scoped_env
502 let del_scope() = _scoped_env := List.tl !_scoped_env
503
504 let do_in_new_scope f =
505 begin
506 new_scope();
507 let res = f() in
508 del_scope();
509 res
510 end
511
512 let add_in_scope namedef =
513 let (current, older) = Common.uncons !_scoped_env in
514 _scoped_env := (namedef::current)::older
515
516
517 (* ------------------------------------------------------------ *)
518
519 (* sort of hackish... *)
520 let islocal info =
521 if List.length (!_scoped_env) =|= List.length !initial_env
522 then Ast_c.NotLocalVar
523 else Ast_c.LocalVar info
524
525 (* ------------------------------------------------------------ *)
526 (* the warning argument is here to allow some binding to overwrite an
527 * existing one. With function, we first have the prototype and then the def,
528 * and the def binding with the same string is not an error.
529 *
530 * todo?: but if we define two times the same function, then we will not
531 * detect it :( it would require to make a diff between adding a binding
532 * from a prototype and from a definition.
533 *
534 * opti: disabling the check_annotater flag have some important
535 * performance benefit.
536 *
537 *)
538 let add_binding2 namedef warning =
539 let (current_scope, _older_scope) = Common.uncons !_scoped_env in
540
541 if !Flag_parsing_c.check_annotater then begin
542 (match namedef with
543 | VarOrFunc (s, typ) ->
544 if Hashtbl.mem !_notyped_var s
545 then pr2 ("warning: found typing information for a variable that was" ^
546 "previously unknown:" ^ s);
547 | _ -> ()
548 );
549
550 let (memberf, s) =
551 (match namedef with
552 | VarOrFunc (s, typ) ->
553 member_env (lookup_var s), s
554 | TypeDef (s, typ) ->
555 member_env (lookup_typedef s), s
556 | StructUnionNameDef (s, (su, typ)) ->
557 member_env (lookup_structunion (su, s)), s
558 | Macro (s, body) ->
559 member_env (lookup_macro s), s
560 | EnumConstant (s, body) ->
561 member_env (lookup_enum s), s
562 ) in
563
564 if memberf [current_scope] && warning
565 then pr2 ("Type_annoter: warning, " ^ s ^
566 " is already in current binding" ^ "\n" ^
567 " so there is a weird shadowing");
568 end;
569 add_in_scope namedef
570
571 let add_binding namedef warning =
572 Common.profile_code "TAC.add_binding" (fun () -> add_binding2 namedef warning)
573
574
575
576 (*****************************************************************************)
577 (* Helpers, part 2 *)
578 (*****************************************************************************)
579
580 let lookup_opt_env lookupf s =
581 Common.optionise (fun () ->
582 lookupf s !_scoped_env
583 )
584
585 let unwrap_unfold_env2 typ =
586 Ast_c.unwrap_typeC
587 (type_unfold_one_step typ !_scoped_env)
588 let unwrap_unfold_env typ =
589 Common.profile_code "TAC.unwrap_unfold_env" (fun () -> unwrap_unfold_env2 typ)
590
591 let typedef_fix a b =
592 Common.profile_code "TAC.typedef_fix" (fun () -> typedef_fix a b)
593
594 let make_info_def_fix x =
595 Type_c.make_info_def (typedef_fix x !_scoped_env)
596
597 let make_info_fix (typ, local) =
598 Type_c.make_info ((typedef_fix typ !_scoped_env),local)
599
600
601 let make_info_def = Type_c.make_info_def
602
603 (*****************************************************************************)
604 (* Main typer code, put later in a visitor *)
605 (*****************************************************************************)
606
607 let annotater_expr_visitor_subpart = (fun (k,bigf) expr ->
608
609 let ty =
610 match Ast_c.unwrap_expr expr with
611
612 (* -------------------------------------------------- *)
613 (* todo: should analyse the 's' for int to know if unsigned or not *)
614 | Constant (String (s,kind)) -> make_info_def (type_of_s "char *")
615 | Constant MultiString _ -> make_info_def (type_of_s "char *")
616 | Constant (Char (s,kind)) -> make_info_def (type_of_s "char")
617 | Constant (Int (s,kind)) ->
618 (* this seems really unpleasant, but perhaps the type needs to be set
619 up in some way that allows pretty printing *)
620 make_info_def
621 (match kind with
622 (* matches limited by what is generated in lexer_c.mll *)
623 Si(Signed,CInt) -> type_of_s "int"
624 | Si(UnSigned,CInt) -> type_of_s "unsigned int"
625 | Si(Signed,CLong) -> type_of_s "long"
626 | Si(UnSigned,CLong) -> type_of_s "unsigned long"
627 | Si(Signed,CLongLong) -> type_of_s "long long"
628 | Si(UnSigned,CLongLong) -> type_of_s "unsigned long long"
629 | _ -> failwith "unexpected kind for constant")
630 | Constant (Float (s,kind)) ->
631 let fake = Ast_c.fakeInfo (Common.fake_parse_info) in
632 let fake = Ast_c.rewrap_str "float" fake in
633 let iinull = [fake] in
634 make_info_def (Ast_c.mk_ty (BaseType (FloatType kind)) iinull)
635
636
637 (* -------------------------------------------------- *)
638 (* note: could factorize this code with the code for Ident
639 * and the other code for Funcall below. But as the Ident can be
640 * a macro-func, I prefer to handle it separately. So
641 * this rule can handle the macro-func, the Ident-rule can handle
642 * the macro-var, and the other FunCall-rule the regular
643 * function calls through fields.
644 * Also as I don't want a warning on the Ident that are a FunCall,
645 * easier to have a rule separate from the Ident rule.
646 *)
647 | FunCall (e1, args) ->
648 (match Ast_c.unwrap_expr e1 with
649 | Ident (ident) ->
650
651 (* recurse *)
652 args +> List.iter (fun (e,ii) ->
653 (* could typecheck if arguments agree with prototype *)
654 Visitor_c.vk_argument bigf e
655 );
656 let s = Ast_c.str_of_name ident in
657 (match lookup_opt_env lookup_var s with
658 | Some ((typ,local),_nextenv) ->
659
660 (* set type for ident *)
661 let tyinfo = make_info_fix (typ, local) in
662 Ast_c.set_type_expr e1 tyinfo;
663
664 (match unwrap_unfold_env typ with
665 | FunctionType (ret, params) -> make_info_def ret
666
667 (* can be function pointer, C have an iso for that,
668 * same pfn() syntax than regular function call.
669 *)
670 | Pointer (typ2) ->
671 (match unwrap_unfold_env typ2 with
672 | FunctionType (ret, params) -> make_info_def ret
673 | _ -> Type_c.noTypeHere
674 )
675 | _ -> Type_c.noTypeHere
676 )
677 | None ->
678
679 (match lookup_opt_env lookup_macro s with
680 | Some ((defkind, defval), _nextenv) ->
681 (match defkind, defval with
682 | DefineFunc _, DefineExpr e ->
683 let rettype = Ast_c.get_onlytype_expr e in
684
685 (* todo: could also set type for ident ?
686 have return type and at least type of concrete
687 parameters so can generate a fake FunctionType
688 *)
689 let macrotype_opt =
690 Type_c.fake_function_type rettype args
691 in
692
693 macrotype_opt +> Common.do_option (fun t ->
694 pr2 ("Type_annotater: generate fake function type" ^
695 "for macro: " ^ s);
696 let tyinfo = make_info_def_fix t in
697 Ast_c.set_type_expr e1 tyinfo;
698 );
699
700 Ast_c.get_type_expr e
701 | DefineVar, _ ->
702 pr2 ("Type_annoter: not a macro-func: " ^ s);
703 Type_c.noTypeHere
704 | DefineFunc _, _ ->
705 (* normally the FunCall case should have catch it *)
706 pr2 ("Type_annoter: not a macro-func-expr: " ^ s);
707 Type_c.noTypeHere
708 )
709 | None ->
710 pr2_once ("type_annotater: no type for function ident: " ^ s);
711 Type_c.noTypeHere
712 )
713 )
714
715
716 | _e ->
717 k expr;
718
719 (Ast_c.get_type_expr e1) +> Type_c.do_with_type (fun typ ->
720 (* copy paste of above *)
721 (match unwrap_unfold_env typ with
722 | FunctionType (ret, params) -> make_info_def ret
723 | Pointer (typ) ->
724 (match unwrap_unfold_env typ with
725 | FunctionType (ret, params) -> make_info_def ret
726 | _ -> Type_c.noTypeHere
727 )
728 | _ -> Type_c.noTypeHere
729 )
730 )
731 )
732
733
734 (* -------------------------------------------------- *)
735 | Ident (ident) ->
736 let s = Ast_c.str_of_name ident in
737 (match lookup_opt_env lookup_var s with
738 | Some ((typ,local),_nextenv) ->
739 make_info_fix (typ,local)
740 | None ->
741 (match lookup_opt_env lookup_macro s with
742 | Some ((defkind, defval), _nextenv) ->
743 (match defkind, defval with
744 | DefineVar, DefineExpr e ->
745 Ast_c.get_type_expr e
746 | DefineVar, _ ->
747 pr2 ("Type_annoter: not a expression: " ^ s);
748 Type_c.noTypeHere
749 | DefineFunc _, _ ->
750 (* normally the FunCall case should have catch it *)
751 pr2 ("Type_annoter: not a macro-var: " ^ s);
752 Type_c.noTypeHere
753 )
754 | None ->
755 (match lookup_opt_env lookup_enum s with
756 | Some (_, _nextenv) ->
757 make_info_def (type_of_s "int")
758 | None ->
759 if not (s =~ "[A-Z_]+") (* if macro then no warning *)
760 then
761 if !Flag_parsing_c.check_annotater then
762 if not (Hashtbl.mem !_notyped_var s)
763 then begin
764 pr2 ("Type_annoter: no type found for: " ^ s);
765 Hashtbl.add !_notyped_var s true;
766 end
767 else ()
768 else
769 pr2 ("Type_annoter: no type found for: " ^ s)
770 ;
771 Type_c.noTypeHere
772 )
773 )
774 )
775
776 (* -------------------------------------------------- *)
777 (* C isomorphism on type on array and pointers *)
778 | Unary (e, DeRef)
779 | ArrayAccess (e, _) ->
780 k expr; (* recurse to set the types-ref of sub expressions *)
781
782 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t ->
783 (* todo: maybe not good env !! *)
784 match unwrap_unfold_env t with
785 | Pointer x
786 | Array (_, x) ->
787 make_info_def_fix x
788 | _ -> Type_c.noTypeHere
789
790 )
791
792 | Unary (e, GetRef) ->
793 k expr; (* recurse to set the types-ref of sub expressions *)
794
795 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t ->
796 (* must generate an element so that '=' can be used
797 * to compare type ?
798 *)
799 let fake = Ast_c.fakeInfo Common.fake_parse_info in
800 let fake = Ast_c.rewrap_str "*" fake in
801
802 let ft = Ast_c.mk_ty (Pointer t) [fake] in
803 make_info_def_fix ft
804 )
805
806
807 (* -------------------------------------------------- *)
808 (* fields *)
809 | RecordAccess (e, namefld)
810 | RecordPtAccess (e, namefld) as x ->
811
812 let fld = Ast_c.str_of_name namefld in
813
814 k expr; (* recurse to set the types-ref of sub expressions *)
815
816 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t ->
817
818 let topt =
819 match x with
820 | RecordAccess _ -> Some t
821 | RecordPtAccess _ ->
822 (match unwrap_unfold_env t with
823 | Pointer (t) -> Some t
824 | _ -> None
825 )
826 | _ -> raise Impossible
827
828 in
829 (match topt with
830 | None -> Type_c.noTypeHere
831 | Some t ->
832 match unwrap_unfold_env t with
833 | StructUnion (su, sopt, fields) ->
834 (try
835 (* todo: which env ? *)
836 make_info_def_fix
837 (Type_c.type_field fld (su, fields))
838 with
839 | Not_found ->
840 pr2 (spf
841 "TYPE-ERROR: field '%s' does not belong in struct %s"
842 fld (match sopt with Some s -> s |_ -> "<anon>"));
843 Type_c.noTypeHere
844 | Multi_found ->
845 pr2 "TAC:MultiFound";
846 Type_c.noTypeHere
847 )
848 | _ -> Type_c.noTypeHere
849 )
850 )
851
852
853
854 (* -------------------------------------------------- *)
855 | Cast (t, e) ->
856 k expr;
857 (* todo: if infer, can "push" info ? add_types_expr [t] e ? *)
858 make_info_def_fix (Lib.al_type t)
859
860 (* todo? lub, hmm maybe not, cos type must be e1 *)
861 | Assignment (e1, op, e2) ->
862 k expr;
863 (* value of an assignment is the value of the RHS expression *)
864 Ast_c.get_type_expr e2
865 | Sequence (e1, e2) ->
866 k expr;
867 Ast_c.get_type_expr e2
868
869 | Binary (e1, Logical _, e2) ->
870 k expr;
871 make_info_def (type_of_s "int")
872
873 (* todo: lub *)
874 | Binary (e1, Arith op, e2) ->
875 k expr;
876 Type_c.lub op (Type_c.get_opt_type e1) (Type_c.get_opt_type e2)
877
878 | CondExpr (cond, e1opt, e2) ->
879 k expr;
880 Ast_c.get_type_expr e2
881
882
883 | ParenExpr e ->
884 k expr;
885 Ast_c.get_type_expr e
886
887 | Infix (e, op) | Postfix (e, op) ->
888 k expr;
889 Ast_c.get_type_expr e
890
891 (* pad: julia wrote this ? *)
892 | Unary (e, UnPlus) ->
893 k expr; (* recurse to set the types-ref of sub expressions *)
894 make_info_def (type_of_s "int")
895 (* todo? can convert from unsigned to signed if UnMinus ? *)
896 | Unary (e, UnMinus) ->
897 k expr; (* recurse to set the types-ref of sub expressions *)
898 make_info_def (type_of_s "int")
899
900 | SizeOfType _|SizeOfExpr _ ->
901 k expr; (* recurse to set the types-ref of sub expressions *)
902 make_info_def (type_of_s "int")
903
904 | Constructor (ft, ini) ->
905 k expr; (* recurse to set the types-ref of sub expressions *)
906 make_info_def (Lib.al_type ft)
907
908 | Unary (e, Not) ->
909 k expr; (* recurse to set the types-ref of sub expressions *)
910 (* the result of ! is always 0 or 1, not the argument type *)
911 make_info_def (type_of_s "int")
912 | Unary (e, Tilde) ->
913 k expr; (* recurse to set the types-ref of sub expressions *)
914 Ast_c.get_type_expr e
915
916 (* -------------------------------------------------- *)
917 (* todo *)
918 | Unary (_, GetRefLabel) ->
919 k expr; (* recurse to set the types-ref of sub expressions *)
920 pr2_once "Type annotater:not handling GetRefLabel";
921 Type_c.noTypeHere
922 (* todo *)
923 | StatementExpr _ ->
924 k expr; (* recurse to set the types-ref of sub expressions *)
925 pr2_once "Type annotater:not handling StatementExpr";
926 Type_c.noTypeHere
927 (*
928 | _ -> k expr; Type_c.noTypeHere
929 *)
930
931 in
932 Ast_c.set_type_expr expr ty
933
934 )
935
936
937 (*****************************************************************************)
938 (* Visitor *)
939 (*****************************************************************************)
940
941 (* Processing includes that were added after a cpp_ast_c makes the
942 * type annotater quite slow, especially when the depth of cpp_ast_c is
943 * big. But for such includes the only thing we really want is to modify
944 * the environment to have enough type information. We don't need
945 * to type the expressions inside those includes (they will be typed
946 * when we process the include file directly). Here the goal is
947 * to not recurse.
948 *
949 * Note that as usually header files contain mostly structure
950 * definitions and defines, that means we still have to do lots of work.
951 * We only win on function definition bodies, but usually header files
952 * have just prototypes, or inline function definitions which anyway have
953 * usually a small body. But still, we win. It also makes clearer
954 * that when processing include as we just need the environment, the caller
955 * of this module can do further optimisations such as memorising the
956 * state of the environment after each header files.
957 *
958 *
959 * For sparse its makes the annotating speed goes from 9s to 4s
960 * For Linux the speedup is even better, from ??? to ???.
961 *
962 * Because There would be some copy paste with annotate_program, it is
963 * better to factorize code hence the just_add_in_env parameter below.
964 *
965 * todo? alternative optimisation for the include problem:
966 * - processing all headers files one time and construct big env
967 * - use hashtbl for env (but apparently not biggest problem)
968 *)
969
970 let rec visit_toplevel ~just_add_in_env ~depth elem =
971 let need_annotate_body = not just_add_in_env in
972
973 let bigf = { Visitor_c.default_visitor_c with
974
975 (* ------------------------------------------------------------ *)
976 Visitor_c.kcppdirective = (fun (k, bigf) directive ->
977 match directive with
978 (* do error messages for type annotater only for the real body of the
979 * file, not inside include.
980 *)
981 | Include {i_content = opt} ->
982 opt +> Common.do_option (fun (filename, program) ->
983 Common.save_excursion Flag_parsing_c.verbose_type (fun () ->
984 Flag_parsing_c.verbose_type := false;
985
986 (* old: Visitor_c.vk_program bigf program;
987 * opti: set the just_add_in_env
988 *)
989 program +> List.iter (fun elem ->
990 visit_toplevel ~just_add_in_env:true ~depth:(depth+1) elem
991 )
992 )
993 )
994
995 | Define ((s,ii), (defkind, defval)) ->
996
997
998 (* even if we are in a just_add_in_env phase, such as when
999 * we process include, as opposed to the body of functions,
1000 * with macros we still to type the body of the macro as
1001 * the macro has no type and so we infer its type from its
1002 * body (and one day later maybe from its use).
1003 *)
1004 (match defval with
1005 (* can try to optimize and recurse only when the define body
1006 * is simple ?
1007 *)
1008
1009 | DefineExpr expr ->
1010 (* prevent macro-declared variables from leaking out *)
1011 do_in_new_scope (fun () ->
1012 if is_simple_expr expr
1013 (* even if not need_annotate_body, still recurse*)
1014 then k directive
1015 else
1016 if need_annotate_body
1017 then k directive)
1018 | _ ->
1019 do_in_new_scope (fun () ->
1020 if need_annotate_body
1021 then k directive)
1022 );
1023
1024 add_binding (Macro (s, (defkind, defval) )) true;
1025
1026 | Undef _
1027 | PragmaAndCo _ -> ()
1028 );
1029
1030 (* ------------------------------------------------------------ *)
1031 (* main typer code *)
1032 (* ------------------------------------------------------------ *)
1033 Visitor_c.kexpr = annotater_expr_visitor_subpart;
1034
1035 (* ------------------------------------------------------------ *)
1036 Visitor_c.kstatement = (fun (k, bigf) st ->
1037 match Ast_c.unwrap_st st with
1038 | Compound statxs -> do_in_new_scope (fun () -> k st);
1039 | _ -> k st
1040 );
1041 (* ------------------------------------------------------------ *)
1042 Visitor_c.kdecl = (fun (k, bigf) d ->
1043 (match d with
1044 | (DeclList (xs, ii)) ->
1045 xs +> List.iter (fun ({v_namei = var; v_type = t;
1046 v_storage = sto; v_local = local} as x
1047 , iicomma) ->
1048
1049 (* to add possible definition in type found in Decl *)
1050 Visitor_c.vk_type bigf t;
1051
1052
1053 let local =
1054 match local with
1055 | Ast_c.NotLocalDecl -> Ast_c.NotLocalVar
1056 | Ast_c.LocalDecl -> Ast_c.LocalVar (Ast_c.info_of_type t)
1057 in
1058 var +> Common.do_option (fun (name, iniopt) ->
1059 let s = Ast_c.str_of_name name in
1060
1061 match sto with
1062 | StoTypedef, _inline ->
1063 add_binding (TypeDef (s,Lib.al_type t)) true;
1064 | _ ->
1065 add_binding (VarOrFunc (s, (Lib.al_type t, local))) true;
1066
1067 x.v_type_bis :=
1068 Some (typedef_fix (Lib.al_type t) !_scoped_env);
1069
1070 if need_annotate_body then begin
1071 (* int x = sizeof(x) is legal so need process ini *)
1072 iniopt +> Common.do_option (fun (info, ini) ->
1073 Visitor_c.vk_ini bigf ini
1074 );
1075 end
1076 );
1077 );
1078 | MacroDecl _ ->
1079 if need_annotate_body
1080 then k d
1081 );
1082
1083 );
1084
1085 (* ------------------------------------------------------------ *)
1086 Visitor_c.ktype = (fun (k, bigf) typ ->
1087 (* bugfix: have a 'Lib.al_type typ' before, but because we can
1088 * have enum with possible expression, we don't want to change
1089 * the ref of abstract-lined types, but the real one, so
1090 * don't al_type here
1091 *)
1092 let (_q, tbis) = typ in
1093 match Ast_c.unwrap_typeC typ with
1094 | StructUnion (su, Some s, structType) ->
1095 let structType' = Lib.al_fields structType in
1096 let ii = Ast_c.get_ii_typeC_take_care tbis in
1097 let ii' = Lib.al_ii ii in
1098 add_binding (StructUnionNameDef (s, ((su, structType'),ii'))) true;
1099
1100 if need_annotate_body
1101 then k typ (* todo: restrict ? new scope so use do_in_scope ? *)
1102
1103 | Enum (sopt, enums) ->
1104
1105 enums +> List.iter (fun ((name, eopt), iicomma) ->
1106
1107 let s = Ast_c.str_of_name name in
1108
1109 if need_annotate_body
1110 then eopt +> Common.do_option (fun (ieq, e) ->
1111 Visitor_c.vk_expr bigf e
1112 );
1113 add_binding (EnumConstant (s, sopt)) true;
1114 );
1115
1116
1117 (* TODO: if have a TypeName, then maybe can fill the option
1118 * information.
1119 *)
1120 | _ ->
1121 if need_annotate_body
1122 then k typ
1123
1124 );
1125
1126 (* ------------------------------------------------------------ *)
1127 Visitor_c.ktoplevel = (fun (k, bigf) elem ->
1128 _notyped_var := Hashtbl.create 100;
1129 match elem with
1130 | Definition def ->
1131 let {f_name = name;
1132 f_type = ((returnt, (paramst, b)) as ftyp);
1133 f_storage = sto;
1134 f_body = statxs;
1135 f_old_c_style = oldstyle;
1136 },ii
1137 = def
1138 in
1139 let (i1, i2) =
1140 match ii with
1141 (* what is iifunc1? it should be a type. jll
1142 * pad: it's the '(' in the function definition. The
1143 * return type is part of f_type.
1144 *)
1145 | iifunc1::iifunc2::ibrace1::ibrace2::ifakestart::isto ->
1146 iifunc1, iifunc2
1147 | _ -> raise Impossible
1148 in
1149 let funcs = Ast_c.str_of_name name in
1150
1151 (match oldstyle with
1152 | None ->
1153 let typ' =
1154 Lib.al_type (Ast_c.mk_ty (FunctionType ftyp) [i1;i2]) in
1155
1156 add_binding (VarOrFunc (funcs, (typ',islocal i1.Ast_c.pinfo)))
1157 false;
1158
1159 if need_annotate_body then
1160 do_in_new_scope (fun () ->
1161 paramst +> List.iter (fun ({p_namei= nameopt; p_type= t},_)->
1162 match nameopt with
1163 | Some name ->
1164 let s = Ast_c.str_of_name name in
1165 let local = Ast_c.LocalVar (Ast_c.info_of_type t) in
1166 add_binding (VarOrFunc (s,(Lib.al_type t,local))) true
1167 | None ->
1168 pr2 "no type, certainly because Void type ?"
1169 );
1170 (* recurse *)
1171 k elem
1172 );
1173 | Some oldstyle ->
1174 (* generate regular function type *)
1175
1176 pr2 "TODO generate type for function";
1177 (* add bindings *)
1178 if need_annotate_body then
1179 do_in_new_scope (fun () ->
1180 (* recurse. should naturally call the kdecl visitor and
1181 * add binding
1182 *)
1183 k elem;
1184 );
1185
1186 );
1187 | CppTop x ->
1188 (match x with
1189 | Define ((s,ii), (DefineVar, DefineType t)) ->
1190 add_binding (TypeDef (s,Lib.al_type t)) true;
1191 | _ -> k elem
1192 )
1193
1194 | Declaration _
1195
1196
1197
1198 | IfdefTop _
1199 | MacroTop _
1200 | EmptyDef _
1201 | NotParsedCorrectly _
1202 | FinalDef _
1203 ->
1204 k elem
1205 );
1206 }
1207 in
1208 if just_add_in_env
1209 then
1210 if depth > 1
1211 then Visitor_c.vk_toplevel bigf elem
1212 else
1213 Common.profile_code "TAC.annotate_only_included" (fun () ->
1214 Visitor_c.vk_toplevel bigf elem
1215 )
1216 else Visitor_c.vk_toplevel bigf elem
1217
1218 (*****************************************************************************)
1219 (* Entry point *)
1220 (*****************************************************************************)
1221 (* catch all the decl to grow the environment *)
1222
1223
1224 let rec (annotate_program2 :
1225 environment -> toplevel list -> (toplevel * environment Common.pair) list) =
1226 fun env prog ->
1227
1228 (* globals (re)initialialisation *)
1229 _scoped_env := env;
1230 _notyped_var := (Hashtbl.create 100);
1231
1232 prog +> List.map (fun elem ->
1233 let beforeenv = !_scoped_env in
1234 visit_toplevel ~just_add_in_env:false ~depth:0 elem;
1235 let afterenv = !_scoped_env in
1236 (elem, (beforeenv, afterenv))
1237 )
1238
1239
1240
1241
1242 (*****************************************************************************)
1243 (* Annotate test *)
1244 (*****************************************************************************)
1245
1246 (* julia: for coccinelle *)
1247 let annotate_test_expressions prog =
1248 let rec propagate_test e =
1249 let ((e_term,info),_) = e in
1250 let (ty,_) = !info in
1251 info := (ty,Test);
1252 match e_term with
1253 Binary(e1,Logical(AndLog),e2)
1254 | Binary(e1,Logical(OrLog),e2) -> propagate_test e1; propagate_test e2
1255 | Unary(e1,Not) -> propagate_test e1
1256 | ParenExpr(e) -> propagate_test e
1257 | _ -> () in
1258
1259 let bigf = { Visitor_c.default_visitor_c with
1260 Visitor_c.kexpr = (fun (k,bigf) expr ->
1261 (match unwrap_expr expr with
1262 CondExpr(e,_,_) -> propagate_test e
1263 | Binary(e1,Logical(AndLog),e2)
1264 | Binary(e1,Logical(OrLog),e2) -> propagate_test e1; propagate_test e2
1265 | Unary(e1,Not) -> propagate_test e1
1266 | _ -> ()
1267 );
1268 k expr
1269 );
1270 Visitor_c.kstatement = (fun (k, bigf) st ->
1271 match unwrap_st st with
1272 Selection(s) ->
1273 (match s with If(e1,s1,s2) -> propagate_test e1 | _ -> ());
1274 k st;
1275 | Iteration(i) ->
1276 (match i with
1277 While(e,s) -> propagate_test e
1278 | DoWhile(s,e) -> propagate_test e
1279 | For(_,es,_,_) ->
1280 (match unwrap es with Some e -> propagate_test e | None -> ())
1281 | _ -> ());
1282 k st
1283 | _ -> k st
1284 )
1285 } in
1286 (prog +> List.iter (fun elem ->
1287 Visitor_c.vk_toplevel bigf elem
1288 ))
1289
1290
1291
1292 (*****************************************************************************)
1293 (* Annotate types *)
1294 (*****************************************************************************)
1295 let annotate_program env prog =
1296 Common.profile_code "TAC.annotate_program"
1297 (fun () ->
1298 let res = annotate_program2 env prog in
1299 annotate_test_expressions prog;
1300 res
1301 )
1302
1303 let annotate_type_and_localvar env prog =
1304 Common.profile_code "TAC.annotate_type"
1305 (fun () -> annotate_program2 env prog)
1306
1307
1308 (*****************************************************************************)
1309 (* changing default typing environment, do concatenation *)
1310 let init_env filename =
1311 pr2 ("init_env: " ^ filename);
1312 let (ast2, _stat) = Parse_c.parse_c_and_cpp filename in
1313 let ast = Parse_c.program_of_program2 ast2 in
1314
1315 let res = annotate_type_and_localvar !initial_env ast in
1316 match List.rev res with
1317 | [] -> pr2 "empty environment"
1318 | (_top,(env1,env2))::xs ->
1319 initial_env := !initial_env ++ env2;
1320 ()
1321