Coccinelle release 0.2.5-rc7.
[bpt/coccinelle.git] / parsing_c / type_annoter_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes,
5 * Copyright (C) 2009 University of Urbana Champaign
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License (GPL)
9 * version 2 as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * file license.txt for more details.
15 *)
16
17 open Common
18
19 open Ast_c
20
21 module Lib = Lib_parsing_c
22
23 (*****************************************************************************)
24 (* Prelude *)
25 (*****************************************************************************)
26 (* History:
27 * - Done a first type checker in 2002, cf typing-semantic/, but
28 * was assuming that have all type info, and so was assuming had called
29 * cpp and everything was right.
30 * - Wrote this file, in 2006?, as we added pattern matching on type
31 * in coccinelle. Partial type annotater.
32 * - Julia extended it in 2008? to have localvar/notlocalvar and
33 * test/notest information, again used by coccinelle.
34 * - I extended it in Fall 2008 to have more type information for the
35 * global analysis. I also added some optimisations to process
36 * included code faster.
37 *
38 *
39 * Design choices. Can either do:
40 * - a kind of inferer
41 * - can first do a simple inferer, that just pass context
42 * - then a real inferer, managing partial info.
43 * type context = fullType option
44 *
45 * - extract the information from the .h files
46 * (so no inference at all needed)
47 *
48 * Difference with julia's code in parsing_cocci/type_infer.ml:
49 * - She handles just the variable namespace. She does not type
50 * field access or enum or macros. This is because cocci programs are
51 * usually simple and have no structure definition or macro definitions
52 * that we need to type anyway.
53 * - She does more propagation.
54 * - She does not have to handle the typedef isomorphism which force me
55 * to use those typedef_fix and type_unfold_one_step
56 * - She does not handle I think the function pointer C isomorphism.
57 *
58 * - She has a cleaner type_cocci without any info. In my case
59 * I need to do those ugly al_type, or generate fake infos.
60 * - She has more compact code. Perhaps because she does not have to
61 * handle the extra exp_info that she added on me :) So I need those
62 * do_with_type, make_info_xxx, etc.
63 *
64 * Note: if need to debug this annotater, use -show_trace_profile, it can
65 * help. You can also set the typedef_debug flag below.
66 *
67 *
68 *
69 * todo: expression contain types, and statements, which in turn can contain
70 * expression, so need recurse. Need define an annote_statement and
71 * annotate_type.
72 *
73 * todo: how deal with typedef isomorphisms ? How store them in Ast_c ?
74 * store all posible variations in ast_c ? a list of type instead of just
75 * the type ?
76 *
77 * todo: how to handle multiple possible definitions for entities like
78 * struct or typedefs ? Because of ifdef, we should store list of
79 * possibilities sometimes.
80 *
81 * todo: define a new type ? like type_cocci ? where have a bool ?
82 *
83 * semi: How handle scope ? When search for type of field, we return
84 * a type, but this type makes sense only in a certain scope.
85 * We could add a tag to each typedef, structUnionName to differentiate
86 * them and also associate in ast_c to the type the scope
87 * of this type, the env that were used to define this type.
88 *
89 * todo: handle better the search in previous env, the env'. Cf the
90 * termination problem in typedef_fix when I was searching in the same
91 * env.
92 *
93 *)
94
95 (*****************************************************************************)
96 (* Wrappers *)
97 (*****************************************************************************)
98 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_type
99
100 (*****************************************************************************)
101 (* Environment *)
102 (*****************************************************************************)
103
104 (* The different namespaces from stdC manual:
105 *
106 * You introduce two new name spaces with every block that you write.
107 *
108 * One name space includes all
109 * - functions,
110 * - objects,
111 * - type definitions,
112 * - and enumeration constants
113 * that you declare or define within the block.
114 *
115 * The other name space includes all
116 * - enumeration,
117 * - structure,
118 * - and union
119 * *tags* that you define within the block.
120 *
121 * You introduce a new member name space with every structure or union
122 * whose content you define. You identify a member name space by the
123 * type of left operand that you write for a member selection
124 * operator, as in x.y or p->y. A member name space ends with the end
125 * of the block in which you declare it.
126 *
127 * You introduce a new goto label name space with every function
128 * definition you write. Each goto label name space ends with its
129 * function definition.
130 *)
131
132 (* But I don't try to do a type-checker, I try to "resolve" type of var
133 * so don't need make difference between namespaces here.
134 *
135 * But, why not make simply a (string, kindstring) assoc ?
136 * Because we dont want that a variable shadow a struct definition, because
137 * they are still in 2 different namespace. But could for typedef,
138 * because VarOrFunc and Typedef are in the same namespace.
139 * But could do a record as in c_info.ml
140 *)
141
142
143 (* This type contains all "ident" like notion of C. Each time in Ast_c
144 * you have a string type (as in expression, function name, fields)
145 * then you need to manage the scope of this ident.
146 *
147 * The wrap for StructUnionNameDef contain the whole ii, the i for
148 * the string, the structUnion and the structType.
149 *
150 * Put Macro here ? after all the scoping rules for cpp macros is different
151 * and so does not vanish after the closing '}'.
152 *
153 * todo: EnumDef
154 *)
155 type namedef =
156 | VarOrFunc of string * Ast_c.exp_type
157 | EnumConstant of string * string option
158
159 (* also used for macro type aliases *)
160 | TypeDef of string * fullType
161 (* the structType contains nested "idents" with struct scope *)
162 | StructUnionNameDef of string * (structUnion * structType) wrap
163
164 (* cppext: *)
165 | Macro of string * (define_kind * define_val)
166
167
168 (* Because have nested scope, have nested list, hence the list list.
169 *
170 * opti? use a hash to accelerate ? hmm but may have some problems
171 * with hash to handle recursive lookup. For instance for the typedef
172 * example where have mutually recursive definition of the type,
173 * we must take care to not loop by starting the second search
174 * from the previous environment. With the list scheme in
175 * lookup_env below it's quite easy to do. With hash it may be
176 * more complicated.
177 *)
178 type environment = namedef list list
179
180
181 (* ------------------------------------------------------------ *)
182 (* can be modified by the init_env function below, by
183 * the file environment_unix.h
184 *)
185 let initial_env = ref [
186 [VarOrFunc("NULL",
187 (Lib.al_type (Parse_c.type_of_string "void *"),
188 Ast_c.NotLocalVar));
189
190 (*
191 VarOrFunc("malloc",
192 (Lib.al_type(Parse_c.type_of_string "void* (*)(int size)"),
193 Ast_c.NotLocalVar));
194 VarOrFunc("free",
195 (Lib.al_type(Parse_c.type_of_string "void (*)(void *ptr)"),
196 Ast_c.NotLocalVar));
197 *)
198 ]
199 ]
200
201
202 let typedef_debug = ref false
203
204
205 (* ------------------------------------------------------------ *)
206 (* generic, lookup and also return remaining env for further lookup *)
207 let rec lookup_env2 f env =
208 match env with
209 | [] -> raise Not_found
210 | []::zs -> lookup_env2 f zs
211 | (x::xs)::zs ->
212 (match f x with
213 | None -> lookup_env2 f (xs::zs)
214 | Some y -> y, xs::zs
215 )
216 let lookup_env a b =
217 Common.profile_code "TAC.lookup_env" (fun () -> lookup_env2 a b)
218
219
220
221 let member_env lookupf env =
222 try
223 let _ = lookupf env in
224 true
225 with Not_found -> false
226
227
228
229
230 (* ------------------------------------------------------------ *)
231
232
233 let lookup_var s env =
234 let f = function
235 | VarOrFunc (s2, typ) -> if s2 =$= s then Some typ else None
236 | _ -> None
237 in
238 lookup_env f env
239
240 let lookup_typedef s env =
241 if !typedef_debug then pr2 ("looking for: " ^ s);
242 let f = function
243 | TypeDef (s2, typ) -> if s2 =$= s then Some typ else None
244 | _ -> None
245 in
246 lookup_env f env
247
248 let lookup_structunion (_su, s) env =
249 let f = function
250 | StructUnionNameDef (s2, typ) -> if s2 =$= s then Some typ else None
251 | _ -> None
252 in
253 lookup_env f env
254
255 let lookup_macro s env =
256 let f = function
257 | Macro (s2, typ) -> if s2 =$= s then Some typ else None
258 | _ -> None
259 in
260 lookup_env f env
261
262 let lookup_enum s env =
263 let f = function
264 | EnumConstant (s2, typ) -> if s2 =$= s then Some typ else None
265 | _ -> None
266 in
267 lookup_env f env
268
269
270 let lookup_typedef a b =
271 Common.profile_code "TAC.lookup_typedef" (fun () -> lookup_typedef a b)
272
273
274
275 (*****************************************************************************)
276 (* "type-lookup" *)
277 (*****************************************************************************)
278
279 (* find_final_type is used to know to what type a field correspond in
280 * x.foo. Sometimes the type of x is a typedef or a structName in which
281 * case we must look in environment to find the complete type, here
282 * structUnion that contains the information.
283 *
284 * Because in C one can redefine in nested blocks some typedefs,
285 * struct, or variables, we have a static scoping resolving process.
286 * So, when we look for the type of a var, if this var is in an
287 * enclosing block, then maybe its type refer to a typdef of this
288 * enclosing block, so must restart the "type-resolving" of this
289 * typedef from this enclosing block, not from the bottom. So our
290 * "resolving-type functions" take an env and also return an env from
291 * where the next search must be performed. *)
292
293 (*
294 let rec find_final_type ty env =
295
296 match Ast_c.unwrap_typeC ty with
297 | BaseType x -> (BaseType x) +> Ast_c.rewrap_typeC ty
298
299 | Pointer t -> (Pointer (find_final_type t env)) +> Ast_c.rewrap_typeC ty
300 | Array (e, t) -> Array (e, find_final_type t env) +> Ast_c.rewrap_typeC ty
301
302 | StructUnion (sopt, su) -> StructUnion (sopt, su) +> Ast_c.rewrap_typeC ty
303
304 | FunctionType t -> (FunctionType t) (* todo ? *) +> Ast_c.rewrap_typeC ty
305 | Enum (s, enumt) -> (Enum (s, enumt)) (* todo? *) +> Ast_c.rewrap_typeC ty
306 | EnumName s -> (EnumName s) (* todo? *) +> Ast_c.rewrap_typeC ty
307
308 | StructUnionName (su, s) ->
309 (try
310 let ((structtyp,ii), env') = lookup_structunion (su, s) env in
311 Ast_c.nQ, (StructUnion (Some s, structtyp), ii)
312 (* old: +> Ast_c.rewrap_typeC ty
313 * but must wrap with good ii, otherwise pretty_print_c
314 * will be lost and raise some Impossible
315 *)
316 with Not_found ->
317 ty
318 )
319
320 | TypeName s ->
321 (try
322 let (t', env') = lookup_typedef s env in
323 find_final_type t' env'
324 with Not_found ->
325 ty
326 )
327
328 | ParenType t -> find_final_type t env
329 | Typeof e -> failwith "typeof"
330 *)
331
332
333
334
335 (* ------------------------------------------------------------ *)
336 let rec type_unfold_one_step ty env =
337
338 match Ast_c.unwrap_typeC ty with
339 | NoType -> ty
340 | BaseType x -> ty
341 | Pointer t -> ty
342 | Array (e, t) -> ty
343
344 | StructUnion (sopt, su, fields) -> ty
345
346 | FunctionType t -> ty
347 | Enum (s, enumt) -> ty
348
349 | EnumName s -> ty (* todo: look in env when will have EnumDef *)
350
351 | StructUnionName (su, s) ->
352 (try
353 let (((su,fields),ii), env') = lookup_structunion (su, s) env in
354 Ast_c.mk_ty (StructUnion (su, Some s, fields)) ii
355 (* old: +> Ast_c.rewrap_typeC ty
356 * but must wrap with good ii, otherwise pretty_print_c
357 * will be lost and raise some Impossible
358 *)
359 with Not_found ->
360 ty
361 )
362
363 | TypeName (name, _typ) ->
364 let s = Ast_c.str_of_name name in
365 (try
366 if !typedef_debug then pr2 "type_unfold_one_step: lookup_typedef";
367 let (t', env') = lookup_typedef s env in
368 type_unfold_one_step t' env'
369 with Not_found ->
370 ty
371 )
372
373 | ParenType t -> type_unfold_one_step t env
374 | TypeOfExpr e ->
375 pr2_once ("Type_annoter: not handling typeof");
376 ty
377 | TypeOfType t -> type_unfold_one_step t env
378
379
380
381
382
383
384
385
386
387 (* normalizer. can be seen as the opposite of the previous function as
388 * we "fold" at least for the structUnion. Should return something that
389 * Type_c.is_completed_fullType likes, something that makes it easier
390 * for the programmer to work on, that has all the needed information
391 * for most tasks.
392 *)
393 let rec typedef_fix ty env =
394 match Ast_c.unwrap_typeC ty with
395 | NoType ->
396 ty
397 | BaseType x ->
398 ty
399 | Pointer t ->
400 Pointer (typedef_fix t env) +> Ast_c.rewrap_typeC ty
401 | Array (e, t) ->
402 Array (e, typedef_fix t env) +> Ast_c.rewrap_typeC ty
403 | StructUnion (su, sopt, fields) ->
404 (* normalize, fold.
405 * todo? but what if correspond to a nested struct def ?
406 *)
407 Type_c.structdef_to_struct_name ty
408 | FunctionType ft ->
409 (FunctionType ft) (* todo ? *) +> Ast_c.rewrap_typeC ty
410 | Enum (s, enumt) ->
411 (Enum (s, enumt)) (* todo? *) +> Ast_c.rewrap_typeC ty
412 | EnumName s ->
413 (EnumName s) (* todo? *) +> Ast_c.rewrap_typeC ty
414
415 (* we prefer StructUnionName to StructUnion when it comes to typed metavar *)
416 | StructUnionName (su, s) -> ty
417
418 (* keep the typename but complete with more information *)
419 | TypeName (name, typ) ->
420 let s = Ast_c.str_of_name name in
421 (match typ with
422 | Some _ ->
423 pr2 ("typedef value already there:" ^ s);
424 ty
425 | None ->
426 (try
427 if !typedef_debug then pr2 "typedef_fix: lookup_typedef";
428 let (t', env') = lookup_typedef s env in
429
430 (* bugfix: termination bug if use env instead of env' below, because
431 * can have some weird mutually recursive typedef which
432 * each new type alias search for its mutual def.
433 *)
434 TypeName (name, Some (typedef_fix t' env')) +> Ast_c.rewrap_typeC ty
435 with Not_found ->
436 ty
437 ))
438
439 (* remove paren for better matching with typed metavar. kind of iso again *)
440 | ParenType t ->
441 typedef_fix t env
442 | TypeOfExpr e ->
443 pr2_once ("Type_annoter: not handling typeof");
444 ty
445
446 | TypeOfType t ->
447 typedef_fix t env
448
449
450 (*****************************************************************************)
451 (* Helpers, part 1 *)
452 (*****************************************************************************)
453
454 let type_of_s2 s =
455 (Lib.al_type (Parse_c.type_of_string s))
456 let type_of_s a =
457 Common.profile_code "Type_c.type_of_s" (fun () -> type_of_s2 a)
458
459
460 (* pad: pb on:
461 * /home/pad/software-os-src2/freebsd/contrib/ipfilter/netinet/ip_fil_freebsd.c
462 * because in the code there is:
463 * static iss_seq_off = 0;
464 * which in the parser was generating a default int without a parse_info.
465 * I now add a fake parse_info for such default int so no more failwith
466 * normally.
467 *)
468
469 let rec is_simple_expr expr =
470 match Ast_c.unwrap_expr expr with
471 (* todo? handle more special cases ? *)
472
473 | Ident _ ->
474 true
475 | Constant (_) ->
476 true
477 | Unary (op, e) ->
478 true
479 | Binary (e1, op, e2) ->
480 true
481 | Cast (t, e) ->
482 true
483 | ParenExpr (e) -> is_simple_expr e
484
485 | _ -> false
486
487 (*****************************************************************************)
488 (* Typing rules *)
489 (*****************************************************************************)
490 (* now in type_c.ml *)
491
492
493
494 (*****************************************************************************)
495 (* (Semi) Globals, Julia's style *)
496 (*****************************************************************************)
497
498 (* opti: cache ? use hash ? *)
499 let _scoped_env = ref !initial_env
500
501 (* memoise unnanoted var, to avoid too much warning messages *)
502 let _notyped_var = ref (Hashtbl.create 100)
503
504 let new_scope() = _scoped_env := []::!_scoped_env
505 let del_scope() = _scoped_env := List.tl !_scoped_env
506
507 let do_in_new_scope f =
508 begin
509 new_scope();
510 let res = f() in
511 del_scope();
512 res
513 end
514
515 let add_in_scope namedef =
516 let (current, older) = Common.uncons !_scoped_env in
517 _scoped_env := (namedef::current)::older
518
519
520 (* ------------------------------------------------------------ *)
521
522 (* sort of hackish... *)
523 let islocal info =
524 if List.length (!_scoped_env) =|= List.length !initial_env
525 then Ast_c.NotLocalVar
526 else Ast_c.LocalVar info
527
528 (* ------------------------------------------------------------ *)
529 (* the warning argument is here to allow some binding to overwrite an
530 * existing one. With function, we first have the prototype and then the def,
531 * and the def binding with the same string is not an error.
532 *
533 * todo?: but if we define two times the same function, then we will not
534 * detect it :( it would require to make a diff between adding a binding
535 * from a prototype and from a definition.
536 *
537 * opti: disabling the check_annotater flag have some important
538 * performance benefit.
539 *
540 *)
541 let add_binding2 namedef warning =
542 let (current_scope, _older_scope) = Common.uncons !_scoped_env in
543
544 if !Flag_parsing_c.check_annotater then begin
545 (match namedef with
546 | VarOrFunc (s, typ) ->
547 if Hashtbl.mem !_notyped_var s
548 then pr2 ("warning: found typing information for a variable that was" ^
549 "previously unknown:" ^ s);
550 | _ -> ()
551 );
552
553 let (memberf, s) =
554 (match namedef with
555 | VarOrFunc (s, typ) ->
556 member_env (lookup_var s), s
557 | TypeDef (s, typ) ->
558 member_env (lookup_typedef s), s
559 | StructUnionNameDef (s, (su, typ)) ->
560 member_env (lookup_structunion (su, s)), s
561 | Macro (s, body) ->
562 member_env (lookup_macro s), s
563 | EnumConstant (s, body) ->
564 member_env (lookup_enum s), s
565 ) in
566
567 if memberf [current_scope] && warning
568 then pr2 ("Type_annoter: warning, " ^ s ^
569 " is already in current binding" ^ "\n" ^
570 " so there is a weird shadowing");
571 end;
572 add_in_scope namedef
573
574 let add_binding namedef warning =
575 Common.profile_code "TAC.add_binding" (fun () -> add_binding2 namedef warning)
576
577
578
579 (*****************************************************************************)
580 (* Helpers, part 2 *)
581 (*****************************************************************************)
582
583 let lookup_opt_env lookupf s =
584 Common.optionise (fun () ->
585 lookupf s !_scoped_env
586 )
587
588 let unwrap_unfold_env2 typ =
589 Ast_c.unwrap_typeC
590 (type_unfold_one_step typ !_scoped_env)
591 let unwrap_unfold_env typ =
592 Common.profile_code "TAC.unwrap_unfold_env" (fun () -> unwrap_unfold_env2 typ)
593
594 let typedef_fix a b =
595 Common.profile_code "TAC.typedef_fix" (fun () -> typedef_fix a b)
596
597 let make_info_def_fix x =
598 Type_c.make_info_def (typedef_fix x !_scoped_env)
599
600 let make_info_fix (typ, local) =
601 Type_c.make_info ((typedef_fix typ !_scoped_env),local)
602
603
604 let make_info_def = Type_c.make_info_def
605
606 (*****************************************************************************)
607 (* Main typer code, put later in a visitor *)
608 (*****************************************************************************)
609
610 let annotater_expr_visitor_subpart = (fun (k,bigf) expr ->
611
612 let ty =
613 match Ast_c.unwrap_expr expr with
614
615 (* -------------------------------------------------- *)
616 (* todo: should analyse the 's' for int to know if unsigned or not *)
617 | Constant (String (s,kind)) -> make_info_def (type_of_s "char []")
618 | Constant MultiString _ -> make_info_def (type_of_s "char []")
619 | Constant (Char (s,kind)) -> make_info_def (type_of_s "char")
620 | Constant (Int (s,kind)) ->
621 (* this seems really unpleasant, but perhaps the type needs to be set
622 up in some way that allows pretty printing *)
623 make_info_def
624 (match kind with
625 (* matches limited by what is generated in lexer_c.mll *)
626 Si(Signed,CInt) -> type_of_s "int"
627 | Si(UnSigned,CInt) -> type_of_s "unsigned int"
628 | Si(Signed,CLong) -> type_of_s "long"
629 | Si(UnSigned,CLong) -> type_of_s "unsigned long"
630 | Si(Signed,CLongLong) -> type_of_s "long long"
631 | Si(UnSigned,CLongLong) -> type_of_s "unsigned long long"
632 | _ -> failwith "unexpected kind for constant")
633 | Constant (Float (s,kind)) ->
634 let fake = Ast_c.fakeInfo (Common.fake_parse_info) in
635 let fake = Ast_c.rewrap_str "float" fake in
636 let iinull = [fake] in
637 make_info_def (Ast_c.mk_ty (BaseType (FloatType kind)) iinull)
638
639
640 (* -------------------------------------------------- *)
641 (* note: could factorize this code with the code for Ident
642 * and the other code for Funcall below. But as the Ident can be
643 * a macro-func, I prefer to handle it separately. So
644 * this rule can handle the macro-func, the Ident-rule can handle
645 * the macro-var, and the other FunCall-rule the regular
646 * function calls through fields.
647 * Also as I don't want a warning on the Ident that are a FunCall,
648 * easier to have a rule separate from the Ident rule.
649 *)
650 | FunCall (e1, args) ->
651 (match Ast_c.unwrap_expr e1 with
652 | Ident (ident) ->
653 (* recurse *)
654 args +> List.iter (fun (e,ii) ->
655 (* could typecheck if arguments agree with prototype *)
656 Visitor_c.vk_argument bigf e
657 );
658 let s = Ast_c.str_of_name ident in
659 (match lookup_opt_env lookup_var s with
660 | Some ((typ,local),_nextenv) ->
661
662 (* set type for ident *)
663 let tyinfo = make_info_fix (typ, local) in
664 Ast_c.set_type_expr e1 tyinfo;
665
666 (match unwrap_unfold_env typ with
667 | FunctionType (ret, params) -> make_info_def ret
668
669 (* can be function pointer, C have an iso for that,
670 * same pfn() syntax than regular function call.
671 *)
672 | Pointer (typ2) ->
673 (match unwrap_unfold_env typ2 with
674 | FunctionType (ret, params) -> make_info_def ret
675 | _ -> Type_c.noTypeHere
676 )
677 | _ -> Type_c.noTypeHere
678 )
679 | None ->
680
681 (match lookup_opt_env lookup_macro s with
682 | Some ((defkind, defval), _nextenv) ->
683 (match defkind, defval with
684 | DefineFunc _, DefineExpr e ->
685 let rettype = Ast_c.get_onlytype_expr e in
686
687 (* todo: could also set type for ident ?
688 have return type and at least type of concrete
689 parameters so can generate a fake FunctionType
690 *)
691 let macrotype_opt =
692 Type_c.fake_function_type rettype args
693 in
694
695 macrotype_opt +> Common.do_option (fun t ->
696 pr2 ("Type_annotater: generate fake function type" ^
697 "for macro: " ^ s);
698 let tyinfo = make_info_def_fix t in
699 Ast_c.set_type_expr e1 tyinfo;
700 );
701
702 Ast_c.get_type_expr e
703 | DefineVar, _ ->
704 pr2 ("Type_annoter: not a macro-func: " ^ s);
705 Type_c.noTypeHere
706 | Undef, _ ->
707 pr2 ("Type_annoter: not a macro-func: " ^ s);
708 Type_c.noTypeHere
709 | DefineFunc _, _ ->
710 (* normally the FunCall case should have caught it *)
711 pr2 ("Type_annoter: not a macro-func-expr: " ^ s);
712 Type_c.noTypeHere
713 )
714 | None ->
715 pr2_once ("type_annotater: no type for function ident: " ^ s);
716 Type_c.noTypeHere
717 )
718 )
719
720
721 | _e ->
722 k expr;
723
724 (Ast_c.get_type_expr e1) +> Type_c.do_with_type (fun typ ->
725 (* copy paste of above *)
726 (match unwrap_unfold_env typ with
727 | FunctionType (ret, params) -> make_info_def ret
728 | Pointer (typ) ->
729 (match unwrap_unfold_env typ with
730 | FunctionType (ret, params) -> make_info_def ret
731 | _ -> Type_c.noTypeHere
732 )
733 | _ -> Type_c.noTypeHere
734 )
735 )
736 )
737
738
739 (* -------------------------------------------------- *)
740 | Ident (ident) ->
741 let s = Ast_c.str_of_name ident in
742 (match lookup_opt_env lookup_var s with
743 | Some ((typ,local),_nextenv) ->
744 make_info_fix (typ,local)
745 | None ->
746 (match lookup_opt_env lookup_macro s with
747 | Some ((defkind, defval), _nextenv) ->
748 (match defkind, defval with
749 | DefineVar, DefineExpr e ->
750 Ast_c.get_type_expr e
751 | DefineVar, _ ->
752 pr2 ("Type_annoter: not a expression: " ^ s);
753 Type_c.noTypeHere
754 | DefineFunc _, _ ->
755 (* normally the FunCall case should have catch it *)
756 pr2 ("Type_annoter: not a macro-var: " ^ s);
757 Type_c.noTypeHere
758 | Undef, _ ->
759 pr2 ("Type_annoter: not a expression: " ^ s);
760 Type_c.noTypeHere
761 )
762 | None ->
763 (match lookup_opt_env lookup_enum s with
764 | Some (_, _nextenv) ->
765 make_info_def (type_of_s "int")
766 | None ->
767 if not (s =~ "[A-Z_]+") (* if macro then no warning *)
768 then
769 if !Flag_parsing_c.check_annotater then
770 if not (Hashtbl.mem !_notyped_var s)
771 then begin
772 pr2 ("Type_annoter: no type found for: " ^ s);
773 Hashtbl.add !_notyped_var s true;
774 end
775 else ()
776 else
777 pr2 ("Type_annoter: no type found for: " ^ s)
778 ;
779 Type_c.noTypeHere
780 )
781 )
782 )
783
784 (* -------------------------------------------------- *)
785 (* C isomorphism on type on array and pointers *)
786 | Unary (e, DeRef)
787 | ArrayAccess (e, _) ->
788 k expr; (* recurse to set the types-ref of sub expressions *)
789
790 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t ->
791 (* todo: maybe not good env !! *)
792 match unwrap_unfold_env t with
793 | Pointer x
794 | Array (_, x) ->
795 make_info_def_fix x
796 | _ -> Type_c.noTypeHere
797
798 )
799
800 | Unary (e, GetRef) ->
801 k expr; (* recurse to set the types-ref of sub expressions *)
802
803 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t ->
804 (* must generate an element so that '=' can be used
805 * to compare type ?
806 *)
807 let fake = Ast_c.fakeInfo Common.fake_parse_info in
808 let fake = Ast_c.rewrap_str "*" fake in
809
810 let ft = Ast_c.mk_ty (Pointer t) [fake] in
811 make_info_def_fix ft
812 )
813
814
815 (* -------------------------------------------------- *)
816 (* fields *)
817 | RecordAccess (e, namefld)
818 | RecordPtAccess (e, namefld) as x ->
819
820 let fld = Ast_c.str_of_name namefld in
821
822 k expr; (* recurse to set the types-ref of sub expressions *)
823
824 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t ->
825
826 let topt =
827 match x with
828 | RecordAccess _ -> Some t
829 | RecordPtAccess _ ->
830 (match unwrap_unfold_env t with
831 | Pointer (t) -> Some t
832 | _ -> None
833 )
834 | _ -> raise Impossible
835
836 in
837 (match topt with
838 | None -> Type_c.noTypeHere
839 | Some t ->
840 match unwrap_unfold_env t with
841 | StructUnion (su, sopt, fields) ->
842 (try
843 (* todo: which env ? *)
844 make_info_def_fix
845 (Type_c.type_field fld (su, fields))
846 with
847 | Not_found ->
848 pr2 (spf
849 "TYPE-ERROR: field '%s' does not belong in struct %s"
850 fld (match sopt with Some s -> s |_ -> "<anon>"));
851 Type_c.noTypeHere
852 | Multi_found ->
853 pr2 "TAC:MultiFound";
854 Type_c.noTypeHere
855 )
856 | _ -> Type_c.noTypeHere
857 )
858 )
859
860
861
862 (* -------------------------------------------------- *)
863 | Cast (t, e) ->
864 k expr;
865 (* todo: if infer, can "push" info ? add_types_expr [t] e ? *)
866 make_info_def_fix (Lib.al_type t)
867
868 (* todo? lub, hmm maybe not, cos type must be e1 *)
869 | Assignment (e1, op, e2) ->
870 k expr;
871 (* value of an assignment is the value of the RHS expression, but its
872 type is the type of the lhs expression. Use the rhs exp if no
873 information is available *)
874 (match Ast_c.get_type_expr e1 with
875 (None,_) -> Ast_c.get_type_expr e2
876 | (Some ty,t) -> (Some ty,t))
877 | Sequence (e1, e2) ->
878 k expr;
879 Ast_c.get_type_expr e2
880
881 | Binary (e1, Logical _, e2) ->
882 k expr;
883 make_info_def (type_of_s "int")
884
885 (* todo: lub *)
886 | Binary (e1, Arith op, e2) ->
887 k expr;
888 Type_c.lub op (Type_c.get_opt_type e1) (Type_c.get_opt_type e2)
889
890 | CondExpr (cond, e1opt, e2) ->
891 k expr;
892 Ast_c.get_type_expr e2
893
894
895 | ParenExpr e ->
896 k expr;
897 Ast_c.get_type_expr e
898
899 | Infix (e, op) | Postfix (e, op) ->
900 k expr;
901 Ast_c.get_type_expr e
902
903 (* pad: julia wrote this ? *)
904 | Unary (e, UnPlus) ->
905 k expr; (* recurse to set the types-ref of sub expressions *)
906 make_info_def (type_of_s "int")
907 (* todo? can convert from unsigned to signed if UnMinus ? *)
908 | Unary (e, UnMinus) ->
909 k expr; (* recurse to set the types-ref of sub expressions *)
910 make_info_def (type_of_s "int")
911
912 | SizeOfType _|SizeOfExpr _ ->
913 k expr; (* recurse to set the types-ref of sub expressions *)
914 make_info_def (type_of_s "size_t")
915
916 | Constructor (ft, ini) ->
917 k expr; (* recurse to set the types-ref of sub expressions *)
918 make_info_def (Lib.al_type ft)
919
920 | Unary (e, Not) ->
921 k expr; (* recurse to set the types-ref of sub expressions *)
922 (* the result of ! is always 0 or 1, not the argument type *)
923 make_info_def (type_of_s "int")
924 | Unary (e, Tilde) ->
925 k expr; (* recurse to set the types-ref of sub expressions *)
926 Ast_c.get_type_expr e
927
928 (* -------------------------------------------------- *)
929 (* todo *)
930 | Unary (_, GetRefLabel) ->
931 k expr; (* recurse to set the types-ref of sub expressions *)
932 pr2_once "Type annotater:not handling GetRefLabel";
933 Type_c.noTypeHere
934 (* todo *)
935 | StatementExpr _ ->
936 k expr; (* recurse to set the types-ref of sub expressions *)
937 pr2_once "Type annotater:not handling StatementExpr";
938 Type_c.noTypeHere
939 (*
940 | _ -> k expr; Type_c.noTypeHere
941 *)
942
943 | New ty ->
944 k expr;
945 pr2_once "Type annotater:not handling New";
946 Type_c.noTypeHere (* TODO *)
947
948 | Delete e ->
949 k expr;
950 pr2_once "Type annotater:not handling Delete";
951 Type_c.noTypeHere (* TODO *)
952
953 in
954 Ast_c.set_type_expr expr ty
955
956 )
957
958
959 (*****************************************************************************)
960 (* Visitor *)
961 (*****************************************************************************)
962
963 (* Processing includes that were added after a cpp_ast_c makes the
964 * type annotater quite slow, especially when the depth of cpp_ast_c is
965 * big. But for such includes the only thing we really want is to modify
966 * the environment to have enough type information. We don't need
967 * to type the expressions inside those includes (they will be typed
968 * when we process the include file directly). Here the goal is
969 * to not recurse.
970 *
971 * Note that as usually header files contain mostly structure
972 * definitions and defines, that means we still have to do lots of work.
973 * We only win on function definition bodies, but usually header files
974 * have just prototypes, or inline function definitions which anyway have
975 * usually a small body. But still, we win. It also makes clearer
976 * that when processing include as we just need the environment, the caller
977 * of this module can do further optimisations such as memorising the
978 * state of the environment after each header files.
979 *
980 *
981 * For sparse its makes the annotating speed goes from 9s to 4s
982 * For Linux the speedup is even better, from ??? to ???.
983 *
984 * Because There would be some copy paste with annotate_program, it is
985 * better to factorize code hence the just_add_in_env parameter below.
986 *
987 * todo? alternative optimisation for the include problem:
988 * - processing all headers files one time and construct big env
989 * - use hashtbl for env (but apparently not biggest problem)
990 *)
991
992 let rec visit_toplevel ~just_add_in_env ~depth elem =
993 let need_annotate_body = not just_add_in_env in
994
995 let bigf = { Visitor_c.default_visitor_c with
996
997 (* ------------------------------------------------------------ *)
998 Visitor_c.kcppdirective = (fun (k, bigf) directive ->
999 match directive with
1000 (* do error messages for type annotater only for the real body of the
1001 * file, not inside include.
1002 *)
1003 | Include {i_content = opt} ->
1004 opt +> Common.do_option (fun (filename, program) ->
1005 Common.save_excursion Flag_parsing_c.verbose_type (fun () ->
1006 Flag_parsing_c.verbose_type := false;
1007
1008 (* old: Visitor_c.vk_program bigf program;
1009 * opti: set the just_add_in_env
1010 *)
1011 program +> List.iter (fun elem ->
1012 visit_toplevel ~just_add_in_env:true ~depth:(depth+1) elem
1013 )
1014 )
1015 )
1016
1017 | Define ((s,ii), (defkind, defval)) ->
1018
1019
1020 (* even if we are in a just_add_in_env phase, such as when
1021 * we process include, as opposed to the body of functions,
1022 * with macros we still to type the body of the macro as
1023 * the macro has no type and so we infer its type from its
1024 * body (and one day later maybe from its use).
1025 *)
1026 (match defval with
1027 (* can try to optimize and recurse only when the define body
1028 * is simple ?
1029 *)
1030
1031 | DefineExpr expr ->
1032 (* prevent macro-declared variables from leaking out *)
1033 do_in_new_scope (fun () ->
1034 if is_simple_expr expr
1035 (* even if not need_annotate_body, still recurse*)
1036 then k directive
1037 else
1038 if need_annotate_body
1039 then k directive)
1040 | _ ->
1041 do_in_new_scope (fun () ->
1042 if need_annotate_body
1043 then k directive)
1044 );
1045
1046 add_binding (Macro (s, (defkind, defval) )) true;
1047
1048 | PragmaAndCo _ -> ()
1049 );
1050
1051 (* ------------------------------------------------------------ *)
1052 (* main typer code *)
1053 (* ------------------------------------------------------------ *)
1054 Visitor_c.kexpr = annotater_expr_visitor_subpart;
1055
1056 (* ------------------------------------------------------------ *)
1057 Visitor_c.kstatement = (fun (k, bigf) st ->
1058 match Ast_c.unwrap_st st with
1059 | Compound statxs -> do_in_new_scope (fun () -> k st);
1060 | _ -> k st
1061 );
1062 (* ------------------------------------------------------------ *)
1063 Visitor_c.kdecl = (fun (k, bigf) d ->
1064 (match d with
1065 | (DeclList (xs, ii)) ->
1066 xs +> List.iter (fun ({v_namei = var; v_type = t;
1067 v_storage = sto; v_local = local} as x
1068 , iicomma) ->
1069
1070 (* to add possible definition in type found in Decl *)
1071 Visitor_c.vk_type bigf t;
1072
1073
1074 let local =
1075 match local with
1076 | Ast_c.NotLocalDecl -> Ast_c.NotLocalVar
1077 | Ast_c.LocalDecl -> Ast_c.LocalVar (Ast_c.info_of_type t)
1078 in
1079 var +> Common.do_option (fun (name, iniopt) ->
1080 let s = Ast_c.str_of_name name in
1081
1082 match sto with
1083 | StoTypedef, _inline ->
1084 add_binding (TypeDef (s,Lib.al_type t)) true;
1085 | _ ->
1086 add_binding (VarOrFunc (s, (Lib.al_type t, local))) true;
1087
1088 x.v_type_bis :=
1089 Some (typedef_fix (Lib.al_type t) !_scoped_env);
1090
1091 if need_annotate_body then begin
1092 (* int x = sizeof(x) is legal so need process ini *)
1093 match iniopt with
1094 Ast_c.NoInit -> ()
1095 | Ast_c.ValInit(iini,init) -> Visitor_c.vk_ini bigf init
1096 | Ast_c.ConstrInit((args,_)) ->
1097 args +> List.iter (fun (e,ii) ->
1098 Visitor_c.vk_argument bigf e
1099 )
1100 end
1101 );
1102 );
1103 | MacroDecl _ ->
1104 if need_annotate_body
1105 then k d
1106 );
1107
1108 );
1109
1110 (* ------------------------------------------------------------ *)
1111 Visitor_c.ktype = (fun (k, bigf) typ ->
1112 (* bugfix: have a 'Lib.al_type typ' before, but because we can
1113 * have enum with possible expression, we don't want to change
1114 * the ref of abstract-lined types, but the real one, so
1115 * don't al_type here
1116 *)
1117 let (_q, tbis) = typ in
1118 match Ast_c.unwrap_typeC typ with
1119 | StructUnion (su, Some s, structType) ->
1120 let structType' = Lib.al_fields structType in
1121 let ii = Ast_c.get_ii_typeC_take_care tbis in
1122 let ii' = Lib.al_ii ii in
1123 add_binding (StructUnionNameDef (s, ((su, structType'),ii'))) true;
1124
1125 if need_annotate_body
1126 then k typ (* todo: restrict ? new scope so use do_in_scope ? *)
1127
1128 | Enum (sopt, enums) ->
1129
1130 enums +> List.iter (fun ((name, eopt), iicomma) ->
1131
1132 let s = Ast_c.str_of_name name in
1133
1134 if need_annotate_body
1135 then eopt +> Common.do_option (fun (ieq, e) ->
1136 Visitor_c.vk_expr bigf e
1137 );
1138 add_binding (EnumConstant (s, sopt)) true;
1139 );
1140
1141
1142 (* TODO: if have a TypeName, then maybe can fill the option
1143 * information.
1144 *)
1145 | _ ->
1146 if need_annotate_body
1147 then k typ
1148
1149 );
1150
1151 (* ------------------------------------------------------------ *)
1152 Visitor_c.ktoplevel = (fun (k, bigf) elem ->
1153 _notyped_var := Hashtbl.create 100;
1154 match elem with
1155 | Definition def ->
1156 let {f_name = name;
1157 f_type = ((returnt, (paramst, b)) as ftyp);
1158 f_storage = sto;
1159 f_body = statxs;
1160 f_old_c_style = oldstyle;
1161 },ii
1162 = def
1163 in
1164 let (i1, i2) =
1165 match ii with
1166 (* what is iifunc1? it should be a type. jll
1167 * pad: it's the '(' in the function definition. The
1168 * return type is part of f_type.
1169 *)
1170 | iifunc1::iifunc2::ibrace1::ibrace2::ifakestart::isto ->
1171 iifunc1, iifunc2
1172 | _ -> raise Impossible
1173 in
1174 let funcs = Ast_c.str_of_name name in
1175
1176 (match oldstyle with
1177 | None ->
1178 let typ' =
1179 Lib.al_type (Ast_c.mk_ty (FunctionType ftyp) [i1;i2]) in
1180
1181 add_binding (VarOrFunc (funcs, (typ',islocal i1.Ast_c.pinfo)))
1182 false;
1183
1184 if need_annotate_body then
1185 do_in_new_scope (fun () ->
1186 paramst +> List.iter (fun ({p_namei= nameopt; p_type= t},_)->
1187 match nameopt with
1188 | Some name ->
1189 let s = Ast_c.str_of_name name in
1190 let local = Ast_c.LocalVar (Ast_c.info_of_type t) in
1191 add_binding (VarOrFunc (s,(Lib.al_type t,local))) true
1192 | None ->
1193 pr2 "no type, certainly because Void type ?"
1194 );
1195 (* recurse *)
1196 k elem
1197 );
1198 | Some oldstyle ->
1199 (* generate regular function type *)
1200
1201 pr2 "TODO generate type for function";
1202 (* add bindings *)
1203 if need_annotate_body then
1204 do_in_new_scope (fun () ->
1205 (* recurse. should naturally call the kdecl visitor and
1206 * add binding
1207 *)
1208 k elem;
1209 );
1210
1211 );
1212 | CppTop x ->
1213 (match x with
1214 | Define ((s,ii), (DefineVar, DefineType t)) ->
1215 add_binding (TypeDef (s,Lib.al_type t)) true;
1216 | _ -> k elem
1217 )
1218
1219 | Declaration _
1220
1221
1222
1223 | IfdefTop _
1224 | MacroTop _
1225 | EmptyDef _
1226 | NotParsedCorrectly _
1227 | FinalDef _
1228 ->
1229 k elem
1230 );
1231 }
1232 in
1233 if just_add_in_env
1234 then
1235 if depth > 1
1236 then Visitor_c.vk_toplevel bigf elem
1237 else
1238 Common.profile_code "TAC.annotate_only_included" (fun () ->
1239 Visitor_c.vk_toplevel bigf elem
1240 )
1241 else Visitor_c.vk_toplevel bigf elem
1242
1243 (*****************************************************************************)
1244 (* Entry point *)
1245 (*****************************************************************************)
1246 (* catch all the decl to grow the environment *)
1247
1248
1249 let rec (annotate_program2 :
1250 environment -> toplevel list -> (toplevel * environment Common.pair) list) =
1251 fun env prog ->
1252
1253 (* globals (re)initialialisation *)
1254 _scoped_env := env;
1255 _notyped_var := (Hashtbl.create 100);
1256
1257 prog +> List.map (fun elem ->
1258 let beforeenv = !_scoped_env in
1259 visit_toplevel ~just_add_in_env:false ~depth:0 elem;
1260 let afterenv = !_scoped_env in
1261 (elem, (beforeenv, afterenv))
1262 )
1263
1264
1265
1266
1267 (*****************************************************************************)
1268 (* Annotate test *)
1269 (*****************************************************************************)
1270
1271 (* julia: for coccinelle *)
1272 let annotate_test_expressions prog =
1273 let rec propagate_test e =
1274 let ((e_term,info),_) = e in
1275 let (ty,_) = !info in
1276 info := (ty,Test);
1277 match e_term with
1278 Binary(e1,Logical(AndLog),e2)
1279 | Binary(e1,Logical(OrLog),e2) -> propagate_test e1; propagate_test e2
1280 | Unary(e1,Not) -> propagate_test e1
1281 | ParenExpr(e) -> propagate_test e
1282 | FunCall(e,args) -> (* not very nice, but so painful otherwise *)
1283 (match (unwrap e,args) with
1284 ((Ident(i),_),[(Left a,_)]) ->
1285 let nm = str_of_name i in
1286 if List.mem nm ["likely";"unlikely"]
1287 then propagate_test a
1288 else ()
1289 | _ -> ())
1290 | _ -> () in
1291
1292 let bigf = { Visitor_c.default_visitor_c with
1293 Visitor_c.kexpr = (fun (k,bigf) expr ->
1294 (match unwrap_expr expr with
1295 CondExpr(e,_,_) -> propagate_test e
1296 | Binary(e1,Logical(AndLog),e2)
1297 | Binary(e1,Logical(OrLog),e2) -> propagate_test e1; propagate_test e2
1298 | Unary(e1,Not) -> propagate_test e1
1299 | _ -> ()
1300 );
1301 k expr
1302 );
1303 Visitor_c.kstatement = (fun (k, bigf) st ->
1304 match unwrap_st st with
1305 Selection(s) ->
1306 (match s with If(e1,s1,s2) -> propagate_test e1 | _ -> ());
1307 k st;
1308 | Iteration(i) ->
1309 (match i with
1310 While(e,s) -> propagate_test e
1311 | DoWhile(s,e) -> propagate_test e
1312 | For(_,es,_,_) ->
1313 (match unwrap es with Some e -> propagate_test e | None -> ())
1314 | _ -> ());
1315 k st
1316 | _ -> k st
1317 )
1318 } in
1319 (prog +> List.iter (fun elem ->
1320 Visitor_c.vk_toplevel bigf elem
1321 ))
1322
1323
1324
1325 (*****************************************************************************)
1326 (* Annotate types *)
1327 (*****************************************************************************)
1328 let annotate_program env prog =
1329 Common.profile_code "TAC.annotate_program"
1330 (fun () ->
1331 let res = annotate_program2 env prog in
1332 annotate_test_expressions prog;
1333 res
1334 )
1335
1336 let annotate_type_and_localvar env prog =
1337 Common.profile_code "TAC.annotate_type"
1338 (fun () -> annotate_program2 env prog)
1339
1340
1341 (*****************************************************************************)
1342 (* changing default typing environment, do concatenation *)
1343 let init_env filename =
1344 pr2 ("init_env: " ^ filename);
1345 let (ast2, _stat) = Parse_c.parse_c_and_cpp filename in
1346 let ast = Parse_c.program_of_program2 ast2 in
1347
1348 let res = annotate_type_and_localvar !initial_env ast in
1349 match List.rev res with
1350 | [] -> pr2 "empty environment"
1351 | (_top,(env1,env2))::xs ->
1352 initial_env := !initial_env ++ env2;
1353 ()
1354