5779ee0586123800ad6eb375b183937c36cc8d0a
[bpt/coccinelle.git] / parsing_c / type_annoter_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes,
5 * Copyright (C) 2009 University of Urbana Champaign
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License (GPL)
9 * version 2 as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * file license.txt for more details.
15 *)
16
17 open Common
18
19 open Ast_c
20
21 module Lib = Lib_parsing_c
22
23 (*****************************************************************************)
24 (* Prelude *)
25 (*****************************************************************************)
26 (* History:
27 * - Done a first type checker in 2002, cf typing-semantic/, but
28 * was assuming that have all type info, and so was assuming had called
29 * cpp and everything was right.
30 * - Wrote this file, in 2006?, as we added pattern matching on type
31 * in coccinelle. Partial type annotater.
32 * - Julia extended it in 2008? to have localvar/notlocalvar and
33 * test/notest information, again used by coccinelle.
34 * - I extended it in Fall 2008 to have more type information for the
35 * global analysis. I also added some optimisations to process
36 * included code faster.
37 *
38 *
39 * Design choices. Can either do:
40 * - a kind of inferer
41 * - can first do a simple inferer, that just pass context
42 * - then a real inferer, managing partial info.
43 * type context = fullType option
44 *
45 * - extract the information from the .h files
46 * (so no inference at all needed)
47 *
48 * Difference with julia's code in parsing_cocci/type_infer.ml:
49 * - She handles just the variable namespace. She does not type
50 * field access or enum or macros. This is because cocci programs are
51 * usually simple and have no structure definition or macro definitions
52 * that we need to type anyway.
53 * - She does more propagation.
54 * - She does not have to handle the typedef isomorphism which force me
55 * to use those typedef_fix and type_unfold_one_step
56 * - She does not handle I think the function pointer C isomorphism.
57 *
58 * - She has a cleaner type_cocci without any info. In my case
59 * I need to do those ugly al_type, or generate fake infos.
60 * - She has more compact code. Perhaps because she does not have to
61 * handle the extra exp_info that she added on me :) So I need those
62 * do_with_type, make_info_xxx, etc.
63 *
64 * Note: if need to debug this annotater, use -show_trace_profile, it can
65 * help. You can also set the typedef_debug flag below.
66 *
67 *
68 *
69 * todo: expression contain types, and statements, which in turn can contain
70 * expression, so need recurse. Need define an annote_statement and
71 * annotate_type.
72 *
73 * todo: how deal with typedef isomorphisms ? How store them in Ast_c ?
74 * store all posible variations in ast_c ? a list of type instead of just
75 * the type ?
76 *
77 * todo: how to handle multiple possible definitions for entities like
78 * struct or typedefs ? Because of ifdef, we should store list of
79 * possibilities sometimes.
80 *
81 * todo: define a new type ? like type_cocci ? where have a bool ?
82 *
83 * semi: How handle scope ? When search for type of field, we return
84 * a type, but this type makes sense only in a certain scope.
85 * We could add a tag to each typedef, structUnionName to differentiate
86 * them and also associate in ast_c to the type the scope
87 * of this type, the env that were used to define this type.
88 *
89 * todo: handle better the search in previous env, the env'. Cf the
90 * termination problem in typedef_fix when I was searching in the same
91 * env.
92 *
93 *)
94
95 (*****************************************************************************)
96 (* Wrappers *)
97 (*****************************************************************************)
98 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_type
99
100 (*****************************************************************************)
101 (* Environment *)
102 (*****************************************************************************)
103
104 (* The different namespaces from stdC manual:
105 *
106 * You introduce two new name spaces with every block that you write.
107 *
108 * One name space includes all
109 * - functions,
110 * - objects,
111 * - type definitions,
112 * - and enumeration constants
113 * that you declare or define within the block.
114 *
115 * The other name space includes all
116 * - enumeration,
117 * - structure,
118 * - and union
119 * *tags* that you define within the block.
120 *
121 * You introduce a new member name space with every structure or union
122 * whose content you define. You identify a member name space by the
123 * type of left operand that you write for a member selection
124 * operator, as in x.y or p->y. A member name space ends with the end
125 * of the block in which you declare it.
126 *
127 * You introduce a new goto label name space with every function
128 * definition you write. Each goto label name space ends with its
129 * function definition.
130 *)
131
132 (* But I don't try to do a type-checker, I try to "resolve" type of var
133 * so don't need make difference between namespaces here.
134 *
135 * But, why not make simply a (string, kindstring) assoc ?
136 * Because we dont want that a variable shadow a struct definition, because
137 * they are still in 2 different namespace. But could for typedef,
138 * because VarOrFunc and Typedef are in the same namespace.
139 * But could do a record as in c_info.ml
140 *)
141
142
143 (* This type contains all "ident" like notion of C. Each time in Ast_c
144 * you have a string type (as in expression, function name, fields)
145 * then you need to manage the scope of this ident.
146 *
147 * The wrap for StructUnionNameDef contain the whole ii, the i for
148 * the string, the structUnion and the structType.
149 *
150 * Put Macro here ? after all the scoping rules for cpp macros is different
151 * and so does not vanish after the closing '}'.
152 *
153 * todo: EnumDef
154 *)
155 type namedef =
156 | VarOrFunc of string * Ast_c.exp_type
157 | EnumConstant of string * string option
158
159 (* also used for macro type aliases *)
160 | TypeDef of string * fullType
161 (* the structType contains nested "idents" with struct scope *)
162 | StructUnionNameDef of string * (structUnion * structType) wrap
163
164 (* cppext: *)
165 | Macro of string * (define_kind * define_val)
166
167
168 (* Because have nested scope, have nested list, hence the list list.
169 *
170 * opti? use a hash to accelerate ? hmm but may have some problems
171 * with hash to handle recursive lookup. For instance for the typedef
172 * example where have mutually recursive definition of the type,
173 * we must take care to not loop by starting the second search
174 * from the previous environment. With the list scheme in
175 * lookup_env below it's quite easy to do. With hash it may be
176 * more complicated.
177 *)
178 type environment = namedef list list
179
180
181 (* ------------------------------------------------------------ *)
182 (* can be modified by the init_env function below, by
183 * the file environment_unix.h
184 *)
185 let initial_env = ref [
186 [VarOrFunc("NULL",
187 (Lib.al_type (Parse_c.type_of_string "void *"),
188 Ast_c.NotLocalVar));
189
190 (*
191 VarOrFunc("malloc",
192 (Lib.al_type(Parse_c.type_of_string "void* (*)(int size)"),
193 Ast_c.NotLocalVar));
194 VarOrFunc("free",
195 (Lib.al_type(Parse_c.type_of_string "void (*)(void *ptr)"),
196 Ast_c.NotLocalVar));
197 *)
198 ]
199 ]
200
201
202 let typedef_debug = ref false
203
204
205 (* ------------------------------------------------------------ *)
206 (* generic, lookup and also return remaining env for further lookup *)
207 let rec lookup_env2 f env =
208 match env with
209 | [] -> raise Not_found
210 | []::zs -> lookup_env2 f zs
211 | (x::xs)::zs ->
212 (match f x with
213 | None -> lookup_env2 f (xs::zs)
214 | Some y -> y, xs::zs
215 )
216 let lookup_env a b =
217 Common.profile_code "TAC.lookup_env" (fun () -> lookup_env2 a b)
218
219
220
221 let member_env lookupf env =
222 try
223 let _ = lookupf env in
224 true
225 with Not_found -> false
226
227
228
229
230 (* ------------------------------------------------------------ *)
231
232
233 let lookup_var s env =
234 let f = function
235 | VarOrFunc (s2, typ) -> if s2 =$= s then Some typ else None
236 | _ -> None
237 in
238 lookup_env f env
239
240 let lookup_typedef s env =
241 if !typedef_debug then pr2 ("looking for: " ^ s);
242 let f = function
243 | TypeDef (s2, typ) -> if s2 =$= s then Some typ else None
244 | _ -> None
245 in
246 lookup_env f env
247
248 let lookup_structunion (_su, s) env =
249 let f = function
250 | StructUnionNameDef (s2, typ) -> if s2 =$= s then Some typ else None
251 | _ -> None
252 in
253 lookup_env f env
254
255 let lookup_macro s env =
256 let f = function
257 | Macro (s2, typ) -> if s2 =$= s then Some typ else None
258 | _ -> None
259 in
260 lookup_env f env
261
262 let lookup_enum s env =
263 let f = function
264 | EnumConstant (s2, typ) -> if s2 =$= s then Some typ else None
265 | _ -> None
266 in
267 lookup_env f env
268
269
270 let lookup_typedef a b =
271 Common.profile_code "TAC.lookup_typedef" (fun () -> lookup_typedef a b)
272
273
274
275 (*****************************************************************************)
276 (* "type-lookup" *)
277 (*****************************************************************************)
278
279 (* find_final_type is used to know to what type a field correspond in
280 * x.foo. Sometimes the type of x is a typedef or a structName in which
281 * case we must look in environment to find the complete type, here
282 * structUnion that contains the information.
283 *
284 * Because in C one can redefine in nested blocks some typedefs,
285 * struct, or variables, we have a static scoping resolving process.
286 * So, when we look for the type of a var, if this var is in an
287 * enclosing block, then maybe its type refer to a typdef of this
288 * enclosing block, so must restart the "type-resolving" of this
289 * typedef from this enclosing block, not from the bottom. So our
290 * "resolving-type functions" take an env and also return an env from
291 * where the next search must be performed. *)
292
293 (*
294 let rec find_final_type ty env =
295
296 match Ast_c.unwrap_typeC ty with
297 | BaseType x -> (BaseType x) +> Ast_c.rewrap_typeC ty
298
299 | Pointer t -> (Pointer (find_final_type t env)) +> Ast_c.rewrap_typeC ty
300 | Array (e, t) -> Array (e, find_final_type t env) +> Ast_c.rewrap_typeC ty
301
302 | StructUnion (sopt, su) -> StructUnion (sopt, su) +> Ast_c.rewrap_typeC ty
303
304 | FunctionType t -> (FunctionType t) (* todo ? *) +> Ast_c.rewrap_typeC ty
305 | Enum (s, enumt) -> (Enum (s, enumt)) (* todo? *) +> Ast_c.rewrap_typeC ty
306 | EnumName s -> (EnumName s) (* todo? *) +> Ast_c.rewrap_typeC ty
307
308 | StructUnionName (su, s) ->
309 (try
310 let ((structtyp,ii), env') = lookup_structunion (su, s) env in
311 Ast_c.nQ, (StructUnion (Some s, structtyp), ii)
312 (* old: +> Ast_c.rewrap_typeC ty
313 * but must wrap with good ii, otherwise pretty_print_c
314 * will be lost and raise some Impossible
315 *)
316 with Not_found ->
317 ty
318 )
319
320 | TypeName s ->
321 (try
322 let (t', env') = lookup_typedef s env in
323 find_final_type t' env'
324 with Not_found ->
325 ty
326 )
327
328 | ParenType t -> find_final_type t env
329 | Typeof e -> failwith "typeof"
330 *)
331
332
333
334
335 (* ------------------------------------------------------------ *)
336 let rec type_unfold_one_step ty env =
337
338 match Ast_c.unwrap_typeC ty with
339 | BaseType x -> ty
340 | Pointer t -> ty
341 | Array (e, t) -> ty
342
343 | StructUnion (sopt, su, fields) -> ty
344
345 | FunctionType t -> ty
346 | Enum (s, enumt) -> ty
347
348 | EnumName s -> ty (* todo: look in env when will have EnumDef *)
349
350 | StructUnionName (su, s) ->
351 (try
352 let (((su,fields),ii), env') = lookup_structunion (su, s) env in
353 Ast_c.mk_ty (StructUnion (su, Some s, fields)) ii
354 (* old: +> Ast_c.rewrap_typeC ty
355 * but must wrap with good ii, otherwise pretty_print_c
356 * will be lost and raise some Impossible
357 *)
358 with Not_found ->
359 ty
360 )
361
362 | TypeName (name, _typ) ->
363 let s = Ast_c.str_of_name name in
364 (try
365 if !typedef_debug then pr2 "type_unfold_one_step: lookup_typedef";
366 let (t', env') = lookup_typedef s env in
367 type_unfold_one_step t' env'
368 with Not_found ->
369 ty
370 )
371
372 | ParenType t -> type_unfold_one_step t env
373 | TypeOfExpr e ->
374 pr2_once ("Type_annoter: not handling typeof");
375 ty
376 | TypeOfType t -> type_unfold_one_step t env
377
378
379
380
381
382
383
384
385
386 (* normalizer. can be seen as the opposite of the previous function as
387 * we "fold" at least for the structUnion. Should return something that
388 * Type_c.is_completed_fullType likes, something that makes it easier
389 * for the programmer to work on, that has all the needed information
390 * for most tasks.
391 *)
392 let rec typedef_fix ty env =
393 match Ast_c.unwrap_typeC ty with
394 | BaseType x ->
395 ty
396 | Pointer t ->
397 Pointer (typedef_fix t env) +> Ast_c.rewrap_typeC ty
398 | Array (e, t) ->
399 Array (e, typedef_fix t env) +> Ast_c.rewrap_typeC ty
400 | StructUnion (su, sopt, fields) ->
401 (* normalize, fold.
402 * todo? but what if correspond to a nested struct def ?
403 *)
404 Type_c.structdef_to_struct_name ty
405 | FunctionType ft ->
406 (FunctionType ft) (* todo ? *) +> Ast_c.rewrap_typeC ty
407 | Enum (s, enumt) ->
408 (Enum (s, enumt)) (* todo? *) +> Ast_c.rewrap_typeC ty
409 | EnumName s ->
410 (EnumName s) (* todo? *) +> Ast_c.rewrap_typeC ty
411
412 (* we prefer StructUnionName to StructUnion when it comes to typed metavar *)
413 | StructUnionName (su, s) -> ty
414
415 (* keep the typename but complete with more information *)
416 | TypeName (name, typ) ->
417 let s = Ast_c.str_of_name name in
418 (match typ with
419 | Some _ ->
420 pr2 ("typedef value already there:" ^ s);
421 ty
422 | None ->
423 (try
424 if !typedef_debug then pr2 "typedef_fix: lookup_typedef";
425 let (t', env') = lookup_typedef s env in
426
427 (* bugfix: termination bug if use env instead of env' below, because
428 * can have some weird mutually recursive typedef which
429 * each new type alias search for its mutual def.
430 *)
431 TypeName (name, Some (typedef_fix t' env')) +> Ast_c.rewrap_typeC ty
432 with Not_found ->
433 ty
434 ))
435
436 (* remove paren for better matching with typed metavar. kind of iso again *)
437 | ParenType t ->
438 typedef_fix t env
439 | TypeOfExpr e ->
440 pr2_once ("Type_annoter: not handling typeof");
441 ty
442
443 | TypeOfType t ->
444 typedef_fix t env
445
446
447 (*****************************************************************************)
448 (* Helpers, part 1 *)
449 (*****************************************************************************)
450
451 let type_of_s2 s =
452 (Lib.al_type (Parse_c.type_of_string s))
453 let type_of_s a =
454 Common.profile_code "Type_c.type_of_s" (fun () -> type_of_s2 a)
455
456
457 (* pad: pb on:
458 * /home/pad/software-os-src2/freebsd/contrib/ipfilter/netinet/ip_fil_freebsd.c
459 * because in the code there is:
460 * static iss_seq_off = 0;
461 * which in the parser was generating a default int without a parse_info.
462 * I now add a fake parse_info for such default int so no more failwith
463 * normally.
464 *)
465
466 let rec is_simple_expr expr =
467 match Ast_c.unwrap_expr expr with
468 (* todo? handle more special cases ? *)
469
470 | Ident _ ->
471 true
472 | Constant (_) ->
473 true
474 | Unary (op, e) ->
475 true
476 | Binary (e1, op, e2) ->
477 true
478 | Cast (t, e) ->
479 true
480 | ParenExpr (e) -> is_simple_expr e
481
482 | _ -> false
483
484 (*****************************************************************************)
485 (* Typing rules *)
486 (*****************************************************************************)
487 (* now in type_c.ml *)
488
489
490
491 (*****************************************************************************)
492 (* (Semi) Globals, Julia's style *)
493 (*****************************************************************************)
494
495 (* opti: cache ? use hash ? *)
496 let _scoped_env = ref !initial_env
497
498 (* memoise unnanoted var, to avoid too much warning messages *)
499 let _notyped_var = ref (Hashtbl.create 100)
500
501 let new_scope() = _scoped_env := []::!_scoped_env
502 let del_scope() = _scoped_env := List.tl !_scoped_env
503
504 let do_in_new_scope f =
505 begin
506 new_scope();
507 let res = f() in
508 del_scope();
509 res
510 end
511
512 let add_in_scope namedef =
513 let (current, older) = Common.uncons !_scoped_env in
514 _scoped_env := (namedef::current)::older
515
516
517 (* ------------------------------------------------------------ *)
518
519 (* sort of hackish... *)
520 let islocal info =
521 if List.length (!_scoped_env) =|= List.length !initial_env
522 then Ast_c.NotLocalVar
523 else Ast_c.LocalVar info
524
525 (* ------------------------------------------------------------ *)
526 (* the warning argument is here to allow some binding to overwrite an
527 * existing one. With function, we first have the prototype and then the def,
528 * and the def binding with the same string is not an error.
529 *
530 * todo?: but if we define two times the same function, then we will not
531 * detect it :( it would require to make a diff between adding a binding
532 * from a prototype and from a definition.
533 *
534 * opti: disabling the check_annotater flag have some important
535 * performance benefit.
536 *
537 *)
538 let add_binding2 namedef warning =
539 let (current_scope, _older_scope) = Common.uncons !_scoped_env in
540
541 if !Flag_parsing_c.check_annotater then begin
542 (match namedef with
543 | VarOrFunc (s, typ) ->
544 if Hashtbl.mem !_notyped_var s
545 then pr2 ("warning: found typing information for a variable that was" ^
546 "previously unknown:" ^ s);
547 | _ -> ()
548 );
549
550 let (memberf, s) =
551 (match namedef with
552 | VarOrFunc (s, typ) ->
553 member_env (lookup_var s), s
554 | TypeDef (s, typ) ->
555 member_env (lookup_typedef s), s
556 | StructUnionNameDef (s, (su, typ)) ->
557 member_env (lookup_structunion (su, s)), s
558 | Macro (s, body) ->
559 member_env (lookup_macro s), s
560 | EnumConstant (s, body) ->
561 member_env (lookup_enum s), s
562 ) in
563
564 if memberf [current_scope] && warning
565 then pr2 ("Type_annoter: warning, " ^ s ^
566 " is already in current binding" ^ "\n" ^
567 " so there is a weird shadowing");
568 end;
569 add_in_scope namedef
570
571 let add_binding namedef warning =
572 Common.profile_code "TAC.add_binding" (fun () -> add_binding2 namedef warning)
573
574
575
576 (*****************************************************************************)
577 (* Helpers, part 2 *)
578 (*****************************************************************************)
579
580 let lookup_opt_env lookupf s =
581 Common.optionise (fun () ->
582 lookupf s !_scoped_env
583 )
584
585 let unwrap_unfold_env2 typ =
586 Ast_c.unwrap_typeC
587 (type_unfold_one_step typ !_scoped_env)
588 let unwrap_unfold_env typ =
589 Common.profile_code "TAC.unwrap_unfold_env" (fun () -> unwrap_unfold_env2 typ)
590
591 let typedef_fix a b =
592 Common.profile_code "TAC.typedef_fix" (fun () -> typedef_fix a b)
593
594 let make_info_def_fix x =
595 Type_c.make_info_def (typedef_fix x !_scoped_env)
596
597 let make_info_fix (typ, local) =
598 Type_c.make_info ((typedef_fix typ !_scoped_env),local)
599
600
601 let make_info_def = Type_c.make_info_def
602
603 (*****************************************************************************)
604 (* Main typer code, put later in a visitor *)
605 (*****************************************************************************)
606
607 let annotater_expr_visitor_subpart = (fun (k,bigf) expr ->
608
609 let ty =
610 match Ast_c.unwrap_expr expr with
611
612 (* -------------------------------------------------- *)
613 (* todo: should analyse the 's' for int to know if unsigned or not *)
614 | Constant (String (s,kind)) -> make_info_def (type_of_s "char []")
615 | Constant MultiString _ -> make_info_def (type_of_s "char []")
616 | Constant (Char (s,kind)) -> make_info_def (type_of_s "char")
617 | Constant (Int (s,kind)) ->
618 (* this seems really unpleasant, but perhaps the type needs to be set
619 up in some way that allows pretty printing *)
620 make_info_def
621 (match kind with
622 (* matches limited by what is generated in lexer_c.mll *)
623 Si(Signed,CInt) -> type_of_s "int"
624 | Si(UnSigned,CInt) -> type_of_s "unsigned int"
625 | Si(Signed,CLong) -> type_of_s "long"
626 | Si(UnSigned,CLong) -> type_of_s "unsigned long"
627 | Si(Signed,CLongLong) -> type_of_s "long long"
628 | Si(UnSigned,CLongLong) -> type_of_s "unsigned long long"
629 | _ -> failwith "unexpected kind for constant")
630 | Constant (Float (s,kind)) ->
631 let fake = Ast_c.fakeInfo (Common.fake_parse_info) in
632 let fake = Ast_c.rewrap_str "float" fake in
633 let iinull = [fake] in
634 make_info_def (Ast_c.mk_ty (BaseType (FloatType kind)) iinull)
635
636
637 (* -------------------------------------------------- *)
638 (* note: could factorize this code with the code for Ident
639 * and the other code for Funcall below. But as the Ident can be
640 * a macro-func, I prefer to handle it separately. So
641 * this rule can handle the macro-func, the Ident-rule can handle
642 * the macro-var, and the other FunCall-rule the regular
643 * function calls through fields.
644 * Also as I don't want a warning on the Ident that are a FunCall,
645 * easier to have a rule separate from the Ident rule.
646 *)
647 | FunCall (e1, args) ->
648 (match Ast_c.unwrap_expr e1 with
649 | Ident (ident) ->
650
651 (* recurse *)
652 args +> List.iter (fun (e,ii) ->
653 (* could typecheck if arguments agree with prototype *)
654 Visitor_c.vk_argument bigf e
655 );
656 let s = Ast_c.str_of_name ident in
657 (match lookup_opt_env lookup_var s with
658 | Some ((typ,local),_nextenv) ->
659
660 (* set type for ident *)
661 let tyinfo = make_info_fix (typ, local) in
662 Ast_c.set_type_expr e1 tyinfo;
663
664 (match unwrap_unfold_env typ with
665 | FunctionType (ret, params) -> make_info_def ret
666
667 (* can be function pointer, C have an iso for that,
668 * same pfn() syntax than regular function call.
669 *)
670 | Pointer (typ2) ->
671 (match unwrap_unfold_env typ2 with
672 | FunctionType (ret, params) -> make_info_def ret
673 | _ -> Type_c.noTypeHere
674 )
675 | _ -> Type_c.noTypeHere
676 )
677 | None ->
678
679 (match lookup_opt_env lookup_macro s with
680 | Some ((defkind, defval), _nextenv) ->
681 (match defkind, defval with
682 | DefineFunc _, DefineExpr e ->
683 let rettype = Ast_c.get_onlytype_expr e in
684
685 (* todo: could also set type for ident ?
686 have return type and at least type of concrete
687 parameters so can generate a fake FunctionType
688 *)
689 let macrotype_opt =
690 Type_c.fake_function_type rettype args
691 in
692
693 macrotype_opt +> Common.do_option (fun t ->
694 pr2 ("Type_annotater: generate fake function type" ^
695 "for macro: " ^ s);
696 let tyinfo = make_info_def_fix t in
697 Ast_c.set_type_expr e1 tyinfo;
698 );
699
700 Ast_c.get_type_expr e
701 | DefineVar, _ ->
702 pr2 ("Type_annoter: not a macro-func: " ^ s);
703 Type_c.noTypeHere
704 | DefineFunc _, _ ->
705 (* normally the FunCall case should have catch it *)
706 pr2 ("Type_annoter: not a macro-func-expr: " ^ s);
707 Type_c.noTypeHere
708 )
709 | None ->
710 pr2_once ("type_annotater: no type for function ident: " ^ s);
711 Type_c.noTypeHere
712 )
713 )
714
715
716 | _e ->
717 k expr;
718
719 (Ast_c.get_type_expr e1) +> Type_c.do_with_type (fun typ ->
720 (* copy paste of above *)
721 (match unwrap_unfold_env typ with
722 | FunctionType (ret, params) -> make_info_def ret
723 | Pointer (typ) ->
724 (match unwrap_unfold_env typ with
725 | FunctionType (ret, params) -> make_info_def ret
726 | _ -> Type_c.noTypeHere
727 )
728 | _ -> Type_c.noTypeHere
729 )
730 )
731 )
732
733
734 (* -------------------------------------------------- *)
735 | Ident (ident) ->
736 let s = Ast_c.str_of_name ident in
737 (match lookup_opt_env lookup_var s with
738 | Some ((typ,local),_nextenv) ->
739 make_info_fix (typ,local)
740 | None ->
741 (match lookup_opt_env lookup_macro s with
742 | Some ((defkind, defval), _nextenv) ->
743 (match defkind, defval with
744 | DefineVar, DefineExpr e ->
745 Ast_c.get_type_expr e
746 | DefineVar, _ ->
747 pr2 ("Type_annoter: not a expression: " ^ s);
748 Type_c.noTypeHere
749 | DefineFunc _, _ ->
750 (* normally the FunCall case should have catch it *)
751 pr2 ("Type_annoter: not a macro-var: " ^ s);
752 Type_c.noTypeHere
753 )
754 | None ->
755 (match lookup_opt_env lookup_enum s with
756 | Some (_, _nextenv) ->
757 make_info_def (type_of_s "int")
758 | None ->
759 if not (s =~ "[A-Z_]+") (* if macro then no warning *)
760 then
761 if !Flag_parsing_c.check_annotater then
762 if not (Hashtbl.mem !_notyped_var s)
763 then begin
764 pr2 ("Type_annoter: no type found for: " ^ s);
765 Hashtbl.add !_notyped_var s true;
766 end
767 else ()
768 else
769 pr2 ("Type_annoter: no type found for: " ^ s)
770 ;
771 Type_c.noTypeHere
772 )
773 )
774 )
775
776 (* -------------------------------------------------- *)
777 (* C isomorphism on type on array and pointers *)
778 | Unary (e, DeRef)
779 | ArrayAccess (e, _) ->
780 k expr; (* recurse to set the types-ref of sub expressions *)
781
782 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t ->
783 (* todo: maybe not good env !! *)
784 match unwrap_unfold_env t with
785 | Pointer x
786 | Array (_, x) ->
787 make_info_def_fix x
788 | _ -> Type_c.noTypeHere
789
790 )
791
792 | Unary (e, GetRef) ->
793 k expr; (* recurse to set the types-ref of sub expressions *)
794
795 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t ->
796 (* must generate an element so that '=' can be used
797 * to compare type ?
798 *)
799 let fake = Ast_c.fakeInfo Common.fake_parse_info in
800 let fake = Ast_c.rewrap_str "*" fake in
801
802 let ft = Ast_c.mk_ty (Pointer t) [fake] in
803 make_info_def_fix ft
804 )
805
806
807 (* -------------------------------------------------- *)
808 (* fields *)
809 | RecordAccess (e, namefld)
810 | RecordPtAccess (e, namefld) as x ->
811
812 let fld = Ast_c.str_of_name namefld in
813
814 k expr; (* recurse to set the types-ref of sub expressions *)
815
816 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t ->
817
818 let topt =
819 match x with
820 | RecordAccess _ -> Some t
821 | RecordPtAccess _ ->
822 (match unwrap_unfold_env t with
823 | Pointer (t) -> Some t
824 | _ -> None
825 )
826 | _ -> raise Impossible
827
828 in
829 (match topt with
830 | None -> Type_c.noTypeHere
831 | Some t ->
832 match unwrap_unfold_env t with
833 | StructUnion (su, sopt, fields) ->
834 (try
835 (* todo: which env ? *)
836 make_info_def_fix
837 (Type_c.type_field fld (su, fields))
838 with
839 | Not_found ->
840 pr2 (spf
841 "TYPE-ERROR: field '%s' does not belong in struct %s"
842 fld (match sopt with Some s -> s |_ -> "<anon>"));
843 Type_c.noTypeHere
844 | Multi_found ->
845 pr2 "TAC:MultiFound";
846 Type_c.noTypeHere
847 )
848 | _ -> Type_c.noTypeHere
849 )
850 )
851
852
853
854 (* -------------------------------------------------- *)
855 | Cast (t, e) ->
856 k expr;
857 (* todo: if infer, can "push" info ? add_types_expr [t] e ? *)
858 make_info_def_fix (Lib.al_type t)
859
860 (* todo? lub, hmm maybe not, cos type must be e1 *)
861 | Assignment (e1, op, e2) ->
862 k expr;
863 (* value of an assignment is the value of the RHS expression, but its
864 type is the type of the lhs expression. Use the rhs exp if no
865 information is available *)
866 (match Ast_c.get_type_expr e1 with
867 (None,_) -> Ast_c.get_type_expr e2
868 | (Some ty,t) -> (Some ty,t))
869 | Sequence (e1, e2) ->
870 k expr;
871 Ast_c.get_type_expr e2
872
873 | Binary (e1, Logical _, e2) ->
874 k expr;
875 make_info_def (type_of_s "int")
876
877 (* todo: lub *)
878 | Binary (e1, Arith op, e2) ->
879 k expr;
880 Type_c.lub op (Type_c.get_opt_type e1) (Type_c.get_opt_type e2)
881
882 | CondExpr (cond, e1opt, e2) ->
883 k expr;
884 Ast_c.get_type_expr e2
885
886
887 | ParenExpr e ->
888 k expr;
889 Ast_c.get_type_expr e
890
891 | Infix (e, op) | Postfix (e, op) ->
892 k expr;
893 Ast_c.get_type_expr e
894
895 (* pad: julia wrote this ? *)
896 | Unary (e, UnPlus) ->
897 k expr; (* recurse to set the types-ref of sub expressions *)
898 make_info_def (type_of_s "int")
899 (* todo? can convert from unsigned to signed if UnMinus ? *)
900 | Unary (e, UnMinus) ->
901 k expr; (* recurse to set the types-ref of sub expressions *)
902 make_info_def (type_of_s "int")
903
904 | SizeOfType _|SizeOfExpr _ ->
905 k expr; (* recurse to set the types-ref of sub expressions *)
906 make_info_def (type_of_s "int")
907
908 | Constructor (ft, ini) ->
909 k expr; (* recurse to set the types-ref of sub expressions *)
910 make_info_def (Lib.al_type ft)
911
912 | Unary (e, Not) ->
913 k expr; (* recurse to set the types-ref of sub expressions *)
914 (* the result of ! is always 0 or 1, not the argument type *)
915 make_info_def (type_of_s "int")
916 | Unary (e, Tilde) ->
917 k expr; (* recurse to set the types-ref of sub expressions *)
918 Ast_c.get_type_expr e
919
920 (* -------------------------------------------------- *)
921 (* todo *)
922 | Unary (_, GetRefLabel) ->
923 k expr; (* recurse to set the types-ref of sub expressions *)
924 pr2_once "Type annotater:not handling GetRefLabel";
925 Type_c.noTypeHere
926 (* todo *)
927 | StatementExpr _ ->
928 k expr; (* recurse to set the types-ref of sub expressions *)
929 pr2_once "Type annotater:not handling StatementExpr";
930 Type_c.noTypeHere
931 (*
932 | _ -> k expr; Type_c.noTypeHere
933 *)
934
935 in
936 Ast_c.set_type_expr expr ty
937
938 )
939
940
941 (*****************************************************************************)
942 (* Visitor *)
943 (*****************************************************************************)
944
945 (* Processing includes that were added after a cpp_ast_c makes the
946 * type annotater quite slow, especially when the depth of cpp_ast_c is
947 * big. But for such includes the only thing we really want is to modify
948 * the environment to have enough type information. We don't need
949 * to type the expressions inside those includes (they will be typed
950 * when we process the include file directly). Here the goal is
951 * to not recurse.
952 *
953 * Note that as usually header files contain mostly structure
954 * definitions and defines, that means we still have to do lots of work.
955 * We only win on function definition bodies, but usually header files
956 * have just prototypes, or inline function definitions which anyway have
957 * usually a small body. But still, we win. It also makes clearer
958 * that when processing include as we just need the environment, the caller
959 * of this module can do further optimisations such as memorising the
960 * state of the environment after each header files.
961 *
962 *
963 * For sparse its makes the annotating speed goes from 9s to 4s
964 * For Linux the speedup is even better, from ??? to ???.
965 *
966 * Because There would be some copy paste with annotate_program, it is
967 * better to factorize code hence the just_add_in_env parameter below.
968 *
969 * todo? alternative optimisation for the include problem:
970 * - processing all headers files one time and construct big env
971 * - use hashtbl for env (but apparently not biggest problem)
972 *)
973
974 let rec visit_toplevel ~just_add_in_env ~depth elem =
975 let need_annotate_body = not just_add_in_env in
976
977 let bigf = { Visitor_c.default_visitor_c with
978
979 (* ------------------------------------------------------------ *)
980 Visitor_c.kcppdirective = (fun (k, bigf) directive ->
981 match directive with
982 (* do error messages for type annotater only for the real body of the
983 * file, not inside include.
984 *)
985 | Include {i_content = opt} ->
986 opt +> Common.do_option (fun (filename, program) ->
987 Common.save_excursion Flag_parsing_c.verbose_type (fun () ->
988 Flag_parsing_c.verbose_type := false;
989
990 (* old: Visitor_c.vk_program bigf program;
991 * opti: set the just_add_in_env
992 *)
993 program +> List.iter (fun elem ->
994 visit_toplevel ~just_add_in_env:true ~depth:(depth+1) elem
995 )
996 )
997 )
998
999 | Define ((s,ii), (defkind, defval)) ->
1000
1001
1002 (* even if we are in a just_add_in_env phase, such as when
1003 * we process include, as opposed to the body of functions,
1004 * with macros we still to type the body of the macro as
1005 * the macro has no type and so we infer its type from its
1006 * body (and one day later maybe from its use).
1007 *)
1008 (match defval with
1009 (* can try to optimize and recurse only when the define body
1010 * is simple ?
1011 *)
1012
1013 | DefineExpr expr ->
1014 (* prevent macro-declared variables from leaking out *)
1015 do_in_new_scope (fun () ->
1016 if is_simple_expr expr
1017 (* even if not need_annotate_body, still recurse*)
1018 then k directive
1019 else
1020 if need_annotate_body
1021 then k directive)
1022 | _ ->
1023 do_in_new_scope (fun () ->
1024 if need_annotate_body
1025 then k directive)
1026 );
1027
1028 add_binding (Macro (s, (defkind, defval) )) true;
1029
1030 | Undef _
1031 | PragmaAndCo _ -> ()
1032 );
1033
1034 (* ------------------------------------------------------------ *)
1035 (* main typer code *)
1036 (* ------------------------------------------------------------ *)
1037 Visitor_c.kexpr = annotater_expr_visitor_subpart;
1038
1039 (* ------------------------------------------------------------ *)
1040 Visitor_c.kstatement = (fun (k, bigf) st ->
1041 match Ast_c.unwrap_st st with
1042 | Compound statxs -> do_in_new_scope (fun () -> k st);
1043 | _ -> k st
1044 );
1045 (* ------------------------------------------------------------ *)
1046 Visitor_c.kdecl = (fun (k, bigf) d ->
1047 (match d with
1048 | (DeclList (xs, ii)) ->
1049 xs +> List.iter (fun ({v_namei = var; v_type = t;
1050 v_storage = sto; v_local = local} as x
1051 , iicomma) ->
1052
1053 (* to add possible definition in type found in Decl *)
1054 Visitor_c.vk_type bigf t;
1055
1056
1057 let local =
1058 match local with
1059 | Ast_c.NotLocalDecl -> Ast_c.NotLocalVar
1060 | Ast_c.LocalDecl -> Ast_c.LocalVar (Ast_c.info_of_type t)
1061 in
1062 var +> Common.do_option (fun (name, iniopt) ->
1063 let s = Ast_c.str_of_name name in
1064
1065 match sto with
1066 | StoTypedef, _inline ->
1067 add_binding (TypeDef (s,Lib.al_type t)) true;
1068 | _ ->
1069 add_binding (VarOrFunc (s, (Lib.al_type t, local))) true;
1070
1071 x.v_type_bis :=
1072 Some (typedef_fix (Lib.al_type t) !_scoped_env);
1073
1074 if need_annotate_body then begin
1075 (* int x = sizeof(x) is legal so need process ini *)
1076 iniopt +> Common.do_option (fun (info, ini) ->
1077 Visitor_c.vk_ini bigf ini
1078 );
1079 end
1080 );
1081 );
1082 | MacroDecl _ ->
1083 if need_annotate_body
1084 then k d
1085 );
1086
1087 );
1088
1089 (* ------------------------------------------------------------ *)
1090 Visitor_c.ktype = (fun (k, bigf) typ ->
1091 (* bugfix: have a 'Lib.al_type typ' before, but because we can
1092 * have enum with possible expression, we don't want to change
1093 * the ref of abstract-lined types, but the real one, so
1094 * don't al_type here
1095 *)
1096 let (_q, tbis) = typ in
1097 match Ast_c.unwrap_typeC typ with
1098 | StructUnion (su, Some s, structType) ->
1099 let structType' = Lib.al_fields structType in
1100 let ii = Ast_c.get_ii_typeC_take_care tbis in
1101 let ii' = Lib.al_ii ii in
1102 add_binding (StructUnionNameDef (s, ((su, structType'),ii'))) true;
1103
1104 if need_annotate_body
1105 then k typ (* todo: restrict ? new scope so use do_in_scope ? *)
1106
1107 | Enum (sopt, enums) ->
1108
1109 enums +> List.iter (fun ((name, eopt), iicomma) ->
1110
1111 let s = Ast_c.str_of_name name in
1112
1113 if need_annotate_body
1114 then eopt +> Common.do_option (fun (ieq, e) ->
1115 Visitor_c.vk_expr bigf e
1116 );
1117 add_binding (EnumConstant (s, sopt)) true;
1118 );
1119
1120
1121 (* TODO: if have a TypeName, then maybe can fill the option
1122 * information.
1123 *)
1124 | _ ->
1125 if need_annotate_body
1126 then k typ
1127
1128 );
1129
1130 (* ------------------------------------------------------------ *)
1131 Visitor_c.ktoplevel = (fun (k, bigf) elem ->
1132 _notyped_var := Hashtbl.create 100;
1133 match elem with
1134 | Definition def ->
1135 let {f_name = name;
1136 f_type = ((returnt, (paramst, b)) as ftyp);
1137 f_storage = sto;
1138 f_body = statxs;
1139 f_old_c_style = oldstyle;
1140 },ii
1141 = def
1142 in
1143 let (i1, i2) =
1144 match ii with
1145 (* what is iifunc1? it should be a type. jll
1146 * pad: it's the '(' in the function definition. The
1147 * return type is part of f_type.
1148 *)
1149 | iifunc1::iifunc2::ibrace1::ibrace2::ifakestart::isto ->
1150 iifunc1, iifunc2
1151 | _ -> raise Impossible
1152 in
1153 let funcs = Ast_c.str_of_name name in
1154
1155 (match oldstyle with
1156 | None ->
1157 let typ' =
1158 Lib.al_type (Ast_c.mk_ty (FunctionType ftyp) [i1;i2]) in
1159
1160 add_binding (VarOrFunc (funcs, (typ',islocal i1.Ast_c.pinfo)))
1161 false;
1162
1163 if need_annotate_body then
1164 do_in_new_scope (fun () ->
1165 paramst +> List.iter (fun ({p_namei= nameopt; p_type= t},_)->
1166 match nameopt with
1167 | Some name ->
1168 let s = Ast_c.str_of_name name in
1169 let local = Ast_c.LocalVar (Ast_c.info_of_type t) in
1170 add_binding (VarOrFunc (s,(Lib.al_type t,local))) true
1171 | None ->
1172 pr2 "no type, certainly because Void type ?"
1173 );
1174 (* recurse *)
1175 k elem
1176 );
1177 | Some oldstyle ->
1178 (* generate regular function type *)
1179
1180 pr2 "TODO generate type for function";
1181 (* add bindings *)
1182 if need_annotate_body then
1183 do_in_new_scope (fun () ->
1184 (* recurse. should naturally call the kdecl visitor and
1185 * add binding
1186 *)
1187 k elem;
1188 );
1189
1190 );
1191 | CppTop x ->
1192 (match x with
1193 | Define ((s,ii), (DefineVar, DefineType t)) ->
1194 add_binding (TypeDef (s,Lib.al_type t)) true;
1195 | _ -> k elem
1196 )
1197
1198 | Declaration _
1199
1200
1201
1202 | IfdefTop _
1203 | MacroTop _
1204 | EmptyDef _
1205 | NotParsedCorrectly _
1206 | FinalDef _
1207 ->
1208 k elem
1209 );
1210 }
1211 in
1212 if just_add_in_env
1213 then
1214 if depth > 1
1215 then Visitor_c.vk_toplevel bigf elem
1216 else
1217 Common.profile_code "TAC.annotate_only_included" (fun () ->
1218 Visitor_c.vk_toplevel bigf elem
1219 )
1220 else Visitor_c.vk_toplevel bigf elem
1221
1222 (*****************************************************************************)
1223 (* Entry point *)
1224 (*****************************************************************************)
1225 (* catch all the decl to grow the environment *)
1226
1227
1228 let rec (annotate_program2 :
1229 environment -> toplevel list -> (toplevel * environment Common.pair) list) =
1230 fun env prog ->
1231
1232 (* globals (re)initialialisation *)
1233 _scoped_env := env;
1234 _notyped_var := (Hashtbl.create 100);
1235
1236 prog +> List.map (fun elem ->
1237 let beforeenv = !_scoped_env in
1238 visit_toplevel ~just_add_in_env:false ~depth:0 elem;
1239 let afterenv = !_scoped_env in
1240 (elem, (beforeenv, afterenv))
1241 )
1242
1243
1244
1245
1246 (*****************************************************************************)
1247 (* Annotate test *)
1248 (*****************************************************************************)
1249
1250 (* julia: for coccinelle *)
1251 let annotate_test_expressions prog =
1252 let rec propagate_test e =
1253 let ((e_term,info),_) = e in
1254 let (ty,_) = !info in
1255 info := (ty,Test);
1256 match e_term with
1257 Binary(e1,Logical(AndLog),e2)
1258 | Binary(e1,Logical(OrLog),e2) -> propagate_test e1; propagate_test e2
1259 | Unary(e1,Not) -> propagate_test e1
1260 | ParenExpr(e) -> propagate_test e
1261 | _ -> () in
1262
1263 let bigf = { Visitor_c.default_visitor_c with
1264 Visitor_c.kexpr = (fun (k,bigf) expr ->
1265 (match unwrap_expr expr with
1266 CondExpr(e,_,_) -> propagate_test e
1267 | Binary(e1,Logical(AndLog),e2)
1268 | Binary(e1,Logical(OrLog),e2) -> propagate_test e1; propagate_test e2
1269 | Unary(e1,Not) -> propagate_test e1
1270 | _ -> ()
1271 );
1272 k expr
1273 );
1274 Visitor_c.kstatement = (fun (k, bigf) st ->
1275 match unwrap_st st with
1276 Selection(s) ->
1277 (match s with If(e1,s1,s2) -> propagate_test e1 | _ -> ());
1278 k st;
1279 | Iteration(i) ->
1280 (match i with
1281 While(e,s) -> propagate_test e
1282 | DoWhile(s,e) -> propagate_test e
1283 | For(_,es,_,_) ->
1284 (match unwrap es with Some e -> propagate_test e | None -> ())
1285 | _ -> ());
1286 k st
1287 | _ -> k st
1288 )
1289 } in
1290 (prog +> List.iter (fun elem ->
1291 Visitor_c.vk_toplevel bigf elem
1292 ))
1293
1294
1295
1296 (*****************************************************************************)
1297 (* Annotate types *)
1298 (*****************************************************************************)
1299 let annotate_program env prog =
1300 Common.profile_code "TAC.annotate_program"
1301 (fun () ->
1302 let res = annotate_program2 env prog in
1303 annotate_test_expressions prog;
1304 res
1305 )
1306
1307 let annotate_type_and_localvar env prog =
1308 Common.profile_code "TAC.annotate_type"
1309 (fun () -> annotate_program2 env prog)
1310
1311
1312 (*****************************************************************************)
1313 (* changing default typing environment, do concatenation *)
1314 let init_env filename =
1315 pr2 ("init_env: " ^ filename);
1316 let (ast2, _stat) = Parse_c.parse_c_and_cpp filename in
1317 let ast = Parse_c.program_of_program2 ast2 in
1318
1319 let res = annotate_type_and_localvar !initial_env ast in
1320 match List.rev res with
1321 | [] -> pr2 "empty environment"
1322 | (_top,(env1,env2))::xs ->
1323 initial_env := !initial_env ++ env2;
1324 ()
1325