0707df8d0f00a2b08fbd00b9e5c689c9851f7827
[bpt/coccinelle.git] / parsing_c / type_annoter_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes,
4 * Copyright (C) 2009 University of Urbana Champaign
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16 open Common
17
18 open Ast_c
19
20 module Lib = Lib_parsing_c
21
22 (*****************************************************************************)
23 (* Prelude *)
24 (*****************************************************************************)
25 (* History:
26 * - Done a first type checker in 2002, cf typing-semantic/, but
27 * was assuming that have all type info, and so was assuming had called
28 * cpp and everything was right.
29 * - Wrote this file, in 2006?, as we added pattern matching on type
30 * in coccinelle. Partial type annotater.
31 * - Julia extended it in 2008? to have localvar/notlocalvar and
32 * test/notest information, again used by coccinelle.
33 * - I extended it in Fall 2008 to have more type information for the
34 * global analysis. I also added some optimisations to process
35 * included code faster.
36 *
37 *
38 * Design choices. Can either do:
39 * - a kind of inferer
40 * - can first do a simple inferer, that just pass context
41 * - then a real inferer, managing partial info.
42 * type context = fullType option
43 *
44 * - extract the information from the .h files
45 * (so no inference at all needed)
46 *
47 * Difference with julia's code in parsing_cocci/type_infer.ml:
48 * - She handles just the variable namespace. She does not type
49 * field access or enum or macros. This is because cocci programs are
50 * usually simple and have no structure definition or macro definitions
51 * that we need to type anyway.
52 * - She does more propagation.
53 * - She does not have to handle the typedef isomorphism which force me
54 * to use those typedef_fix and type_unfold_one_step
55 * - She does not handle I think the function pointer C isomorphism.
56 *
57 * - She has a cleaner type_cocci without any info. In my case
58 * I need to do those ugly al_type, or generate fake infos.
59 * - She has more compact code. Perhaps because she does not have to
60 * handle the extra exp_info that she added on me :) So I need those
61 * do_with_type, make_info_xxx, etc.
62 *
63 * Note: if need to debug this annotater, use -show_trace_profile, it can
64 * help. You can also set the typedef_debug flag below.
65 *
66 *
67 *
68 * todo: expression contain types, and statements, which in turn can contain
69 * expression, so need recurse. Need define an annote_statement and
70 * annotate_type.
71 *
72 * todo: how deal with typedef isomorphisms ? How store them in Ast_c ?
73 * store all posible variations in ast_c ? a list of type instead of just
74 * the type ?
75 *
76 * todo: how to handle multiple possible definitions for entities like
77 * struct or typedefs ? Because of ifdef, we should store list of
78 * possibilities sometimes.
79 *
80 * todo: define a new type ? like type_cocci ? where have a bool ?
81 *
82 * semi: How handle scope ? When search for type of field, we return
83 * a type, but this type makes sense only in a certain scope.
84 * We could add a tag to each typedef, structUnionName to differentiate
85 * them and also associate in ast_c to the type the scope
86 * of this type, the env that were used to define this type.
87 *
88 * todo: handle better the search in previous env, the env'. Cf the
89 * termination problem in typedef_fix when I was searching in the same
90 * env.
91 *
92 *)
93
94 (*****************************************************************************)
95 (* Wrappers *)
96 (*****************************************************************************)
97 let pr2 s =
98 if !Flag_parsing_c.verbose_type
99 then Common.pr2 s
100
101 let pr2_once s =
102 if !Flag_parsing_c.verbose_type
103 then Common.pr2_once s
104
105 (*****************************************************************************)
106 (* Environment *)
107 (*****************************************************************************)
108
109 (* The different namespaces from stdC manual:
110 *
111 * You introduce two new name spaces with every block that you write.
112 *
113 * One name space includes all
114 * - functions,
115 * - objects,
116 * - type definitions,
117 * - and enumeration constants
118 * that you declare or define within the block.
119 *
120 * The other name space includes all
121 * - enumeration,
122 * - structure,
123 * - and union
124 * *tags* that you define within the block.
125 *
126 * You introduce a new member name space with every structure or union
127 * whose content you define. You identify a member name space by the
128 * type of left operand that you write for a member selection
129 * operator, as in x.y or p->y. A member name space ends with the end
130 * of the block in which you declare it.
131 *
132 * You introduce a new goto label name space with every function
133 * definition you write. Each goto label name space ends with its
134 * function definition.
135 *)
136
137 (* But I don't try to do a type-checker, I try to "resolve" type of var
138 * so don't need make difference between namespaces here.
139 *
140 * But, why not make simply a (string, kindstring) assoc ?
141 * Because we dont want that a variable shadow a struct definition, because
142 * they are still in 2 different namespace. But could for typedef,
143 * because VarOrFunc and Typedef are in the same namespace.
144 * But could do a record as in c_info.ml
145 *)
146
147
148 (* This type contains all "ident" like notion of C. Each time in Ast_c
149 * you have a string type (as in expression, function name, fields)
150 * then you need to manage the scope of this ident.
151 *
152 * The wrap for StructUnionNameDef contain the whole ii, the i for
153 * the string, the structUnion and the structType.
154 *
155 * Put Macro here ? after all the scoping rules for cpp macros is different
156 * and so does not vanish after the closing '}'.
157 *
158 * todo: EnumDef
159 *)
160 type namedef =
161 | VarOrFunc of string * Ast_c.exp_type
162 | EnumConstant of string * string option
163
164 | TypeDef of string * fullType
165 (* the structType contains nested "idents" with struct scope *)
166 | StructUnionNameDef of string * (structUnion * structType) wrap
167
168 (* cppext: *)
169 | Macro of string * (define_kind * define_val)
170
171
172 (* Because have nested scope, have nested list, hence the list list.
173 *
174 * opti? use a hash to accelerate ? hmm but may have some problems
175 * with hash to handle recursive lookup. For instance for the typedef
176 * example where have mutually recursive definition of the type,
177 * we must take care to not loop by starting the second search
178 * from the previous environment. With the list scheme in
179 * lookup_env below it's quite easy to do. With hash it may be
180 * more complicated.
181 *)
182 type environment = namedef list list
183
184
185 (* ------------------------------------------------------------ *)
186 (* can be modified by the init_env function below, by
187 * the file environment_unix.h
188 *)
189 let initial_env = ref [
190 [VarOrFunc("NULL",
191 (Lib.al_type (Parse_c.type_of_string "void *"),
192 Ast_c.NotLocalVar));
193
194 (*
195 VarOrFunc("malloc",
196 (Lib.al_type(Parse_c.type_of_string "void* (*)(int size)"),
197 Ast_c.NotLocalVar));
198 VarOrFunc("free",
199 (Lib.al_type(Parse_c.type_of_string "void (*)(void *ptr)"),
200 Ast_c.NotLocalVar));
201 *)
202 ]
203 ]
204
205
206 let typedef_debug = ref false
207
208
209 (* ------------------------------------------------------------ *)
210 (* generic, lookup and also return remaining env for further lookup *)
211 let rec lookup_env2 f env =
212 match env with
213 | [] -> raise Not_found
214 | []::zs -> lookup_env2 f zs
215 | (x::xs)::zs ->
216 (match f x with
217 | None -> lookup_env2 f (xs::zs)
218 | Some y -> y, xs::zs
219 )
220 let lookup_env a b =
221 Common.profile_code "TAC.lookup_env" (fun () -> lookup_env2 a b)
222
223
224
225 let member_env lookupf env =
226 try
227 let _ = lookupf env in
228 true
229 with Not_found -> false
230
231
232
233
234 (* ------------------------------------------------------------ *)
235
236
237 let lookup_var s env =
238 let f = function
239 | VarOrFunc (s2, typ) -> if s2 =$= s then Some typ else None
240 | _ -> None
241 in
242 lookup_env f env
243
244 let lookup_typedef s env =
245 if !typedef_debug then pr2 ("looking for: " ^ s);
246 let f = function
247 | TypeDef (s2, typ) -> if s2 =$= s then Some typ else None
248 | _ -> None
249 in
250 lookup_env f env
251
252 let lookup_structunion (_su, s) env =
253 let f = function
254 | StructUnionNameDef (s2, typ) -> if s2 =$= s then Some typ else None
255 | _ -> None
256 in
257 lookup_env f env
258
259 let lookup_macro s env =
260 let f = function
261 | Macro (s2, typ) -> if s2 =$= s then Some typ else None
262 | _ -> None
263 in
264 lookup_env f env
265
266 let lookup_enum s env =
267 let f = function
268 | EnumConstant (s2, typ) -> if s2 =$= s then Some typ else None
269 | _ -> None
270 in
271 lookup_env f env
272
273
274 let lookup_typedef a b =
275 Common.profile_code "TAC.lookup_typedef" (fun () -> lookup_typedef a b)
276
277
278
279 (*****************************************************************************)
280 (* "type-lookup" *)
281 (*****************************************************************************)
282
283 (* find_final_type is used to know to what type a field correspond in
284 * x.foo. Sometimes the type of x is a typedef or a structName in which
285 * case we must look in environment to find the complete type, here
286 * structUnion that contains the information.
287 *
288 * Because in C one can redefine in nested blocks some typedefs,
289 * struct, or variables, we have a static scoping resolving process.
290 * So, when we look for the type of a var, if this var is in an
291 * enclosing block, then maybe its type refer to a typdef of this
292 * enclosing block, so must restart the "type-resolving" of this
293 * typedef from this enclosing block, not from the bottom. So our
294 * "resolving-type functions" take an env and also return an env from
295 * where the next search must be performed. *)
296
297 (*
298 let rec find_final_type ty env =
299
300 match Ast_c.unwrap_typeC ty with
301 | BaseType x -> (BaseType x) +> Ast_c.rewrap_typeC ty
302
303 | Pointer t -> (Pointer (find_final_type t env)) +> Ast_c.rewrap_typeC ty
304 | Array (e, t) -> Array (e, find_final_type t env) +> Ast_c.rewrap_typeC ty
305
306 | StructUnion (sopt, su) -> StructUnion (sopt, su) +> Ast_c.rewrap_typeC ty
307
308 | FunctionType t -> (FunctionType t) (* todo ? *) +> Ast_c.rewrap_typeC ty
309 | Enum (s, enumt) -> (Enum (s, enumt)) (* todo? *) +> Ast_c.rewrap_typeC ty
310 | EnumName s -> (EnumName s) (* todo? *) +> Ast_c.rewrap_typeC ty
311
312 | StructUnionName (su, s) ->
313 (try
314 let ((structtyp,ii), env') = lookup_structunion (su, s) env in
315 Ast_c.nQ, (StructUnion (Some s, structtyp), ii)
316 (* old: +> Ast_c.rewrap_typeC ty
317 * but must wrap with good ii, otherwise pretty_print_c
318 * will be lost and raise some Impossible
319 *)
320 with Not_found ->
321 ty
322 )
323
324 | TypeName s ->
325 (try
326 let (t', env') = lookup_typedef s env in
327 find_final_type t' env'
328 with Not_found ->
329 ty
330 )
331
332 | ParenType t -> find_final_type t env
333 | Typeof e -> failwith "typeof"
334 *)
335
336
337
338
339 (* ------------------------------------------------------------ *)
340 let rec type_unfold_one_step ty env =
341
342 match Ast_c.unwrap_typeC ty with
343 | BaseType x -> ty
344 | Pointer t -> ty
345 | Array (e, t) -> ty
346
347 | StructUnion (sopt, su, fields) -> ty
348
349 | FunctionType t -> ty
350 | Enum (s, enumt) -> ty
351
352 | EnumName s -> ty (* todo: look in env when will have EnumDef *)
353
354 | StructUnionName (su, s) ->
355 (try
356 let (((su,fields),ii), env') = lookup_structunion (su, s) env in
357 Ast_c.nQ, (StructUnion (su, Some s, fields), ii)
358 (* old: +> Ast_c.rewrap_typeC ty
359 * but must wrap with good ii, otherwise pretty_print_c
360 * will be lost and raise some Impossible
361 *)
362 with Not_found ->
363 ty
364 )
365
366 | TypeName (name, _typ) ->
367 let s = Ast_c.str_of_name name in
368 (try
369 if !typedef_debug then pr2 "type_unfold_one_step: lookup_typedef";
370 let (t', env') = lookup_typedef s env in
371 type_unfold_one_step t' env'
372 with Not_found ->
373 ty
374 )
375
376 | ParenType t -> type_unfold_one_step t env
377 | TypeOfExpr e ->
378 pr2_once ("Type_annoter: not handling typeof");
379 ty
380 | TypeOfType t -> type_unfold_one_step t env
381
382
383
384
385
386
387
388
389
390 (* normalizer. can be seen as the opposite of the previous function as
391 * we "fold" at least for the structUnion. Should return something that
392 * Type_c.is_completed_fullType likes, something that makes it easier
393 * for the programmer to work on, that has all the needed information
394 * for most tasks.
395 *)
396 let rec typedef_fix ty env =
397 match Ast_c.unwrap_typeC ty with
398 | BaseType x ->
399 ty
400 | Pointer t ->
401 Pointer (typedef_fix t env) +> Ast_c.rewrap_typeC ty
402 | Array (e, t) ->
403 Array (e, typedef_fix t env) +> Ast_c.rewrap_typeC ty
404 | StructUnion (su, sopt, fields) ->
405 (* normalize, fold.
406 * todo? but what if correspond to a nested struct def ?
407 *)
408 Type_c.structdef_to_struct_name ty
409 | FunctionType ft ->
410 (FunctionType ft) (* todo ? *) +> Ast_c.rewrap_typeC ty
411 | Enum (s, enumt) ->
412 (Enum (s, enumt)) (* todo? *) +> Ast_c.rewrap_typeC ty
413 | EnumName s ->
414 (EnumName s) (* todo? *) +> Ast_c.rewrap_typeC ty
415
416 (* we prefer StructUnionName to StructUnion when it comes to typed metavar *)
417 | StructUnionName (su, s) -> ty
418
419 (* keep the typename but complete with more information *)
420 | TypeName (name, typ) ->
421 let s = Ast_c.str_of_name name in
422 (match typ with
423 | Some _ ->
424 pr2 ("typedef value already there:" ^ s);
425 ty
426 | None ->
427 (try
428 if !typedef_debug then pr2 "typedef_fix: lookup_typedef";
429 let (t', env') = lookup_typedef s env in
430
431 (* bugfix: termination bug if use env instead of env' below, because
432 * can have some weird mutually recursive typedef which
433 * each new type alias search for its mutual def.
434 *)
435 TypeName (name, Some (typedef_fix t' env')) +> Ast_c.rewrap_typeC ty
436 with Not_found ->
437 ty
438 ))
439
440 (* remove paren for better matching with typed metavar. kind of iso again *)
441 | ParenType t ->
442 typedef_fix t env
443 | TypeOfExpr e ->
444 pr2_once ("Type_annoter: not handling typeof");
445 ty
446
447 | TypeOfType t ->
448 typedef_fix t env
449
450
451 (*****************************************************************************)
452 (* Helpers, part 1 *)
453 (*****************************************************************************)
454
455 let type_of_s2 s =
456 (Lib.al_type (Parse_c.type_of_string s))
457 let type_of_s a =
458 Common.profile_code "Type_c.type_of_s" (fun () -> type_of_s2 a)
459
460
461 (* pad: pb on:
462 * /home/pad/software-os-src2/freebsd/contrib/ipfilter/netinet/ip_fil_freebsd.c
463 * because in the code there is:
464 * static iss_seq_off = 0;
465 * which in the parser was generating a default int without a parse_info.
466 * I now add a fake parse_info for such default int so no more failwith
467 * normally.
468 *)
469 let offset (_,(ty,iis)) =
470 match ty, iis with
471 | TypeName (name, _typ), [] ->
472 (match name with
473 | RegularName (s, [ii]) -> ii.Ast_c.pinfo
474 | _ -> raise Todo
475 )
476 | _, ii::_ -> ii.Ast_c.pinfo
477 | _ -> failwith "type has no text; need to think again"
478
479
480
481 let rec is_simple_expr expr =
482 match Ast_c.unwrap_expr expr with
483 (* todo? handle more special cases ? *)
484
485 | Ident _ ->
486 true
487 | Constant (_) ->
488 true
489 | Unary (op, e) ->
490 true
491 | Binary (e1, op, e2) ->
492 true
493 | Cast (t, e) ->
494 true
495 | ParenExpr (e) -> is_simple_expr e
496
497 | _ -> false
498
499 (*****************************************************************************)
500 (* Typing rules *)
501 (*****************************************************************************)
502 (* now in type_c.ml *)
503
504
505
506 (*****************************************************************************)
507 (* (Semi) Globals, Julia's style *)
508 (*****************************************************************************)
509
510 (* opti: cache ? use hash ? *)
511 let _scoped_env = ref !initial_env
512
513 (* memoise unnanoted var, to avoid too much warning messages *)
514 let _notyped_var = ref (Hashtbl.create 100)
515
516 let new_scope() = _scoped_env := []::!_scoped_env
517 let del_scope() = _scoped_env := List.tl !_scoped_env
518
519 let do_in_new_scope f =
520 begin
521 new_scope();
522 let res = f() in
523 del_scope();
524 res
525 end
526
527 let add_in_scope namedef =
528 let (current, older) = Common.uncons !_scoped_env in
529 _scoped_env := (namedef::current)::older
530
531
532 (* ------------------------------------------------------------ *)
533
534 (* sort of hackish... *)
535 let islocal info =
536 if List.length (!_scoped_env) =|= List.length !initial_env
537 then Ast_c.NotLocalVar
538 else Ast_c.LocalVar info
539
540 (* ------------------------------------------------------------ *)
541 (* the warning argument is here to allow some binding to overwrite an
542 * existing one. With function, we first have the prototype and then the def,
543 * and the def binding with the same string is not an error.
544 *
545 * todo?: but if we define two times the same function, then we will not
546 * detect it :( it would require to make a diff between adding a binding
547 * from a prototype and from a definition.
548 *
549 * opti: disabling the check_annotater flag have some important
550 * performance benefit.
551 *
552 *)
553 let add_binding2 namedef warning =
554 let (current_scope, _older_scope) = Common.uncons !_scoped_env in
555
556 if !Flag_parsing_c.check_annotater then begin
557 (match namedef with
558 | VarOrFunc (s, typ) ->
559 if Hashtbl.mem !_notyped_var s
560 then pr2 ("warning: found typing information for a variable that was" ^
561 "previously unknown:" ^ s);
562 | _ -> ()
563 );
564
565 let (memberf, s) =
566 (match namedef with
567 | VarOrFunc (s, typ) ->
568 member_env (lookup_var s), s
569 | TypeDef (s, typ) ->
570 member_env (lookup_typedef s), s
571 | StructUnionNameDef (s, (su, typ)) ->
572 member_env (lookup_structunion (su, s)), s
573 | Macro (s, body) ->
574 member_env (lookup_macro s), s
575 | EnumConstant (s, body) ->
576 member_env (lookup_enum s), s
577 ) in
578
579 if memberf [current_scope] && warning
580 then pr2 ("Type_annoter: warning, " ^ s ^
581 " is already in current binding" ^ "\n" ^
582 " so there is a weird shadowing");
583 end;
584 add_in_scope namedef
585
586 let add_binding namedef warning =
587 Common.profile_code "TAC.add_binding" (fun () -> add_binding2 namedef warning)
588
589
590
591 (*****************************************************************************)
592 (* Helpers, part 2 *)
593 (*****************************************************************************)
594
595 let lookup_opt_env lookupf s =
596 Common.optionise (fun () ->
597 lookupf s !_scoped_env
598 )
599
600 let unwrap_unfold_env2 typ =
601 Ast_c.unwrap_typeC
602 (type_unfold_one_step typ !_scoped_env)
603 let unwrap_unfold_env typ =
604 Common.profile_code "TAC.unwrap_unfold_env" (fun () -> unwrap_unfold_env2 typ)
605
606 let typedef_fix a b =
607 Common.profile_code "TAC.typedef_fix" (fun () -> typedef_fix a b)
608
609 let make_info_def_fix x =
610 Type_c.make_info_def (typedef_fix x !_scoped_env)
611
612 let make_info_fix (typ, local) =
613 Type_c.make_info ((typedef_fix typ !_scoped_env),local)
614
615
616 let make_info_def = Type_c.make_info_def
617
618 (*****************************************************************************)
619 (* Main typer code, put later in a visitor *)
620 (*****************************************************************************)
621
622 let annotater_expr_visitor_subpart = (fun (k,bigf) expr ->
623
624 let ty =
625 match Ast_c.unwrap_expr expr with
626
627 (* -------------------------------------------------- *)
628 (* todo: should analyse the 's' for int to know if unsigned or not *)
629 | Constant (String (s,kind)) -> make_info_def (type_of_s "char *")
630 | Constant MultiString _ -> make_info_def (type_of_s "char *")
631 | Constant (Char (s,kind)) -> make_info_def (type_of_s "char")
632 | Constant (Int (s)) -> make_info_def (type_of_s "int")
633 | Constant (Float (s,kind)) ->
634 let fake = Ast_c.fakeInfo (Common.fake_parse_info) in
635 let fake = Ast_c.rewrap_str "float" fake in
636 let iinull = [fake] in
637 make_info_def
638 (Ast_c.nQ, (BaseType (FloatType kind), iinull))
639
640
641 (* -------------------------------------------------- *)
642 (* note: could factorize this code with the code for Ident
643 * and the other code for Funcall below. But as the Ident can be
644 * a macro-func, I prefer to handle it separately. So
645 * this rule can handle the macro-func, the Ident-rule can handle
646 * the macro-var, and the other FunCall-rule the regular
647 * function calls through fields.
648 * Also as I don't want a warning on the Ident that are a FunCall,
649 * easier to have a rule separate from the Ident rule.
650 *)
651 | FunCall (((Ident (ident), typ), _ii) as e1, args) ->
652
653 (* recurse *)
654 args +> List.iter (fun (e,ii) ->
655 (* could typecheck if arguments agree with prototype *)
656 Visitor_c.vk_argument bigf e
657 );
658 let s = Ast_c.str_of_name ident in
659 (match lookup_opt_env lookup_var s with
660 | Some ((typ,local),_nextenv) ->
661
662 (* set type for ident *)
663 let tyinfo = make_info_fix (typ, local) in
664 Ast_c.set_type_expr e1 tyinfo;
665
666 (match unwrap_unfold_env typ with
667 | FunctionType (ret, params) -> make_info_def ret
668
669 (* can be function pointer, C have an iso for that,
670 * same pfn() syntax than regular function call.
671 *)
672 | Pointer (typ2) ->
673 (match unwrap_unfold_env typ2 with
674 | FunctionType (ret, params) -> make_info_def ret
675 | _ -> Type_c.noTypeHere
676 )
677 | _ -> Type_c.noTypeHere
678 )
679 | None ->
680
681 (match lookup_opt_env lookup_macro s with
682 | Some ((defkind, defval), _nextenv) ->
683 (match defkind, defval with
684 | DefineFunc _, DefineExpr e ->
685 let rettype = Ast_c.get_onlytype_expr e in
686
687 (* todo: could also set type for ident ?
688 have return type and at least type of concrete
689 parameters so can generate a fake FunctionType
690 *)
691 let macrotype_opt =
692 Type_c.fake_function_type rettype args
693 in
694
695 macrotype_opt +> Common.do_option (fun t ->
696 pr2 ("Type_annotater: generate fake function type" ^
697 "for macro: " ^ s);
698 let tyinfo = make_info_def_fix t in
699 Ast_c.set_type_expr e1 tyinfo;
700 );
701
702 Ast_c.get_type_expr e
703 | DefineVar, _ ->
704 pr2 ("Type_annoter: not a macro-func: " ^ s);
705 Type_c.noTypeHere
706 | DefineFunc _, _ ->
707 (* normally the FunCall case should have catch it *)
708 pr2 ("Type_annoter: not a macro-func-expr: " ^ s);
709 Type_c.noTypeHere
710 )
711 | None ->
712 pr2_once ("type_annotater: no type for function ident: " ^ s);
713 Type_c.noTypeHere
714 )
715 )
716
717
718 | FunCall (e, args) ->
719 k expr;
720
721 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun typ ->
722 (* copy paste of above *)
723 (match unwrap_unfold_env typ with
724 | FunctionType (ret, params) -> make_info_def ret
725 | Pointer (typ) ->
726 (match unwrap_unfold_env typ with
727 | FunctionType (ret, params) -> make_info_def ret
728 | _ -> Type_c.noTypeHere
729 )
730 | _ -> Type_c.noTypeHere
731 )
732 )
733
734
735 (* -------------------------------------------------- *)
736 | Ident (ident) ->
737 let s = Ast_c.str_of_name ident in
738 (match lookup_opt_env lookup_var s with
739 | Some ((typ,local),_nextenv) ->
740 make_info_fix (typ,local)
741 | None ->
742 (match lookup_opt_env lookup_macro s with
743 | Some ((defkind, defval), _nextenv) ->
744 (match defkind, defval with
745 | DefineVar, DefineExpr e ->
746 Ast_c.get_type_expr e
747 | DefineVar, _ ->
748 pr2 ("Type_annoter: not a expression: " ^ s);
749 Type_c.noTypeHere
750 | DefineFunc _, _ ->
751 (* normally the FunCall case should have catch it *)
752 pr2 ("Type_annoter: not a macro-var: " ^ s);
753 Type_c.noTypeHere
754 )
755 | None ->
756 (match lookup_opt_env lookup_enum s with
757 | Some (_, _nextenv) ->
758 make_info_def (type_of_s "int")
759 | None ->
760 if not (s =~ "[A-Z_]+") (* if macro then no warning *)
761 then
762 if !Flag_parsing_c.check_annotater then
763 if not (Hashtbl.mem !_notyped_var s)
764 then begin
765 pr2 ("Type_annoter: not finding type for: " ^ s);
766 Hashtbl.add !_notyped_var s true;
767 end
768 else ()
769 else
770 pr2 ("Type_annoter: not finding type for: " ^ s)
771 ;
772 Type_c.noTypeHere
773 )
774 )
775 )
776
777 (* -------------------------------------------------- *)
778 (* C isomorphism on type on array and pointers *)
779 | Unary (e, DeRef)
780 | ArrayAccess (e, _) ->
781 k expr; (* recurse to set the types-ref of sub expressions *)
782
783 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t ->
784 (* todo: maybe not good env !! *)
785 match unwrap_unfold_env t with
786 | Pointer x
787 | Array (_, x) ->
788 make_info_def_fix x
789 | _ -> Type_c.noTypeHere
790
791 )
792
793 | Unary (e, GetRef) ->
794 k expr; (* recurse to set the types-ref of sub expressions *)
795
796 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t ->
797 (* must generate an element so that '=' can be used
798 * to compare type ?
799 *)
800 let fake = Ast_c.fakeInfo Common.fake_parse_info in
801 let fake = Ast_c.rewrap_str "*" fake in
802
803 let ft = (Ast_c.nQ, (Pointer t, [fake])) in
804 make_info_def_fix ft
805 )
806
807
808 (* -------------------------------------------------- *)
809 (* fields *)
810 | RecordAccess (e, namefld)
811 | RecordPtAccess (e, namefld) as x ->
812
813 let fld = Ast_c.str_of_name namefld in
814
815 k expr; (* recurse to set the types-ref of sub expressions *)
816
817 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t ->
818
819 let topt =
820 match x with
821 | RecordAccess _ -> Some t
822 | RecordPtAccess _ ->
823 (match unwrap_unfold_env t with
824 | Pointer (t) -> Some t
825 | _ -> None
826 )
827 | _ -> raise Impossible
828
829 in
830 (match topt with
831 | None -> Type_c.noTypeHere
832 | Some t ->
833 match unwrap_unfold_env t with
834 | StructUnion (su, sopt, fields) ->
835 (try
836 (* todo: which env ? *)
837 make_info_def_fix
838 (Type_c.type_field fld (su, fields))
839 with
840 | Not_found ->
841 pr2 (spf
842 "TYPE-ERROR: field '%s' does not belong in struct %s"
843 fld (match sopt with Some s -> s |_ -> "<anon>"));
844 Type_c.noTypeHere
845 | Multi_found ->
846 pr2 "TAC:MultiFound";
847 Type_c.noTypeHere
848 )
849 | _ -> Type_c.noTypeHere
850 )
851 )
852
853
854
855 (* -------------------------------------------------- *)
856 | Cast (t, e) ->
857 k expr;
858 (* todo: if infer, can "push" info ? add_types_expr [t] e ? *)
859 make_info_def_fix (Lib.al_type t)
860
861 (* todo? lub, hmm maybe not, cos type must be e1 *)
862 | Assignment (e1, op, e2) ->
863 k expr;
864 (* value of an assignment is the value of the RHS expression *)
865 Ast_c.get_type_expr e2
866 | Sequence (e1, e2) ->
867 k expr;
868 Ast_c.get_type_expr e2
869
870 (* todo: lub *)
871 | Binary (e1, op, e2) ->
872 k expr;
873 Type_c.lub (Type_c.get_opt_type e1) (Type_c.get_opt_type e2)
874
875 | CondExpr (cond, e1opt, e2) ->
876 k expr;
877 Ast_c.get_type_expr e2
878
879
880 | ParenExpr e ->
881 k expr;
882 Ast_c.get_type_expr e
883
884 | Infix (e, op) | Postfix (e, op) ->
885 k expr;
886 Ast_c.get_type_expr e
887
888 (* pad: julia wrote this ? *)
889 | Unary (e, UnPlus) ->
890 k expr; (* recurse to set the types-ref of sub expressions *)
891 make_info_def (type_of_s "int")
892 (* todo? can convert from unsigned to signed if UnMinus ? *)
893 | Unary (e, UnMinus) ->
894 k expr; (* recurse to set the types-ref of sub expressions *)
895 make_info_def (type_of_s "int")
896
897 | SizeOfType _|SizeOfExpr _ ->
898 k expr; (* recurse to set the types-ref of sub expressions *)
899 make_info_def (type_of_s "int")
900
901 | Constructor (ft, ini) ->
902 k expr; (* recurse to set the types-ref of sub expressions *)
903 make_info_def (Lib.al_type ft)
904
905 | Unary (e, Not) ->
906 k expr; (* recurse to set the types-ref of sub expressions *)
907 Ast_c.get_type_expr e
908 | Unary (e, Tilde) ->
909 k expr; (* recurse to set the types-ref of sub expressions *)
910 Ast_c.get_type_expr e
911
912 (* -------------------------------------------------- *)
913 (* todo *)
914 | Unary (_, GetRefLabel) ->
915 k expr; (* recurse to set the types-ref of sub expressions *)
916 pr2_once "Type annotater:not handling GetRefLabel";
917 Type_c.noTypeHere
918 (* todo *)
919 | StatementExpr _ ->
920 k expr; (* recurse to set the types-ref of sub expressions *)
921 pr2_once "Type annotater:not handling GetRefLabel";
922 Type_c.noTypeHere
923 (*
924 | _ -> k expr; Type_c.noTypeHere
925 *)
926
927 in
928 Ast_c.set_type_expr expr ty
929
930 )
931
932
933 (*****************************************************************************)
934 (* Visitor *)
935 (*****************************************************************************)
936
937 (* Processing includes that were added after a cpp_ast_c makes the
938 * type annotater quite slow, especially when the depth of cpp_ast_c is
939 * big. But for such includes the only thing we really want is to modify
940 * the environment to have enough type information. We don't need
941 * to type the expressions inside those includes (they will be typed
942 * when we process the include file directly). Here the goal is
943 * to not recurse.
944 *
945 * Note that as usually header files contain mostly structure
946 * definitions and defines, that means we still have to do lots of work.
947 * We only win on function definition bodies, but usually header files
948 * have just prototypes, or inline function definitions which anyway have
949 * usually a small body. But still, we win. It also makes clearer
950 * that when processing include as we just need the environment, the caller
951 * of this module can do further optimisations such as memorising the
952 * state of the environment after each header files.
953 *
954 *
955 * For sparse its makes the annotating speed goes from 9s to 4s
956 * For Linux the speedup is even better, from ??? to ???.
957 *
958 * Because There would be some copy paste with annotate_program, it is
959 * better to factorize code hence the just_add_in_env parameter below.
960 *
961 * todo? alternative optimisation for the include problem:
962 * - processing all headers files one time and construct big env
963 * - use hashtbl for env (but apparently not biggest problem)
964 *)
965
966 let rec visit_toplevel ~just_add_in_env ~depth elem =
967 let need_annotate_body = not just_add_in_env in
968
969 let bigf = { Visitor_c.default_visitor_c with
970
971 (* ------------------------------------------------------------ *)
972 Visitor_c.kcppdirective = (fun (k, bigf) directive ->
973 match directive with
974 (* do error messages for type annotater only for the real body of the
975 * file, not inside include.
976 *)
977 | Include {i_content = opt} ->
978 opt +> Common.do_option (fun (filename, program) ->
979 Common.save_excursion Flag_parsing_c.verbose_type (fun () ->
980 Flag_parsing_c.verbose_type := false;
981
982 (* old: Visitor_c.vk_program bigf program;
983 * opti: set the just_add_in_env
984 *)
985 program +> List.iter (fun elem ->
986 visit_toplevel ~just_add_in_env:true ~depth:(depth+1) elem
987 )
988 )
989 )
990
991 | Define ((s,ii), (defkind, defval)) ->
992
993
994 (* even if we are in a just_add_in_env phase, such as when
995 * we process include, as opposed to the body of functions,
996 * with macros we still to type the body of the macro as
997 * the macro has no type and so we infer its type from its
998 * body (and one day later maybe from its use).
999 *)
1000 (match defval with
1001 (* can try to optimize and recurse only when the define body
1002 * is simple ?
1003 *)
1004
1005 | DefineExpr expr ->
1006 if is_simple_expr expr
1007 (* even if not need_annotate_body, still recurse*)
1008 then k directive
1009 else
1010 if need_annotate_body
1011 then k directive;
1012 | _ ->
1013 if need_annotate_body
1014 then k directive;
1015 );
1016
1017 add_binding (Macro (s, (defkind, defval) )) true;
1018
1019 | Undef _
1020 | PragmaAndCo _ -> ()
1021 );
1022
1023 (* ------------------------------------------------------------ *)
1024 (* main typer code *)
1025 (* ------------------------------------------------------------ *)
1026 Visitor_c.kexpr = annotater_expr_visitor_subpart;
1027
1028 (* ------------------------------------------------------------ *)
1029 Visitor_c.kstatement = (fun (k, bigf) st ->
1030 match st with
1031 | Compound statxs, ii -> do_in_new_scope (fun () -> k st);
1032 | _ -> k st
1033 );
1034 (* ------------------------------------------------------------ *)
1035 Visitor_c.kdecl = (fun (k, bigf) d ->
1036 (match d with
1037 | (DeclList (xs, ii)) ->
1038 xs +> List.iter (fun ({v_namei = var; v_type = t;
1039 v_storage = sto; v_local = local}, iicomma) ->
1040
1041 (* to add possible definition in type found in Decl *)
1042 Visitor_c.vk_type bigf t;
1043
1044
1045 let local =
1046 match local with
1047 | Ast_c.NotLocalDecl -> Ast_c.NotLocalVar
1048 | Ast_c.LocalDecl -> Ast_c.LocalVar (offset t)
1049 in
1050
1051 var +> Common.do_option (fun (name, iniopt) ->
1052 let s = Ast_c.str_of_name name in
1053
1054 match sto with
1055 | StoTypedef, _inline ->
1056 add_binding (TypeDef (s,Lib.al_type t)) true;
1057 | _ ->
1058 add_binding (VarOrFunc (s, (Lib.al_type t, local))) true;
1059
1060
1061 if need_annotate_body then begin
1062 (* int x = sizeof(x) is legal so need process ini *)
1063 iniopt +> Common.do_option (fun (info, ini) ->
1064 Visitor_c.vk_ini bigf ini
1065 );
1066 end
1067 );
1068 );
1069 | MacroDecl _ ->
1070 if need_annotate_body
1071 then k d
1072 );
1073
1074 );
1075
1076 (* ------------------------------------------------------------ *)
1077 Visitor_c.ktype = (fun (k, bigf) typ ->
1078 (* bugfix: have a 'Lib.al_type typ' before, but because we can
1079 * have enum with possible expression, we don't want to change
1080 * the ref of abstract-lined types, but the real one, so
1081 * don't al_type here
1082 *)
1083 let (_q, t) = typ in
1084 match t with
1085 | StructUnion (su, Some s, structType),ii ->
1086 let structType' = Lib.al_fields structType in
1087 let ii' = Lib.al_ii ii in
1088 add_binding (StructUnionNameDef (s, ((su, structType'),ii'))) true;
1089
1090 if need_annotate_body
1091 then k typ (* todo: restrict ? new scope so use do_in_scope ? *)
1092
1093 | Enum (sopt, enums), ii ->
1094
1095 enums +> List.iter (fun ((name, eopt), iicomma) ->
1096
1097 let s = Ast_c.str_of_name name in
1098
1099 if need_annotate_body
1100 then eopt +> Common.do_option (fun (ieq, e) ->
1101 Visitor_c.vk_expr bigf e
1102 );
1103 add_binding (EnumConstant (s, sopt)) true;
1104 );
1105
1106
1107 (* TODO: if have a TypeName, then maybe can fill the option
1108 * information.
1109 *)
1110 | _ ->
1111 if need_annotate_body
1112 then k typ
1113
1114 );
1115
1116 (* ------------------------------------------------------------ *)
1117 Visitor_c.ktoplevel = (fun (k, bigf) elem ->
1118 _notyped_var := Hashtbl.create 100;
1119 match elem with
1120 | Definition def ->
1121 let {f_name = name;
1122 f_type = ((returnt, (paramst, b)) as ftyp);
1123 f_storage = sto;
1124 f_body = statxs;
1125 f_old_c_style = oldstyle;
1126 },ii
1127 = def
1128 in
1129 let (i1, i2) =
1130 match ii with
1131 | iifunc1::iifunc2::ibrace1::ibrace2::ifakestart::isto ->
1132 iifunc1, iifunc2
1133 | _ -> raise Impossible
1134 in
1135 let funcs = Ast_c.str_of_name name in
1136
1137 (match oldstyle with
1138 | None ->
1139 let typ' =
1140 Lib.al_type (Ast_c.nQ, (FunctionType ftyp, [i1;i2])) in
1141
1142 add_binding (VarOrFunc (funcs, (typ',islocal i1.Ast_c.pinfo)))
1143 false;
1144
1145 if need_annotate_body then
1146 do_in_new_scope (fun () ->
1147 paramst +> List.iter (fun ({p_namei= nameopt; p_type= t},_)->
1148 match nameopt with
1149 | Some name ->
1150 let s = Ast_c.str_of_name name in
1151 let local = Ast_c.LocalVar (offset t) in
1152 add_binding (VarOrFunc (s,(Lib.al_type t,local))) true
1153 | None ->
1154 pr2 "no type, certainly because Void type ?"
1155 );
1156 (* recurse *)
1157 k elem
1158 );
1159 | Some oldstyle ->
1160 (* generate regular function type *)
1161
1162 pr2 "TODO generate type for function";
1163 (* add bindings *)
1164 if need_annotate_body then
1165 do_in_new_scope (fun () ->
1166 (* recurse. should naturally call the kdecl visitor and
1167 * add binding
1168 *)
1169 k elem;
1170 );
1171
1172 );
1173 | Declaration _
1174
1175 | CppTop _
1176 | IfdefTop _
1177 | MacroTop _
1178 | EmptyDef _
1179 | NotParsedCorrectly _
1180 | FinalDef _
1181 ->
1182 k elem
1183 );
1184 }
1185 in
1186 if just_add_in_env
1187 then
1188 if depth > 1
1189 then Visitor_c.vk_toplevel bigf elem
1190 else
1191 Common.profile_code "TAC.annotate_only_included" (fun () ->
1192 Visitor_c.vk_toplevel bigf elem
1193 )
1194 else Visitor_c.vk_toplevel bigf elem
1195
1196 (*****************************************************************************)
1197 (* Entry point *)
1198 (*****************************************************************************)
1199 (* catch all the decl to grow the environment *)
1200
1201
1202 let rec (annotate_program2 :
1203 environment -> toplevel list -> (toplevel * environment Common.pair) list) =
1204 fun env prog ->
1205
1206 (* globals (re)initialialisation *)
1207 _scoped_env := env;
1208 _notyped_var := (Hashtbl.create 100);
1209
1210 prog +> List.map (fun elem ->
1211 let beforeenv = !_scoped_env in
1212 visit_toplevel ~just_add_in_env:false ~depth:0 elem;
1213 let afterenv = !_scoped_env in
1214 (elem, (beforeenv, afterenv))
1215 )
1216
1217
1218
1219
1220 (*****************************************************************************)
1221 (* Annotate test *)
1222 (*****************************************************************************)
1223
1224 (* julia: for coccinelle *)
1225 let annotate_test_expressions prog =
1226 let rec propagate_test e =
1227 let ((e_term,info),_) = e in
1228 let (ty,_) = !info in
1229 info := (ty,Test);
1230 match e_term with
1231 Binary(e1,Logical(AndLog),e2)
1232 | Binary(e1,Logical(OrLog),e2) -> propagate_test e1; propagate_test e2
1233 | Unary(e1,Not) -> propagate_test e1
1234 | ParenExpr(e) -> propagate_test e
1235 | _ -> () in
1236
1237 let bigf = { Visitor_c.default_visitor_c with
1238 Visitor_c.kexpr = (fun (k,bigf) expr ->
1239 (match unwrap expr with
1240 (CondExpr(e,_,_),_) -> propagate_test e
1241 | _ -> ()
1242 );
1243 k expr
1244 );
1245 Visitor_c.kstatement = (fun (k, bigf) st ->
1246 match unwrap st with
1247 Selection(s) ->
1248 (match s with If(e1,s1,s2) -> propagate_test e1 | _ -> ());
1249 k st;
1250 | Iteration(i) ->
1251 (match i with
1252 While(e,s) -> propagate_test e
1253 | DoWhile(s,e) -> propagate_test e
1254 | For(_,es,_,_) ->
1255 (match unwrap es with Some e -> propagate_test e | None -> ())
1256 | _ -> ());
1257 k st
1258 | _ -> k st
1259 )
1260 } in
1261 (prog +> List.iter (fun elem ->
1262 Visitor_c.vk_toplevel bigf elem
1263 ))
1264
1265
1266
1267 (*****************************************************************************)
1268 (* Annotate types *)
1269 (*****************************************************************************)
1270 let annotate_program env prog =
1271 Common.profile_code "TAC.annotate_program"
1272 (fun () ->
1273 let res = annotate_program2 env prog in
1274 annotate_test_expressions prog;
1275 res
1276 )
1277
1278 let annotate_type_and_localvar env prog =
1279 Common.profile_code "TAC.annotate_type"
1280 (fun () -> annotate_program2 env prog)
1281
1282
1283 (*****************************************************************************)
1284 (* changing default typing environment, do concatenation *)
1285 let init_env filename =
1286 pr2 ("init_env: " ^ filename);
1287 let (ast2, _stat) = Parse_c.parse_c_and_cpp filename in
1288 let ast = Parse_c.program_of_program2 ast2 in
1289
1290 let res = annotate_type_and_localvar !initial_env ast in
1291 match List.rev res with
1292 | [] -> pr2 "empty environment"
1293 | (_top,(env1,env2))::xs ->
1294 initial_env := !initial_env ++ env2;
1295 ()
1296