Release coccinelle-0.1.6a
[bpt/coccinelle.git] / parsing_c / type_annoter_c.ml
CommitLineData
0708f913
C
1(* Yoann Padioleau
2 *
3 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes,
4 * Copyright (C) 2009 University of Urbana Champaign
34e49164
C
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
91eba41f 15
34e49164
C
16open Common
17
18open Ast_c
19
20module Lib = Lib_parsing_c
21
22(*****************************************************************************)
91eba41f
C
23(* Prelude *)
24(*****************************************************************************)
25(* History:
26 * - Done a first type checker in 2002, cf typing-semantic/, but
27 * was assuming that have all type info, and so was assuming had called
28 * cpp and everything was right.
29 * - Wrote this file, in 2006?, as we added pattern matching on type
30 * in coccinelle. Partial type annotater.
31 * - Julia extended it in 2008? to have localvar/notlocalvar and
32 * test/notest information, again used by coccinelle.
0708f913 33 * - I extended it in Fall 2008 to have more type information for the
91eba41f
C
34 * global analysis. I also added some optimisations to process
35 * included code faster.
36 *
37 *
38 * Design choices. Can either do:
39 * - a kind of inferer
40 * - can first do a simple inferer, that just pass context
41 * - then a real inferer, managing partial info.
34e49164
C
42 * type context = fullType option
43 *
44 * - extract the information from the .h files
91eba41f
C
45 * (so no inference at all needed)
46 *
47 * Difference with julia's code in parsing_cocci/type_infer.ml:
48 * - She handles just the variable namespace. She does not type
49 * field access or enum or macros. This is because cocci programs are
50 * usually simple and have no structure definition or macro definitions
51 * that we need to type anyway.
52 * - She does more propagation.
53 * - She does not have to handle the typedef isomorphism which force me
54 * to use those typedef_fix and type_unfold_one_step
55 * - She does not handle I think the function pointer C isomorphism.
56 *
57 * - She has a cleaner type_cocci without any info. In my case
58 * I need to do those ugly al_type, or generate fake infos.
59 * - She has more compact code. Perhaps because she does not have to
60 * handle the extra exp_info that she added on me :) So I need those
61 * do_with_type, make_info_xxx, etc.
62 *
63 * Note: if need to debug this annotater, use -show_trace_profile, it can
64 * help. You can also set the typedef_debug flag below.
65 *
66 *
34e49164
C
67 *
68 * todo: expression contain types, and statements, which in turn can contain
69 * expression, so need recurse. Need define an annote_statement and
70 * annotate_type.
91eba41f 71 *
34e49164
C
72 * todo: how deal with typedef isomorphisms ? How store them in Ast_c ?
73 * store all posible variations in ast_c ? a list of type instead of just
74 * the type ?
75 *
0708f913
C
76 * todo: how to handle multiple possible definitions for entities like
77 * struct or typedefs ? Because of ifdef, we should store list of
78 * possibilities sometimes.
79 *
34e49164
C
80 * todo: define a new type ? like type_cocci ? where have a bool ?
81 *
91eba41f 82 * semi: How handle scope ? When search for type of field, we return
34e49164
C
83 * a type, but this type makes sense only in a certain scope.
84 * We could add a tag to each typedef, structUnionName to differentiate
85 * them and also associate in ast_c to the type the scope
86 * of this type, the env that were used to define this type.
91eba41f
C
87 *
88 * todo: handle better the search in previous env, the env'. Cf the
89 * termination problem in typedef_fix when I was searching in the same
90 * env.
91 *
34e49164
C
92 *)
93
94(*****************************************************************************)
95(* Wrappers *)
96(*****************************************************************************)
97let pr2 s =
98 if !Flag_parsing_c.verbose_type
99 then Common.pr2 s
100
0708f913
C
101let pr2_once s =
102 if !Flag_parsing_c.verbose_type
103 then Common.pr2_once s
104
34e49164
C
105(*****************************************************************************)
106(* Environment *)
107(*****************************************************************************)
108
91eba41f 109(* The different namespaces from stdC manual:
34e49164
C
110 *
111 * You introduce two new name spaces with every block that you write.
91eba41f
C
112 *
113 * One name space includes all
114 * - functions,
115 * - objects,
116 * - type definitions,
117 * - and enumeration constants
118 * that you declare or define within the block.
119 *
120 * The other name space includes all
121 * - enumeration,
122 * - structure,
123 * - and union
124 * *tags* that you define within the block.
34e49164
C
125 *
126 * You introduce a new member name space with every structure or union
127 * whose content you define. You identify a member name space by the
128 * type of left operand that you write for a member selection
129 * operator, as in x.y or p->y. A member name space ends with the end
130 * of the block in which you declare it.
131 *
132 * You introduce a new goto label name space with every function
133 * definition you write. Each goto label name space ends with its
134 * function definition.
135 *)
136
137(* But I don't try to do a type-checker, I try to "resolve" type of var
138 * so don't need make difference between namespaces here.
139 *
140 * But, why not make simply a (string, kindstring) assoc ?
141 * Because we dont want that a variable shadow a struct definition, because
142 * they are still in 2 different namespace. But could for typedef,
143 * because VarOrFunc and Typedef are in the same namespace.
144 * But could do a record as in c_info.ml
145 *)
146
147
91eba41f
C
148(* This type contains all "ident" like notion of C. Each time in Ast_c
149 * you have a string type (as in expression, function name, fields)
150 * then you need to manage the scope of this ident.
151 *
152 * The wrap for StructUnionNameDef contain the whole ii, the i for
153 * the string, the structUnion and the structType.
154 *
155 * Put Macro here ? after all the scoping rules for cpp macros is different
156 * and so does not vanish after the closing '}'.
157 *
158 * todo: EnumDef
34e49164
C
159 *)
160type namedef =
161 | VarOrFunc of string * Ast_c.exp_type
91eba41f
C
162 | EnumConstant of string * string option
163
164 | TypeDef of string * fullType
165 (* the structType contains nested "idents" with struct scope *)
34e49164 166 | StructUnionNameDef of string * (structUnion * structType) wrap
34e49164 167
91eba41f
C
168 (* cppext: *)
169 | Macro of string * define_body
170
171
172(* Because have nested scope, have nested list, hence the list list.
173 *
174 * opti? use a hash to accelerate ? hmm but may have some problems
175 * with hash to handle recursive lookup. For instance for the typedef
176 * example where have mutually recursive definition of the type,
177 * we must take care to not loop by starting the second search
178 * from the previous environment. With the list scheme in
179 * lookup_env below it's quite easy to do. With hash it may be
180 * more complicated.
181*)
34e49164
C
182type environment = namedef list list
183
91eba41f
C
184
185(* ------------------------------------------------------------ *)
186(* can be modified by the init_env function below, by
187 * the file environment_unix.h
188 *)
189let initial_env = ref [
190 [VarOrFunc("NULL",
191 (Lib.al_type (Parse_c.type_of_string "void *"),
192 Ast_c.NotLocalVar));
193
194 (*
195 VarOrFunc("malloc",
196 (Lib.al_type(Parse_c.type_of_string "void* (*)(int size)"),
197 Ast_c.NotLocalVar));
198 VarOrFunc("free",
199 (Lib.al_type(Parse_c.type_of_string "void (*)(void *ptr)"),
200 Ast_c.NotLocalVar));
201 *)
202 ]
34e49164
C
203]
204
205
91eba41f
C
206let typedef_debug = ref false
207
208
209(* ------------------------------------------------------------ *)
210(* generic, lookup and also return remaining env for further lookup *)
211let rec lookup_env2 f env =
34e49164
C
212 match env with
213 | [] -> raise Not_found
91eba41f 214 | []::zs -> lookup_env2 f zs
34e49164 215 | (x::xs)::zs ->
91eba41f
C
216 (match f x with
217 | None -> lookup_env2 f (xs::zs)
34e49164 218 | Some y -> y, xs::zs
91eba41f
C
219 )
220let lookup_env a b =
221 Common.profile_code "TAC.lookup_env" (fun () -> lookup_env2 a b)
222
223
224
225let member_env lookupf env =
226 try
227 let _ = lookupf env in
228 true
229 with Not_found -> false
230
231
232
233
234(* ------------------------------------------------------------ *)
34e49164 235
34e49164
C
236
237let lookup_var s env =
238 let f = function
239 | VarOrFunc (s2, typ) -> if s2 = s then Some typ else None
240 | _ -> None
241 in
242 lookup_env f env
243
244let lookup_typedef s env =
91eba41f 245 if !typedef_debug then pr2 ("looking for: " ^ s);
34e49164
C
246 let f = function
247 | TypeDef (s2, typ) -> if s2 = s then Some typ else None
248 | _ -> None
249 in
250 lookup_env f env
251
252let lookup_structunion (_su, s) env =
253 let f = function
254 | StructUnionNameDef (s2, typ) -> if s2 = s then Some typ else None
255 | _ -> None
256 in
257 lookup_env f env
258
91eba41f
C
259let lookup_macro s env =
260 let f = function
261 | Macro (s2, typ) -> if s2 = s then Some typ else None
262 | _ -> None
263 in
264 lookup_env f env
265
266let lookup_enum s env =
267 let f = function
268 | EnumConstant (s2, typ) -> if s2 = s then Some typ else None
269 | _ -> None
270 in
271 lookup_env f env
272
273
274let lookup_typedef a b =
275 Common.profile_code "TAC.lookup_typedef" (fun () -> lookup_typedef a b)
276
277
34e49164
C
278
279(*****************************************************************************)
280(* "type-lookup" *)
281(*****************************************************************************)
282
283(* find_final_type is used to know to what type a field correspond in
284 * x.foo. Sometimes the type of x is a typedef or a structName in which
285 * case we must look in environment to find the complete type, here
286 * structUnion that contains the information.
287 *
288 * Because in C one can redefine in nested blocks some typedefs,
289 * struct, or variables, we have a static scoping resolving process.
290 * So, when we look for the type of a var, if this var is in an
291 * enclosing block, then maybe its type refer to a typdef of this
292 * enclosing block, so must restart the "type-resolving" of this
293 * typedef from this enclosing block, not from the bottom. So our
294 * "resolving-type functions" take an env and also return an env from
295 * where the next search must be performed. *)
296
297(*
298let rec find_final_type ty env =
299
300 match Ast_c.unwrap_typeC ty with
301 | BaseType x -> (BaseType x) +> Ast_c.rewrap_typeC ty
302
303 | Pointer t -> (Pointer (find_final_type t env)) +> Ast_c.rewrap_typeC ty
304 | Array (e, t) -> Array (e, find_final_type t env) +> Ast_c.rewrap_typeC ty
305
306 | StructUnion (sopt, su) -> StructUnion (sopt, su) +> Ast_c.rewrap_typeC ty
307
308 | FunctionType t -> (FunctionType t) (* todo ? *) +> Ast_c.rewrap_typeC ty
309 | Enum (s, enumt) -> (Enum (s, enumt)) (* todo? *) +> Ast_c.rewrap_typeC ty
310 | EnumName s -> (EnumName s) (* todo? *) +> Ast_c.rewrap_typeC ty
311
312 | StructUnionName (su, s) ->
313 (try
314 let ((structtyp,ii), env') = lookup_structunion (su, s) env in
315 Ast_c.nQ, (StructUnion (Some s, structtyp), ii)
316 (* old: +> Ast_c.rewrap_typeC ty
317 * but must wrap with good ii, otherwise pretty_print_c
318 * will be lost and raise some Impossible
319 *)
320 with Not_found ->
321 ty
322 )
323
324 | TypeName s ->
325 (try
326 let (t', env') = lookup_typedef s env in
327 find_final_type t' env'
328 with Not_found ->
329 ty
330 )
331
332 | ParenType t -> find_final_type t env
333 | Typeof e -> failwith "typeof"
334*)
335
336
337
338
91eba41f 339(* ------------------------------------------------------------ *)
34e49164
C
340let rec type_unfold_one_step ty env =
341
342 match Ast_c.unwrap_typeC ty with
91eba41f
C
343 | BaseType x -> ty
344 | Pointer t -> ty
345 | Array (e, t) -> ty
346
34e49164
C
347 | StructUnion (sopt, su, fields) -> ty
348
91eba41f 349 | FunctionType t -> ty
34e49164 350 | Enum (s, enumt) -> ty
91eba41f
C
351
352 | EnumName s -> ty (* todo: look in env when will have EnumDef *)
34e49164
C
353
354 | StructUnionName (su, s) ->
355 (try
356 let (((su,fields),ii), env') = lookup_structunion (su, s) env in
357 Ast_c.nQ, (StructUnion (su, Some s, fields), ii)
358 (* old: +> Ast_c.rewrap_typeC ty
359 * but must wrap with good ii, otherwise pretty_print_c
360 * will be lost and raise some Impossible
361 *)
362 with Not_found ->
363 ty
364 )
365
366 | TypeName (s,_typ) ->
367 (try
91eba41f 368 if !typedef_debug then pr2 "type_unfold_one_step: lookup_typedef";
34e49164
C
369 let (t', env') = lookup_typedef s env in
370 type_unfold_one_step t' env'
91eba41f 371 with Not_found ->
34e49164
C
372 ty
373 )
374
375 | ParenType t -> type_unfold_one_step t env
376 | TypeOfExpr e ->
377 pr2_once ("Type_annoter: not handling typeof");
378 ty
379 | TypeOfType t -> type_unfold_one_step t env
380
381
382
34e49164
C
383
384
385
34e49164
C
386
387
388
91eba41f 389(* normalizer. can be seen as the opposite of the previous function as
0708f913
C
390 * we "fold" at least for the structUnion. Should return something that
391 * Type_c.is_completed_fullType likes, something that makes it easier
392 * for the programmer to work on, that has all the needed information
393 * for most tasks.
91eba41f 394 *)
34e49164 395let rec typedef_fix ty env =
34e49164 396 match Ast_c.unwrap_typeC ty with
91eba41f
C
397 | BaseType x ->
398 ty
399 | Pointer t ->
400 Pointer (typedef_fix t env) +> Ast_c.rewrap_typeC ty
401 | Array (e, t) ->
402 Array (e, typedef_fix t env) +> Ast_c.rewrap_typeC ty
403 | StructUnion (su, sopt, fields) ->
404 (* normalize, fold.
405 * todo? but what if correspond to a nested struct def ?
406 *)
407 Type_c.structdef_to_struct_name ty
34e49164
C
408 | FunctionType ft ->
409 (FunctionType ft) (* todo ? *) +> Ast_c.rewrap_typeC ty
410 | Enum (s, enumt) ->
411 (Enum (s, enumt)) (* todo? *) +> Ast_c.rewrap_typeC ty
412 | EnumName s ->
413 (EnumName s) (* todo? *) +> Ast_c.rewrap_typeC ty
414
415 (* we prefer StructUnionName to StructUnion when it comes to typed metavar *)
416 | StructUnionName (su, s) -> ty
91eba41f
C
417
418 (* keep the typename but complete with more information *)
0708f913
C
419 | TypeName (s, typ) ->
420 (match typ with
421 | Some _ ->
422 pr2 ("typedef value already there:" ^ s);
423 ty
424 | None ->
425 (try
91eba41f 426 if !typedef_debug then pr2 "typedef_fix: lookup_typedef";
34e49164 427 let (t', env') = lookup_typedef s env in
91eba41f
C
428
429 (* bugfix: termination bug if use env instead of env' below, because
0708f913 430 * can have some weird mutually recursive typedef which
91eba41f
C
431 * each new type alias search for its mutual def.
432 *)
433 TypeName (s, Some (typedef_fix t' env')) +> Ast_c.rewrap_typeC ty
434 with Not_found ->
34e49164 435 ty
0708f913
C
436 ))
437
91eba41f
C
438 (* remove paren for better matching with typed metavar. kind of iso again *)
439 | ParenType t ->
440 typedef_fix t env
34e49164
C
441 | TypeOfExpr e ->
442 pr2_once ("Type_annoter: not handling typeof");
443 ty
444
91eba41f
C
445 | TypeOfType t ->
446 typedef_fix t env
447
448
449(*****************************************************************************)
450(* Helpers, part 1 *)
451(*****************************************************************************)
452
453let type_of_s2 s =
454 (Lib.al_type (Parse_c.type_of_string s))
455let type_of_s a =
456 Common.profile_code "Type_c.type_of_s" (fun () -> type_of_s2 a)
457
458
459(* pad: pb on:
460 * /home/pad/software-os-src2/freebsd/contrib/ipfilter/netinet/ip_fil_freebsd.c
461 * because in the code there is:
462 * static iss_seq_off = 0;
463 * which in the parser was generating a default int without a parse_info.
464 * I now add a fake parse_info for such default int so no more failwith
465 * normally.
466 *)
467let offset (_,(ty,iis)) =
468 match iis with
469 ii::_ -> ii.Ast_c.pinfo
470 | _ -> failwith "type has no text; need to think again"
471
472
473
474let rec is_simple_expr expr =
475 match Ast_c.unwrap_expr expr with
476 (* todo? handle more special cases ? *)
477
478 | Ident _ ->
479 true
480 | Constant (_) ->
481 true
482 | Unary (op, e) ->
483 true
484 | Binary (e1, op, e2) ->
485 true
486 | Cast (t, e) ->
487 true
488 | ParenExpr (e) -> is_simple_expr e
489
490 | _ -> false
491
492(*****************************************************************************)
493(* Typing rules *)
494(*****************************************************************************)
495(* now in type_c.ml *)
496
497
34e49164
C
498
499(*****************************************************************************)
500(* (Semi) Globals, Julia's style *)
501(*****************************************************************************)
502
503(* opti: cache ? use hash ? *)
91eba41f 504let _scoped_env = ref !initial_env
34e49164
C
505
506(* memoise unnanoted var, to avoid too much warning messages *)
507let _notyped_var = ref (Hashtbl.create 100)
508
509let new_scope() = _scoped_env := []::!_scoped_env
510let del_scope() = _scoped_env := List.tl !_scoped_env
511
512let do_in_new_scope f =
513 begin
514 new_scope();
515 let res = f() in
516 del_scope();
517 res
518 end
519
520let add_in_scope namedef =
521 let (current, older) = Common.uncons !_scoped_env in
522 _scoped_env := (namedef::current)::older
523
91eba41f
C
524
525(* ------------------------------------------------------------ *)
526
34e49164
C
527(* sort of hackish... *)
528let islocal info =
91eba41f 529 if List.length (!_scoped_env) = List.length !initial_env
34e49164
C
530 then Ast_c.NotLocalVar
531 else Ast_c.LocalVar info
532
91eba41f 533(* ------------------------------------------------------------ *)
34e49164 534(* the warning argument is here to allow some binding to overwrite an
91eba41f
C
535 * existing one. With function, we first have the prototype and then the def,
536 * and the def binding with the same string is not an error.
537 *
34e49164 538 * todo?: but if we define two times the same function, then we will not
91eba41f 539 * detect it :( it would require to make a diff between adding a binding
34e49164 540 * from a prototype and from a definition.
91eba41f
C
541 *
542 * opti: disabling the check_annotater flag have some important
543 * performance benefit.
544 *
34e49164 545 *)
91eba41f 546let add_binding2 namedef warning =
34e49164
C
547 let (current_scope, _older_scope) = Common.uncons !_scoped_env in
548
91eba41f 549 if !Flag_parsing_c.check_annotater then begin
34e49164 550 (match namedef with
91eba41f
C
551 | VarOrFunc (s, typ) ->
552 if Hashtbl.mem !_notyped_var s
553 then pr2 ("warning: found typing information for a variable that was" ^
554 "previously unknown:" ^ s);
555 | _ -> ()
556 );
557
558 let (memberf, s) =
559 (match namedef with
560 | VarOrFunc (s, typ) ->
561 member_env (lookup_var s), s
562 | TypeDef (s, typ) ->
563 member_env (lookup_typedef s), s
564 | StructUnionNameDef (s, (su, typ)) ->
565 member_env (lookup_structunion (su, s)), s
566 | Macro (s, body) ->
567 member_env (lookup_macro s), s
568 | EnumConstant (s, body) ->
569 member_env (lookup_enum s), s
570 ) in
571
572 if memberf [current_scope] && warning
573 then pr2 ("Type_annoter: warning, " ^ s ^
574 " is already in current binding" ^ "\n" ^
0708f913 575 " so there is a weird shadowing");
91eba41f 576 end;
34e49164 577 add_in_scope namedef
91eba41f
C
578
579let add_binding namedef warning =
580 Common.profile_code "TAC.add_binding" (fun () -> add_binding2 namedef warning)
34e49164
C
581
582
91eba41f 583
34e49164 584(*****************************************************************************)
91eba41f 585(* Helpers, part 2 *)
34e49164
C
586(*****************************************************************************)
587
91eba41f
C
588let lookup_opt_env lookupf s =
589 Common.optionise (fun () ->
590 lookupf s !_scoped_env
591 )
592
593let unwrap_unfold_env2 typ =
594 Ast_c.unwrap_typeC
595 (type_unfold_one_step typ !_scoped_env)
596let unwrap_unfold_env typ =
597 Common.profile_code "TAC.unwrap_unfold_env" (fun () -> unwrap_unfold_env2 typ)
34e49164 598
91eba41f
C
599let typedef_fix a b =
600 Common.profile_code "TAC.typedef_fix" (fun () -> typedef_fix a b)
34e49164 601
91eba41f
C
602let make_info_def_fix x =
603 Type_c.make_info_def (typedef_fix x !_scoped_env)
34e49164 604
91eba41f
C
605let make_info_fix (typ, local) =
606 Type_c.make_info ((typedef_fix typ !_scoped_env),local)
607
608
609let make_info_def = Type_c.make_info_def
34e49164 610
34e49164 611(*****************************************************************************)
91eba41f 612(* Main typer code, put later in a visitor *)
34e49164 613(*****************************************************************************)
34e49164 614
91eba41f 615let annotater_expr_visitor_subpart = (fun (k,bigf) expr ->
34e49164 616
91eba41f
C
617 let ty =
618 match Ast_c.unwrap_expr expr with
619
620 (* -------------------------------------------------- *)
621 (* todo: should analyse the 's' for int to know if unsigned or not *)
622 | Constant (String (s,kind)) -> make_info_def (type_of_s "char *")
0708f913 623 | Constant MultiString _ -> make_info_def (type_of_s "char *")
91eba41f
C
624 | Constant (Char (s,kind)) -> make_info_def (type_of_s "char")
625 | Constant (Int (s)) -> make_info_def (type_of_s "int")
626 | Constant (Float (s,kind)) ->
627 let fake = Ast_c.fakeInfo (Common.fake_parse_info) in
628 let fake = Ast_c.rewrap_str "float" fake in
629 let iinull = [fake] in
630 make_info_def
631 (Ast_c.nQ, (BaseType (FloatType kind), iinull))
632
633
634 (* -------------------------------------------------- *)
635 (* note: could factorize this code with the code for Ident
636 * and the other code for Funcall below. But as the Ident can be
637 * a macro-func, I prefer to handle it separately. So
638 * this rule can handle the macro-func, the Ident-rule can handle
639 * the macro-var, and the other FunCall-rule the regular
640 * function calls through fields.
641 * Also as I don't want a warning on the Ident that are a FunCall,
642 * easier to have a rule separate from the Ident rule.
643 *)
644 | FunCall (((Ident s, typ), ii) as e1, args) ->
645
646 (* recurse *)
647 args +> List.iter (fun (e,ii) ->
648 (* could typecheck if arguments agree with prototype *)
649 Visitor_c.vk_argument bigf e
650 );
651
652 (match lookup_opt_env lookup_var s with
653 | Some ((typ,local),_nextenv) ->
654
655 (* set type for ident *)
656 let tyinfo = make_info_fix (typ, local) in
657 Ast_c.set_type_expr e1 tyinfo;
658
659 (match unwrap_unfold_env typ with
660 | FunctionType (ret, params) -> make_info_def ret
661
662 (* can be function pointer, C have an iso for that,
663 * same pfn() syntax than regular function call.
664 *)
665 | Pointer (typ2) ->
666 (match unwrap_unfold_env typ2 with
667 | FunctionType (ret, params) -> make_info_def ret
668 | _ -> Type_c.noTypeHere
669 )
670 | _ -> Type_c.noTypeHere
671 )
672 | None ->
673
674 (match lookup_opt_env lookup_macro s with
675 | Some ((defkind, defval), _nextenv) ->
676 (match defkind, defval with
677 | DefineFunc _, DefineExpr e ->
678 let rettype = Ast_c.get_onlytype_expr e in
679
680 (* todo: could also set type for ident ?
681 have return type and at least type of concrete
682 parameters so can generate a fake FunctionType
683 *)
684 let macrotype_opt =
685 Type_c.fake_function_type rettype args
686 in
687
688 macrotype_opt +> Common.do_option (fun t ->
689 pr2 ("Type_annotater: generate fake function type" ^
690 "for macro: " ^ s);
691 let tyinfo = make_info_def_fix t in
692 Ast_c.set_type_expr e1 tyinfo;
693 );
694
695 Ast_c.get_type_expr e
696 | DefineVar, _ ->
697 pr2 ("Type_annoter: not a macro-func: " ^ s);
698 Type_c.noTypeHere
699 | DefineFunc _, _ ->
700 (* normally the FunCall case should have catch it *)
701 pr2 ("Type_annoter: not a macro-func-expr: " ^ s);
702 Type_c.noTypeHere
703 )
704 | None ->
0708f913 705 pr2_once ("type_annotater: no type for function ident: " ^ s);
91eba41f
C
706 Type_c.noTypeHere
707 )
708 )
34e49164 709
34e49164 710
91eba41f
C
711 | FunCall (e, args) ->
712 k expr;
713
714 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun typ ->
715 (* copy paste of above *)
716 (match unwrap_unfold_env typ with
717 | FunctionType (ret, params) -> make_info_def ret
718 | Pointer (typ) ->
719 (match unwrap_unfold_env typ with
720 | FunctionType (ret, params) -> make_info_def ret
721 | _ -> Type_c.noTypeHere
722 )
723 | _ -> Type_c.noTypeHere
34e49164 724 )
91eba41f
C
725 )
726
727
728 (* -------------------------------------------------- *)
729 | Ident (s) ->
730 (match lookup_opt_env lookup_var s with
731 | Some ((typ,local),_nextenv) ->
732 make_info_fix (typ,local)
733 | None ->
734 (match lookup_opt_env lookup_macro s with
735 | Some ((defkind, defval), _nextenv) ->
736 (match defkind, defval with
737 | DefineVar, DefineExpr e ->
738 Ast_c.get_type_expr e
739 | DefineVar, _ ->
740 pr2 ("Type_annoter: not a expression: " ^ s);
741 Type_c.noTypeHere
742 | DefineFunc _, _ ->
743 (* normally the FunCall case should have catch it *)
744 pr2 ("Type_annoter: not a macro-var: " ^ s);
745 Type_c.noTypeHere
746 )
747 | None ->
748 (match lookup_opt_env lookup_enum s with
749 | Some (_, _nextenv) ->
750 make_info_def (type_of_s "int")
751 | None ->
752 if not (s =~ "[A-Z_]+") (* if macro then no warning *)
753 then
754 if !Flag_parsing_c.check_annotater then
755 if not (Hashtbl.mem !_notyped_var s)
756 then begin
757 pr2 ("Type_annoter: not finding type for: " ^ s);
758 Hashtbl.add !_notyped_var s true;
759 end
760 else ()
761 else
762 pr2 ("Type_annoter: not finding type for: " ^ s)
763 ;
764 Type_c.noTypeHere
765 )
766 )
767 )
768
769 (* -------------------------------------------------- *)
770 (* C isomorphism on type on array and pointers *)
771 | Unary (e, DeRef)
772 | ArrayAccess (e, _) ->
773 k expr; (* recurse to set the types-ref of sub expressions *)
774
775 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t ->
776 (* todo: maybe not good env !! *)
777 match unwrap_unfold_env t with
778 | Pointer x
779 | Array (_, x) ->
780 make_info_def_fix x
781 | _ -> Type_c.noTypeHere
782
783 )
784
785 | Unary (e, GetRef) ->
786 k expr; (* recurse to set the types-ref of sub expressions *)
787
788 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t ->
789 (* must generate an element so that '=' can be used
790 * to compare type ?
791 *)
792 let fake = Ast_c.fakeInfo Common.fake_parse_info in
793 let fake = Ast_c.rewrap_str "*" fake in
794
795 let ft = (Ast_c.nQ, (Pointer t, [fake])) in
796 make_info_def_fix ft
797 )
798
799
800 (* -------------------------------------------------- *)
801 (* fields *)
802 | RecordAccess (e, fld)
803 | RecordPtAccess (e, fld) as x ->
34e49164 804
91eba41f
C
805 k expr; (* recurse to set the types-ref of sub expressions *)
806
807 (Ast_c.get_type_expr e) +> Type_c.do_with_type (fun t ->
808
809 let topt =
810 match x with
811 | RecordAccess _ -> Some t
812 | RecordPtAccess _ ->
813 (match unwrap_unfold_env t with
814 | Pointer (t) -> Some t
815 | _ -> None
34e49164 816 )
91eba41f
C
817 | _ -> raise Impossible
818
819 in
820 (match topt with
821 | None -> Type_c.noTypeHere
822 | Some t ->
823 match unwrap_unfold_env t with
824 | StructUnion (su, sopt, fields) ->
825 (try
826 (* todo: which env ? *)
827 make_info_def_fix
828 (Type_c.type_field fld (su, fields))
829 with
830 | Not_found ->
831 pr2 (spf
832 "TYPE-ERROR: field '%s' does not belong in struct %s"
833 fld (match sopt with Some s -> s |_ -> "<anon>"));
834 Type_c.noTypeHere
0708f913 835 | Multi_found ->
91eba41f
C
836 pr2 "TAC:MultiFound";
837 Type_c.noTypeHere
838 )
839 | _ -> Type_c.noTypeHere
34e49164 840 )
91eba41f
C
841 )
842
843
844
845 (* -------------------------------------------------- *)
846 | Cast (t, e) ->
847 k expr;
848 (* todo: if infer, can "push" info ? add_types_expr [t] e ? *)
849 make_info_def_fix (Lib.al_type t)
850
851 (* todo? lub, hmm maybe not, cos type must be e1 *)
852 | Assignment (e1, op, e2) ->
853 k expr;
854 Ast_c.get_type_expr e1
855 | Sequence (e1, e2) ->
856 k expr;
857 Ast_c.get_type_expr e2
858
859 (* todo: lub *)
860 | Binary (e1, op, e2) ->
861 k expr;
862 Type_c.lub (Type_c.get_opt_type e1) (Type_c.get_opt_type e2)
863
864 | CondExpr (cond, e1opt, e2) ->
865 k expr;
866 Ast_c.get_type_expr e2
34e49164 867
34e49164 868
91eba41f
C
869 | ParenExpr e ->
870 k expr;
871 Ast_c.get_type_expr e
872
873 | Infix (e, op) | Postfix (e, op) ->
874 k expr;
875 Ast_c.get_type_expr e
876
877 (* pad: julia wrote this ? *)
878 | Unary (e, UnPlus) ->
879 k expr; (* recurse to set the types-ref of sub expressions *)
880 make_info_def (type_of_s "int")
881 (* todo? can convert from unsigned to signed if UnMinus ? *)
882 | Unary (e, UnMinus) ->
883 k expr; (* recurse to set the types-ref of sub expressions *)
884 make_info_def (type_of_s "int")
885
886 | SizeOfType _|SizeOfExpr _ ->
887 k expr; (* recurse to set the types-ref of sub expressions *)
888 make_info_def (type_of_s "int")
889
890 | Constructor (ft, ini) ->
891 k expr; (* recurse to set the types-ref of sub expressions *)
892 make_info_def (Lib.al_type ft)
893
894 | Unary (e, Not) ->
895 k expr; (* recurse to set the types-ref of sub expressions *)
896 Ast_c.get_type_expr e
897 | Unary (e, Tilde) ->
898 k expr; (* recurse to set the types-ref of sub expressions *)
899 Ast_c.get_type_expr e
900
901 (* -------------------------------------------------- *)
902 (* todo *)
903 | Unary (_, GetRefLabel) ->
904 k expr; (* recurse to set the types-ref of sub expressions *)
905 pr2_once "Type annotater:not handling GetRefLabel";
906 Type_c.noTypeHere
907 (* todo *)
908 | StatementExpr _ ->
909 k expr; (* recurse to set the types-ref of sub expressions *)
910 pr2_once "Type annotater:not handling GetRefLabel";
911 Type_c.noTypeHere
912 (*
913 | _ -> k expr; Type_c.noTypeHere
914 *)
915
916 in
917 Ast_c.set_type_expr expr ty
918
919)
920
921
922(*****************************************************************************)
923(* Visitor *)
924(*****************************************************************************)
925
926(* Processing includes that were added after a cpp_ast_c makes the
927 * type annotater quite slow, especially when the depth of cpp_ast_c is
928 * big. But for such includes the only thing we really want is to modify
929 * the environment to have enough type information. We don't need
930 * to type the expressions inside those includes (they will be typed
931 * when we process the include file directly). Here the goal is
932 * to not recurse.
933 *
934 * Note that as usually header files contain mostly structure
935 * definitions and defines, that means we still have to do lots of work.
936 * We only win on function definition bodies, but usually header files
937 * have just prototypes, or inline function definitions which anyway have
938 * usually a small body. But still, we win. It also makes clearer
939 * that when processing include as we just need the environment, the caller
940 * of this module can do further optimisations such as memorising the
941 * state of the environment after each header files.
942 *
943 *
944 * For sparse its makes the annotating speed goes from 9s to 4s
945 * For Linux the speedup is even better, from ??? to ???.
946 *
947 * Because There would be some copy paste with annotate_program, it is
948 * better to factorize code hence the just_add_in_env parameter below.
949 *
950 * todo? alternative optimisation for the include problem:
951 * - processing all headers files one time and construct big env
952 * - use hashtbl for env (but apparently not biggest problem)
953 *)
954
955let rec visit_toplevel ~just_add_in_env ~depth elem =
956 let need_annotate_body = not just_add_in_env in
957
958 let bigf = { Visitor_c.default_visitor_c with
959
960 (* ------------------------------------------------------------ *)
961 Visitor_c.kcppdirective = (fun (k, bigf) directive ->
962 match directive with
963 (* do error messages for type annotater only for the real body of the
964 * file, not inside include.
965 *)
966 | Include {i_content = opt} ->
967 opt +> Common.do_option (fun (filename, program) ->
968 Common.save_excursion Flag_parsing_c.verbose_type (fun () ->
969 Flag_parsing_c.verbose_type := false;
970
971 (* old: Visitor_c.vk_program bigf program;
972 * opti: set the just_add_in_env
973 *)
974 program +> List.iter (fun elem ->
975 visit_toplevel ~just_add_in_env:true ~depth:(depth+1) elem
34e49164 976 )
91eba41f 977 )
34e49164 978 )
91eba41f
C
979
980 | Define ((s,ii), (defkind, defval)) ->
981
982
983 (* even if we are in a just_add_in_env phase, such as when
984 * we process include, as opposed to the body of functions,
985 * with macros we still to type the body of the macro as
986 * the macro has no type and so we infer its type from its
987 * body (and one day later maybe from its use).
988 *)
989 (match defval with
990 (* can try to optimize and recurse only when the define body
991 * is simple ?
992 *)
993
994 | DefineExpr expr ->
995 if is_simple_expr expr
996 (* even if not need_annotate_body, still recurse*)
997 then k directive
998 else
999 if need_annotate_body
1000 then k directive;
1001 | _ ->
1002 if need_annotate_body
1003 then k directive;
1004 );
1005
1006 add_binding (Macro (s, (defkind, defval) )) true;
1007
1008 | Undef _
1009 | PragmaAndCo _ -> ()
34e49164
C
1010 );
1011
91eba41f
C
1012 (* ------------------------------------------------------------ *)
1013 (* main typer code *)
1014 (* ------------------------------------------------------------ *)
1015 Visitor_c.kexpr = annotater_expr_visitor_subpart;
1016
1017 (* ------------------------------------------------------------ *)
34e49164
C
1018 Visitor_c.kstatement = (fun (k, bigf) st ->
1019 match st with
1020 | Compound statxs, ii -> do_in_new_scope (fun () -> k st);
1021 | _ -> k st
34e49164 1022 );
91eba41f 1023 (* ------------------------------------------------------------ *)
34e49164
C
1024 Visitor_c.kdecl = (fun (k, bigf) d ->
1025 (match d with
1026 | (DeclList (xs, ii)) ->
485bce71
C
1027 xs +> List.iter (fun ({v_namei = var; v_type = t;
1028 v_storage = sto; v_local = local}, iicomma) ->
1029
34e49164
C
1030 (* to add possible definition in type found in Decl *)
1031 Visitor_c.vk_type bigf t;
91eba41f
C
1032
1033
1034 let local =
1035 match local with
1036 | Ast_c.NotLocalDecl -> Ast_c.NotLocalVar
1037 | Ast_c.LocalDecl -> Ast_c.LocalVar (offset t)
1038 in
34e49164 1039
91eba41f 1040 var +> Common.do_option (fun ((s, ini), ii_s_ini) ->
34e49164
C
1041 match sto with
1042 | StoTypedef, _inline ->
1043 add_binding (TypeDef (s,Lib.al_type t)) true;
1044 | _ ->
1045 add_binding (VarOrFunc (s, (Lib.al_type t, local))) true;
91eba41f
C
1046
1047
1048 if need_annotate_body then begin
1049 (* int x = sizeof(x) is legal so need process ini *)
1050 ini +> Common.do_option (fun ini ->
1051 Visitor_c.vk_ini bigf ini
1052 );
1053 end
34e49164
C
1054 );
1055 );
91eba41f
C
1056 | MacroDecl _ ->
1057 if need_annotate_body
1058 then k d
34e49164
C
1059 );
1060
1061 );
1062
91eba41f 1063 (* ------------------------------------------------------------ *)
34e49164 1064 Visitor_c.ktype = (fun (k, bigf) typ ->
91eba41f
C
1065 (* bugfix: have a 'Lib.al_type typ' before, but because we can
1066 * have enum with possible expression, we don't want to change
1067 * the ref of abstract-lined types, but the real one, so
1068 * don't al_type here
1069 *)
1070 let (_q, t) = typ in
34e49164
C
1071 match t with
1072 | StructUnion (su, Some s, structType),ii ->
91eba41f
C
1073 let structType' = Lib.al_fields structType in
1074 let ii' = Lib.al_ii ii in
1075 add_binding (StructUnionNameDef (s, ((su, structType'),ii'))) true;
1076
1077 if need_annotate_body
1078 then k typ (* todo: restrict ? new scope so use do_in_scope ? *)
1079
1080 | Enum (sopt, enums), ii ->
1081
1082 enums +> List.iter (fun (((s, eopt),ii_s_eq), iicomma) ->
1083
1084 if need_annotate_body
1085 then eopt +> Common.do_option (fun e ->
1086 Visitor_c.vk_expr bigf e
1087 );
1088 add_binding (EnumConstant (s, sopt)) true;
1089 );
34e49164
C
1090
1091
1092 (* TODO: if have a TypeName, then maybe can fill the option
1093 * information.
1094 *)
91eba41f
C
1095 | _ ->
1096 if need_annotate_body
1097 then k typ
34e49164
C
1098
1099 );
1100
91eba41f 1101 (* ------------------------------------------------------------ *)
34e49164
C
1102 Visitor_c.ktoplevel = (fun (k, bigf) elem ->
1103 _notyped_var := Hashtbl.create 100;
1104 match elem with
1105 | Definition def ->
485bce71
C
1106 let {f_name = funcs;
1107 f_type = ((returnt, (paramst, b)) as ftyp);
1108 f_storage = sto;
91eba41f
C
1109 f_body = statxs;
1110 f_old_c_style = oldstyle;
1111 },ii
1112 = def
34e49164
C
1113 in
1114 let (i1, i2) =
1115 match ii with
1116 | is::iifunc1::iifunc2::ibrace1::ibrace2::ifakestart::isto ->
1117 iifunc1, iifunc2
1118 | _ -> raise Impossible
1119 in
91eba41f
C
1120
1121 (match oldstyle with
1122 | None ->
1123 let typ' =
1124 Lib.al_type (Ast_c.nQ, (FunctionType ftyp, [i1;i2])) in
1125
1126 add_binding (VarOrFunc (funcs, (typ',islocal i1.Ast_c.pinfo)))
1127 false;
1128
1129 if need_annotate_body then
1130 do_in_new_scope (fun () ->
1131 paramst +> List.iter (fun (((b, s, t), _),_) ->
1132 match s with
1133 | Some s ->
1134 let local = Ast_c.LocalVar (offset t) in
1135 add_binding (VarOrFunc (s,(Lib.al_type t,local))) true
1136 | None ->
1137 pr2 "no type, certainly because Void type ?"
1138 );
1139 (* recurse *)
1140 k elem
1141 );
1142 | Some oldstyle ->
1143 (* generate regular function type *)
1144
1145 pr2 "TODO generate type for function";
1146 (* add bindings *)
1147 if need_annotate_body then
1148 do_in_new_scope (fun () ->
1149 (* recurse. should naturally call the kdecl visitor and
1150 * add binding
1151 *)
1152 k elem;
1153 );
1154
34e49164 1155 );
91eba41f
C
1156 | Declaration _
1157
1158 | CppTop _
1159 | IfdefTop _
1160 | MacroTop _
1161 | EmptyDef _
1162 | NotParsedCorrectly _
1163 | FinalDef _
1164 ->
1165 k elem
34e49164
C
1166 );
1167 }
1168 in
91eba41f
C
1169 if just_add_in_env
1170 then
1171 if depth > 1
1172 then Visitor_c.vk_toplevel bigf elem
1173 else
1174 Common.profile_code "TAC.annotate_only_included" (fun () ->
1175 Visitor_c.vk_toplevel bigf elem
1176 )
1177 else Visitor_c.vk_toplevel bigf elem
1178
1179(*****************************************************************************)
1180(* Entry point *)
1181(*****************************************************************************)
1182(* catch all the decl to grow the environment *)
1183
1184
1185let rec (annotate_program2 :
1186 environment -> toplevel list -> (toplevel * environment Common.pair) list) =
1187 fun env prog ->
1188
1189 (* globals (re)initialialisation *)
1190 _scoped_env := env;
1191 _notyped_var := (Hashtbl.create 100);
34e49164
C
1192
1193 prog +> List.map (fun elem ->
1194 let beforeenv = !_scoped_env in
91eba41f 1195 visit_toplevel ~just_add_in_env:false ~depth:0 elem;
34e49164
C
1196 let afterenv = !_scoped_env in
1197 (elem, (beforeenv, afterenv))
1198 )
1199
91eba41f
C
1200
1201
1202
1203(*****************************************************************************)
1204(* Annotate test *)
1205(*****************************************************************************)
1206
1207(* julia: for coccinelle *)
34e49164
C
1208let annotate_test_expressions prog =
1209 let rec propagate_test e =
1210 let ((e_term,info),_) = e in
1211 let (ty,_) = !info in
1212 info := (ty,Test);
1213 match e_term with
1214 Binary(e1,Logical(AndLog),e2)
1215 | Binary(e1,Logical(OrLog),e2) -> propagate_test e1; propagate_test e2
1216 | Unary(e1,Not) -> propagate_test e1
1217 | ParenExpr(e) -> propagate_test e
1218 | _ -> () in
1219
1220 let bigf = { Visitor_c.default_visitor_c with
1221 Visitor_c.kexpr = (fun (k,bigf) expr ->
1222 (match unwrap expr with
1223 (CondExpr(e,_,_),_) -> propagate_test e
91eba41f
C
1224 | _ -> ()
1225 );
1226 k expr
1227 );
34e49164
C
1228 Visitor_c.kstatement = (fun (k, bigf) st ->
1229 match unwrap st with
1230 Selection(s) ->
1231 (match s with If(e1,s1,s2) -> propagate_test e1 | _ -> ());
1232 k st;
1233 | Iteration(i) ->
1234 (match i with
1235 While(e,s) -> propagate_test e
1236 | DoWhile(s,e) -> propagate_test e
1237 | For(_,es,_,_) ->
1238 (match unwrap es with Some e -> propagate_test e | None -> ())
1239 | _ -> ());
1240 k st
91eba41f
C
1241 | _ -> k st
1242 )
1243 } in
34e49164
C
1244 (prog +> List.iter (fun elem ->
1245 Visitor_c.vk_toplevel bigf elem
1246 ))
1247
91eba41f
C
1248
1249
1250(*****************************************************************************)
1251(* Annotate types *)
1252(*****************************************************************************)
1253let annotate_program env prog =
1254 Common.profile_code "TAC.annotate_program"
1255 (fun () ->
1256 let res = annotate_program2 env prog in
34e49164 1257 annotate_test_expressions prog;
91eba41f
C
1258 res
1259 )
1260
1261let annotate_type_and_localvar env prog =
1262 Common.profile_code "TAC.annotate_type"
1263 (fun () -> annotate_program2 env prog)
1264
1265
1266(*****************************************************************************)
1267(* changing default typing environment, do concatenation *)
1268let init_env filename =
1269 pr2 ("init_env: " ^ filename);
1270 let (ast2, _stat) = Parse_c.parse_c_and_cpp filename in
1271 let ast = Parse_c.program_of_program2 ast2 in
1272
1273 let res = annotate_type_and_localvar !initial_env ast in
1274 match List.rev res with
1275 | [] -> pr2 "empty environment"
1276 | (_top,(env1,env2))::xs ->
1277 initial_env := !initial_env ++ env2;
1278 ()
1279