c3c65a377069206bd4eca3598f449ab3b71e4f84
[bpt/coccinelle.git] / parsing_c / type_c.ml
1 (* Yoann Padioleau, Julia Lawall
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2007, 2008, 2009 University of Urbana Champaign and DIKU
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16 open Common
17
18 open Ast_c
19
20 (*****************************************************************************)
21 (* Wrappers *)
22 (*****************************************************************************)
23 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_type
24
25 (*****************************************************************************)
26 (* Types *)
27 (*****************************************************************************)
28
29 (* What info do we want in a clean C type ? Normally it would help
30 * if we remove some of the complexity of C with for instance typedefs
31 * by expanding those typedefs or structname and enumname to their
32 * final value. Then, when we do pattern matching we can conveniently forget
33 * to handle the typedef, enumname and similar cases. But sometimes,
34 * in coccinelle for instance, we want to keep some of those original
35 * info. So right now we have a in-the-middle solution by keeping
36 * the original typename in the ast and expanding some of them
37 * in the type_annotation phase. We don't do this expansion for
38 * structname because usually when we have a struct we actually
39 * prefer to just have the structname. It's only when we access
40 * field that we need that information, but the type_annotater has
41 * already done this job so no need in the parent expression to know
42 * the full definition of the structure. But for typedef, this is different.
43 *
44 * So really the finalType we want, the completed_type notion below,
45 * corresponds to a type we think is useful enough to work on, to do
46 * pattern matching on, and one where we have all the needed information
47 * and we don't need to look again somewhere else to get the information.
48 *
49 *
50 *
51 *
52 * todo? define a new clean fulltype ? as julia did with type_cocci.ml
53 * without the parsing info, with some normalization (for instance have
54 * only structUnionName and enumName, and remove the ParenType), some
55 * abstractions (don't care for instance about name in parameters of
56 * functionType, or size of array), and with new types such as Unknown
57 * or PartialFunctionType (when don't have type of return when infer
58 * the type of function call not based on type of function but on the
59 * type of its arguments).
60 *
61 *
62 *
63 *)
64
65 type finalType = Ast_c.fullType
66
67 type completed_and_simplified = Ast_c.fullType
68
69 type completed_typedef = Ast_c.fullType
70 type removed_typedef = Ast_c.fullType
71
72 (* move in ast_c ?
73 * use Ast_c.nQ, Ast_c.defaultInt, Ast_c.emptyAnnotCocci,
74 * Ast_c.emptyMetavarsBinding, Ast_c.emptyComments
75 *)
76 let (int_type: Ast_c.fullType) =
77 (* Lib_parsing_c.al_type (Parse_c.type_of_string "int")*)
78 Ast_c.mk_ty
79 (Ast_c.BaseType (Ast_c.IntType (Ast_c.Si (Ast_c.Signed, Ast_c.CInt))))
80 [Ast_c.al_info 0 (* al *)
81 {Ast_c.pinfo =
82 Ast_c.OriginTok
83 {Common.str = "int"; Common.charpos = 0; Common.line = -1;
84 Common.column = -1; Common.file = ""};
85 Ast_c.cocci_tag =
86 {contents = Some (Ast_cocci.CONTEXT (Ast_cocci.NoPos, Ast_cocci.NOTHING), [])};
87 Ast_c.comments_tag = {contents =
88 {Ast_c.mbefore = []; Ast_c.mafter = [];
89 Ast_c.mbefore2 = []; Ast_c.mafter2 = []
90 }}}]
91
92
93
94
95 (* normally if the type annotated has done a good job, this should always
96 * return true. Cf type_annotater_c.typedef_fix.
97 *)
98 let rec is_completed_and_simplified ty =
99 match Ast_c.unwrap_typeC ty with
100 | BaseType x -> true
101 | Pointer t -> is_completed_and_simplified t
102 | Array (e, t) -> is_completed_and_simplified t
103 | StructUnion (su, sopt, fields) ->
104 (* recurse fields ? Normally actually don't want,
105 * prefer to have a StructUnionName when it's possible *)
106 (match sopt with
107 | None -> true
108 | Some _ -> false (* should have transformed it in a StructUnionName *)
109 )
110 | FunctionType ft ->
111 (* todo? return type is completed ? params completed ? *)
112 true
113 | Enum (s, enumt) ->
114 true
115 | EnumName s ->
116 true
117
118 (* we prefer StructUnionName to StructUnion when it comes to typed metavar *)
119 | StructUnionName (su, s) -> true
120
121 (* should have completed with more information *)
122 | TypeName (_name, typ) ->
123 (match typ with
124 | None -> false
125 | Some t ->
126 (* recurse cos what if it's an alias of an alias ? *)
127 is_completed_and_simplified t
128 )
129
130 (* should have removed paren, for better matching with typed metavar.
131 * kind of iso again *)
132 | ParenType t ->
133 false
134 (* same *)
135 | TypeOfType t ->
136 false
137
138 | TypeOfExpr e ->
139 true (* well we don't handle it, so can't really say it's completed *)
140
141
142 let is_completed_typedef_fullType x = raise Todo
143
144 let is_removed_typedef_fullType x = raise Todo
145
146 (*****************************************************************************)
147 (* more "virtual" fulltype, the fullType_with_no_typename *)
148 (*****************************************************************************)
149 let remove_typedef x = raise Todo
150
151 (*****************************************************************************)
152 (* expression exp_info annotation vs finalType *)
153 (*****************************************************************************)
154
155 (* builders, needed because julia added gradually more information in
156 * the expression reference annotation in ast_c.
157 *)
158
159 let make_info x =
160 (Some x, Ast_c.NotTest)
161
162 let make_exp_type t =
163 (t, Ast_c.NotLocalVar)
164
165 let make_info_def t =
166 make_info (make_exp_type t)
167
168
169
170 let noTypeHere =
171 (None, Ast_c.NotTest)
172
173
174 let do_with_type f (t,_test) =
175 match t with
176 | None -> noTypeHere
177 | Some (t,_local) -> f t
178
179 let get_opt_type e =
180 match Ast_c.get_type_expr e with
181 | Some (t,_), _test -> Some t
182 | None, _test -> None
183
184
185
186 (*****************************************************************************)
187 (* Normalizers *)
188 (*****************************************************************************)
189
190
191 let structdef_to_struct_name ty =
192 let (qu, tybis) = ty in
193 match Ast_c.unwrap_typeC ty with
194 | (StructUnion (su, sopt, fields)) ->
195 let iis = Ast_c.get_ii_typeC_take_care tybis in
196 (match sopt, iis with
197 (* todo? but what if correspond to a nested struct def ? *)
198 | Some s , [i1;i2;i3;i4] ->
199 qu, Ast_c.mk_tybis (StructUnionName (su, s)) [i1;i2]
200 | None, _ ->
201 ty
202 | x -> raise Impossible
203 )
204 | _ -> raise Impossible
205
206
207 (*****************************************************************************)
208 (* Helpers *)
209 (*****************************************************************************)
210
211
212 let type_of_function (def,ii) =
213 let ftyp = def.f_type in
214
215 (* could use the info in the 'ii' ? *)
216
217 let fake = Ast_c.fakeInfo (Common.fake_parse_info) in
218 let fake_oparen = Ast_c.rewrap_str "(" fake in
219 let fake = Ast_c.fakeInfo (Common.fake_parse_info) in
220 let fake_cparen = Ast_c.rewrap_str ")" fake in
221
222 Ast_c.mk_ty (FunctionType ftyp) [fake_oparen;fake_cparen]
223
224
225 (* pre: only a single variable *)
226 let type_of_decl decl =
227 match decl with
228 | Ast_c.DeclList (xs,ii1) ->
229 (match xs with
230 | [] -> raise Impossible
231
232 (* todo? for other xs ? *)
233 | (x,ii2)::xs ->
234 let {v_namei = _var; v_type = v_type;
235 v_storage = (_storage,_inline)} = x in
236
237 (* TODO normalize ? what if nested structure definition ? *)
238 v_type
239 )
240 | Ast_c.MacroDecl _ ->
241 pr2_once "not handling MacroDecl type yet";
242 raise Todo
243
244
245
246 (* pre: it is indeed a struct def decl, and only a single variable *)
247 let structdef_of_decl decl =
248
249 match decl with
250 | Ast_c.DeclList (xs,ii1) ->
251 (match xs with
252 | [] -> raise Impossible
253
254 (* todo? for other xs ? *)
255 | (x,ii2)::xs ->
256 let {v_namei = var; v_type = v_type;
257 v_storage = (storage,inline)} = x in
258
259 (match Ast_c.unwrap_typeC v_type with
260 | Ast_c.StructUnion (su, _must_be_some, fields) ->
261 (su, fields)
262 | _ -> raise Impossible
263 )
264 )
265 | Ast_c.MacroDecl _ -> raise Impossible
266
267
268
269
270 (*****************************************************************************)
271 (* Type builder *)
272 (*****************************************************************************)
273
274 let (fake_function_type:
275 fullType option -> argument wrap2 list -> fullType option) =
276 fun rettype args ->
277
278 let fake = Ast_c.fakeInfo (Common.fake_parse_info) in
279 let fake_oparen = Ast_c.rewrap_str "(" fake in
280 let fake = Ast_c.fakeInfo (Common.fake_parse_info) in
281 let fake_cparen = Ast_c.rewrap_str ")" fake in
282
283 let (tyargs: parameterType wrap2 list) =
284 args +> Common.map_filter (fun (arg,ii) ->
285 match arg with
286 | Left e ->
287 (match Ast_c.get_onlytype_expr e with
288 | Some ft ->
289 let paramtype =
290 { Ast_c.p_namei = None;
291 p_register = false, Ast_c.noii;
292 p_type = ft;
293 }
294 in
295 Some (paramtype, ii)
296 | None -> None
297 )
298 | Right _ -> None
299 )
300 in
301 if List.length args <> List.length tyargs
302 then None
303 else
304 rettype +> Common.map_option (fun rettype ->
305 let (ftyp: functionType) = (rettype, (tyargs, (false,[]))) in
306 let (t: fullType) =
307 Ast_c.mk_ty (FunctionType ftyp) [fake_oparen;fake_cparen]
308 in
309 t
310 )
311
312
313 (*****************************************************************************)
314 (* Typing rules *)
315 (*****************************************************************************)
316
317
318 (* todo: the rules are far more complex, but I prefer to simplify for now.
319 * todo: should take operator as a parameter.
320 *
321 * todo: Also need handle pointer arithmetic! the type of 'pt + 2'
322 * is still the type of pt. cf parsing_cocci/type_infer.ml
323 *
324 * (* pad: in pointer arithmetic, as in ptr+1, the lub must be ptr *)
325 * | (T.Pointer(ty1),T.Pointer(ty2)) ->
326 * T.Pointer(loop(ty1,ty2))
327 * | (ty1,T.Pointer(ty2)) -> T.Pointer(ty2)
328 * | (T.Pointer(ty1),ty2) -> T.Pointer(ty1)
329 *
330 *)
331 let lub op t1 t2 =
332 let ftopt =
333 match t1, t2 with
334 | None, None -> None
335 | Some t, None -> Some t
336 | None, Some t -> Some t
337 (* check equal ? no cos can have pointer arithmetic so t2 can be <> t1
338 *
339 * todo: right now I favor the first term because usually pointer
340 * arithmetic are written with the pointer in the first position.
341 *
342 * Also when an expression contain a typedef, as in
343 * 'dma_addr + 1' where dma_addr was declared as a varialbe
344 * of type dma_addr_t, then again I want to have in the lub
345 * the typedef and it is often again in the first position.
346 *
347 *)
348 | Some t1, Some t2 ->
349 let t1bis = Ast_c.unwrap_typeC t1 in
350 let t2bis = Ast_c.unwrap_typeC t2 in
351 (* a small attempt to do better, no consideration of typedefs *)
352 (match op, t1bis, t2bis with
353 (* these rules follow ANSI C. See eg:
354 http://flexor.uwaterloo.ca/library/SGI_bookshelves/SGI_Developer/books/CLanguageRef/sgi_html/ch05.html *)
355 _,Ast_c.BaseType(bt1),Ast_c.BaseType(bt2) ->
356 (match bt1,bt2 with
357 Ast_c.Void,_ -> Some t2 (* something has gone wrong *)
358 | _,Ast_c.Void -> Some t1 (* something has gone wrong *)
359 | Ast_c.FloatType(Ast_c.CLongDouble),_ -> Some t1
360 | _,Ast_c.FloatType(Ast_c.CLongDouble) -> Some t2
361 | Ast_c.FloatType(Ast_c.CDouble),_ -> Some t1
362 | _,Ast_c.FloatType(Ast_c.CDouble) -> Some t2
363 | Ast_c.FloatType(Ast_c.CFloat),_ -> Some t1
364 | _,Ast_c.FloatType(Ast_c.CFloat) -> Some t2
365
366 | Ast_c.IntType(Ast_c.Si(Ast_c.UnSigned,Ast_c.CLongLong)),_ ->
367 Some t1
368 | _,Ast_c.IntType(Ast_c.Si(Ast_c.UnSigned,Ast_c.CLongLong)) ->
369 Some t2
370 | Ast_c.IntType(Ast_c.Si(Ast_c.Signed,Ast_c.CLongLong)),_ ->
371 Some t1
372 | _,Ast_c.IntType(Ast_c.Si(Ast_c.Signed,Ast_c.CLongLong)) ->
373 Some t2
374 | Ast_c.IntType(Ast_c.Si(Ast_c.UnSigned,Ast_c.CLong)),_ ->
375 Some t1
376 | _,Ast_c.IntType(Ast_c.Si(Ast_c.UnSigned,Ast_c.CLong)) ->
377 Some t2
378 | Ast_c.IntType(Ast_c.Si(Ast_c.Signed,Ast_c.CLong)),_ ->
379 Some t1
380 | _,Ast_c.IntType(Ast_c.Si(Ast_c.Signed,Ast_c.CLong)) ->
381 Some t2
382 | Ast_c.IntType(Ast_c.Si(Ast_c.UnSigned,Ast_c.CInt)),_ ->
383 Some t1
384 | _,Ast_c.IntType(Ast_c.Si(Ast_c.UnSigned,Ast_c.CInt)) ->
385 Some t2
386 | _ -> Some int_type)
387
388 | Ast_c.Plus,Ast_c.Pointer _,Ast_c.BaseType(Ast_c.IntType _) ->
389 Some t1
390 | Ast_c.Plus,Ast_c.BaseType(Ast_c.IntType _),Ast_c.Pointer _ ->
391 Some t2
392 | Ast_c.Minus,Ast_c.Pointer _,Ast_c.BaseType(Ast_c.IntType _) ->
393 Some t1
394 | Ast_c.Minus,Ast_c.BaseType(Ast_c.IntType _),Ast_c.Pointer _ ->
395 Some t2
396 | Ast_c.Minus,Ast_c.Pointer _,Ast_c.Pointer _ ->
397 Some int_type
398 (* todo, Pointer, Typedef, etc *)
399 | _, _, _ -> Some t1
400 )
401
402 in
403 match ftopt with
404 | None -> None, Ast_c.NotTest
405 | Some ft -> Some (ft, Ast_c.NotLocalVar), Ast_c.NotTest
406
407
408
409 (*****************************************************************************)
410 (* type lookup *)
411 (*****************************************************************************)
412
413 (* old: was using some nested find_some, but easier use ref
414 * update: handling union (used a lot in sparse)
415 * note: it is independent of the environment.
416 *)
417 let (type_field:
418 string -> (Ast_c.structUnion * Ast_c.structType) -> Ast_c.fullType) =
419 fun fld (su, fields) ->
420
421 let res = ref [] in
422
423 let rec aux_fields fields =
424 fields +> List.iter (fun x ->
425 match x with
426 | DeclarationField (FieldDeclList (onefield_multivars, iiptvirg)) ->
427 onefield_multivars +> List.iter (fun (fieldkind, iicomma) ->
428 match fieldkind with
429 | Simple (Some name, t) | BitField (Some name, t, _, _) ->
430 let s = Ast_c.str_of_name name in
431 if s =$= fld
432 then Common.push2 t res
433 else ()
434
435 | Simple (None, t) ->
436 (match Ast_c.unwrap_typeC t with
437
438 (* union *)
439 | StructUnion (Union, _, fields) ->
440 aux_fields fields
441
442 (* Special case of nested structure definition inside
443 * structure without associated field variable as in
444 * struct top = { ... struct xx { int subfield1; ... }; ... }
445 * cf sparse source, where can access subfields directly.
446 * It can also be used in conjunction with union.
447 *)
448 | StructUnion (Struct, _, fields) ->
449 aux_fields fields
450
451 | _ -> ()
452 )
453 | _ -> ()
454 )
455
456 | EmptyField info -> ()
457 | MacroDeclField _ -> pr2_once "DeclTodo"; ()
458
459 | CppDirectiveStruct _
460 | IfdefStruct _ -> pr2_once "StructCpp";
461 )
462 in
463 aux_fields fields;
464 match !res with
465 | [t] -> t
466 | [] ->
467 raise Not_found
468 | x::y::xs ->
469 pr2 ("MultiFound field: " ^ fld) ;
470 x
471
472
473
474 (*****************************************************************************)
475 (* helpers *)
476 (*****************************************************************************)
477
478
479 (* was in aliasing_function_c.ml before*)
480
481 (* assume normalized/completed ? so no ParenType handling to do ?
482 *)
483 let rec is_function_type x =
484 match Ast_c.unwrap_typeC x with
485 | FunctionType _ -> true
486 | _ -> false
487
488
489 (* assume normalized/completed ? so no ParenType handling to do ? *)
490 let rec function_pointer_type_opt x =
491 match Ast_c.unwrap_typeC x with
492 | Pointer y ->
493 (match Ast_c.unwrap_typeC y with
494 | FunctionType ft -> Some ft
495
496 (* fix *)
497 | TypeName (_name, Some ft2) ->
498 (match Ast_c.unwrap_typeC ft2 with
499 | FunctionType ft -> Some ft
500 | _ -> None
501 )
502
503 | _ -> None
504 )
505 (* bugfix: for many fields in structure, the field is a typename
506 * like irq_handler_t to a function pointer
507 *)
508 | TypeName (_name, Some ft) ->
509 function_pointer_type_opt ft
510 (* bugfix: in field, usually it has some ParenType *)
511
512 | ParenType ft ->
513 function_pointer_type_opt ft
514
515 | _ -> None
516
517