5edbdd25e07c648e271e89a538b171d4fafe0c6b
[bpt/coccinelle.git] / parsing_c / type_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2007, 2008, 2009 University of Urbana Champaign
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
13 *)
14
15 open Common
16
17 open Ast_c
18
19 (*****************************************************************************)
20 (* Types *)
21 (*****************************************************************************)
22
23 (* What info do we want in a clean C type ? Normally it would help
24 * if we remove some of the complexity of C with for instance typedefs
25 * by expanding those typedefs or structname and enumname to their
26 * final value. Then, when we do pattern matching we can conveniently forget
27 * to handle the typedef, enumname and similar cases. But sometimes,
28 * in coccinelle for instance, we want to keep some of those original
29 * info. So right now we have a in-the-middle solution by keeping
30 * the original typename in the ast and expanding some of them
31 * in the type_annotation phase. We don't do this expansion for
32 * structname because usually when we have a struct we actually
33 * prefer to just have the structname. It's only when we access
34 * field that we need that information, but the type_annotater has
35 * already done this job so no need in the parent expression to know
36 * the full definition of the structure. But for typedef, this is different.
37 *
38 * So really the finalType we want, the completed_type notion below,
39 * corresponds to a type we think is useful enough to work on, to do
40 * pattern matching on, and one where we have all the needed information
41 * and we don't need to look again somewhere else to get the information.
42 *
43 *
44 *
45 *
46 * todo? define a new clean fulltype ? as julia did with type_cocci.ml
47 * without the parsing info, with some normalization (for instance have
48 * only structUnionName and enumName, and remove the ParenType), some
49 * abstractions (don't care for instance about name in parameters of
50 * functionType, or size of array), and with new types such as Unknown
51 * or PartialFunctionType (when don't have type of return when infer
52 * the type of function call not based on type of function but on the
53 * type of its arguments).
54 *
55 *
56 *
57 *)
58
59 type finalType = Ast_c.fullType
60
61 type completed_and_simplified = Ast_c.fullType
62
63 type completed_typedef = Ast_c.fullType
64 type removed_typedef = Ast_c.fullType
65
66
67 (* normally if the type annotated has done a good job, this should always
68 * return true. Cf type_annotater_c.typedef_fix.
69 *)
70 let rec is_completed_and_simplified ty =
71 match Ast_c.unwrap_typeC ty with
72 | BaseType x -> true
73 | Pointer t -> is_completed_and_simplified t
74 | Array (e, t) -> is_completed_and_simplified t
75 | StructUnion (su, sopt, fields) ->
76 (* recurse fields ? Normally actually don't want,
77 * prefer to have a StructUnionName when it's possible *)
78 (match sopt with
79 | None -> true
80 | Some _ -> false (* should have transformed it in a StructUnionName *)
81 )
82 | FunctionType ft ->
83 (* todo? return type is completed ? params completed ? *)
84 true
85 | Enum (s, enumt) ->
86 true
87 | EnumName s ->
88 true
89
90 (* we prefer StructUnionName to StructUnion when it comes to typed metavar *)
91 | StructUnionName (su, s) -> true
92
93 (* should have completed with more information *)
94 | TypeName (_name, typ) ->
95 (match typ with
96 | None -> false
97 | Some t ->
98 (* recurse cos what if it's an alias of an alias ? *)
99 is_completed_and_simplified t
100 )
101
102 (* should have removed paren, for better matching with typed metavar.
103 * kind of iso again *)
104 | ParenType t ->
105 false
106 (* same *)
107 | TypeOfType t ->
108 false
109
110 | TypeOfExpr e ->
111 true (* well we don't handle it, so can't really say it's completed *)
112
113
114 let is_completed_typedef_fullType x = raise Todo
115
116 let is_removed_typedef_fullType x = raise Todo
117
118 (*****************************************************************************)
119 (* more "virtual" fulltype, the fullType_with_no_typename *)
120 (*****************************************************************************)
121 let remove_typedef x = raise Todo
122
123 (*****************************************************************************)
124 (* expression exp_info annotation vs finalType *)
125 (*****************************************************************************)
126
127 (* builders, needed because julia added gradually more information in
128 * the expression reference annotation in ast_c.
129 *)
130
131 let make_info x =
132 (Some x, Ast_c.NotTest)
133
134 let make_exp_type t =
135 (t, Ast_c.NotLocalVar)
136
137 let make_info_def t =
138 make_info (make_exp_type t)
139
140
141
142 let noTypeHere =
143 (None, Ast_c.NotTest)
144
145
146
147
148
149 let do_with_type f (t,_test) =
150 match t with
151 | None -> noTypeHere
152 | Some (t,_local) -> f t
153
154 let get_opt_type e =
155 match Ast_c.get_type_expr e with
156 | Some (t,_), _test -> Some t
157 | None, _test -> None
158
159
160
161 (*****************************************************************************)
162 (* Normalizers *)
163 (*****************************************************************************)
164
165
166 let structdef_to_struct_name ty =
167 match ty with
168 | qu, (StructUnion (su, sopt, fields), iis) ->
169 (match sopt,iis with
170 (* todo? but what if correspond to a nested struct def ? *)
171 | Some s , [i1;i2;i3;i4] ->
172 qu, (StructUnionName (su, s), [i1;i2])
173 | None, _ ->
174 ty
175 | x -> raise Impossible
176 )
177 | _ -> raise Impossible
178
179
180 (*****************************************************************************)
181 (* Helpers *)
182 (*****************************************************************************)
183
184
185 let type_of_function (def,ii) =
186 let ftyp = def.f_type in
187
188 (* could use the info in the 'ii' ? *)
189
190 let fake = Ast_c.fakeInfo (Common.fake_parse_info) in
191 let fake_oparen = Ast_c.rewrap_str "(" fake in
192 let fake = Ast_c.fakeInfo (Common.fake_parse_info) in
193 let fake_cparen = Ast_c.rewrap_str ")" fake in
194
195 Ast_c.nQ, (FunctionType ftyp, [fake_oparen;fake_cparen])
196
197
198 (* pre: only a single variable *)
199 let type_of_decl decl =
200 match decl with
201 | Ast_c.DeclList (xs,ii1) ->
202 (match xs with
203 | [] -> raise Impossible
204
205 (* todo? for other xs ? *)
206 | (x,ii2)::xs ->
207 let {v_namei = _var; v_type = v_type;
208 v_storage = (_storage,_inline)} = x in
209
210 (* TODO normalize ? what if nested structure definition ? *)
211 v_type
212 )
213 | Ast_c.MacroDecl _ ->
214 pr2_once "not handling MacroDecl type yet";
215 raise Todo
216
217
218
219 (* pre: it is indeed a struct def decl, and only a single variable *)
220 let structdef_of_decl decl =
221
222 match decl with
223 | Ast_c.DeclList (xs,ii1) ->
224 (match xs with
225 | [] -> raise Impossible
226
227 (* todo? for other xs ? *)
228 | (x,ii2)::xs ->
229 let {v_namei = var; v_type = v_type;
230 v_storage = (storage,inline)} = x in
231
232 (match Ast_c.unwrap_typeC v_type with
233 | Ast_c.StructUnion (su, _must_be_some, fields) ->
234 (su, fields)
235 | _ -> raise Impossible
236 )
237 )
238 | Ast_c.MacroDecl _ -> raise Impossible
239
240
241
242
243 (*****************************************************************************)
244 (* Type builder *)
245 (*****************************************************************************)
246
247 let (fake_function_type:
248 fullType option -> argument wrap2 list -> fullType option) =
249 fun rettype args ->
250
251 let fake = Ast_c.fakeInfo (Common.fake_parse_info) in
252 let fake_oparen = Ast_c.rewrap_str "(" fake in
253 let fake = Ast_c.fakeInfo (Common.fake_parse_info) in
254 let fake_cparen = Ast_c.rewrap_str ")" fake in
255
256 let (tyargs: parameterType wrap2 list) =
257 args +> Common.map_filter (fun (arg,ii) ->
258 match arg with
259 | Left e ->
260 (match Ast_c.get_onlytype_expr e with
261 | Some ft ->
262 let paramtype =
263 { Ast_c.p_namei = None;
264 p_register = false, Ast_c.noii;
265 p_type = ft;
266 }
267 in
268 Some (paramtype, ii)
269 | None -> None
270 )
271 | Right _ -> None
272 )
273 in
274 if List.length args <> List.length tyargs
275 then None
276 else
277 rettype +> Common.map_option (fun rettype ->
278 let (ftyp: functionType) = (rettype, (tyargs, (false,[]))) in
279 let (t: fullType) =
280 (Ast_c.nQ, (FunctionType ftyp, [fake_oparen;fake_cparen]))
281 in
282 t
283 )
284
285
286 (*****************************************************************************)
287 (* Typing rules *)
288 (*****************************************************************************)
289
290
291 (* todo: the rules are far more complex, but I prefer to simplify for now.
292 * todo: should take operator as a parameter.
293 *
294 * todo: Also need handle pointer arithmetic! the type of 'pt + 2'
295 * is still the type of pt. cf parsing_cocci/type_infer.ml
296 *
297 * (* pad: in pointer arithmetic, as in ptr+1, the lub must be ptr *)
298 * | (T.Pointer(ty1),T.Pointer(ty2)) ->
299 * T.Pointer(loop(ty1,ty2))
300 * | (ty1,T.Pointer(ty2)) -> T.Pointer(ty2)
301 * | (T.Pointer(ty1),ty2) -> T.Pointer(ty1)
302 *
303 *)
304 let lub t1 t2 =
305 let ftopt =
306 match t1, t2 with
307 | None, None -> None
308 | Some t, None -> Some t
309 | None, Some t -> Some t
310 (* check equal ? no cos can have pointer arithmetic so t2 can be <> t1
311 *
312 * todo: right now I favor the first term because usually pointer
313 * arithmetic are written with the pointer in the first position.
314 *
315 * Also when an expression contain a typedef, as in
316 * 'dma_addr + 1' where dma_addr was declared as a varialbe
317 * of type dma_addr_t, then again I want to have in the lub
318 * the typedef and it is often again in the first position.
319 *
320 *)
321 | Some t1, Some t2 ->
322 let t1bis = Ast_c.unwrap_typeC t1 in
323 let t2bis = Ast_c.unwrap_typeC t2 in
324 (match t1bis, t2bis with
325 (* todo, Pointer, Typedef, etc *)
326 | _, _ -> Some t1
327 )
328
329 in
330 match ftopt with
331 | None -> None, Ast_c.NotTest
332 | Some ft -> Some (ft, Ast_c.NotLocalVar), Ast_c.NotTest
333
334
335
336 (*****************************************************************************)
337 (* type lookup *)
338 (*****************************************************************************)
339
340 (* old: was using some nested find_some, but easier use ref
341 * update: handling union (used a lot in sparse)
342 * note: it is independent of the environment.
343 *)
344 let (type_field:
345 string -> (Ast_c.structUnion * Ast_c.structType) -> Ast_c.fullType) =
346 fun fld (su, fields) ->
347
348 let res = ref [] in
349
350 let rec aux_fields fields =
351 fields +> List.iter (fun x ->
352 match Ast_c.unwrap x with
353 | DeclarationField (FieldDeclList (onefield_multivars, iiptvirg)) ->
354 onefield_multivars +> List.iter (fun (fieldkind, iicomma) ->
355 match fieldkind with
356 | Simple (Some name, t) | BitField (Some name, t, _, _) ->
357 let s = Ast_c.str_of_name name in
358 if s =$= fld
359 then Common.push2 t res
360 else ()
361
362 | Simple (None, t) ->
363 (match Ast_c.unwrap_typeC t with
364
365 (* union *)
366 | StructUnion (Union, _, fields) ->
367 aux_fields fields
368
369 (* Special case of nested structure definition inside
370 * structure without associated field variable as in
371 * struct top = { ... struct xx { int subfield1; ... }; ... }
372 * cf sparse source, where can access subfields directly.
373 * It can also be used in conjunction with union.
374 *)
375 | StructUnion (Struct, _, fields) ->
376 aux_fields fields
377
378 | _ -> ()
379 )
380 | _ -> ()
381 )
382
383 | EmptyField -> ()
384 | MacroStructDeclTodo -> pr2_once "DeclTodo"; ()
385
386 | CppDirectiveStruct _
387 | IfdefStruct _ -> pr2_once "StructCpp";
388 )
389 in
390 aux_fields fields;
391 match !res with
392 | [t] -> t
393 | [] ->
394 raise Not_found
395 | x::y::xs ->
396 pr2 ("MultiFound field: " ^ fld) ;
397 x
398
399
400
401 (*****************************************************************************)
402 (* helpers *)
403 (*****************************************************************************)
404
405
406 (* was in aliasing_function_c.ml before*)
407
408 (* assume normalized/completed ? so no ParenType handling to do ?
409 *)
410 let rec is_function_type x =
411 match Ast_c.unwrap_typeC x with
412 | FunctionType _ -> true
413 | _ -> false
414
415
416 (* assume normalized/completed ? so no ParenType handling to do ? *)
417 let rec function_pointer_type_opt x =
418 match Ast_c.unwrap_typeC x with
419 | Pointer y ->
420 (match Ast_c.unwrap_typeC y with
421 | FunctionType ft -> Some ft
422
423 (* fix *)
424 | TypeName (_name, Some ft2) ->
425 (match Ast_c.unwrap_typeC ft2 with
426 | FunctionType ft -> Some ft
427 | _ -> None
428 )
429
430 | _ -> None
431 )
432 (* bugfix: for many fields in structure, the field is a typename
433 * like irq_handler_t to a function pointer
434 *)
435 | TypeName (_name, Some ft) ->
436 function_pointer_type_opt ft
437 (* bugfix: in field, usually it has some ParenType *)
438
439 | ParenType ft ->
440 function_pointer_type_opt ft
441
442 | _ -> None
443
444