permit multiline comments and strings in macros
[bpt/coccinelle.git] / parsing_c / type_c.ml
CommitLineData
708f4980 1(* Yoann Padioleau, Julia Lawall
0708f913 2 *
ae4735db 3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
708f4980 4 * Copyright (C) 2007, 2008, 2009 University of Urbana Champaign and DIKU
91eba41f
C
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
91eba41f
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16open Common
17
18open Ast_c
19
708f4980
C
20(*****************************************************************************)
21(* Wrappers *)
22(*****************************************************************************)
23let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_type
24
91eba41f
C
25(*****************************************************************************)
26(* Types *)
27(*****************************************************************************)
28
0708f913
C
29(* What info do we want in a clean C type ? Normally it would help
30 * if we remove some of the complexity of C with for instance typedefs
31 * by expanding those typedefs or structname and enumname to their
32 * final value. Then, when we do pattern matching we can conveniently forget
33 * to handle the typedef, enumname and similar cases. But sometimes,
34 * in coccinelle for instance, we want to keep some of those original
35 * info. So right now we have a in-the-middle solution by keeping
36 * the original typename in the ast and expanding some of them
37 * in the type_annotation phase. We don't do this expansion for
38 * structname because usually when we have a struct we actually
39 * prefer to just have the structname. It's only when we access
40 * field that we need that information, but the type_annotater has
41 * already done this job so no need in the parent expression to know
42 * the full definition of the structure. But for typedef, this is different.
ae4735db 43 *
0708f913
C
44 * So really the finalType we want, the completed_type notion below,
45 * corresponds to a type we think is useful enough to work on, to do
46 * pattern matching on, and one where we have all the needed information
47 * and we don't need to look again somewhere else to get the information.
48 *
ae4735db
C
49 *
50 *
51 *
0708f913 52 * todo? define a new clean fulltype ? as julia did with type_cocci.ml
91eba41f
C
53 * without the parsing info, with some normalization (for instance have
54 * only structUnionName and enumName, and remove the ParenType), some
55 * abstractions (don't care for instance about name in parameters of
56 * functionType, or size of array), and with new types such as Unknown
57 * or PartialFunctionType (when don't have type of return when infer
58 * the type of function call not based on type of function but on the
59 * type of its arguments).
ae4735db
C
60 *
61 *
62 *
91eba41f
C
63 *)
64
65type finalType = Ast_c.fullType
66
0708f913
C
67type completed_and_simplified = Ast_c.fullType
68
69type completed_typedef = Ast_c.fullType
70type removed_typedef = Ast_c.fullType
71
ae4735db
C
72(* move in ast_c ?
73 * use Ast_c.nQ, Ast_c.defaultInt, Ast_c.emptyAnnotCocci,
708f4980
C
74 * Ast_c.emptyMetavarsBinding, Ast_c.emptyComments
75*)
1eddfd50 76let mk_fulltype bt str =
708f4980 77 Ast_c.mk_ty
1eddfd50 78 (Ast_c.BaseType bt)
708f4980
C
79 [Ast_c.al_info 0 (* al *)
80 {Ast_c.pinfo =
81 Ast_c.OriginTok
1eddfd50 82 {Common.str = str; Common.charpos = 0; Common.line = -1;
708f4980 83 Common.column = -1; Common.file = ""};
ae4735db 84 Ast_c.cocci_tag =
1eddfd50
C
85 {contents =
86 Some (Ast_cocci.CONTEXT (Ast_cocci.NoPos, Ast_cocci.NOTHING), [])};
abad11c5 87 Ast_c.annots_tag = Token_annot.empty;
ae4735db 88 Ast_c.comments_tag = {contents =
708f4980
C
89 {Ast_c.mbefore = []; Ast_c.mafter = [];
90 Ast_c.mbefore2 = []; Ast_c.mafter2 = []
91 }}}]
92
1eddfd50
C
93let (int_type: Ast_c.fullType) =
94 (* Lib_parsing_c.al_type (Parse_c.type_of_string "int")*)
95 mk_fulltype (Ast_c.IntType (Ast_c.Si (Ast_c.Signed, Ast_c.CInt))) "int"
708f4980 96
1eddfd50
C
97let (ptr_diff_type: Ast_c.fullType) =
98 (* Lib_parsing_c.al_type (Parse_c.type_of_string "int")*)
99 mk_fulltype Ast_c.PtrDiffType "ptrdiff_t"
0708f913
C
100
101(* normally if the type annotated has done a good job, this should always
102 * return true. Cf type_annotater_c.typedef_fix.
103 *)
ae4735db
C
104let rec is_completed_and_simplified ty =
105 match Ast_c.unwrap_typeC ty with
f59c9fb7 106 | NoType -> true
0708f913
C
107 | BaseType x -> true
108 | Pointer t -> is_completed_and_simplified t
109 | Array (e, t) -> is_completed_and_simplified t
ae4735db
C
110 | StructUnion (su, sopt, fields) ->
111 (* recurse fields ? Normally actually don't want,
0708f913
C
112 * prefer to have a StructUnionName when it's possible *)
113 (match sopt with
114 | None -> true
115 | Some _ -> false (* should have transformed it in a StructUnionName *)
116 )
ae4735db 117 | FunctionType ft ->
0708f913
C
118 (* todo? return type is completed ? params completed ? *)
119 true
ae4735db 120 | Enum (s, enumt) ->
0708f913 121 true
ae4735db 122 | EnumName s ->
0708f913
C
123 true
124
125 (* we prefer StructUnionName to StructUnion when it comes to typed metavar *)
126 | StructUnionName (su, s) -> true
127
128 (* should have completed with more information *)
ae4735db 129 | TypeName (_name, typ) ->
0708f913
C
130 (match typ with
131 | None -> false
ae4735db 132 | Some t ->
0708f913
C
133 (* recurse cos what if it's an alias of an alias ? *)
134 is_completed_and_simplified t
135 )
136
137 (* should have removed paren, for better matching with typed metavar.
138 * kind of iso again *)
ae4735db 139 | ParenType t ->
0708f913
C
140 false
141 (* same *)
ae4735db 142 | TypeOfType t ->
0708f913
C
143 false
144
ae4735db 145 | TypeOfExpr e ->
0708f913
C
146 true (* well we don't handle it, so can't really say it's completed *)
147
148
ae4735db 149let is_completed_typedef_fullType x = raise Todo
0708f913
C
150
151let is_removed_typedef_fullType x = raise Todo
ae4735db 152
0708f913
C
153(*****************************************************************************)
154(* more "virtual" fulltype, the fullType_with_no_typename *)
155(*****************************************************************************)
156let remove_typedef x = raise Todo
157
91eba41f
C
158(*****************************************************************************)
159(* expression exp_info annotation vs finalType *)
160(*****************************************************************************)
161
ae4735db 162(* builders, needed because julia added gradually more information in
91eba41f
C
163 * the expression reference annotation in ast_c.
164 *)
165
ae4735db 166let make_info x =
91eba41f
C
167 (Some x, Ast_c.NotTest)
168
ae4735db 169let make_exp_type t =
91eba41f
C
170 (t, Ast_c.NotLocalVar)
171
ae4735db 172let make_info_def t =
91eba41f
C
173 make_info (make_exp_type t)
174
175
176
ae4735db 177let noTypeHere =
91eba41f
C
178 (None, Ast_c.NotTest)
179
180
ae4735db 181let do_with_type f (t,_test) =
91eba41f
C
182 match t with
183 | None -> noTypeHere
184 | Some (t,_local) -> f t
185
ae4735db 186let get_opt_type e =
91eba41f
C
187 match Ast_c.get_type_expr e with
188 | Some (t,_), _test -> Some t
189 | None, _test -> None
190
191
192
193(*****************************************************************************)
194(* Normalizers *)
195(*****************************************************************************)
196
197
ae4735db 198let structdef_to_struct_name ty =
708f4980 199 let (qu, tybis) = ty in
ae4735db
C
200 match Ast_c.unwrap_typeC ty with
201 | (StructUnion (su, sopt, fields)) ->
708f4980
C
202 let iis = Ast_c.get_ii_typeC_take_care tybis in
203 (match sopt, iis with
91eba41f 204 (* todo? but what if correspond to a nested struct def ? *)
ae4735db 205 | Some s , [i1;i2;i3;i4] ->
708f4980 206 qu, Ast_c.mk_tybis (StructUnionName (su, s)) [i1;i2]
ae4735db 207 | None, _ ->
91eba41f 208 ty
abad11c5 209 | x -> raise (Impossible 126)
91eba41f 210 )
abad11c5 211 | _ -> raise (Impossible 127)
91eba41f
C
212
213
214(*****************************************************************************)
215(* Helpers *)
216(*****************************************************************************)
217
218
ae4735db
C
219let type_of_function (def,ii) =
220 let ftyp = def.f_type in
91eba41f
C
221
222 (* could use the info in the 'ii' ? *)
223
224 let fake = Ast_c.fakeInfo (Common.fake_parse_info) in
225 let fake_oparen = Ast_c.rewrap_str "(" fake in
226 let fake = Ast_c.fakeInfo (Common.fake_parse_info) in
227 let fake_cparen = Ast_c.rewrap_str ")" fake in
228
708f4980 229 Ast_c.mk_ty (FunctionType ftyp) [fake_oparen;fake_cparen]
91eba41f
C
230
231
232(* pre: only a single variable *)
ae4735db 233let type_of_decl decl =
91eba41f 234 match decl with
ae4735db 235 | Ast_c.DeclList (xs,ii1) ->
91eba41f 236 (match xs with
abad11c5 237 | [] -> raise (Impossible 128)
ae4735db 238
91eba41f 239 (* todo? for other xs ? *)
ae4735db 240 | (x,ii2)::xs ->
91eba41f
C
241 let {v_namei = _var; v_type = v_type;
242 v_storage = (_storage,_inline)} = x in
243
244 (* TODO normalize ? what if nested structure definition ? *)
245 v_type
246 )
17ba0788 247 | Ast_c.MacroDecl _ | Ast_c.MacroDeclInit _ ->
91eba41f
C
248 pr2_once "not handling MacroDecl type yet";
249 raise Todo
250
251
252
253(* pre: it is indeed a struct def decl, and only a single variable *)
ae4735db 254let structdef_of_decl decl =
91eba41f
C
255
256 match decl with
ae4735db 257 | Ast_c.DeclList (xs,ii1) ->
91eba41f 258 (match xs with
abad11c5 259 | [] -> raise (Impossible 129)
ae4735db 260
91eba41f 261 (* todo? for other xs ? *)
ae4735db 262 | (x,ii2)::xs ->
91eba41f
C
263 let {v_namei = var; v_type = v_type;
264 v_storage = (storage,inline)} = x in
ae4735db 265
91eba41f 266 (match Ast_c.unwrap_typeC v_type with
ae4735db 267 | Ast_c.StructUnion (su, _must_be_some, fields) ->
91eba41f 268 (su, fields)
abad11c5 269 | _ -> raise (Impossible 130)
91eba41f
C
270 )
271 )
abad11c5 272 | Ast_c.MacroDecl _ | Ast_c.MacroDeclInit _ -> raise (Impossible 131)
91eba41f
C
273
274
275
276
277(*****************************************************************************)
278(* Type builder *)
279(*****************************************************************************)
280
ae4735db
C
281let (fake_function_type:
282 fullType option -> argument wrap2 list -> fullType option) =
91eba41f
C
283 fun rettype args ->
284
285 let fake = Ast_c.fakeInfo (Common.fake_parse_info) in
286 let fake_oparen = Ast_c.rewrap_str "(" fake in
287 let fake = Ast_c.fakeInfo (Common.fake_parse_info) in
288 let fake_cparen = Ast_c.rewrap_str ")" fake in
289
ae4735db
C
290 let (tyargs: parameterType wrap2 list) =
291 args +> Common.map_filter (fun (arg,ii) ->
91eba41f 292 match arg with
ae4735db 293 | Left e ->
91eba41f 294 (match Ast_c.get_onlytype_expr e with
ae4735db
C
295 | Some ft ->
296 let paramtype =
b1b2de81
C
297 { Ast_c.p_namei = None;
298 p_register = false, Ast_c.noii;
299 p_type = ft;
300 }
91eba41f
C
301 in
302 Some (paramtype, ii)
303 | None -> None
304 )
305 | Right _ -> None
306 )
307 in
308 if List.length args <> List.length tyargs
309 then None
310 else
ae4735db 311 rettype +> Common.map_option (fun rettype ->
91eba41f 312 let (ftyp: functionType) = (rettype, (tyargs, (false,[]))) in
ae4735db 313 let (t: fullType) =
708f4980 314 Ast_c.mk_ty (FunctionType ftyp) [fake_oparen;fake_cparen]
91eba41f
C
315 in
316 t
317 )
318
319
320(*****************************************************************************)
321(* Typing rules *)
322(*****************************************************************************)
323
324
325(* todo: the rules are far more complex, but I prefer to simplify for now.
326 * todo: should take operator as a parameter.
ae4735db 327 *
91eba41f
C
328 * todo: Also need handle pointer arithmetic! the type of 'pt + 2'
329 * is still the type of pt. cf parsing_cocci/type_infer.ml
ae4735db 330 *
91eba41f
C
331 * (* pad: in pointer arithmetic, as in ptr+1, the lub must be ptr *)
332 * | (T.Pointer(ty1),T.Pointer(ty2)) ->
333 * T.Pointer(loop(ty1,ty2))
334 * | (ty1,T.Pointer(ty2)) -> T.Pointer(ty2)
335 * | (T.Pointer(ty1),ty2) -> T.Pointer(ty1)
ae4735db 336 *
91eba41f 337*)
708f4980 338let lub op t1 t2 =
ae4735db 339 let ftopt =
91eba41f
C
340 match t1, t2 with
341 | None, None -> None
342 | Some t, None -> Some t
343 | None, Some t -> Some t
ae4735db
C
344 (* check equal ? no cos can have pointer arithmetic so t2 can be <> t1
345 *
346 * todo: right now I favor the first term because usually pointer
91eba41f 347 * arithmetic are written with the pointer in the first position.
ae4735db
C
348 *
349 * Also when an expression contain a typedef, as in
91eba41f
C
350 * 'dma_addr + 1' where dma_addr was declared as a varialbe
351 * of type dma_addr_t, then again I want to have in the lub
352 * the typedef and it is often again in the first position.
ae4735db 353 *
91eba41f 354 *)
ae4735db 355 | Some t1, Some t2 ->
91eba41f
C
356 let t1bis = Ast_c.unwrap_typeC t1 in
357 let t2bis = Ast_c.unwrap_typeC t2 in
708f4980
C
358 (* a small attempt to do better, no consideration of typedefs *)
359 (match op, t1bis, t2bis with
360 (* these rules follow ANSI C. See eg:
361 http://flexor.uwaterloo.ca/library/SGI_bookshelves/SGI_Developer/books/CLanguageRef/sgi_html/ch05.html *)
362 _,Ast_c.BaseType(bt1),Ast_c.BaseType(bt2) ->
363 (match bt1,bt2 with
364 Ast_c.Void,_ -> Some t2 (* something has gone wrong *)
365 | _,Ast_c.Void -> Some t1 (* something has gone wrong *)
366 | Ast_c.FloatType(Ast_c.CLongDouble),_ -> Some t1
367 | _,Ast_c.FloatType(Ast_c.CLongDouble) -> Some t2
368 | Ast_c.FloatType(Ast_c.CDouble),_ -> Some t1
369 | _,Ast_c.FloatType(Ast_c.CDouble) -> Some t2
370 | Ast_c.FloatType(Ast_c.CFloat),_ -> Some t1
371 | _,Ast_c.FloatType(Ast_c.CFloat) -> Some t2
372
1eddfd50
C
373 | Ast_c.PtrDiffType,_ -> Some t1
374 | _,Ast_c.PtrDiffType -> Some t2
375 | Ast_c.SSizeType,_ -> Some t1
376 | _,Ast_c.SSizeType -> Some t2
377 | Ast_c.SizeType,_ -> Some t1
378 | _,Ast_c.SizeType -> Some t2
379
708f4980
C
380 | Ast_c.IntType(Ast_c.Si(Ast_c.UnSigned,Ast_c.CLongLong)),_ ->
381 Some t1
382 | _,Ast_c.IntType(Ast_c.Si(Ast_c.UnSigned,Ast_c.CLongLong)) ->
383 Some t2
384 | Ast_c.IntType(Ast_c.Si(Ast_c.Signed,Ast_c.CLongLong)),_ ->
385 Some t1
386 | _,Ast_c.IntType(Ast_c.Si(Ast_c.Signed,Ast_c.CLongLong)) ->
387 Some t2
388 | Ast_c.IntType(Ast_c.Si(Ast_c.UnSigned,Ast_c.CLong)),_ ->
389 Some t1
390 | _,Ast_c.IntType(Ast_c.Si(Ast_c.UnSigned,Ast_c.CLong)) ->
391 Some t2
392 | Ast_c.IntType(Ast_c.Si(Ast_c.Signed,Ast_c.CLong)),_ ->
393 Some t1
394 | _,Ast_c.IntType(Ast_c.Si(Ast_c.Signed,Ast_c.CLong)) ->
395 Some t2
396 | Ast_c.IntType(Ast_c.Si(Ast_c.UnSigned,Ast_c.CInt)),_ ->
397 Some t1
398 | _,Ast_c.IntType(Ast_c.Si(Ast_c.UnSigned,Ast_c.CInt)) ->
399 Some t2
400 | _ -> Some int_type)
401
402 | Ast_c.Plus,Ast_c.Pointer _,Ast_c.BaseType(Ast_c.IntType _) ->
403 Some t1
404 | Ast_c.Plus,Ast_c.BaseType(Ast_c.IntType _),Ast_c.Pointer _ ->
405 Some t2
406 | Ast_c.Minus,Ast_c.Pointer _,Ast_c.BaseType(Ast_c.IntType _) ->
407 Some t1
408 | Ast_c.Minus,Ast_c.BaseType(Ast_c.IntType _),Ast_c.Pointer _ ->
409 Some t2
97111a47
C
410 | Ast_c.Minus,(Ast_c.Pointer _ | Ast_c.Array _),
411 (Ast_c.Pointer _ | Ast_c.Array _) ->
1eddfd50 412 Some ptr_diff_type
91eba41f 413 (* todo, Pointer, Typedef, etc *)
708f4980 414 | _, _, _ -> Some t1
91eba41f
C
415 )
416
417 in
418 match ftopt with
419 | None -> None, Ast_c.NotTest
420 | Some ft -> Some (ft, Ast_c.NotLocalVar), Ast_c.NotTest
421
91eba41f
C
422(*****************************************************************************)
423(* type lookup *)
424(*****************************************************************************)
425
ae4735db 426(* old: was using some nested find_some, but easier use ref
91eba41f 427 * update: handling union (used a lot in sparse)
ae4735db 428 * note: it is independent of the environment.
91eba41f 429*)
ae4735db
C
430let (type_field:
431 string -> (Ast_c.structUnion * Ast_c.structType) -> Ast_c.fullType) =
432 fun fld (su, fields) ->
91eba41f
C
433
434 let res = ref [] in
ae4735db
C
435
436 let rec aux_fields fields =
437 fields +> List.iter (fun x ->
708f4980 438 match x with
ae4735db
C
439 | DeclarationField (FieldDeclList (onefield_multivars, iiptvirg)) ->
440 onefield_multivars +> List.iter (fun (fieldkind, iicomma) ->
b1b2de81 441 match fieldkind with
ae4735db 442 | Simple (Some name, t) | BitField (Some name, t, _, _) ->
b1b2de81 443 let s = Ast_c.str_of_name name in
ae4735db 444 if s =$= fld
91eba41f
C
445 then Common.push2 t res
446 else ()
ae4735db
C
447
448 | Simple (None, t) ->
91eba41f
C
449 (match Ast_c.unwrap_typeC t with
450
451 (* union *)
ae4735db 452 | StructUnion (Union, _, fields) ->
91eba41f 453 aux_fields fields
ae4735db
C
454
455 (* Special case of nested structure definition inside
456 * structure without associated field variable as in
91eba41f
C
457 * struct top = { ... struct xx { int subfield1; ... }; ... }
458 * cf sparse source, where can access subfields directly.
459 * It can also be used in conjunction with union.
460 *)
ae4735db 461 | StructUnion (Struct, _, fields) ->
91eba41f 462 aux_fields fields
ae4735db 463
91eba41f
C
464 | _ -> ()
465 )
466 | _ -> ()
467 )
ae4735db 468
708f4980
C
469 | EmptyField info -> ()
470 | MacroDeclField _ -> pr2_once "DeclTodo"; ()
ae4735db 471
91eba41f 472 | CppDirectiveStruct _
ae4735db 473 | IfdefStruct _ -> pr2_once "StructCpp";
91eba41f
C
474 )
475 in
476 aux_fields fields;
477 match !res with
478 | [t] -> t
ae4735db 479 | [] ->
91eba41f 480 raise Not_found
ae4735db 481 | x::y::xs ->
91eba41f
C
482 pr2 ("MultiFound field: " ^ fld) ;
483 x
ae4735db 484
91eba41f
C
485
486
0708f913
C
487(*****************************************************************************)
488(* helpers *)
489(*****************************************************************************)
490
491
492(* was in aliasing_function_c.ml before*)
493
ae4735db 494(* assume normalized/completed ? so no ParenType handling to do ?
0708f913 495*)
ae4735db 496let rec is_function_type x =
0708f913
C
497 match Ast_c.unwrap_typeC x with
498 | FunctionType _ -> true
499 | _ -> false
500
501
502(* assume normalized/completed ? so no ParenType handling to do ? *)
ae4735db 503let rec function_pointer_type_opt x =
0708f913 504 match Ast_c.unwrap_typeC x with
ae4735db 505 | Pointer y ->
0708f913
C
506 (match Ast_c.unwrap_typeC y with
507 | FunctionType ft -> Some ft
508
509 (* fix *)
ae4735db 510 | TypeName (_name, Some ft2) ->
0708f913
C
511 (match Ast_c.unwrap_typeC ft2 with
512 | FunctionType ft -> Some ft
513 | _ -> None
514 )
515
516 | _ -> None
517 )
ae4735db
C
518 (* bugfix: for many fields in structure, the field is a typename
519 * like irq_handler_t to a function pointer
0708f913 520 *)
ae4735db 521 | TypeName (_name, Some ft) ->
0708f913
C
522 function_pointer_type_opt ft
523 (* bugfix: in field, usually it has some ParenType *)
524
ae4735db 525 | ParenType ft ->
0708f913
C
526 function_pointer_type_opt ft
527
528 | _ -> None
529
530