Release coccinelle-0.2.3rc1
[bpt/coccinelle.git] / parsing_c / compare_c.ml
CommitLineData
0708f913 1(* Yoann Padioleau
ae4735db
C
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
0708f913
C
4 * Copyright (C) 2006, 2007 Ecole des Mines de Nantes
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
0708f913
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
34e49164
C
15open Common
16
17open Ast_c
18
19
ae4735db
C
20type compare_result =
21 | Correct
34e49164
C
22 | Pb of string
23 | PbOnlyInNotParsedCorrectly of string
24
25
26(*****************************************************************************)
27(* Normalise before comparing *)
28(*****************************************************************************)
29
30(* List taken from CVS manual, 'Keyword substitution' chapter. Note
31 * that I do not put "Log" because it is used only in comment, and it
32 * is not enough to substituate until the end of the line. *)
33let cvs_keyword_list = [
34 "Id";"Date"; "Revision"; (* the common one *)
951c7801
C
35 "FreeBSD";"Heimdal";"KAME";"NetBSD";"OpenBSD";"OpenLDAP";"RuOBSD";
36 "SourceForge";
34e49164
C
37 "Name";"Author";"CVSHeader";"Header";"Locker";"RCSfile";"Source";"State";
38 "Rev";
39]
40
41(* Can also have just dollarIDdollar but it is only when you have not
42 * yet committed the file. After the commit it would be a dollarIddollar:.
43 * If reput Id:, do not join the regexp!! otherwise CVS will modify it :)
44 *)
ae4735db 45let cvs_keyword_regexp = Str.regexp
34e49164
C
46 ("\\$\\([A-Za-z_]+\\):[^\\$]*\\$")
47
48
ae4735db
C
49let cvs_compute_newstr s =
50 Str.global_substitute cvs_keyword_regexp (fun _s ->
34e49164
C
51 let substr = Str.matched_string s in
52 assert (substr ==~ cvs_keyword_regexp); (* use its side-effect *)
53 let tag = matched1 substr in
54
55 if not (List.mem tag cvs_keyword_list)
978fd7e5 56 then pr2_once ("unknown CVS keyword: " ^ tag);
ae4735db
C
57
58 "CVS_MAGIC_STRING"
59 ) s
34e49164
C
60
61
62
63
64(* todo: get rid of the type for expressions ? *)
ae4735db
C
65let normal_form_program xs =
66 let bigf = { Visitor_c.default_visitor_c_s with
34e49164 67
ae4735db 68 Visitor_c.kini_s = (fun (k,bigf) ini ->
34e49164 69 match ini with
ae4735db 70 | InitList xs, [i1;i2;iicommaopt] ->
34e49164
C
71 k (InitList xs, [i1;i2])
72 | _ -> k ini
73 );
ae4735db 74 Visitor_c.kexpr_s = (fun (k,bigf) e ->
34e49164
C
75 match e with
76 (* todo: should also do something for multistrings *)
708f4980 77 | (Constant (String (s,kind)), typ), [ii]
ae4735db 78 when Common.string_match_substring cvs_keyword_regexp s ->
34e49164
C
79 let newstr = cvs_compute_newstr s in
80 (Constant (String (newstr,kind)), typ), [rewrap_str newstr ii]
ae4735db 81 | _ -> k e
34e49164
C
82
83 );
ae4735db 84 Visitor_c.ktoplevel_s = (fun (k,bigf) p ->
34e49164 85 match p with
ae4735db 86 | CppTop (Define _) ->
34e49164
C
87 raise Todo
88 (*
89 let (i1, i2, i3) = Common.tuple_of_list3 ii in
90 if Common.string_match_substring cvs_keyword_regexp body
ae4735db 91 then
34e49164
C
92 let newstr = cvs_compute_newstr body in
93 Define ((s, newstr), [i1;i2;rewrap_str newstr i3])
94 else p
95 *)
96 | _ -> k p
97 );
98
99(*
ae4735db 100 Visitor_c.kinfo_s = (fun (k,bigf) i ->
34e49164
C
101 let s = Ast_c.get_str_of_info i in
102 if Common.string_match_substring cvs_keyword_regexp s
ae4735db 103 then
34e49164
C
104 let newstr = cvs_compute_newstr s in
105 rewrap_str newstr i
106 else i
107 );
108*)
109
110 }
111 in
112 xs +> List.map (fun p -> Visitor_c.vk_toplevel_s bigf p)
113
114
115
116
117
118
ae4735db
C
119let normal_form_token adjust_cvs x =
120 let x' =
121 match x with
34e49164
C
122 | Parser_c.TString ((s, kind),i1) -> Parser_c.TString (("",kind), i1)
123 | x -> x
124 in
ae4735db 125 x' +> Token_helpers.visitor_info_of_tok (fun info ->
34e49164
C
126 let info = Ast_c.al_info 0 info in
127 let str = Ast_c.str_of_info info in
951c7801 128 if adjust_cvs && Common.string_match_substring cvs_keyword_regexp str
ae4735db 129 then
34e49164
C
130 let newstr = cvs_compute_newstr str in
131 rewrap_str newstr info
132 else info
133 )
134
ae4735db 135
34e49164
C
136(*****************************************************************************)
137(* Compare at Ast level *)
138(*****************************************************************************)
139
140(* Note that I do a (simple) astdiff to know if there is a difference, but
141 * then I use diff to print the differences. So sometimes you have to dig
ae4735db 142 * a little to find really where the real difference (one not involving
34e49164
C
143 * just spacing difference) was.
144 * Note also that the astdiff is not very accurate. As I skip comments,
145 * macro definitions, those are not in the Ast and if there is a diff
146 * between 2 files regarding macro def, then I will not be able to report it :(
147 * update: I now put the toplevel #define at least in the Ast.
148 * update: You can use token_compare for more precise diff.
149 *
150 * todo?: finer grain astdiff, better report, more precise.
ae4735db 151 *
34e49164
C
152 * todo: do iso between if() S and if() { S }
153 *)
154let compare_ast filename1 filename2 =
155
156 let xs =
157 match !Flag_parsing_c.diff_lines with
158 None ->
159 Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2)
ae4735db 160 | Some n ->
34e49164
C
161 Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in
162
163 (* get rid of the --- and +++ lines *)
ae4735db
C
164 let xs =
165 if null xs
166 then xs
34e49164
C
167 else Common.drop 2 xs
168 in
169
170
ae4735db 171 let process_filename filename =
978fd7e5 172 let (c, _stat) = Parse_c.parse_c_and_cpp filename in
34e49164
C
173 let c = List.map fst c in
174 c +> Lib_parsing_c.al_program +> normal_form_program
175 in
176
177 let c1 = process_filename filename1 in
178 let c2 = process_filename filename2 in
ae4735db 179
34e49164
C
180 let error = ref 0 in
181 let pb_notparsed = ref 0 in
ae4735db
C
182
183 let res =
184 if List.length c1 <> List.length c2
34e49164 185 then Pb "not same number of entities (func, decl, ...)"
ae4735db 186 else
34e49164
C
187 begin
188 zip c1 c2 +> List.iter (function
189 | Declaration a, Declaration b -> if not (a =*= b) then incr error
190 | Definition a, Definition b -> if not (a =*= b) then incr error
191 | EmptyDef a, EmptyDef b -> if not (a =*= b) then incr error
ae4735db 192 | MacroTop (a1,b1,c1), MacroTop (a2,b2,c2) ->
34e49164 193 if not ((a1,b1,c1) =*= (a2,b2,c2)) then incr error
ae4735db 194 | CppTop (Include {i_include = a}), CppTop (Include {i_include = b}) ->
485bce71 195 if not (a =*= b) then incr error
ae4735db 196 | CppTop Define _, CppTop Define _ ->
34e49164
C
197 raise Todo
198 (* if not (a =*= b) then incr error *)
ae4735db 199 | NotParsedCorrectly a, NotParsedCorrectly b ->
34e49164 200 if not (a =*= b) then incr pb_notparsed
ae4735db 201 | NotParsedCorrectly a, _ ->
34e49164
C
202 (* Pb only in generated file *)
203 incr error;
204
ae4735db 205 | _, NotParsedCorrectly b ->
34e49164
C
206 incr pb_notparsed
207 | FinalDef a, FinalDef b -> if not (a =*= b) then incr error
485bce71
C
208
209 | IfdefTop a, IfdefTop b -> if not (a =*= b) then incr error
210
211 | (FinalDef _|EmptyDef _|
212 MacroTop (_, _, _)|IfdefTop _|
213 CppTop _|Definition _|Declaration _), _ -> incr error
ae4735db 214
34e49164
C
215 );
216 (match () with
ae4735db 217 | _ when !pb_notparsed > 0 && !error =|= 0 ->
34e49164
C
218 PbOnlyInNotParsedCorrectly ""
219 | _ when !error > 0 -> Pb ""
220 | _ -> Correct
221 )
222 end
223 in
224 res, xs
225
226
227
228(*****************************************************************************)
229(* Compare at token level *)
230(*****************************************************************************)
231
232(* Because I now commentize more in parsing, with parsing_hacks,
233 * compare_ast may say that 2 programs are equal whereas they are not.
234 * Here I compare token, and so have still the TCommentCpp and TCommentMisc
235 * so at least detect such differences.
ae4735db 236 *
34e49164
C
237 * Morover compare_ast is not very precise in his report when it
238 * detects a difference. So token_diff is better.
ae4735db 239 *
34e49164 240 * I do token_diff but I use programCelement2, so that
ae4735db 241 * I know if I am in a "notparsable" zone. The tokens are
34e49164 242 * in (snd programCelement2).
ae4735db
C
243 *
244 * Faire aussi un compare_token qui se moque des TCommentMisc,
245 * TCommentCPP et TIfdef ? Normalement si fait ca retrouvera
34e49164 246 * les meme resultats que compare_ast.
ae4735db 247 *
34e49164
C
248 *)
249
250
251(* Pass only "true" comments, dont pass TCommentMisc and TCommentCpp *)
5636bb2c 252let is_normal_space_or_comment to_expected = function
34e49164
C
253 | Parser_c.TCommentSpace _
254 | Parser_c.TCommentNewline _
255
256(* | Parser_c.TComma _ *) (* UGLY, because of gcc_opt_comma isomorphism *)
257 -> true
5636bb2c 258 | Parser_c.TComment _ -> to_expected (* only ignore in compare to expected *)
34e49164
C
259 | _ -> false
260
261
ae4735db
C
262(* convetion: compare_token generated_file expected_res
263 * because when there is a notparsablezone in generated_file, I
34e49164
C
264 * don't issue a PbOnlyInNotParsedCorrectly
265 *)
5636bb2c 266let do_compare_token adjust_cvs to_expected filename1 filename2 =
34e49164 267
ae4735db 268 let rec loop xs ys =
34e49164
C
269 match xs, ys with
270 | [], [] -> None
271
272 (* UGLY, because of gcc_opt_comma isomorphism *)
273 | (Parser_c.TComma _::Parser_c.TCBrace _::xs), (Parser_c.TCBrace _::ys) ->
274 loop xs ys
ae4735db 275 | (Parser_c.TCBrace _::xs), (Parser_c.TComma _::Parser_c.TCBrace _::ys) ->
34e49164
C
276 loop xs ys
277
ae4735db 278 | [], x::xs ->
34e49164 279 Some "not same number of tokens inside C elements"
ae4735db 280 | x::xs, [] ->
34e49164 281 Some "not same number of tokens inside C elements"
ae4735db
C
282
283 | x::xs, y::ys ->
951c7801
C
284 let x' = normal_form_token adjust_cvs x in
285 let y' = normal_form_token adjust_cvs y in
ae4735db 286 if x' =*= y'
34e49164 287 then loop xs ys
ae4735db
C
288 else
289 let str1, pos1 =
34e49164 290 Token_helpers.str_of_tok x, Token_helpers.pos_of_tok x in
ae4735db 291 let str2, pos2 =
34e49164
C
292 Token_helpers.str_of_tok y, Token_helpers.pos_of_tok y in
293 Some ("diff token: " ^ str1 ^" VS " ^ str2 ^ "\n" ^
294 Common.error_message filename1 (str1, pos1) ^ "\n" ^
295 Common.error_message filename2 (str2, pos2) ^ "\n"
296 )
ae4735db 297
34e49164 298 in
ae4735db 299 let final_loop xs ys =
34e49164 300 loop
5636bb2c
C
301 (xs +>
302 List.filter (fun x -> not (is_normal_space_or_comment to_expected x)))
303 (ys +>
304 List.filter (fun x -> not (is_normal_space_or_comment to_expected x)))
34e49164 305 in
ae4735db 306
34e49164
C
307 (*
308 let toks1 = Parse_c.tokens filename1 in
309 let toks2 = Parse_c.tokens filename2 in
310 loop toks1 toks2 in
311 *)
312
978fd7e5
C
313 let (c1, _stat) = Parse_c.parse_c_and_cpp filename1 in
314 let (c2, _stat) = Parse_c.parse_c_and_cpp filename2 in
34e49164 315
ae4735db
C
316 let res =
317 if List.length c1 <> List.length c2
34e49164 318 then Pb "not same number of entities (func, decl, ...)"
ae4735db
C
319 else
320 zip c1 c2 +> Common.fold_k (fun acc ((a,infoa),(b,infob)) k ->
34e49164 321 match a, b with
ae4735db 322 | NotParsedCorrectly a, NotParsedCorrectly b ->
34e49164
C
323 (match final_loop (snd infoa) (snd infob) with
324 | None -> k acc
325 | Some s -> PbOnlyInNotParsedCorrectly s
326 )
ae4735db
C
327
328 | NotParsedCorrectly a, _ ->
34e49164 329 Pb "PB parsing only in generated-file"
ae4735db 330 | _, NotParsedCorrectly b ->
34e49164 331 PbOnlyInNotParsedCorrectly "PB parsing only in expected-file"
ae4735db 332 | _, _ ->
34e49164
C
333 (match final_loop (snd infoa) (snd infob) with
334 | None -> k acc
335 | Some s -> Pb s
336 )
337 ) (fun acc -> acc)
338 (Correct)
339 in
340
341 let xs =
342 match !Flag_parsing_c.diff_lines with
343 None ->
344 Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2)
ae4735db 345 | Some n ->
34e49164
C
346 Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in
347
348 (* get rid of the --- and +++ lines *)
ae4735db
C
349 let xs =
350 if null xs
351 then xs
34e49164
C
352 else Common.drop 2 xs
353 in
354
ae4735db
C
355 if null xs && (res <> Correct)
356 then failwith
34e49164
C
357 "Impossible: How can diff be null and have not Correct in compare_c?";
358
359 res, xs
360
5636bb2c 361let compare_token = do_compare_token true true
34e49164
C
362
363
364(*****************************************************************************)
365
951c7801 366(* compare to a res file *)
5636bb2c 367let compare_default = do_compare_token true true
951c7801
C
368
369(* compare to the source of the transformation *)
5636bb2c 370let compare_to_original = do_compare_token false false
34e49164
C
371
372
373let compare_result_to_string (correct, diffxs) =
374 match correct with
ae4735db 375 | Correct ->
34e49164 376 "seems correct" ^ "\n"
ae4735db 377 | Pb s ->
34e49164
C
378 ("seems incorrect: " ^ s) ^ "\n" ^
379 "diff (result(-) vs expected_result(+)) = " ^ "\n" ^
380 (diffxs +> Common.join "\n") ^ "\n"
ae4735db 381 | PbOnlyInNotParsedCorrectly s ->
34e49164
C
382 "seems incorrect, but only because of code that was not parsable" ^ "\n"^
383 ("explanation:" ^ s) ^ "\n" ^
384 "diff (result(-) vs expected_result(+)) = " ^ "\n" ^
385 (diffxs +> Common.join "\n") ^ "\n"
386
387
ae4735db 388let compare_result_to_bool correct =
b1b2de81 389 correct =*= Correct