Release coccinelle-0.2.3rc1
[bpt/coccinelle.git] / parsing_c / compare_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2006, 2007 Ecole des Mines de Nantes
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15 open Common
16
17 open Ast_c
18
19
20 type compare_result =
21 | Correct
22 | Pb of string
23 | PbOnlyInNotParsedCorrectly of string
24
25
26 (*****************************************************************************)
27 (* Normalise before comparing *)
28 (*****************************************************************************)
29
30 (* List taken from CVS manual, 'Keyword substitution' chapter. Note
31 * that I do not put "Log" because it is used only in comment, and it
32 * is not enough to substituate until the end of the line. *)
33 let cvs_keyword_list = [
34 "Id";"Date"; "Revision"; (* the common one *)
35 "FreeBSD";"Heimdal";"KAME";"NetBSD";"OpenBSD";"OpenLDAP";"RuOBSD";
36 "SourceForge";
37 "Name";"Author";"CVSHeader";"Header";"Locker";"RCSfile";"Source";"State";
38 "Rev";
39 ]
40
41 (* Can also have just dollarIDdollar but it is only when you have not
42 * yet committed the file. After the commit it would be a dollarIddollar:.
43 * If reput Id:, do not join the regexp!! otherwise CVS will modify it :)
44 *)
45 let cvs_keyword_regexp = Str.regexp
46 ("\\$\\([A-Za-z_]+\\):[^\\$]*\\$")
47
48
49 let cvs_compute_newstr s =
50 Str.global_substitute cvs_keyword_regexp (fun _s ->
51 let substr = Str.matched_string s in
52 assert (substr ==~ cvs_keyword_regexp); (* use its side-effect *)
53 let tag = matched1 substr in
54
55 if not (List.mem tag cvs_keyword_list)
56 then pr2_once ("unknown CVS keyword: " ^ tag);
57
58 "CVS_MAGIC_STRING"
59 ) s
60
61
62
63
64 (* todo: get rid of the type for expressions ? *)
65 let normal_form_program xs =
66 let bigf = { Visitor_c.default_visitor_c_s with
67
68 Visitor_c.kini_s = (fun (k,bigf) ini ->
69 match ini with
70 | InitList xs, [i1;i2;iicommaopt] ->
71 k (InitList xs, [i1;i2])
72 | _ -> k ini
73 );
74 Visitor_c.kexpr_s = (fun (k,bigf) e ->
75 match e with
76 (* todo: should also do something for multistrings *)
77 | (Constant (String (s,kind)), typ), [ii]
78 when Common.string_match_substring cvs_keyword_regexp s ->
79 let newstr = cvs_compute_newstr s in
80 (Constant (String (newstr,kind)), typ), [rewrap_str newstr ii]
81 | _ -> k e
82
83 );
84 Visitor_c.ktoplevel_s = (fun (k,bigf) p ->
85 match p with
86 | CppTop (Define _) ->
87 raise Todo
88 (*
89 let (i1, i2, i3) = Common.tuple_of_list3 ii in
90 if Common.string_match_substring cvs_keyword_regexp body
91 then
92 let newstr = cvs_compute_newstr body in
93 Define ((s, newstr), [i1;i2;rewrap_str newstr i3])
94 else p
95 *)
96 | _ -> k p
97 );
98
99 (*
100 Visitor_c.kinfo_s = (fun (k,bigf) i ->
101 let s = Ast_c.get_str_of_info i in
102 if Common.string_match_substring cvs_keyword_regexp s
103 then
104 let newstr = cvs_compute_newstr s in
105 rewrap_str newstr i
106 else i
107 );
108 *)
109
110 }
111 in
112 xs +> List.map (fun p -> Visitor_c.vk_toplevel_s bigf p)
113
114
115
116
117
118
119 let normal_form_token adjust_cvs x =
120 let x' =
121 match x with
122 | Parser_c.TString ((s, kind),i1) -> Parser_c.TString (("",kind), i1)
123 | x -> x
124 in
125 x' +> Token_helpers.visitor_info_of_tok (fun info ->
126 let info = Ast_c.al_info 0 info in
127 let str = Ast_c.str_of_info info in
128 if adjust_cvs && Common.string_match_substring cvs_keyword_regexp str
129 then
130 let newstr = cvs_compute_newstr str in
131 rewrap_str newstr info
132 else info
133 )
134
135
136 (*****************************************************************************)
137 (* Compare at Ast level *)
138 (*****************************************************************************)
139
140 (* Note that I do a (simple) astdiff to know if there is a difference, but
141 * then I use diff to print the differences. So sometimes you have to dig
142 * a little to find really where the real difference (one not involving
143 * just spacing difference) was.
144 * Note also that the astdiff is not very accurate. As I skip comments,
145 * macro definitions, those are not in the Ast and if there is a diff
146 * between 2 files regarding macro def, then I will not be able to report it :(
147 * update: I now put the toplevel #define at least in the Ast.
148 * update: You can use token_compare for more precise diff.
149 *
150 * todo?: finer grain astdiff, better report, more precise.
151 *
152 * todo: do iso between if() S and if() { S }
153 *)
154 let compare_ast filename1 filename2 =
155
156 let xs =
157 match !Flag_parsing_c.diff_lines with
158 None ->
159 Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2)
160 | Some n ->
161 Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in
162
163 (* get rid of the --- and +++ lines *)
164 let xs =
165 if null xs
166 then xs
167 else Common.drop 2 xs
168 in
169
170
171 let process_filename filename =
172 let (c, _stat) = Parse_c.parse_c_and_cpp filename in
173 let c = List.map fst c in
174 c +> Lib_parsing_c.al_program +> normal_form_program
175 in
176
177 let c1 = process_filename filename1 in
178 let c2 = process_filename filename2 in
179
180 let error = ref 0 in
181 let pb_notparsed = ref 0 in
182
183 let res =
184 if List.length c1 <> List.length c2
185 then Pb "not same number of entities (func, decl, ...)"
186 else
187 begin
188 zip c1 c2 +> List.iter (function
189 | Declaration a, Declaration b -> if not (a =*= b) then incr error
190 | Definition a, Definition b -> if not (a =*= b) then incr error
191 | EmptyDef a, EmptyDef b -> if not (a =*= b) then incr error
192 | MacroTop (a1,b1,c1), MacroTop (a2,b2,c2) ->
193 if not ((a1,b1,c1) =*= (a2,b2,c2)) then incr error
194 | CppTop (Include {i_include = a}), CppTop (Include {i_include = b}) ->
195 if not (a =*= b) then incr error
196 | CppTop Define _, CppTop Define _ ->
197 raise Todo
198 (* if not (a =*= b) then incr error *)
199 | NotParsedCorrectly a, NotParsedCorrectly b ->
200 if not (a =*= b) then incr pb_notparsed
201 | NotParsedCorrectly a, _ ->
202 (* Pb only in generated file *)
203 incr error;
204
205 | _, NotParsedCorrectly b ->
206 incr pb_notparsed
207 | FinalDef a, FinalDef b -> if not (a =*= b) then incr error
208
209 | IfdefTop a, IfdefTop b -> if not (a =*= b) then incr error
210
211 | (FinalDef _|EmptyDef _|
212 MacroTop (_, _, _)|IfdefTop _|
213 CppTop _|Definition _|Declaration _), _ -> incr error
214
215 );
216 (match () with
217 | _ when !pb_notparsed > 0 && !error =|= 0 ->
218 PbOnlyInNotParsedCorrectly ""
219 | _ when !error > 0 -> Pb ""
220 | _ -> Correct
221 )
222 end
223 in
224 res, xs
225
226
227
228 (*****************************************************************************)
229 (* Compare at token level *)
230 (*****************************************************************************)
231
232 (* Because I now commentize more in parsing, with parsing_hacks,
233 * compare_ast may say that 2 programs are equal whereas they are not.
234 * Here I compare token, and so have still the TCommentCpp and TCommentMisc
235 * so at least detect such differences.
236 *
237 * Morover compare_ast is not very precise in his report when it
238 * detects a difference. So token_diff is better.
239 *
240 * I do token_diff but I use programCelement2, so that
241 * I know if I am in a "notparsable" zone. The tokens are
242 * in (snd programCelement2).
243 *
244 * Faire aussi un compare_token qui se moque des TCommentMisc,
245 * TCommentCPP et TIfdef ? Normalement si fait ca retrouvera
246 * les meme resultats que compare_ast.
247 *
248 *)
249
250
251 (* Pass only "true" comments, dont pass TCommentMisc and TCommentCpp *)
252 let is_normal_space_or_comment to_expected = function
253 | Parser_c.TCommentSpace _
254 | Parser_c.TCommentNewline _
255
256 (* | Parser_c.TComma _ *) (* UGLY, because of gcc_opt_comma isomorphism *)
257 -> true
258 | Parser_c.TComment _ -> to_expected (* only ignore in compare to expected *)
259 | _ -> false
260
261
262 (* convetion: compare_token generated_file expected_res
263 * because when there is a notparsablezone in generated_file, I
264 * don't issue a PbOnlyInNotParsedCorrectly
265 *)
266 let do_compare_token adjust_cvs to_expected filename1 filename2 =
267
268 let rec loop xs ys =
269 match xs, ys with
270 | [], [] -> None
271
272 (* UGLY, because of gcc_opt_comma isomorphism *)
273 | (Parser_c.TComma _::Parser_c.TCBrace _::xs), (Parser_c.TCBrace _::ys) ->
274 loop xs ys
275 | (Parser_c.TCBrace _::xs), (Parser_c.TComma _::Parser_c.TCBrace _::ys) ->
276 loop xs ys
277
278 | [], x::xs ->
279 Some "not same number of tokens inside C elements"
280 | x::xs, [] ->
281 Some "not same number of tokens inside C elements"
282
283 | x::xs, y::ys ->
284 let x' = normal_form_token adjust_cvs x in
285 let y' = normal_form_token adjust_cvs y in
286 if x' =*= y'
287 then loop xs ys
288 else
289 let str1, pos1 =
290 Token_helpers.str_of_tok x, Token_helpers.pos_of_tok x in
291 let str2, pos2 =
292 Token_helpers.str_of_tok y, Token_helpers.pos_of_tok y in
293 Some ("diff token: " ^ str1 ^" VS " ^ str2 ^ "\n" ^
294 Common.error_message filename1 (str1, pos1) ^ "\n" ^
295 Common.error_message filename2 (str2, pos2) ^ "\n"
296 )
297
298 in
299 let final_loop xs ys =
300 loop
301 (xs +>
302 List.filter (fun x -> not (is_normal_space_or_comment to_expected x)))
303 (ys +>
304 List.filter (fun x -> not (is_normal_space_or_comment to_expected x)))
305 in
306
307 (*
308 let toks1 = Parse_c.tokens filename1 in
309 let toks2 = Parse_c.tokens filename2 in
310 loop toks1 toks2 in
311 *)
312
313 let (c1, _stat) = Parse_c.parse_c_and_cpp filename1 in
314 let (c2, _stat) = Parse_c.parse_c_and_cpp filename2 in
315
316 let res =
317 if List.length c1 <> List.length c2
318 then Pb "not same number of entities (func, decl, ...)"
319 else
320 zip c1 c2 +> Common.fold_k (fun acc ((a,infoa),(b,infob)) k ->
321 match a, b with
322 | NotParsedCorrectly a, NotParsedCorrectly b ->
323 (match final_loop (snd infoa) (snd infob) with
324 | None -> k acc
325 | Some s -> PbOnlyInNotParsedCorrectly s
326 )
327
328 | NotParsedCorrectly a, _ ->
329 Pb "PB parsing only in generated-file"
330 | _, NotParsedCorrectly b ->
331 PbOnlyInNotParsedCorrectly "PB parsing only in expected-file"
332 | _, _ ->
333 (match final_loop (snd infoa) (snd infob) with
334 | None -> k acc
335 | Some s -> Pb s
336 )
337 ) (fun acc -> acc)
338 (Correct)
339 in
340
341 let xs =
342 match !Flag_parsing_c.diff_lines with
343 None ->
344 Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2)
345 | Some n ->
346 Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in
347
348 (* get rid of the --- and +++ lines *)
349 let xs =
350 if null xs
351 then xs
352 else Common.drop 2 xs
353 in
354
355 if null xs && (res <> Correct)
356 then failwith
357 "Impossible: How can diff be null and have not Correct in compare_c?";
358
359 res, xs
360
361 let compare_token = do_compare_token true true
362
363
364 (*****************************************************************************)
365
366 (* compare to a res file *)
367 let compare_default = do_compare_token true true
368
369 (* compare to the source of the transformation *)
370 let compare_to_original = do_compare_token false false
371
372
373 let compare_result_to_string (correct, diffxs) =
374 match correct with
375 | Correct ->
376 "seems correct" ^ "\n"
377 | Pb s ->
378 ("seems incorrect: " ^ s) ^ "\n" ^
379 "diff (result(-) vs expected_result(+)) = " ^ "\n" ^
380 (diffxs +> Common.join "\n") ^ "\n"
381 | PbOnlyInNotParsedCorrectly s ->
382 "seems incorrect, but only because of code that was not parsable" ^ "\n"^
383 ("explanation:" ^ s) ^ "\n" ^
384 "diff (result(-) vs expected_result(+)) = " ^ "\n" ^
385 (diffxs +> Common.join "\n") ^ "\n"
386
387
388 let compare_result_to_bool correct =
389 correct =*= Correct