Release coccinelle-0.1.11rc1
[bpt/coccinelle.git] / parsing_c / compare_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2006, 2007 Ecole des Mines de Nantes
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
13 *)
14 open Common
15
16 open Ast_c
17
18
19 type compare_result =
20 | Correct
21 | Pb of string
22 | PbOnlyInNotParsedCorrectly of string
23
24
25 (*****************************************************************************)
26 (* Normalise before comparing *)
27 (*****************************************************************************)
28
29 (* List taken from CVS manual, 'Keyword substitution' chapter. Note
30 * that I do not put "Log" because it is used only in comment, and it
31 * is not enough to substituate until the end of the line. *)
32 let cvs_keyword_list = [
33 "Id";"Date"; "Revision"; (* the common one *)
34 "FreeBSD";"Heimdal";"KAME";"NetBSD";"OpenBSD";"OpenLDAP";"RuOBSD";
35 "SourceForge";
36 "Name";"Author";"CVSHeader";"Header";"Locker";"RCSfile";"Source";"State";
37 "Rev";
38 ]
39
40 (* Can also have just dollarIDdollar but it is only when you have not
41 * yet committed the file. After the commit it would be a dollarIddollar:.
42 * If reput Id:, do not join the regexp!! otherwise CVS will modify it :)
43 *)
44 let cvs_keyword_regexp = Str.regexp
45 ("\\$\\([A-Za-z_]+\\):[^\\$]*\\$")
46
47
48 let cvs_compute_newstr s =
49 Str.global_substitute cvs_keyword_regexp (fun _s ->
50 let substr = Str.matched_string s in
51 assert (substr ==~ cvs_keyword_regexp); (* use its side-effect *)
52 let tag = matched1 substr in
53
54 if not (List.mem tag cvs_keyword_list)
55 then pr2_once ("unknown CVS keyword: " ^ tag);
56
57 "CVS_MAGIC_STRING"
58 ) s
59
60
61
62
63 (* todo: get rid of the type for expressions ? *)
64 let normal_form_program xs =
65 let bigf = { Visitor_c.default_visitor_c_s with
66
67 Visitor_c.kini_s = (fun (k,bigf) ini ->
68 match ini with
69 | InitList xs, [i1;i2;iicommaopt] ->
70 k (InitList xs, [i1;i2])
71 | _ -> k ini
72 );
73 Visitor_c.kexpr_s = (fun (k,bigf) e ->
74 match e with
75 (* todo: should also do something for multistrings *)
76 | (Constant (String (s,kind)), typ), [ii]
77 when Common.string_match_substring cvs_keyword_regexp s ->
78 let newstr = cvs_compute_newstr s in
79 (Constant (String (newstr,kind)), typ), [rewrap_str newstr ii]
80 | _ -> k e
81
82 );
83 Visitor_c.ktoplevel_s = (fun (k,bigf) p ->
84 match p with
85 | CppTop (Define _) ->
86 raise Todo
87 (*
88 let (i1, i2, i3) = Common.tuple_of_list3 ii in
89 if Common.string_match_substring cvs_keyword_regexp body
90 then
91 let newstr = cvs_compute_newstr body in
92 Define ((s, newstr), [i1;i2;rewrap_str newstr i3])
93 else p
94 *)
95 | _ -> k p
96 );
97
98 (*
99 Visitor_c.kinfo_s = (fun (k,bigf) i ->
100 let s = Ast_c.get_str_of_info i in
101 if Common.string_match_substring cvs_keyword_regexp s
102 then
103 let newstr = cvs_compute_newstr s in
104 rewrap_str newstr i
105 else i
106 );
107 *)
108
109 }
110 in
111 xs +> List.map (fun p -> Visitor_c.vk_toplevel_s bigf p)
112
113
114
115
116
117
118 let normal_form_token adjust_cvs x =
119 let x' =
120 match x with
121 | Parser_c.TString ((s, kind),i1) -> Parser_c.TString (("",kind), i1)
122 | x -> x
123 in
124 x' +> Token_helpers.visitor_info_of_tok (fun info ->
125 let info = Ast_c.al_info 0 info in
126 let str = Ast_c.str_of_info info in
127 if adjust_cvs && Common.string_match_substring cvs_keyword_regexp str
128 then
129 let newstr = cvs_compute_newstr str in
130 rewrap_str newstr info
131 else info
132 )
133
134
135 (*****************************************************************************)
136 (* Compare at Ast level *)
137 (*****************************************************************************)
138
139 (* Note that I do a (simple) astdiff to know if there is a difference, but
140 * then I use diff to print the differences. So sometimes you have to dig
141 * a little to find really where the real difference (one not involving
142 * just spacing difference) was.
143 * Note also that the astdiff is not very accurate. As I skip comments,
144 * macro definitions, those are not in the Ast and if there is a diff
145 * between 2 files regarding macro def, then I will not be able to report it :(
146 * update: I now put the toplevel #define at least in the Ast.
147 * update: You can use token_compare for more precise diff.
148 *
149 * todo?: finer grain astdiff, better report, more precise.
150 *
151 * todo: do iso between if() S and if() { S }
152 *)
153 let compare_ast filename1 filename2 =
154
155 let xs =
156 match !Flag_parsing_c.diff_lines with
157 None ->
158 Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2)
159 | Some n ->
160 Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in
161
162 (* get rid of the --- and +++ lines *)
163 let xs =
164 if null xs
165 then xs
166 else Common.drop 2 xs
167 in
168
169
170 let process_filename filename =
171 let (c, _stat) = Parse_c.parse_c_and_cpp filename in
172 let c = List.map fst c in
173 c +> Lib_parsing_c.al_program +> normal_form_program
174 in
175
176 let c1 = process_filename filename1 in
177 let c2 = process_filename filename2 in
178
179 let error = ref 0 in
180 let pb_notparsed = ref 0 in
181
182 let res =
183 if List.length c1 <> List.length c2
184 then Pb "not same number of entities (func, decl, ...)"
185 else
186 begin
187 zip c1 c2 +> List.iter (function
188 | Declaration a, Declaration b -> if not (a =*= b) then incr error
189 | Definition a, Definition b -> if not (a =*= b) then incr error
190 | EmptyDef a, EmptyDef b -> if not (a =*= b) then incr error
191 | MacroTop (a1,b1,c1), MacroTop (a2,b2,c2) ->
192 if not ((a1,b1,c1) =*= (a2,b2,c2)) then incr error
193 | CppTop (Include {i_include = a}), CppTop (Include {i_include = b}) ->
194 if not (a =*= b) then incr error
195 | CppTop Define _, CppTop Define _ ->
196 raise Todo
197 (* if not (a =*= b) then incr error *)
198 | NotParsedCorrectly a, NotParsedCorrectly b ->
199 if not (a =*= b) then incr pb_notparsed
200 | NotParsedCorrectly a, _ ->
201 (* Pb only in generated file *)
202 incr error;
203
204 | _, NotParsedCorrectly b ->
205 incr pb_notparsed
206 | FinalDef a, FinalDef b -> if not (a =*= b) then incr error
207
208 | IfdefTop a, IfdefTop b -> if not (a =*= b) then incr error
209
210 | (FinalDef _|EmptyDef _|
211 MacroTop (_, _, _)|IfdefTop _|
212 CppTop _|Definition _|Declaration _), _ -> incr error
213
214 );
215 (match () with
216 | _ when !pb_notparsed > 0 && !error =|= 0 ->
217 PbOnlyInNotParsedCorrectly ""
218 | _ when !error > 0 -> Pb ""
219 | _ -> Correct
220 )
221 end
222 in
223 res, xs
224
225
226
227 (*****************************************************************************)
228 (* Compare at token level *)
229 (*****************************************************************************)
230
231 (* Because I now commentize more in parsing, with parsing_hacks,
232 * compare_ast may say that 2 programs are equal whereas they are not.
233 * Here I compare token, and so have still the TCommentCpp and TCommentMisc
234 * so at least detect such differences.
235 *
236 * Morover compare_ast is not very precise in his report when it
237 * detects a difference. So token_diff is better.
238 *
239 * I do token_diff but I use programCelement2, so that
240 * I know if I am in a "notparsable" zone. The tokens are
241 * in (snd programCelement2).
242 *
243 * Faire aussi un compare_token qui se moque des TCommentMisc,
244 * TCommentCPP et TIfdef ? Normalement si fait ca retrouvera
245 * les meme resultats que compare_ast.
246 *
247 *)
248
249
250 (* Pass only "true" comments, dont pass TCommentMisc and TCommentCpp *)
251 let is_normal_space_or_comment = function
252 | Parser_c.TComment _
253 | Parser_c.TCommentSpace _
254 | Parser_c.TCommentNewline _
255
256 (* | Parser_c.TComma _ *) (* UGLY, because of gcc_opt_comma isomorphism *)
257 -> true
258 | _ -> false
259
260
261 (* convetion: compare_token generated_file expected_res
262 * because when there is a notparsablezone in generated_file, I
263 * don't issue a PbOnlyInNotParsedCorrectly
264 *)
265 let do_compare_token adjust_cvs filename1 filename2 =
266
267 let rec loop xs ys =
268 match xs, ys with
269 | [], [] -> None
270
271 (* UGLY, because of gcc_opt_comma isomorphism *)
272 | (Parser_c.TComma _::Parser_c.TCBrace _::xs), (Parser_c.TCBrace _::ys) ->
273 loop xs ys
274 | (Parser_c.TCBrace _::xs), (Parser_c.TComma _::Parser_c.TCBrace _::ys) ->
275 loop xs ys
276
277 | [], x::xs ->
278 Some "not same number of tokens inside C elements"
279 | x::xs, [] ->
280 Some "not same number of tokens inside C elements"
281
282 | x::xs, y::ys ->
283 let x' = normal_form_token adjust_cvs x in
284 let y' = normal_form_token adjust_cvs y in
285 if x' =*= y'
286 then loop xs ys
287 else
288 let str1, pos1 =
289 Token_helpers.str_of_tok x, Token_helpers.pos_of_tok x in
290 let str2, pos2 =
291 Token_helpers.str_of_tok y, Token_helpers.pos_of_tok y in
292 Some ("diff token: " ^ str1 ^" VS " ^ str2 ^ "\n" ^
293 Common.error_message filename1 (str1, pos1) ^ "\n" ^
294 Common.error_message filename2 (str2, pos2) ^ "\n"
295 )
296
297 in
298 let final_loop xs ys =
299 loop
300 (xs +> List.filter (fun x -> not (is_normal_space_or_comment x)))
301 (ys +> List.filter (fun x -> not (is_normal_space_or_comment x)))
302 in
303
304 (*
305 let toks1 = Parse_c.tokens filename1 in
306 let toks2 = Parse_c.tokens filename2 in
307 loop toks1 toks2 in
308 *)
309
310 let (c1, _stat) = Parse_c.parse_c_and_cpp filename1 in
311 let (c2, _stat) = Parse_c.parse_c_and_cpp filename2 in
312
313 let res =
314 if List.length c1 <> List.length c2
315 then Pb "not same number of entities (func, decl, ...)"
316 else
317 zip c1 c2 +> Common.fold_k (fun acc ((a,infoa),(b,infob)) k ->
318 match a, b with
319 | NotParsedCorrectly a, NotParsedCorrectly b ->
320 (match final_loop (snd infoa) (snd infob) with
321 | None -> k acc
322 | Some s -> PbOnlyInNotParsedCorrectly s
323 )
324
325 | NotParsedCorrectly a, _ ->
326 Pb "PB parsing only in generated-file"
327 | _, NotParsedCorrectly b ->
328 PbOnlyInNotParsedCorrectly "PB parsing only in expected-file"
329 | _, _ ->
330 (match final_loop (snd infoa) (snd infob) with
331 | None -> k acc
332 | Some s -> Pb s
333 )
334 ) (fun acc -> acc)
335 (Correct)
336 in
337
338 let xs =
339 match !Flag_parsing_c.diff_lines with
340 None ->
341 Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2)
342 | Some n ->
343 Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in
344
345 (* get rid of the --- and +++ lines *)
346 let xs =
347 if null xs
348 then xs
349 else Common.drop 2 xs
350 in
351
352 if null xs && (res <> Correct)
353 then failwith
354 "Impossible: How can diff be null and have not Correct in compare_c?";
355
356 res, xs
357
358 let compare_token = do_compare_token true
359
360
361 (*****************************************************************************)
362
363 (* compare to a res file *)
364 let compare_default = do_compare_token true
365
366 (* compare to the source of the transformation *)
367 let compare_to_original = do_compare_token false
368
369
370 let compare_result_to_string (correct, diffxs) =
371 match correct with
372 | Correct ->
373 "seems correct" ^ "\n"
374 | Pb s ->
375 ("seems incorrect: " ^ s) ^ "\n" ^
376 "diff (result(-) vs expected_result(+)) = " ^ "\n" ^
377 (diffxs +> Common.join "\n") ^ "\n"
378 | PbOnlyInNotParsedCorrectly s ->
379 "seems incorrect, but only because of code that was not parsable" ^ "\n"^
380 ("explanation:" ^ s) ^ "\n" ^
381 "diff (result(-) vs expected_result(+)) = " ^ "\n" ^
382 (diffxs +> Common.join "\n") ^ "\n"
383
384
385 let compare_result_to_bool correct =
386 correct =*= Correct