bb5894acc6b026c478d6dadce267e29a7715c90c
[bpt/coccinelle.git] / parsing_c / compare_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2006, 2007 Ecole des Mines de Nantes
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
13 *)
14 open Common
15
16 open Ast_c
17
18
19 type compare_result =
20 | Correct
21 | Pb of string
22 | PbOnlyInNotParsedCorrectly of string
23
24
25 (*****************************************************************************)
26 (* Normalise before comparing *)
27 (*****************************************************************************)
28
29 (* List taken from CVS manual, 'Keyword substitution' chapter. Note
30 * that I do not put "Log" because it is used only in comment, and it
31 * is not enough to substituate until the end of the line. *)
32 let cvs_keyword_list = [
33 "Id";"Date"; "Revision"; (* the common one *)
34 "Name";"Author";"CVSHeader";"Header";"Locker";"RCSfile";"Source";"State";
35 "Rev";
36 ]
37
38 (* Can also have just dollarIDdollar but it is only when you have not
39 * yet committed the file. After the commit it would be a dollarIddollar:.
40 * If reput Id:, do not join the regexp!! otherwise CVS will modify it :)
41 *)
42 let cvs_keyword_regexp = Str.regexp
43 ("\\$\\([A-Za-z_]+\\):[^\\$]*\\$")
44
45
46 let cvs_compute_newstr s =
47 Str.global_substitute cvs_keyword_regexp (fun _s ->
48 let substr = Str.matched_string s in
49 assert (substr ==~ cvs_keyword_regexp); (* use its side-effect *)
50 let tag = matched1 substr in
51
52 if not (List.mem tag cvs_keyword_list)
53 then failwith ("unknown CVS keyword: " ^ tag);
54
55 "CVS_MAGIC_STRING"
56 ) s
57
58
59
60
61 (* todo: get rid of the type for expressions ? *)
62 let normal_form_program xs =
63 let bigf = { Visitor_c.default_visitor_c_s with
64
65 Visitor_c.kini_s = (fun (k,bigf) ini ->
66 match ini with
67 | InitList xs, [i1;i2;iicommaopt] ->
68 k (InitList xs, [i1;i2])
69 | _ -> k ini
70 );
71 Visitor_c.kexpr_s = (fun (k,bigf) e ->
72 match e with
73 (* todo: should also do something for multistrings *)
74 | (Constant (String (s,kind)), typ), [ii]
75 when Common.string_match_substring cvs_keyword_regexp s ->
76 let newstr = cvs_compute_newstr s in
77 (Constant (String (newstr,kind)), typ), [rewrap_str newstr ii]
78 | _ -> k e
79
80 );
81 Visitor_c.ktoplevel_s = (fun (k,bigf) p ->
82 match p with
83 | CppTop (Define _) ->
84 raise Todo
85 (*
86 let (i1, i2, i3) = Common.tuple_of_list3 ii in
87 if Common.string_match_substring cvs_keyword_regexp body
88 then
89 let newstr = cvs_compute_newstr body in
90 Define ((s, newstr), [i1;i2;rewrap_str newstr i3])
91 else p
92 *)
93 | _ -> k p
94 );
95
96 (*
97 Visitor_c.kinfo_s = (fun (k,bigf) i ->
98 let s = Ast_c.get_str_of_info i in
99 if Common.string_match_substring cvs_keyword_regexp s
100 then
101 let newstr = cvs_compute_newstr s in
102 rewrap_str newstr i
103 else i
104 );
105 *)
106
107 }
108 in
109 xs +> List.map (fun p -> Visitor_c.vk_toplevel_s bigf p)
110
111
112
113
114
115
116 let normal_form_token x =
117 let x' =
118 match x with
119 | Parser_c.TString ((s, kind),i1) -> Parser_c.TString (("",kind), i1)
120 | x -> x
121 in
122 x' +> Token_helpers.visitor_info_of_tok (fun info ->
123 let info = Ast_c.al_info 0 info in
124 let str = Ast_c.str_of_info info in
125 if Common.string_match_substring cvs_keyword_regexp str
126 then
127 let newstr = cvs_compute_newstr str in
128 rewrap_str newstr info
129 else info
130 )
131
132
133 (*****************************************************************************)
134 (* Compare at Ast level *)
135 (*****************************************************************************)
136
137 (* Note that I do a (simple) astdiff to know if there is a difference, but
138 * then I use diff to print the differences. So sometimes you have to dig
139 * a little to find really where the real difference (one not involving
140 * just spacing difference) was.
141 * Note also that the astdiff is not very accurate. As I skip comments,
142 * macro definitions, those are not in the Ast and if there is a diff
143 * between 2 files regarding macro def, then I will not be able to report it :(
144 * update: I now put the toplevel #define at least in the Ast.
145 * update: You can use token_compare for more precise diff.
146 *
147 * todo?: finer grain astdiff, better report, more precise.
148 *
149 * todo: do iso between if() S and if() { S }
150 *)
151 let compare_ast filename1 filename2 =
152
153 let xs =
154 match !Flag_parsing_c.diff_lines with
155 None ->
156 Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2)
157 | Some n ->
158 Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in
159
160 (* get rid of the --- and +++ lines *)
161 let xs =
162 if null xs
163 then xs
164 else Common.drop 2 xs
165 in
166
167
168 let process_filename filename =
169 let (c, _stat) = Parse_c.parse_print_error_heuristic filename in
170 let c = List.map fst c in
171 c +> Lib_parsing_c.al_program +> normal_form_program
172 in
173
174 let c1 = process_filename filename1 in
175 let c2 = process_filename filename2 in
176
177 let error = ref 0 in
178 let pb_notparsed = ref 0 in
179
180 let res =
181 if List.length c1 <> List.length c2
182 then Pb "not same number of entities (func, decl, ...)"
183 else
184 begin
185 zip c1 c2 +> List.iter (function
186 | Declaration a, Declaration b -> if not (a =*= b) then incr error
187 | Definition a, Definition b -> if not (a =*= b) then incr error
188 | EmptyDef a, EmptyDef b -> if not (a =*= b) then incr error
189 | MacroTop (a1,b1,c1), MacroTop (a2,b2,c2) ->
190 if not ((a1,b1,c1) =*= (a2,b2,c2)) then incr error
191 | CppTop (Include {i_include = a}), CppTop (Include {i_include = b}) ->
192 if not (a =*= b) then incr error
193 | CppTop Define _, CppTop Define _ ->
194 raise Todo
195 (* if not (a =*= b) then incr error *)
196 | NotParsedCorrectly a, NotParsedCorrectly b ->
197 if not (a =*= b) then incr pb_notparsed
198 | NotParsedCorrectly a, _ ->
199 (* Pb only in generated file *)
200 incr error;
201
202 | _, NotParsedCorrectly b ->
203 incr pb_notparsed
204 | FinalDef a, FinalDef b -> if not (a =*= b) then incr error
205
206 | IfdefTop a, IfdefTop b -> if not (a =*= b) then incr error
207
208 | (FinalDef _|EmptyDef _|
209 MacroTop (_, _, _)|IfdefTop _|
210 CppTop _|Definition _|Declaration _), _ -> incr error
211
212 );
213 (match () with
214 | _ when !pb_notparsed > 0 && !error =|= 0 ->
215 PbOnlyInNotParsedCorrectly ""
216 | _ when !error > 0 -> Pb ""
217 | _ -> Correct
218 )
219 end
220 in
221 res, xs
222
223
224
225 (*****************************************************************************)
226 (* Compare at token level *)
227 (*****************************************************************************)
228
229 (* Because I now commentize more in parsing, with parsing_hacks,
230 * compare_ast may say that 2 programs are equal whereas they are not.
231 * Here I compare token, and so have still the TCommentCpp and TCommentMisc
232 * so at least detect such differences.
233 *
234 * Morover compare_ast is not very precise in his report when it
235 * detects a difference. So token_diff is better.
236 *
237 * I do token_diff but I use programCelement2, so that
238 * I know if I am in a "notparsable" zone. The tokens are
239 * in (snd programCelement2).
240 *
241 * Faire aussi un compare_token qui se moque des TCommentMisc,
242 * TCommentCPP et TIfdef ? Normalement si fait ca retrouvera
243 * les meme resultats que compare_ast.
244 *
245 *)
246
247
248 (* Pass only "true" comments, dont pass TCommentMisc and TCommentCpp *)
249 let is_normal_space_or_comment = function
250 | Parser_c.TComment _
251 | Parser_c.TCommentSpace _
252 | Parser_c.TCommentNewline _
253
254 (* | Parser_c.TComma _ *) (* UGLY, because of gcc_opt_comma isomorphism *)
255 -> true
256 | _ -> false
257
258
259 (* convetion: compare_token generated_file expected_res
260 * because when there is a notparsablezone in generated_file, I
261 * don't issue a PbOnlyInNotParsedCorrectly
262 *)
263 let compare_token filename1 filename2 =
264
265
266 let rec loop xs ys =
267 match xs, ys with
268 | [], [] -> None
269
270 (* UGLY, because of gcc_opt_comma isomorphism *)
271 | (Parser_c.TComma _::Parser_c.TCBrace _::xs), (Parser_c.TCBrace _::ys) ->
272 loop xs ys
273 | (Parser_c.TCBrace _::xs), (Parser_c.TComma _::Parser_c.TCBrace _::ys) ->
274 loop xs ys
275
276 | [], x::xs ->
277 Some "not same number of tokens inside C elements"
278 | x::xs, [] ->
279 Some "not same number of tokens inside C elements"
280
281 | x::xs, y::ys ->
282 let x' = normal_form_token x in
283 let y' = normal_form_token y in
284 if x' =*= y'
285 then loop xs ys
286 else
287 let str1, pos1 =
288 Token_helpers.str_of_tok x, Token_helpers.pos_of_tok x in
289 let str2, pos2 =
290 Token_helpers.str_of_tok y, Token_helpers.pos_of_tok y in
291 Some ("diff token: " ^ str1 ^" VS " ^ str2 ^ "\n" ^
292 Common.error_message filename1 (str1, pos1) ^ "\n" ^
293 Common.error_message filename2 (str2, pos2) ^ "\n"
294 )
295
296 in
297 let final_loop xs ys =
298 loop
299 (xs +> List.filter (fun x -> not (is_normal_space_or_comment x)))
300 (ys +> List.filter (fun x -> not (is_normal_space_or_comment x)))
301 in
302
303 (*
304 let toks1 = Parse_c.tokens filename1 in
305 let toks2 = Parse_c.tokens filename2 in
306 loop toks1 toks2 in
307 *)
308
309 let (c1, _stat) = Parse_c.parse_print_error_heuristic filename1 in
310 let (c2, _stat) = Parse_c.parse_print_error_heuristic filename2 in
311
312 let res =
313 if List.length c1 <> List.length c2
314 then Pb "not same number of entities (func, decl, ...)"
315 else
316 zip c1 c2 +> Common.fold_k (fun acc ((a,infoa),(b,infob)) k ->
317 match a, b with
318 | NotParsedCorrectly a, NotParsedCorrectly b ->
319 (match final_loop (snd infoa) (snd infob) with
320 | None -> k acc
321 | Some s -> PbOnlyInNotParsedCorrectly s
322 )
323
324 | NotParsedCorrectly a, _ ->
325 Pb "PB parsing only in generated-file"
326 | _, NotParsedCorrectly b ->
327 PbOnlyInNotParsedCorrectly "PB parsing only in expected-file"
328 | _, _ ->
329 (match final_loop (snd infoa) (snd infob) with
330 | None -> k acc
331 | Some s -> Pb s
332 )
333 ) (fun acc -> acc)
334 (Correct)
335 in
336
337 let xs =
338 match !Flag_parsing_c.diff_lines with
339 None ->
340 Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2)
341 | Some n ->
342 Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in
343
344 (* get rid of the --- and +++ lines *)
345 let xs =
346 if null xs
347 then xs
348 else Common.drop 2 xs
349 in
350
351 if null xs && (res <> Correct)
352 then failwith
353 "Impossible: How can diff be null and have not Correct in compare_c?";
354
355 res, xs
356
357
358
359
360 (*****************************************************************************)
361
362 let compare_default = compare_token
363
364
365 let compare_result_to_string (correct, diffxs) =
366 match correct with
367 | Correct ->
368 "seems correct" ^ "\n"
369 | Pb s ->
370 ("seems incorrect: " ^ s) ^ "\n" ^
371 "diff (result(-) vs expected_result(+)) = " ^ "\n" ^
372 (diffxs +> Common.join "\n") ^ "\n"
373 | PbOnlyInNotParsedCorrectly s ->
374 "seems incorrect, but only because of code that was not parsable" ^ "\n"^
375 ("explanation:" ^ s) ^ "\n" ^
376 "diff (result(-) vs expected_result(+)) = " ^ "\n" ^
377 (diffxs +> Common.join "\n") ^ "\n"
378
379
380 let compare_result_to_bool correct =
381 correct =*= Correct