Release coccinelle-0.1.2
[bpt/coccinelle.git] / parsing_c / compare_c.ml
1 open Common
2
3 open Ast_c
4
5
6 type compare_result =
7 | Correct
8 | Pb of string
9 | PbOnlyInNotParsedCorrectly of string
10
11
12 (*****************************************************************************)
13 (* Normalise before comparing *)
14 (*****************************************************************************)
15
16 (* List taken from CVS manual, 'Keyword substitution' chapter. Note
17 * that I do not put "Log" because it is used only in comment, and it
18 * is not enough to substituate until the end of the line. *)
19 let cvs_keyword_list = [
20 "Id";"Date"; "Revision"; (* the common one *)
21 "Name";"Author";"CVSHeader";"Header";"Locker";"RCSfile";"Source";"State";
22 "Rev";
23 ]
24
25 (* Can also have just dollarIDdollar but it is only when you have not
26 * yet committed the file. After the commit it would be a dollarIddollar:.
27 * If reput Id:, do not join the regexp!! otherwise CVS will modify it :)
28 *)
29 let cvs_keyword_regexp = Str.regexp
30 ("\\$\\([A-Za-z_]+\\):[^\\$]*\\$")
31
32
33 let cvs_compute_newstr s =
34 Str.global_substitute cvs_keyword_regexp (fun _s ->
35 let substr = Str.matched_string s in
36 assert (substr ==~ cvs_keyword_regexp); (* use its side-effect *)
37 let tag = matched1 substr in
38
39 if not (List.mem tag cvs_keyword_list)
40 then failwith ("unknown CVS keyword: " ^ tag);
41
42 "CVS_MAGIC_STRING"
43 ) s
44
45
46
47
48 (* todo: get rid of the type for expressions ? *)
49 let normal_form_program xs =
50 let bigf = { Visitor_c.default_visitor_c_s with
51
52 Visitor_c.kini_s = (fun (k,bigf) ini ->
53 match ini with
54 | InitList xs, [i1;i2;iicommaopt] ->
55 k (InitList xs, [i1;i2])
56 | _ -> k ini
57 );
58 Visitor_c.kexpr_s = (fun (k,bigf) e ->
59 match e with
60 (* todo: should also do something for multistrings *)
61 | (Constant (String (s,kind)), typ), [ii]
62 when Common.string_match_substring cvs_keyword_regexp s ->
63 let newstr = cvs_compute_newstr s in
64 (Constant (String (newstr,kind)), typ), [rewrap_str newstr ii]
65 | _ -> k e
66
67 );
68 Visitor_c.ktoplevel_s = (fun (k,bigf) p ->
69 match p with
70 | CppTop (Define _) ->
71 raise Todo
72 (*
73 let (i1, i2, i3) = Common.tuple_of_list3 ii in
74 if Common.string_match_substring cvs_keyword_regexp body
75 then
76 let newstr = cvs_compute_newstr body in
77 Define ((s, newstr), [i1;i2;rewrap_str newstr i3])
78 else p
79 *)
80 | _ -> k p
81 );
82
83 (*
84 Visitor_c.kinfo_s = (fun (k,bigf) i ->
85 let s = Ast_c.get_str_of_info i in
86 if Common.string_match_substring cvs_keyword_regexp s
87 then
88 let newstr = cvs_compute_newstr s in
89 rewrap_str newstr i
90 else i
91 );
92 *)
93
94 }
95 in
96 xs +> List.map (fun p -> Visitor_c.vk_toplevel_s bigf p)
97
98
99
100
101
102
103 let normal_form_token x =
104 let x' =
105 match x with
106 | Parser_c.TString ((s, kind),i1) -> Parser_c.TString (("",kind), i1)
107 | x -> x
108 in
109 x' +> Token_helpers.visitor_info_of_tok (fun info ->
110 let info = Ast_c.al_info 0 info in
111 let str = Ast_c.str_of_info info in
112 if Common.string_match_substring cvs_keyword_regexp str
113 then
114 let newstr = cvs_compute_newstr str in
115 rewrap_str newstr info
116 else info
117 )
118
119
120 (*****************************************************************************)
121 (* Compare at Ast level *)
122 (*****************************************************************************)
123
124 (* Note that I do a (simple) astdiff to know if there is a difference, but
125 * then I use diff to print the differences. So sometimes you have to dig
126 * a little to find really where the real difference (one not involving
127 * just spacing difference) was.
128 * Note also that the astdiff is not very accurate. As I skip comments,
129 * macro definitions, those are not in the Ast and if there is a diff
130 * between 2 files regarding macro def, then I will not be able to report it :(
131 * update: I now put the toplevel #define at least in the Ast.
132 * update: You can use token_compare for more precise diff.
133 *
134 * todo?: finer grain astdiff, better report, more precise.
135 *
136 * todo: do iso between if() S and if() { S }
137 *)
138 let compare_ast filename1 filename2 =
139
140 let xs =
141 match !Flag_parsing_c.diff_lines with
142 None ->
143 Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2)
144 | Some n ->
145 Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in
146
147 (* get rid of the --- and +++ lines *)
148 let xs =
149 if null xs
150 then xs
151 else Common.drop 2 xs
152 in
153
154
155 let process_filename filename =
156 let (c, _stat) = Parse_c.parse_print_error_heuristic filename in
157 let c = List.map fst c in
158 c +> Lib_parsing_c.al_program +> normal_form_program
159 in
160
161 let c1 = process_filename filename1 in
162 let c2 = process_filename filename2 in
163
164 let error = ref 0 in
165 let pb_notparsed = ref 0 in
166
167 let res =
168 if List.length c1 <> List.length c2
169 then Pb "not same number of entities (func, decl, ...)"
170 else
171 begin
172 zip c1 c2 +> List.iter (function
173 | Declaration a, Declaration b -> if not (a =*= b) then incr error
174 | Definition a, Definition b -> if not (a =*= b) then incr error
175 | EmptyDef a, EmptyDef b -> if not (a =*= b) then incr error
176 | MacroTop (a1,b1,c1), MacroTop (a2,b2,c2) ->
177 if not ((a1,b1,c1) =*= (a2,b2,c2)) then incr error
178 | CppTop (Include {i_include = a}), CppTop (Include {i_include = b}) ->
179 if not (a =*= b) then incr error
180 | CppTop Define _, CppTop Define _ ->
181 raise Todo
182 (* if not (a =*= b) then incr error *)
183 | NotParsedCorrectly a, NotParsedCorrectly b ->
184 if not (a =*= b) then incr pb_notparsed
185 | NotParsedCorrectly a, _ ->
186 (* Pb only in generated file *)
187 incr error;
188
189 | _, NotParsedCorrectly b ->
190 incr pb_notparsed
191 | FinalDef a, FinalDef b -> if not (a =*= b) then incr error
192
193 | IfdefTop a, IfdefTop b -> if not (a =*= b) then incr error
194
195 | (FinalDef _|EmptyDef _|
196 MacroTop (_, _, _)|IfdefTop _|
197 CppTop _|Definition _|Declaration _), _ -> incr error
198
199 );
200 (match () with
201 | _ when !pb_notparsed > 0 && !error = 0 ->
202 PbOnlyInNotParsedCorrectly ""
203 | _ when !error > 0 -> Pb ""
204 | _ -> Correct
205 )
206 end
207 in
208 res, xs
209
210
211
212 (*****************************************************************************)
213 (* Compare at token level *)
214 (*****************************************************************************)
215
216 (* Because I now commentize more in parsing, with parsing_hacks,
217 * compare_ast may say that 2 programs are equal whereas they are not.
218 * Here I compare token, and so have still the TCommentCpp and TCommentMisc
219 * so at least detect such differences.
220 *
221 * Morover compare_ast is not very precise in his report when it
222 * detects a difference. So token_diff is better.
223 *
224 * I do token_diff but I use programCelement2, so that
225 * I know if I am in a "notparsable" zone. The tokens are
226 * in (snd programCelement2).
227 *
228 * Faire aussi un compare_token qui se moque des TCommentMisc,
229 * TCommentCPP et TIfdef ? Normalement si fait ca retrouvera
230 * les meme resultats que compare_ast.
231 *
232 *)
233
234
235 (* Pass only "true" comments, dont pass TCommentMisc and TCommentCpp *)
236 let is_normal_space_or_comment = function
237 | Parser_c.TComment _
238 | Parser_c.TCommentSpace _
239 | Parser_c.TCommentNewline _
240
241 (* | Parser_c.TComma _ *) (* UGLY, because of gcc_opt_comma isomorphism *)
242 -> true
243 | _ -> false
244
245
246 (* convetion: compare_token generated_file expected_res
247 * because when there is a notparsablezone in generated_file, I
248 * don't issue a PbOnlyInNotParsedCorrectly
249 *)
250 let compare_token filename1 filename2 =
251
252
253 let rec loop xs ys =
254 match xs, ys with
255 | [], [] -> None
256
257 (* UGLY, because of gcc_opt_comma isomorphism *)
258 | (Parser_c.TComma _::Parser_c.TCBrace _::xs), (Parser_c.TCBrace _::ys) ->
259 loop xs ys
260 | (Parser_c.TCBrace _::xs), (Parser_c.TComma _::Parser_c.TCBrace _::ys) ->
261 loop xs ys
262
263 | [], x::xs ->
264 Some "not same number of tokens inside C elements"
265 | x::xs, [] ->
266 Some "not same number of tokens inside C elements"
267
268 | x::xs, y::ys ->
269 let x' = normal_form_token x in
270 let y' = normal_form_token y in
271 if x' = y'
272 then loop xs ys
273 else
274 let str1, pos1 =
275 Token_helpers.str_of_tok x, Token_helpers.pos_of_tok x in
276 let str2, pos2 =
277 Token_helpers.str_of_tok y, Token_helpers.pos_of_tok y in
278 Some ("diff token: " ^ str1 ^" VS " ^ str2 ^ "\n" ^
279 Common.error_message filename1 (str1, pos1) ^ "\n" ^
280 Common.error_message filename2 (str2, pos2) ^ "\n"
281 )
282
283 in
284 let final_loop xs ys =
285 loop
286 (xs +> List.filter (fun x -> not (is_normal_space_or_comment x)))
287 (ys +> List.filter (fun x -> not (is_normal_space_or_comment x)))
288 in
289
290 (*
291 let toks1 = Parse_c.tokens filename1 in
292 let toks2 = Parse_c.tokens filename2 in
293 loop toks1 toks2 in
294 *)
295
296 let (c1, _stat) = Parse_c.parse_print_error_heuristic filename1 in
297 let (c2, _stat) = Parse_c.parse_print_error_heuristic filename2 in
298
299 let res =
300 if List.length c1 <> List.length c2
301 then Pb "not same number of entities (func, decl, ...)"
302 else
303 zip c1 c2 +> Common.fold_k (fun acc ((a,infoa),(b,infob)) k ->
304 match a, b with
305 | NotParsedCorrectly a, NotParsedCorrectly b ->
306 (match final_loop (snd infoa) (snd infob) with
307 | None -> k acc
308 | Some s -> PbOnlyInNotParsedCorrectly s
309 )
310
311 | NotParsedCorrectly a, _ ->
312 Pb "PB parsing only in generated-file"
313 | _, NotParsedCorrectly b ->
314 PbOnlyInNotParsedCorrectly "PB parsing only in expected-file"
315 | _, _ ->
316 (match final_loop (snd infoa) (snd infob) with
317 | None -> k acc
318 | Some s -> Pb s
319 )
320 ) (fun acc -> acc)
321 (Correct)
322 in
323
324 let xs =
325 match !Flag_parsing_c.diff_lines with
326 None ->
327 Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2)
328 | Some n ->
329 Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in
330
331 (* get rid of the --- and +++ lines *)
332 let xs =
333 if null xs
334 then xs
335 else Common.drop 2 xs
336 in
337
338 if null xs && (res <> Correct)
339 then failwith
340 "Impossible: How can diff be null and have not Correct in compare_c?";
341
342 res, xs
343
344
345
346
347 (*****************************************************************************)
348
349 let compare_default = compare_token
350
351
352 let compare_result_to_string (correct, diffxs) =
353 match correct with
354 | Correct ->
355 "seems correct" ^ "\n"
356 | Pb s ->
357 ("seems incorrect: " ^ s) ^ "\n" ^
358 "diff (result(-) vs expected_result(+)) = " ^ "\n" ^
359 (diffxs +> Common.join "\n") ^ "\n"
360 | PbOnlyInNotParsedCorrectly s ->
361 "seems incorrect, but only because of code that was not parsable" ^ "\n"^
362 ("explanation:" ^ s) ^ "\n" ^
363 "diff (result(-) vs expected_result(+)) = " ^ "\n" ^
364 (diffxs +> Common.join "\n") ^ "\n"
365
366
367 let compare_result_to_bool correct =
368 correct = Correct