Release coccinelle-0.1
[bpt/coccinelle.git] / parsing_c / compare_c.ml
1 open Common
2
3 open Ast_c
4
5
(* Outcome of comparing two C files. The string payloads carry a
 * human-readable explanation, which may be empty. *)
type compare_result =
  (* no difference was detected *)
  | Correct
  (* the two files differ *)
  | Pb of string
  (* the files differ, but only because of code that was not parsable *)
  | PbOnlyInNotParsedCorrectly of string
10
11
12 (*****************************************************************************)
13 (* Normalise before comparing *)
14 (*****************************************************************************)
15
(* Keywords taken from the 'Keyword substitution' chapter of the CVS manual.
 * "Log" is deliberately left out: it is only used inside comments, and
 * substituting up to the end of the line is not enough for it. *)
let cvs_keyword_list =
  (* the common ones come first *)
  [ "Id"; "Date"; "Revision" ]
  @ [ "Name"; "Author"; "CVSHeader"; "Header"; "Locker"; "RCSfile";
      "Source"; "State";
      "Rev";
    ]
24
(* Matches an expanded CVS keyword: a dollar, the keyword name (captured
 * as group 1), a colon, the expanded value, and a closing dollar.
 *
 * The bare dollarIDdollar form can also occur, but only as long as the
 * file has not been committed yet. After the commit it becomes a
 * dollarIddollar: form.
 * If you ever spell out Id: here, do not join it to the dollars,
 * otherwise CVS will rewrite this very comment :)
 *)
let cvs_keyword_regexp =
  Str.regexp "\\$\\([A-Za-z_]+\\):[^\\$]*\\$"
31
32
(* [cvs_compute_newstr s] returns [s] where every expanded CVS keyword
 * (every match of [cvs_keyword_regexp]) has been replaced by the fixed
 * marker CVS_MAGIC_STRING, so that two strings differing only in their
 * CVS expansion become equal.
 *
 * Raises [Failure] when the matched keyword name is not a member of
 * [cvs_keyword_list].
 *)
let cvs_compute_newstr s =
  Str.global_substitute cvs_keyword_regexp (fun subject ->
    (* Group 1 of the match that global_substitute has just found is the
     * keyword name. Read it directly from the current match state: the
     * previous code re-ran the regexp inside an assert only for its side
     * effect, and an assert is compiled away by -noassert, which left the
     * match positions referring to a different string. *)
    let tag = Str.matched_group 1 subject in

    if not (List.mem tag cvs_keyword_list)
    then failwith ("unknown CVS keyword: " ^ tag);

    "CVS_MAGIC_STRING"
  ) s
44
45
46
47
(* Rewrites every toplevel element of the program [xs] into a normal form,
 * so that some irrelevant differences disappear before two programs are
 * compared:
 *  - an initialiser list carrying exactly three infos loses the third one
 *    (named iicommaopt here, presumably the optional trailing comma);
 *  - a string constant containing an expanded CVS keyword has it replaced
 *    through [cvs_compute_newstr].
 * Raises [Todo] on a toplevel [Define], which is not handled yet.
 *
 * todo: get rid of the type for expressions ?
 *)
let normal_form_program xs =

  (* initialisers: forget the third info of an InitList *)
  let normalise_ini (k, _bigf) ini =
    match ini with
    | InitList elems, [i1; i2; _iicommaopt] ->
        k (InitList elems, [i1; i2])
    | _ -> k ini
  in

  (* expressions: neutralise CVS keywords inside string constants *)
  let normalise_expr (k, _bigf) e =
    match e with
    (* todo: should also do something for multistrings *)
    | (Constant (String (s, kind)), typ), [ii]
      when Common.string_match_substring cvs_keyword_regexp s ->
        let newstr = cvs_compute_newstr s in
        (Constant (String (newstr, kind)), typ), [rewrap_str newstr ii]
    | _ -> k e
  in

  (* toplevel elements: #define is not supported yet *)
  let normalise_toplevel (k, _bigf) p =
    match p with
    | Define _ ->
        raise Todo
        (*
        let (i1, i2, i3) = Common.tuple_of_list3 ii in
        if Common.string_match_substring cvs_keyword_regexp body
        then
          let newstr = cvs_compute_newstr body in
          Define ((s, newstr), [i1;i2;rewrap_str newstr i3])
        else p
        *)
    | _ -> k p
  in

  let visitor = { Visitor_c.default_visitor_c_s with
    Visitor_c.kini_s = normalise_ini;
    Visitor_c.kexpr_s = normalise_expr;
    Visitor_c.ktoplevel_s = normalise_toplevel;

    (*
    Visitor_c.kinfo_s = (fun (k,bigf) i ->
      let s = Ast_c.get_str_of_info i in
      if Common.string_match_substring cvs_keyword_regexp s
      then
        let newstr = cvs_compute_newstr s in
        rewrap_str newstr i
      else i
    );
    *)
  }
  in
  List.map (fun toplevel -> Visitor_c.vk_toplevel_s visitor toplevel) xs
97
98
99
100
101
102
(* Normalises one token before comparison:
 *  - the content of a TString token is replaced by the empty string;
 *  - the info of the token goes through [Ast_c.al_info 0], and if its
 *    string contains an expanded CVS keyword that string is rewritten
 *    through [cvs_compute_newstr].
 *)
let normal_form_token x =
  let normalise_info info =
    let abstracted = Ast_c.al_info 0 info in
    let str = Ast_c.str_of_info abstracted in
    if Common.string_match_substring cvs_keyword_regexp str
    then rewrap_str (cvs_compute_newstr str) abstracted
    else abstracted
  in
  let blanked =
    match x with
    | Parser_c.TString ((_s, kind), i1) -> Parser_c.TString (("", kind), i1)
    | other -> other
  in
  Token_helpers.visitor_info_of_tok normalise_info blanked
118
119
120 (*****************************************************************************)
121 (* Compare at Ast level *)
122 (*****************************************************************************)
123
124 (* Note that I do a (simple) astdiff to know if there is a difference, but
125 * then I use diff to print the differences. So sometimes you have to dig
126 * a little to find really where the real difference (one not involving
127 * just spacing difference) was.
128 * Note also that the astdiff is not very accurate. As I skip comments,
129 * macro definitions, those are not in the Ast and if there is a diff
130 * between 2 files regarding macro def, then I will not be able to report it :(
131 * update: I now put the toplevel #define at least in the Ast.
132 * update: You can use token_compare for more precise diff.
133 *
134 * todo?: finer grain astdiff, better report, more precise.
135 *
136 * todo: do iso between if() S and if() { S }
137 *)
(* [compare_ast filename1 filename2] parses both files, normalises the two
 * ASTs (al_program, then normal_form_program) and compares them toplevel
 * element by toplevel element.
 *
 * Returns [(res, diff)] where [diff] is the output of the diff command on
 * the two files, minus its two header lines, and [res] is:
 *  - [Pb msg] when the files do not have the same number of toplevel
 *    elements, or when at least one pair of elements differs;
 *  - [PbOnlyInNotParsedCorrectly ""] when the only differences involve
 *    elements that were not parsed correctly;
 *  - [Correct] otherwise.
 *
 * Raises [Todo] when a toplevel [Define] is met.
 *)
let compare_ast filename1 filename2 =

  (* The filenames are shell-quoted: the command goes through a shell, and
   * a name containing a space or a metacharacter would otherwise break or
   * alter the command. *)
  let diff_cmd =
    let files = Filename.quote filename1 ^ " " ^ Filename.quote filename2 in
    match !Flag_parsing_c.diff_lines with
    | None -> "diff -u -b -B " ^ files
    | Some n -> "diff -U " ^ n ^ " -b -B " ^ files
  in
  let xs = Common.cmd_to_list diff_cmd in

  (* get rid of the --- and +++ lines *)
  let xs = if null xs then xs else Common.drop 2 xs in

  let process_filename filename =
    let (c, _stat) = Parse_c.parse_print_error_heuristic filename in
    let c = List.map fst c in
    c +> Lib_parsing_c.al_program +> normal_form_program
  in

  let c1 = process_filename filename1 in
  let c2 = process_filename filename2 in

  (* number of real differences *)
  let error = ref 0 in
  (* number of differences that involve a not-parsed-correctly element *)
  let pb_notparsed = ref 0 in

  let res =
    if List.length c1 <> List.length c2
    then Pb "not same number of entities (func, decl, ...)"
    else begin
      zip c1 c2 +> List.iter (function
      | Declaration a, Declaration b -> if not (a =*= b) then incr error
      | Definition a, Definition b -> if not (a =*= b) then incr error
      | EmptyDef a, EmptyDef b -> if not (a =*= b) then incr error
      | MacroTop (x1,y1,z1), MacroTop (x2,y2,z2) ->
          if not ((x1,y1,z1) =*= (x2,y2,z2)) then incr error
      | Include (a,_), Include (b,_) -> if not (a =*= b) then incr error
      | Define _, Define _ ->
          raise Todo
          (* if not (a =*= b) then incr error *)
      | NotParsedCorrectly a, NotParsedCorrectly b ->
          if not (a =*= b) then incr pb_notparsed
      | NotParsedCorrectly _, _ ->
          (* Pb only in generated file *)
          incr error
      | _, NotParsedCorrectly _ ->
          incr pb_notparsed
      | FinalDef a, FinalDef b -> if not (a =*= b) then incr error
      | _, _ -> incr error
      );
      (* a real difference takes precedence over a parsing-only one *)
      if !error > 0 then Pb ""
      else if !pb_notparsed > 0 then PbOnlyInNotParsedCorrectly ""
      else Correct
    end
  in
  res, xs
202
203
204
205 (*****************************************************************************)
206 (* Compare at token level *)
207 (*****************************************************************************)
208
(* Because I now commentize more during parsing (with parsing_hacks),
 * compare_ast may say that 2 programs are equal whereas they are not.
 * Here I compare tokens, which still include the TCommentCpp and
 * TCommentMisc tokens, so such differences are at least detected.
 *
 * Moreover compare_ast is not very precise in its report when it
 * detects a difference. So token_diff is better.
 *
 * I do token_diff but I use programCelement2, so that
 * I know if I am in a "notparsable" zone. The tokens are
 * in (snd programCelement2).
 *
 * Also write a compare_token that ignores TCommentMisc,
 * TCommentCPP and TIfdef ? Normally, if that is done, it should
 * give the same results as compare_ast.
 *
 *)
226
227
(* Accepts only "true" comments and spacing tokens; TCommentMisc and
 * TCommentCpp are not accepted. *)
let is_normal_space_or_comment tok =
  match tok with
  | Parser_c.TComment _ | Parser_c.TCommentSpace _ | Parser_c.TCommentNewline _ ->
      true
  (* | Parser_c.TComma _ -> true *) (* UGLY, because of gcc_opt_comma isomorphism *)
  | _ ->
      false
237
238
(* convention: compare_token generated_file expected_res
 * because when there is a notparsablezone in generated_file, I
 * do not issue a PbOnlyInNotParsedCorrectly
 *
 * Parses both files and compares, toplevel element by toplevel element,
 * their tokens once normal spaces and comments have been filtered out and
 * each token has gone through [normal_form_token].
 *
 * Returns [(res, diff)] where [diff] is the output of the diff command on
 * the two files, minus its two header lines, and [res] is the first
 * problem found, or [Correct] when there is none.
 *
 * Raises [Failure] when the diff output is empty although [res] is not
 * [Correct].
 *)
let compare_token filename1 filename2 =

  (* Walks both token lists in parallel; [None] means no difference,
   * [Some msg] describes the first difference found. *)
  let rec loop xs ys =
    match xs, ys with
    | [], [] -> None

    (* UGLY, because of gcc_opt_comma isomorphism *)
    | (Parser_c.TComma _::Parser_c.TCBrace _::xs), (Parser_c.TCBrace _::ys) ->
        loop xs ys
    | (Parser_c.TCBrace _::xs), (Parser_c.TComma _::Parser_c.TCBrace _::ys) ->
        loop xs ys

    | [], _::_
    | _::_, [] ->
        Some "not same number of tokens inside C elements"

    | x::xs, y::ys ->
        let x' = normal_form_token x in
        let y' = normal_form_token y in
        if x' = y'
        then loop xs ys
        else
          let str1 = Token_helpers.str_of_tok x in
          let pos1 = Token_helpers.pos_of_tok x in
          let str2 = Token_helpers.str_of_tok y in
          let pos2 = Token_helpers.pos_of_tok y in
          Some ("diff token: " ^ str1 ^" VS " ^ str2 ^ "\n" ^
                Common.error_message filename1 (str1, pos1) ^ "\n" ^
                Common.error_message filename2 (str2, pos2) ^ "\n"
          )
  in
  let significant tok = not (is_normal_space_or_comment tok) in
  let final_loop xs ys =
    loop (List.filter significant xs) (List.filter significant ys)
  in

  (*
    let toks1 = Parse_c.tokens filename1 in
    let toks2 = Parse_c.tokens filename2 in
    loop toks1 toks2 in
  *)

  let (c1, _stat) = Parse_c.parse_print_error_heuristic filename1 in
  let (c2, _stat) = Parse_c.parse_print_error_heuristic filename2 in

  let res =
    if List.length c1 <> List.length c2
    then Pb "not same number of entities (func, decl, ...)"
    else
      (* fold_k lets the comparison stop at the first problem: the
       * continuation k is only called when the current pair is fine *)
      zip c1 c2 +> Common.fold_k (fun acc ((a,infoa),(b,infob)) k ->
        match a, b with
        | NotParsedCorrectly _, NotParsedCorrectly _ ->
            (match final_loop (snd infoa) (snd infob) with
            | None -> k acc
            | Some s -> PbOnlyInNotParsedCorrectly s
            )

        | NotParsedCorrectly _, _ ->
            Pb "PB parsing only in generated-file"
        | _, NotParsedCorrectly _ ->
            PbOnlyInNotParsedCorrectly "PB parsing only in expected-file"
        | _, _ ->
            (match final_loop (snd infoa) (snd infob) with
            | None -> k acc
            | Some s -> Pb s
            )
      ) (fun acc -> acc)
        (Correct)
  in

  (* The filenames are shell-quoted: the command goes through a shell, and
   * a name containing a space or a metacharacter would otherwise break or
   * alter the command. *)
  let diff_cmd =
    let files = Filename.quote filename1 ^ " " ^ Filename.quote filename2 in
    match !Flag_parsing_c.diff_lines with
    | None -> "diff -u -b -B " ^ files
    | Some n -> "diff -U " ^ n ^ " -b -B " ^ files
  in
  let xs = Common.cmd_to_list diff_cmd in

  (* get rid of the --- and +++ lines *)
  let xs = if null xs then xs else Common.drop 2 xs in

  if null xs && (res <> Correct)
  then failwith
    "Impossible: How can diff be null and have not Correct in compare_c?";

  res, xs
336
337
338
339
340 (*****************************************************************************)
341
(* Default comparison: the token-level one. *)
let compare_default filename1 filename2 =
  compare_token filename1 filename2
343
344
(* Renders a (result, diff lines) pair as a multi-line, human-readable
 * report. The diff lines are only included when a problem is reported. *)
let compare_result_to_string (correct, diffxs) =
  (* common tail of both problem reports *)
  let diff_section () =
    "diff (result(-) vs expected_result(+)) = " ^ "\n" ^
    Common.join "\n" diffxs ^ "\n"
  in
  match correct with
  | Correct ->
      "seems correct\n"
  | Pb s ->
      "seems incorrect: " ^ s ^ "\n" ^
      diff_section ()
  | PbOnlyInNotParsedCorrectly s ->
      "seems incorrect, but only because of code that was not parsable\n" ^
      "explanation:" ^ s ^ "\n" ^
      diff_section ()
358
359
(* True exactly when the comparison result is [Correct]. *)
let compare_result_to_bool correct =
  match correct with
  | Correct -> true
  | Pb _ | PbOnlyInNotParsedCorrectly _ -> false