permit multiline comments and strings in macros
[bpt/coccinelle.git] / parsing_c / compare_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2006, 2007 Ecole des Mines de Nantes
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15 open Common
16
17 open Ast_c
18
19
20 type compare_result =
21 | Correct
22 | Pb of string
23 | PbOnlyInNotParsedCorrectly of string
24
25
26 (*****************************************************************************)
27 (* Normalise before comparing *)
28 (*****************************************************************************)
29
30 (* List taken from CVS manual, 'Keyword substitution' chapter. Note
31 * that I do not put "Log" because it is used only in comment, and it
32 * is not enough to substituate until the end of the line. *)
33 let cvs_keyword_list = [
34 "Id";"Date"; "Revision"; (* the common one *)
35 "FreeBSD";"Heimdal";"KAME";"NetBSD";"OpenBSD";"OpenLDAP";"RuOBSD";
36 "SourceForge";
37 "Name";"Author";"CVSHeader";"Header";"Locker";"RCSfile";"Source";"State";
38 "Rev";
39 ]
40
41 (* Can also have just dollarIDdollar but it is only when you have not
42 * yet committed the file. After the commit it would be a dollarIddollar:.
43 * If reput Id:, do not join the regexp!! otherwise CVS will modify it :)
44 *)
45 let cvs_keyword_regexp = Str.regexp
46 ("\\$\\([A-Za-z_]+\\):[^\\$]*\\$")
47
48
49 let cvs_compute_newstr s =
50 Str.global_substitute cvs_keyword_regexp (fun _s ->
51 let substr = Str.matched_string s in
52 assert (substr ==~ cvs_keyword_regexp); (* use its side-effect *)
53 let tag = matched1 substr in
54
55 if not (List.mem tag cvs_keyword_list)
56 then pr2_once ("unknown CVS keyword: " ^ tag);
57
58 "CVS_MAGIC_STRING"
59 ) s
60
61
62
63
64 (* todo: get rid of the type for expressions ? *)
65 let normal_form_program xs =
66 let bigf = { Visitor_c.default_visitor_c_s with
67
68 Visitor_c.kini_s = (fun (k,bigf) ini ->
69 match ini with
70 | InitList xs, [i1;i2;iicommaopt] ->
71 k (InitList xs, [i1;i2])
72 | _ -> k ini
73 );
74 Visitor_c.kexpr_s = (fun (k,bigf) e ->
75 match e with
76 (* todo: should also do something for multistrings *)
77 | (Constant (String (s,kind)), typ), [ii]
78 when Common.string_match_substring cvs_keyword_regexp s ->
79 let newstr = cvs_compute_newstr s in
80 (Constant (String (newstr,kind)), typ), [rewrap_str newstr ii]
81 | _ -> k e
82
83 );
84 Visitor_c.ktoplevel_s = (fun (k,bigf) p ->
85 match p with
86 | CppTop (Define _) ->
87 raise Todo
88 (*
89 let (i1, i2, i3) = Common.tuple_of_list3 ii in
90 if Common.string_match_substring cvs_keyword_regexp body
91 then
92 let newstr = cvs_compute_newstr body in
93 Define ((s, newstr), [i1;i2;rewrap_str newstr i3])
94 else p
95 *)
96 | _ -> k p
97 );
98
99 (*
100 Visitor_c.kinfo_s = (fun (k,bigf) i ->
101 let s = Ast_c.get_str_of_info i in
102 if Common.string_match_substring cvs_keyword_regexp s
103 then
104 let newstr = cvs_compute_newstr s in
105 rewrap_str newstr i
106 else i
107 );
108 *)
109
110 }
111 in
112 xs +> List.map (fun p -> Visitor_c.vk_toplevel_s bigf p)
113
114
115
116
117
118
119 let normal_form_token adjust_cvs x =
120 let x' =
121 match x with
122 | Parser_c.TString ((s, kind),i1) -> Parser_c.TString (("",kind), i1)
123 | x -> x
124 in
125 x' +> Token_helpers.visitor_info_of_tok (fun info ->
126 let info = Ast_c.al_info 0 info in
127 let str = Ast_c.str_of_info info in
128 if adjust_cvs && Common.string_match_substring cvs_keyword_regexp str
129 then
130 let newstr = cvs_compute_newstr str in
131 rewrap_str newstr info
132 else info
133 )
134
135
136 (*****************************************************************************)
137 (* Compare at Ast level *)
138 (*****************************************************************************)
139
140 (* Note that I do a (simple) astdiff to know if there is a difference, but
141 * then I use diff to print the differences. So sometimes you have to dig
142 * a little to find really where the real difference (one not involving
143 * just spacing difference) was.
144 * Note also that the astdiff is not very accurate. As I skip comments,
145 * macro definitions, those are not in the Ast and if there is a diff
146 * between 2 files regarding macro def, then I will not be able to report it :(
147 * update: I now put the toplevel #define at least in the Ast.
148 * update: You can use token_compare for more precise diff.
149 *
150 * todo?: finer grain astdiff, better report, more precise.
151 *
152 * todo: do iso between if() S and if() { S }
153 *)
154 let compare_ast filename1 filename2 =
155
156 let xs =
157 match !Flag_parsing_c.diff_lines with
158 None ->
159 Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2)
160 | Some n ->
161 Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in
162
163 (* get rid of the --- and +++ lines *)
164 let xs =
165 if null xs
166 then xs
167 else Common.drop 2 xs
168 in
169
170
171 let process_filename filename =
172 let (c, _stat) = Parse_c.parse_c_and_cpp filename in
173 let c = List.map fst c in
174 c +> Lib_parsing_c.al_program +> normal_form_program
175 in
176
177 let c1 = process_filename filename1 in
178 let c2 = process_filename filename2 in
179
180 let error = ref 0 in
181 let pb_notparsed = ref 0 in
182
183 let res =
184 if List.length c1 <> List.length c2
185 then Pb "not same number of entities (func, decl, ...)"
186 else
187 begin
188 let rec check = function
189 | Declaration a, Declaration b -> if not (a =*= b) then incr error
190 | Definition a, Definition b -> if not (a =*= b) then incr error
191 | EmptyDef a, EmptyDef b -> if not (a =*= b) then incr error
192 | MacroTop (a1,b1,c1), MacroTop (a2,b2,c2) ->
193 if not ((a1,b1,c1) =*= (a2,b2,c2)) then incr error
194 | CppTop (Include {i_include = a}), CppTop (Include {i_include = b}) ->
195 if not (a =*= b) then incr error
196 | CppTop Define _, CppTop Define _ ->
197 raise Todo
198 (* if not (a =*= b) then incr error *)
199 | NotParsedCorrectly a, NotParsedCorrectly b ->
200 if not (a =*= b) then incr pb_notparsed
201 | NotParsedCorrectly a, _ ->
202 (* Pb only in generated file *)
203 incr error;
204
205 | _, NotParsedCorrectly b ->
206 incr pb_notparsed
207 | FinalDef a, FinalDef b -> if not (a =*= b) then incr error
208
209 | IfdefTop a, IfdefTop b -> if not (a =*= b) then incr error
210
211 | Namespace (tlsa, iia), Namespace (tlsb, iib) ->
212 if not (iia =*= iib) then incr error;
213 zip tlsa tlsb +> List.iter check
214 | (FinalDef _|EmptyDef _|
215 MacroTop (_, _, _)|IfdefTop _|
216 CppTop _|Definition _|Declaration _|Namespace _), _ -> incr error
217 in
218 zip c1 c2 +> List.iter check;
219 (match () with
220 | _ when !pb_notparsed > 0 && !error =|= 0 ->
221 PbOnlyInNotParsedCorrectly ""
222 | _ when !error > 0 -> Pb ""
223 | _ -> Correct
224 )
225 end
226 in
227 res, xs
228
229
230
231 (*****************************************************************************)
232 (* Compare at token level *)
233 (*****************************************************************************)
234
235 (* Because I now commentize more in parsing, with parsing_hacks,
236 * compare_ast may say that 2 programs are equal whereas they are not.
237 * Here I compare token, and so have still the TCommentCpp and TCommentMisc
238 * so at least detect such differences.
239 *
240 * Morover compare_ast is not very precise in his report when it
241 * detects a difference. So token_diff is better.
242 *
243 * I do token_diff but I use programCelement2, so that
244 * I know if I am in a "notparsable" zone. The tokens are
245 * in (snd programCelement2).
246 *
247 * Faire aussi un compare_token qui se moque des TCommentMisc,
248 * TCommentCPP et TIfdef ? Normalement si fait ca retrouvera
249 * les meme resultats que compare_ast.
250 *
251 *)
252
253
254 (* Pass only "true" comments, dont pass TCommentMisc and TCommentCpp *)
255 let is_normal_space_or_comment to_expected = function
256 | Parser_c.TCommentSpace _
257 | Parser_c.TCommentNewline _
258
259 (* | Parser_c.TComma _ *) (* UGLY, because of gcc_opt_comma isomorphism *)
260 -> true
261 | Parser_c.TComment _ -> to_expected (* only ignore in compare to expected *)
262 | _ -> false
263
264
265 (* convetion: compare_token generated_file expected_res
266 * because when there is a notparsablezone in generated_file, I
267 * don't issue a PbOnlyInNotParsedCorrectly
268 *)
269 let do_compare_token adjust_cvs to_expected filename1 filename2 =
270
271 let rec loop xs ys =
272 match xs, ys with
273 | [], [] -> None
274
275 (* UGLY, because of gcc_opt_comma isomorphism *)
276 | (Parser_c.TComma _::Parser_c.TCBrace _::xs), (Parser_c.TCBrace _::ys) ->
277 loop xs ys
278 | (Parser_c.TCBrace _::xs), (Parser_c.TComma _::Parser_c.TCBrace _::ys) ->
279 loop xs ys
280
281 | [], x::xs ->
282 Some "not same number of tokens inside C elements"
283 | x::xs, [] ->
284 Some "not same number of tokens inside C elements"
285
286 | x::xs, y::ys ->
287 let x' = normal_form_token adjust_cvs x in
288 let y' = normal_form_token adjust_cvs y in
289 if x' =*= y'
290 then loop xs ys
291 else
292 let str1, pos1 =
293 Token_helpers.str_of_tok x, Token_helpers.pos_of_tok x in
294 let str2, pos2 =
295 Token_helpers.str_of_tok y, Token_helpers.pos_of_tok y in
296 Some ("diff token: " ^ str1 ^" VS " ^ str2 ^ "\n" ^
297 Common.error_message filename1 (str1, pos1) ^ "\n" ^
298 Common.error_message filename2 (str2, pos2) ^ "\n"
299 )
300
301 in
302 let final_loop xs ys =
303 loop
304 (xs +>
305 List.filter (fun x -> not (is_normal_space_or_comment to_expected x)))
306 (ys +>
307 List.filter (fun x -> not (is_normal_space_or_comment to_expected x)))
308 in
309
310 (*
311 let toks1 = Parse_c.tokens filename1 in
312 let toks2 = Parse_c.tokens filename2 in
313 loop toks1 toks2 in
314 *)
315
316 let (c1, _stat) = Parse_c.parse_c_and_cpp filename1 in
317 let (c2, _stat) = Parse_c.parse_c_and_cpp filename2 in
318
319 let res =
320 if List.length c1 <> List.length c2
321 then Pb "not same number of entities (func, decl, ...)"
322 else
323 zip c1 c2 +> Common.fold_k (fun acc ((a,infoa),(b,infob)) k ->
324 match a, b with
325 | NotParsedCorrectly a, NotParsedCorrectly b ->
326 (match final_loop (snd infoa) (snd infob) with
327 | None -> k acc
328 | Some s -> PbOnlyInNotParsedCorrectly s
329 )
330
331 | NotParsedCorrectly a, _ ->
332 Pb "PB parsing only in generated-file"
333 | _, NotParsedCorrectly b ->
334 PbOnlyInNotParsedCorrectly "PB parsing only in expected-file"
335 | _, _ ->
336 (match final_loop (snd infoa) (snd infob) with
337 | None -> k acc
338 | Some s -> Pb s
339 )
340 ) (fun acc -> acc)
341 (Correct)
342 in
343
344 let xs =
345 match !Flag_parsing_c.diff_lines with
346 None ->
347 Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2)
348 | Some n ->
349 Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in
350
351 (* get rid of the --- and +++ lines *)
352 let xs =
353 if null xs
354 then xs
355 else Common.drop 2 xs
356 in
357
358 if null xs && (res <> Correct)
359 then failwith
360 "Impossible: How can diff be null and have not Correct in compare_c?";
361
362 res, xs
363
364 let compare_token = do_compare_token true true
365
366
367 (*****************************************************************************)
368
369 (* compare to a res file *)
370 let compare_default = do_compare_token true true
371
372 (* compare to the source of the transformation *)
373 let compare_to_original = do_compare_token false false
374
375
376 let compare_result_to_string (correct, diffxs) =
377 match correct with
378 | Correct ->
379 "seems correct" ^ "\n"
380 | Pb s ->
381 ("seems incorrect: " ^ s) ^ "\n" ^
382 "diff (result(-) vs expected_result(+)) = " ^ "\n" ^
383 (diffxs +> Common.join "\n") ^ "\n"
384 | PbOnlyInNotParsedCorrectly s ->
385 "seems incorrect, but only because of code that was not parsable" ^ "\n"^
386 ("explanation:" ^ s) ^ "\n" ^
387 "diff (result(-) vs expected_result(+)) = " ^ "\n" ^
388 (diffxs +> Common.join "\n") ^ "\n"
389
390
391 let compare_result_to_bool correct =
392 correct =*= Correct