Commit | Line | Data |
---|---|---|
0708f913 | 1 | (* Yoann Padioleau |
ae4735db C |
2 | * |
3 | * Copyright (C) 2010, University of Copenhagen DIKU and INRIA. | |
0708f913 C |
4 | * Copyright (C) 2006, 2007 Ecole des Mines de Nantes |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License (GPL) | |
8 | * version 2 as published by the Free Software Foundation. | |
ae4735db | 9 | * |
0708f913 C |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | * file license.txt for more details. | |
14 | *) | |
34e49164 C |
15 | open Common |
16 | ||
17 | open Ast_c | |
18 | ||
19 | ||
ae4735db C |
20 | type compare_result = |
21 | | Correct | |
34e49164 C |
22 | | Pb of string |
23 | | PbOnlyInNotParsedCorrectly of string | |
24 | ||
25 | ||
26 | (*****************************************************************************) | |
27 | (* Normalise before comparing *) | |
28 | (*****************************************************************************) | |
29 | ||
30 | (* List taken from CVS manual, 'Keyword substitution' chapter. Note | |
31 | * that I do not put "Log" because it is used only in comment, and it | |
32 | * is not enough to substituate until the end of the line. *) | |
33 | let cvs_keyword_list = [ | |
34 | "Id";"Date"; "Revision"; (* the common one *) | |
951c7801 C |
35 | "FreeBSD";"Heimdal";"KAME";"NetBSD";"OpenBSD";"OpenLDAP";"RuOBSD"; |
36 | "SourceForge"; | |
34e49164 C |
37 | "Name";"Author";"CVSHeader";"Header";"Locker";"RCSfile";"Source";"State"; |
38 | "Rev"; | |
39 | ] | |
40 | ||
41 | (* Can also have just dollarIDdollar but it is only when you have not | |
42 | * yet committed the file. After the commit it would be a dollarIddollar:. | |
43 | * If reput Id:, do not join the regexp!! otherwise CVS will modify it :) | |
44 | *) | |
ae4735db | 45 | let cvs_keyword_regexp = Str.regexp |
34e49164 C |
46 | ("\\$\\([A-Za-z_]+\\):[^\\$]*\\$") |
47 | ||
48 | ||
ae4735db C |
49 | let cvs_compute_newstr s = |
50 | Str.global_substitute cvs_keyword_regexp (fun _s -> | |
34e49164 C |
51 | let substr = Str.matched_string s in |
52 | assert (substr ==~ cvs_keyword_regexp); (* use its side-effect *) | |
53 | let tag = matched1 substr in | |
54 | ||
55 | if not (List.mem tag cvs_keyword_list) | |
978fd7e5 | 56 | then pr2_once ("unknown CVS keyword: " ^ tag); |
ae4735db C |
57 | |
58 | "CVS_MAGIC_STRING" | |
59 | ) s | |
34e49164 C |
60 | |
61 | ||
62 | ||
63 | ||
64 | (* todo: get rid of the type for expressions ? *) | |
ae4735db C |
65 | let normal_form_program xs = |
66 | let bigf = { Visitor_c.default_visitor_c_s with | |
34e49164 | 67 | |
ae4735db | 68 | Visitor_c.kini_s = (fun (k,bigf) ini -> |
34e49164 | 69 | match ini with |
ae4735db | 70 | | InitList xs, [i1;i2;iicommaopt] -> |
34e49164 C |
71 | k (InitList xs, [i1;i2]) |
72 | | _ -> k ini | |
73 | ); | |
ae4735db | 74 | Visitor_c.kexpr_s = (fun (k,bigf) e -> |
34e49164 C |
75 | match e with |
76 | (* todo: should also do something for multistrings *) | |
708f4980 | 77 | | (Constant (String (s,kind)), typ), [ii] |
ae4735db | 78 | when Common.string_match_substring cvs_keyword_regexp s -> |
34e49164 C |
79 | let newstr = cvs_compute_newstr s in |
80 | (Constant (String (newstr,kind)), typ), [rewrap_str newstr ii] | |
ae4735db | 81 | | _ -> k e |
34e49164 C |
82 | |
83 | ); | |
ae4735db | 84 | Visitor_c.ktoplevel_s = (fun (k,bigf) p -> |
34e49164 | 85 | match p with |
ae4735db | 86 | | CppTop (Define _) -> |
34e49164 C |
87 | raise Todo |
88 | (* | |
89 | let (i1, i2, i3) = Common.tuple_of_list3 ii in | |
90 | if Common.string_match_substring cvs_keyword_regexp body | |
ae4735db | 91 | then |
34e49164 C |
92 | let newstr = cvs_compute_newstr body in |
93 | Define ((s, newstr), [i1;i2;rewrap_str newstr i3]) | |
94 | else p | |
95 | *) | |
96 | | _ -> k p | |
97 | ); | |
98 | ||
99 | (* | |
ae4735db | 100 | Visitor_c.kinfo_s = (fun (k,bigf) i -> |
34e49164 C |
101 | let s = Ast_c.get_str_of_info i in |
102 | if Common.string_match_substring cvs_keyword_regexp s | |
ae4735db | 103 | then |
34e49164 C |
104 | let newstr = cvs_compute_newstr s in |
105 | rewrap_str newstr i | |
106 | else i | |
107 | ); | |
108 | *) | |
109 | ||
110 | } | |
111 | in | |
112 | xs +> List.map (fun p -> Visitor_c.vk_toplevel_s bigf p) | |
113 | ||
114 | ||
115 | ||
116 | ||
117 | ||
118 | ||
ae4735db C |
119 | let normal_form_token adjust_cvs x = |
120 | let x' = | |
121 | match x with | |
34e49164 C |
122 | | Parser_c.TString ((s, kind),i1) -> Parser_c.TString (("",kind), i1) |
123 | | x -> x | |
124 | in | |
ae4735db | 125 | x' +> Token_helpers.visitor_info_of_tok (fun info -> |
34e49164 C |
126 | let info = Ast_c.al_info 0 info in |
127 | let str = Ast_c.str_of_info info in | |
951c7801 | 128 | if adjust_cvs && Common.string_match_substring cvs_keyword_regexp str |
ae4735db | 129 | then |
34e49164 C |
130 | let newstr = cvs_compute_newstr str in |
131 | rewrap_str newstr info | |
132 | else info | |
133 | ) | |
134 | ||
ae4735db | 135 | |
34e49164 C |
136 | (*****************************************************************************) |
137 | (* Compare at Ast level *) | |
138 | (*****************************************************************************) | |
139 | ||
140 | (* Note that I do a (simple) astdiff to know if there is a difference, but | |
141 | * then I use diff to print the differences. So sometimes you have to dig | |
ae4735db | 142 | * a little to find really where the real difference (one not involving |
34e49164 C |
143 | * just spacing difference) was. |
144 | * Note also that the astdiff is not very accurate. As I skip comments, | |
145 | * macro definitions, those are not in the Ast and if there is a diff | |
146 | * between 2 files regarding macro def, then I will not be able to report it :( | |
147 | * update: I now put the toplevel #define at least in the Ast. | |
148 | * update: You can use token_compare for more precise diff. | |
149 | * | |
150 | * todo?: finer grain astdiff, better report, more precise. | |
ae4735db | 151 | * |
34e49164 C |
152 | * todo: do iso between if() S and if() { S } |
153 | *) | |
154 | let compare_ast filename1 filename2 = | |
155 | ||
156 | let xs = | |
157 | match !Flag_parsing_c.diff_lines with | |
158 | None -> | |
159 | Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2) | |
ae4735db | 160 | | Some n -> |
34e49164 C |
161 | Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in |
162 | ||
163 | (* get rid of the --- and +++ lines *) | |
ae4735db C |
164 | let xs = |
165 | if null xs | |
166 | then xs | |
34e49164 C |
167 | else Common.drop 2 xs |
168 | in | |
169 | ||
170 | ||
ae4735db | 171 | let process_filename filename = |
978fd7e5 | 172 | let (c, _stat) = Parse_c.parse_c_and_cpp filename in |
34e49164 C |
173 | let c = List.map fst c in |
174 | c +> Lib_parsing_c.al_program +> normal_form_program | |
175 | in | |
176 | ||
177 | let c1 = process_filename filename1 in | |
178 | let c2 = process_filename filename2 in | |
ae4735db | 179 | |
34e49164 C |
180 | let error = ref 0 in |
181 | let pb_notparsed = ref 0 in | |
ae4735db C |
182 | |
183 | let res = | |
184 | if List.length c1 <> List.length c2 | |
34e49164 | 185 | then Pb "not same number of entities (func, decl, ...)" |
ae4735db | 186 | else |
34e49164 | 187 | begin |
1b9ae606 | 188 | let rec check = function |
34e49164 C |
189 | | Declaration a, Declaration b -> if not (a =*= b) then incr error |
190 | | Definition a, Definition b -> if not (a =*= b) then incr error | |
191 | | EmptyDef a, EmptyDef b -> if not (a =*= b) then incr error | |
ae4735db | 192 | | MacroTop (a1,b1,c1), MacroTop (a2,b2,c2) -> |
34e49164 | 193 | if not ((a1,b1,c1) =*= (a2,b2,c2)) then incr error |
ae4735db | 194 | | CppTop (Include {i_include = a}), CppTop (Include {i_include = b}) -> |
485bce71 | 195 | if not (a =*= b) then incr error |
ae4735db | 196 | | CppTop Define _, CppTop Define _ -> |
34e49164 C |
197 | raise Todo |
198 | (* if not (a =*= b) then incr error *) | |
ae4735db | 199 | | NotParsedCorrectly a, NotParsedCorrectly b -> |
34e49164 | 200 | if not (a =*= b) then incr pb_notparsed |
ae4735db | 201 | | NotParsedCorrectly a, _ -> |
34e49164 C |
202 | (* Pb only in generated file *) |
203 | incr error; | |
204 | ||
ae4735db | 205 | | _, NotParsedCorrectly b -> |
34e49164 C |
206 | incr pb_notparsed |
207 | | FinalDef a, FinalDef b -> if not (a =*= b) then incr error | |
485bce71 C |
208 | |
209 | | IfdefTop a, IfdefTop b -> if not (a =*= b) then incr error | |
210 | ||
1b9ae606 C |
211 | | Namespace (tlsa, iia), Namespace (tlsb, iib) -> |
212 | if not (iia =*= iib) then incr error; | |
213 | zip tlsa tlsb +> List.iter check | |
485bce71 C |
214 | | (FinalDef _|EmptyDef _| |
215 | MacroTop (_, _, _)|IfdefTop _| | |
1b9ae606 C |
216 | CppTop _|Definition _|Declaration _|Namespace _), _ -> incr error |
217 | in | |
218 | zip c1 c2 +> List.iter check; | |
34e49164 | 219 | (match () with |
ae4735db | 220 | | _ when !pb_notparsed > 0 && !error =|= 0 -> |
34e49164 C |
221 | PbOnlyInNotParsedCorrectly "" |
222 | | _ when !error > 0 -> Pb "" | |
223 | | _ -> Correct | |
224 | ) | |
225 | end | |
226 | in | |
227 | res, xs | |
228 | ||
229 | ||
230 | ||
231 | (*****************************************************************************) | |
232 | (* Compare at token level *) | |
233 | (*****************************************************************************) | |
234 | ||
235 | (* Because I now commentize more in parsing, with parsing_hacks, | |
236 | * compare_ast may say that 2 programs are equal whereas they are not. | |
237 | * Here I compare token, and so have still the TCommentCpp and TCommentMisc | |
238 | * so at least detect such differences. | |
ae4735db | 239 | * |
34e49164 C |
240 | * Morover compare_ast is not very precise in his report when it |
241 | * detects a difference. So token_diff is better. | |
ae4735db | 242 | * |
34e49164 | 243 | * I do token_diff but I use programCelement2, so that |
ae4735db | 244 | * I know if I am in a "notparsable" zone. The tokens are |
34e49164 | 245 | * in (snd programCelement2). |
ae4735db C |
246 | * |
247 | * Faire aussi un compare_token qui se moque des TCommentMisc, | |
248 | * TCommentCPP et TIfdef ? Normalement si fait ca retrouvera | |
34e49164 | 249 | * les meme resultats que compare_ast. |
ae4735db | 250 | * |
34e49164 C |
251 | *) |
252 | ||
253 | ||
254 | (* Pass only "true" comments, dont pass TCommentMisc and TCommentCpp *) | |
5636bb2c | 255 | let is_normal_space_or_comment to_expected = function |
34e49164 C |
256 | | Parser_c.TCommentSpace _ |
257 | | Parser_c.TCommentNewline _ | |
258 | ||
259 | (* | Parser_c.TComma _ *) (* UGLY, because of gcc_opt_comma isomorphism *) | |
260 | -> true | |
5636bb2c | 261 | | Parser_c.TComment _ -> to_expected (* only ignore in compare to expected *) |
34e49164 C |
262 | | _ -> false |
263 | ||
264 | ||
ae4735db C |
265 | (* convetion: compare_token generated_file expected_res |
266 | * because when there is a notparsablezone in generated_file, I | |
34e49164 C |
267 | * don't issue a PbOnlyInNotParsedCorrectly |
268 | *) | |
5636bb2c | 269 | let do_compare_token adjust_cvs to_expected filename1 filename2 = |
34e49164 | 270 | |
ae4735db | 271 | let rec loop xs ys = |
34e49164 C |
272 | match xs, ys with |
273 | | [], [] -> None | |
274 | ||
275 | (* UGLY, because of gcc_opt_comma isomorphism *) | |
276 | | (Parser_c.TComma _::Parser_c.TCBrace _::xs), (Parser_c.TCBrace _::ys) -> | |
277 | loop xs ys | |
ae4735db | 278 | | (Parser_c.TCBrace _::xs), (Parser_c.TComma _::Parser_c.TCBrace _::ys) -> |
34e49164 C |
279 | loop xs ys |
280 | ||
ae4735db | 281 | | [], x::xs -> |
34e49164 | 282 | Some "not same number of tokens inside C elements" |
ae4735db | 283 | | x::xs, [] -> |
34e49164 | 284 | Some "not same number of tokens inside C elements" |
ae4735db C |
285 | |
286 | | x::xs, y::ys -> | |
951c7801 C |
287 | let x' = normal_form_token adjust_cvs x in |
288 | let y' = normal_form_token adjust_cvs y in | |
ae4735db | 289 | if x' =*= y' |
34e49164 | 290 | then loop xs ys |
ae4735db C |
291 | else |
292 | let str1, pos1 = | |
34e49164 | 293 | Token_helpers.str_of_tok x, Token_helpers.pos_of_tok x in |
ae4735db | 294 | let str2, pos2 = |
34e49164 C |
295 | Token_helpers.str_of_tok y, Token_helpers.pos_of_tok y in |
296 | Some ("diff token: " ^ str1 ^" VS " ^ str2 ^ "\n" ^ | |
297 | Common.error_message filename1 (str1, pos1) ^ "\n" ^ | |
298 | Common.error_message filename2 (str2, pos2) ^ "\n" | |
299 | ) | |
ae4735db | 300 | |
34e49164 | 301 | in |
ae4735db | 302 | let final_loop xs ys = |
34e49164 | 303 | loop |
5636bb2c C |
304 | (xs +> |
305 | List.filter (fun x -> not (is_normal_space_or_comment to_expected x))) | |
306 | (ys +> | |
307 | List.filter (fun x -> not (is_normal_space_or_comment to_expected x))) | |
34e49164 | 308 | in |
ae4735db | 309 | |
34e49164 C |
310 | (* |
311 | let toks1 = Parse_c.tokens filename1 in | |
312 | let toks2 = Parse_c.tokens filename2 in | |
313 | loop toks1 toks2 in | |
314 | *) | |
315 | ||
978fd7e5 C |
316 | let (c1, _stat) = Parse_c.parse_c_and_cpp filename1 in |
317 | let (c2, _stat) = Parse_c.parse_c_and_cpp filename2 in | |
34e49164 | 318 | |
ae4735db C |
319 | let res = |
320 | if List.length c1 <> List.length c2 | |
34e49164 | 321 | then Pb "not same number of entities (func, decl, ...)" |
ae4735db C |
322 | else |
323 | zip c1 c2 +> Common.fold_k (fun acc ((a,infoa),(b,infob)) k -> | |
34e49164 | 324 | match a, b with |
ae4735db | 325 | | NotParsedCorrectly a, NotParsedCorrectly b -> |
34e49164 C |
326 | (match final_loop (snd infoa) (snd infob) with |
327 | | None -> k acc | |
328 | | Some s -> PbOnlyInNotParsedCorrectly s | |
329 | ) | |
ae4735db C |
330 | |
331 | | NotParsedCorrectly a, _ -> | |
34e49164 | 332 | Pb "PB parsing only in generated-file" |
ae4735db | 333 | | _, NotParsedCorrectly b -> |
34e49164 | 334 | PbOnlyInNotParsedCorrectly "PB parsing only in expected-file" |
ae4735db | 335 | | _, _ -> |
34e49164 C |
336 | (match final_loop (snd infoa) (snd infob) with |
337 | | None -> k acc | |
338 | | Some s -> Pb s | |
339 | ) | |
340 | ) (fun acc -> acc) | |
341 | (Correct) | |
342 | in | |
343 | ||
344 | let xs = | |
345 | match !Flag_parsing_c.diff_lines with | |
346 | None -> | |
347 | Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2) | |
ae4735db | 348 | | Some n -> |
34e49164 C |
349 | Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in |
350 | ||
351 | (* get rid of the --- and +++ lines *) | |
ae4735db C |
352 | let xs = |
353 | if null xs | |
354 | then xs | |
34e49164 C |
355 | else Common.drop 2 xs |
356 | in | |
357 | ||
ae4735db C |
358 | if null xs && (res <> Correct) |
359 | then failwith | |
34e49164 C |
360 | "Impossible: How can diff be null and have not Correct in compare_c?"; |
361 | ||
362 | res, xs | |
363 | ||
5636bb2c | 364 | let compare_token = do_compare_token true true |
34e49164 C |
365 | |
366 | ||
367 | (*****************************************************************************) | |
368 | ||
951c7801 | 369 | (* compare to a res file *) |
5636bb2c | 370 | let compare_default = do_compare_token true true |
951c7801 C |
371 | |
372 | (* compare to the source of the transformation *) | |
5636bb2c | 373 | let compare_to_original = do_compare_token false false |
34e49164 C |
374 | |
375 | ||
376 | let compare_result_to_string (correct, diffxs) = | |
377 | match correct with | |
ae4735db | 378 | | Correct -> |
34e49164 | 379 | "seems correct" ^ "\n" |
ae4735db | 380 | | Pb s -> |
34e49164 C |
381 | ("seems incorrect: " ^ s) ^ "\n" ^ |
382 | "diff (result(-) vs expected_result(+)) = " ^ "\n" ^ | |
383 | (diffxs +> Common.join "\n") ^ "\n" | |
ae4735db | 384 | | PbOnlyInNotParsedCorrectly s -> |
34e49164 C |
385 | "seems incorrect, but only because of code that was not parsable" ^ "\n"^ |
386 | ("explanation:" ^ s) ^ "\n" ^ | |
387 | "diff (result(-) vs expected_result(+)) = " ^ "\n" ^ | |
388 | (diffxs +> Common.join "\n") ^ "\n" | |
389 | ||
390 | ||
ae4735db | 391 | let compare_result_to_bool correct = |
b1b2de81 | 392 | correct =*= Correct |