Commit | Line | Data |
---|---|---|
0708f913 | 1 | (* Yoann Padioleau |
ae4735db C |
2 | * |
3 | * Copyright (C) 2010, University of Copenhagen DIKU and INRIA. | |
0708f913 C |
4 | * Copyright (C) 2006, 2007 Ecole des Mines de Nantes |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License (GPL) | |
8 | * version 2 as published by the Free Software Foundation. | |
ae4735db | 9 | * |
0708f913 C |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | * file license.txt for more details. | |
14 | *) | |
34e49164 C |
15 | open Common |
16 | ||
17 | open Ast_c | |
18 | ||
19 | ||
ae4735db C |
20 | type compare_result = |
21 | | Correct | |
34e49164 C |
22 | | Pb of string |
23 | | PbOnlyInNotParsedCorrectly of string | |
24 | ||
25 | ||
26 | (*****************************************************************************) | |
27 | (* Normalise before comparing *) | |
28 | (*****************************************************************************) | |
29 | ||
30 | (* List taken from CVS manual, 'Keyword substitution' chapter. Note | |
31 | * that I do not put "Log" because it is used only in comment, and it | |
32 | * is not enough to substituate until the end of the line. *) | |
33 | let cvs_keyword_list = [ | |
34 | "Id";"Date"; "Revision"; (* the common one *) | |
951c7801 C |
35 | "FreeBSD";"Heimdal";"KAME";"NetBSD";"OpenBSD";"OpenLDAP";"RuOBSD"; |
36 | "SourceForge"; | |
34e49164 C |
37 | "Name";"Author";"CVSHeader";"Header";"Locker";"RCSfile";"Source";"State"; |
38 | "Rev"; | |
39 | ] | |
40 | ||
41 | (* Can also have just dollarIDdollar but it is only when you have not | |
42 | * yet committed the file. After the commit it would be a dollarIddollar:. | |
43 | * If reput Id:, do not join the regexp!! otherwise CVS will modify it :) | |
44 | *) | |
ae4735db | 45 | let cvs_keyword_regexp = Str.regexp |
34e49164 C |
46 | ("\\$\\([A-Za-z_]+\\):[^\\$]*\\$") |
47 | ||
48 | ||
ae4735db C |
49 | let cvs_compute_newstr s = |
50 | Str.global_substitute cvs_keyword_regexp (fun _s -> | |
34e49164 C |
51 | let substr = Str.matched_string s in |
52 | assert (substr ==~ cvs_keyword_regexp); (* use its side-effect *) | |
53 | let tag = matched1 substr in | |
54 | ||
55 | if not (List.mem tag cvs_keyword_list) | |
978fd7e5 | 56 | then pr2_once ("unknown CVS keyword: " ^ tag); |
ae4735db C |
57 | |
58 | "CVS_MAGIC_STRING" | |
59 | ) s | |
34e49164 C |
60 | |
61 | ||
62 | ||
63 | ||
64 | (* todo: get rid of the type for expressions ? *) | |
ae4735db C |
65 | let normal_form_program xs = |
66 | let bigf = { Visitor_c.default_visitor_c_s with | |
34e49164 | 67 | |
ae4735db | 68 | Visitor_c.kini_s = (fun (k,bigf) ini -> |
34e49164 | 69 | match ini with |
ae4735db | 70 | | InitList xs, [i1;i2;iicommaopt] -> |
34e49164 C |
71 | k (InitList xs, [i1;i2]) |
72 | | _ -> k ini | |
73 | ); | |
ae4735db | 74 | Visitor_c.kexpr_s = (fun (k,bigf) e -> |
34e49164 C |
75 | match e with |
76 | (* todo: should also do something for multistrings *) | |
708f4980 | 77 | | (Constant (String (s,kind)), typ), [ii] |
ae4735db | 78 | when Common.string_match_substring cvs_keyword_regexp s -> |
34e49164 C |
79 | let newstr = cvs_compute_newstr s in |
80 | (Constant (String (newstr,kind)), typ), [rewrap_str newstr ii] | |
ae4735db | 81 | | _ -> k e |
34e49164 C |
82 | |
83 | ); | |
ae4735db | 84 | Visitor_c.ktoplevel_s = (fun (k,bigf) p -> |
34e49164 | 85 | match p with |
ae4735db | 86 | | CppTop (Define _) -> |
34e49164 C |
87 | raise Todo |
88 | (* | |
89 | let (i1, i2, i3) = Common.tuple_of_list3 ii in | |
90 | if Common.string_match_substring cvs_keyword_regexp body | |
ae4735db | 91 | then |
34e49164 C |
92 | let newstr = cvs_compute_newstr body in |
93 | Define ((s, newstr), [i1;i2;rewrap_str newstr i3]) | |
94 | else p | |
95 | *) | |
96 | | _ -> k p | |
97 | ); | |
98 | ||
99 | (* | |
ae4735db | 100 | Visitor_c.kinfo_s = (fun (k,bigf) i -> |
34e49164 C |
101 | let s = Ast_c.get_str_of_info i in |
102 | if Common.string_match_substring cvs_keyword_regexp s | |
ae4735db | 103 | then |
34e49164 C |
104 | let newstr = cvs_compute_newstr s in |
105 | rewrap_str newstr i | |
106 | else i | |
107 | ); | |
108 | *) | |
109 | ||
110 | } | |
111 | in | |
112 | xs +> List.map (fun p -> Visitor_c.vk_toplevel_s bigf p) | |
113 | ||
114 | ||
115 | ||
116 | ||
117 | ||
118 | ||
ae4735db C |
119 | let normal_form_token adjust_cvs x = |
120 | let x' = | |
121 | match x with | |
34e49164 C |
122 | | Parser_c.TString ((s, kind),i1) -> Parser_c.TString (("",kind), i1) |
123 | | x -> x | |
124 | in | |
ae4735db | 125 | x' +> Token_helpers.visitor_info_of_tok (fun info -> |
34e49164 C |
126 | let info = Ast_c.al_info 0 info in |
127 | let str = Ast_c.str_of_info info in | |
951c7801 | 128 | if adjust_cvs && Common.string_match_substring cvs_keyword_regexp str |
ae4735db | 129 | then |
34e49164 C |
130 | let newstr = cvs_compute_newstr str in |
131 | rewrap_str newstr info | |
132 | else info | |
133 | ) | |
134 | ||
ae4735db | 135 | |
34e49164 C |
136 | (*****************************************************************************) |
137 | (* Compare at Ast level *) | |
138 | (*****************************************************************************) | |
139 | ||
140 | (* Note that I do a (simple) astdiff to know if there is a difference, but | |
141 | * then I use diff to print the differences. So sometimes you have to dig | |
ae4735db | 142 | * a little to find really where the real difference (one not involving |
34e49164 C |
143 | * just spacing difference) was. |
144 | * Note also that the astdiff is not very accurate. As I skip comments, | |
145 | * macro definitions, those are not in the Ast and if there is a diff | |
146 | * between 2 files regarding macro def, then I will not be able to report it :( | |
147 | * update: I now put the toplevel #define at least in the Ast. | |
148 | * update: You can use token_compare for more precise diff. | |
149 | * | |
150 | * todo?: finer grain astdiff, better report, more precise. | |
ae4735db | 151 | * |
34e49164 C |
152 | * todo: do iso between if() S and if() { S } |
153 | *) | |
154 | let compare_ast filename1 filename2 = | |
155 | ||
156 | let xs = | |
157 | match !Flag_parsing_c.diff_lines with | |
158 | None -> | |
159 | Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2) | |
ae4735db | 160 | | Some n -> |
34e49164 C |
161 | Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in |
162 | ||
163 | (* get rid of the --- and +++ lines *) | |
ae4735db C |
164 | let xs = |
165 | if null xs | |
166 | then xs | |
34e49164 C |
167 | else Common.drop 2 xs |
168 | in | |
169 | ||
170 | ||
ae4735db | 171 | let process_filename filename = |
978fd7e5 | 172 | let (c, _stat) = Parse_c.parse_c_and_cpp filename in |
34e49164 C |
173 | let c = List.map fst c in |
174 | c +> Lib_parsing_c.al_program +> normal_form_program | |
175 | in | |
176 | ||
177 | let c1 = process_filename filename1 in | |
178 | let c2 = process_filename filename2 in | |
ae4735db | 179 | |
34e49164 C |
180 | let error = ref 0 in |
181 | let pb_notparsed = ref 0 in | |
ae4735db C |
182 | |
183 | let res = | |
184 | if List.length c1 <> List.length c2 | |
34e49164 | 185 | then Pb "not same number of entities (func, decl, ...)" |
ae4735db | 186 | else |
34e49164 C |
187 | begin |
188 | zip c1 c2 +> List.iter (function | |
189 | | Declaration a, Declaration b -> if not (a =*= b) then incr error | |
190 | | Definition a, Definition b -> if not (a =*= b) then incr error | |
191 | | EmptyDef a, EmptyDef b -> if not (a =*= b) then incr error | |
ae4735db | 192 | | MacroTop (a1,b1,c1), MacroTop (a2,b2,c2) -> |
34e49164 | 193 | if not ((a1,b1,c1) =*= (a2,b2,c2)) then incr error |
ae4735db | 194 | | CppTop (Include {i_include = a}), CppTop (Include {i_include = b}) -> |
485bce71 | 195 | if not (a =*= b) then incr error |
ae4735db | 196 | | CppTop Define _, CppTop Define _ -> |
34e49164 C |
197 | raise Todo |
198 | (* if not (a =*= b) then incr error *) | |
ae4735db | 199 | | NotParsedCorrectly a, NotParsedCorrectly b -> |
34e49164 | 200 | if not (a =*= b) then incr pb_notparsed |
ae4735db | 201 | | NotParsedCorrectly a, _ -> |
34e49164 C |
202 | (* Pb only in generated file *) |
203 | incr error; | |
204 | ||
ae4735db | 205 | | _, NotParsedCorrectly b -> |
34e49164 C |
206 | incr pb_notparsed |
207 | | FinalDef a, FinalDef b -> if not (a =*= b) then incr error | |
485bce71 C |
208 | |
209 | | IfdefTop a, IfdefTop b -> if not (a =*= b) then incr error | |
210 | ||
211 | | (FinalDef _|EmptyDef _| | |
212 | MacroTop (_, _, _)|IfdefTop _| | |
213 | CppTop _|Definition _|Declaration _), _ -> incr error | |
ae4735db | 214 | |
34e49164 C |
215 | ); |
216 | (match () with | |
ae4735db | 217 | | _ when !pb_notparsed > 0 && !error =|= 0 -> |
34e49164 C |
218 | PbOnlyInNotParsedCorrectly "" |
219 | | _ when !error > 0 -> Pb "" | |
220 | | _ -> Correct | |
221 | ) | |
222 | end | |
223 | in | |
224 | res, xs | |
225 | ||
226 | ||
227 | ||
228 | (*****************************************************************************) | |
229 | (* Compare at token level *) | |
230 | (*****************************************************************************) | |
231 | ||
232 | (* Because I now commentize more in parsing, with parsing_hacks, | |
233 | * compare_ast may say that 2 programs are equal whereas they are not. | |
234 | * Here I compare token, and so have still the TCommentCpp and TCommentMisc | |
235 | * so at least detect such differences. | |
ae4735db | 236 | * |
34e49164 C |
237 | * Morover compare_ast is not very precise in his report when it |
238 | * detects a difference. So token_diff is better. | |
ae4735db | 239 | * |
34e49164 | 240 | * I do token_diff but I use programCelement2, so that |
ae4735db | 241 | * I know if I am in a "notparsable" zone. The tokens are |
34e49164 | 242 | * in (snd programCelement2). |
ae4735db C |
243 | * |
244 | * Faire aussi un compare_token qui se moque des TCommentMisc, | |
245 | * TCommentCPP et TIfdef ? Normalement si fait ca retrouvera | |
34e49164 | 246 | * les meme resultats que compare_ast. |
ae4735db | 247 | * |
34e49164 C |
248 | *) |
249 | ||
250 | ||
251 | (* Pass only "true" comments, dont pass TCommentMisc and TCommentCpp *) | |
5636bb2c | 252 | let is_normal_space_or_comment to_expected = function |
34e49164 C |
253 | | Parser_c.TCommentSpace _ |
254 | | Parser_c.TCommentNewline _ | |
255 | ||
256 | (* | Parser_c.TComma _ *) (* UGLY, because of gcc_opt_comma isomorphism *) | |
257 | -> true | |
5636bb2c | 258 | | Parser_c.TComment _ -> to_expected (* only ignore in compare to expected *) |
34e49164 C |
259 | | _ -> false |
260 | ||
261 | ||
ae4735db C |
262 | (* convetion: compare_token generated_file expected_res |
263 | * because when there is a notparsablezone in generated_file, I | |
34e49164 C |
264 | * don't issue a PbOnlyInNotParsedCorrectly |
265 | *) | |
5636bb2c | 266 | let do_compare_token adjust_cvs to_expected filename1 filename2 = |
34e49164 | 267 | |
ae4735db | 268 | let rec loop xs ys = |
34e49164 C |
269 | match xs, ys with |
270 | | [], [] -> None | |
271 | ||
272 | (* UGLY, because of gcc_opt_comma isomorphism *) | |
273 | | (Parser_c.TComma _::Parser_c.TCBrace _::xs), (Parser_c.TCBrace _::ys) -> | |
274 | loop xs ys | |
ae4735db | 275 | | (Parser_c.TCBrace _::xs), (Parser_c.TComma _::Parser_c.TCBrace _::ys) -> |
34e49164 C |
276 | loop xs ys |
277 | ||
ae4735db | 278 | | [], x::xs -> |
34e49164 | 279 | Some "not same number of tokens inside C elements" |
ae4735db | 280 | | x::xs, [] -> |
34e49164 | 281 | Some "not same number of tokens inside C elements" |
ae4735db C |
282 | |
283 | | x::xs, y::ys -> | |
951c7801 C |
284 | let x' = normal_form_token adjust_cvs x in |
285 | let y' = normal_form_token adjust_cvs y in | |
ae4735db | 286 | if x' =*= y' |
34e49164 | 287 | then loop xs ys |
ae4735db C |
288 | else |
289 | let str1, pos1 = | |
34e49164 | 290 | Token_helpers.str_of_tok x, Token_helpers.pos_of_tok x in |
ae4735db | 291 | let str2, pos2 = |
34e49164 C |
292 | Token_helpers.str_of_tok y, Token_helpers.pos_of_tok y in |
293 | Some ("diff token: " ^ str1 ^" VS " ^ str2 ^ "\n" ^ | |
294 | Common.error_message filename1 (str1, pos1) ^ "\n" ^ | |
295 | Common.error_message filename2 (str2, pos2) ^ "\n" | |
296 | ) | |
ae4735db | 297 | |
34e49164 | 298 | in |
ae4735db | 299 | let final_loop xs ys = |
34e49164 | 300 | loop |
5636bb2c C |
301 | (xs +> |
302 | List.filter (fun x -> not (is_normal_space_or_comment to_expected x))) | |
303 | (ys +> | |
304 | List.filter (fun x -> not (is_normal_space_or_comment to_expected x))) | |
34e49164 | 305 | in |
ae4735db | 306 | |
34e49164 C |
307 | (* |
308 | let toks1 = Parse_c.tokens filename1 in | |
309 | let toks2 = Parse_c.tokens filename2 in | |
310 | loop toks1 toks2 in | |
311 | *) | |
312 | ||
978fd7e5 C |
313 | let (c1, _stat) = Parse_c.parse_c_and_cpp filename1 in |
314 | let (c2, _stat) = Parse_c.parse_c_and_cpp filename2 in | |
34e49164 | 315 | |
ae4735db C |
316 | let res = |
317 | if List.length c1 <> List.length c2 | |
34e49164 | 318 | then Pb "not same number of entities (func, decl, ...)" |
ae4735db C |
319 | else |
320 | zip c1 c2 +> Common.fold_k (fun acc ((a,infoa),(b,infob)) k -> | |
34e49164 | 321 | match a, b with |
ae4735db | 322 | | NotParsedCorrectly a, NotParsedCorrectly b -> |
34e49164 C |
323 | (match final_loop (snd infoa) (snd infob) with |
324 | | None -> k acc | |
325 | | Some s -> PbOnlyInNotParsedCorrectly s | |
326 | ) | |
ae4735db C |
327 | |
328 | | NotParsedCorrectly a, _ -> | |
34e49164 | 329 | Pb "PB parsing only in generated-file" |
ae4735db | 330 | | _, NotParsedCorrectly b -> |
34e49164 | 331 | PbOnlyInNotParsedCorrectly "PB parsing only in expected-file" |
ae4735db | 332 | | _, _ -> |
34e49164 C |
333 | (match final_loop (snd infoa) (snd infob) with |
334 | | None -> k acc | |
335 | | Some s -> Pb s | |
336 | ) | |
337 | ) (fun acc -> acc) | |
338 | (Correct) | |
339 | in | |
340 | ||
341 | let xs = | |
342 | match !Flag_parsing_c.diff_lines with | |
343 | None -> | |
344 | Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2) | |
ae4735db | 345 | | Some n -> |
34e49164 C |
346 | Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in |
347 | ||
348 | (* get rid of the --- and +++ lines *) | |
ae4735db C |
349 | let xs = |
350 | if null xs | |
351 | then xs | |
34e49164 C |
352 | else Common.drop 2 xs |
353 | in | |
354 | ||
ae4735db C |
355 | if null xs && (res <> Correct) |
356 | then failwith | |
34e49164 C |
357 | "Impossible: How can diff be null and have not Correct in compare_c?"; |
358 | ||
359 | res, xs | |
360 | ||
5636bb2c | 361 | let compare_token = do_compare_token true true |
34e49164 C |
362 | |
363 | ||
364 | (*****************************************************************************) | |
365 | ||
951c7801 | 366 | (* compare to a res file *) |
5636bb2c | 367 | let compare_default = do_compare_token true true |
951c7801 C |
368 | |
369 | (* compare to the source of the transformation *) | |
5636bb2c | 370 | let compare_to_original = do_compare_token false false |
34e49164 C |
371 | |
372 | ||
373 | let compare_result_to_string (correct, diffxs) = | |
374 | match correct with | |
ae4735db | 375 | | Correct -> |
34e49164 | 376 | "seems correct" ^ "\n" |
ae4735db | 377 | | Pb s -> |
34e49164 C |
378 | ("seems incorrect: " ^ s) ^ "\n" ^ |
379 | "diff (result(-) vs expected_result(+)) = " ^ "\n" ^ | |
380 | (diffxs +> Common.join "\n") ^ "\n" | |
ae4735db | 381 | | PbOnlyInNotParsedCorrectly s -> |
34e49164 C |
382 | "seems incorrect, but only because of code that was not parsable" ^ "\n"^ |
383 | ("explanation:" ^ s) ^ "\n" ^ | |
384 | "diff (result(-) vs expected_result(+)) = " ^ "\n" ^ | |
385 | (diffxs +> Common.join "\n") ^ "\n" | |
386 | ||
387 | ||
ae4735db | 388 | let compare_result_to_bool correct = |
b1b2de81 | 389 | correct =*= Correct |