| 1 | (* Yoann Padioleau |
| 2 | * |
| 3 | * Copyright (C) 2010, University of Copenhagen DIKU and INRIA. |
| 4 | * Copyright (C) 2006, 2007 Ecole des Mines de Nantes |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU General Public License (GPL) |
| 8 | * version 2 as published by the Free Software Foundation. |
| 9 | * |
| 10 | * This program is distributed in the hope that it will be useful, |
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | * file license.txt for more details. |
| 14 | *) |
| 15 | open Common |
| 16 | |
| 17 | open Ast_c |
| 18 | |
| 19 | |
| 20 | type compare_result = |
| 21 | | Correct |
| 22 | | Pb of string |
| 23 | | PbOnlyInNotParsedCorrectly of string |
| 24 | |
| 25 | |
| 26 | (*****************************************************************************) |
| 27 | (* Normalise before comparing *) |
| 28 | (*****************************************************************************) |
| 29 | |
| 30 | (* List taken from CVS manual, 'Keyword substitution' chapter. Note |
| 31 | * that I do not put "Log" because it is used only in comment, and it |
| 32 | * is not enough to substituate until the end of the line. *) |
| 33 | let cvs_keyword_list = [ |
| 34 | "Id";"Date"; "Revision"; (* the common one *) |
| 35 | "FreeBSD";"Heimdal";"KAME";"NetBSD";"OpenBSD";"OpenLDAP";"RuOBSD"; |
| 36 | "SourceForge"; |
| 37 | "Name";"Author";"CVSHeader";"Header";"Locker";"RCSfile";"Source";"State"; |
| 38 | "Rev"; |
| 39 | ] |
| 40 | |
| 41 | (* Can also have just dollarIDdollar but it is only when you have not |
| 42 | * yet committed the file. After the commit it would be a dollarIddollar:. |
| 43 | * If reput Id:, do not join the regexp!! otherwise CVS will modify it :) |
| 44 | *) |
| 45 | let cvs_keyword_regexp = Str.regexp |
| 46 | ("\\$\\([A-Za-z_]+\\):[^\\$]*\\$") |
| 47 | |
| 48 | |
| 49 | let cvs_compute_newstr s = |
| 50 | Str.global_substitute cvs_keyword_regexp (fun _s -> |
| 51 | let substr = Str.matched_string s in |
| 52 | assert (substr ==~ cvs_keyword_regexp); (* use its side-effect *) |
| 53 | let tag = matched1 substr in |
| 54 | |
| 55 | if not (List.mem tag cvs_keyword_list) |
| 56 | then pr2_once ("unknown CVS keyword: " ^ tag); |
| 57 | |
| 58 | "CVS_MAGIC_STRING" |
| 59 | ) s |
| 60 | |
| 61 | |
| 62 | |
| 63 | |
| 64 | (* todo: get rid of the type for expressions ? *) |
| 65 | let normal_form_program xs = |
| 66 | let bigf = { Visitor_c.default_visitor_c_s with |
| 67 | |
| 68 | Visitor_c.kini_s = (fun (k,bigf) ini -> |
| 69 | match ini with |
| 70 | | InitList xs, [i1;i2;iicommaopt] -> |
| 71 | k (InitList xs, [i1;i2]) |
| 72 | | _ -> k ini |
| 73 | ); |
| 74 | Visitor_c.kexpr_s = (fun (k,bigf) e -> |
| 75 | match e with |
| 76 | (* todo: should also do something for multistrings *) |
| 77 | | (Constant (String (s,kind)), typ), [ii] |
| 78 | when Common.string_match_substring cvs_keyword_regexp s -> |
| 79 | let newstr = cvs_compute_newstr s in |
| 80 | (Constant (String (newstr,kind)), typ), [rewrap_str newstr ii] |
| 81 | | _ -> k e |
| 82 | |
| 83 | ); |
| 84 | Visitor_c.ktoplevel_s = (fun (k,bigf) p -> |
| 85 | match p with |
| 86 | | CppTop (Define _) -> |
| 87 | raise Todo |
| 88 | (* |
| 89 | let (i1, i2, i3) = Common.tuple_of_list3 ii in |
| 90 | if Common.string_match_substring cvs_keyword_regexp body |
| 91 | then |
| 92 | let newstr = cvs_compute_newstr body in |
| 93 | Define ((s, newstr), [i1;i2;rewrap_str newstr i3]) |
| 94 | else p |
| 95 | *) |
| 96 | | _ -> k p |
| 97 | ); |
| 98 | |
| 99 | (* |
| 100 | Visitor_c.kinfo_s = (fun (k,bigf) i -> |
| 101 | let s = Ast_c.get_str_of_info i in |
| 102 | if Common.string_match_substring cvs_keyword_regexp s |
| 103 | then |
| 104 | let newstr = cvs_compute_newstr s in |
| 105 | rewrap_str newstr i |
| 106 | else i |
| 107 | ); |
| 108 | *) |
| 109 | |
| 110 | } |
| 111 | in |
| 112 | xs +> List.map (fun p -> Visitor_c.vk_toplevel_s bigf p) |
| 113 | |
| 114 | |
| 115 | |
| 116 | |
| 117 | |
| 118 | |
| 119 | let normal_form_token adjust_cvs x = |
| 120 | let x' = |
| 121 | match x with |
| 122 | | Parser_c.TString ((s, kind),i1) -> Parser_c.TString (("",kind), i1) |
| 123 | | x -> x |
| 124 | in |
| 125 | x' +> Token_helpers.visitor_info_of_tok (fun info -> |
| 126 | let info = Ast_c.al_info 0 info in |
| 127 | let str = Ast_c.str_of_info info in |
| 128 | if adjust_cvs && Common.string_match_substring cvs_keyword_regexp str |
| 129 | then |
| 130 | let newstr = cvs_compute_newstr str in |
| 131 | rewrap_str newstr info |
| 132 | else info |
| 133 | ) |
| 134 | |
| 135 | |
| 136 | (*****************************************************************************) |
| 137 | (* Compare at Ast level *) |
| 138 | (*****************************************************************************) |
| 139 | |
| 140 | (* Note that I do a (simple) astdiff to know if there is a difference, but |
| 141 | * then I use diff to print the differences. So sometimes you have to dig |
| 142 | * a little to find really where the real difference (one not involving |
| 143 | * just spacing difference) was. |
| 144 | * Note also that the astdiff is not very accurate. As I skip comments, |
| 145 | * macro definitions, those are not in the Ast and if there is a diff |
| 146 | * between 2 files regarding macro def, then I will not be able to report it :( |
| 147 | * update: I now put the toplevel #define at least in the Ast. |
| 148 | * update: You can use token_compare for more precise diff. |
| 149 | * |
| 150 | * todo?: finer grain astdiff, better report, more precise. |
| 151 | * |
| 152 | * todo: do iso between if() S and if() { S } |
| 153 | *) |
| 154 | let compare_ast filename1 filename2 = |
| 155 | |
| 156 | let xs = |
| 157 | match !Flag_parsing_c.diff_lines with |
| 158 | None -> |
| 159 | Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2) |
| 160 | | Some n -> |
| 161 | Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in |
| 162 | |
| 163 | (* get rid of the --- and +++ lines *) |
| 164 | let xs = |
| 165 | if null xs |
| 166 | then xs |
| 167 | else Common.drop 2 xs |
| 168 | in |
| 169 | |
| 170 | |
| 171 | let process_filename filename = |
| 172 | let (c, _stat) = Parse_c.parse_c_and_cpp filename in |
| 173 | let c = List.map fst c in |
| 174 | c +> Lib_parsing_c.al_program +> normal_form_program |
| 175 | in |
| 176 | |
| 177 | let c1 = process_filename filename1 in |
| 178 | let c2 = process_filename filename2 in |
| 179 | |
| 180 | let error = ref 0 in |
| 181 | let pb_notparsed = ref 0 in |
| 182 | |
| 183 | let res = |
| 184 | if List.length c1 <> List.length c2 |
| 185 | then Pb "not same number of entities (func, decl, ...)" |
| 186 | else |
| 187 | begin |
| 188 | zip c1 c2 +> List.iter (function |
| 189 | | Declaration a, Declaration b -> if not (a =*= b) then incr error |
| 190 | | Definition a, Definition b -> if not (a =*= b) then incr error |
| 191 | | EmptyDef a, EmptyDef b -> if not (a =*= b) then incr error |
| 192 | | MacroTop (a1,b1,c1), MacroTop (a2,b2,c2) -> |
| 193 | if not ((a1,b1,c1) =*= (a2,b2,c2)) then incr error |
| 194 | | CppTop (Include {i_include = a}), CppTop (Include {i_include = b}) -> |
| 195 | if not (a =*= b) then incr error |
| 196 | | CppTop Define _, CppTop Define _ -> |
| 197 | raise Todo |
| 198 | (* if not (a =*= b) then incr error *) |
| 199 | | NotParsedCorrectly a, NotParsedCorrectly b -> |
| 200 | if not (a =*= b) then incr pb_notparsed |
| 201 | | NotParsedCorrectly a, _ -> |
| 202 | (* Pb only in generated file *) |
| 203 | incr error; |
| 204 | |
| 205 | | _, NotParsedCorrectly b -> |
| 206 | incr pb_notparsed |
| 207 | | FinalDef a, FinalDef b -> if not (a =*= b) then incr error |
| 208 | |
| 209 | | IfdefTop a, IfdefTop b -> if not (a =*= b) then incr error |
| 210 | |
| 211 | | (FinalDef _|EmptyDef _| |
| 212 | MacroTop (_, _, _)|IfdefTop _| |
| 213 | CppTop _|Definition _|Declaration _), _ -> incr error |
| 214 | |
| 215 | ); |
| 216 | (match () with |
| 217 | | _ when !pb_notparsed > 0 && !error =|= 0 -> |
| 218 | PbOnlyInNotParsedCorrectly "" |
| 219 | | _ when !error > 0 -> Pb "" |
| 220 | | _ -> Correct |
| 221 | ) |
| 222 | end |
| 223 | in |
| 224 | res, xs |
| 225 | |
| 226 | |
| 227 | |
| 228 | (*****************************************************************************) |
| 229 | (* Compare at token level *) |
| 230 | (*****************************************************************************) |
| 231 | |
| 232 | (* Because I now commentize more in parsing, with parsing_hacks, |
| 233 | * compare_ast may say that 2 programs are equal whereas they are not. |
| 234 | * Here I compare token, and so have still the TCommentCpp and TCommentMisc |
| 235 | * so at least detect such differences. |
| 236 | * |
| 237 | * Morover compare_ast is not very precise in his report when it |
| 238 | * detects a difference. So token_diff is better. |
| 239 | * |
| 240 | * I do token_diff but I use programCelement2, so that |
| 241 | * I know if I am in a "notparsable" zone. The tokens are |
| 242 | * in (snd programCelement2). |
| 243 | * |
| 244 | * Faire aussi un compare_token qui se moque des TCommentMisc, |
| 245 | * TCommentCPP et TIfdef ? Normalement si fait ca retrouvera |
| 246 | * les meme resultats que compare_ast. |
| 247 | * |
| 248 | *) |
| 249 | |
| 250 | |
| 251 | (* Pass only "true" comments, dont pass TCommentMisc and TCommentCpp *) |
| 252 | let is_normal_space_or_comment to_expected = function |
| 253 | | Parser_c.TCommentSpace _ |
| 254 | | Parser_c.TCommentNewline _ |
| 255 | |
| 256 | (* | Parser_c.TComma _ *) (* UGLY, because of gcc_opt_comma isomorphism *) |
| 257 | -> true |
| 258 | | Parser_c.TComment _ -> to_expected (* only ignore in compare to expected *) |
| 259 | | _ -> false |
| 260 | |
| 261 | |
| 262 | (* convetion: compare_token generated_file expected_res |
| 263 | * because when there is a notparsablezone in generated_file, I |
| 264 | * don't issue a PbOnlyInNotParsedCorrectly |
| 265 | *) |
| 266 | let do_compare_token adjust_cvs to_expected filename1 filename2 = |
| 267 | |
| 268 | let rec loop xs ys = |
| 269 | match xs, ys with |
| 270 | | [], [] -> None |
| 271 | |
| 272 | (* UGLY, because of gcc_opt_comma isomorphism *) |
| 273 | | (Parser_c.TComma _::Parser_c.TCBrace _::xs), (Parser_c.TCBrace _::ys) -> |
| 274 | loop xs ys |
| 275 | | (Parser_c.TCBrace _::xs), (Parser_c.TComma _::Parser_c.TCBrace _::ys) -> |
| 276 | loop xs ys |
| 277 | |
| 278 | | [], x::xs -> |
| 279 | Some "not same number of tokens inside C elements" |
| 280 | | x::xs, [] -> |
| 281 | Some "not same number of tokens inside C elements" |
| 282 | |
| 283 | | x::xs, y::ys -> |
| 284 | let x' = normal_form_token adjust_cvs x in |
| 285 | let y' = normal_form_token adjust_cvs y in |
| 286 | if x' =*= y' |
| 287 | then loop xs ys |
| 288 | else |
| 289 | let str1, pos1 = |
| 290 | Token_helpers.str_of_tok x, Token_helpers.pos_of_tok x in |
| 291 | let str2, pos2 = |
| 292 | Token_helpers.str_of_tok y, Token_helpers.pos_of_tok y in |
| 293 | Some ("diff token: " ^ str1 ^" VS " ^ str2 ^ "\n" ^ |
| 294 | Common.error_message filename1 (str1, pos1) ^ "\n" ^ |
| 295 | Common.error_message filename2 (str2, pos2) ^ "\n" |
| 296 | ) |
| 297 | |
| 298 | in |
| 299 | let final_loop xs ys = |
| 300 | loop |
| 301 | (xs +> |
| 302 | List.filter (fun x -> not (is_normal_space_or_comment to_expected x))) |
| 303 | (ys +> |
| 304 | List.filter (fun x -> not (is_normal_space_or_comment to_expected x))) |
| 305 | in |
| 306 | |
| 307 | (* |
| 308 | let toks1 = Parse_c.tokens filename1 in |
| 309 | let toks2 = Parse_c.tokens filename2 in |
| 310 | loop toks1 toks2 in |
| 311 | *) |
| 312 | |
| 313 | let (c1, _stat) = Parse_c.parse_c_and_cpp filename1 in |
| 314 | let (c2, _stat) = Parse_c.parse_c_and_cpp filename2 in |
| 315 | |
| 316 | let res = |
| 317 | if List.length c1 <> List.length c2 |
| 318 | then Pb "not same number of entities (func, decl, ...)" |
| 319 | else |
| 320 | zip c1 c2 +> Common.fold_k (fun acc ((a,infoa),(b,infob)) k -> |
| 321 | match a, b with |
| 322 | | NotParsedCorrectly a, NotParsedCorrectly b -> |
| 323 | (match final_loop (snd infoa) (snd infob) with |
| 324 | | None -> k acc |
| 325 | | Some s -> PbOnlyInNotParsedCorrectly s |
| 326 | ) |
| 327 | |
| 328 | | NotParsedCorrectly a, _ -> |
| 329 | Pb "PB parsing only in generated-file" |
| 330 | | _, NotParsedCorrectly b -> |
| 331 | PbOnlyInNotParsedCorrectly "PB parsing only in expected-file" |
| 332 | | _, _ -> |
| 333 | (match final_loop (snd infoa) (snd infob) with |
| 334 | | None -> k acc |
| 335 | | Some s -> Pb s |
| 336 | ) |
| 337 | ) (fun acc -> acc) |
| 338 | (Correct) |
| 339 | in |
| 340 | |
| 341 | let xs = |
| 342 | match !Flag_parsing_c.diff_lines with |
| 343 | None -> |
| 344 | Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2) |
| 345 | | Some n -> |
| 346 | Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in |
| 347 | |
| 348 | (* get rid of the --- and +++ lines *) |
| 349 | let xs = |
| 350 | if null xs |
| 351 | then xs |
| 352 | else Common.drop 2 xs |
| 353 | in |
| 354 | |
| 355 | if null xs && (res <> Correct) |
| 356 | then failwith |
| 357 | "Impossible: How can diff be null and have not Correct in compare_c?"; |
| 358 | |
| 359 | res, xs |
| 360 | |
| 361 | let compare_token = do_compare_token true true |
| 362 | |
| 363 | |
| 364 | (*****************************************************************************) |
| 365 | |
| 366 | (* compare to a res file *) |
| 367 | let compare_default = do_compare_token true true |
| 368 | |
| 369 | (* compare to the source of the transformation *) |
| 370 | let compare_to_original = do_compare_token false false |
| 371 | |
| 372 | |
| 373 | let compare_result_to_string (correct, diffxs) = |
| 374 | match correct with |
| 375 | | Correct -> |
| 376 | "seems correct" ^ "\n" |
| 377 | | Pb s -> |
| 378 | ("seems incorrect: " ^ s) ^ "\n" ^ |
| 379 | "diff (result(-) vs expected_result(+)) = " ^ "\n" ^ |
| 380 | (diffxs +> Common.join "\n") ^ "\n" |
| 381 | | PbOnlyInNotParsedCorrectly s -> |
| 382 | "seems incorrect, but only because of code that was not parsable" ^ "\n"^ |
| 383 | ("explanation:" ^ s) ^ "\n" ^ |
| 384 | "diff (result(-) vs expected_result(+)) = " ^ "\n" ^ |
| 385 | (diffxs +> Common.join "\n") ^ "\n" |
| 386 | |
| 387 | |
| 388 | let compare_result_to_bool correct = |
| 389 | correct =*= Correct |