| 1 | open Common |
| 2 | |
| 3 | open Ast_c |
| 4 | |
| 5 | |
| 6 | type compare_result = |
| 7 | | Correct |
| 8 | | Pb of string |
| 9 | | PbOnlyInNotParsedCorrectly of string |
| 10 | |
| 11 | |
| 12 | (*****************************************************************************) |
| 13 | (* Normalise before comparing *) |
| 14 | (*****************************************************************************) |
| 15 | |
| 16 | (* List taken from CVS manual, 'Keyword substitution' chapter. Note |
| 17 | * that I do not put "Log" because it is used only in comment, and it |
| 18 | * is not enough to substituate until the end of the line. *) |
| 19 | let cvs_keyword_list = [ |
| 20 | "Id";"Date"; "Revision"; (* the common one *) |
| 21 | "Name";"Author";"CVSHeader";"Header";"Locker";"RCSfile";"Source";"State"; |
| 22 | "Rev"; |
| 23 | ] |
| 24 | |
| 25 | (* Can also have just dollarIDdollar but it is only when you have not |
| 26 | * yet committed the file. After the commit it would be a dollarIddollar:. |
| 27 | * If reput Id:, do not join the regexp!! otherwise CVS will modify it :) |
| 28 | *) |
| 29 | let cvs_keyword_regexp = Str.regexp |
| 30 | ("\\$\\([A-Za-z_]+\\):[^\\$]*\\$") |
| 31 | |
| 32 | |
| 33 | let cvs_compute_newstr s = |
| 34 | Str.global_substitute cvs_keyword_regexp (fun _s -> |
| 35 | let substr = Str.matched_string s in |
| 36 | assert (substr ==~ cvs_keyword_regexp); (* use its side-effect *) |
| 37 | let tag = matched1 substr in |
| 38 | |
| 39 | if not (List.mem tag cvs_keyword_list) |
| 40 | then failwith ("unknown CVS keyword: " ^ tag); |
| 41 | |
| 42 | "CVS_MAGIC_STRING" |
| 43 | ) s |
| 44 | |
| 45 | |
| 46 | |
| 47 | |
| 48 | (* todo: get rid of the type for expressions ? *) |
| 49 | let normal_form_program xs = |
| 50 | let bigf = { Visitor_c.default_visitor_c_s with |
| 51 | |
| 52 | Visitor_c.kini_s = (fun (k,bigf) ini -> |
| 53 | match ini with |
| 54 | | InitList xs, [i1;i2;iicommaopt] -> |
| 55 | k (InitList xs, [i1;i2]) |
| 56 | | _ -> k ini |
| 57 | ); |
| 58 | Visitor_c.kexpr_s = (fun (k,bigf) e -> |
| 59 | match e with |
| 60 | (* todo: should also do something for multistrings *) |
| 61 | | (Constant (String (s,kind)), typ), [ii] |
| 62 | when Common.string_match_substring cvs_keyword_regexp s -> |
| 63 | let newstr = cvs_compute_newstr s in |
| 64 | (Constant (String (newstr,kind)), typ), [rewrap_str newstr ii] |
| 65 | | _ -> k e |
| 66 | |
| 67 | ); |
| 68 | Visitor_c.ktoplevel_s = (fun (k,bigf) p -> |
| 69 | match p with |
| 70 | | CppTop (Define _) -> |
| 71 | raise Todo |
| 72 | (* |
| 73 | let (i1, i2, i3) = Common.tuple_of_list3 ii in |
| 74 | if Common.string_match_substring cvs_keyword_regexp body |
| 75 | then |
| 76 | let newstr = cvs_compute_newstr body in |
| 77 | Define ((s, newstr), [i1;i2;rewrap_str newstr i3]) |
| 78 | else p |
| 79 | *) |
| 80 | | _ -> k p |
| 81 | ); |
| 82 | |
| 83 | (* |
| 84 | Visitor_c.kinfo_s = (fun (k,bigf) i -> |
| 85 | let s = Ast_c.get_str_of_info i in |
| 86 | if Common.string_match_substring cvs_keyword_regexp s |
| 87 | then |
| 88 | let newstr = cvs_compute_newstr s in |
| 89 | rewrap_str newstr i |
| 90 | else i |
| 91 | ); |
| 92 | *) |
| 93 | |
| 94 | } |
| 95 | in |
| 96 | xs +> List.map (fun p -> Visitor_c.vk_toplevel_s bigf p) |
| 97 | |
| 98 | |
| 99 | |
| 100 | |
| 101 | |
| 102 | |
| 103 | let normal_form_token x = |
| 104 | let x' = |
| 105 | match x with |
| 106 | | Parser_c.TString ((s, kind),i1) -> Parser_c.TString (("",kind), i1) |
| 107 | | x -> x |
| 108 | in |
| 109 | x' +> Token_helpers.visitor_info_of_tok (fun info -> |
| 110 | let info = Ast_c.al_info 0 info in |
| 111 | let str = Ast_c.str_of_info info in |
| 112 | if Common.string_match_substring cvs_keyword_regexp str |
| 113 | then |
| 114 | let newstr = cvs_compute_newstr str in |
| 115 | rewrap_str newstr info |
| 116 | else info |
| 117 | ) |
| 118 | |
| 119 | |
| 120 | (*****************************************************************************) |
| 121 | (* Compare at Ast level *) |
| 122 | (*****************************************************************************) |
| 123 | |
| 124 | (* Note that I do a (simple) astdiff to know if there is a difference, but |
| 125 | * then I use diff to print the differences. So sometimes you have to dig |
| 126 | * a little to find really where the real difference (one not involving |
| 127 | * just spacing difference) was. |
| 128 | * Note also that the astdiff is not very accurate. As I skip comments, |
| 129 | * macro definitions, those are not in the Ast and if there is a diff |
| 130 | * between 2 files regarding macro def, then I will not be able to report it :( |
| 131 | * update: I now put the toplevel #define at least in the Ast. |
| 132 | * update: You can use token_compare for more precise diff. |
| 133 | * |
| 134 | * todo?: finer grain astdiff, better report, more precise. |
| 135 | * |
| 136 | * todo: do iso between if() S and if() { S } |
| 137 | *) |
| 138 | let compare_ast filename1 filename2 = |
| 139 | |
| 140 | let xs = |
| 141 | match !Flag_parsing_c.diff_lines with |
| 142 | None -> |
| 143 | Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2) |
| 144 | | Some n -> |
| 145 | Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in |
| 146 | |
| 147 | (* get rid of the --- and +++ lines *) |
| 148 | let xs = |
| 149 | if null xs |
| 150 | then xs |
| 151 | else Common.drop 2 xs |
| 152 | in |
| 153 | |
| 154 | |
| 155 | let process_filename filename = |
| 156 | let (c, _stat) = Parse_c.parse_print_error_heuristic filename in |
| 157 | let c = List.map fst c in |
| 158 | c +> Lib_parsing_c.al_program +> normal_form_program |
| 159 | in |
| 160 | |
| 161 | let c1 = process_filename filename1 in |
| 162 | let c2 = process_filename filename2 in |
| 163 | |
| 164 | let error = ref 0 in |
| 165 | let pb_notparsed = ref 0 in |
| 166 | |
| 167 | let res = |
| 168 | if List.length c1 <> List.length c2 |
| 169 | then Pb "not same number of entities (func, decl, ...)" |
| 170 | else |
| 171 | begin |
| 172 | zip c1 c2 +> List.iter (function |
| 173 | | Declaration a, Declaration b -> if not (a =*= b) then incr error |
| 174 | | Definition a, Definition b -> if not (a =*= b) then incr error |
| 175 | | EmptyDef a, EmptyDef b -> if not (a =*= b) then incr error |
| 176 | | MacroTop (a1,b1,c1), MacroTop (a2,b2,c2) -> |
| 177 | if not ((a1,b1,c1) =*= (a2,b2,c2)) then incr error |
| 178 | | CppTop (Include {i_include = a}), CppTop (Include {i_include = b}) -> |
| 179 | if not (a =*= b) then incr error |
| 180 | | CppTop Define _, CppTop Define _ -> |
| 181 | raise Todo |
| 182 | (* if not (a =*= b) then incr error *) |
| 183 | | NotParsedCorrectly a, NotParsedCorrectly b -> |
| 184 | if not (a =*= b) then incr pb_notparsed |
| 185 | | NotParsedCorrectly a, _ -> |
| 186 | (* Pb only in generated file *) |
| 187 | incr error; |
| 188 | |
| 189 | | _, NotParsedCorrectly b -> |
| 190 | incr pb_notparsed |
| 191 | | FinalDef a, FinalDef b -> if not (a =*= b) then incr error |
| 192 | |
| 193 | | IfdefTop a, IfdefTop b -> if not (a =*= b) then incr error |
| 194 | |
| 195 | | (FinalDef _|EmptyDef _| |
| 196 | MacroTop (_, _, _)|IfdefTop _| |
| 197 | CppTop _|Definition _|Declaration _), _ -> incr error |
| 198 | |
| 199 | ); |
| 200 | (match () with |
| 201 | | _ when !pb_notparsed > 0 && !error = 0 -> |
| 202 | PbOnlyInNotParsedCorrectly "" |
| 203 | | _ when !error > 0 -> Pb "" |
| 204 | | _ -> Correct |
| 205 | ) |
| 206 | end |
| 207 | in |
| 208 | res, xs |
| 209 | |
| 210 | |
| 211 | |
| 212 | (*****************************************************************************) |
| 213 | (* Compare at token level *) |
| 214 | (*****************************************************************************) |
| 215 | |
| 216 | (* Because I now commentize more in parsing, with parsing_hacks, |
| 217 | * compare_ast may say that 2 programs are equal whereas they are not. |
| 218 | * Here I compare token, and so have still the TCommentCpp and TCommentMisc |
| 219 | * so at least detect such differences. |
| 220 | * |
| 221 | * Morover compare_ast is not very precise in his report when it |
| 222 | * detects a difference. So token_diff is better. |
| 223 | * |
| 224 | * I do token_diff but I use programCelement2, so that |
| 225 | * I know if I am in a "notparsable" zone. The tokens are |
| 226 | * in (snd programCelement2). |
| 227 | * |
| 228 | * Faire aussi un compare_token qui se moque des TCommentMisc, |
| 229 | * TCommentCPP et TIfdef ? Normalement si fait ca retrouvera |
| 230 | * les meme resultats que compare_ast. |
| 231 | * |
| 232 | *) |
| 233 | |
| 234 | |
| 235 | (* Pass only "true" comments, dont pass TCommentMisc and TCommentCpp *) |
| 236 | let is_normal_space_or_comment = function |
| 237 | | Parser_c.TComment _ |
| 238 | | Parser_c.TCommentSpace _ |
| 239 | | Parser_c.TCommentNewline _ |
| 240 | |
| 241 | (* | Parser_c.TComma _ *) (* UGLY, because of gcc_opt_comma isomorphism *) |
| 242 | -> true |
| 243 | | _ -> false |
| 244 | |
| 245 | |
| 246 | (* convetion: compare_token generated_file expected_res |
| 247 | * because when there is a notparsablezone in generated_file, I |
| 248 | * don't issue a PbOnlyInNotParsedCorrectly |
| 249 | *) |
| 250 | let compare_token filename1 filename2 = |
| 251 | |
| 252 | |
| 253 | let rec loop xs ys = |
| 254 | match xs, ys with |
| 255 | | [], [] -> None |
| 256 | |
| 257 | (* UGLY, because of gcc_opt_comma isomorphism *) |
| 258 | | (Parser_c.TComma _::Parser_c.TCBrace _::xs), (Parser_c.TCBrace _::ys) -> |
| 259 | loop xs ys |
| 260 | | (Parser_c.TCBrace _::xs), (Parser_c.TComma _::Parser_c.TCBrace _::ys) -> |
| 261 | loop xs ys |
| 262 | |
| 263 | | [], x::xs -> |
| 264 | Some "not same number of tokens inside C elements" |
| 265 | | x::xs, [] -> |
| 266 | Some "not same number of tokens inside C elements" |
| 267 | |
| 268 | | x::xs, y::ys -> |
| 269 | let x' = normal_form_token x in |
| 270 | let y' = normal_form_token y in |
| 271 | if x' = y' |
| 272 | then loop xs ys |
| 273 | else |
| 274 | let str1, pos1 = |
| 275 | Token_helpers.str_of_tok x, Token_helpers.pos_of_tok x in |
| 276 | let str2, pos2 = |
| 277 | Token_helpers.str_of_tok y, Token_helpers.pos_of_tok y in |
| 278 | Some ("diff token: " ^ str1 ^" VS " ^ str2 ^ "\n" ^ |
| 279 | Common.error_message filename1 (str1, pos1) ^ "\n" ^ |
| 280 | Common.error_message filename2 (str2, pos2) ^ "\n" |
| 281 | ) |
| 282 | |
| 283 | in |
| 284 | let final_loop xs ys = |
| 285 | loop |
| 286 | (xs +> List.filter (fun x -> not (is_normal_space_or_comment x))) |
| 287 | (ys +> List.filter (fun x -> not (is_normal_space_or_comment x))) |
| 288 | in |
| 289 | |
| 290 | (* |
| 291 | let toks1 = Parse_c.tokens filename1 in |
| 292 | let toks2 = Parse_c.tokens filename2 in |
| 293 | loop toks1 toks2 in |
| 294 | *) |
| 295 | |
| 296 | let (c1, _stat) = Parse_c.parse_print_error_heuristic filename1 in |
| 297 | let (c2, _stat) = Parse_c.parse_print_error_heuristic filename2 in |
| 298 | |
| 299 | let res = |
| 300 | if List.length c1 <> List.length c2 |
| 301 | then Pb "not same number of entities (func, decl, ...)" |
| 302 | else |
| 303 | zip c1 c2 +> Common.fold_k (fun acc ((a,infoa),(b,infob)) k -> |
| 304 | match a, b with |
| 305 | | NotParsedCorrectly a, NotParsedCorrectly b -> |
| 306 | (match final_loop (snd infoa) (snd infob) with |
| 307 | | None -> k acc |
| 308 | | Some s -> PbOnlyInNotParsedCorrectly s |
| 309 | ) |
| 310 | |
| 311 | | NotParsedCorrectly a, _ -> |
| 312 | Pb "PB parsing only in generated-file" |
| 313 | | _, NotParsedCorrectly b -> |
| 314 | PbOnlyInNotParsedCorrectly "PB parsing only in expected-file" |
| 315 | | _, _ -> |
| 316 | (match final_loop (snd infoa) (snd infob) with |
| 317 | | None -> k acc |
| 318 | | Some s -> Pb s |
| 319 | ) |
| 320 | ) (fun acc -> acc) |
| 321 | (Correct) |
| 322 | in |
| 323 | |
| 324 | let xs = |
| 325 | match !Flag_parsing_c.diff_lines with |
| 326 | None -> |
| 327 | Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2) |
| 328 | | Some n -> |
| 329 | Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in |
| 330 | |
| 331 | (* get rid of the --- and +++ lines *) |
| 332 | let xs = |
| 333 | if null xs |
| 334 | then xs |
| 335 | else Common.drop 2 xs |
| 336 | in |
| 337 | |
| 338 | if null xs && (res <> Correct) |
| 339 | then failwith |
| 340 | "Impossible: How can diff be null and have not Correct in compare_c?"; |
| 341 | |
| 342 | res, xs |
| 343 | |
| 344 | |
| 345 | |
| 346 | |
| 347 | (*****************************************************************************) |
| 348 | |
| 349 | let compare_default = compare_token |
| 350 | |
| 351 | |
| 352 | let compare_result_to_string (correct, diffxs) = |
| 353 | match correct with |
| 354 | | Correct -> |
| 355 | "seems correct" ^ "\n" |
| 356 | | Pb s -> |
| 357 | ("seems incorrect: " ^ s) ^ "\n" ^ |
| 358 | "diff (result(-) vs expected_result(+)) = " ^ "\n" ^ |
| 359 | (diffxs +> Common.join "\n") ^ "\n" |
| 360 | | PbOnlyInNotParsedCorrectly s -> |
| 361 | "seems incorrect, but only because of code that was not parsable" ^ "\n"^ |
| 362 | ("explanation:" ^ s) ^ "\n" ^ |
| 363 | "diff (result(-) vs expected_result(+)) = " ^ "\n" ^ |
| 364 | (diffxs +> Common.join "\n") ^ "\n" |
| 365 | |
| 366 | |
| 367 | let compare_result_to_bool correct = |
| 368 | correct = Correct |