permit multiline comments and strings in macros
[bpt/coccinelle.git] / parsing_c / compare_c.ml
CommitLineData
0708f913 1(* Yoann Padioleau
ae4735db
C
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
0708f913
C
4 * Copyright (C) 2006, 2007 Ecole des Mines de Nantes
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
0708f913
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
34e49164
C
15open Common
16
17open Ast_c
18
19
ae4735db
C
20type compare_result =
21 | Correct
34e49164
C
22 | Pb of string
23 | PbOnlyInNotParsedCorrectly of string
24
25
26(*****************************************************************************)
27(* Normalise before comparing *)
28(*****************************************************************************)
29
30(* List taken from CVS manual, 'Keyword substitution' chapter. Note
31 * that I do not put "Log" because it is used only in comment, and it
32 * is not enough to substituate until the end of the line. *)
33let cvs_keyword_list = [
34 "Id";"Date"; "Revision"; (* the common one *)
951c7801
C
35 "FreeBSD";"Heimdal";"KAME";"NetBSD";"OpenBSD";"OpenLDAP";"RuOBSD";
36 "SourceForge";
34e49164
C
37 "Name";"Author";"CVSHeader";"Header";"Locker";"RCSfile";"Source";"State";
38 "Rev";
39]
40
41(* Can also have just dollarIDdollar but it is only when you have not
42 * yet committed the file. After the commit it would be a dollarIddollar:.
43 * If reput Id:, do not join the regexp!! otherwise CVS will modify it :)
44 *)
ae4735db 45let cvs_keyword_regexp = Str.regexp
34e49164
C
46 ("\\$\\([A-Za-z_]+\\):[^\\$]*\\$")
47
48
ae4735db
C
49let cvs_compute_newstr s =
50 Str.global_substitute cvs_keyword_regexp (fun _s ->
34e49164
C
51 let substr = Str.matched_string s in
52 assert (substr ==~ cvs_keyword_regexp); (* use its side-effect *)
53 let tag = matched1 substr in
54
55 if not (List.mem tag cvs_keyword_list)
978fd7e5 56 then pr2_once ("unknown CVS keyword: " ^ tag);
ae4735db
C
57
58 "CVS_MAGIC_STRING"
59 ) s
34e49164
C
60
61
62
63
64(* todo: get rid of the type for expressions ? *)
ae4735db
C
65let normal_form_program xs =
66 let bigf = { Visitor_c.default_visitor_c_s with
34e49164 67
ae4735db 68 Visitor_c.kini_s = (fun (k,bigf) ini ->
34e49164 69 match ini with
ae4735db 70 | InitList xs, [i1;i2;iicommaopt] ->
34e49164
C
71 k (InitList xs, [i1;i2])
72 | _ -> k ini
73 );
ae4735db 74 Visitor_c.kexpr_s = (fun (k,bigf) e ->
34e49164
C
75 match e with
76 (* todo: should also do something for multistrings *)
708f4980 77 | (Constant (String (s,kind)), typ), [ii]
ae4735db 78 when Common.string_match_substring cvs_keyword_regexp s ->
34e49164
C
79 let newstr = cvs_compute_newstr s in
80 (Constant (String (newstr,kind)), typ), [rewrap_str newstr ii]
ae4735db 81 | _ -> k e
34e49164
C
82
83 );
ae4735db 84 Visitor_c.ktoplevel_s = (fun (k,bigf) p ->
34e49164 85 match p with
ae4735db 86 | CppTop (Define _) ->
34e49164
C
87 raise Todo
88 (*
89 let (i1, i2, i3) = Common.tuple_of_list3 ii in
90 if Common.string_match_substring cvs_keyword_regexp body
ae4735db 91 then
34e49164
C
92 let newstr = cvs_compute_newstr body in
93 Define ((s, newstr), [i1;i2;rewrap_str newstr i3])
94 else p
95 *)
96 | _ -> k p
97 );
98
99(*
ae4735db 100 Visitor_c.kinfo_s = (fun (k,bigf) i ->
34e49164
C
101 let s = Ast_c.get_str_of_info i in
102 if Common.string_match_substring cvs_keyword_regexp s
ae4735db 103 then
34e49164
C
104 let newstr = cvs_compute_newstr s in
105 rewrap_str newstr i
106 else i
107 );
108*)
109
110 }
111 in
112 xs +> List.map (fun p -> Visitor_c.vk_toplevel_s bigf p)
113
114
115
116
117
118
ae4735db
C
119let normal_form_token adjust_cvs x =
120 let x' =
121 match x with
34e49164
C
122 | Parser_c.TString ((s, kind),i1) -> Parser_c.TString (("",kind), i1)
123 | x -> x
124 in
ae4735db 125 x' +> Token_helpers.visitor_info_of_tok (fun info ->
34e49164
C
126 let info = Ast_c.al_info 0 info in
127 let str = Ast_c.str_of_info info in
951c7801 128 if adjust_cvs && Common.string_match_substring cvs_keyword_regexp str
ae4735db 129 then
34e49164
C
130 let newstr = cvs_compute_newstr str in
131 rewrap_str newstr info
132 else info
133 )
134
ae4735db 135
34e49164
C
136(*****************************************************************************)
137(* Compare at Ast level *)
138(*****************************************************************************)
139
140(* Note that I do a (simple) astdiff to know if there is a difference, but
141 * then I use diff to print the differences. So sometimes you have to dig
ae4735db 142 * a little to find really where the real difference (one not involving
34e49164
C
143 * just spacing difference) was.
144 * Note also that the astdiff is not very accurate. As I skip comments,
145 * macro definitions, those are not in the Ast and if there is a diff
146 * between 2 files regarding macro def, then I will not be able to report it :(
147 * update: I now put the toplevel #define at least in the Ast.
148 * update: You can use token_compare for more precise diff.
149 *
150 * todo?: finer grain astdiff, better report, more precise.
ae4735db 151 *
34e49164
C
152 * todo: do iso between if() S and if() { S }
153 *)
154let compare_ast filename1 filename2 =
155
156 let xs =
157 match !Flag_parsing_c.diff_lines with
158 None ->
159 Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2)
ae4735db 160 | Some n ->
34e49164
C
161 Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in
162
163 (* get rid of the --- and +++ lines *)
ae4735db
C
164 let xs =
165 if null xs
166 then xs
34e49164
C
167 else Common.drop 2 xs
168 in
169
170
ae4735db 171 let process_filename filename =
978fd7e5 172 let (c, _stat) = Parse_c.parse_c_and_cpp filename in
34e49164
C
173 let c = List.map fst c in
174 c +> Lib_parsing_c.al_program +> normal_form_program
175 in
176
177 let c1 = process_filename filename1 in
178 let c2 = process_filename filename2 in
ae4735db 179
34e49164
C
180 let error = ref 0 in
181 let pb_notparsed = ref 0 in
ae4735db
C
182
183 let res =
184 if List.length c1 <> List.length c2
34e49164 185 then Pb "not same number of entities (func, decl, ...)"
ae4735db 186 else
34e49164 187 begin
1b9ae606 188 let rec check = function
34e49164
C
189 | Declaration a, Declaration b -> if not (a =*= b) then incr error
190 | Definition a, Definition b -> if not (a =*= b) then incr error
191 | EmptyDef a, EmptyDef b -> if not (a =*= b) then incr error
ae4735db 192 | MacroTop (a1,b1,c1), MacroTop (a2,b2,c2) ->
34e49164 193 if not ((a1,b1,c1) =*= (a2,b2,c2)) then incr error
ae4735db 194 | CppTop (Include {i_include = a}), CppTop (Include {i_include = b}) ->
485bce71 195 if not (a =*= b) then incr error
ae4735db 196 | CppTop Define _, CppTop Define _ ->
34e49164
C
197 raise Todo
198 (* if not (a =*= b) then incr error *)
ae4735db 199 | NotParsedCorrectly a, NotParsedCorrectly b ->
34e49164 200 if not (a =*= b) then incr pb_notparsed
ae4735db 201 | NotParsedCorrectly a, _ ->
34e49164
C
202 (* Pb only in generated file *)
203 incr error;
204
ae4735db 205 | _, NotParsedCorrectly b ->
34e49164
C
206 incr pb_notparsed
207 | FinalDef a, FinalDef b -> if not (a =*= b) then incr error
485bce71
C
208
209 | IfdefTop a, IfdefTop b -> if not (a =*= b) then incr error
210
1b9ae606
C
211 | Namespace (tlsa, iia), Namespace (tlsb, iib) ->
212 if not (iia =*= iib) then incr error;
213 zip tlsa tlsb +> List.iter check
485bce71
C
214 | (FinalDef _|EmptyDef _|
215 MacroTop (_, _, _)|IfdefTop _|
1b9ae606
C
216 CppTop _|Definition _|Declaration _|Namespace _), _ -> incr error
217 in
218 zip c1 c2 +> List.iter check;
34e49164 219 (match () with
ae4735db 220 | _ when !pb_notparsed > 0 && !error =|= 0 ->
34e49164
C
221 PbOnlyInNotParsedCorrectly ""
222 | _ when !error > 0 -> Pb ""
223 | _ -> Correct
224 )
225 end
226 in
227 res, xs
228
229
230
231(*****************************************************************************)
232(* Compare at token level *)
233(*****************************************************************************)
234
235(* Because I now commentize more in parsing, with parsing_hacks,
236 * compare_ast may say that 2 programs are equal whereas they are not.
237 * Here I compare token, and so have still the TCommentCpp and TCommentMisc
238 * so at least detect such differences.
ae4735db 239 *
34e49164
C
240 * Morover compare_ast is not very precise in his report when it
241 * detects a difference. So token_diff is better.
ae4735db 242 *
34e49164 243 * I do token_diff but I use programCelement2, so that
ae4735db 244 * I know if I am in a "notparsable" zone. The tokens are
34e49164 245 * in (snd programCelement2).
ae4735db
C
246 *
247 * Faire aussi un compare_token qui se moque des TCommentMisc,
248 * TCommentCPP et TIfdef ? Normalement si fait ca retrouvera
34e49164 249 * les meme resultats que compare_ast.
ae4735db 250 *
34e49164
C
251 *)
252
253
254(* Pass only "true" comments, dont pass TCommentMisc and TCommentCpp *)
5636bb2c 255let is_normal_space_or_comment to_expected = function
34e49164
C
256 | Parser_c.TCommentSpace _
257 | Parser_c.TCommentNewline _
258
259(* | Parser_c.TComma _ *) (* UGLY, because of gcc_opt_comma isomorphism *)
260 -> true
5636bb2c 261 | Parser_c.TComment _ -> to_expected (* only ignore in compare to expected *)
34e49164
C
262 | _ -> false
263
264
ae4735db
C
265(* convetion: compare_token generated_file expected_res
266 * because when there is a notparsablezone in generated_file, I
34e49164
C
267 * don't issue a PbOnlyInNotParsedCorrectly
268 *)
5636bb2c 269let do_compare_token adjust_cvs to_expected filename1 filename2 =
34e49164 270
ae4735db 271 let rec loop xs ys =
34e49164
C
272 match xs, ys with
273 | [], [] -> None
274
275 (* UGLY, because of gcc_opt_comma isomorphism *)
276 | (Parser_c.TComma _::Parser_c.TCBrace _::xs), (Parser_c.TCBrace _::ys) ->
277 loop xs ys
ae4735db 278 | (Parser_c.TCBrace _::xs), (Parser_c.TComma _::Parser_c.TCBrace _::ys) ->
34e49164
C
279 loop xs ys
280
ae4735db 281 | [], x::xs ->
34e49164 282 Some "not same number of tokens inside C elements"
ae4735db 283 | x::xs, [] ->
34e49164 284 Some "not same number of tokens inside C elements"
ae4735db
C
285
286 | x::xs, y::ys ->
951c7801
C
287 let x' = normal_form_token adjust_cvs x in
288 let y' = normal_form_token adjust_cvs y in
ae4735db 289 if x' =*= y'
34e49164 290 then loop xs ys
ae4735db
C
291 else
292 let str1, pos1 =
34e49164 293 Token_helpers.str_of_tok x, Token_helpers.pos_of_tok x in
ae4735db 294 let str2, pos2 =
34e49164
C
295 Token_helpers.str_of_tok y, Token_helpers.pos_of_tok y in
296 Some ("diff token: " ^ str1 ^" VS " ^ str2 ^ "\n" ^
297 Common.error_message filename1 (str1, pos1) ^ "\n" ^
298 Common.error_message filename2 (str2, pos2) ^ "\n"
299 )
ae4735db 300
34e49164 301 in
ae4735db 302 let final_loop xs ys =
34e49164 303 loop
5636bb2c
C
304 (xs +>
305 List.filter (fun x -> not (is_normal_space_or_comment to_expected x)))
306 (ys +>
307 List.filter (fun x -> not (is_normal_space_or_comment to_expected x)))
34e49164 308 in
ae4735db 309
34e49164
C
310 (*
311 let toks1 = Parse_c.tokens filename1 in
312 let toks2 = Parse_c.tokens filename2 in
313 loop toks1 toks2 in
314 *)
315
978fd7e5
C
316 let (c1, _stat) = Parse_c.parse_c_and_cpp filename1 in
317 let (c2, _stat) = Parse_c.parse_c_and_cpp filename2 in
34e49164 318
ae4735db
C
319 let res =
320 if List.length c1 <> List.length c2
34e49164 321 then Pb "not same number of entities (func, decl, ...)"
ae4735db
C
322 else
323 zip c1 c2 +> Common.fold_k (fun acc ((a,infoa),(b,infob)) k ->
34e49164 324 match a, b with
ae4735db 325 | NotParsedCorrectly a, NotParsedCorrectly b ->
34e49164
C
326 (match final_loop (snd infoa) (snd infob) with
327 | None -> k acc
328 | Some s -> PbOnlyInNotParsedCorrectly s
329 )
ae4735db
C
330
331 | NotParsedCorrectly a, _ ->
34e49164 332 Pb "PB parsing only in generated-file"
ae4735db 333 | _, NotParsedCorrectly b ->
34e49164 334 PbOnlyInNotParsedCorrectly "PB parsing only in expected-file"
ae4735db 335 | _, _ ->
34e49164
C
336 (match final_loop (snd infoa) (snd infob) with
337 | None -> k acc
338 | Some s -> Pb s
339 )
340 ) (fun acc -> acc)
341 (Correct)
342 in
343
344 let xs =
345 match !Flag_parsing_c.diff_lines with
346 None ->
347 Common.cmd_to_list ("diff -u -b -B "^filename1^ " " ^ filename2)
ae4735db 348 | Some n ->
34e49164
C
349 Common.cmd_to_list ("diff -U "^n^" -b -B "^filename1^" "^filename2) in
350
351 (* get rid of the --- and +++ lines *)
ae4735db
C
352 let xs =
353 if null xs
354 then xs
34e49164
C
355 else Common.drop 2 xs
356 in
357
ae4735db
C
358 if null xs && (res <> Correct)
359 then failwith
34e49164
C
360 "Impossible: How can diff be null and have not Correct in compare_c?";
361
362 res, xs
363
5636bb2c 364let compare_token = do_compare_token true true
34e49164
C
365
366
367(*****************************************************************************)
368
951c7801 369(* compare to a res file *)
5636bb2c 370let compare_default = do_compare_token true true
951c7801
C
371
372(* compare to the source of the transformation *)
5636bb2c 373let compare_to_original = do_compare_token false false
34e49164
C
374
375
376let compare_result_to_string (correct, diffxs) =
377 match correct with
ae4735db 378 | Correct ->
34e49164 379 "seems correct" ^ "\n"
ae4735db 380 | Pb s ->
34e49164
C
381 ("seems incorrect: " ^ s) ^ "\n" ^
382 "diff (result(-) vs expected_result(+)) = " ^ "\n" ^
383 (diffxs +> Common.join "\n") ^ "\n"
ae4735db 384 | PbOnlyInNotParsedCorrectly s ->
34e49164
C
385 "seems incorrect, but only because of code that was not parsable" ^ "\n"^
386 ("explanation:" ^ s) ^ "\n" ^
387 "diff (result(-) vs expected_result(+)) = " ^ "\n" ^
388 (diffxs +> Common.join "\n") ^ "\n"
389
390
ae4735db 391let compare_result_to_bool correct =
b1b2de81 392 correct =*= Correct