[bpt/coccinelle.git] / parsing_c / parsing_stat.ml

(* Yoann Padioleau
 * 
 * Copyright (C) 2008, 2009 University of Urbana Champaign
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License (GPL)
 * version 2 as published by the Free Software Foundation.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * file license.txt for more details.
 *)

open Common 

(* if do .mli:
val print_parsing_stat_list: parsing_stat list -> unit
*)

(*****************************************************************************)
(* Stat *)
(*****************************************************************************)
(* Parsing statistics gathered for one file. The record is created by
 * default_stat with everything zeroed; its fields are mutable so that
 * they can be updated in place afterwards. print_parsing_stat_list and
 * print_recurring_problematic_tokens below consume lists of such records.
 *)
type parsing_stat = {
    (* the file these numbers are about *)
    filename: filename;
    (* when true, print_parsing_stat_list reports the file as "TIMEOUT"
     * and never counts it as perfect *)
    mutable have_timeout: bool;

    (* summed over all files and reported as "nb good" *)
    mutable correct: int;  
    (* summed over all files and reported as "nb bad"; a file with
     * bad > 0 is counted in "pbs". According to the note below it spans
     * from the start to the end of the enclosing function, not just the
     * erroneous region.
     * NOTE(review): the unit (lines vs tokens) is not visible here. *)
    mutable bad: int;

    (* what was turned into a comment by our cpp commentizer; reported
     * as "passed/commentized" *)
    mutable commentized: int; (* by our cpp commentizer *)

    (* if want to know exactly what was passed through, uncomment:
     *  
     * mutable passing_through_lines: int;
     * 
     * it differs from bad by starting from the error to
     * the synchro point instead of starting from start of
     * function to end of function.
     *)

    (* one entry per recorded parsing error: the identifiers found on the
     * error line together with the number of that line. Used by
     * print_recurring_problematic_tokens to rank recurring identifiers. *)
    mutable problematic_lines: 
      (string list (* ident in error line *) * int (* line_error *)) list;

  } 

(* Fresh statistics record for [file]: no timeout seen, every counter at
 * zero and no problematic line recorded yet.
 *)
let default_stat file =
  {
    filename = file;
    have_timeout = false;
    problematic_lines = [];
    commentized = 0;
    correct = 0;
    bad = 0;
  }

(* todo: stat per dir?  give in terms of func_or_decl numbers:
 * nb func_or_decl pbs / nb func_or_decl total?
 *
 * note: that said, if some files contain #ifdef whose values we do not
 * know, then we will parse the whole file correctly and yet there will be
 * no definition at all, and therefore in fact no coverage.
 * ==> TODO evaluate the unparsed parts?
 *)

(* Print, with [pr], a summary of the per-file statistics in [statxs].
 *
 * - [verbose] (default: false): when true, first list every file that
 *   timed out or has bad > 0 (with "TIMEOUT" or its bad count), then every
 *   file whose commentized count is above 100.
 * - Always printed: the number of files, of perfect files (no timeout and
 *   bad = 0), of files with bad > 0, of files that timed out, and the
 *   percentage of perfect files; then the summed correct/commentized/bad
 *   counts with the corresponding percentages.
 *
 * An empty [statxs], or statistics in which nothing was counted, yields
 * percentages of 0 instead of raising Division_by_zero (integer ratio) or
 * printing nan/inf (float ratios).
 *)
let print_parsing_stat_list ?(verbose=false) = fun statxs -> 
  let count pred = statxs +> List.filter pred +> List.length in
  let sum field = 
    statxs +> List.fold_left (fun acc x -> acc + field x) 0 in

  let total = List.length statxs in
  let perfect = 
    count (function {have_timeout = false; bad = 0} -> true | _ -> false)
  in

  if verbose then begin
    pr "\n\n\n---------------------------------------------------------------";
    pr "pbs with files:";
    statxs 
      +> List.filter (fun x -> x.have_timeout || x.bad > 0)
      +> List.iter (fun x -> 
        pr (x.filename ^ "  " ^ 
              (if x.have_timeout then "TIMEOUT" else i_to_s x.bad)));

    pr "\n\n\n";
    pr "files with lots of tokens passed/commentized:";
    let threshold_passed = 100 in
    statxs 
      +> List.filter (fun x -> x.commentized > threshold_passed)
      +> List.iter (fun x -> pr (x.filename ^ "  " ^ (i_to_s x.commentized)));

    pr "\n\n\n---------------------------------------------------------------";
  end;

  (* a file with bad > 0 is a "pb" whether or not it also timed out *)
  let pbs = count (fun x -> x.bad > 0) in
  let timeouts = count (fun x -> x.have_timeout) in
  (* integer division raises Division_by_zero, so guard the empty list *)
  let percent_perfect = 
    match total with
    | 0 -> 0
    | _ -> (100 * perfect) / total
  in
  pr (
    (sprintf "NB total files = %d; " total) ^
    (sprintf "perfect = %d; " perfect) ^
    (sprintf "pbs = %d; "     pbs) ^
    (sprintf "timeout = %d; " timeouts) ^
    (sprintf "=========> %d" percent_perfect) ^ "%"
  );

  let good = sum (fun x -> x.correct) in
  let bad  = sum (fun x -> x.bad) in
  let passed = sum (fun x -> x.commentized) in
  let gf, badf = float_of_int good, float_of_int bad in
  let passedf = float_of_int passed in
  (* float division by zero does not raise but would print nan or inf *)
  let percent_passed = 
    match good with
    | 0 -> 0.0
    | _ -> 100.0 *. (passedf /. gf)
  in
  let percent_good = 
    match good + bad with
    | 0 -> 0.0
    | _ -> 100.0 *. (gf /. (gf +. badf))
  in
  pr (
    (sprintf "nb good = %d,  nb passed = %d " good passed) ^
    (sprintf "=========> %f" percent_passed) ^ "% passed"
  );
  pr (
    (sprintf "nb good = %d,  nb bad = %d " good bad) ^
    (sprintf "=========> %f" percent_good) ^ "% good"
  )

(*****************************************************************************)
(* Recurring error diagnostic *)
(*****************************************************************************)
(* asked/inspired by reviewer of CC'09 *)

(* Return, in file order, the elements of [Common.cat_array file] whose
 * index i satisfies
 *   max 0 (line - context) <= i < min (length of the array) (line + context).
 * The result is empty when that window is empty.
 * NOTE(review): how array indices map to line numbers is decided by
 * Common.cat_array, which is not visible here.
 *)
let lines_around_error_line ~context (file, line) = 
  let all_lines = Common.cat_array file in
  let first = max 0 (line - context) in
  let past_last = min (Array.length all_lines) (line + context) in
  (* walk the window backwards so that consing yields file order *)
  let rec collect i acc = 
    if i < first 
    then acc
    else collect (i - 1) (all_lines.(i) :: acc)
  in
  collect (past_last - 1) []


(* Print, with [pr2], at most 10 of the identifiers that appear most often
 * on the error lines recorded in the [problematic_lines] of the stats
 * [xs]. For each one, print its count and a few source lines around one
 * example error, obtained with lines_around_error_line ~context:2.
 *
 * The table maps an identifier to (count, (file, line) of an example).
 * NOTE(review): this relies on Common.hupdate_default applying the update
 * function to the default value on first insertion, so that counts start
 * at 1; confirm against Common.
 *)
let print_recurring_problematic_tokens xs = 
  let counts = Hashtbl.create 101 in
  let record_ident example ident = 
    Common.hupdate_default ident
      (fun (nb, first_example) -> nb + 1, first_example)
      (fun () -> 0, example)
      counts
  in
  xs +> List.iter (fun stat -> 
    stat.problematic_lines +> List.iter (fun (idents, line_error) -> 
      idents +> List.iter (record_ident (stat.filename, line_error))));

  (* highest count first *)
  let by_decreasing_count (_, (nb1, _)) (_, (nb2, _)) = compare nb2 nb1 in
  let print_entry (ident, (nb, (file_ex, line_ex))) = 
    pr2 (spf "%s: present in %d parsing errors" ident nb);
    pr2 ("example: ");
    lines_around_error_line ~context:2 (file_ex, line_ex)
    +> List.iter (fun l -> pr2 ("       " ^ l))
  in

  pr2_xxxxxxxxxxxxxxxxx();
  pr2 ("maybe 10 most problematic tokens");
  pr2_xxxxxxxxxxxxxxxxx();
  Common.hash_to_list counts
  +> List.sort by_decreasing_count
  +> Common.take_safe 10
  +> List.iter print_entry;
  pr2_xxxxxxxxxxxxxxxxx();
  ()

  
(*****************************************************************************)
(* Stat *)
(*****************************************************************************)

(* Those variables were written for CC09, to evaluate the need for 
 * some of our heuristics and extensions.
 * 
 * Each one is a global integer counter starting at 0; nothing in this
 * file increments them, they are only read by print_stat_numbers through
 * assoc_stat_number.
 * 
 * coupling: if you add a new var, modify also assoc_stat_number below,
 * otherwise the new counter is never printed.
 *)

let nTypedefInfer = ref 0

let nIncludeGrammar = ref 0
let nIncludeHack = ref 0

let nIteratorGrammar = ref 0 
let nIteratorHeuristic = ref 0 

let nMacroTopDecl = ref 0
let nMacroStructDecl = ref 0
let nMacroDecl = ref 0
let nMacroStmt = ref 0
let nMacroString = ref 0
let nMacroHigherOrder = ref 0 (* actions *)
let nMacrohigherTypeGrammar = ref 0
let nMacroAttribute = ref 0

let nIfdefTop = ref 0
let nIfdefStmt = ref 0
let nIfdefStruct = ref 0
let nIfdefInitializer = ref 0
(* nIfdefExpr, nIfdefType *)

let nIfdefFunheader = ref 0

let nIfdefExprPassing = ref 0
let nIfdefPassing = ref 0

let nIncludePassing = ref 0
let nDefinePassing = ref 0

let nIfdefZero = ref 0
let nIfdefVersion = ref 0


let nGccTypeof = ref 0
let nGccLongLong = ref 0
let nGccAsm = ref 0
let nGccInline = ref 0
let nGccAttribute = ref 0
let nGccCaseRange = ref 0
let nGccMixDecl = ref 0
let nGccDesignator = ref 0
let nGccStmtExpr = ref 0
let nGccConstructor = ref 0
let nGccEmptyStruct = ref 0
let nGccNestedFunc = ref 0

let nGccMisc = ref 0


let nDefineHack = ref 0

let nDefineConstant = ref 0
let nDefineStmt = ref 0
let nDefineExpr = ref 0
(* both below require some heuristic support *)
let nDefineWhile0 = ref 0
let nDefineInit = ref 0

let nDefineOther = ref 0

let nUndef = ref 0
let nPragmaAndCo = ref 0

(* let nDirectiveTop = ref 0 *)
let nDirectiveStmt = ref 0
let nDirectiveStruct = ref 0
let nDirectiveInitializer = ref 0


(* from standard.h *)
let nMacroHint = ref 0
let nMacroExpand = ref 0

let nNotParsedCorrectly = ref 0

(* Registry of all the counters above: display name paired with the
 * counter itself (the ref, not a snapshot of its value), in the order in
 * which print_stat_numbers prints them. Every counter declared above must
 * appear here exactly once.
 *)
let assoc_stat_number = 
  [
    "nTypedefInfer", nTypedefInfer;

    "nIteratorHeuristic", nIteratorHeuristic;

    "nMacroTopDecl", nMacroTopDecl;
    "nMacroStructDecl", nMacroStructDecl;
    "nMacroDecl", nMacroDecl;
    "nMacroStmt", nMacroStmt;
    "nMacroString", nMacroString;
    "nMacroHigherOrder", nMacroHigherOrder;
    "nMacroAttribute", nMacroAttribute;

    "nMacrohigherTypeGrammar", nMacrohigherTypeGrammar;

    "nIfdefTop", nIfdefTop;
    "nIfdefStmt", nIfdefStmt;
    "nIfdefStruct", nIfdefStruct;
    "nIfdefInitializer", nIfdefInitializer;

    "nIfdefFunheader", nIfdefFunheader;
    "nIfdefZero", nIfdefZero;
    "nIfdefVersion", nIfdefVersion;
    "nIfdefExprPassing", nIfdefExprPassing;
    "nIfdefPassing", nIfdefPassing;

    "nIncludePassing", nIncludePassing;
    "nDefinePassing", nDefinePassing;

    "nMacroExpand", nMacroExpand;
    "nMacroHint", nMacroHint;


    "nGccTypeof", nGccTypeof;
    "nGccLongLong", nGccLongLong;
    "nGccAsm", nGccAsm;
    "nGccInline", nGccInline;
    "nGccAttribute", nGccAttribute;
    "nGccCaseRange", nGccCaseRange;
    "nGccMixDecl", nGccMixDecl;
    "nGccDesignator", nGccDesignator;
    "nGccStmtExpr", nGccStmtExpr;
    "nGccConstructor", nGccConstructor;
    "nGccEmptyStruct", nGccEmptyStruct;
    "nGccNestedFunc", nGccNestedFunc;

    "nGccMisc", nGccMisc;


    "nDefineHack", nDefineHack;

    "nDefineConstant", nDefineConstant;
    "nDefineStmt", nDefineStmt;
    "nDefineExpr", nDefineExpr;
    (* was declared above but missing from this list, hence never printed *)
    "nDefineWhile0", nDefineWhile0;
    "nDefineInit", nDefineInit;
    "nDefineOther", nDefineOther;

    "nUndef", nUndef;
    "nPragmaAndCo", nPragmaAndCo;

    "nDirectiveStmt", nDirectiveStmt;
    "nDirectiveStruct", nDirectiveStruct;
    "nDirectiveInitializer", nDirectiveInitializer;

    "nNotParsedCorrectly", nNotParsedCorrectly;


    (* less *)
    "nIncludeGrammar", nIncludeGrammar;
    "nIncludeHack", nIncludeHack;

    "nIteratorGrammar", nIteratorGrammar;
  ]

(* Print, with [pr2], one "name -> value" line per entry of
 * assoc_stat_number, in list order; names are left-aligned and padded to
 * 30 characters.
 *)
let print_stat_numbers () = 
  let print_counter (name, counter) = 
    pr2 (spf "%-30s -> %d" name !counter)
  in
  List.iter print_counter assoc_stat_number
Commit	Line	Data
0708f913 C	1	(* Yoann Padioleau
	2	*
	3	* Copyright (C) 2008, 2009 University of Urbana Champaign
	4	*
	5	* This program is free software; you can redistribute it and/or
	6	* modify it under the terms of the GNU General Public License (GPL)
	7	* version 2 as published by the Free Software Foundation.
	8	*
	9	* This program is distributed in the hope that it will be useful,
	10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	* file license.txt for more details.
	13	*)
	14
485bce71 C	15	open Common
	16
	17	(* if do .mli:
	18	val print_parsing_stat_list: parsing_stat list -> unit
	19	*)
	20
	21	(*****************************************************************************)
	22	(* Stat *)
	23	(*****************************************************************************)
	24	type parsing_stat = {
	25	filename: filename;
	26	mutable have_timeout: bool;
	27
	28	mutable correct: int;
	29	mutable bad: int;
	30
	31	mutable commentized: int; (* by our cpp commentizer *)
	32
	33	(* if want to know exactly what was passed through, uncomment:
	34	*
	35	* mutable passing_through_lines: int;
	36	*
	37	* it differs from bad by starting from the error to
	38	* the synchro point instead of starting from start of
	39	* function to end of function.
	40	*)
	41
91eba41f C	42	mutable problematic_lines:
	43	(string list (* ident in error line ) int (* line_error *)) list;
	44
485bce71 C	45	}
	46
	47	let default_stat file = {
	48	filename = file;
	49	have_timeout = false;
	50	correct = 0; bad = 0;
	51	commentized = 0;
91eba41f	52	problematic_lines = [];
485bce71 C	53	}
	54
	55	(* todo: stat per dir ? give in terms of func_or_decl numbers:
	56	* nbfunc_or_decl pbs / nbfunc_or_decl total ?/
	57	*
	58	* note: cela dit si y'a des fichiers avec des #ifdef dont on connait pas les
	59	* valeurs alors on parsera correctement tout le fichier et pourtant y'aura
	60	* aucune def et donc aucune couverture en fait.
	61	* ==> TODO evaluer les parties non parsé ?
	62	*)
	63
	64	let print_parsing_stat_list ?(verbose=false) = fun statxs ->
	65	let total = List.length statxs in
	66	let perfect =
	67	statxs
	68	+> List.filter (function
	69	{have_timeout = false; bad = 0} -> true \| _ -> false)
	70	+> List.length
	71	in
	72
	73	if verbose then begin
	74	pr "\n\n\n---------------------------------------------------------------";
	75	pr "pbs with files:";
	76	statxs
	77	+> List.filter (function
	78	\| {have_timeout = true} -> true
	79	\| {bad = n} when n > 0 -> true
	80	\| _ -> false)
	81	+> List.iter (function
	82	{filename = file; have_timeout = timeout; bad = n} ->
	83	pr (file ^ " " ^ (if timeout then "TIMEOUT" else i_to_s n));
	84	);
	85
	86	pr "\n\n\n";
	87	pr "files with lots of tokens passed/commentized:";
	88	let threshold_passed = 100 in
	89	statxs
	90	+> List.filter (function
	91	\| {commentized = n} when n > threshold_passed -> true
	92	\| _ -> false)
	93	+> List.iter (function
	94	{filename = file; commentized = n} ->
	95	pr (file ^ " " ^ (i_to_s n));
	96	);
	97
	98	pr "\n\n\n---------------------------------------------------------------";
	99	end;
	100
	101	pr (
	102	(sprintf "NB total files = %d; " total) ^
	103	(sprintf "perfect = %d; " perfect) ^
	104	(sprintf "pbs = %d; " (statxs +> List.filter (function
	105	{have_timeout = b; bad = n} when n > 0 -> true \| _ -> false)
	106	+> List.length)) ^
	107	(sprintf "timeout = %d; " (statxs +> List.filter (function
	108	{have_timeout = true; bad = n} -> true \| _ -> false)
	109	+> List.length)) ^
	110	(sprintf "=========> %d" ((100 * perfect) / total)) ^ "%"
	111
	112	);
	113	let good = statxs +> List.fold_left (fun acc {correct = x} -> acc+x) 0 in
	114	let bad = statxs +> List.fold_left (fun acc {bad = x} -> acc+x) 0 in
	115	let passed = statxs +> List.fold_left (fun acc {commentized = x} -> acc+x) 0
	116	in
117	let gf, badf = float_of_int good, float_of_int bad in
118	let passedf = float_of_int passed in
119	pr (
120	(sprintf "nb good = %d, nb passed = %d " good passed) ^
113803cf	121	(sprintf "=========> %f" (100.0 *. (passedf /. gf)) ^ "% passed")
485bce71 C	122	);
	123	pr (
	124	(sprintf "nb good = %d, nb bad = %d " good bad) ^
113803cf	125	(sprintf "=========> %f" (100.0 *. (gf /. (gf +. badf))) ^ "% good"
485bce71 C	126	)
	127	)
	128
91eba41f C	129	(*****************************************************************************)
	130	(* Recurring error diagnostic *)
	131	(*****************************************************************************)
	132	(* asked/inspired by reviewer of CC'09 *)
	133
	134	let lines_around_error_line ~context (file, line) =
	135	let arr = Common.cat_array file in
	136
	137	let startl = max 0 (line - context) in
	138	let endl = min (Array.length arr) (line + context) in
	139	let res = ref [] in
	140
113803cf	141	for i = startl to endl -1 do
91eba41f C	142	Common.push2 arr.(i) res
	143	done;
	144	List.rev !res
	145
	146
	147
	148	let print_recurring_problematic_tokens xs =
	149	let h = Hashtbl.create 101 in
	150	xs +> List.iter (fun x ->
	151	let file = x.filename in
	152	x.problematic_lines +> List.iter (fun (xs, line_error) ->
	153	xs +> List.iter (fun s ->
	154	Common.hupdate_default s
	155	(fun (old, example) -> old + 1, example)
	156	(fun() -> 0, (file, line_error)) h;
	157	)));
	158	pr2_xxxxxxxxxxxxxxxxx();
	159	pr2 ("maybe 10 most problematic tokens");
	160	pr2_xxxxxxxxxxxxxxxxx();
	161	Common.hash_to_list h
	162	+> List.sort (fun (k1,(v1,_)) (k2,(v2,_)) -> compare v2 v1)
	163	+> Common.take_safe 10
	164	+> List.iter (fun (k,(i, (file_ex, line_ex))) ->
	165	pr2 (spf "%s: present in %d parsing errors" k i);
	166	pr2 ("example: ");
	167	let lines = lines_around_error_line ~context:2 (file_ex, line_ex) in
	168	lines +> List.iter (fun s -> pr2 (" " ^ s));
	169
	170	);
	171	pr2_xxxxxxxxxxxxxxxxx();
	172	()
	173
	174
	175
	176
485bce71 C	177	(*****************************************************************************)
	178	(* Stat *)
	179	(*****************************************************************************)
	180
91eba41f C	181	(* Those variables were written for CC09, to evaluate the need for
	182	* some of our heuristics and extensions.
	183	*
	184	* coupling: if you add a new var, modify also assoc_stat_number below
	185	*)
485bce71 C	186
	187	let nTypedefInfer = ref 0
	188
	189	let nIncludeGrammar = ref 0
	190	let nIncludeHack = ref 0
	191
	192	let nIteratorGrammar = ref 0
	193	let nIteratorHeuristic = ref 0
	194
	195	let nMacroTopDecl = ref 0
	196	let nMacroStructDecl = ref 0
	197	let nMacroDecl = ref 0
	198	let nMacroStmt = ref 0
	199	let nMacroString = ref 0
	200	let nMacroHigherOrder = ref 0 (* actions *)
	201	let nMacrohigherTypeGrammar = ref 0
	202	let nMacroAttribute = ref 0
	203
	204	let nIfdefTop = ref 0
	205	let nIfdefStmt = ref 0
	206	let nIfdefStruct = ref 0
	207	let nIfdefInitializer = ref 0
	208	(* nIfdefExpr, nIfdefType *)
	209
	210	let nIfdefFunheader = ref 0
	211
	212	let nIfdefExprPassing = ref 0
	213	let nIfdefPassing = ref 0
	214
	215	let nIncludePassing = ref 0
	216	let nDefinePassing = ref 0
	217
	218	let nIfdefZero = ref 0
	219	let nIfdefVersion = ref 0
	220
	221
	222
	223	let nGccTypeof = ref 0
	224	let nGccLongLong = ref 0
	225	let nGccAsm = ref 0
	226	let nGccInline = ref 0
	227	let nGccAttribute = ref 0
	228	let nGccCaseRange = ref 0
	229	let nGccMixDecl = ref 0
	230	let nGccDesignator = ref 0
	231	let nGccStmtExpr = ref 0
	232	let nGccConstructor = ref 0
	233	let nGccEmptyStruct = ref 0
	234	let nGccNestedFunc = ref 0
	235
	236	let nGccMisc = ref 0
	237
	238
	239
	240	let nDefineHack = ref 0
	241
	242	let nDefineConstant = ref 0
	243	let nDefineStmt = ref 0
	244	let nDefineExpr = ref 0
	245	(* both below require some heuristic support *)
	246	let nDefineWhile0 = ref 0
	247	let nDefineInit = ref 0
	248
	249	let nDefineOther = ref 0
250
251	let nUndef = ref 0
252	let nPragmaAndCo = ref 0
253
254	(* let nDirectiveTop = ref 0 *)
255	let nDirectiveStmt = ref 0
256	let nDirectiveStruct = ref 0
257	let nDirectiveInitializer = ref 0
258
259
260	(* from standard.h *)
261	let nMacroHint = ref 0
262	let nMacroExpand = ref 0
263
264	let nNotParsedCorrectly = ref 0
265
266	let assoc_stat_number =
267	[
268	"nTypedefInfer", nTypedefInfer;
269
270	"nIteratorHeuristic", nIteratorHeuristic;
271
272	"nMacroTopDecl", nMacroTopDecl;
273	"nMacroStructDecl", nMacroStructDecl;
274	"nMacroDecl", nMacroDecl;
275	"nMacroStmt", nMacroStmt;
276	"nMacroString", nMacroString;
277	"nMacroHigherOrder", nMacroHigherOrder;
278	"nMacroAttribute", nMacroAttribute;
279
280	"nMacrohigherTypeGrammar", nMacrohigherTypeGrammar;
281
282	"nIfdefTop", nIfdefTop;
283	"nIfdefStmt", nIfdefStmt;
284	"nIfdefStruct", nIfdefStruct;
285	"nIfdefInitializer", nIfdefInitializer;
286
287	"nIfdefFunheader", nIfdefFunheader;
288	"nIfdefZero", nIfdefZero;
289	"nIfdefVersion", nIfdefVersion;
290	"nIfdefExprPassing", nIfdefExprPassing;
291	"nIfdefPassing", nIfdefPassing;
292
293	"nIncludePassing", nIncludePassing;
294	"nDefinePassing", nDefinePassing;
295
296	"nMacroExpand", nMacroExpand;
297	"nMacroHint", nMacroHint;
298
299
300	"nGccTypeof", nGccTypeof;
301	"nGccLongLong", nGccLongLong;
302	"nGccAsm", nGccAsm;
303	"nGccInline", nGccInline;
304	"nGccAttribute", nGccAttribute;
305	"nGccCaseRange", nGccCaseRange;
306	"nGccMixDecl", nGccMixDecl;
307	"nGccDesignator", nGccDesignator;
308	"nGccStmtExpr", nGccStmtExpr;
309	"nGccConstructor", nGccConstructor;
310	"nGccEmptyStruct", nGccEmptyStruct;
311	"nGccNestedFunc", nGccNestedFunc;
312
313	"nGccMisc", nGccMisc;
314
315
316	"nDefineHack", nDefineHack;
317
318	"nDefineConstant", nDefineConstant;
319	"nDefineStmt", nDefineStmt;
320	"nDefineExpr", nDefineExpr;
321	"nDefineInit", nDefineInit;
322	"nDefineOther", nDefineOther;
323
324	"nUndef", nUndef;
325	"nPragmaAndCo", nPragmaAndCo;
326
327	"nDirectiveStmt", nDirectiveStmt;
328	"nDirectiveStruct", nDirectiveStruct;
329	"nDirectiveInitializer", nDirectiveInitializer;
330
331	"nNotParsedCorrectly", nNotParsedCorrectly;
332
333
334	(* less *)
335	"nIncludeGrammar", nIncludeGrammar;
336	"nIncludeHack", nIncludeHack;
337
338	"nIteratorGrammar", nIteratorGrammar;
339	]
340
341	let print_stat_numbers () =
342	assoc_stat_number +> List.iter (fun (k, vref) ->
343	pr2 (spf "%-30s -> %d" k !vref);
344	)