HCoop Git - bpt/coccinelle.git/blame_incremental

... / ...

Commit	Line	Data
	1	(* Copyright (C) 2006, 2007, 2008 Yoann Padioleau
	2	*
	3	* This program is free software; you can redistribute it and/or
	4	* modify it under the terms of the GNU General Public License (GPL)
	5	* version 2 as published by the Free Software Foundation.
	6	*
	7	* This program is distributed in the hope that it will be useful,
	8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	* file license.txt for more details.
	11	*)
	12
	13	open Common
	14
	15	module TH = Token_helpers
	16	module LP = Lexer_parser
	17
	18	module Stat = Parsing_stat
	19
	20	(*****************************************************************************)
	21	(* Wrappers *)
	22	(*****************************************************************************)
	23	let pr2 s =
	24	if !Flag_parsing_c.verbose_parsing
	25	then Common.pr2 s
	26
	27	let pr2_once s =
	28	if !Flag_parsing_c.verbose_parsing
	29	then Common.pr2_once s
	30
	31	(*****************************************************************************)
	32	(* Helpers *)
	33	(*****************************************************************************)
	34
	35	let lexbuf_to_strpos lexbuf =
	36	(Lexing.lexeme lexbuf, Lexing.lexeme_start lexbuf)
	37
	38	let token_to_strpos tok =
	39	(TH.str_of_tok tok, TH.pos_of_tok tok)
	40
	41
	42	let error_msg_tok tok =
	43	let file = TH.file_of_tok tok in
	44	if !Flag_parsing_c.verbose_parsing
	45	then Common.error_message file (token_to_strpos tok)
	46	else ("error in " ^ file ^ "set verbose_parsing for more info")
	47
	48
	49	let print_bad line_error (start_line, end_line) filelines =
	50	begin
	51	pr2 ("badcount: " ^ i_to_s (end_line - start_line));
	52
	53	for i = start_line to end_line do
	54	let line = filelines.(i) in
	55
	56	if i = line_error
	57	then pr2 ("BAD:!!!!!" ^ " " ^ line)
	58	else pr2 ("bad:" ^ " " ^ line)
	59	done
	60	end
	61
	62
	63
	64	let mk_info_item2 filename toks =
	65	let buf = Buffer.create 100 in
	66	let s =
	67	(* old: get_slice_file filename (line1, line2) *)
	68	begin
	69	toks +> List.iter (fun tok ->
	70	match TH.pinfo_of_tok tok with
	71	\| Ast_c.OriginTok _ -> Buffer.add_string buf (TH.str_of_tok tok)
	72	\| Ast_c.AbstractLineTok _ -> raise Impossible
	73	\| _ -> ()
	74	);
	75	Buffer.contents buf
	76	end
	77	in
	78	(s, toks)
	79
	80	let mk_info_item a b =
	81	Common.profile_code "C parsing.mk_info_item"
	82	(fun () -> mk_info_item2 a b)
	83
	84
	85
	86
	87	(*****************************************************************************)
	88	(* Stats on what was passed/commentized *)
	89	(*****************************************************************************)
	90
	91	let commentized xs = xs +> Common.map_filter (function
	92	\| Parser_c.TCommentCpp (cppkind, ii) ->
	93	let s = Ast_c.str_of_info ii in
	94	let legal_passing =
	95	match !Flag_parsing_c.filter_passed_level with
	96	\| 0 -> false
	97	\| 1 ->
	98	List.mem cppkind [Ast_c.CppAttr]
	99	\|\|
	100	(s =~ "__.*")
	101	\| 2 ->
	102	List.mem cppkind [Ast_c.CppAttr;Ast_c.CppPassingNormal]
	103	\|\|
	104	(s =~ "__.*")
	105	\| 3 ->
	106	List.mem cppkind [Ast_c.CppAttr;Ast_c.CppPassingNormal;Ast_c.CppDirective]
	107	\|\|
	108	(s =~ "__.*")
	109	\| 4 ->
	110	List.mem cppkind [Ast_c.CppAttr;Ast_c.CppPassingNormal;Ast_c.CppMacro]
	111	\|\|
	112	(s =~ "__.*")
	113
	114
	115	\| 5 ->
	116	List.mem cppkind [Ast_c.CppAttr;Ast_c.CppPassingNormal;Ast_c.CppDirective;Ast_c.CppMacro]
	117	\|\|
	118	(s =~ "__.*")
	119
	120
	121
	122
	123	\| _ -> failwith "not valid level passing number"
	124	in
	125	if legal_passing then None else Some (ii.Ast_c.pinfo)
	126
	127	(*
	128	\| Ast_c.CppOther ->
	129	(match s with
	130	\| s when s =~ "KERN_.*" -> None
	131	\| s when s =~ "__.*" -> None
	132	\| _ ->
	133	Some (ii.Ast_c.pinfo)
	134	)
	135	*)
	136
	137
	138	\| Parser_c.TCommentMisc ii
	139	\| Parser_c.TAction ii
	140	->
	141	Some (ii.Ast_c.pinfo)
	142	\| _ ->
	143	None
	144	)
	145
	146	let count_lines_commentized xs =
	147	let line = ref (-1) in
	148	let count = ref 0 in
	149	begin
	150	commentized xs +>
	151	List.iter
	152	(function
	153	Ast_c.OriginTok pinfo \| Ast_c.ExpandedTok (_,(pinfo,_)) ->
	154	let newline = pinfo.Common.line in
	155	if newline <> !line
	156	then begin
	157	line := newline;
	158	incr count
	159	end
	160	\| _ -> ());
	161	!count
	162	end
	163
	164
	165
	166	let print_commentized xs =
	167	let line = ref (-1) in
	168	begin
	169	let ys = commentized xs in
	170	ys +>
	171	List.iter
	172	(function
	173	Ast_c.OriginTok pinfo \| Ast_c.ExpandedTok (_,(pinfo,_)) ->
	174	let newline = pinfo.Common.line in
	175	let s = pinfo.Common.str in
	176	let s = Str.global_substitute
	177	(Str.regexp "\n") (fun s -> "") s
	178	in
	179	if newline = !line
	180	then prerr_string (s ^ " ")
	181	else begin
	182	if !line = -1
	183	then pr2_no_nl "passed:"
	184	else pr2_no_nl "\npassed:";
	185	line := newline;
	186	pr2_no_nl (s ^ " ");
	187	end
	188	\| _ -> ());
	189	if not (null ys) then pr2 "";
	190	end
	191
	192
	193
	194
	195	(*****************************************************************************)
	196	(* Lexing only *)
	197	(*****************************************************************************)
	198
	199	(* called by parse_print_error_heuristic *)
	200	let tokens2 file =
	201	let table = Common.full_charpos_to_pos file in
	202
	203	Common.with_open_infile file (fun chan ->
	204	let lexbuf = Lexing.from_channel chan in
	205	try
	206	let rec tokens_aux acc =
	207	let tok = Lexer_c.token lexbuf in
	208	(* fill in the line and col information *)
	209	let tok = tok +> TH.visitor_info_of_tok (fun ii ->
	210	{ ii with Ast_c.pinfo=
	211	(* could assert pinfo.filename = file ? *)
	212	match Ast_c.pinfo_of_info ii with
	213	Ast_c.OriginTok pi ->
	214	Ast_c.OriginTok (Common.complete_parse_info file table pi)
	215	\| Ast_c.ExpandedTok (pi,vpi) ->
	216	Ast_c.ExpandedTok((Common.complete_parse_info file table pi),vpi)
	217	\| Ast_c.FakeTok (s,vpi) -> Ast_c.FakeTok (s,vpi)
	218	\| Ast_c.AbstractLineTok pi -> failwith "should not occur"
	219	})
	220	in
	221
	222	if TH.is_eof tok
	223	then List.rev (tok::acc)
	224	else tokens_aux (tok::acc)
	225	in
	226	tokens_aux []
	227	with
	228	\| Lexer_c.Lexical s ->
	229	failwith ("lexical error " ^ s ^ "\n =" ^
	230	(Common.error_message file (lexbuf_to_strpos lexbuf)))
	231	\| e -> raise e
	232	)
	233
	234	let time_lexing ?(profile=true) a =
	235	if profile
	236	then Common.profile_code_exclusif "LEXING" (fun () -> tokens2 a)
	237	else tokens2 a
	238	let tokens ?profile a =
	239	Common.profile_code "C parsing.tokens" (fun () -> time_lexing ?profile a)
	240
	241
	242	let tokens_of_string string =
	243	let lexbuf = Lexing.from_string string in
	244	try
	245	let rec tokens_s_aux () =
	246	let tok = Lexer_c.token lexbuf in
	247	if TH.is_eof tok
	248	then [tok]
	249	else tok::(tokens_s_aux ())
	250	in
	251	tokens_s_aux ()
	252	with
	253	\| Lexer_c.Lexical s -> failwith ("lexical error " ^ s ^ "\n =" )
	254	\| e -> raise e
	255
	256
	257	(*****************************************************************************)
	258	(* Parsing, but very basic, no more used *)
	259	(*****************************************************************************)
	260
	261	(*
	262	* !!!Those function use refs, and are not reentrant !!! so take care.
	263	* It use globals defined in Lexer_parser.
	264	*
	265	* update: because now lexer return comments tokens, those functions
	266	* may not work anymore.
	267	*)
	268
	269	let parse file =
	270	let lexbuf = Lexing.from_channel (open_in file) in
	271	let result = Parser_c.main Lexer_c.token lexbuf in
	272	result
	273
	274
	275	let parse_print_error file =
	276	let chan = (open_in file) in
	277	let lexbuf = Lexing.from_channel chan in
	278
	279	let error_msg () = Common.error_message file (lexbuf_to_strpos lexbuf) in
	280	try
	281	lexbuf +> Parser_c.main Lexer_c.token
	282	with
	283	\| Lexer_c.Lexical s ->
	284	failwith ("lexical error " ^s^ "\n =" ^ error_msg ())
	285	\| Parsing.Parse_error ->
	286	failwith ("parse error \n = " ^ error_msg ())
	287	\| Semantic_c.Semantic (s, i) ->
	288	failwith ("semantic error " ^ s ^ "\n =" ^ error_msg ())
	289	\| e -> raise e
	290
	291
	292
	293
	294	(*****************************************************************************)
	295	(* Parsing subelements, useful to debug parser *)
	296	(*****************************************************************************)
	297
	298	(*
	299	* !!!Those function use refs, and are not reentrant !!! so take care.
	300	* It use globals defined in Lexer_parser.
	301	*)
	302
	303
	304	(* old:
	305	* let parse_gen parsefunc s =
	306	* let lexbuf = Lexing.from_string s in
	307	* let result = parsefunc Lexer_c.token lexbuf in
	308	* result
	309	*)
	310
	311	let parse_gen parsefunc s =
	312	let toks = tokens_of_string s +> List.filter TH.is_not_comment in
	313
	314
	315	(* Why use this lexing scheme ? Why not classically give lexer func
	316	* to parser ? Because I now keep comments in lexer. Could
	317	* just do a simple wrapper that when comment ask again for a token,
	318	* but maybe simpler to use cur_tok technique.
	319	*)
	320	let all_tokens = ref toks in
	321	let cur_tok = ref (List.hd !all_tokens) in
	322
	323	let lexer_function =
	324	(fun _ ->
	325	if TH.is_eof !cur_tok
	326	then (pr2 "LEXER: ALREADY AT END"; !cur_tok)
	327	else
	328	let v = Common.pop2 all_tokens in
	329	cur_tok := v;
	330	!cur_tok
	331	)
	332	in
	333	let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
	334	let result = parsefunc lexer_function lexbuf_fake in
	335	result
	336
	337
	338	let type_of_string = parse_gen Parser_c.type_name
	339	let statement_of_string = parse_gen Parser_c.statement
	340	let expression_of_string = parse_gen Parser_c.expr
	341
	342	(* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
	343
	344
	345
	346
	347
	348	(*****************************************************************************)
	349	(* Consistency checking *)
	350	(*****************************************************************************)
	351
	352	type class_ident =
	353	\| CIdent (* can be var, func, field, tag, enum constant *)
	354	\| CTypedef
	355
	356	let str_of_class_ident = function
	357	\| CIdent -> "Ident"
	358	\| CTypedef -> "Typedef"
	359
	360	(*
	361	\| CMacro
	362	\| CMacroString
	363	\| CMacroStmt
	364	\| CMacroDecl
	365	\| CMacroIterator
	366	\| CAttr
	367
	368	(* but take care that must still be able to use '=' *)
	369	type context = InFunction \| InEnum \| InStruct \| InInitializer \| InParams
	370	type class_token =
	371	\| CIdent of class_ident
	372
	373	\| CComment
	374	\| CSpace
	375	\| CCommentCpp of cppkind
	376	\| CCommentMisc
	377	\| CCppDirective
	378
	379	\| COPar
	380	\| CCPar
	381	\| COBrace
	382	\| CCBrace
	383
	384	\| CSymbol
	385	\| CReservedKwd (type \| decl \| qualif \| flow \| misc \| attr)
	386	*)
	387
	388	(* parse_typedef_fix4 *)
	389	let consistency_checking2 xs =
	390
	391	(* first phase, gather data *)
	392	let stat = Hashtbl.create 101 in
	393
	394	(* default value for hash *)
	395	let v1 () = Hashtbl.create 101 in
	396	let v2 () = ref 0 in
	397
	398	let bigf = { Visitor_c.default_visitor_c with
	399
	400	Visitor_c.kexpr = (fun (k,bigf) x ->
	401	match Ast_c.unwrap_expr x with
	402	\| Ast_c.Ident s ->
	403	stat +>
	404	Common.hfind_default s v1 +> Common.hfind_default CIdent v2 +>
	405	(fun aref -> incr aref)
	406
	407	\| _ -> k x
	408	);
	409	Visitor_c.ktype = (fun (k,bigf) t ->
	410	match Ast_c.unwrap_typeC t with
	411	\| Ast_c.TypeName (s,_typ) ->
	412	stat +>
	413	Common.hfind_default s v1 +> Common.hfind_default CTypedef v2 +>
	414	(fun aref -> incr aref)
	415
	416	\| _ -> k t
	417	);
	418	}
	419	in
	420	xs +> List.iter (fun (p, info_item) -> Visitor_c.vk_toplevel bigf p);
	421
	422
	423	let ident_to_type = ref [] in
	424
	425
	426	(* second phase, analyze data *)
	427	stat +> Hashtbl.iter (fun k v ->
	428	let xs = Common.hash_to_list v in
	429	if List.length xs >= 2
	430	then begin
	431	pr2 ("CONFLICT:" ^ k);
	432	let sorted = xs +> List.sort (fun (ka,va) (kb,vb) ->
	433	if !va = !vb then
	434	(match ka, kb with
	435	\| CTypedef, _ -> 1 (* first is smaller *)
	436	\| _, CTypedef -> -1
	437	\| _ -> 0
	438	)
	439	else compare !va !vb
	440	) in
	441	let sorted = List.rev sorted in
	442	match sorted with
	443	\| [CTypedef, i1;CIdent, i2] ->
	444	pr2 ("transforming some ident in typedef");
	445	push2 k ident_to_type;
	446	\| _ ->
	447	pr2 ("TODO:other transforming?");
	448
	449	end
	450	);
	451
	452	(* third phase, update ast.
	453	* todo? but normally should try to handle correctly scope ? maybe sometime
	454	* sizeof(id) and even if id was for a long time an identifier, maybe
	455	* a few time, because of the scope it's actually really a type.
	456	*)
	457	if (null !ident_to_type)
	458	then xs
	459	else
	460	let bigf = { Visitor_c.default_visitor_c_s with
	461	Visitor_c.kdefineval_s = (fun (k,bigf) x ->
	462	match x with
	463	\| Ast_c.DefineExpr e ->
	464	(match e with
	465	\| (Ast_c.Ident s, _), ii when List.mem s !ident_to_type ->
	466	let t = (Ast_c.nQ,
	467	(Ast_c.TypeName (s, Ast_c.noTypedefDef()), ii)) in
	468
	469	Ast_c.DefineType t
	470	\| _ -> k x
	471	)
	472	\| _ -> k x
	473	);
	474	Visitor_c.kexpr_s = (fun (k, bigf) x ->
	475	match x with
	476	\| (Ast_c.SizeOfExpr e, tref), isizeof ->
	477	let i1 = tuple_of_list1 isizeof in
	478	(match e with
	479	\| (Ast_c.ParenExpr e, _), iiparen ->
	480	(match e with
	481	\| (Ast_c.Ident s, _), ii when List.mem s !ident_to_type ->
	482	let (i2, i3) = tuple_of_list2 iiparen in
	483	let t = (Ast_c.nQ,
	484	(Ast_c.TypeName (s, Ast_c.noTypedefDef()), ii)) in
	485	(Ast_c.SizeOfType t, tref), [i1;i2;i3]
	486
	487	\| _ -> k x
	488	)
	489	\| _ -> k x
	490	)
	491	\| _ -> k x
	492	);
	493	} in
	494	xs +> List.map (fun (p, info_item) ->
	495	Visitor_c.vk_toplevel_s bigf p, info_item
	496	)
	497
	498
	499	let consistency_checking a =
	500	Common.profile_code "C consistencycheck" (fun () -> consistency_checking2 a)
	501
	502
	503
	504	(*****************************************************************************)
	505	(* Error recovery *)
	506	(*****************************************************************************)
	507
	508	(* todo: do something if find Parser_c.Eof ? *)
	509	let rec find_next_synchro next already_passed =
	510
	511	(* Maybe because not enough }, because for example an ifdef contains
	512	* in both branch some opening {, we later eat too much, "on deborde
	513	* sur la fonction d'apres". So already_passed may be too big and
	514	* looking for next synchro point starting from next may not be the
	515	* best. So maybe we can find synchro point inside already_passed
	516	* instead of looking in next.
	517	*
	518	* But take care! must progress. We must not stay in infinite loop!
	519	* For instance now I have as a error recovery to look for
	520	* a "start of something", corresponding to start of function,
	521	* but must go beyond this start otherwise will loop.
	522	* So look at premier(external_declaration2) in parser.output and
	523	* pass at least those first tokens.
	524	*
	525	* I have chosen to start search for next synchro point after the
	526	* first { I found, so quite sure we will not loop. *)
	527
	528	let last_round = List.rev already_passed in
	529	let is_define =
	530	let xs = last_round +> List.filter TH.is_not_comment in
	531	match xs with
	532	\| Parser_c.TDefine _::_ -> true
	533	\| _ -> false
	534	in
	535	if is_define
	536	then find_next_synchro_define (last_round ++ next) []
	537	else
	538
	539	let (before, after) =
	540	last_round +> Common.span (fun tok ->
	541	match tok with
	542	(* by looking at TOBrace we are sure that the "start of something"
	543	* will not arrive too early
	544	*)
	545	\| Parser_c.TOBrace _ -> false
	546	\| Parser_c.TDefine _ -> false
	547	\| _ -> true
	548	)
	549	in
	550	find_next_synchro_orig (after ++ next) (List.rev before)
	551
	552
	553
	554	and find_next_synchro_define next already_passed =
	555	match next with
	556	\| [] ->
	557	pr2 "ERROR-RECOV: end of file while in recovery mode";
	558	already_passed, []
	559	\| (Parser_c.TDefEOL i as v)::xs ->
	560	pr2 ("ERROR-RECOV: found sync end of #define, line "^i_to_s(TH.line_of_tok v));
	561	v::already_passed, xs
	562	\| v::xs ->
	563	find_next_synchro_define xs (v::already_passed)
	564
	565
	566
	567
	568	and find_next_synchro_orig next already_passed =
	569	match next with
	570	\| [] ->
	571	pr2 "ERROR-RECOV: end of file while in recovery mode";
	572	already_passed, []
	573
	574	\| (Parser_c.TCBrace i as v)::xs when TH.col_of_tok v = 0 ->
	575	pr2 ("ERROR-RECOV: found sync '}' at line "^i_to_s (TH.line_of_tok v));
	576
	577	(match xs with
	578	\| [] -> raise Impossible (* there is a EOF token normally *)
	579
	580	(* still useful: now parser.mly allow empty ';' so normally no pb *)
	581	\| Parser_c.TPtVirg iptvirg::xs ->
	582	pr2 "ERROR-RECOV: found sync bis, eating } and ;";
	583	(Parser_c.TPtVirg iptvirg)::v::already_passed, xs
	584
	585	\| Parser_c.TIdent x::Parser_c.TPtVirg iptvirg::xs ->
	586	pr2 "ERROR-RECOV: found sync bis, eating ident, }, and ;";
	587	(Parser_c.TPtVirg iptvirg)::(Parser_c.TIdent x)::v::already_passed,
	588	xs
	589
	590	\| Parser_c.TCommentSpace sp::Parser_c.TIdent x::Parser_c.TPtVirg iptvirg
	591	::xs ->
	592	pr2 "ERROR-RECOV: found sync bis, eating ident, }, and ;";
	593	(Parser_c.TCommentSpace sp)::
	594	(Parser_c.TPtVirg iptvirg)::
	595	(Parser_c.TIdent x)::
	596	v::
	597	already_passed,
	598	xs
	599
	600	\| Parser_c.TCommentNewline sp::Parser_c.TIdent x::Parser_c.TPtVirg iptvirg
	601	::xs ->
	602	pr2 "ERROR-RECOV: found sync bis, eating ident, }, and ;";
	603	(Parser_c.TCommentNewline sp)::
	604	(Parser_c.TPtVirg iptvirg)::
	605	(Parser_c.TIdent x)::
	606	v::
	607	already_passed,
	608	xs
	609
	610	\| _ ->
	611	v::already_passed, xs
	612	)
	613	\| v::xs when TH.col_of_tok v = 0 && TH.is_start_of_something v ->
	614	pr2 ("ERROR-RECOV: found sync col 0 at line "^ i_to_s(TH.line_of_tok v));
	615	already_passed, v::xs
	616
	617	\| v::xs ->
	618	find_next_synchro_orig xs (v::already_passed)
	619
	620
	621	(*****************************************************************************)
	622	(* Include/Define hacks *)
	623	(*****************************************************************************)
	624
	625	(* Sometimes I prefer to generate a single token for a list of things in the
	626	* lexer so that if I have to passed them, like for passing TInclude then
	627	* it's easy. Also if I don't do a single token, then I need to
	628	* parse the rest which may not need special stuff, like detecting
	629	* end of line which the parser is not really ready for. So for instance
	630	* could I parse a #include <a/b/c/xxx.h> as 2 or more tokens ? just
	631	* lex #include ? so then need recognize <a/b/c/xxx.h> as one token ?
	632	* but this kind of token is valid only after a #include and the
	633	* lexing and parsing rules are different for such tokens so not that
	634	* easy to parse such things in parser_c.mly. Hence the following hacks.
	635	*
	636	* less?: maybe could get rid of this like I get rid of some of fix_define.
	637	*)
	638
	639	(* ------------------------------------------------------------------------- *)
	640	(* helpers *)
	641	(* ------------------------------------------------------------------------- *)
	642
	643	(* used to generate new token from existing one *)
	644	let new_info posadd str ii =
	645	{ Ast_c.pinfo =
	646	Ast_c.OriginTok { (Ast_c.parse_info_of_info ii) with
	647	charpos = Ast_c.pos_of_info ii + posadd;
	648	str = str;
	649	column = Ast_c.col_of_info ii + posadd;
	650	};
	651	(* must generate a new ref each time, otherwise share *)
	652	cocci_tag = ref Ast_c.emptyAnnot;
	653	comments_tag = ref Ast_c.emptyComments;
	654	}
	655
	656
	657	let rec comment_until_defeol xs =
	658	match xs with
	659	\| [] -> failwith "cant find end of define token TDefEOL"
	660	\| x::xs ->
	661	(match x with
	662	\| Parser_c.TDefEOL i ->
	663	Parser_c.TCommentCpp (Ast_c.CppDirective, TH.info_of_tok x)
	664	::xs
	665	\| _ ->
	666	let x' =
	667	(* bugfix: otherwise may lose a TComment token *)
	668	if TH.is_real_comment x
	669	then x
	670	else Parser_c.TCommentCpp (Ast_c.CppPassingNormal (good?), TH.info_of_tok x)
	671	in
	672	x'::comment_until_defeol xs
	673	)
	674
	675	let drop_until_defeol xs =
	676	List.tl
	677	(Common.drop_until (function Parser_c.TDefEOL _ -> true \| _ -> false) xs)
	678
	679
	680
	681	(* ------------------------------------------------------------------------- *)
	682	(* returns a pair (replaced token, list of next tokens) *)
	683	(* ------------------------------------------------------------------------- *)
	684
	685	let tokens_include (info, includes, filename, inifdef) =
	686	Parser_c.TIncludeStart (Ast_c.rewrap_str includes info, inifdef),
	687	[Parser_c.TIncludeFilename
	688	(filename, (new_info (String.length includes) filename info))
	689	]
	690
	691	(*****************************************************************************)
	692	(* Parsing default define macros, usually in a standard.h file *)
	693	(*****************************************************************************)
	694
	695	let parse_cpp_define_file2 file =
	696	let toks = tokens ~profile:false file in
	697	let toks = Parsing_hacks.fix_tokens_define toks in
	698	Parsing_hacks.extract_cpp_define toks
	699
	700	let parse_cpp_define_file a =
	701	Common.profile_code_exclusif "HACK" (fun () -> parse_cpp_define_file2 a)
	702
	703	(* can not be put in parsing_hack, cos then mutually recursive problem as
	704	* we also want to parse the standard.h file.
	705	*)
	706	let init_defs std_h =
	707	if not (Common.lfile_exists std_h)
	708	then pr2 ("warning: Can't find default macro file: " ^ std_h)
	709	else begin
	710	pr2 ("init_defs: " ^ std_h);
	711	Parsing_hacks._defs := Common.hash_of_list (parse_cpp_define_file std_h);
	712	end
	713
	714
	715	(*****************************************************************************)
	716	(* Main entry point *)
	717	(*****************************************************************************)
	718
	719	type info_item = string * Parser_c.token list
	720
	721	type program2 = toplevel2 list
	722	and toplevel2 = Ast_c.toplevel * info_item
	723
	724	let program_of_program2 xs =
	725	xs +> List.map fst
	726
	727	let with_program2 f program2 =
	728	program2
	729	+> Common.unzip
	730	+> (fun (program, infos) ->
	731	f program, infos
	732	)
	733	+> Common.uncurry Common.zip
	734
	735
	736
	737	(* The use of local refs (remaining_tokens, passed_tokens, ...) makes
	738	* possible error recovery. Indeed, they allow to skip some tokens and
	739	* still be able to call again the ocamlyacc parser. It is ugly code
	740	* because we cant modify ocamllex and ocamlyacc. As we want some
	741	* extended lexing tricks, we have to use such refs.
	742	*
	743	* Those refs are now also used for my lalr(k) technique. Indeed They
	744	* store the futur and previous tokens that were parsed, and so
	745	* provide enough context information for powerful lex trick.
	746	*
	747	* - passed_tokens_last_ckp stores the passed tokens since last
	748	* checkpoint. Used for NotParsedCorrectly and also to build the
	749	* info_item attached to each program_element.
	750	* - passed_tokens_clean is used for lookahead, in fact for lookback.
	751	* - remaining_tokens_clean is used for lookahead. Now remaining_tokens
	752	* contain some comments and so would make pattern matching difficult
	753	* in lookahead. Hence this variable. We would like also to get rid
	754	* of cpp instruction because sometimes a cpp instruction is between
	755	* two tokens and makes a pattern matching fail. But lookahead also
	756	* transform some cpp instruction (in comment) so can't remove them.
	757	*
	758	* So remaining_tokens, passed_tokens_last_ckp contain comment-tokens,
	759	* whereas passed_tokens_clean and remaining_tokens_clean does not contain
	760	* comment-tokens.
	761	*
	762	* Normally we have:
	763	* toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
	764	* after the call to pop2.
	765	* toks = (reverse passed_tok) ++ remaining_tokens
	766	* at the and of the lexer_function call.
	767	* At the very beginning, cur_tok and remaining_tokens overlap, but not after.
	768	* At the end of lexer_function call, cur_tok overlap with passed_tok.
	769	*
	770	* convention: I use "tr" for "tokens refs"
	771	*
	772	* I now also need this lexing trick because the lexer return comment
	773	* tokens.
	774	*)
	775
	776	type tokens_state = {
	777	mutable rest : Parser_c.token list;
	778	mutable rest_clean : Parser_c.token list;
	779	mutable current : Parser_c.token;
	780	(* it's passed since last "checkpoint", not passed from the beginning *)
	781	mutable passed : Parser_c.token list;
	782	mutable passed_clean : Parser_c.token list;
	783	}
	784	let clone_tokens_stat tr =
	785	{ rest = tr.rest;
	786	rest_clean = tr.rest_clean;
	787	current = tr.current;
	788	passed = tr.passed;
	789	passed_clean = tr.passed_clean;
	790	}
	791	let copy_tokens_stat ~src ~dst =
	792	dst.rest <- src.rest;
	793	dst.rest_clean <- src.rest_clean;
	794	dst.current <- src.current;
	795	dst.passed <- src.passed;
	796	dst.passed_clean <- src.passed_clean;
	797	()
	798
	799	let rec filter_noise n xs =
	800	match n, xs with
	801	\| _, [] -> []
	802	\| 0, xs -> xs
	803	\| n, x::xs ->
	804	(match x with
	805	\| Parser_c.TMacroAttr _ ->
	806	filter_noise (n-1) xs
	807	\| _ ->
	808	x::filter_noise (n-1) xs
	809	)
	810
	811	let clean_for_lookahead xs =
	812	match xs with
	813	\| [] -> []
	814	\| [x] -> [x]
	815	\| x::xs ->
	816	x::filter_noise 10 xs
	817
	818
	819
	820	(* Hacked lex. This function use refs passed by parse_print_error_heuristic
	821	* tr means token refs.
	822	*)
	823	let rec lexer_function ~pass tr = fun lexbuf ->
	824	match tr.rest with
	825	\| [] -> pr2 "ALREADY AT END"; tr.current
	826	\| v::xs ->
	827	tr.rest <- xs;
	828	tr.current <- v;
	829
	830	if !Flag_parsing_c.debug_lexer then Common.pr2_gen v;
	831
	832	if TH.is_comment v
	833	then begin
	834	tr.passed <- v::tr.passed;
	835	lexer_function ~pass tr lexbuf
	836	end
	837	else begin
	838	let x = List.hd tr.rest_clean in
	839	tr.rest_clean <- List.tl tr.rest_clean;
	840	assert (x = v);
	841
	842	(match v with
	843	(* fix_define1. Why not in parsing_hacks lookahead and do passing like
	844	* I do for some ifdef directives ? Because here I also need to
	845	* generate some tokens sometimes.
	846	*)
	847	\| Parser_c.TDefine (tok) ->
	848	if not (LP.current_context () = LP.InTopLevel) &&
	849	(!Flag_parsing_c.cpp_directive_passing \|\| (pass = 2))
	850	then begin
	851	incr Stat.nDefinePassing;
	852	pr2_once ("CPP-DEFINE: inside function, I treat it as comment");
	853	let v' = Parser_c.TCommentCpp (Ast_c.CppDirective,TH.info_of_tok v)
	854	in
	855	tr.passed <- v'::tr.passed;
	856	tr.rest <- comment_until_defeol tr.rest;
	857	tr.rest_clean <- drop_until_defeol tr.rest_clean;
	858	lexer_function ~pass tr lexbuf
	859	end
	860	else begin
	861	tr.passed <- v::tr.passed;
	862	tr.passed_clean <- v::tr.passed_clean;
	863	v
	864	end
	865
	866	\| Parser_c.TInclude (includes, filename, inifdef, info) ->
	867	if not (LP.current_context () = LP.InTopLevel) &&
	868	(!Flag_parsing_c.cpp_directive_passing \|\| (pass = 2))
	869	then begin
	870	incr Stat.nIncludePassing;
	871	pr2_once ("CPP-INCLUDE: inside function, I treat it as comment");
	872	let v = Parser_c.TCommentCpp(Ast_c.CppDirective, info) in
	873	tr.passed <- v::tr.passed;
	874	lexer_function ~pass tr lexbuf
	875	end
	876	else begin
	877	let (v,new_tokens) =
	878	tokens_include (info, includes, filename, inifdef) in
	879	let new_tokens_clean =
	880	new_tokens +> List.filter TH.is_not_comment in
	881
	882	tr.passed <- v::tr.passed;
	883	tr.passed_clean <- v::tr.passed_clean;
	884	tr.rest <- new_tokens ++ tr.rest;
	885	tr.rest_clean <- new_tokens_clean ++ tr.rest_clean;
	886	v
	887	end
	888
	889	\| _ ->
	890
	891	(* typedef_fix1 *)
	892	let v = match v with
	893	\| Parser_c.TIdent (s, ii) ->
	894	if
	895	LP.is_typedef s &&
	896	not (!Flag_parsing_c.disable_add_typedef) &&
	897	pass = 1
	898	then Parser_c.TypedefIdent (s, ii)
	899	else Parser_c.TIdent (s, ii)
	900	\| x -> x
	901	in
	902
	903	let v = Parsing_hacks.lookahead ~pass
	904	(clean_for_lookahead (v::tr.rest_clean))
	905	tr.passed_clean in
	906
	907	tr.passed <- v::tr.passed;
	908
	909	(* the lookahead may have changed the status of the token and
	910	* consider it as a comment, for instance some #include are
	911	* turned into comments, hence this code. *)
	912	match v with
	913	\| Parser_c.TCommentCpp _ -> lexer_function ~pass tr lexbuf
	914	\| v ->
	915	tr.passed_clean <- v::tr.passed_clean;
	916	v
	917	)
	918	end
	919
	920
	921
	922	let get_one_elem ~pass tr (file, filelines) =
	923
	924	if not (LP.is_enabled_typedef()) && !Flag_parsing_c.debug_typedef
	925	then pr2 "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";
	926
	927	(* normally have to do that only when come from an exception in which
	928	* case the dt() may not have been done
	929	* TODO but if was in scoped scope ? have to let only the last scope
	930	* so need do a LP.lexer_reset_typedef ();
	931	*)
	932	LP.enable_typedef();
	933	LP._lexer_hint := (LP.default_hint ());
	934	LP.save_typedef_state();
	935
	936	tr.passed <- [];
	937
	938	let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
	939
	940	(try
	941	(* -------------------------------------------------- *)
	942	(* Call parser *)
	943	(* -------------------------------------------------- *)
	944	Common.profile_code_exclusif "YACC" (fun () ->
	945	Left (Parser_c.celem (lexer_function ~pass tr) lexbuf_fake)
	946	)
	947	with e -> begin
	948	if (pass = 1 && !Flag_parsing_c.disable_two_pass)\|\| (pass = 2)
	949	then begin
	950	(match e with
	951	(* Lexical is not anymore launched I think *)
	952	\| Lexer_c.Lexical s ->
	953	pr2 ("lexical error " ^s^ "\n =" ^ error_msg_tok tr.current)
	954	\| Parsing.Parse_error ->
	955	pr2 ("parse error \n = " ^ error_msg_tok tr.current)
	956	\| Semantic_c.Semantic (s, i) ->
	957	pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok tr.current)
	958	\| e -> raise e
	959	)
	960	end;
	961	LP.restore_typedef_state();
	962
	963	(* must keep here, before the code that adjusts the tr fields *)
	964	let line_error = TH.line_of_tok tr.current in
	965
	966
	967	(* error recovery, go to next synchro point *)
	968	let (passed', rest') = find_next_synchro tr.rest tr.passed in
	969	tr.rest <- rest';
	970	tr.passed <- passed';
	971
	972	tr.current <- List.hd passed';
	973	tr.passed_clean <- []; (* enough ? *)
	974	(* with error recovery, rest and rest_clean may not be in sync *)
	975	tr.rest_clean <- (tr.rest +> List.filter TH.is_not_comment);
	976
	977
	978	let info_of_bads = Common.map_eff_rev TH.info_of_tok tr.passed in
	979	Right (info_of_bads, line_error)
	980	end
	981	)
	982
	983
	984
	985
	986	(* note: as now we go in 2 passes, there is first all the error message of
	987	* the lexer, and then the error of the parser. It is not anymore
	988	* interwinded.
	989	*
	990	* !!!This function use refs, and is not reentrant !!! so take care.
	991	* It use globals defined in Lexer_parser and also the _defs global
	992	* in parsing_hack.ml.
	993	*
	994	* This function uses internally some semi globals in the
	995	* tokens_stat record and parsing_stat record.
	996	*)
	997
	998	let parse_print_error_heuristic2 file =
	999
	1000	let filelines = (""::Common.cat file) +> Array.of_list in
	1001	let stat = Parsing_stat.default_stat file in
	1002
	1003	(* -------------------------------------------------- *)
	1004	(* call lexer and get all the tokens *)
	1005	(* -------------------------------------------------- *)
	1006	LP.lexer_reset_typedef();
	1007	Parsing_hacks.ifdef_paren_cnt := 0;
	1008	let toks_orig = tokens file in
	1009
	1010	let toks = Parsing_hacks.fix_tokens_define toks_orig in
	1011	let toks = Parsing_hacks.fix_tokens_cpp toks in
	1012
	1013	let tr = {
	1014	rest = toks;
	1015	rest_clean = (toks +> List.filter TH.is_not_comment);
	1016	current = (List.hd toks);
	1017	passed = [];
	1018	passed_clean = [];
	1019	} in
	1020
	1021
	1022
	1023
	1024	let rec loop tr =
	1025
	1026	(* todo?: I am not sure that it represents current_line, cos maybe
	1027	* tr.current partipated in the previous parsing phase, so maybe tr.current
	1028	* is not the first token of the next parsing phase. Same with checkpoint2.
	1029	* It would be better to record when we have a } or ; in parser.mly,
	1030	* cos we know that they are the last symbols of external_declaration2.
	1031	*
	1032	* bugfix: may not be equal to 'file' as after macro expansions we can
	1033	* start to parse a new entity from the body of a macro, for instance
	1034	* when parsing a define_machine() body, cf standard.h
	1035	*)
	1036	let checkpoint = TH.line_of_tok tr.current in
	1037	let checkpoint_file = TH.file_of_tok tr.current in
	1038
	1039	let tr_save = clone_tokens_stat tr in
	1040
	1041	(* call the parser *)
	1042	let elem =
	1043	let pass1 = get_one_elem ~pass:1 tr (file, filelines) in
	1044	match pass1 with
	1045	\| Left e -> Left e
	1046	\| Right res ->
	1047	if !Flag_parsing_c.disable_two_pass
	1048	then Right res
	1049	else begin
	1050	pr2 "parsing pass2: try again";
	1051	copy_tokens_stat ~src:tr_save ~dst: tr;
	1052	let pass2 = get_one_elem ~pass:2 tr (file, filelines) in
	1053	pass2
	1054	end
	1055	in
	1056
	1057
	1058	(* again not sure if checkpoint2 corresponds to end of bad region *)
	1059	let checkpoint2 = TH.line_of_tok tr.current in (* <> line_error *)
	1060	let checkpoint2_file = TH.file_of_tok tr.current in
	1061
	1062	let was_define =
	1063	(match elem with
	1064	\| Left _ -> false
	1065	\| Right (_, line_error) ->
	1066	let was_define =
	1067	let xs = tr.passed +> List.rev +> List.filter TH.is_not_comment in
	1068	if List.length xs >= 2
	1069	then
	1070	(match Common.head_middle_tail xs with
	1071	\| Parser_c.TDefine _, _, Parser_c.TDefEOL _ ->
	1072	true
	1073	\| _ -> false
	1074	)
	1075	else begin
	1076	pr2 "WIERD: length list of error recovery tokens < 2 ";
	1077	false
	1078	end
	1079	in
	1080	(if was_define && !Flag_parsing_c.filter_msg_define_error
	1081	then ()
	1082	else
	1083	(* bugfix: *)
	1084	if (checkpoint_file = checkpoint2_file) && checkpoint_file = file
	1085	then print_bad line_error (checkpoint, checkpoint2) filelines
	1086	else pr2 "PB: bad: but on tokens not from original file"
	1087	);
	1088	was_define
	1089	) in
	1090
	1091
	1092	let diffline =
	1093	if (checkpoint_file = checkpoint2_file) && (checkpoint_file = file)
	1094	then (checkpoint2 - checkpoint)
	1095	else 0
	1096	(* TODO? so if error come in middle of something ? where the
	1097	* start token was from original file but synchro found in body
	1098	* of macro ? then can have wrong number of lines stat.
	1099	* Maybe simpler just to look at tr.passed and count
	1100	* the lines in the token from the correct file ?
	1101	*)
	1102	in
	1103	let info = mk_info_item file (List.rev tr.passed) in
	1104
	1105	(* some stat updates *)
	1106	stat.Stat.commentized <-
	1107	stat.Stat.commentized + count_lines_commentized (snd info);
	1108
	1109	let elem =
	1110	match elem with
	1111	\| Left e -> e
	1112	\| Right (info_of_bads, _line_error) ->
	1113	Ast_c.NotParsedCorrectly info_of_bads
	1114	in
	1115	(match elem with
	1116	\| Ast_c.NotParsedCorrectly xs ->
	1117	if was_define && !Flag_parsing_c.filter_define_error
	1118	then stat.Stat.correct <- stat.Stat.correct + diffline
	1119	else stat.Stat.bad <- stat.Stat.bad + diffline
	1120	\| _ -> stat.Stat.correct <- stat.Stat.correct + diffline
	1121	);
	1122
	1123	(match elem with
	1124	\| Ast_c.FinalDef x -> [(Ast_c.FinalDef x, info)]
	1125	\| xs -> (xs, info):: loop tr (* recurse *)
	1126	)
	1127	in
	1128	let v = loop tr in
	1129
	1130	let v = consistency_checking v in
	1131	(v, stat)
	1132
	1133
	1134	let time_total_parsing a =
	1135	Common.profile_code "TOTAL" (fun () -> parse_print_error_heuristic2 a)
	1136
	1137	let parse_print_error_heuristic a =
	1138	Common.profile_code "C parsing" (fun () -> time_total_parsing a)
	1139
	1140
	1141	(* alias *)
	1142	let parse_c_and_cpp a = parse_print_error_heuristic a
	1143
	1144	(*****************************************************************************)
	1145	(* Same but faster cos memoize stuff *)
	1146	(*****************************************************************************)
	1147	let parse_cache file =
	1148	if not !Flag_parsing_c.use_cache then parse_print_error_heuristic file
	1149	else
	1150	let _ = pr2 "TOFIX" in
	1151	let need_no_changed_files =
	1152	(* should use Sys.argv.(0), would be safer. *)
	1153
	1154	[
	1155	(* TOFIX
	1156	Config.path ^ "/parsing_c/c_parser.cma";
	1157	(* we may also depend now on the semantic patch because
	1158	the SP may use macro and so we will disable some of the
	1159	macro expansions from standard.h.
	1160	*)
	1161	!Config.std_h;
	1162	*)
	1163	]
	1164	in
	1165	let need_no_changed_variables =
	1166	(* could add some of the flags of flag_parsing_c.ml *)
	1167	[]
	1168	in
	1169	Common.cache_computation_robust
	1170	file ".ast_raw"
	1171	(need_no_changed_files, need_no_changed_variables) ".depend_raw"
	1172	(fun () -> parse_print_error_heuristic file)
	1173
	1174
	1175
	1176	(*****************************************************************************)
	1177	(* Some special cases *)
	1178	(*****************************************************************************)
	1179
	1180	let (cstatement_of_string: string -> Ast_c.statement) = fun s ->
	1181	Common.write_file ("/tmp/__cocci.c") ("void main() { \n" ^ s ^ "\n}");
	1182	let program = parse_c_and_cpp ("/tmp/__cocci.c") +> fst in
	1183	program +> Common.find_some (fun (e,_) ->
	1184	match e with
	1185	\| Ast_c.Definition ({Ast_c.f_body = [Ast_c.StmtElem st]},_) -> Some st
	1186	\| _ -> None
	1187	)
	1188
	1189	let (cexpression_of_string: string -> Ast_c.expression) = fun s ->
	1190	Common.write_file ("/tmp/__cocci.c") ("void main() { \n" ^ s ^ ";\n}");
	1191	let program = parse_c_and_cpp ("/tmp/__cocci.c") +> fst in
	1192	program +> Common.find_some (fun (e,_) ->
	1193	match e with
	1194	\| Ast_c.Definition ({Ast_c.f_body = compound},_) ->
	1195	(match compound with
	1196	\| [Ast_c.StmtElem (Ast_c.ExprStatement (Some e),ii)] -> Some e
	1197	\| _ -> None
	1198	)
	1199	\| _ -> None
	1200	)