(* Yoann Padioleau
- *
+ *
+ * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
* Copyright (C) 2007, 2008 Ecole des Mines de Nantes
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License (GPL)
* version 2 as published by the Free Software Foundation.
- *
+ *
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
open Common
-module TH = Token_helpers
+module TH = Token_helpers
-open Parser_c
+open Parser_c
(*****************************************************************************)
(* Some debugging functions *)
(*****************************************************************************)
-let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
+let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
(* ------------------------------------------------------------------------- *)
(* fuzzy parsing, different "views" over the same program *)
* indexed by the charpos, there could have been some problems:
* how my fake_pos interact with the way I tag and adjust token ?
* because I base my tagging on the position of the token ! so sometimes
- * could tag another fakeInfo that should not be tagged ?
+ * could tag another fakeInfo that should not be tagged ?
* fortunately I don't use anymore this technique.
*)
(* update: quite close to the Place_c.Inxxx *)
-type context =
+type context =
InFunction | InEnum | InStruct | InInitializer | NoContext
-type token_extended = {
+type token_extended = {
mutable tok: Parser_c.token;
mutable where: context;
mutable new_tokens_before : Parser_c.token list;
(* line x col cache, more easily accessible, of the info in the token *)
- line: int;
+ line: int;
col : int;
}
(* todo? is it ok to reset as a comment a TDefEOL ? if do that, then
* can confuse the parser.
*)
-let set_as_comment cppkind x =
- if TH.is_eof x.tok
+let set_as_comment cppkind x =
+ if TH.is_eof x.tok
then () (* otherwise parse_c will be lost if don't find a EOF token *)
- else
+ else
x.tok <- TCommentCpp (cppkind, TH.info_of_tok x.tok)
-let mk_token_extended x =
+let save_as_comment cppkind x =
+ if TH.is_eof x.tok
+ then () (* otherwise parse_c will be lost if don't find a EOF token *)
+ else
+ let t =
+ match x.tok with
+ TIfdef _ | TIfdefMisc _ | TIfdefVersion _ -> Token_c.IfDef
+ | TIfdefBool _ -> Token_c.IfDef0
+ | TIfdefelse _ | TIfdefelif _ -> Token_c.Else
+ | TEndif _ -> Token_c.Endif
+ | _ -> Token_c.Other in
+ x.tok <- TCommentCpp (cppkind t, TH.info_of_tok x.tok)
+
+let mk_token_extended x =
let (line, col) = TH.linecol_of_tok x in
- { tok = x;
- line = line; col = col;
- where = NoContext;
+ { tok = x;
+ line = line; col = col;
+ where = NoContext;
new_tokens_before = [];
}
-let rebuild_tokens_extented toks_ext =
+let rebuild_tokens_extented toks_ext =
let _tokens = ref [] in
- toks_ext +> List.iter (fun tok ->
+ toks_ext +> List.iter (fun tok ->
tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
- push2 tok.tok _tokens
+ push2 tok.tok _tokens
);
let tokens = List.rev !_tokens in
(tokens +> acc_map mk_token_extended)
-(* x list list, because x list separated by ',' *)
-type paren_grouped =
+(* x list list, because x list separated by ',' *)
+type paren_grouped =
| Parenthised of paren_grouped list list * token_extended list
| PToken of token_extended
-type brace_grouped =
- | Braceised of
+type brace_grouped =
+ | Braceised of
brace_grouped list list * token_extended * token_extended option
| BToken of token_extended
* and so when we want to comment a ifdef, we don't know which endif
* we must also comment. Especially true for the #if 0 which sometimes
* have a #else part.
- *
- * x list list, because x list separated by #else or #elif
- *)
-type ifdef_grouped =
+ *
+ * x list list, because x list separated by #else or #elif
+ *)
+type ifdef_grouped =
| Ifdef of ifdef_grouped list list * token_extended list
| Ifdefbool of bool * ifdef_grouped list list * token_extended list
| NotIfdefLine of token_extended list
-type 'a line_grouped =
+type 'a line_grouped =
Line of 'a list
-type body_function_grouped =
+type body_function_grouped =
| BodyFunction of token_extended list
| NotBodyLine of token_extended list
(* view builders *)
(* ------------------------------------------------------------------------- *)
-(* todo: synchro ! use more indentation
+(* todo: synchro ! use more indentation
* if paren not closed and same indentation level, certainly because
* part of a mid-ifdef-expression.
*)
-let rec mk_parenthised xs =
+let rec mk_parenthised xs =
let rec loop acc = function
| [] -> acc
- | x::xs ->
- (match x.tok with
- | TOPar _ | TOParDefine _ ->
+ | x::xs ->
+ (match x.tok with
+ | TOPar _ | TOParDefine _ ->
let body, extras, xs = mk_parameters [x] [] xs in
loop (Parenthised (body,extras)::acc) xs
- | _ ->
+ | _ ->
loop (PToken x::acc) xs
) in
List.rev(loop [] xs)
(* return the body of the parenthised expression and the rest of the tokens *)
-and mk_parameters extras acc_before_sep xs =
+and mk_parameters extras acc_before_sep xs =
match xs with
- | [] ->
+ | [] ->
(* maybe because of #ifdef which "opens" '(' in 2 branches *)
pr2 "PB: not found closing paren in fuzzy parsing";
[List.rev acc_before_sep], List.rev extras, []
- | x::xs ->
- (match x.tok with
+ | x::xs ->
+ (match x.tok with
(* synchro *)
- | TOBrace _ when x.col =|= 0 ->
+ | TOBrace _ when x.col =|= 0 ->
pr2 "PB: found synchro point } in paren";
[List.rev acc_before_sep], List.rev (extras), (x::xs)
- | TCPar _ | TCParEOL _ ->
+ | TCPar _ | TCParEOL _ ->
[List.rev acc_before_sep], List.rev (x::extras), xs
- | TOPar _ | TOParDefine _ ->
+ | TOPar _ | TOParDefine _ ->
let body, extrasnest, xs = mk_parameters [x] [] xs in
- mk_parameters extras
- (Parenthised (body,extrasnest)::acc_before_sep)
+ mk_parameters extras
+ (Parenthised (body,extrasnest)::acc_before_sep)
xs
- | TComma _ ->
+ | TComma _ ->
let body, extras, xs = mk_parameters (x::extras) [] xs in
- (List.rev acc_before_sep)::body, extras, xs
- | _ ->
+ (List.rev acc_before_sep)::body, extras, xs
+ | _ ->
mk_parameters extras (PToken x::acc_before_sep) xs
)
-let rec mk_braceised xs =
+let rec mk_braceised xs =
let rec loop acc = function
| [] -> acc
- | x::xs ->
- (match x.tok with
- | TOBrace _ ->
+ | x::xs ->
+ (match x.tok with
+ | TOBrace _ ->
let body, endbrace, xs = mk_braceised_aux [] xs in
loop (Braceised (body, x, endbrace)::acc) xs
- | TCBrace _ ->
+ | TCBrace _ ->
pr2 "PB: found closing brace alone in fuzzy parsing";
loop (BToken x::acc) xs
- | _ ->
+ | _ ->
loop (BToken x::acc) xs) in
List.rev(loop [] xs)
(* return the body of the parenthised expression and the rest of the tokens *)
-and mk_braceised_aux acc xs =
+and mk_braceised_aux acc xs =
match xs with
- | [] ->
+ | [] ->
(* maybe because of #ifdef which "opens" '(' in 2 branches *)
pr2 "PB: not found closing brace in fuzzy parsing";
[List.rev acc], None, []
- | x::xs ->
- (match x.tok with
+ | x::xs ->
+ (match x.tok with
| TCBrace _ -> [List.rev acc], Some x, xs
- | TOBrace _ ->
+ | TOBrace _ ->
let body, endbrace, xs = mk_braceised_aux [] xs in
mk_braceised_aux (Braceised (body,x, endbrace)::acc) xs
- | _ ->
+ | _ ->
mk_braceised_aux (BToken x::acc) xs
)
-
-let rec mk_ifdef xs =
+
+let rec mk_ifdef xs =
match xs with
| [] -> []
- | x::xs ->
- (match x.tok with
- | TIfdef _ ->
+ | x::xs ->
+ (match x.tok with
+ | TIfdef _ ->
let body, extra, xs = mk_ifdef_parameters [x] [] xs in
Ifdef (body, extra)::mk_ifdef xs
- | TIfdefBool (b,_, _) ->
+ | TIfdefBool (b,_, _) ->
let body, extra, xs = mk_ifdef_parameters [x] [] xs in
-
+
(* if not passing, then consider a #if 0 as an ordinary #ifdef *)
if !Flag_parsing_c.if0_passing
then Ifdefbool (b, body, extra)::mk_ifdef xs
else Ifdef(body, extra)::mk_ifdef xs
- | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) ->
+ | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) ->
let body, extra, xs = mk_ifdef_parameters [x] [] xs in
Ifdefbool (b, body, extra)::mk_ifdef xs
-
- | _ ->
+
+ | _ ->
(* todo? can have some Ifdef in the line ? *)
let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in
- NotIfdefLine line::mk_ifdef xs
+ NotIfdefLine line::mk_ifdef xs
)
-and mk_ifdef_parameters extras acc_before_sep xs =
+and mk_ifdef_parameters extras acc_before_sep xs =
match xs with
- | [] ->
+ | [] ->
(* Note that mk_ifdef is assuming that CPP instruction are alone
* on their line. Because I do a span (fun x -> is_same_line ...)
* I might take with me a #endif if this one is mixed on a line
*)
pr2 "PB: not found closing ifdef in fuzzy parsing";
[List.rev acc_before_sep], List.rev extras, []
- | x::xs ->
- (match x.tok with
- | TEndif _ ->
+ | x::xs ->
+ (match x.tok with
+ | TEndif _ ->
[List.rev acc_before_sep], List.rev (x::extras), xs
- | TIfdef _ ->
+ | TIfdef _ ->
let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
- mk_ifdef_parameters
+ mk_ifdef_parameters
extras (Ifdef (body, extrasnest)::acc_before_sep) xs
- | TIfdefBool (b,_,_) ->
+ | TIfdefBool (b,_,_) ->
let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
if !Flag_parsing_c.if0_passing
then
- mk_ifdef_parameters
+ mk_ifdef_parameters
extras (Ifdefbool (b, body, extrasnest)::acc_before_sep) xs
- else
- mk_ifdef_parameters
+ else
+ mk_ifdef_parameters
extras (Ifdef (body, extrasnest)::acc_before_sep) xs
- | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) ->
+ | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) ->
let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
- mk_ifdef_parameters
+ mk_ifdef_parameters
extras (Ifdefbool (b, body, extrasnest)::acc_before_sep) xs
- | TIfdefelse _
- | TIfdefelif _ ->
+ | TIfdefelse _
+ | TIfdefelif _ ->
let body, extras, xs = mk_ifdef_parameters (x::extras) [] xs in
- (List.rev acc_before_sep)::body, extras, xs
- | _ ->
+ (List.rev acc_before_sep)::body, extras, xs
+ | _ ->
let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in
mk_ifdef_parameters extras (NotIfdefLine line::acc_before_sep) xs
)
let line_of_paren = function
| PToken x -> x.line
- | Parenthised (xxs, info_parens) ->
+ | Parenthised (xxs, info_parens) ->
(match info_parens with
| [] -> raise Impossible
| x::xs -> x.line
let rec span_line_paren line = function
| [] -> [],[]
- | x::xs ->
+ | x::xs ->
(match x with
- | PToken tok when TH.is_eof tok.tok ->
+ | PToken tok when TH.is_eof tok.tok ->
[], x::xs
- | _ ->
- if line_of_paren x =|= line
+ | _ ->
+ if line_of_paren x =|= line
then
let (l1, l2) = span_line_paren line xs in
(x::l1, l2)
else ([], x::xs)
)
-
-let rec mk_line_parenthised xs =
+
+let rec mk_line_parenthised xs =
match xs with
| [] -> []
- | x::xs ->
+ | x::xs ->
let line_no = line_of_paren x in
let line, xs = span_line_paren line_no xs in
Line (x::line)::mk_line_parenthised xs
(* --------------------------------------- *)
-let rec mk_body_function_grouped xs =
- match xs with
+let rec mk_body_function_grouped xs =
+ match xs with
| [] -> []
- | x::xs ->
+ | x::xs ->
(match x with
- | {tok = TOBrace _; col = 0} ->
- let is_closing_brace = function
- | {tok = TCBrace _; col = 0 } -> true
- | _ -> false
+ | {tok = TOBrace _; col = 0} ->
+ let is_closing_brace = function
+ | {tok = TCBrace _; col = 0 } -> true
+ | _ -> false
in
let body, xs = Common.span (fun x -> not (is_closing_brace x)) xs in
(match xs with
- | ({tok = TCBrace _; col = 0 })::xs ->
+ | ({tok = TCBrace _; col = 0 })::xs ->
BodyFunction body::mk_body_function_grouped xs
- | [] ->
+ | [] ->
pr2 "PB:not found closing brace in fuzzy parsing";
[NotBodyLine body]
| _ -> raise Impossible
)
-
- | _ ->
+
+ | _ ->
let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in
- NotBodyLine line::mk_body_function_grouped xs
+ NotBodyLine line::mk_body_function_grouped xs
)
(* view iterators *)
(* ------------------------------------------------------------------------- *)
-let rec iter_token_paren f xs =
+let rec iter_token_paren f xs =
xs +> List.iter (function
| PToken tok -> f tok;
- | Parenthised (xxs, info_parens) ->
+ | Parenthised (xxs, info_parens) ->
info_parens +> List.iter f;
xxs +> List.iter (fun xs -> iter_token_paren f xs)
)
-let rec iter_token_brace f xs =
+let rec iter_token_brace f xs =
xs +> List.iter (function
| BToken tok -> f tok;
- | Braceised (xxs, tok1, tok2opt) ->
+ | Braceised (xxs, tok1, tok2opt) ->
f tok1; do_option f tok2opt;
xxs +> List.iter (fun xs -> iter_token_brace f xs)
)
-let rec iter_token_ifdef f xs =
+let rec iter_token_ifdef f xs =
xs +> List.iter (function
| NotIfdefLine xs -> xs +> List.iter f;
- | Ifdefbool (_, xxs, info_ifdef)
- | Ifdef (xxs, info_ifdef) ->
+ | Ifdefbool (_, xxs, info_ifdef)
+ | Ifdef (xxs, info_ifdef) ->
info_ifdef +> List.iter f;
xxs +> List.iter (iter_token_ifdef f)
)
-let tokens_of_paren xs =
+let tokens_of_paren xs =
let g = ref [] in
xs +> iter_token_paren (fun tok -> push2 tok g);
List.rev !g
-let tokens_of_paren_ordered xs =
+let tokens_of_paren_ordered xs =
let g = ref [] in
let rec aux_tokens_ordered = function
| PToken tok -> push2 tok g;
- | Parenthised (xxs, info_parens) ->
- let (opar, cpar, commas) =
+ | Parenthised (xxs, info_parens) ->
+ let (opar, cpar, commas) =
match info_parens with
- | opar::xs ->
+ | opar::xs ->
(match List.rev xs with
- | cpar::xs ->
+ | cpar::xs ->
opar, cpar, List.rev xs
| _ -> raise Impossible
)
match xxs, commas with
| [], [] -> ()
| [xs], [] -> xs +> List.iter aux_tokens_ordered
- | xs::ys::xxs, comma::commas ->
+ | xs::ys::xxs, comma::commas ->
xs +> List.iter aux_tokens_ordered;
push2 comma g;
aux_args (ys::xxs, commas)
(* ------------------------------------------------------------------------- *)
-let rec set_in_function_tag xs =
- (* could try: ) { } but it can be the ) of a if or while, so
+let rec set_in_function_tag xs =
+ (* could try: ) { } but it can be the ) of a if or while, so
* better to base the heuristic on the position in column zero.
* Note that some struct or enum or init put also their { in first column
* but set_in_other will overwrite the previous InFunction tag.
match xs with
| [] -> ()
(* ) { and the closing } is in column zero, then certainly a function *)
- | BToken ({tok = TCPar _ })::(Braceised (body, tok1, Some tok2))::xs
- when tok1.col <> 0 && tok2.col =|= 0 ->
- body +> List.iter (iter_token_brace (fun tok ->
+ | BToken ({tok = TCPar _ })::(Braceised (body, tok1, Some tok2))::xs
+ when tok1.col <> 0 && tok2.col =|= 0 ->
+ body +> List.iter (iter_token_brace (fun tok ->
tok.where <- InFunction
));
set_in_function_tag xs
| (BToken x)::xs -> set_in_function_tag xs
- | (Braceised (body, tok1, Some tok2))::xs
- when tok1.col =|= 0 && tok2.col =|= 0 ->
- body +> List.iter (iter_token_brace (fun tok ->
+ | (Braceised (body, tok1, Some tok2))::xs
+ when tok1.col =|= 0 && tok2.col =|= 0 ->
+ body +> List.iter (iter_token_brace (fun tok ->
tok.where <- InFunction
));
set_in_function_tag xs
- | Braceised (body, tok1, tok2)::xs ->
+ | Braceised (body, tok1, tok2)::xs ->
set_in_function_tag xs
-
-let rec set_in_other xs =
- match xs with
+
+let rec set_in_other xs =
+ match xs with
| [] -> ()
(* enum x { } *)
| BToken ({tok = Tenum _})::BToken ({tok = TIdent _})
- ::Braceised(body, tok1, tok2)::xs
+ ::Braceised(body, tok1, tok2)::xs
| BToken ({tok = Tenum _})
- ::Braceised(body, tok1, tok2)::xs
- ->
- body +> List.iter (iter_token_brace (fun tok ->
+ ::Braceised(body, tok1, tok2)::xs
+ ->
+ body +> List.iter (iter_token_brace (fun tok ->
tok.where <- InEnum;
));
set_in_other xs
(* struct x { } *)
| BToken ({tok = Tstruct _})::BToken ({tok = TIdent _})
- ::Braceised(body, tok1, tok2)::xs ->
- body +> List.iter (iter_token_brace (fun tok ->
+ ::Braceised(body, tok1, tok2)::xs ->
+ body +> List.iter (iter_token_brace (fun tok ->
tok.where <- InStruct;
));
set_in_other xs
(* = { } *)
| BToken ({tok = TEq _})
- ::Braceised(body, tok1, tok2)::xs ->
- body +> List.iter (iter_token_brace (fun tok ->
+ ::Braceised(body, tok1, tok2)::xs ->
+ body +> List.iter (iter_token_brace (fun tok ->
tok.where <- InInitializer;
));
set_in_other xs
| BToken _::xs -> set_in_other xs
- | Braceised(body, tok1, tok2)::xs ->
+ | Braceised(body, tok1, tok2)::xs ->
body +> List.iter set_in_other;
set_in_other xs
-
-
-let set_context_tag xs =
+
+
+let set_context_tag xs =
begin
set_in_function_tag xs;
set_in_other xs;
end
-
+