(* Yoann Padioleau, Julia Lawall
- *
+ *
+ * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
* Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License (GPL)
* version 2 as published by the Free Software Foundation.
- *
+ *
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* file license.txt for more details.
- *
- *
+ *
+ *
* Modifications by Julia Lawall for better newline handling.
*)
open Common
(* Types used during the intermediate phases of the unparsing *)
(*****************************************************************************)
-type token1 =
+type token1 =
| Fake1 of info
| T1 of Parser_c.token
* token and get something simpler ? because we need to know if the
* info is a TCommentCpp or TCommentSpace, etc for some of the further
* analysis so easier to keep with the token.
- *
+ *
* This type contains the whole information. Have all the tokens with this
* type.
*)
type min =
- Min of (int list (* match numbers *) * int (* adjacency information *))
+ Min of (int list (* match numbers from witness trees *) *
+ int (* adjacency information *))
| Ctx
-type token2 =
- | T2 of Parser_c.token * min *
+type token2 =
+ | T2 of Parser_c.token * min *
int option (* orig index, abstracting away comments and space *)
| Fake2
| Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
+ * Unparse_cocci.nlhint option
| C2 of string
+ | Comma of string
| Indent_cocci2
- | Unindent_cocci2
+ | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
(* not used yet *)
-type token3 =
+type token3 =
| T3 of Parser_c.token
| Cocci3 of string
| C3 of string
(* Helpers *)
(*****************************************************************************)
-let info_of_token1 t =
+let info_of_token1 t =
match t with
| Fake1 info -> info
| T1 tok -> TH.info_of_tok tok
let str_of_token2 = function
| T2 (t,_,_) -> TH.str_of_tok t
| Fake2 -> ""
- | Cocci2 (s,_,_,_) -> s
+ | Cocci2 (s,_,_,_,_) -> s
| C2 s -> s
+ | Comma s -> s
| Indent_cocci2 -> ""
- | Unindent_cocci2 -> ""
+ | Unindent_cocci2 _ -> ""
let print_token2 = function
| T2 (t,b,_) ->
| Ctx -> "" in
"T2:"^b_str^TH.str_of_tok t
| Fake2 -> "fake"
- | Cocci2 (s,_,lc,rc) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
+ | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
| C2 s -> "C2:"^s
+ | Comma s -> "Comma:"^s
| Indent_cocci2 -> "Indent"
- | Unindent_cocci2 -> "Unindent"
+ | Unindent_cocci2 _ -> "Unindent"
let simple_print_all_tokens1 l =
- List.iter (function x -> Printf.printf "%s " (print_token1 x)) l;
+ List.iter (function x -> Printf.printf "|%s| " (print_token1 x)) l;
Printf.printf "\n"
let simple_print_all_tokens2 l =
-let mk_token_extended x =
- let origidx =
+let mk_token_extended x =
+ let origidx =
match x with
- | T2 (_,_, idx) -> idx
+ | T2 (_,_, idx) -> idx
| _ -> None
in
- { tok2 = x;
+ { tok2 = x;
str = str_of_token2 x;
idx = origidx;
new_tokens_before = [];
remove = false;
}
-let rebuild_tokens_extented toks_ext =
+let rebuild_tokens_extented toks_ext =
let _tokens = ref [] in
- toks_ext +> List.iter (fun tok ->
+ toks_ext +> List.iter (fun tok ->
tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
if not tok.remove then push2 tok.tok2 _tokens;
);
| Ast_cocci.MINUS (_,_,_,x::xs) -> true
| Ast_cocci.PLUS _ -> raise Impossible
-let contain_plus info =
+let contain_plus info =
let mck = Ast_c.mcode_of_info info in
mcode_contain_plus mck
(* Last fix on the ast *)
(*****************************************************************************)
-(* Because of the ugly trick to handle initialiser, I generate fake ','
+(* Because of the ugly trick to handle initialiser, I generate fake ','
* for the last initializer element, but if there is nothing around it,
* I don't want in the end to print it.
*)
-let remove_useless_fakeInfo_struct program =
+let remove_useless_fakeInfo_struct program =
let bigf = { Visitor_c.default_visitor_c_s with
- Visitor_c.kini_s = (fun (k,bigf) ini ->
+ Visitor_c.kini_s = (fun (k,bigf) ini ->
match k ini with
| InitList args, ii ->
(match ii with
(* sometimes the guy put a normal iicommaopt *)
then InitList args, [i1;i2]
else InitList args, [i1;i2;iicommaopt]
- | [i1;i2;iicommaopt;end_comma_opt] ->
+ | [i1;i2;iicommaopt;end_comma_opt] ->
(* only in #define. end_comma_opt canot be fake *)
(* not sure if this will be considered ambiguous with a previous
case? *)
(* Tokens1 generation *)
(*****************************************************************************)
-let get_fakeInfo_and_tokens celem toks =
- let toks_in = ref toks in
+let get_fakeInfo_and_tokens celem toks =
+ let toks_in = ref toks in
let toks_out = ref [] in
(* todo? verify good order of position ? *)
- let pr_elem info =
+ let pr_elem info =
match Ast_c.pinfo_of_info info with
- | FakeTok _ ->
+ | FakeTok _ ->
Common.push2 (Fake1 info) toks_out
- | OriginTok _ | ExpandedTok _ ->
+ | OriginTok _ | ExpandedTok _ ->
(* get the associated comments/space/cppcomment tokens *)
let (before, x, after) =
- !toks_in +> Common.split_when (fun tok ->
+ !toks_in +> Common.split_when (fun tok ->
info =*= TH.info_of_tok tok)
in
assert(info =*= TH.info_of_tok x);
(*old: assert(before +> List.for_all (TH.is_comment)); *)
- before +> List.iter (fun x ->
+ before +> List.iter (fun x ->
if not (TH.is_comment x)
then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
(* case such as int asm d3("x"); not yet in ast *)
before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
push2 (T1 x) toks_out;
toks_in := after;
- | AbstractLineTok _ ->
+ | AbstractLineTok _ ->
(* can be called on type info when for instance use -type_c *)
if !Flag_parsing_c.pretty_print_type_info
then Common.push2 (Fake1 info) toks_out
- else raise Impossible (* at this stage *)
+ else raise Impossible (* at this stage *)
in
let pr_space _ = () in (* use the spacing that is there already *)
match fake_info with
Some(bef,((Fake1 info) as fake),aft) ->
(match !(info.cocci_tag) with
- | Some x ->
+ | Some x ->
(match x with
(Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
(* move the fake node forwards *)
failwith "fake node should not be before-after"
| _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
)
- | None ->
+ | None ->
bef @ fake :: (loop aft)
)
| None -> toks
(*****************************************************************************)
let comment2t2 = function
- (Token_c.TCommentCpp x,(info : Token_c.info)) ->
+ (Token_c.TCommentCpp
+ (* not sure iif the following list is exhaustive or complete *)
+ (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError),
+ (info : Token_c.info)) ->
+ C2(info.Common.str)
+ | (Token_c.TCommentCpp x,(info : Token_c.info)) ->
C2("\n"^info.Common.str^"\n")
| x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
-let expand_mcode toks =
+let expand_mcode toks =
let toks_out = ref [] in
let index = ref 0 in
- let add_elem t minus =
+ let add_elem t minus =
match t with
- | Fake1 info ->
+ | Fake1 info ->
let str = Ast_c.str_of_info info in
if str =$= ""
then push2 (Fake2) toks_out
- (* perhaps the fake ',' *)
- else push2 (C2 str) toks_out
-
-
+ (* fx the fake "," at the end of a structure or enum.
+ no idea what other fake info there can be... *)
+ else push2 (Comma str) toks_out
+
+
| T1 tok ->
(*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
(* no tag on expandedTok ! *)
"expanded token %s on line %d is either modified or stored in a metavariable"
(TH.str_of_tok tok) (TH.line_of_tok tok)));
- let tok' = tok +> TH.visitor_info_of_tok (fun i ->
+ let tok' = tok +> TH.visitor_info_of_tok (fun i ->
{ i with cocci_tag = ref Ast_c.emptyAnnot; }
) in
- let optindex =
+ let optindex =
if TH.is_origin tok && not (TH.is_real_comment tok)
then begin
incr index;
push2 (T2 (tok', minus, optindex)) toks_out
in
- let expand_info t =
- let (mcode,env) =
+ let expand_info t =
+ let (mcode,env) =
Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
- let pr_cocci s ln col rcol =
- push2 (Cocci2(s,ln,col,rcol)) toks_out in
- let pr_c info =
+ let pr_cocci s ln col rcol hint =
+ push2 (Cocci2(s,ln,col,rcol,hint)) toks_out in
+ let pr_c info =
(match Ast_c.pinfo_of_info info with
Ast_c.AbstractLineTok _ ->
push2 (C2 (Ast_c.str_of_info info)) toks_out
List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
let pr_barrier ln col = (* marks a position, used around C code *)
- push2 (Cocci2("",ln,col,col)) toks_out in
+ push2 (Cocci2("",ln,col,col,None)) toks_out in
let pr_nobarrier ln col = () in (* not needed for linux spacing *)
let pr_cspace _ = push2 (C2 " ") toks_out in
let pr_arity _ = () (* not interested *) in
let indent _ = push2 Indent_cocci2 toks_out in
- let unindent _ = push2 Unindent_cocci2 toks_out in
+ let unindent x = push2 (Unindent_cocci2 x) toks_out in
let args_pp =
(env, pr_cocci, pr_c, pr_cspace,
Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
indent, unindent) in
- (* old: when for yacfe with partial cocci:
- * add_elem t false;
+ (* old: when for yacfe with partial cocci:
+ * add_elem t false;
*)
(* patch: when need full coccinelle transformation *)
let unparser = Unparse_cocci.pp_list_list_any args_pp false in
match mcode with
- | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
+ | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
(* Why adding ? because I want to have all the information, the whole
- * set of tokens, so I can then process and remove the
+ * set of tokens, so I can then process and remove the
* is_between_two_minus for instance *)
add_elem t (Min (inst,adj));
unparser any_xxs Unparse_cocci.InPlace
- | Ast_cocci.CONTEXT (_,any_befaft) ->
+ | Ast_cocci.CONTEXT (_,any_befaft) ->
(match any_befaft with
- | Ast_cocci.NOTHING ->
+ | Ast_cocci.NOTHING ->
add_elem t Ctx
| Ast_cocci.BEFORE (xxs,_) ->
unparser xxs Unparse_cocci.Before;
add_elem t Ctx
- | Ast_cocci.AFTER (xxs,_) ->
+ | Ast_cocci.AFTER (xxs,_) ->
add_elem t Ctx;
unparser xxs Unparse_cocci.After;
- | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
+ | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
unparser xxs Unparse_cocci.Before;
add_elem t Ctx;
unparser yys Unparse_cocci.After;
toks +> List.iter expand_info;
List.rev !toks_out
-
+
(*****************************************************************************)
(* Tokens2 processing, filtering, adjusting *)
let is_space = function
| T2(Parser_c.TCommentSpace _,_b,_i) -> true (* only whitespace *)
- | _ -> false
+ | _ -> false
let is_newline = function
| T2(Parser_c.TCommentNewline _,_b,_i) -> true
| _ -> false
let is_whitespace = function
- | (T2 (t,_b,_i)) ->
+ | (T2 (t,_b,_i)) ->
(match t with
| Parser_c.TCommentSpace _ -> true (* only whitespace *)
| Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
| _ -> false
)
- | _ -> false
+ | _ -> false
let is_minusable_comment = function
- | (T2 (t,_b,_i)) ->
+ | (T2 (t,_b,_i)) ->
(match t with
| Parser_c.TCommentSpace _ (* only whitespace *)
- (* patch: coccinelle *)
+ (* patch: coccinelle *)
| Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
+ | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
| Parser_c.TComment _
| Parser_c.TCommentCpp (Token_c.CppAttr, _)
| Parser_c.TCommentCpp (Token_c.CppMacro, _)
| _ -> false
)
- | _ -> false
+ | _ -> false
let is_minusable_comment_nocpp = function
- | (T2 (t,_b,_i)) ->
+ | (T2 (t,_b,_i)) ->
(match t with
| Parser_c.TCommentSpace _ (* only whitespace *)
- (* patch: coccinelle *)
+ (* patch: coccinelle *)
| Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
+ | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
| Parser_c.TComment _ -> true
| Parser_c.TCommentCpp (Token_c.CppAttr, _)
| Parser_c.TCommentCpp (Token_c.CppMacro, _)
| _ -> false
)
- | _ -> false
+ | _ -> false
let all_coccis = function
- Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 -> true
+ Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ -> true
| _ -> false
(*previously gave up if the first character was a newline, but not clear why*)
let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
let set_minus_comment adj = function
- | T2 (t,Ctx,idx) ->
+ | T2 (t,Ctx,idx) ->
let str = TH.str_of_tok t in
(match t with
| Parser_c.TCommentSpace _
-(* patch: coccinelle *)
+(* patch: coccinelle *)
| Parser_c.TCommentNewline _ -> ()
- | Parser_c.TComment _
- | Parser_c.TCommentCpp (Token_c.CppAttr, _)
+ | Parser_c.TComment _
+ | Parser_c.TCommentCpp (Token_c.CppAttr, _)
| Parser_c.TCommentCpp (Token_c.CppMacro, _)
| Parser_c.TCommentCpp (Token_c.CppDirective, _)
- ->
+ ->
pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
(TH.line_of_tok t) str)
| _ -> raise Impossible
);
T2 (t, Min adj, idx)
-(* patch: coccinelle *)
+(* patch: coccinelle *)
| T2 (t,Min adj,idx) as x -> x
| _ -> raise Impossible
let set_minus_comment_or_plus adj = function
- Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 as x -> x
+ Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ as x -> x
| x -> set_minus_comment adj x
let drop_minus xs =
| _ -> false
)
-let remove_minus_and_between_and_expanded_and_fake xs =
-
- (* get rid of exampled and fake tok *)
- let xs = xs +> Common.exclude (function
+let drop_expanded_and_fake xs =
+ xs +> Common.exclude (function
| T2 (t,_,_) when TH.is_expanded t -> true
| Fake2 -> true
-
| _ -> false
)
- in
+
+let remove_minus_and_between_and_expanded_and_fake xs =
+
+ (* get rid of expanded and fake tok *)
+ let xs = drop_expanded_and_fake xs in
let minus_or_comment = function
T2(_,Min adj,_) -> true
(* non-empty intersection of witness trees *)
not ((Common.inter_set index1 index2) = []) in
- let rec adjust_around_minus = function
- [] -> []
- | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
- (((T2(_,Min adj,_))::_) as rest) ->
- (* an initial newline, as in a replaced statement *)
- let (between_minus,rest) = Common.span minus_or_comment rest in
- (match rest with
- [] -> (set_minus_comment adj x) ::
- (List.map (set_minus_comment adj) between_minus)
- | T2(_,Ctx,_)::_ when is_newline (List.hd(List.rev between_minus)) ->
- (set_minus_comment adj x)::(adjust_within_minus between_minus) @
- (adjust_around_minus rest)
- | _ ->
- x :: (adjust_within_minus between_minus) @
- (adjust_around_minus rest))
- | ((T2(_,Min adj,_))::_) as rest ->
- (* no initial newline, as in a replaced expression *)
- let (between_minus,rest) = Common.span minus_or_comment rest in
- (match rest with
- [] ->
- (List.map (set_minus_comment adj) between_minus)
- | _ ->
- (adjust_within_minus between_minus) @
- (adjust_around_minus rest))
- | x::xs -> x::adjust_around_minus xs
- and adjust_within_minus = function
- [] -> []
- | (T2(_,Min adj1,_) as t1)::xs ->
- let (between_minus,rest) = Common.span is_minusable_comment xs in
- (match rest with
- [] ->
- (* keep last newline *)
- let (drop,keep) =
- try
- let (drop,nl,keep) =
- Common.split_when is_newline between_minus in
- (drop, nl :: keep)
- with Not_found -> (between_minus,[]) in
- t1 ::
- List.map (set_minus_comment_or_plus adj1) drop @
- keep
- | (T2(_,Min adj2,_) as t2)::rest when common_adj adj1 adj2 ->
- t1::
- List.map (set_minus_comment_or_plus adj1) between_minus @
- adjust_within_minus (t2::rest)
- | x::xs ->
- t1::(between_minus @ adjust_within_minus (x::xs)))
- | _ -> failwith "only minus and space possible" in
-
(* new idea: collects regions not containing non-space context code
if two adjacent adjacent minus tokens satisfy common_adj then delete
all spaces, comments etc between them
let (minus_list,rest) = Common.span not_context (t1::xs) in
let contains_plus = List.exists is_plus minus_list in
adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
- | x::xs -> x :: adjust_around_minus xs
+ | x::xs ->
+ x :: adjust_around_minus xs
and adjust_within_minus cp (* contains plus *) = function
(T2(_,Min adj1,_) as t1)::xs ->
let not_minus = function T2(_,Min _,_) -> false | _ -> true in
(List.map (set_minus_comment_or_plus adj1) not_minus_list)
@ (adjust_within_minus cp (t2::xs))
| (T2(_,Min adj2,_) as t2)::xs ->
- let is_whitespace_or_plus = function
- (T2 _) as x -> is_space x
- | _ -> true (*plus*) in
- if List.for_all is_whitespace_or_plus not_minus_list
+ if not cp && List.for_all is_whitespace not_minus_list
then
(List.map (set_minus_comment_or_plus adj1) not_minus_list)
@ (adjust_within_minus cp (t2::xs))
- else not_minus_list @ (adjust_within_minus cp (t2::xs))
+ else
+ not_minus_list @ (adjust_within_minus cp (t2::xs))
| _ ->
if cp
then xs
(T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false
| _ -> true
and is_plus = function
- C2 _ | Cocci2 _ -> true
+ C2 _ | Comma _ | Cocci2 _ -> true
| _ -> false in
let xs = adjust_around_minus xs in
m ::
(List.map (set_minus_comment adj) spaces) @
(adjust_before_brace rest)
+ | ((T2 (t0, Ctx, idx0)) as m0) :: ((T2 (t, Min adj, idx)) as m) :: rest
+ when TH.str_of_tok t0 = "" ->
+ (* This is for the case of a #define that is completely deleted,
+ because a #define has a strange EOL token at the end.
+ We hope there i no other kind of token that is represented by
+ "", but it seems like changing the kind of token might break
+ the end of entity recognition in the C parser.
+ See parsing_hacks.ml *)
+ let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
+ m0 :: m ::
+ (List.map (set_minus_comment adj) spaces) @
+ (adjust_before_brace rest)
| rest -> adjust_before_brace rest in
let xs = List.rev (from_newline (List.rev xs)) in
let is_ident_like s = s ==~ Common.regexp_alpha
-let rec add_space xs =
+let rec drop_space_at_endline = function
+ [] -> []
+ | [x] -> [x]
+ | ((T2(Parser_c.TCommentSpace _,Ctx,_i)) as a)::rest ->
+ let (outer_spaces,rest) = Common.span is_space rest in
+ let minus_or_comment_or_space_nocpp = function
+ T2(_,Min adj,_) -> true
+ | (T2(Parser_c.TCommentSpace _,Ctx,_i)) -> true
+ | (T2(Parser_c.TCommentNewline _,Ctx,_i)) -> false
+ | x -> false in
+ let (minus,rest) = Common.span minus_or_comment_or_space_nocpp rest in
+ let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in
+ if List.exists (function T2(_,Min adj,_) -> true | _ -> false) minus
+ then
+ match rest with
+ ((T2(Parser_c.TCommentNewline _,Ctx,_i)) as a)::rest ->
+ (* drop trailing spaces *)
+ minus@a::(drop_space_at_endline rest)
+ | _ -> fail()
+ else fail()
+ | a :: rest -> a :: drop_space_at_endline rest
+
+(* if a removed ( is between two tokens, then add a space *)
+let rec paren_to_space = function
+ [] -> []
+ | [x] -> [x]
+ | [x;y] -> [x;y]
+ | ((T2(_,Ctx,_)) as a)::((T2(t,Min _,_)) as b)::((T2(_,Ctx,_)) as c)::rest
+ when not (is_whitespace a) && TH.str_of_tok t = "(" ->
+ a :: b :: (C2 " ") :: (paren_to_space (c :: rest))
+ | a :: rest -> a :: (paren_to_space rest)
+
+let rec add_space xs =
match xs with
| [] -> []
| [x] -> [x]
- | (Cocci2(sx,lnx,_,rcolx) as x)::((Cocci2(sy,lny,lcoly,_)) as y)::xs
+ | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
(* this only works within a line. could consider whether
something should be done to add newlines too, rather than
printing them explicitly in unparse_cocci. *)
x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
- | x::y::xs ->
+ | ((T2(_,Ctx,_)) as x)::((Cocci2 _) as y)::xs -> (* add space on boundary *)
+ let sx = str_of_token2 x in
+ let sy = str_of_token2 y in
+ if is_ident_like sx && (is_ident_like sy or List.mem sy ["="])
+ then x::C2 " "::(add_space (y::xs))
+ else x::(add_space (y::xs))
+ | x::y::xs -> (* not boundary, not sure if it is possible *)
let sx = str_of_token2 x in
let sy = str_of_token2 y in
if is_ident_like sx && is_ident_like sy
then x::C2 " "::(add_space (y::xs))
else x::(add_space (y::xs))
+(* A fake comma is added at the end of an unordered initlist or a enum
+decl, if the initlist or enum doesn't already end in a comma. This is only
+needed if there is + code, ie if we see Cocci after it in the code sequence *)
+let rec drop_end_comma = function
+ [] -> []
+ | [x] -> [x]
+ | ((Comma ",") as x) :: rest ->
+ let (newlines,rest2) = Common.span is_whitespace rest in
+ (match rest2 with
+ (Cocci2 _) :: _ -> x :: drop_end_comma rest
+ | _ -> drop_end_comma rest)
+ | x :: xs -> x :: drop_end_comma xs
+
+(* The following only works for the outermost function call. Stack records
+the column of all open parentheses. Space_cell contains the most recent
+comma in the outermost function call. The goal is to decide whether this
+should be followed by a space or a newline and indent. *)
+let add_newlines toks tabbing_unit =
+ let create_indent n =
+ let (tu,tlen) =
+ match tabbing_unit with
+ Some ("\t",_) -> ("\t",8)
+ | Some ("",_) -> ("\t",8) (* not sure why... *)
+ | Some (s,_) -> (s,String.length s) (* assuming only spaces *)
+ | None -> ("\t",8) in
+ let rec loop seen =
+ if seen + tlen <= n
+ then tu ^ loop (seen + tlen)
+ else String.make (n-seen) ' ' in
+ loop 0 in
+ let check_for_newline count x = function
+ Some (start,space_cell) when count > Flag_parsing_c.max_width ->
+ space_cell := "\n"^(create_indent x);
+ Some (x + (count - start))
+ | _ -> None in
+ (* the following is for strings that may contain newline *)
+ let string_length s count =
+ let l = list_of_string s in
+ List.fold_left
+ (function count ->
+ function
+ '\t' -> count + 8
+ | '\n' -> 0
+ | c -> count + 1)
+ count l in
+ let rec loop info count = function
+ [] -> []
+ | ((T2(tok,_,_)) as a)::xs ->
+ a :: loop info (string_length (TH.str_of_tok tok) count) xs
+ | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs ->
+ let (stack,space_cell) = info in
+ let rest =
+ match hint with
+ None -> loop info (count + (String.length s)) xs
+ | Some Unparse_cocci.StartBox ->
+ let count = count + (String.length s) in
+ loop (count::stack,space_cell) count xs
+ | Some Unparse_cocci.EndBox ->
+ let count = count + (String.length s) in
+ (match stack with
+ [x] ->
+ (match check_for_newline count x space_cell with
+ Some count -> loop ([],None) count xs
+ | None -> loop ([],None) count xs)
+ | _ -> loop (List.tl stack,space_cell) count xs)
+ | Some (Unparse_cocci.SpaceOrNewline sp) ->
+ let count = count + (String.length s) + 1 (*space*) in
+ (match stack with
+ [x] ->
+ (match check_for_newline count x space_cell with
+ Some count -> loop (stack,Some (x,sp)) count xs
+ | None -> loop (stack,Some (count,sp)) count xs)
+ | _ -> loop info count xs) in
+ a :: rest
+ | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs
+ | ((Comma(s)) as a)::xs -> a :: loop info (string_length s count) xs
+ | Fake2 :: _ | Indent_cocci2 :: _
+ | Unindent_cocci2 _::_ ->
+ failwith "unexpected fake, indent, or unindent" in
+ let redo_spaces prev = function
+ Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) ->
+ C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev
+ | t -> t::prev in
+ (match !Flag_parsing_c.spacing with
+ Flag_parsing_c.SMPL -> toks
+ | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks)))
(* When insert some new code, because of a + in a SP, we must add this
* code at the right place, with the good indentation. So each time we
* encounter some spacing info, with some newline, we maintain the
* current indentation level used.
- *
+ *
* TODO problems: not accurate. ex: TODO
- *
+ *
* TODO: if in #define region, should add a \ \n
*)
-let new_tabbing2 space =
+let new_tabbing2 space =
(list_of_string space)
+> List.rev
+> Common.take_until (fun c -> c =<= '\n')
+> List.map string_of_char
+> String.concat ""
-let new_tabbing a =
+let new_tabbing a =
Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
-let rec adjust_indentation xs =
+let rec adjust_indentation xs =
+
let _current_tabbing = ref "" in
let tabbing_unit = ref None in
| x::xs -> find_first_tab started xs in
find_first_tab false xs;
- let rec aux started xs =
+ let rec balanced ct = function
+ [] -> ct >= 0
+ | ((T2(tok,_,_)) as x)::xs ->
+ (match str_of_token2 x with
+ "(" -> balanced (ct+1) xs
+ | ")" -> balanced (ct-1) xs
+ | _ -> balanced ct xs)
+ | x::xs -> balanced ct xs in
+
+ let rec aux started xs =
match xs with
| [] -> []
(* patch: coccinelle *)
| ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
- ((Cocci2 ("{",_,_,_)) as a)::xs
+ ((Cocci2 ("{",_,_,_,_)) as a)::xs
when started && str_of_token2 x =$= ")" ->
(* to be done for if, etc, but not for a function header *)
x::(C2 " ")::a::(aux started xs)
- | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs ->
- let old_tabbing = !_current_tabbing in
+ | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs
+ when balanced 0 (fst(Common.span (function x -> not(is_newline x)) xs)) ->
+ let old_tabbing = !_current_tabbing in
str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
(* only trust the indentation after the first { *)
- (if started then adjust_tabbing_unit old_tabbing !_current_tabbing);
+ (if started
+ then adjust_tabbing_unit old_tabbing !_current_tabbing);
let coccis_rest = Common.span all_coccis xs in
(match coccis_rest with
(_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
None -> aux started xs
| Some (tu,_) ->
_current_tabbing := (!_current_tabbing)^tu;
- Cocci2 (tu,-1,-1,-1)::aux started xs)
- | Unindent_cocci2::xs ->
+ Cocci2 (tu,-1,-1,-1,None)::aux started xs)
+ | Unindent_cocci2(permanent)::xs ->
(match !tabbing_unit with
None -> aux started xs
| Some (_,tu) ->
_current_tabbing := remtab tu (!_current_tabbing);
aux started xs)
(* border between existing code and cocci code *)
- | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_)) as y)::xs
+ | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs
when str_of_token2 x =$= "{" ->
x::aux true (y::Indent_cocci2::xs)
| ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
when str_of_token2 y =$= "}" ->
- x::aux started (y::Unindent_cocci2::xs)
+ x::aux started (y::Unindent_cocci2 true::xs)
(* starting the body of the function *)
| ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs
- | ((Cocci2("{",_,_,_)) as a)::xs -> a::aux true xs
- | ((Cocci2("\n",_,_,_)) as x)::xs ->
+ | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs
+ | ((Cocci2("\n",_,_,_,_)) as x)::Unindent_cocci2(false)::xs ->
+ x::aux started xs
+ | ((Cocci2("\n",_,_,_,_)) as x)::xs ->
(* dont inline in expr because of weird eval order of ocaml *)
- let s = !_current_tabbing in
- x::Cocci2 (s,-1,-1,-1)::aux started xs
+ let s = !_current_tabbing in
+ x::Cocci2 (s,-1,-1,-1,None)::aux started xs
| x::xs -> x::aux started xs in
- aux false xs
+ (aux false xs,!tabbing_unit)
let rec find_paren_comma = function
(* do nothing if was like this in original file *)
| ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
- ::xs when p2 =|= p1 + 1 ->
+ ::xs when p2 =|= p1 + 1 ->
find_paren_comma (x2::xs)
| ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
- ::xs when p2 =|= p1 + 1 ->
+ ::xs when p2 =|= p1 + 1 ->
find_paren_comma (x2::xs)
| ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
- ::xs when p2 =|= p1 + 1 ->
+ ::xs when p2 =|= p1 + 1 ->
find_paren_comma (x2::xs)
(* otherwise yes can adjust *)
- | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
+ | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
x2.remove <- true;
find_paren_comma (x2::xs)
- | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
+ | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
x1.remove <- true;
find_paren_comma (x2::xs)
- | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
+ | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
x1.remove <- true;
find_paren_comma (x2::xs)
- | x::xs ->
+ | x::xs ->
find_paren_comma xs
-
-let fix_tokens toks =
+
+let fix_tokens toks =
let toks = toks +> List.map mk_token_extended in
let cleaner = toks +> Common.exclude (function
| Fake2 -> KFake
| Cocci2 _ -> KCocci
| C2 _ -> KC
+ | Comma _ -> KC
| T2 (t,_,_) ->
(match TH.pinfo_of_tok t with
| ExpandedTok _ -> KExpanded
| FakeTok _ -> raise Impossible (* now a Fake2 *)
| AbstractLineTok _ -> raise Impossible (* now a KC *)
)
- | Unindent_cocci2 | Indent_cocci2 -> raise Impossible
-
+ | Unindent_cocci2 _ | Indent_cocci2 -> raise Impossible
+
let end_mark = "!"
let start_mark = function
| KFake -> "!F!"
- | KCocci -> "!S!"
+ | KCocci -> "!S!"
| KC -> "!A!"
| KExpanded -> "!E!"
| KOrigin -> ""
if !Flag_parsing_c.debug_unparsing
then
let current_kind = ref KOrigin in
- xs +> List.iter (fun t ->
+ xs +> List.iter (fun t ->
let newkind = kind_of_token2 t in
if newkind =*= !current_kind
then pr (str_of_token2 t)
current_kind := newkind
end
);
- else
+ else
xs +> List.iter (fun x -> pr (str_of_token2 x))
-
+
* fancy stuff when a function was not modified at all. Just need to
* print the list of token as-is. But now pretty_print_c.ml handles
* almost everything so maybe less useful. Maybe PPviatok allows to
- * optimize a little the pretty printing.
- *
+ * optimize a little the pretty printing.
+ *
* update: now have PPviastr which goes even faster than PPviatok, so
* PPviatok has disappeared.
*)
(* The pp_program function will call pretty_print_c.ml with a special
- * function to print the leaf components, the tokens. When we want to
+ * function to print the leaf components, the tokens. When we want to
* print a token, we need to print also maybe the space and comments that
- * were close to it in the original file (and that was omitted during the
+ * were close to it in the original file (and that was omitted during the
* parsing phase), and honor what the cocci-info attached to the token says.
* Maybe we will not print the token if it's a MINUS-token, and maybe we will
- * print it and also print some cocci-code attached in a PLUS to it.
+ * print it and also print some cocci-code attached in a PLUS to it.
* So we will also maybe call unparse_cocci. Because the cocci-code may
* contain metavariables, unparse_cocci will in fact sometimes call back
* pretty_print_c (which will this time don't call back again unparse_cocci)
*)
-let pp_program2 xs outfile =
- Common.with_open_outfile outfile (fun (pr,chan) ->
- let pr s =
- if !Flag_parsing_c.debug_unparsing
+let pp_program2 xs outfile =
+ Common.with_open_outfile outfile (fun (pr,chan) ->
+ let pr s =
+ if !Flag_parsing_c.debug_unparsing
then begin pr2_no_nl s; flush stderr end
- else pr s
+ else pr s
(* flush chan; *)
(* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
in
-
- xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
+
+ xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
(* here can still work on ast *)
let e = remove_useless_fakeInfo_struct e in
-
+
match ppmethod with
| PPnormal ->
(* now work on tokens *)
-
(* phase1: just get all the tokens, all the information *)
- assert(toks_e +> List.for_all (fun t ->
+ assert(toks_e +> List.for_all (fun t ->
TH.is_origin t or TH.is_expanded t
));
let toks = get_fakeInfo_and_tokens e toks_e in
let toks = displace_fake_nodes toks in
(* assert Origin;ExpandedTok;Faketok *)
let toks = expand_mcode toks in
+
(* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
* and no tag information, just NOTHING. *)
let toks =
if !Flag.sgrep_mode2
- then drop_minus toks (* nothing to do for sgrep *)
+ then
+ (* nothing else to do for sgrep *)
+ drop_expanded_and_fake (drop_minus toks)
else
(* phase2: can now start to filter and adjust *)
- let toks = adjust_indentation toks in
+ let (toks,tu) = adjust_indentation toks in
let toks = adjust_before_semicolon toks in(*before remove minus*)
+ let toks = drop_space_at_endline toks in
+ let toks = paren_to_space toks in
+ let toks = drop_end_comma toks in
let toks = remove_minus_and_between_and_expanded_and_fake toks in
(* assert Origin + Cocci + C and no minus *)
let toks = add_space toks in
+ let toks = add_newlines toks tu in
let toks = fix_tokens toks in
- toks in
+ toks in
- (* in theory here could reparse and rework the ast! or
+ (* in theory here could reparse and rework the ast! or
* apply some SP. Not before cos julia may have generated
* not parsable file. Need do unparsing_tricks call before being
* ready to reparse. *)
)
)
-let pp_program a b =
+let pp_program a b =
Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
-let pp_program_default xs outfile =
+let pp_program_default xs outfile =
let xs' = xs +> List.map (fun x -> x, PPnormal) in
pp_program xs' outfile