X-Git-Url: http://git.hcoop.net/bpt/coccinelle.git/blobdiff_plain/fc1ad9719a3a6317b3a4749dac68e7272a3617db..18b1275a488603d2b4fcf2a36dd4ac14e0518268:/parsing_c/unparse_c.ml diff --git a/parsing_c/unparse_c.ml b/parsing_c/unparse_c.ml index 6d0dc04..15217bd 100644 --- a/parsing_c/unparse_c.ml +++ b/parsing_c/unparse_c.ml @@ -1,17 +1,18 @@ (* Yoann Padioleau, Julia Lawall - * + * + * Copyright (C) 2010, University of Copenhagen DIKU and INRIA. * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License (GPL) * version 2 as published by the Free Software Foundation. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * file license.txt for more details. - * - * + * + * * Modifications by Julia Lawall for better newline handling. *) open Common @@ -36,7 +37,7 @@ let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing (* Types used during the intermediate phases of the unparsing *) (*****************************************************************************) -type token1 = +type token1 = | Fake1 of info | T1 of Parser_c.token @@ -45,7 +46,7 @@ type token1 = * token and get something simpler ? because we need to know if the * info is a TCommentCpp or TCommentSpace, etc for some of the further * analysis so easier to keep with the token. - * + * * This type contains the whole information. Have all the tokens with this * type. *) @@ -53,17 +54,18 @@ type min = Min of (int list (* match numbers *) * int (* adjacency information *)) | Ctx -type token2 = - | T2 of Parser_c.token * min * +type token2 = + | T2 of Parser_c.token * min * int option (* orig index, abstracting away comments and space *) | Fake2 | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *) + * Unparse_cocci.nlhint option | C2 of string | Indent_cocci2 - | Unindent_cocci2 + | Unindent_cocci2 of bool (* true for permanent, false for temporary *) (* not used yet *) -type token3 = +type token3 = | T3 of Parser_c.token | Cocci3 of string | C3 of string @@ -83,7 +85,7 @@ type token_extended = { (* Helpers *) (*****************************************************************************) -let info_of_token1 t = +let info_of_token1 t = match t with | Fake1 info -> info | T1 tok -> TH.info_of_tok tok @@ -95,10 +97,10 @@ let print_token1 = function let str_of_token2 = function | T2 (t,_,_) -> TH.str_of_tok t | Fake2 -> "" - | Cocci2 (s,_,_,_) -> s + | Cocci2 (s,_,_,_,_) -> s | C2 s -> s | Indent_cocci2 -> "" - | Unindent_cocci2 -> "" + | Unindent_cocci2 _ -> "" let print_token2 = function | T2 (t,b,_) -> @@ -110,13 +112,13 @@ let print_token2 = function | Ctx -> "" in "T2:"^b_str^TH.str_of_tok t | Fake2 -> "fake" - | Cocci2 (s,_,lc,rc) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s + | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s | C2 s -> "C2:"^s | Indent_cocci2 -> "Indent" - | Unindent_cocci2 -> "Unindent" + | Unindent_cocci2 _ -> "Unindent" let simple_print_all_tokens1 l = - List.iter (function x -> Printf.printf "%s " (print_token1 x)) l; + List.iter (function x -> Printf.printf "|%s| " (print_token1 x)) l; Printf.printf "\n" let simple_print_all_tokens2 l = @@ -129,22 +131,22 @@ let str_of_token3 = function -let mk_token_extended x = - let origidx = +let mk_token_extended x = + let origidx = match x with - | T2 (_,_, idx) -> idx + | T2 (_,_, idx) -> idx | _ -> None in - { tok2 = x; + { tok2 = x; str = str_of_token2 x; idx = origidx; new_tokens_before = []; remove = false; } -let rebuild_tokens_extented toks_ext = +let rebuild_tokens_extented toks_ext = let _tokens = ref [] in - toks_ext +> List.iter (fun tok -> + toks_ext +> List.iter (fun tok -> tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens); if not tok.remove then push2 tok.tok2 _tokens; ); @@ -158,9 +160,9 @@ let mcode_contain_plus = function (* patch: when need full coccinelle transformation *) | Ast_cocci.MINUS (_,_,_,[]) -> false | Ast_cocci.MINUS (_,_,_,x::xs) -> true - | Ast_cocci.PLUS -> raise Impossible + | Ast_cocci.PLUS _ -> raise Impossible -let contain_plus info = +let contain_plus info = let mck = Ast_c.mcode_of_info info in mcode_contain_plus mck @@ -168,14 +170,14 @@ let contain_plus info = (* Last fix on the ast *) (*****************************************************************************) -(* Because of the ugly trick to handle initialiser, I generate fake ',' +(* Because of the ugly trick to handle initialiser, I generate fake ',' * for the last initializer element, but if there is nothing around it, * I don't want in the end to print it. *) -let remove_useless_fakeInfo_struct program = +let remove_useless_fakeInfo_struct program = let bigf = { Visitor_c.default_visitor_c_s with - Visitor_c.kini_s = (fun (k,bigf) ini -> + Visitor_c.kini_s = (fun (k,bigf) ini -> match k ini with | InitList args, ii -> (match ii with @@ -186,7 +188,7 @@ let remove_useless_fakeInfo_struct program = (* sometimes the guy put a normal iicommaopt *) then InitList args, [i1;i2] else InitList args, [i1;i2;iicommaopt] - | [i1;i2;iicommaopt;end_comma_opt] -> + | [i1;i2;iicommaopt;end_comma_opt] -> (* only in #define. end_comma_opt canot be fake *) (* not sure if this will be considered ambiguous with a previous case? *) @@ -207,24 +209,24 @@ let remove_useless_fakeInfo_struct program = (* Tokens1 generation *) (*****************************************************************************) -let get_fakeInfo_and_tokens celem toks = - let toks_in = ref toks in +let get_fakeInfo_and_tokens celem toks = + let toks_in = ref toks in let toks_out = ref [] in (* todo? verify good order of position ? *) - let pr_elem info = + let pr_elem info = match Ast_c.pinfo_of_info info with - | FakeTok _ -> + | FakeTok _ -> Common.push2 (Fake1 info) toks_out - | OriginTok _ | ExpandedTok _ -> + | OriginTok _ | ExpandedTok _ -> (* get the associated comments/space/cppcomment tokens *) let (before, x, after) = - !toks_in +> Common.split_when (fun tok -> + !toks_in +> Common.split_when (fun tok -> info =*= TH.info_of_tok tok) in assert(info =*= TH.info_of_tok x); (*old: assert(before +> List.for_all (TH.is_comment)); *) - before +> List.iter (fun x -> + before +> List.iter (fun x -> if not (TH.is_comment x) then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x) (* case such as int asm d3("x"); not yet in ast *) @@ -232,11 +234,11 @@ let get_fakeInfo_and_tokens celem toks = before +> List.iter (fun x -> Common.push2 (T1 x) toks_out); push2 (T1 x) toks_out; toks_in := after; - | AbstractLineTok _ -> + | AbstractLineTok _ -> (* can be called on type info when for instance use -type_c *) if !Flag_parsing_c.pretty_print_type_info then Common.push2 (Fake1 info) toks_out - else raise Impossible (* at this stage *) + else raise Impossible (* at this stage *) in let pr_space _ = () in (* use the spacing that is there already *) @@ -267,7 +269,7 @@ let displace_fake_nodes toks = match fake_info with Some(bef,((Fake1 info) as fake),aft) -> (match !(info.cocci_tag) with - | Some x -> + | Some x -> (match x with (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) -> (* move the fake node forwards *) @@ -286,7 +288,7 @@ let displace_fake_nodes toks = failwith "fake node should not be before-after" | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *) ) - | None -> + | None -> bef @ fake :: (loop aft) ) | None -> toks @@ -298,25 +300,30 @@ let displace_fake_nodes toks = (*****************************************************************************) let comment2t2 = function - (Token_c.TCommentCpp x,(info : Token_c.info)) -> + (Token_c.TCommentCpp + (* not sure iif the following list is exhaustive or complete *) + (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError), + (info : Token_c.info)) -> + C2(info.Common.str) + | (Token_c.TCommentCpp x,(info : Token_c.info)) -> C2("\n"^info.Common.str^"\n") | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x)) -let expand_mcode toks = +let expand_mcode toks = let toks_out = ref [] in let index = ref 0 in - let add_elem t minus = + let add_elem t minus = match t with - | Fake1 info -> + | Fake1 info -> let str = Ast_c.str_of_info info in if str =$= "" then push2 (Fake2) toks_out (* perhaps the fake ',' *) else push2 (C2 str) toks_out - - + + | T1 tok -> (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*) (* no tag on expandedTok ! *) @@ -333,11 +340,11 @@ let expand_mcode toks = "expanded token %s on line %d is either modified or stored in a metavariable" (TH.str_of_tok tok) (TH.line_of_tok tok))); - let tok' = tok +> TH.visitor_info_of_tok (fun i -> + let tok' = tok +> TH.visitor_info_of_tok (fun i -> { i with cocci_tag = ref Ast_c.emptyAnnot; } ) in - let optindex = + let optindex = if TH.is_origin tok && not (TH.is_real_comment tok) then begin incr index; @@ -349,13 +356,13 @@ let expand_mcode toks = push2 (T2 (tok', minus, optindex)) toks_out in - let expand_info t = - let (mcode,env) = + let expand_info t = + let (mcode,env) = Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in - let pr_cocci s ln col rcol = - push2 (Cocci2(s,ln,col,rcol)) toks_out in - let pr_c info = + let pr_cocci s ln col rcol hint = + push2 (Cocci2(s,ln,col,rcol,hint)) toks_out in + let pr_c info = (match Ast_c.pinfo_of_info info with Ast_c.AbstractLineTok _ -> push2 (C2 (Ast_c.str_of_info info)) toks_out @@ -368,7 +375,7 @@ let expand_mcode toks = List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in let pr_barrier ln col = (* marks a position, used around C code *) - push2 (Cocci2("",ln,col,col)) toks_out in + push2 (Cocci2("",ln,col,col,None)) toks_out in let pr_nobarrier ln col = () in (* not needed for linux spacing *) let pr_cspace _ = push2 (C2 " ") toks_out in @@ -377,7 +384,7 @@ let expand_mcode toks = let pr_arity _ = () (* not interested *) in let indent _ = push2 Indent_cocci2 toks_out in - let unindent _ = push2 Unindent_cocci2 toks_out in + let unindent x = push2 (Unindent_cocci2 x) toks_out in let args_pp = (env, pr_cocci, pr_c, pr_cspace, @@ -388,41 +395,41 @@ let expand_mcode toks = Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier), indent, unindent) in - (* old: when for yacfe with partial cocci: - * add_elem t false; + (* old: when for yacfe with partial cocci: + * add_elem t false; *) (* patch: when need full coccinelle transformation *) let unparser = Unparse_cocci.pp_list_list_any args_pp false in match mcode with - | Ast_cocci.MINUS (_,inst,adj,any_xxs) -> + | Ast_cocci.MINUS (_,inst,adj,any_xxs) -> (* Why adding ? because I want to have all the information, the whole - * set of tokens, so I can then process and remove the + * set of tokens, so I can then process and remove the * is_between_two_minus for instance *) add_elem t (Min (inst,adj)); unparser any_xxs Unparse_cocci.InPlace - | Ast_cocci.CONTEXT (_,any_befaft) -> + | Ast_cocci.CONTEXT (_,any_befaft) -> (match any_befaft with - | Ast_cocci.NOTHING -> + | Ast_cocci.NOTHING -> add_elem t Ctx - | Ast_cocci.BEFORE xxs -> + | Ast_cocci.BEFORE (xxs,_) -> unparser xxs Unparse_cocci.Before; add_elem t Ctx - | Ast_cocci.AFTER xxs -> + | Ast_cocci.AFTER (xxs,_) -> add_elem t Ctx; unparser xxs Unparse_cocci.After; - | Ast_cocci.BEFOREAFTER (xxs, yys) -> + | Ast_cocci.BEFOREAFTER (xxs, yys, _) -> unparser xxs Unparse_cocci.Before; add_elem t Ctx; unparser yys Unparse_cocci.After; ) - | Ast_cocci.PLUS -> raise Impossible + | Ast_cocci.PLUS _ -> raise Impossible in toks +> List.iter expand_info; List.rev !toks_out - + (*****************************************************************************) (* Tokens2 processing, filtering, adjusting *) @@ -430,27 +437,28 @@ let expand_mcode toks = let is_space = function | T2(Parser_c.TCommentSpace _,_b,_i) -> true (* only whitespace *) - | _ -> false + | _ -> false let is_newline = function | T2(Parser_c.TCommentNewline _,_b,_i) -> true | _ -> false let is_whitespace = function - | (T2 (t,_b,_i)) -> + | (T2 (t,_b,_i)) -> (match t with | Parser_c.TCommentSpace _ -> true (* only whitespace *) | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true | _ -> false ) - | _ -> false + | _ -> false let is_minusable_comment = function - | (T2 (t,_b,_i)) -> + | (T2 (t,_b,_i)) -> (match t with | Parser_c.TCommentSpace _ (* only whitespace *) - (* patch: coccinelle *) + (* patch: coccinelle *) | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true + | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false | Parser_c.TComment _ | Parser_c.TCommentCpp (Token_c.CppAttr, _) | Parser_c.TCommentCpp (Token_c.CppMacro, _) @@ -463,14 +471,15 @@ let is_minusable_comment = function | _ -> false ) - | _ -> false + | _ -> false let is_minusable_comment_nocpp = function - | (T2 (t,_b,_i)) -> + | (T2 (t,_b,_i)) -> (match t with | Parser_c.TCommentSpace _ (* only whitespace *) - (* patch: coccinelle *) + (* patch: coccinelle *) | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true + | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false | Parser_c.TComment _ -> true | Parser_c.TCommentCpp (Token_c.CppAttr, _) | Parser_c.TCommentCpp (Token_c.CppMacro, _) @@ -483,39 +492,39 @@ let is_minusable_comment_nocpp = function | _ -> false ) - | _ -> false + | _ -> false let all_coccis = function - Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 -> true + Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 _ -> true | _ -> false (*previously gave up if the first character was a newline, but not clear why*) let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x let set_minus_comment adj = function - | T2 (t,Ctx,idx) -> + | T2 (t,Ctx,idx) -> let str = TH.str_of_tok t in (match t with | Parser_c.TCommentSpace _ -(* patch: coccinelle *) +(* patch: coccinelle *) | Parser_c.TCommentNewline _ -> () - | Parser_c.TComment _ - | Parser_c.TCommentCpp (Token_c.CppAttr, _) + | Parser_c.TComment _ + | Parser_c.TCommentCpp (Token_c.CppAttr, _) | Parser_c.TCommentCpp (Token_c.CppMacro, _) | Parser_c.TCommentCpp (Token_c.CppDirective, _) - -> + -> pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s" (TH.line_of_tok t) str) | _ -> raise Impossible ); T2 (t, Min adj, idx) -(* patch: coccinelle *) +(* patch: coccinelle *) | T2 (t,Min adj,idx) as x -> x | _ -> raise Impossible let set_minus_comment_or_plus adj = function - Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 as x -> x + Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 _ as x -> x | x -> set_minus_comment adj x let drop_minus xs = @@ -527,10 +536,9 @@ let drop_minus xs = let remove_minus_and_between_and_expanded_and_fake xs = (* get rid of exampled and fake tok *) - let xs = xs +> Common.exclude (function + let xs = xs +> Common.exclude (function | T2 (t,_,_) when TH.is_expanded t -> true | Fake2 -> true - | _ -> false ) in @@ -635,7 +643,7 @@ let remove_minus_and_between_and_expanded_and_fake xs = @ (adjust_within_minus cp (t2::xs)) | (T2(_,Min adj2,_) as t2)::xs -> let is_whitespace_or_plus = function - (T2 _) as x -> is_space x + (T2 _) as x -> is_whitespace x | _ -> true (*plus*) in if List.for_all is_whitespace_or_plus not_minus_list then @@ -745,36 +753,140 @@ let adjust_before_semicolon toks = let is_ident_like s = s ==~ Common.regexp_alpha -let rec add_space xs = +let rec drop_space_at_endline = function + [] -> [] + | [x] -> [x] + | ((T2(Parser_c.TCommentSpace _,Ctx,_i)) as a)::rest -> + let (outer_spaces,rest) = Common.span is_space rest in + let minus_or_comment_or_space_nocpp = function + T2(_,Min adj,_) -> true + | (T2(Parser_c.TCommentSpace _,Ctx,_i)) -> true + | (T2(Parser_c.TCommentNewline _,Ctx,_i)) -> false + | x -> false in + let (minus,rest) = Common.span minus_or_comment_or_space_nocpp rest in + let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in + if List.exists (function T2(_,Min adj,_) -> true | _ -> false) minus + then + match rest with + ((T2(Parser_c.TCommentNewline _,Ctx,_i)) as a)::rest -> + (* drop trailing spaces *) + minus@a::(drop_space_at_endline rest) + | _ -> fail() + else fail() + | a :: rest -> a :: drop_space_at_endline rest + +(* if a removed ( is between two tokens, then add a space *) +let rec paren_to_space = function + [] -> [] + | [x] -> [x] + | [x;y] -> [x;y] + | ((T2(_,Ctx,_)) as a)::((T2(t,Min _,_)) as b)::((T2(_,Ctx,_)) as c)::rest + when not (is_whitespace a) && TH.str_of_tok t = "(" -> + simple_print_all_tokens2 [a;b;c]; + a :: b :: (C2 " ") :: (paren_to_space (c :: rest)) + | a :: rest -> a :: (paren_to_space rest) + +let rec add_space xs = match xs with | [] -> [] | [x] -> [x] - | (Cocci2(sx,lnx,_,rcolx) as x)::((Cocci2(sy,lny,lcoly,_)) as y)::xs + | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL && not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly -> (* this only works within a line. could consider whether something should be done to add newlines too, rather than printing them explicitly in unparse_cocci. *) x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs) - | x::y::xs -> + | x::y::xs -> let sx = str_of_token2 x in let sy = str_of_token2 y in if is_ident_like sx && is_ident_like sy then x::C2 " "::(add_space (y::xs)) else x::(add_space (y::xs)) - +(* The following only works for the outermost function call. Stack records +the column of all open parentheses. Space_cell contains the most recent +comma in the outermost function call. The goal is to decide whether this +should be followed by a space or a newline and indent. *) +let add_newlines toks tabbing_unit = + let create_indent n = + let (tu,tlen) = + match tabbing_unit with + Some ("\t",_) -> ("\t",8) + | Some ("",_) -> ("\t",8) (* not sure why... *) + | Some (s,_) -> (s,String.length s) (* assuming only spaces *) + | None -> ("\t",8) in + let rec loop seen = + if seen + tlen <= n + then tu ^ loop (seen + tlen) + else String.make (n-seen) ' ' in + loop 0 in + let check_for_newline count x = function + Some (start,space_cell) when count > Flag_parsing_c.max_width -> + space_cell := "\n"^(create_indent x); + Some (x + (count - start)) + | _ -> None in + (* the following is for strings that may contain newline *) + let string_length s count = + let l = list_of_string s in + List.fold_left + (function count -> + function + '\t' -> count + 8 + | '\n' -> 0 + | c -> count + 1) + count l in + let rec loop info count = function + [] -> [] + | ((T2(tok,_,_)) as a)::xs -> + a :: loop info (string_length (TH.str_of_tok tok) count) xs + | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs -> + let (stack,space_cell) = info in + let rest = + match hint with + None -> loop info (count + (String.length s)) xs + | Some Unparse_cocci.StartBox -> + let count = count + (String.length s) in + loop (count::stack,space_cell) count xs + | Some Unparse_cocci.EndBox -> + let count = count + (String.length s) in + (match stack with + [x] -> + (match check_for_newline count x space_cell with + Some count -> loop ([],None) count xs + | None -> loop ([],None) count xs) + | _ -> loop (List.tl stack,space_cell) count xs) + | Some (Unparse_cocci.SpaceOrNewline sp) -> + let count = count + (String.length s) + 1 (*space*) in + (match stack with + [x] -> + (match check_for_newline count x space_cell with + Some count -> loop (stack,Some (x,sp)) count xs + | None -> loop (stack,Some (count,sp)) count xs) + | _ -> loop info count xs) in + a :: rest + | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs + | Fake2 :: _ | Indent_cocci2 :: _ + | Unindent_cocci2 _::_ -> + failwith "unexpected fake, indent, or unindent" in + let redo_spaces prev = function + Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) -> + C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev + | t -> t::prev in + (match !Flag_parsing_c.spacing with + Flag_parsing_c.SMPL -> toks + | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks))) (* When insert some new code, because of a + in a SP, we must add this * code at the right place, with the good indentation. So each time we * encounter some spacing info, with some newline, we maintain the * current indentation level used. - * + * * TODO problems: not accurate. ex: TODO - * + * * TODO: if in #define region, should add a \ \n *) -let new_tabbing2 space = +let new_tabbing2 space = (list_of_string space) +> List.rev +> Common.take_until (fun c -> c =<= '\n') @@ -782,11 +894,12 @@ let new_tabbing2 space = +> List.map string_of_char +> String.concat "" -let new_tabbing a = +let new_tabbing a = Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a) -let rec adjust_indentation xs = +let rec adjust_indentation xs = + let _current_tabbing = ref "" in let tabbing_unit = ref None in @@ -827,20 +940,21 @@ let rec adjust_indentation xs = | x::xs -> find_first_tab started xs in find_first_tab false xs; - let rec aux started xs = + let rec aux started xs = match xs with | [] -> [] (* patch: coccinelle *) | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _)):: - ((Cocci2 ("{",_,_,_)) as a)::xs + ((Cocci2 ("{",_,_,_,_)) as a)::xs when started && str_of_token2 x =$= ")" -> (* to be done for if, etc, but not for a function header *) x::(C2 " ")::a::(aux started xs) | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs -> - let old_tabbing = !_current_tabbing in + let old_tabbing = !_current_tabbing in str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s); (* only trust the indentation after the first { *) - (if started then adjust_tabbing_unit old_tabbing !_current_tabbing); + (if started + then adjust_tabbing_unit old_tabbing !_current_tabbing); let coccis_rest = Common.span all_coccis xs in (match coccis_rest with (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" -> @@ -852,29 +966,31 @@ let rec adjust_indentation xs = None -> aux started xs | Some (tu,_) -> _current_tabbing := (!_current_tabbing)^tu; - Cocci2 (tu,-1,-1,-1)::aux started xs) - | Unindent_cocci2::xs -> + Cocci2 (tu,-1,-1,-1,None)::aux started xs) + | Unindent_cocci2(permanent)::xs -> (match !tabbing_unit with None -> aux started xs | Some (_,tu) -> _current_tabbing := remtab tu (!_current_tabbing); aux started xs) (* border between existing code and cocci code *) - | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_)) as y)::xs + | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs when str_of_token2 x =$= "{" -> x::aux true (y::Indent_cocci2::xs) | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs when str_of_token2 y =$= "}" -> - x::aux started (y::Unindent_cocci2::xs) + x::aux started (y::Unindent_cocci2 true::xs) (* starting the body of the function *) | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs - | ((Cocci2("{",_,_,_)) as a)::xs -> a::aux true xs - | ((Cocci2("\n",_,_,_)) as x)::xs -> + | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs + | ((Cocci2("\n",_,_,_,_)) as x)::Unindent_cocci2(false)::xs -> + x::aux started xs + | ((Cocci2("\n",_,_,_,_)) as x)::xs -> (* dont inline in expr because of weird eval order of ocaml *) - let s = !_current_tabbing in - x::Cocci2 (s,-1,-1,-1)::aux started xs + let s = !_current_tabbing in + x::Cocci2 (s,-1,-1,-1,None)::aux started xs | x::xs -> x::aux started xs in - aux false xs + (aux false xs,!tabbing_unit) let rec find_paren_comma = function @@ -882,34 +998,34 @@ let rec find_paren_comma = function (* do nothing if was like this in original file *) | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2) - ::xs when p2 =|= p1 + 1 -> + ::xs when p2 =|= p1 + 1 -> find_paren_comma (x2::xs) | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2) - ::xs when p2 =|= p1 + 1 -> + ::xs when p2 =|= p1 + 1 -> find_paren_comma (x2::xs) | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2) - ::xs when p2 =|= p1 + 1 -> + ::xs when p2 =|= p1 + 1 -> find_paren_comma (x2::xs) (* otherwise yes can adjust *) - | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs -> + | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs -> x2.remove <- true; find_paren_comma (x2::xs) - | ({ str = "," } as x1)::({ str = ","} as x2)::xs -> + | ({ str = "," } as x1)::({ str = ","} as x2)::xs -> x1.remove <- true; find_paren_comma (x2::xs) - | ({ str = "," } as x1)::({ str = ")"} as x2)::xs -> + | ({ str = "," } as x1)::({ str = ")"} as x2)::xs -> x1.remove <- true; find_paren_comma (x2::xs) - | x::xs -> + | x::xs -> find_paren_comma xs - -let fix_tokens toks = + +let fix_tokens toks = let toks = toks +> List.map mk_token_extended in let cleaner = toks +> Common.exclude (function @@ -941,13 +1057,13 @@ let kind_of_token2 = function | FakeTok _ -> raise Impossible (* now a Fake2 *) | AbstractLineTok _ -> raise Impossible (* now a KC *) ) - | Unindent_cocci2 | Indent_cocci2 -> raise Impossible - + | Unindent_cocci2 _ | Indent_cocci2 -> raise Impossible + let end_mark = "!" let start_mark = function | KFake -> "!F!" - | KCocci -> "!S!" + | KCocci -> "!S!" | KC -> "!A!" | KExpanded -> "!E!" | KOrigin -> "" @@ -956,7 +1072,7 @@ let print_all_tokens2 pr xs = if !Flag_parsing_c.debug_unparsing then let current_kind = ref KOrigin in - xs +> List.iter (fun t -> + xs +> List.iter (fun t -> let newkind = kind_of_token2 t in if newkind =*= !current_kind then pr (str_of_token2 t) @@ -967,9 +1083,9 @@ let print_all_tokens2 pr xs = current_kind := newkind end ); - else + else xs +> List.iter (fun x -> pr (str_of_token2 x)) - + @@ -984,8 +1100,8 @@ let print_all_tokens2 pr xs = * fancy stuff when a function was not modified at all. Just need to * print the list of token as-is. But now pretty_print_c.ml handles * almost everything so maybe less useful. Maybe PPviatok allows to - * optimize a little the pretty printing. - * + * optimize a little the pretty printing. + * * update: now have PPviastr which goes even faster than PPviatok, so * PPviatok has disappeared. *) @@ -996,37 +1112,37 @@ type ppmethod = PPnormal | PPviastr (* The pp_program function will call pretty_print_c.ml with a special - * function to print the leaf components, the tokens. When we want to + * function to print the leaf components, the tokens. When we want to * print a token, we need to print also maybe the space and comments that - * were close to it in the original file (and that was omitted during the + * were close to it in the original file (and that was omitted during the * parsing phase), and honor what the cocci-info attached to the token says. * Maybe we will not print the token if it's a MINUS-token, and maybe we will - * print it and also print some cocci-code attached in a PLUS to it. + * print it and also print some cocci-code attached in a PLUS to it. * So we will also maybe call unparse_cocci. Because the cocci-code may * contain metavariables, unparse_cocci will in fact sometimes call back * pretty_print_c (which will this time don't call back again unparse_cocci) *) -let pp_program2 xs outfile = - Common.with_open_outfile outfile (fun (pr,chan) -> - let pr s = - if !Flag_parsing_c.debug_unparsing +let pp_program2 xs outfile = + Common.with_open_outfile outfile (fun (pr,chan) -> + let pr s = + if !Flag_parsing_c.debug_unparsing then begin pr2_no_nl s; flush stderr end - else pr s + else pr s (* flush chan; *) (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *) in - - xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) -> + + xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) -> (* here can still work on ast *) let e = remove_useless_fakeInfo_struct e in - + match ppmethod with | PPnormal -> (* now work on tokens *) (* phase1: just get all the tokens, all the information *) - assert(toks_e +> List.for_all (fun t -> + assert(toks_e +> List.for_all (fun t -> TH.is_origin t or TH.is_expanded t )); let toks = get_fakeInfo_and_tokens e toks_e in @@ -1041,15 +1157,18 @@ let pp_program2 xs outfile = then drop_minus toks (* nothing to do for sgrep *) else (* phase2: can now start to filter and adjust *) - let toks = adjust_indentation toks in + let (toks,tu) = adjust_indentation toks in let toks = adjust_before_semicolon toks in(*before remove minus*) + let toks = drop_space_at_endline toks in + let toks = paren_to_space toks in let toks = remove_minus_and_between_and_expanded_and_fake toks in (* assert Origin + Cocci + C and no minus *) let toks = add_space toks in + let toks = add_newlines toks tu in let toks = fix_tokens toks in toks in - (* in theory here could reparse and rework the ast! or + (* in theory here could reparse and rework the ast! or * apply some SP. Not before cos julia may have generated * not parsable file. Need do unparsing_tricks call before being * ready to reparse. *) @@ -1059,10 +1178,10 @@ let pp_program2 xs outfile = ) ) -let pp_program a b = +let pp_program a b = Common.profile_code "C unparsing" (fun () -> pp_program2 a b) -let pp_program_default xs outfile = +let pp_program_default xs outfile = let xs' = xs +> List.map (fun x -> x, PPnormal) in pp_program xs' outfile