parsing_c/unparse_c.ml

   1 (* Yoann Padioleau, Julia Lawall
   2  *
   3  * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
   4  *
   5  * This program is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU General Public License (GPL)
   7  * version 2 as published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * file license.txt for more details.
  13  *
  14  *
  15  * Modifications by Julia Lawall for better newline handling.
  16  *)
  17 open Common
  18
  19 open Ast_c
  20
  21 module TH = Token_helpers
  22
  23
  24 (* should keep comments and directives in between adjacent deleted terms,
  25 but not comments and directives within deleted terms.  should use the
  26 labels found in the control-flow graph *)
  27
  28
  29
  30 (*****************************************************************************)
  31 (* Wrappers *)
  32 (*****************************************************************************)
  33 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
  34
  35 (*****************************************************************************)
  36 (* Types used during the intermediate phases of the unparsing *)
  37 (*****************************************************************************)
  38
  39 type token1 =
  40   | Fake1 of info
  41   | T1 of Parser_c.token
  42
  43 (* The cocci_tag of the token should always be a NOTHING. The mark of
  44  * the token can only be OriginTok or ExpandedTok. Why not get rid of
  45  * token and get something simpler ? because we need to know if the
  46  * info is a TCommentCpp or TCommentSpace, etc for some of the further
  47  * analysis so easier to keep with the token.
  48  *
  49  * This type contains the whole information. Have all the tokens with this
  50  * type.
  51  *)
  52 type min =
  53     Min of (int list (* match numbers *) * int (* adjacency information *))
  54   | Ctx
  55
  56 type token2 =
  57   | T2 of Parser_c.token * min *
  58           int option (* orig index, abstracting away comments and space *)
  59   | Fake2
  60   | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
  61   | C2 of string
  62   | Indent_cocci2
  63   | Unindent_cocci2
  64
  65 (* not used yet *)
  66 type token3 =
  67   | T3 of Parser_c.token
  68   | Cocci3 of string
  69   | C3 of string
  70
  71
  72 (* similar to the tech in parsing_hack *)
  73 type token_extended = {
  74   tok2 : token2;
  75   str  : string;
  76   idx: int option; (* to know if 2 tokens were consecutive in orig file *)
  77   mutable new_tokens_before : token2 list;
  78   mutable remove : bool;
  79 }
  80
  81
  82 (*****************************************************************************)
  83 (* Helpers *)
  84 (*****************************************************************************)
  85
  86 let info_of_token1 t =
  87   match t with
  88   | Fake1 info -> info
  89   | T1 tok -> TH.info_of_tok tok
  90
  91 let print_token1 = function
  92     T1 tok -> TH.str_of_tok tok
  93   | Fake1 info -> "fake"
  94
  95 let str_of_token2 = function
  96   | T2 (t,_,_) -> TH.str_of_tok t
  97   | Fake2 -> ""
  98   | Cocci2 (s,_,_,_) -> s
  99   | C2 s -> s
 100   | Indent_cocci2 -> ""
 101   | Unindent_cocci2 -> ""
 102
 103 let print_token2 = function
 104   | T2 (t,b,_) ->
 105       let b_str =
 106         match b with
 107           Min (index,adj) ->
 108             Printf.sprintf "-%d[%s]" adj
 109               (String.concat " " (List.map string_of_int index))
 110         | Ctx -> "" in
 111       "T2:"^b_str^TH.str_of_tok t
 112   | Fake2 -> "fake"
 113   | Cocci2 (s,_,lc,rc) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
 114   | C2 s -> "C2:"^s
 115   | Indent_cocci2 -> "Indent"
 116   | Unindent_cocci2 -> "Unindent"
 117
 118 let simple_print_all_tokens1 l =
 119   List.iter (function x -> Printf.printf "%s " (print_token1 x)) l;
 120   Printf.printf "\n"
 121
 122 let simple_print_all_tokens2 l =
 123   List.iter (function x -> Printf.printf "|%s| " (print_token2 x)) l;
 124   Printf.printf "\n"
 125
 126 let str_of_token3 = function
 127   | T3 t -> TH.str_of_tok t
 128   | Cocci3 s | C3 s -> s
 129
 130
 131
 132 let mk_token_extended x =
 133   let origidx =
 134     match x with
 135     | T2 (_,_, idx) -> idx
 136     | _ -> None
 137   in
 138   { tok2 = x;
 139     str = str_of_token2 x;
 140     idx = origidx;
 141     new_tokens_before = [];
 142     remove = false;
 143   }
 144
 145 let rebuild_tokens_extented toks_ext =
 146   let _tokens = ref [] in
 147   toks_ext +> List.iter (fun tok ->
 148     tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
 149     if not tok.remove then push2 tok.tok2 _tokens;
 150   );
 151   let tokens = List.rev !_tokens in
 152   (tokens +> List.map mk_token_extended)
 153
 154
 155 let mcode_contain_plus = function
 156   | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
 157   | Ast_cocci.CONTEXT _ -> true
 158 (* patch: when need full coccinelle transformation *)
 159   | Ast_cocci.MINUS (_,_,_,[]) -> false
 160   | Ast_cocci.MINUS (_,_,_,x::xs) -> true
 161   | Ast_cocci.PLUS _ -> raise Impossible
 162
 163 let contain_plus info =
 164   let mck = Ast_c.mcode_of_info info in
 165   mcode_contain_plus mck
 166
 167 (*****************************************************************************)
 168 (* Last fix on the ast *)
 169 (*****************************************************************************)
 170
 171 (* Because of the ugly trick to handle initialiser, I generate fake ','
 172  * for the last initializer element, but if there is nothing around it,
 173  * I don't want in the end to print it.
 174  *)
 175
 176 let remove_useless_fakeInfo_struct program =
 177   let bigf = { Visitor_c.default_visitor_c_s with
 178     Visitor_c.kini_s = (fun (k,bigf) ini ->
 179       match k ini with
 180       | InitList args, ii ->
 181           (match ii with
 182           | [_i1;_i2] -> ini
 183           | [i1;i2;iicommaopt] ->
 184               if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
 185                  && (Ast_c.is_fake iicommaopt)
 186                  (* sometimes the guy put a normal iicommaopt *)
 187               then InitList args, [i1;i2]
 188               else InitList args, [i1;i2;iicommaopt]
 189           | [i1;i2;iicommaopt;end_comma_opt] ->
 190               (* only in #define. end_comma_opt canot be fake *)
 191               (* not sure if this will be considered ambiguous with a previous
 192                  case? *)
 193               if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
 194                  && (Ast_c.is_fake iicommaopt)
 195                  (* sometimes the guy put a normal iicommaopt *)
 196               then InitList args, [i1;i2;end_comma_opt]
 197               else InitList args, [i1;i2;iicommaopt;end_comma_opt]
 198           | _ -> raise Impossible
 199           )
 200       | x -> x
 201     )
 202   } in
 203   Visitor_c.vk_toplevel_s bigf program
 204
 205
 206 (*****************************************************************************)
 207 (* Tokens1 generation *)
 208 (*****************************************************************************)
 209
 210 let get_fakeInfo_and_tokens celem toks =
 211   let toks_in  = ref toks in
 212   let toks_out = ref [] in
 213
 214   (* todo? verify good order of position ? *)
 215   let pr_elem info =
 216     match Ast_c.pinfo_of_info info with
 217     | FakeTok _ ->
 218         Common.push2 (Fake1 info) toks_out
 219     | OriginTok _ | ExpandedTok _ ->
 220         (* get the associated comments/space/cppcomment tokens *)
 221         let (before, x, after) =
 222           !toks_in +> Common.split_when (fun tok ->
 223           info =*= TH.info_of_tok tok)
 224         in
 225         assert(info =*= TH.info_of_tok x);
 226         (*old: assert(before +> List.for_all (TH.is_comment)); *)
 227         before +> List.iter (fun x ->
 228           if not (TH.is_comment x)
 229           then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
 230           (* case such as  int asm d3("x"); not yet in ast *)
 231         );
 232         before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
 233         push2 (T1 x) toks_out;
 234         toks_in := after;
 235     | AbstractLineTok _ ->
 236         (* can be called on type info when for instance use -type_c *)
 237         if !Flag_parsing_c.pretty_print_type_info
 238         then Common.push2 (Fake1 info) toks_out
 239         else raise Impossible (* at this stage *)
 240   in
 241
 242   let pr_space _ = () in (* use the spacing that is there already *)
 243
 244   Pretty_print_c.pp_program_gen pr_elem pr_space celem;
 245
 246   if not (null !toks_in)
 247   then failwith "WEIRD: unparsing not finished";
 248
 249   List.rev !toks_out
 250
 251 (* Fake nodes that have BEFORE code should be moved over any subsequent
 252 whitespace and newlines, but not any comments, to get as close to the affected
 253 code as possible.  Similarly, fake nodes that have AFTER code should be moved
 254 backwards.  No fake nodes should have both before and after code. *)
 255
 256 let displace_fake_nodes toks =
 257   let is_fake = function Fake1 _ -> true | _ -> false in
 258   let is_whitespace = function
 259       T1(Parser_c.TCommentSpace _)
 260       (* patch: cocci    *)
 261     | T1(Parser_c.TCommentNewline _) -> true
 262     | _ -> false in
 263   let rec loop toks =
 264     let fake_info =
 265       try Some (Common.split_when is_fake toks)
 266       with Not_found -> None in
 267     match fake_info with
 268       Some(bef,((Fake1 info) as fake),aft) ->
 269         (match !(info.cocci_tag) with
 270         | Some x ->
 271           (match x with
 272             (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
 273             (* move the fake node forwards *)
 274               let (whitespace,rest) = Common.span is_whitespace aft in
 275               bef @ whitespace @ fake :: (loop rest)
 276           | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
 277             (* move the fake node backwards *)
 278               let revbef = List.rev bef in
 279               let (revwhitespace,revprev) = Common.span is_whitespace revbef in
 280               let whitespace = List.rev revwhitespace in
 281               let prev = List.rev revprev in
 282               prev @ fake :: (loop (whitespace @ aft))
 283           | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
 284               bef @ fake :: (loop aft)
 285           | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
 286               failwith "fake node should not be before-after"
 287           | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
 288         )
 289         | None ->
 290             bef @ fake :: (loop aft)
 291         )
 292     | None -> toks
 293     | _ -> raise Impossible in
 294   loop toks
 295
 296 (*****************************************************************************)
 297 (* Tokens2 generation *)
 298 (*****************************************************************************)
 299
 300 let comment2t2 = function
 301     (Token_c.TCommentCpp x,(info : Token_c.info)) ->
 302       C2("\n"^info.Common.str^"\n")
 303   | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
 304
 305 let expand_mcode toks =
 306   let toks_out = ref [] in
 307
 308   let index = ref 0 in
 309
 310   let add_elem t minus =
 311     match t with
 312     | Fake1 info ->
 313         let str = Ast_c.str_of_info info in
 314         if str =$= ""
 315         then push2 (Fake2) toks_out
 316         (* perhaps the fake ',' *)
 317         else push2 (C2 str) toks_out
 318
 319
 320     | T1 tok ->
 321         (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
 322         (* no tag on expandedTok ! *)
 323         let modified = function
 324             None -> false
 325           | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
 326           | _ -> true in
 327         (if (TH.is_expanded tok &&
 328              modified !((TH.info_of_tok tok).cocci_tag)
 329             (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*))
 330         then
 331           failwith
 332             (Printf.sprintf
 333                "expanded token %s on line %d is either modified or stored in a metavariable"
 334                (TH.str_of_tok tok) (TH.line_of_tok tok)));
 335
 336         let tok' = tok +> TH.visitor_info_of_tok (fun i ->
 337           { i with cocci_tag = ref Ast_c.emptyAnnot; }
 338         ) in
 339
 340         let optindex =
 341           if TH.is_origin tok && not (TH.is_real_comment tok)
 342           then begin
 343               incr index;
 344               Some !index
 345           end
 346           else None
 347         in
 348
 349         push2 (T2 (tok', minus, optindex)) toks_out
 350   in
 351
 352   let expand_info t =
 353     let (mcode,env) =
 354       Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
 355
 356     let pr_cocci s ln col rcol =
 357       push2 (Cocci2(s,ln,col,rcol)) toks_out  in
 358     let pr_c info =
 359       (match Ast_c.pinfo_of_info info with
 360         Ast_c.AbstractLineTok _ ->
 361           push2 (C2 (Ast_c.str_of_info info)) toks_out
 362       | Ast_c.FakeTok (s,_) ->
 363           push2 (C2 s) toks_out
 364       | _ ->
 365           Printf.printf "line: %s\n" (Common.dump info);
 366           failwith "not an abstract line");
 367       (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
 368       List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
 369
 370     let pr_barrier ln col = (* marks a position, used around C code *)
 371       push2 (Cocci2("",ln,col,col)) toks_out  in
 372     let pr_nobarrier ln col = () in (* not needed for linux spacing *)
 373
 374     let pr_cspace _ = push2 (C2 " ") toks_out in
 375
 376     let pr_space _ = () (* rely on add_space in cocci code *) in
 377     let pr_arity _ = () (* not interested *) in
 378
 379     let indent _   = push2 Indent_cocci2 toks_out in
 380     let unindent _ = push2 Unindent_cocci2 toks_out in
 381
 382     let args_pp =
 383       (env, pr_cocci, pr_c, pr_cspace,
 384        (match !Flag_parsing_c.spacing with
 385          Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
 386        pr_arity,
 387        (match !Flag_parsing_c.spacing with
 388          Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
 389        indent, unindent) in
 390
 391     (* old: when for yacfe with partial cocci:
 392      *    add_elem t false;
 393     *)
 394
 395     (* patch: when need full coccinelle transformation *)
 396     let unparser = Unparse_cocci.pp_list_list_any args_pp false in
 397     match mcode with
 398     | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
 399         (* Why adding ? because I want to have all the information, the whole
 400          * set of tokens, so I can then process and remove the
 401          * is_between_two_minus for instance *)
 402         add_elem t (Min (inst,adj));
 403         unparser any_xxs Unparse_cocci.InPlace
 404     | Ast_cocci.CONTEXT (_,any_befaft) ->
 405         (match any_befaft with
 406         | Ast_cocci.NOTHING ->
 407             add_elem t Ctx
 408         | Ast_cocci.BEFORE (xxs,_) ->
 409             unparser xxs Unparse_cocci.Before;
 410             add_elem t Ctx
 411         | Ast_cocci.AFTER (xxs,_) ->
 412             add_elem t Ctx;
 413             unparser xxs Unparse_cocci.After;
 414         | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
 415             unparser xxs Unparse_cocci.Before;
 416             add_elem t Ctx;
 417             unparser yys Unparse_cocci.After;
 418         )
 419     | Ast_cocci.PLUS _ -> raise Impossible
 420
 421   in
 422
 423   toks +> List.iter expand_info;
 424   List.rev !toks_out
 425
 426
 427 (*****************************************************************************)
 428 (* Tokens2 processing, filtering, adjusting *)
 429 (*****************************************************************************)
 430
 431 let is_space = function
 432   | T2(Parser_c.TCommentSpace _,_b,_i) -> true  (* only whitespace *)
 433   | _ -> false
 434
 435 let is_newline = function
 436   | T2(Parser_c.TCommentNewline _,_b,_i) -> true
 437   | _ -> false
 438
 439 let is_whitespace = function
 440   | (T2 (t,_b,_i)) ->
 441       (match t with
 442       | Parser_c.TCommentSpace _ -> true  (* only whitespace *)
 443       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
 444       | _ -> false
 445       )
 446   | _ -> false
 447
 448 let is_minusable_comment = function
 449   | (T2 (t,_b,_i)) ->
 450       (match t with
 451       | Parser_c.TCommentSpace _   (* only whitespace *)
 452       (* patch: coccinelle *)
 453       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
 454       | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
 455       | Parser_c.TComment _
 456       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 457       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 458       | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
 459         -> true
 460
 461       | Parser_c.TCommentMisc _
 462       | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
 463         -> false
 464
 465       | _ -> false
 466       )
 467   | _ -> false
 468
 469 let is_minusable_comment_nocpp = function
 470   | (T2 (t,_b,_i)) ->
 471       (match t with
 472       | Parser_c.TCommentSpace _   (* only whitespace *)
 473       (* patch: coccinelle *)
 474       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
 475       | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
 476       | Parser_c.TComment _ -> true
 477       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 478       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 479       | Parser_c.TCommentCpp (Token_c.CppDirective, _)
 480         -> false
 481
 482       | Parser_c.TCommentMisc _
 483       | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
 484         -> false
 485
 486       | _ -> false
 487       )
 488   | _ -> false
 489
 490 let all_coccis = function
 491     Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 -> true
 492   | _ -> false
 493
 494 (*previously gave up if the first character was a newline, but not clear why*)
 495 let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
 496
 497 let set_minus_comment adj = function
 498   | T2 (t,Ctx,idx) ->
 499       let str = TH.str_of_tok t in
 500       (match t with
 501       | Parser_c.TCommentSpace _
 502 (* patch: coccinelle *)
 503       | Parser_c.TCommentNewline _ -> ()
 504
 505       | Parser_c.TComment _
 506       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 507       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 508       | Parser_c.TCommentCpp (Token_c.CppDirective, _)
 509         ->
 510           pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
 511                  (TH.line_of_tok t) str)
 512       | _ -> raise Impossible
 513       );
 514       T2 (t, Min adj, idx)
 515 (* patch: coccinelle *)
 516   | T2 (t,Min adj,idx) as x -> x
 517   | _ -> raise Impossible
 518
 519 let set_minus_comment_or_plus adj = function
 520     Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 as x -> x
 521   | x -> set_minus_comment adj x
 522
 523 let drop_minus xs =
 524   xs +> Common.exclude (function
 525     | T2 (t,Min adj,_) -> true
 526     | _ -> false
 527   )
 528
 529 let remove_minus_and_between_and_expanded_and_fake xs =
 530
 531   (* get rid of exampled and fake tok *)
 532   let xs = xs +> Common.exclude (function
 533     | T2 (t,_,_) when TH.is_expanded t -> true
 534     | Fake2 -> true
 535
 536     | _ -> false
 537   )
 538   in
 539
 540   let minus_or_comment = function
 541       T2(_,Min adj,_) -> true
 542     | x -> is_minusable_comment x in
 543
 544   let minus_or_comment_nocpp = function
 545       T2(_,Min adj,_) -> true
 546     | x -> is_minusable_comment_nocpp x in
 547
 548   let common_adj (index1,adj1) (index2,adj2) =
 549     adj1 = adj2 (* same adjacency info *) &&
 550     (* non-empty intersection of witness trees *)
 551     not ((Common.inter_set index1 index2) = []) in
 552
 553   let rec adjust_around_minus = function
 554       [] -> []
 555     | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
 556       (((T2(_,Min adj,_))::_) as rest) ->
 557         (* an initial newline, as in a replaced statement *)
 558         let (between_minus,rest) = Common.span minus_or_comment rest in
 559         (match rest with
 560           [] -> (set_minus_comment adj x) ::
 561             (List.map (set_minus_comment adj) between_minus)
 562         | T2(_,Ctx,_)::_ when is_newline (List.hd(List.rev between_minus)) ->
 563             (set_minus_comment adj x)::(adjust_within_minus between_minus) @
 564             (adjust_around_minus rest)
 565         | _ ->
 566             x :: (adjust_within_minus between_minus) @
 567             (adjust_around_minus rest))
 568     | ((T2(_,Min adj,_))::_) as rest ->
 569         (* no initial newline, as in a replaced expression *)
 570         let (between_minus,rest) = Common.span minus_or_comment rest in
 571         (match rest with
 572           [] ->
 573             (List.map (set_minus_comment adj) between_minus)
 574         | _ ->
 575             (adjust_within_minus between_minus) @
 576             (adjust_around_minus rest))
 577     | x::xs -> x::adjust_around_minus xs
 578   and adjust_within_minus = function
 579       [] -> []
 580     | (T2(_,Min adj1,_) as t1)::xs ->
 581         let (between_minus,rest) = Common.span is_minusable_comment xs in
 582         (match rest with
 583           [] ->
 584             (* keep last newline *)
 585             let (drop,keep) =
 586               try
 587                 let (drop,nl,keep) =
 588                   Common.split_when is_newline between_minus in
 589                 (drop, nl :: keep)
 590               with Not_found -> (between_minus,[]) in
 591             t1 ::
 592             List.map (set_minus_comment_or_plus adj1) drop @
 593             keep
 594         | (T2(_,Min adj2,_) as t2)::rest when common_adj adj1 adj2 ->
 595             t1::
 596             List.map (set_minus_comment_or_plus adj1) between_minus @
 597             adjust_within_minus (t2::rest)
 598         | x::xs ->
 599             t1::(between_minus @ adjust_within_minus (x::xs)))
 600     | _ -> failwith "only minus and space possible" in
 601
 602   (* new idea: collects regions not containing non-space context code
 603      if two adjacent adjacent minus tokens satisfy common_adj then delete
 604      all spaces, comments etc between them
 605      if two adjacent minus tokens do not satisfy common_adj only delete
 606      the spaces between them if there are no comments, etc.
 607      if the region contain no plus code and is both preceded and followed
 608      by a newline, delete the initial newline. *)
 609
 610   let rec adjust_around_minus = function
 611       [] -> []
 612     | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
 613       (T2(_,Min adj1,_) as t1)::xs ->
 614         let (minus_list,rest) = Common.span not_context (t1::xs) in
 615         let contains_plus = List.exists is_plus minus_list in
 616         let x =
 617           match List.rev minus_list with
 618             (T2(Parser_c.TCommentNewline c,_b,_i))::rest
 619             when List.for_all minus_or_comment minus_list ->
 620               set_minus_comment_or_plus adj1 x
 621           | _ -> x in
 622         x :: adjust_within_minus contains_plus minus_list @
 623         adjust_around_minus rest
 624     | (T2(_,Min adj1,_) as t1)::xs ->
 625         let (minus_list,rest) = Common.span not_context (t1::xs) in
 626         let contains_plus = List.exists is_plus minus_list in
 627         adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
 628     | x::xs -> x :: adjust_around_minus xs
 629   and adjust_within_minus cp (* contains plus *) = function
 630       (T2(_,Min adj1,_) as t1)::xs ->
 631         let not_minus = function T2(_,Min _,_) -> false | _ -> true in
 632         let (not_minus_list,rest) = Common.span not_minus xs in
 633         t1 ::
 634         (match rest with
 635           (T2(_,Min adj2,_) as t2)::xs when common_adj adj1 adj2 ->
 636             (List.map (set_minus_comment_or_plus adj1) not_minus_list)
 637             @ (adjust_within_minus cp (t2::xs))
 638         | (T2(_,Min adj2,_) as t2)::xs ->
 639             let is_whitespace_or_plus = function
 640                 (T2 _) as x -> is_space x
 641               | _ -> true (*plus*) in
 642             if List.for_all is_whitespace_or_plus not_minus_list
 643             then
 644               (List.map (set_minus_comment_or_plus adj1) not_minus_list)
 645               @ (adjust_within_minus cp (t2::xs))
 646             else not_minus_list @ (adjust_within_minus cp (t2::xs))
 647         | _ ->
 648             if cp
 649             then xs
 650             else
 651               let (spaces,rest) = Common.span is_space xs in
 652               (List.map (set_minus_comment_or_plus adj1) spaces)
 653               @ rest)
 654     | xs -> failwith "should always start with minus"
 655   and not_context = function
 656       (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false
 657     | _ -> true
 658   and is_plus = function
 659       C2 _ | Cocci2 _ -> true
 660     | _ -> false in
 661
 662   let xs = adjust_around_minus xs in
 663
 664   (* this drops blank lines after a brace introduced by removing code *)
 665   let minus_or_comment_nonl = function
 666       T2(_,Min adj,_) -> true
 667     | T2(Parser_c.TCommentNewline _,_b,_i) -> false
 668     | x -> is_minusable_comment x in
 669
 670   let rec adjust_after_brace = function
 671       [] -> []
 672     | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs)
 673        when str_of_token2 x =$= "{" ->
 674          let (between_minus,rest) = Common.span minus_or_comment_nonl xs in
 675          let is_whitespace = function
 676              T2(Parser_c.TCommentSpace _,_b,_i)
 677              (* patch: cocci    *)
 678            | T2(Parser_c.TCommentNewline _,_b,_i) -> true
 679            | _ -> false in
 680          let (newlines,rest) = Common.span is_whitespace rest in
 681          let (drop_newlines,last_newline) =
 682            let rec loop = function
 683                [] -> ([],[])
 684              | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
 685                  (List.rev rest,[x])
 686              | x::xs ->
 687                  let (drop_newlines,last_newline) = loop xs in
 688                  (drop_newlines,x::last_newline) in
 689            loop (List.rev newlines) in
 690          x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@
 691          last_newline@
 692          adjust_after_brace rest
 693     | x::xs -> x::adjust_after_brace xs in
 694
 695   let xs = adjust_after_brace xs in
 696
 697   (* search backwards from context } over spaces until reaching a newline.
 698      then go back over all minus code until reaching some context or + code.
 699      get rid of all intervening spaces, newlines, and comments
 700      input is reversed *)
 701   let rec adjust_before_brace = function
 702       [] -> []
 703     | ((T2(t,Ctx,_)) as x)::xs when str_of_token2 x =$= "}" or is_newline x ->
 704         let (outer_spaces,rest) = Common.span is_space xs in
 705         x :: outer_spaces @
 706         (match rest with
 707           ((T2 (Parser_c.TCommentNewline _,Ctx,_i)) as h) ::
 708           (* the rest of this code is the same as from_newline below
 709              but merging them seems to be error prone... *)
 710           ((T2 (t, Min adj, idx)) as m) :: rest ->
 711             let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
 712             h :: m ::
 713             (List.map (set_minus_comment adj) spaces) @
 714             (adjust_before_brace rest)
 715         | _ -> adjust_before_brace rest)
 716     | x::xs -> x :: (adjust_before_brace xs) in
 717
 718   let from_newline = function
 719       ((T2 (t, Min adj, idx)) as m) :: rest ->
 720         let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
 721         m ::
 722         (List.map (set_minus_comment adj) spaces) @
 723         (adjust_before_brace rest)
 724     | rest -> adjust_before_brace rest in
 725
 726   let xs = List.rev (from_newline (List.rev xs)) in
 727   let xs = drop_minus xs in
 728   xs
 729
 730 (* normally, in C code, a semicolon is not preceded by a space or newline *)
 731 let adjust_before_semicolon toks =
 732   let toks = List.rev toks in
 733   let rec search_semic = function
 734       [] -> []
 735     | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
 736         if List.mem (str_of_token2 x) [";";")";","]
 737         then x :: search_minus false xs
 738         else x :: search_semic xs
 739     | x::xs -> x :: search_semic xs
 740   and search_minus seen_minus xs =
 741     let (spaces, rest) = Common.span is_space xs in
 742     (* only delete spaces if something is actually deleted *)
 743     match rest with
 744       ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest
 745     | _ -> if seen_minus then rest else xs in
 746   List.rev (search_semic toks)
 747
 748 let is_ident_like s = s ==~ Common.regexp_alpha
 749
 750 let rec add_space xs =
 751   match xs with
 752   | [] -> []
 753   | [x] -> [x]
 754   | (Cocci2(sx,lnx,_,rcolx) as x)::((Cocci2(sy,lny,lcoly,_)) as y)::xs
 755     when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
 756       not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
 757         (* this only works within a line.  could consider whether
 758            something should be done to add newlines too, rather than
 759            printing them explicitly in unparse_cocci. *)
 760         x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
 761   | x::y::xs ->
 762       let sx = str_of_token2 x in
 763       let sy = str_of_token2 y in
 764       if is_ident_like sx && is_ident_like sy
 765       then x::C2 " "::(add_space (y::xs))
 766       else x::(add_space (y::xs))
 767
 768
 769
 770 (* When insert some new code, because of a + in a SP, we must add this
 771  * code at the right place, with the good indentation. So each time we
 772  * encounter some spacing info, with some newline, we maintain the
 773  * current indentation level used.
 774  *
 775  * TODO problems: not accurate. ex: TODO
 776  *
 777  * TODO: if in #define region, should add a \ \n
 778  *)
 779 let new_tabbing2 space =
 780   (list_of_string space)
 781     +> List.rev
 782     +> Common.take_until (fun c -> c =<= '\n')
 783     +> List.rev
 784     +> List.map string_of_char
 785     +> String.concat ""
 786
 787 let new_tabbing a =
 788   Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
 789
 790
 791 let rec adjust_indentation xs =
 792   let _current_tabbing = ref "" in
 793   let tabbing_unit = ref None in
 794
 795   let string_of_list l = String.concat "" (List.map string_of_char l) in
 796
 797   (* try to pick a tabbing unit for the plus code *)
 798   let adjust_tabbing_unit old_tab new_tab =
 799     if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
 800     then
 801       let old_tab = list_of_string old_tab in
 802       let new_tab = list_of_string new_tab in
 803       let rec loop = function
 804           ([],new_tab) ->
 805             tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
 806         | (_,[]) -> failwith "not possible"
 807         | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
 808       loop (old_tab,new_tab) in
 809
 810   let remtab tu current_tab =
 811     let current_tab = List.rev(list_of_string current_tab) in
 812     let rec loop = function
 813         ([],new_tab) -> string_of_list (List.rev new_tab)
 814       | (_,[]) -> "" (*weird; tabbing unit used up more than the current tab*)
 815       | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
 816       | (_,ns) -> (* mismatch; remove what we can *)
 817           string_of_list (List.rev ns) in
 818     loop (tu,current_tab) in
 819
 820   let rec find_first_tab started = function
 821       [] -> ()
 822     | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
 823         find_first_tab true xs
 824 (* patch: coccinelle *)
 825     | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
 826       when started ->
 827         let s = str_of_token2 x +> new_tabbing in
 828         tabbing_unit := Some (s,List.rev (list_of_string s))
 829     | x::xs -> find_first_tab started xs in
 830   find_first_tab false xs;
 831
 832   let rec aux started xs =
 833     match xs with
 834     | [] ->  []
 835 (* patch: coccinelle *)
 836     | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
 837       ((Cocci2 ("{",_,_,_)) as a)::xs
 838       when started && str_of_token2 x =$= ")" ->
 839         (* to be done for if, etc, but not for a function header *)
 840         x::(C2 " ")::a::(aux started xs)
 841     | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs ->
 842         let old_tabbing = !_current_tabbing in
 843         str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
 844         (* only trust the indentation after the first { *)
 845         (if started then adjust_tabbing_unit old_tabbing !_current_tabbing);
 846         let coccis_rest = Common.span all_coccis xs in
 847         (match coccis_rest with
 848           (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
 849             (* the case where cocci code has been added before a close } *)
 850             x::aux started (Indent_cocci2::xs)
 851         | _ -> x::aux started xs)
 852     | Indent_cocci2::xs ->
 853         (match !tabbing_unit with
 854           None -> aux started xs
 855         | Some (tu,_) ->
 856             _current_tabbing := (!_current_tabbing)^tu;
 857             Cocci2 (tu,-1,-1,-1)::aux started xs)
 858     | Unindent_cocci2::xs ->
 859         (match !tabbing_unit with
 860           None -> aux started xs
 861         | Some (_,tu) ->
 862             _current_tabbing := remtab tu (!_current_tabbing);
 863             aux started xs)
 864     (* border between existing code and cocci code *)
 865     | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_)) as y)::xs
 866       when str_of_token2 x =$= "{" ->
 867         x::aux true (y::Indent_cocci2::xs)
 868     | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
 869       when str_of_token2 y =$= "}" ->
 870         x::aux started (y::Unindent_cocci2::xs)
 871     (* starting the body of the function *)
 872     | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->  x::aux true xs
 873     | ((Cocci2("{",_,_,_)) as a)::xs -> a::aux true xs
 874     | ((Cocci2("\n",_,_,_)) as x)::xs ->
 875             (* dont inline in expr because of weird eval order of ocaml *)
 876         let s = !_current_tabbing in
 877         x::Cocci2 (s,-1,-1,-1)::aux started xs
 878     | x::xs -> x::aux started xs in
 879   aux false xs
 880
 881
 882 let rec find_paren_comma = function
 883   | [] -> ()
 884
 885   (* do nothing if was like this in original file *)
 886   | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
 887     ::xs when p2 =|= p1 + 1 ->
 888       find_paren_comma (x2::xs)
 889
 890   | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
 891     ::xs when p2 =|= p1 + 1 ->
 892       find_paren_comma (x2::xs)
 893
 894   | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
 895     ::xs when p2 =|= p1 + 1 ->
 896       find_paren_comma (x2::xs)
 897
 898   (* otherwise yes can adjust *)
 899   | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
 900       x2.remove <- true;
 901       find_paren_comma (x2::xs)
 902   | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
 903       x1.remove <- true;
 904       find_paren_comma (x2::xs)
 905
 906   | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
 907       x1.remove <- true;
 908       find_paren_comma (x2::xs)
 909
 910   | x::xs ->
 911       find_paren_comma xs
 912
 913
 914 let fix_tokens toks =
 915   let toks = toks +> List.map mk_token_extended in
 916
 917   let cleaner = toks +> Common.exclude (function
 918     | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
 919     | _ -> false
 920   ) in
 921   find_paren_comma cleaner;
 922
 923   let toks = rebuild_tokens_extented toks in
 924   toks +> List.map (fun x -> x.tok2)
 925
 926
 927
 928 (*****************************************************************************)
 929 (* Final unparsing (and debugging support) *)
 930 (*****************************************************************************)
 931
 932 (* for debugging *)
 933 type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
 934
 935 let kind_of_token2 = function
 936   | Fake2 -> KFake
 937   | Cocci2 _ -> KCocci
 938   | C2 _ -> KC
 939   | T2 (t,_,_) ->
 940       (match TH.pinfo_of_tok t with
 941       | ExpandedTok _ -> KExpanded
 942       | OriginTok _ -> KOrigin
 943       | FakeTok _ -> raise Impossible (* now a Fake2 *)
 944       | AbstractLineTok _ -> raise Impossible (* now a KC *)
 945       )
 946   | Unindent_cocci2 | Indent_cocci2 -> raise Impossible
 947
 948 let end_mark = "!"
 949
 950 let start_mark = function
 951   | KFake -> "!F!"
 952   | KCocci -> "!S!"
 953   | KC -> "!A!"
 954   | KExpanded -> "!E!"
 955   | KOrigin -> ""
 956
 957 let print_all_tokens2 pr xs =
 958   if !Flag_parsing_c.debug_unparsing
 959   then
 960     let current_kind = ref KOrigin in
 961     xs +> List.iter (fun t ->
 962       let newkind = kind_of_token2 t in
 963       if newkind =*= !current_kind
 964       then pr (str_of_token2 t)
 965       else begin
 966         pr (end_mark);
 967         pr (start_mark newkind);
 968         pr (str_of_token2 t);
 969         current_kind := newkind
 970       end
 971     );
 972   else
 973     xs +> List.iter (fun x -> pr (str_of_token2 x))
 974
 975
 976
 977
 978 (*****************************************************************************)
 979 (* Entry points *)
 980 (*****************************************************************************)
 981
 982 (* old: PPviatok was made in the beginning to allow to pretty print a
 983  * complete C file, including a modified C file by transformation.ml,
 984  * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
 985  * maybe flow_to_ast) all the cases. Indeed we don't need to do some
 986  * fancy stuff when a function was not modified at all. Just need to
 987  * print the list of token as-is. But now pretty_print_c.ml handles
 988  * almost everything so maybe less useful. Maybe PPviatok allows to
 989  * optimize a little the pretty printing.
 990  *
 991  * update: now have PPviastr which goes even faster than PPviatok, so
 992  * PPviatok has disappeared.
 993  *)
 994
 995 type ppmethod = PPnormal | PPviastr
 996
 997
 998
 999
1000 (* The pp_program function will call pretty_print_c.ml with a special
1001  * function to print the leaf components, the tokens. When we want to
1002  * print a token, we need to print also maybe the space and comments that
1003  * were close to it in the original file (and that was omitted during the
1004  * parsing phase), and honor what the cocci-info attached to the token says.
1005  * Maybe we will not print the token if it's a MINUS-token, and maybe we will
1006  * print it and also print some cocci-code attached in a PLUS to it.
1007  * So we will also maybe call unparse_cocci. Because the cocci-code may
1008  * contain metavariables, unparse_cocci will in fact sometimes call back
1009  * pretty_print_c (which will this time don't call back again unparse_cocci)
1010  *)
1011
1012 let pp_program2 xs outfile  =
1013   Common.with_open_outfile outfile (fun (pr,chan) ->
1014     let pr s =
1015       if !Flag_parsing_c.debug_unparsing
1016       then begin pr2_no_nl s; flush stderr end
1017       else pr s
1018         (* flush chan; *)
1019         (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1020     in
1021
1022     xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
1023       (* here can still work on ast *)
1024       let e = remove_useless_fakeInfo_struct e in
1025
1026       match ppmethod with
1027       | PPnormal ->
1028           (* now work on tokens *)
1029
1030           (* phase1: just get all the tokens, all the information *)
1031           assert(toks_e +> List.for_all (fun t ->
1032             TH.is_origin t or TH.is_expanded t
1033           ));
1034           let toks = get_fakeInfo_and_tokens e toks_e in
1035           let toks = displace_fake_nodes toks in
1036           (* assert Origin;ExpandedTok;Faketok *)
1037           let toks = expand_mcode toks in
1038           (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1039            * and no tag information, just NOTHING. *)
1040
1041           let toks =
1042             if !Flag.sgrep_mode2
1043             then drop_minus toks (* nothing to do for sgrep *)
1044             else
1045               (* phase2: can now start to filter and adjust *)
1046               let toks = adjust_indentation toks in
1047               let toks = adjust_before_semicolon toks in(*before remove minus*)
1048               let toks = remove_minus_and_between_and_expanded_and_fake toks in
1049               (* assert Origin + Cocci + C and no minus *)
1050               let toks = add_space toks in
1051               let toks = fix_tokens toks in
1052               toks in
1053
1054           (* in theory here could reparse and rework the ast! or
1055            * apply some SP. Not before cos julia may have generated
1056            * not parsable file. Need do unparsing_tricks call before being
1057            * ready to reparse. *)
1058           print_all_tokens2 pr toks;
1059
1060       | PPviastr -> pr str
1061     )
1062   )
1063
1064 let pp_program a b =
1065   Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
1066
1067
1068 let pp_program_default xs outfile =
1069   let xs' = xs +> List.map (fun x -> x, PPnormal) in
1070   pp_program xs' outfile