(* parsing_c/unparse_c.ml, from the coccinelle-0.1.10 release *)
1 (* Yoann Padioleau, Julia Lawall
2 *
3 * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
13 *
14 *
15 * Modifications by Julia Lawall for better newline handling.
16 *)
17 open Common
18
19 open Ast_c
20
21 module TH = Token_helpers
22
23
24 (* We should keep comments and directives that lie between adjacent deleted
25 terms, but not comments and directives within deleted terms. We should use
26 the labels found in the control-flow graph for this. *)
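(* Illustration (assumed example, not from the original sources): if two
   consecutive calls are removed,
     - a();
     /* keep me */
     - b();
   the comment between them should survive, whereas a comment inside a single
   removed term should disappear with it. *)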
27
28
29
30 (*****************************************************************************)
31 (* Wrappers *)
32 (*****************************************************************************)
33 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
34
35 (*****************************************************************************)
36 (* Types used during the intermediate phases of the unparsing *)
37 (*****************************************************************************)
38
39 type token1 =
40 | Fake1 of info
41 | T1 of Parser_c.token
42
43 (* The cocci_tag of the token should always be NOTHING. The mark of
44 * the token can only be OriginTok or ExpandedTok. Why not get rid of the
45 * token and use something simpler? Because we need to know whether the
46 * info is a TCommentCpp, a TCommentSpace, etc. for some of the later
47 * analyses, so it is easier to keep the token around.
48 *
49 * This type contains the whole information; all the tokens end up with
50 * this type.
51 *)
52 type min =
53 Min of (int list (* match numbers *) * int (* adjacency information *))
54 | Ctx
55
56 type token2 =
57 | T2 of Parser_c.token * min *
58 int option (* orig index, abstracting away comments and space *)
59 | Fake2
60 | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
61 | C2 of string
62 | Indent_cocci2
63 | Unindent_cocci2
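(* Reading guide (sketch, inferred from the constructors above): a context
   token of the original C file appears as T2 (tok, Ctx, Some idx); a token
   removed by the semantic patch as T2 (tok, Min (matches, adj), idx); code
   added by the semantic patch as Cocci2 strings (with position information)
   or as C2 strings (plain text such as spaces or fake commas). *)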
64
65 (* not used yet *)
66 type token3 =
67 | T3 of Parser_c.token
68 | Cocci3 of string
69 | C3 of string
70
71
72 (* similar to the technique in parsing_hack *)
73 type token_extended = {
74 tok2 : token2;
75 str : string;
76 idx: int option; (* to know if 2 tokens were consecutive in orig file *)
77 mutable new_tokens_before : token2 list;
78 mutable remove : bool;
79 }
80
81
82 (*****************************************************************************)
83 (* Helpers *)
84 (*****************************************************************************)
85
86 let info_of_token1 t =
87 match t with
88 | Fake1 info -> info
89 | T1 tok -> TH.info_of_tok tok
90
91 let print_token1 = function
92 T1 tok -> TH.str_of_tok tok
93 | Fake1 info -> "fake"
94
95 let str_of_token2 = function
96 | T2 (t,_,_) -> TH.str_of_tok t
97 | Fake2 -> ""
98 | Cocci2 (s,_,_,_) -> s
99 | C2 s -> s
100 | Indent_cocci2 -> ""
101 | Unindent_cocci2 -> ""
102
103 let print_token2 = function
104 | T2 (t,b,_) ->
105 let b_str =
106 match b with
107 Min (index,adj) ->
108 Printf.sprintf "-%d[%s]" adj
109 (String.concat " " (List.map string_of_int index))
110 | Ctx -> "" in
111 "T2:"^b_str^TH.str_of_tok t
112 | Fake2 -> "fake"
113 | Cocci2 (s,_,lc,rc) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
114 | C2 s -> "C2:"^s
115 | Indent_cocci2 -> "Indent"
116 | Unindent_cocci2 -> "Unindent"
117
118 let simple_print_all_tokens1 l =
119 List.iter (function x -> Printf.printf "%s " (print_token1 x)) l;
120 Printf.printf "\n"
121
122 let simple_print_all_tokens2 l =
123 List.iter (function x -> Printf.printf "|%s| " (print_token2 x)) l;
124 Printf.printf "\n"
125
126 let str_of_token3 = function
127 | T3 t -> TH.str_of_tok t
128 | Cocci3 s | C3 s -> s
129
130
131
132 let mk_token_extended x =
133 let origidx =
134 match x with
135 | T2 (_,_, idx) -> idx
136 | _ -> None
137 in
138 { tok2 = x;
139 str = str_of_token2 x;
140 idx = origidx;
141 new_tokens_before = [];
142 remove = false;
143 }
144
145 let rebuild_tokens_extented toks_ext =
146 let _tokens = ref [] in
147 toks_ext +> List.iter (fun tok ->
148 tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
149 if not tok.remove then push2 tok.tok2 _tokens;
150 );
151 let tokens = List.rev !_tokens in
152 (tokens +> List.map mk_token_extended)
153
154
155 let mcode_contain_plus = function
156 | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
157 | Ast_cocci.CONTEXT _ -> true
158 (* patch: when need full coccinelle transformation *)
159 | Ast_cocci.MINUS (_,_,_,[]) -> false
160 | Ast_cocci.MINUS (_,_,_,x::xs) -> true
161 | Ast_cocci.PLUS -> raise Impossible
162
163 let contain_plus info =
164 let mck = Ast_c.mcode_of_info info in
165 mcode_contain_plus mck
166
167 (*****************************************************************************)
168 (* Last fix on the ast *)
169 (*****************************************************************************)
170
171 (* Because of the ugly trick used to handle initializers, I generate a fake
172 * ',' for the last initializer element, but if there is nothing attached
173 * around it, I do not want to print it in the end.
174 *)
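(* Illustrative example (assumed): for "struct foo x = { 1, 2 };" a fake ','
   info is attached after the last element so that added code can be printed
   as ", <new element>"; when no plus code ends up next to it, the fake comma
   is dropped below. *)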
175
176 let remove_useless_fakeInfo_struct program =
177 let bigf = { Visitor_c.default_visitor_c_s with
178 Visitor_c.kini_s = (fun (k,bigf) ini ->
179 match k ini with
180 | InitList args, ii ->
181 (match ii with
182 | [_i1;_i2] -> ini
183 | [i1;i2;iicommaopt] ->
184 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
185 && (Ast_c.is_fake iicommaopt)
186 (* sometimes a normal iicommaopt is put there *)
187 then InitList args, [i1;i2]
188 else InitList args, [i1;i2;iicommaopt]
189 | [i1;i2;iicommaopt;end_comma_opt] ->
190 (* only in #define. end_comma_opt cannot be fake *)
191 (* not sure if this will be considered ambiguous with a previous
192 case? *)
193 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
194 && (Ast_c.is_fake iicommaopt)
195 (* sometimes a normal iicommaopt is put there *)
196 then InitList args, [i1;i2;end_comma_opt]
197 else InitList args, [i1;i2;iicommaopt;end_comma_opt]
198 | _ -> raise Impossible
199 )
200 | x -> x
201 )
202 } in
203 Visitor_c.vk_toplevel_s bigf program
204
205
206 (*****************************************************************************)
207 (* Tokens1 generation *)
208 (*****************************************************************************)
209
210 let get_fakeInfo_and_tokens celem toks =
211 let toks_in = ref toks in
212 let toks_out = ref [] in
213
214 (* todo? verify good order of position ? *)
215 let pr_elem info =
216 match Ast_c.pinfo_of_info info with
217 | FakeTok _ ->
218 Common.push2 (Fake1 info) toks_out
219 | OriginTok _ | ExpandedTok _ ->
220 (* get the associated comments/space/cppcomment tokens *)
221 let (before, x, after) =
222 !toks_in +> Common.split_when (fun tok ->
223 info =*= TH.info_of_tok tok)
224 in
225 assert(info =*= TH.info_of_tok x);
226 (*old: assert(before +> List.for_all (TH.is_comment)); *)
227 before +> List.iter (fun x ->
228 if not (TH.is_comment x)
229 then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
230 (* case such as int asm d3("x"); not yet in ast *)
231 );
232 before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
233 push2 (T1 x) toks_out;
234 toks_in := after;
235 | AbstractLineTok _ ->
236 (* can be called on type info when, for instance, using -type_c *)
237 if !Flag_parsing_c.pretty_print_type_info
238 then Common.push2 (Fake1 info) toks_out
239 else raise Impossible (* at this stage *)
240 in
241
242 let pr_space _ = () in (* use the spacing that is there already *)
243
244 Pretty_print_c.pp_program_gen pr_elem pr_space celem;
245
246 if not (null !toks_in)
247 then failwith "WEIRD: unparsing not finished";
248
249 List.rev !toks_out
250
251 (* Fake nodes that have BEFORE code should be moved over any subsequent
252 whitespace and newlines, but not any comments, to get as close to the affected
253 code as possible. Similarly, fake nodes that have AFTER code should be moved
254 backwards. No fake nodes should have both before and after code. *)
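(* Illustration (assumed): a fake node carrying BEFORE code that sits in
   front of "\n    x = 1;" is pushed forward over the newline and the
   indentation so that the added code is printed right next to "x = 1;";
   a fake node carrying AFTER code is moved backwards over whitespace in the
   same way, and never over comments. *)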
255
256 let displace_fake_nodes toks =
257 let is_fake = function Fake1 _ -> true | _ -> false in
258 let is_whitespace = function
259 T1(Parser_c.TCommentSpace _)
260 (* patch: cocci *)
261 | T1(Parser_c.TCommentNewline _) -> true
262 | _ -> false in
263 let rec loop toks =
264 let fake_info =
265 try Some (Common.split_when is_fake toks)
266 with Not_found -> None in
267 match fake_info with
268 Some(bef,((Fake1 info) as fake),aft) ->
269 (match !(info.cocci_tag) with
270 | Some x ->
271 (match x with
272 (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
273 (* move the fake node forwards *)
274 let (whitespace,rest) = Common.span is_whitespace aft in
275 bef @ whitespace @ fake :: (loop rest)
276 | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
277 (* move the fake node backwards *)
278 let revbef = List.rev bef in
279 let (revwhitespace,revprev) = Common.span is_whitespace revbef in
280 let whitespace = List.rev revwhitespace in
281 let prev = List.rev revprev in
282 prev @ fake :: (loop (whitespace @ aft))
283 | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
284 bef @ fake :: (loop aft)
285 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
286 failwith "fake node should not be before-after"
287 | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
288 )
289 | None ->
290 bef @ fake :: (loop aft)
291 )
292 | None -> toks
293 | _ -> raise Impossible in
294 loop toks
295
296 (*****************************************************************************)
297 (* Tokens2 generation *)
298 (*****************************************************************************)
299
300 let comment2t2 = function
301 (Token_c.TCommentCpp x,(info : Token_c.info)) ->
302 C2("\n"^info.Common.str^"\n")
303 | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
304
305 let expand_mcode toks =
306 let toks_out = ref [] in
307
308 let index = ref 0 in
309
310 let add_elem t minus =
311 match t with
312 | Fake1 info ->
313 let str = Ast_c.str_of_info info in
314 if str =$= ""
315 then push2 (Fake2) toks_out
316 (* perhaps the fake ',' *)
317 else push2 (C2 str) toks_out
318
319
320 | T1 tok ->
321 (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
322 (* no tag on expandedTok ! *)
323 let modified = function
324 None -> false
325 | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
326 | _ -> true in
327 (if (TH.is_expanded tok &&
328 modified !((TH.info_of_tok tok).cocci_tag)
329 (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*))
330 then
331 failwith
332 (Printf.sprintf
333 "expanded token %s on line %d is either modified or stored in a metavariable"
334 (TH.str_of_tok tok) (TH.line_of_tok tok)));
335
336 let tok' = tok +> TH.visitor_info_of_tok (fun i ->
337 { i with cocci_tag = ref Ast_c.emptyAnnot; }
338 ) in
339
340 let optindex =
341 if TH.is_origin tok && not (TH.is_real_comment tok)
342 then begin
343 incr index;
344 Some !index
345 end
346 else None
347 in
348
349 push2 (T2 (tok', minus, optindex)) toks_out
350 in
351
352 let expand_info t =
353 let (mcode,env) =
354 Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
355
356 let pr_cocci s ln col rcol =
357 push2 (Cocci2(s,ln,col,rcol)) toks_out in
358 let pr_c info =
359 (match Ast_c.pinfo_of_info info with
360 Ast_c.AbstractLineTok _ ->
361 push2 (C2 (Ast_c.str_of_info info)) toks_out
362 | Ast_c.FakeTok (s,_) ->
363 push2 (C2 s) toks_out
364 | _ ->
365 Printf.printf "line: %s\n" (Common.dump info);
366 failwith "not an abstract line");
367 (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
368 List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
369
370 let pr_barrier ln col = (* marks a position, used around C code *)
371 push2 (Cocci2("",ln,col,col)) toks_out in
372 let pr_nobarrier ln col = () in (* not needed for linux spacing *)
373
374 let pr_cspace _ = push2 (C2 " ") toks_out in
375
376 let pr_space _ = () (* rely on add_space in cocci code *) in
377 let pr_arity _ = () (* not interested *) in
378
379 let indent _ = push2 Indent_cocci2 toks_out in
380 let unindent _ = push2 Unindent_cocci2 toks_out in
381
382 let args_pp =
383 (env, pr_cocci, pr_c, pr_cspace,
384 (match !Flag_parsing_c.spacing with
385 Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
386 pr_arity,
387 (match !Flag_parsing_c.spacing with
388 Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
389 indent, unindent) in
390
391 (* old: when for yacfe with partial cocci:
392 * add_elem t false;
393 *)
394
395 (* patch: when need full coccinelle transformation *)
396 let unparser = Unparse_cocci.pp_list_list_any args_pp false in
397 match mcode with
398 | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
399 (* Why add it? Because I want to have all the information, the whole
400 * set of tokens, so that I can then process them and remove, for
401 * instance, the is_between_two_minus tokens *)
402 add_elem t (Min (inst,adj));
403 unparser any_xxs Unparse_cocci.InPlace
404 | Ast_cocci.CONTEXT (_,any_befaft) ->
405 (match any_befaft with
406 | Ast_cocci.NOTHING ->
407 add_elem t Ctx
408 | Ast_cocci.BEFORE xxs ->
409 unparser xxs Unparse_cocci.Before;
410 add_elem t Ctx
411 | Ast_cocci.AFTER xxs ->
412 add_elem t Ctx;
413 unparser xxs Unparse_cocci.After;
414 | Ast_cocci.BEFOREAFTER (xxs, yys) ->
415 unparser xxs Unparse_cocci.Before;
416 add_elem t Ctx;
417 unparser yys Unparse_cocci.After;
418 )
419 | Ast_cocci.PLUS -> raise Impossible
420
421 in
422
423 toks +> List.iter expand_info;
424 List.rev !toks_out
425
426
427 (*****************************************************************************)
428 (* Tokens2 processing, filtering, adjusting *)
429 (*****************************************************************************)
430
431 let is_space = function
432 | T2(Parser_c.TCommentSpace _,_b,_i) -> true (* only whitespace *)
433 | _ -> false
434
435 let is_newline = function
436 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
437 | _ -> false
438
439 let is_whitespace = function
440 | (T2 (t,_b,_i)) ->
441 (match t with
442 | Parser_c.TCommentSpace _ -> true (* only whitespace *)
443 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
444 | _ -> false
445 )
446 | _ -> false
447
448 let is_minusable_comment = function
449 | (T2 (t,_b,_i)) ->
450 (match t with
451 | Parser_c.TCommentSpace _ (* only whitespace *)
452 (* patch: coccinelle *)
453 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
454 | Parser_c.TComment _
455 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
456 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
457 | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
458 -> true
459
460 | Parser_c.TCommentMisc _
461 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
462 -> false
463
464 | _ -> false
465 )
466 | _ -> false
467
468 let is_minusable_comment_nocpp = function
469 | (T2 (t,_b,_i)) ->
470 (match t with
471 | Parser_c.TCommentSpace _ (* only whitespace *)
472 (* patch: coccinelle *)
473 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
474 | Parser_c.TComment _ -> true
475 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
476 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
477 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
478 -> false
479
480 | Parser_c.TCommentMisc _
481 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
482 -> false
483
484 | _ -> false
485 )
486 | _ -> false
487
488 let all_coccis = function
489 Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 -> true
490 | _ -> false
491
492 (* previously gave up if the first character was a newline, but it is not clear why *)
493 let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
494
495 let set_minus_comment adj = function
496 | T2 (t,Ctx,idx) ->
497 let str = TH.str_of_tok t in
498 (match t with
499 | Parser_c.TCommentSpace _
500 (* patch: coccinelle *)
501 | Parser_c.TCommentNewline _ -> ()
502
503 | Parser_c.TComment _
504 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
505 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
506 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
507 ->
508 pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
509 (TH.line_of_tok t) str)
510 | _ -> raise Impossible
511 );
512 T2 (t, Min adj, idx)
513 (* patch: coccinelle *)
514 | T2 (t,Min adj,idx) as x -> x
515 | _ -> raise Impossible
516
517 let set_minus_comment_or_plus adj = function
518 Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 as x -> x
519 | x -> set_minus_comment adj x
520
521 let drop_minus xs =
522 xs +> Common.exclude (function
523 | T2 (t,Min adj,_) -> true
524 | _ -> false
525 )
526
527 let remove_minus_and_between_and_expanded_and_fake xs =
528
529 (* get rid of expanded and fake tokens *)
530 let xs = xs +> Common.exclude (function
531 | T2 (t,_,_) when TH.is_expanded t -> true
532 | Fake2 -> true
533
534 | _ -> false
535 )
536 in
537
538 let minus_or_comment = function
539 T2(_,Min adj,_) -> true
540 | x -> is_minusable_comment x in
541
542 let minus_or_comment_nocpp = function
543 T2(_,Min adj,_) -> true
544 | x -> is_minusable_comment_nocpp x in
545
546 let common_adj (index1,adj1) (index2,adj2) =
547 adj1 = adj2 (* same adjacency info *) &&
548 (* non-empty intersection of witness trees *)
549 not ((Common.inter_set index1 index2) = []) in
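(* e.g. common_adj ([1;2],4) ([2;3],4) holds: the adjacency numbers agree and
   the match-number sets share the element 2; different adjacency numbers or
   disjoint match-number sets make it false. *)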
550
551 let rec adjust_around_minus = function
552 [] -> []
553 | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
554 (((T2(_,Min adj,_))::_) as rest) ->
555 (* an initial newline, as in a replaced statement *)
556 let (between_minus,rest) = Common.span minus_or_comment rest in
557 (match rest with
558 [] -> (set_minus_comment adj x) ::
559 (List.map (set_minus_comment adj) between_minus)
560 | T2(_,Ctx,_)::_ when is_newline (List.hd(List.rev between_minus)) ->
561 (set_minus_comment adj x)::(adjust_within_minus between_minus) @
562 (adjust_around_minus rest)
563 | _ ->
564 x :: (adjust_within_minus between_minus) @
565 (adjust_around_minus rest))
566 | ((T2(_,Min adj,_))::_) as rest ->
567 (* no initial newline, as in a replaced expression *)
568 let (between_minus,rest) = Common.span minus_or_comment rest in
569 (match rest with
570 [] ->
571 (List.map (set_minus_comment adj) between_minus)
572 | _ ->
573 (adjust_within_minus between_minus) @
574 (adjust_around_minus rest))
575 | x::xs -> x::adjust_around_minus xs
576 and adjust_within_minus = function
577 [] -> []
578 | (T2(_,Min adj1,_) as t1)::xs ->
579 let (between_minus,rest) = Common.span is_minusable_comment xs in
580 (match rest with
581 [] ->
582 (* keep last newline *)
583 let (drop,keep) =
584 try
585 let (drop,nl,keep) =
586 Common.split_when is_newline between_minus in
587 (drop, nl :: keep)
588 with Not_found -> (between_minus,[]) in
589 t1 ::
590 List.map (set_minus_comment_or_plus adj1) drop @
591 keep
592 | (T2(_,Min adj2,_) as t2)::rest when common_adj adj1 adj2 ->
593 t1::
594 List.map (set_minus_comment_or_plus adj1) between_minus @
595 adjust_within_minus (t2::rest)
596 | x::xs ->
597 t1::(between_minus @ adjust_within_minus (x::xs)))
598 | _ -> failwith "only minus and space possible" in
599
600 (* new idea: collect regions not containing non-space context code.
601 If two adjacent minus tokens satisfy common_adj, then delete all
602 spaces, comments, etc. between them.
603 If two adjacent minus tokens do not satisfy common_adj, only delete
604 the spaces between them, and only if there are no comments, etc.
605 If the region contains no plus code and is both preceded and followed
606 by a newline, delete the initial newline. *)
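(* Sketch of the rules above (illustrative): between two removed calls
   "- a();" and "- b();" coming from the same match (common_adj holds), any
   comments and spaces in between are removed as well; if they come from
   unrelated matches, only plain whitespace between them may be removed and
   comments are kept. *)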
607
608 let rec adjust_around_minus = function
609 [] -> []
610 | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
611 (T2(_,Min adj1,_) as t1)::xs ->
612 let (minus_list,rest) = Common.span not_context (t1::xs) in
613 let contains_plus = List.exists is_plus minus_list in
614 let x =
615 match List.rev minus_list with
616 (T2(Parser_c.TCommentNewline c,_b,_i))::rest
617 when List.for_all minus_or_comment minus_list ->
618 set_minus_comment_or_plus adj1 x
619 | _ -> x in
620 x :: adjust_within_minus contains_plus minus_list @
621 adjust_around_minus rest
622 | (T2(_,Min adj1,_) as t1)::xs ->
623 let (minus_list,rest) = Common.span not_context (t1::xs) in
624 let contains_plus = List.exists is_plus minus_list in
625 adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
626 | x::xs -> x :: adjust_around_minus xs
627 and adjust_within_minus cp (* contains plus *) = function
628 (T2(_,Min adj1,_) as t1)::xs ->
629 let not_minus = function T2(_,Min _,_) -> false | _ -> true in
630 let (not_minus_list,rest) = Common.span not_minus xs in
631 t1 ::
632 (match rest with
633 (T2(_,Min adj2,_) as t2)::xs when common_adj adj1 adj2 ->
634 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
635 @ (adjust_within_minus cp (t2::xs))
636 | (T2(_,Min adj2,_) as t2)::xs ->
637 let is_whitespace_or_plus = function
638 (T2 _) as x -> is_space x
639 | _ -> true (*plus*) in
640 if List.for_all is_whitespace_or_plus not_minus_list
641 then
642 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
643 @ (adjust_within_minus cp (t2::xs))
644 else not_minus_list @ (adjust_within_minus cp (t2::xs))
645 | _ ->
646 if cp
647 then xs
648 else
649 let (spaces,rest) = Common.span is_space xs in
650 (List.map (set_minus_comment_or_plus adj1) spaces)
651 @ rest)
652 | xs -> failwith "should always start with minus"
653 and not_context = function
654 (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false
655 | _ -> true
656 and is_plus = function
657 C2 _ | Cocci2 _ -> true
658 | _ -> false in
659
660 let xs = adjust_around_minus xs in
661
662 (* this drops the blank lines that removing code leaves behind after a brace *)
663 let minus_or_comment_nonl = function
664 T2(_,Min adj,_) -> true
665 | T2(Parser_c.TCommentNewline _,_b,_i) -> false
666 | x -> is_minusable_comment x in
667
668 let rec adjust_after_brace = function
669 [] -> []
670 | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs)
671 when str_of_token2 x =$= "{" ->
672 let (between_minus,rest) = Common.span minus_or_comment_nonl xs in
673 let is_whitespace = function
674 T2(Parser_c.TCommentSpace _,_b,_i)
675 (* patch: cocci *)
676 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
677 | _ -> false in
678 let (newlines,rest) = Common.span is_whitespace rest in
679 let (drop_newlines,last_newline) =
680 let rec loop = function
681 [] -> ([],[])
682 | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
683 (List.rev rest,[x])
684 | x::xs ->
685 let (drop_newlines,last_newline) = loop xs in
686 (drop_newlines,x::last_newline) in
687 loop (List.rev newlines) in
688 x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@
689 last_newline@
690 adjust_after_brace rest
691 | x::xs -> x::adjust_after_brace xs in
692
693 let xs = adjust_after_brace xs in
694
695 (* search backwards from a context } over spaces until reaching a newline,
696 then go back over all minus code until reaching some context or + code,
697 and get rid of all the intervening spaces, newlines, and comments.
698 The input list is reversed. *)
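(* Illustration (assumed): when the code just before a closing "}" is
   removed, the surrounding whitespace is cleaned up as well, so that no
   blank, indented line is left in front of the brace. *)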
699 let rec adjust_before_brace = function
700 [] -> []
701 | ((T2(t,Ctx,_)) as x)::xs when str_of_token2 x =$= "}" or is_newline x ->
702 let (outer_spaces,rest) = Common.span is_space xs in
703 x :: outer_spaces @
704 (match rest with
705 ((T2 (Parser_c.TCommentNewline _,Ctx,_i)) as h) ::
706 (* the rest of this code is the same as from_newline below
707 but merging them seems to be error prone... *)
708 ((T2 (t, Min adj, idx)) as m) :: rest ->
709 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
710 h :: m ::
711 (List.map (set_minus_comment adj) spaces) @
712 (adjust_before_brace rest)
713 | _ -> adjust_before_brace rest)
714 | x::xs -> x :: (adjust_before_brace xs) in
715
716 let from_newline = function
717 ((T2 (t, Min adj, idx)) as m) :: rest ->
718 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
719 m ::
720 (List.map (set_minus_comment adj) spaces) @
721 (adjust_before_brace rest)
722 | rest -> adjust_before_brace rest in
723
724 let xs = List.rev (from_newline (List.rev xs)) in
725 let xs = drop_minus xs in
726 xs
727
728 (* normally, in C code, a semicolon is not preceded by a space or newline *)
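(* e.g. when the last argument before a ")" or the expression before a ";"
   is removed, the space that separated it from the ")" or ";" is removed as
   well, but only if something was actually deleted there. *)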
729 let adjust_before_semicolon toks =
730 let toks = List.rev toks in
731 let rec search_semic = function
732 [] -> []
733 | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
734 if List.mem (str_of_token2 x) [";";")";","]
735 then x :: search_minus false xs
736 else x :: search_semic xs
737 | x::xs -> x :: search_semic xs
738 and search_minus seen_minus xs =
739 let (spaces, rest) = Common.span is_space xs in
740 (* only delete spaces if something is actually deleted *)
741 match rest with
742 ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest
743 | _ -> if seen_minus then rest else xs in
744 List.rev (search_semic toks)
745
746 let is_ident_like s = s ==~ Common.regexp_alpha
747
748 let rec add_space xs =
749 match xs with
750 | [] -> []
751 | [x] -> [x]
752 | (Cocci2(sx,lnx,_,rcolx) as x)::((Cocci2(sy,lny,lcoly,_)) as y)::xs
753 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
754 not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
755 (* this only works within a line. could consider whether
756 something should be done to add newlines too, rather than
757 printing them explicitly in unparse_cocci. *)
758 x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
759 | x::y::xs ->
760 let sx = str_of_token2 x in
761 let sy = str_of_token2 y in
762 if is_ident_like sx && is_ident_like sy
763 then x::C2 " "::(add_space (y::xs))
764 else x::(add_space (y::xs))
765
766
767
768 (* When we insert some new code, because of a + in an SP, we must add this
769 * code at the right place, with the proper indentation. So each time we
770 * encounter some spacing info containing a newline, we update the
771 * current indentation level.
772 *
773 * TODO problems: not accurate. ex: TODO
774 *
775 * TODO: if in a #define region, we should add a \ before the \n
776 *)
777 let new_tabbing2 space =
778 (list_of_string space)
779 +> List.rev
780 +> Common.take_until (fun c -> c =<= '\n')
781 +> List.rev
782 +> List.map string_of_char
783 +> String.concat ""
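(* e.g. new_tabbing2 "\n\t  " = "\t  ": keep only the indentation characters
   that follow the last newline of the token's string. *)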
784
785 let new_tabbing a =
786 Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
787
788
789 let rec adjust_indentation xs =
790 let _current_tabbing = ref "" in
791 let tabbing_unit = ref None in
792
793 let string_of_list l = String.concat "" (List.map string_of_char l) in
794
795 (* try to pick a tabbing unit for the plus code *)
796 let adjust_tabbing_unit old_tab new_tab =
797 if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
798 then
799 let old_tab = list_of_string old_tab in
800 let new_tab = list_of_string new_tab in
801 let rec loop = function
802 ([],new_tab) ->
803 tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
804 | (_,[]) -> failwith "not possible"
805 | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
806 loop (old_tab,new_tab) in
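(* e.g. with old_tab = "\t" and new_tab = "\t\t", the extra "\t" becomes the
   tabbing unit used to indent plus code; it is only set if no tabbing unit
   has been chosen yet. *)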
807
808 let remtab tu current_tab =
809 let current_tab = List.rev(list_of_string current_tab) in
810 let rec loop = function
811 ([],new_tab) -> string_of_list (List.rev new_tab)
812 | (_,[]) -> "" (*weird; tabbing unit used up more than the current tab*)
813 | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
814 | (_,ns) -> (* mismatch; remove what we can *)
815 string_of_list (List.rev ns) in
816 loop (tu,current_tab) in
817
818 let rec find_first_tab started = function
819 [] -> ()
820 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
821 find_first_tab true xs
822 (* patch: coccinelle *)
823 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
824 when started ->
825 let s = str_of_token2 x +> new_tabbing in
826 tabbing_unit := Some (s,List.rev (list_of_string s))
827 | x::xs -> find_first_tab started xs in
828 find_first_tab false xs;
829
830 let rec aux started xs =
831 match xs with
832 | [] -> []
833 (* patch: coccinelle *)
834 | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
835 ((Cocci2 ("{",_,_,_)) as a)::xs
836 when started && str_of_token2 x =$= ")" ->
837 (* to be done for if, etc, but not for a function header *)
838 x::(C2 " ")::a::(aux started xs)
839 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs ->
840 let old_tabbing = !_current_tabbing in
841 str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
842 (* only trust the indentation after the first { *)
843 (if started then adjust_tabbing_unit old_tabbing !_current_tabbing);
844 let coccis_rest = Common.span all_coccis xs in
845 (match coccis_rest with
846 (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
847 (* the case where cocci code has been added before a close } *)
848 x::aux started (Indent_cocci2::xs)
849 | _ -> x::aux started xs)
850 | Indent_cocci2::xs ->
851 (match !tabbing_unit with
852 None -> aux started xs
853 | Some (tu,_) ->
854 _current_tabbing := (!_current_tabbing)^tu;
855 Cocci2 (tu,-1,-1,-1)::aux started xs)
856 | Unindent_cocci2::xs ->
857 (match !tabbing_unit with
858 None -> aux started xs
859 | Some (_,tu) ->
860 _current_tabbing := remtab tu (!_current_tabbing);
861 aux started xs)
862 (* border between existing code and cocci code *)
863 | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_)) as y)::xs
864 when str_of_token2 x =$= "{" ->
865 x::aux true (y::Indent_cocci2::xs)
866 | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
867 when str_of_token2 y =$= "}" ->
868 x::aux started (y::Unindent_cocci2::xs)
869 (* starting the body of the function *)
870 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs
871 | ((Cocci2("{",_,_,_)) as a)::xs -> a::aux true xs
872 | ((Cocci2("\n",_,_,_)) as x)::xs ->
873 (* don't inline this in the expression because of OCaml's evaluation order *)
874 let s = !_current_tabbing in
875 x::Cocci2 (s,-1,-1,-1)::aux started xs
876 | x::xs -> x::aux started xs in
877 aux false xs
878
879
880 let rec find_paren_comma = function
881 | [] -> ()
882
883 (* do nothing if it was like this in the original file *)
884 | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
885 ::xs when p2 =|= p1 + 1 ->
886 find_paren_comma (x2::xs)
887
888 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
889 ::xs when p2 =|= p1 + 1 ->
890 find_paren_comma (x2::xs)
891
892 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
893 ::xs when p2 =|= p1 + 1 ->
894 find_paren_comma (x2::xs)
895
896 (* otherwise we can adjust *)
897 | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
898 x2.remove <- true;
899 find_paren_comma (x2::xs)
900 | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
901 x1.remove <- true;
902 find_paren_comma (x2::xs)
903
904 | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
905 x1.remove <- true;
906 find_paren_comma (x2::xs)
907
908 | x::xs ->
909 find_paren_comma xs
910
911
912 let fix_tokens toks =
913 let toks = toks +> List.map mk_token_extended in
914
915 let cleaner = toks +> Common.exclude (function
916 | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* but keep the ifdefs *)
917 | _ -> false
918 ) in
919 find_paren_comma cleaner;
920
921 let toks = rebuild_tokens_extented toks in
922 toks +> List.map (fun x -> x.tok2)
923
924
925
926 (*****************************************************************************)
927 (* Final unparsing (and debugging support) *)
928 (*****************************************************************************)
929
930 (* for debugging *)
931 type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
932
933 let kind_of_token2 = function
934 | Fake2 -> KFake
935 | Cocci2 _ -> KCocci
936 | C2 _ -> KC
937 | T2 (t,_,_) ->
938 (match TH.pinfo_of_tok t with
939 | ExpandedTok _ -> KExpanded
940 | OriginTok _ -> KOrigin
941 | FakeTok _ -> raise Impossible (* now a Fake2 *)
942 | AbstractLineTok _ -> raise Impossible (* now a KC *)
943 )
944 | Unindent_cocci2 | Indent_cocci2 -> raise Impossible
945
946 let end_mark = "!"
947
948 let start_mark = function
949 | KFake -> "!F!"
950 | KCocci -> "!S!"
951 | KC -> "!A!"
952 | KExpanded -> "!E!"
953 | KOrigin -> ""
954
955 let print_all_tokens2 pr xs =
956 if !Flag_parsing_c.debug_unparsing
957 then
958 let current_kind = ref KOrigin in
959 xs +> List.iter (fun t ->
960 let newkind = kind_of_token2 t in
961 if newkind =*= !current_kind
962 then pr (str_of_token2 t)
963 else begin
964 pr (end_mark);
965 pr (start_mark newkind);
966 pr (str_of_token2 t);
967 current_kind := newkind
968 end
969 );
970 else
971 xs +> List.iter (fun x -> pr (str_of_token2 x))
972
973
974
975
976 (*****************************************************************************)
977 (* Entry points *)
978 (*****************************************************************************)
979
980 (* old: PPviatok was introduced at the beginning to allow pretty printing a
981 * complete C file, including a C file modified by transformation.ml,
982 * even though pretty_print_c.ml, ast_to_flow (and maybe flow_to_ast) did
983 * not yet handle all the cases. Indeed, we don't need to do anything
984 * fancy when a function was not modified at all; we just need to
985 * print its list of tokens as-is. But now pretty_print_c.ml handles
986 * almost everything, so it is perhaps less useful. PPviatok may still
987 * optimize the pretty printing a little.
988 *
989 * update: we now have PPviastr, which is even faster than PPviatok, so
990 * PPviatok has disappeared.
991 *)
992
993 type ppmethod = PPnormal | PPviastr
994
995
996
997
998 (* The pp_program function calls pretty_print_c.ml with a special
999 * function to print the leaf components, the tokens. When we want to
1000 * print a token, we may also need to print the spaces and comments that
1001 * were close to it in the original file (and that were omitted during the
1002 * parsing phase), and honor what the cocci-info attached to the token says.
1003 * We may not print the token at all if it is a MINUS token, and we may
1004 * print it together with some cocci code attached to it as a PLUS.
1005 * So we may also call unparse_cocci. Because the cocci code may
1006 * contain metavariables, unparse_cocci will in fact sometimes call back
1007 * pretty_print_c (which this time will not call back unparse_cocci again).
1008 *)
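(* Pipeline sketch (summarizing the phases below): get_fakeInfo_and_tokens
   builds the token1 stream from the AST and the original tokens,
   displace_fake_nodes repositions the fake nodes, expand_mcode produces the
   token2 stream with the cocci additions, and then, unless in sgrep mode,
   the adjust_* passes, remove_minus_and_between_and_expanded_and_fake,
   add_space and fix_tokens clean it up before print_all_tokens2 writes the
   result. *)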
1009
1010 let pp_program2 xs outfile =
1011 Common.with_open_outfile outfile (fun (pr,chan) ->
1012 let pr s =
1013 if !Flag_parsing_c.debug_unparsing
1014 then begin pr2_no_nl s; flush stderr end
1015 else pr s
1016 (* flush chan; *)
1017 (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1018 in
1019
1020 xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
1021 (* here can still work on ast *)
1022 let e = remove_useless_fakeInfo_struct e in
1023
1024 match ppmethod with
1025 | PPnormal ->
1026 (* now work on tokens *)
1027
1028 (* phase1: just get all the tokens, all the information *)
1029 assert(toks_e +> List.for_all (fun t ->
1030 TH.is_origin t or TH.is_expanded t
1031 ));
1032 let toks = get_fakeInfo_and_tokens e toks_e in
1033 let toks = displace_fake_nodes toks in
1034 (* assert Origin;ExpandedTok;Faketok *)
1035 let toks = expand_mcode toks in
1036 (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1037 * and no tag information, just NOTHING. *)
1038
1039 let toks =
1040 if !Flag.sgrep_mode2
1041 then drop_minus toks (* nothing to do for sgrep *)
1042 else
1043 (* phase2: can now start to filter and adjust *)
1044 let toks = adjust_indentation toks in
1045 let toks = adjust_before_semicolon toks in (* before remove minus *)
1046 let toks = remove_minus_and_between_and_expanded_and_fake toks in
1047 (* assert Origin + Cocci + C and no minus *)
1048 let toks = add_space toks in
1049 let toks = fix_tokens toks in
1050 toks in
1051
1052 (* in theory we could reparse and rework the ast here, or
1053 * apply another SP. Not before, because Julia may have generated a
1054 * file that does not parse. We would need to call unparsing_tricks
1055 * before being ready to reparse. *)
1056 print_all_tokens2 pr toks;
1057
1058 | PPviastr -> pr str
1059 )
1060 )
1061
1062 let pp_program a b =
1063 Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
1064
1065
1066 let pp_program_default xs outfile =
1067 let xs' = xs +> List.map (fun x -> x, PPnormal) in
1068 pp_program xs' outfile