parsing_c/unparse_c.ml

   1 (* Yoann Padioleau, Julia Lawall
   2  *
   3  * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
   4  * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
   5  *
   6  * This program is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU General Public License (GPL)
   8  * version 2 as published by the Free Software Foundation.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * file license.txt for more details.
  14  *
  15  *
  16  * Modifications by Julia Lawall for better newline handling.
  17  *)
  18 open Common
  19
  20 open Ast_c
  21
  22 module TH = Token_helpers
  23
  24
  25 (* should keep comments and directives in between adjacent deleted terms,
  26 but not comments and directives within deleted terms.  should use the
  27 labels found in the control-flow graph *)
  28
  29
  30
  31 (*****************************************************************************)
  32 (* Wrappers *)
  33 (*****************************************************************************)
  34 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
  35
  36 (*****************************************************************************)
  37 (* Types used during the intermediate phases of the unparsing *)
  38 (*****************************************************************************)
  39
  40 type token1 =
  41   | Fake1 of info
  42   | T1 of Parser_c.token
  43
  44 (* The cocci_tag of the token should always be a NOTHING. The mark of
  45  * the token can only be OriginTok or ExpandedTok. Why not get rid of
  46  * token and get something simpler ? because we need to know if the
  47  * info is a TCommentCpp or TCommentSpace, etc for some of the further
  48  * analysis so easier to keep with the token.
  49  *
  50  * This type contains the whole information. Have all the tokens with this
  51  * type.
  52  *)
  53 type min =
  54     Min of (int list (* match numbers *) * int (* adjacency information *))
  55   | Ctx
  56
  57 type token2 =
  58   | T2 of Parser_c.token * min *
  59           int option (* orig index, abstracting away comments and space *)
  60   | Fake2
  61   | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
  62   | C2 of string
  63   | Indent_cocci2
  64   | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
  65
  66 (* not used yet *)
  67 type token3 =
  68   | T3 of Parser_c.token
  69   | Cocci3 of string
  70   | C3 of string
  71
  72
  73 (* similar to the tech in parsing_hack *)
  74 type token_extended = {
  75   tok2 : token2;
  76   str  : string;
  77   idx: int option; (* to know if 2 tokens were consecutive in orig file *)
  78   mutable new_tokens_before : token2 list;
  79   mutable remove : bool;
  80 }
  81
  82
  83 (*****************************************************************************)
  84 (* Helpers *)
  85 (*****************************************************************************)
  86
  87 let info_of_token1 t =
  88   match t with
  89   | Fake1 info -> info
  90   | T1 tok -> TH.info_of_tok tok
  91
  92 let print_token1 = function
  93     T1 tok -> TH.str_of_tok tok
  94   | Fake1 info -> "fake"
  95
  96 let str_of_token2 = function
  97   | T2 (t,_,_) -> TH.str_of_tok t
  98   | Fake2 -> ""
  99   | Cocci2 (s,_,_,_) -> s
 100   | C2 s -> s
 101   | Indent_cocci2 -> ""
 102   | Unindent_cocci2 _ -> ""
 103
 104 let print_token2 = function
 105   | T2 (t,b,_) ->
 106       let b_str =
 107         match b with
 108           Min (index,adj) ->
 109             Printf.sprintf "-%d[%s]" adj
 110               (String.concat " " (List.map string_of_int index))
 111         | Ctx -> "" in
 112       "T2:"^b_str^TH.str_of_tok t
 113   | Fake2 -> "fake"
 114   | Cocci2 (s,_,lc,rc) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
 115   | C2 s -> "C2:"^s
 116   | Indent_cocci2 -> "Indent"
 117   | Unindent_cocci2 _ -> "Unindent"
 118
 119 let simple_print_all_tokens1 l =
 120   List.iter (function x -> Printf.printf "%s " (print_token1 x)) l;
 121   Printf.printf "\n"
 122
 123 let simple_print_all_tokens2 l =
 124   List.iter (function x -> Printf.printf "|%s| " (print_token2 x)) l;
 125   Printf.printf "\n"
 126
 127 let str_of_token3 = function
 128   | T3 t -> TH.str_of_tok t
 129   | Cocci3 s | C3 s -> s
 130
 131
 132
 133 let mk_token_extended x =
 134   let origidx =
 135     match x with
 136     | T2 (_,_, idx) -> idx
 137     | _ -> None
 138   in
 139   { tok2 = x;
 140     str = str_of_token2 x;
 141     idx = origidx;
 142     new_tokens_before = [];
 143     remove = false;
 144   }
 145
 146 let rebuild_tokens_extented toks_ext =
 147   let _tokens = ref [] in
 148   toks_ext +> List.iter (fun tok ->
 149     tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
 150     if not tok.remove then push2 tok.tok2 _tokens;
 151   );
 152   let tokens = List.rev !_tokens in
 153   (tokens +> List.map mk_token_extended)
 154
 155
 156 let mcode_contain_plus = function
 157   | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
 158   | Ast_cocci.CONTEXT _ -> true
 159 (* patch: when need full coccinelle transformation *)
 160   | Ast_cocci.MINUS (_,_,_,[]) -> false
 161   | Ast_cocci.MINUS (_,_,_,x::xs) -> true
 162   | Ast_cocci.PLUS _ -> raise Impossible
 163
 164 let contain_plus info =
 165   let mck = Ast_c.mcode_of_info info in
 166   mcode_contain_plus mck
 167
 168 (*****************************************************************************)
 169 (* Last fix on the ast *)
 170 (*****************************************************************************)
 171
 172 (* Because of the ugly trick to handle initialiser, I generate fake ','
 173  * for the last initializer element, but if there is nothing around it,
 174  * I don't want in the end to print it.
 175  *)
 176
 177 let remove_useless_fakeInfo_struct program =
 178   let bigf = { Visitor_c.default_visitor_c_s with
 179     Visitor_c.kini_s = (fun (k,bigf) ini ->
 180       match k ini with
 181       | InitList args, ii ->
 182           (match ii with
 183           | [_i1;_i2] -> ini
 184           | [i1;i2;iicommaopt] ->
 185               if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
 186                  && (Ast_c.is_fake iicommaopt)
 187                  (* sometimes the guy put a normal iicommaopt *)
 188               then InitList args, [i1;i2]
 189               else InitList args, [i1;i2;iicommaopt]
 190           | [i1;i2;iicommaopt;end_comma_opt] ->
 191               (* only in #define. end_comma_opt canot be fake *)
 192               (* not sure if this will be considered ambiguous with a previous
 193                  case? *)
 194               if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
 195                  && (Ast_c.is_fake iicommaopt)
 196                  (* sometimes the guy put a normal iicommaopt *)
 197               then InitList args, [i1;i2;end_comma_opt]
 198               else InitList args, [i1;i2;iicommaopt;end_comma_opt]
 199           | _ -> raise Impossible
 200           )
 201       | x -> x
 202     )
 203   } in
 204   Visitor_c.vk_toplevel_s bigf program
 205
 206
 207 (*****************************************************************************)
 208 (* Tokens1 generation *)
 209 (*****************************************************************************)
 210
 211 let get_fakeInfo_and_tokens celem toks =
 212   let toks_in  = ref toks in
 213   let toks_out = ref [] in
 214
 215   (* todo? verify good order of position ? *)
 216   let pr_elem info =
 217     match Ast_c.pinfo_of_info info with
 218     | FakeTok _ ->
 219         Common.push2 (Fake1 info) toks_out
 220     | OriginTok _ | ExpandedTok _ ->
 221         (* get the associated comments/space/cppcomment tokens *)
 222         let (before, x, after) =
 223           !toks_in +> Common.split_when (fun tok ->
 224           info =*= TH.info_of_tok tok)
 225         in
 226         assert(info =*= TH.info_of_tok x);
 227         (*old: assert(before +> List.for_all (TH.is_comment)); *)
 228         before +> List.iter (fun x ->
 229           if not (TH.is_comment x)
 230           then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
 231           (* case such as  int asm d3("x"); not yet in ast *)
 232         );
 233         before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
 234         push2 (T1 x) toks_out;
 235         toks_in := after;
 236     | AbstractLineTok _ ->
 237         (* can be called on type info when for instance use -type_c *)
 238         if !Flag_parsing_c.pretty_print_type_info
 239         then Common.push2 (Fake1 info) toks_out
 240         else raise Impossible (* at this stage *)
 241   in
 242
 243   let pr_space _ = () in (* use the spacing that is there already *)
 244
 245   Pretty_print_c.pp_program_gen pr_elem pr_space celem;
 246
 247   if not (null !toks_in)
 248   then failwith "WEIRD: unparsing not finished";
 249
 250   List.rev !toks_out
 251
 252 (* Fake nodes that have BEFORE code should be moved over any subsequent
 253 whitespace and newlines, but not any comments, to get as close to the affected
 254 code as possible.  Similarly, fake nodes that have AFTER code should be moved
 255 backwards.  No fake nodes should have both before and after code. *)
 256
 257 let displace_fake_nodes toks =
 258   let is_fake = function Fake1 _ -> true | _ -> false in
 259   let is_whitespace = function
 260       T1(Parser_c.TCommentSpace _)
 261       (* patch: cocci    *)
 262     | T1(Parser_c.TCommentNewline _) -> true
 263     | _ -> false in
 264   let rec loop toks =
 265     let fake_info =
 266       try Some (Common.split_when is_fake toks)
 267       with Not_found -> None in
 268     match fake_info with
 269       Some(bef,((Fake1 info) as fake),aft) ->
 270         (match !(info.cocci_tag) with
 271         | Some x ->
 272           (match x with
 273             (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
 274             (* move the fake node forwards *)
 275               let (whitespace,rest) = Common.span is_whitespace aft in
 276               bef @ whitespace @ fake :: (loop rest)
 277           | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
 278             (* move the fake node backwards *)
 279               let revbef = List.rev bef in
 280               let (revwhitespace,revprev) = Common.span is_whitespace revbef in
 281               let whitespace = List.rev revwhitespace in
 282               let prev = List.rev revprev in
 283               prev @ fake :: (loop (whitespace @ aft))
 284           | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
 285               bef @ fake :: (loop aft)
 286           | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
 287               failwith "fake node should not be before-after"
 288           | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
 289         )
 290         | None ->
 291             bef @ fake :: (loop aft)
 292         )
 293     | None -> toks
 294     | _ -> raise Impossible in
 295   loop toks
 296
 297 (*****************************************************************************)
 298 (* Tokens2 generation *)
 299 (*****************************************************************************)
 300
 301 let comment2t2 = function
 302     (Token_c.TCommentCpp x,(info : Token_c.info)) ->
 303       C2("\n"^info.Common.str^"\n")
 304   | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
 305
 306 let expand_mcode toks =
 307   let toks_out = ref [] in
 308
 309   let index = ref 0 in
 310
 311   let add_elem t minus =
 312     match t with
 313     | Fake1 info ->
 314         let str = Ast_c.str_of_info info in
 315         if str =$= ""
 316         then push2 (Fake2) toks_out
 317         (* perhaps the fake ',' *)
 318         else push2 (C2 str) toks_out
 319
 320
 321     | T1 tok ->
 322         (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
 323         (* no tag on expandedTok ! *)
 324         let modified = function
 325             None -> false
 326           | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
 327           | _ -> true in
 328         (if (TH.is_expanded tok &&
 329              modified !((TH.info_of_tok tok).cocci_tag)
 330             (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*))
 331         then
 332           failwith
 333             (Printf.sprintf
 334                "expanded token %s on line %d is either modified or stored in a metavariable"
 335                (TH.str_of_tok tok) (TH.line_of_tok tok)));
 336
 337         let tok' = tok +> TH.visitor_info_of_tok (fun i ->
 338           { i with cocci_tag = ref Ast_c.emptyAnnot; }
 339         ) in
 340
 341         let optindex =
 342           if TH.is_origin tok && not (TH.is_real_comment tok)
 343           then begin
 344               incr index;
 345               Some !index
 346           end
 347           else None
 348         in
 349
 350         push2 (T2 (tok', minus, optindex)) toks_out
 351   in
 352
 353   let expand_info t =
 354     let (mcode,env) =
 355       Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
 356
 357     let pr_cocci s ln col rcol =
 358       push2 (Cocci2(s,ln,col,rcol)) toks_out  in
 359     let pr_c info =
 360       (match Ast_c.pinfo_of_info info with
 361         Ast_c.AbstractLineTok _ ->
 362           push2 (C2 (Ast_c.str_of_info info)) toks_out
 363       | Ast_c.FakeTok (s,_) ->
 364           push2 (C2 s) toks_out
 365       | _ ->
 366           Printf.printf "line: %s\n" (Common.dump info);
 367           failwith "not an abstract line");
 368       (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
 369       List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
 370
 371     let pr_barrier ln col = (* marks a position, used around C code *)
 372       push2 (Cocci2("",ln,col,col)) toks_out  in
 373     let pr_nobarrier ln col = () in (* not needed for linux spacing *)
 374
 375     let pr_cspace _ = push2 (C2 " ") toks_out in
 376
 377     let pr_space _ = () (* rely on add_space in cocci code *) in
 378     let pr_arity _ = () (* not interested *) in
 379
 380     let indent _   = push2 Indent_cocci2 toks_out in
 381     let unindent x = push2 (Unindent_cocci2 x) toks_out in
 382
 383     let args_pp =
 384       (env, pr_cocci, pr_c, pr_cspace,
 385        (match !Flag_parsing_c.spacing with
 386          Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
 387        pr_arity,
 388        (match !Flag_parsing_c.spacing with
 389          Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
 390        indent, unindent) in
 391
 392     (* old: when for yacfe with partial cocci:
 393      *    add_elem t false;
 394     *)
 395
 396     (* patch: when need full coccinelle transformation *)
 397     let unparser = Unparse_cocci.pp_list_list_any args_pp false in
 398     match mcode with
 399     | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
 400         (* Why adding ? because I want to have all the information, the whole
 401          * set of tokens, so I can then process and remove the
 402          * is_between_two_minus for instance *)
 403         add_elem t (Min (inst,adj));
 404         unparser any_xxs Unparse_cocci.InPlace
 405     | Ast_cocci.CONTEXT (_,any_befaft) ->
 406         (match any_befaft with
 407         | Ast_cocci.NOTHING ->
 408             add_elem t Ctx
 409         | Ast_cocci.BEFORE (xxs,_) ->
 410             unparser xxs Unparse_cocci.Before;
 411             add_elem t Ctx
 412         | Ast_cocci.AFTER (xxs,_) ->
 413             add_elem t Ctx;
 414             unparser xxs Unparse_cocci.After;
 415         | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
 416             unparser xxs Unparse_cocci.Before;
 417             add_elem t Ctx;
 418             unparser yys Unparse_cocci.After;
 419         )
 420     | Ast_cocci.PLUS _ -> raise Impossible
 421
 422   in
 423
 424   toks +> List.iter expand_info;
 425   List.rev !toks_out
 426
 427
 428 (*****************************************************************************)
 429 (* Tokens2 processing, filtering, adjusting *)
 430 (*****************************************************************************)
 431
 432 let is_space = function
 433   | T2(Parser_c.TCommentSpace _,_b,_i) -> true  (* only whitespace *)
 434   | _ -> false
 435
 436 let is_newline = function
 437   | T2(Parser_c.TCommentNewline _,_b,_i) -> true
 438   | _ -> false
 439
 440 let is_whitespace = function
 441   | (T2 (t,_b,_i)) ->
 442       (match t with
 443       | Parser_c.TCommentSpace _ -> true  (* only whitespace *)
 444       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
 445       | _ -> false
 446       )
 447   | _ -> false
 448
 449 let is_minusable_comment = function
 450   | (T2 (t,_b,_i)) ->
 451       (match t with
 452       | Parser_c.TCommentSpace _   (* only whitespace *)
 453       (* patch: coccinelle *)
 454       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
 455       | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
 456       | Parser_c.TComment _
 457       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 458       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 459       | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
 460         -> true
 461
 462       | Parser_c.TCommentMisc _
 463       | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
 464         -> false
 465
 466       | _ -> false
 467       )
 468   | _ -> false
 469
 470 let is_minusable_comment_nocpp = function
 471   | (T2 (t,_b,_i)) ->
 472       (match t with
 473       | Parser_c.TCommentSpace _   (* only whitespace *)
 474       (* patch: coccinelle *)
 475       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
 476       | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
 477       | Parser_c.TComment _ -> true
 478       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 479       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 480       | Parser_c.TCommentCpp (Token_c.CppDirective, _)
 481         -> false
 482
 483       | Parser_c.TCommentMisc _
 484       | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
 485         -> false
 486
 487       | _ -> false
 488       )
 489   | _ -> false
 490
 491 let all_coccis = function
 492     Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 _ -> true
 493   | _ -> false
 494
 495 (*previously gave up if the first character was a newline, but not clear why*)
 496 let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
 497
 498 let set_minus_comment adj = function
 499   | T2 (t,Ctx,idx) ->
 500       let str = TH.str_of_tok t in
 501       (match t with
 502       | Parser_c.TCommentSpace _
 503 (* patch: coccinelle *)
 504       | Parser_c.TCommentNewline _ -> ()
 505
 506       | Parser_c.TComment _
 507       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 508       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 509       | Parser_c.TCommentCpp (Token_c.CppDirective, _)
 510         ->
 511           pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
 512                  (TH.line_of_tok t) str)
 513       | _ -> raise Impossible
 514       );
 515       T2 (t, Min adj, idx)
 516 (* patch: coccinelle *)
 517   | T2 (t,Min adj,idx) as x -> x
 518   | _ -> raise Impossible
 519
 520 let set_minus_comment_or_plus adj = function
 521     Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 _ as x -> x
 522   | x -> set_minus_comment adj x
 523
 524 let drop_minus xs =
 525   xs +> Common.exclude (function
 526     | T2 (t,Min adj,_) -> true
 527     | _ -> false
 528   )
 529
 530 let remove_minus_and_between_and_expanded_and_fake xs =
 531
 532   (* get rid of exampled and fake tok *)
 533   let xs = xs +> Common.exclude (function
 534     | T2 (t,_,_) when TH.is_expanded t -> true
 535     | Fake2 -> true
 536
 537     | _ -> false
 538   )
 539   in
 540
 541   let minus_or_comment = function
 542       T2(_,Min adj,_) -> true
 543     | x -> is_minusable_comment x in
 544
 545   let minus_or_comment_nocpp = function
 546       T2(_,Min adj,_) -> true
 547     | x -> is_minusable_comment_nocpp x in
 548
 549   let common_adj (index1,adj1) (index2,adj2) =
 550     adj1 = adj2 (* same adjacency info *) &&
 551     (* non-empty intersection of witness trees *)
 552     not ((Common.inter_set index1 index2) = []) in
 553
 554   let rec adjust_around_minus = function
 555       [] -> []
 556     | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
 557       (((T2(_,Min adj,_))::_) as rest) ->
 558         (* an initial newline, as in a replaced statement *)
 559         let (between_minus,rest) = Common.span minus_or_comment rest in
 560         (match rest with
 561           [] -> (set_minus_comment adj x) ::
 562             (List.map (set_minus_comment adj) between_minus)
 563         | T2(_,Ctx,_)::_ when is_newline (List.hd(List.rev between_minus)) ->
 564             (set_minus_comment adj x)::(adjust_within_minus between_minus) @
 565             (adjust_around_minus rest)
 566         | _ ->
 567             x :: (adjust_within_minus between_minus) @
 568             (adjust_around_minus rest))
 569     | ((T2(_,Min adj,_))::_) as rest ->
 570         (* no initial newline, as in a replaced expression *)
 571         let (between_minus,rest) = Common.span minus_or_comment rest in
 572         (match rest with
 573           [] ->
 574             (List.map (set_minus_comment adj) between_minus)
 575         | _ ->
 576             (adjust_within_minus between_minus) @
 577             (adjust_around_minus rest))
 578     | x::xs -> x::adjust_around_minus xs
 579   and adjust_within_minus = function
 580       [] -> []
 581     | (T2(_,Min adj1,_) as t1)::xs ->
 582         let (between_minus,rest) = Common.span is_minusable_comment xs in
 583         (match rest with
 584           [] ->
 585             (* keep last newline *)
 586             let (drop,keep) =
 587               try
 588                 let (drop,nl,keep) =
 589                   Common.split_when is_newline between_minus in
 590                 (drop, nl :: keep)
 591               with Not_found -> (between_minus,[]) in
 592             t1 ::
 593             List.map (set_minus_comment_or_plus adj1) drop @
 594             keep
 595         | (T2(_,Min adj2,_) as t2)::rest when common_adj adj1 adj2 ->
 596             t1::
 597             List.map (set_minus_comment_or_plus adj1) between_minus @
 598             adjust_within_minus (t2::rest)
 599         | x::xs ->
 600             t1::(between_minus @ adjust_within_minus (x::xs)))
 601     | _ -> failwith "only minus and space possible" in
 602
 603   (* new idea: collects regions not containing non-space context code
 604      if two adjacent adjacent minus tokens satisfy common_adj then delete
 605      all spaces, comments etc between them
 606      if two adjacent minus tokens do not satisfy common_adj only delete
 607      the spaces between them if there are no comments, etc.
 608      if the region contain no plus code and is both preceded and followed
 609      by a newline, delete the initial newline. *)
 610
 611   let rec adjust_around_minus = function
 612       [] -> []
 613     | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
 614       (T2(_,Min adj1,_) as t1)::xs ->
 615         let (minus_list,rest) = Common.span not_context (t1::xs) in
 616         let contains_plus = List.exists is_plus minus_list in
 617         let x =
 618           match List.rev minus_list with
 619             (T2(Parser_c.TCommentNewline c,_b,_i))::rest
 620             when List.for_all minus_or_comment minus_list ->
 621               set_minus_comment_or_plus adj1 x
 622           | _ -> x in
 623         x :: adjust_within_minus contains_plus minus_list @
 624         adjust_around_minus rest
 625     | (T2(_,Min adj1,_) as t1)::xs ->
 626         let (minus_list,rest) = Common.span not_context (t1::xs) in
 627         let contains_plus = List.exists is_plus minus_list in
 628         adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
 629     | x::xs -> x :: adjust_around_minus xs
 630   and adjust_within_minus cp (* contains plus *) = function
 631       (T2(_,Min adj1,_) as t1)::xs ->
 632         let not_minus = function T2(_,Min _,_) -> false | _ -> true in
 633         let (not_minus_list,rest) = Common.span not_minus xs in
 634         t1 ::
 635         (match rest with
 636           (T2(_,Min adj2,_) as t2)::xs when common_adj adj1 adj2 ->
 637             (List.map (set_minus_comment_or_plus adj1) not_minus_list)
 638             @ (adjust_within_minus cp (t2::xs))
 639         | (T2(_,Min adj2,_) as t2)::xs ->
 640             let is_whitespace_or_plus = function
 641                 (T2 _) as x -> is_space x
 642               | _ -> true (*plus*) in
 643             if List.for_all is_whitespace_or_plus not_minus_list
 644             then
 645               (List.map (set_minus_comment_or_plus adj1) not_minus_list)
 646               @ (adjust_within_minus cp (t2::xs))
 647             else not_minus_list @ (adjust_within_minus cp (t2::xs))
 648         | _ ->
 649             if cp
 650             then xs
 651             else
 652               let (spaces,rest) = Common.span is_space xs in
 653               (List.map (set_minus_comment_or_plus adj1) spaces)
 654               @ rest)
 655     | xs -> failwith "should always start with minus"
 656   and not_context = function
 657       (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false
 658     | _ -> true
 659   and is_plus = function
 660       C2 _ | Cocci2 _ -> true
 661     | _ -> false in
 662
 663   let xs = adjust_around_minus xs in
 664
 665   (* this drops blank lines after a brace introduced by removing code *)
 666   let minus_or_comment_nonl = function
 667       T2(_,Min adj,_) -> true
 668     | T2(Parser_c.TCommentNewline _,_b,_i) -> false
 669     | x -> is_minusable_comment x in
 670
 671   let rec adjust_after_brace = function
 672       [] -> []
 673     | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs)
 674        when str_of_token2 x =$= "{" ->
 675          let (between_minus,rest) = Common.span minus_or_comment_nonl xs in
 676          let is_whitespace = function
 677              T2(Parser_c.TCommentSpace _,_b,_i)
 678              (* patch: cocci    *)
 679            | T2(Parser_c.TCommentNewline _,_b,_i) -> true
 680            | _ -> false in
 681          let (newlines,rest) = Common.span is_whitespace rest in
 682          let (drop_newlines,last_newline) =
 683            let rec loop = function
 684                [] -> ([],[])
 685              | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
 686                  (List.rev rest,[x])
 687              | x::xs ->
 688                  let (drop_newlines,last_newline) = loop xs in
 689                  (drop_newlines,x::last_newline) in
 690            loop (List.rev newlines) in
 691          x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@
 692          last_newline@
 693          adjust_after_brace rest
 694     | x::xs -> x::adjust_after_brace xs in
 695
 696   let xs = adjust_after_brace xs in
 697
 698   (* search backwards from context } over spaces until reaching a newline.
 699      then go back over all minus code until reaching some context or + code.
 700      get rid of all intervening spaces, newlines, and comments
 701      input is reversed *)
 702   let rec adjust_before_brace = function
 703       [] -> []
 704     | ((T2(t,Ctx,_)) as x)::xs when str_of_token2 x =$= "}" or is_newline x ->
 705         let (outer_spaces,rest) = Common.span is_space xs in
 706         x :: outer_spaces @
 707         (match rest with
 708           ((T2 (Parser_c.TCommentNewline _,Ctx,_i)) as h) ::
 709           (* the rest of this code is the same as from_newline below
 710              but merging them seems to be error prone... *)
 711           ((T2 (t, Min adj, idx)) as m) :: rest ->
 712             let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
 713             h :: m ::
 714             (List.map (set_minus_comment adj) spaces) @
 715             (adjust_before_brace rest)
 716         | _ -> adjust_before_brace rest)
 717     | x::xs -> x :: (adjust_before_brace xs) in
 718
 719   let from_newline = function
 720       ((T2 (t, Min adj, idx)) as m) :: rest ->
 721         let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
 722         m ::
 723         (List.map (set_minus_comment adj) spaces) @
 724         (adjust_before_brace rest)
 725     | rest -> adjust_before_brace rest in
 726
 727   let xs = List.rev (from_newline (List.rev xs)) in
 728   let xs = drop_minus xs in
 729   xs
 730
 731 (* normally, in C code, a semicolon is not preceded by a space or newline *)
 732 let adjust_before_semicolon toks =
 733   let toks = List.rev toks in
 734   let rec search_semic = function
 735       [] -> []
 736     | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
 737         if List.mem (str_of_token2 x) [";";")";","]
 738         then x :: search_minus false xs
 739         else x :: search_semic xs
 740     | x::xs -> x :: search_semic xs
 741   and search_minus seen_minus xs =
 742     let (spaces, rest) = Common.span is_space xs in
 743     (* only delete spaces if something is actually deleted *)
 744     match rest with
 745       ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest
 746     | _ -> if seen_minus then rest else xs in
 747   List.rev (search_semic toks)
 748
 749 let is_ident_like s = s ==~ Common.regexp_alpha
 750
 751 let rec add_space xs =
 752   match xs with
 753   | [] -> []
 754   | [x] -> [x]
 755   | (Cocci2(sx,lnx,_,rcolx) as x)::((Cocci2(sy,lny,lcoly,_)) as y)::xs
 756     when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
 757       not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
 758         (* this only works within a line.  could consider whether
 759            something should be done to add newlines too, rather than
 760            printing them explicitly in unparse_cocci. *)
 761         x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
 762   | x::y::xs ->
 763       let sx = str_of_token2 x in
 764       let sy = str_of_token2 y in
 765       if is_ident_like sx && is_ident_like sy
 766       then x::C2 " "::(add_space (y::xs))
 767       else x::(add_space (y::xs))
 768
 769
 770
 771 (* When insert some new code, because of a + in a SP, we must add this
 772  * code at the right place, with the good indentation. So each time we
 773  * encounter some spacing info, with some newline, we maintain the
 774  * current indentation level used.
 775  *
 776  * TODO problems: not accurate. ex: TODO
 777  *
 778  * TODO: if in #define region, should add a \ \n
 779  *)
 780 let new_tabbing2 space =
 781   (list_of_string space)
 782     +> List.rev
 783     +> Common.take_until (fun c -> c =<= '\n')
 784     +> List.rev
 785     +> List.map string_of_char
 786     +> String.concat ""
 787
 788 let new_tabbing a =
 789   Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
 790
 791
 792 let rec adjust_indentation xs =
 793   let _current_tabbing = ref "" in
 794   let tabbing_unit = ref None in
 795
 796   let string_of_list l = String.concat "" (List.map string_of_char l) in
 797
 798   (* try to pick a tabbing unit for the plus code *)
 799   let adjust_tabbing_unit old_tab new_tab =
 800     if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
 801     then
 802       let old_tab = list_of_string old_tab in
 803       let new_tab = list_of_string new_tab in
 804       let rec loop = function
 805           ([],new_tab) ->
 806             tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
 807         | (_,[]) -> failwith "not possible"
 808         | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
 809       loop (old_tab,new_tab) in
 810
 811   let remtab tu current_tab =
 812     let current_tab = List.rev(list_of_string current_tab) in
 813     let rec loop = function
 814         ([],new_tab) -> string_of_list (List.rev new_tab)
 815       | (_,[]) -> "" (*weird; tabbing unit used up more than the current tab*)
 816       | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
 817       | (_,ns) -> (* mismatch; remove what we can *)
 818           string_of_list (List.rev ns) in
 819     loop (tu,current_tab) in
 820
 821   let rec find_first_tab started = function
 822       [] -> ()
 823     | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
 824         find_first_tab true xs
 825 (* patch: coccinelle *)
 826     | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
 827       when started ->
 828         let s = str_of_token2 x +> new_tabbing in
 829         tabbing_unit := Some (s,List.rev (list_of_string s))
 830     | x::xs -> find_first_tab started xs in
 831   find_first_tab false xs;
 832
 833   let rec aux started xs =
 834     match xs with
 835     | [] ->  []
 836 (* patch: coccinelle *)
 837     | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
 838       ((Cocci2 ("{",_,_,_)) as a)::xs
 839       when started && str_of_token2 x =$= ")" ->
 840         (* to be done for if, etc, but not for a function header *)
 841         x::(C2 " ")::a::(aux started xs)
 842     | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs ->
 843         let old_tabbing = !_current_tabbing in
 844         str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
 845         (* only trust the indentation after the first { *)
 846         (if started
 847         then adjust_tabbing_unit old_tabbing !_current_tabbing);
 848         let coccis_rest = Common.span all_coccis xs in
 849         (match coccis_rest with
 850           (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
 851             (* the case where cocci code has been added before a close } *)
 852             x::aux started (Indent_cocci2::xs)
 853         | _ -> x::aux started xs)
 854     | Indent_cocci2::xs ->
 855         (match !tabbing_unit with
 856           None -> aux started xs
 857         | Some (tu,_) ->
 858             _current_tabbing := (!_current_tabbing)^tu;
 859             Cocci2 (tu,-1,-1,-1)::aux started xs)
 860     | Unindent_cocci2(permanent)::xs ->
 861         (match !tabbing_unit with
 862           None -> aux started xs
 863         | Some (_,tu) ->
 864             _current_tabbing := remtab tu (!_current_tabbing);
 865             aux started xs)
 866     (* border between existing code and cocci code *)
 867     | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_)) as y)::xs
 868       when str_of_token2 x =$= "{" ->
 869         x::aux true (y::Indent_cocci2::xs)
 870     | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
 871       when str_of_token2 y =$= "}" ->
 872         x::aux started (y::Unindent_cocci2 true::xs)
 873     (* starting the body of the function *)
 874     | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->  x::aux true xs
 875     | ((Cocci2("{",_,_,_)) as a)::xs -> a::aux true xs
 876     | ((Cocci2("\n",_,_,_)) as x)::Unindent_cocci2(false)::xs ->
 877         x::aux started xs
 878     | ((Cocci2("\n",_,_,_)) as x)::xs ->
 879             (* dont inline in expr because of weird eval order of ocaml *)
 880         let s = !_current_tabbing in
 881         x::Cocci2 (s,-1,-1,-1)::aux started xs
 882     | x::xs -> x::aux started xs in
 883   aux false xs
 884
 885
 886 let rec find_paren_comma = function
 887   | [] -> ()
 888
 889   (* do nothing if was like this in original file *)
 890   | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
 891     ::xs when p2 =|= p1 + 1 ->
 892       find_paren_comma (x2::xs)
 893
 894   | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
 895     ::xs when p2 =|= p1 + 1 ->
 896       find_paren_comma (x2::xs)
 897
 898   | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
 899     ::xs when p2 =|= p1 + 1 ->
 900       find_paren_comma (x2::xs)
 901
 902   (* otherwise yes can adjust *)
 903   | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
 904       x2.remove <- true;
 905       find_paren_comma (x2::xs)
 906   | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
 907       x1.remove <- true;
 908       find_paren_comma (x2::xs)
 909
 910   | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
 911       x1.remove <- true;
 912       find_paren_comma (x2::xs)
 913
 914   | x::xs ->
 915       find_paren_comma xs
 916
 917
 918 let fix_tokens toks =
 919   let toks = toks +> List.map mk_token_extended in
 920
 921   let cleaner = toks +> Common.exclude (function
 922     | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
 923     | _ -> false
 924   ) in
 925   find_paren_comma cleaner;
 926
 927   let toks = rebuild_tokens_extented toks in
 928   toks +> List.map (fun x -> x.tok2)
 929
 930
 931
 932 (*****************************************************************************)
 933 (* Final unparsing (and debugging support) *)
 934 (*****************************************************************************)
 935
 936 (* for debugging *)
 937 type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
 938
 939 let kind_of_token2 = function
 940   | Fake2 -> KFake
 941   | Cocci2 _ -> KCocci
 942   | C2 _ -> KC
 943   | T2 (t,_,_) ->
 944       (match TH.pinfo_of_tok t with
 945       | ExpandedTok _ -> KExpanded
 946       | OriginTok _ -> KOrigin
 947       | FakeTok _ -> raise Impossible (* now a Fake2 *)
 948       | AbstractLineTok _ -> raise Impossible (* now a KC *)
 949       )
 950   | Unindent_cocci2 _ | Indent_cocci2 -> raise Impossible
 951
 952 let end_mark = "!"
 953
 954 let start_mark = function
 955   | KFake -> "!F!"
 956   | KCocci -> "!S!"
 957   | KC -> "!A!"
 958   | KExpanded -> "!E!"
 959   | KOrigin -> ""
 960
 961 let print_all_tokens2 pr xs =
 962   if !Flag_parsing_c.debug_unparsing
 963   then
 964     let current_kind = ref KOrigin in
 965     xs +> List.iter (fun t ->
 966       let newkind = kind_of_token2 t in
 967       if newkind =*= !current_kind
 968       then pr (str_of_token2 t)
 969       else begin
 970         pr (end_mark);
 971         pr (start_mark newkind);
 972         pr (str_of_token2 t);
 973         current_kind := newkind
 974       end
 975     );
 976   else
 977     xs +> List.iter (fun x -> pr (str_of_token2 x))
 978
 979
 980
 981
 982 (*****************************************************************************)
 983 (* Entry points *)
 984 (*****************************************************************************)
 985
 986 (* old: PPviatok was made in the beginning to allow to pretty print a
 987  * complete C file, including a modified C file by transformation.ml,
 988  * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
 989  * maybe flow_to_ast) all the cases. Indeed we don't need to do some
 990  * fancy stuff when a function was not modified at all. Just need to
 991  * print the list of token as-is. But now pretty_print_c.ml handles
 992  * almost everything so maybe less useful. Maybe PPviatok allows to
 993  * optimize a little the pretty printing.
 994  *
 995  * update: now have PPviastr which goes even faster than PPviatok, so
 996  * PPviatok has disappeared.
 997  *)
 998
 999 type ppmethod = PPnormal | PPviastr
1000
1001
1002
1003
1004 (* The pp_program function will call pretty_print_c.ml with a special
1005  * function to print the leaf components, the tokens. When we want to
1006  * print a token, we need to print also maybe the space and comments that
1007  * were close to it in the original file (and that was omitted during the
1008  * parsing phase), and honor what the cocci-info attached to the token says.
1009  * Maybe we will not print the token if it's a MINUS-token, and maybe we will
1010  * print it and also print some cocci-code attached in a PLUS to it.
1011  * So we will also maybe call unparse_cocci. Because the cocci-code may
1012  * contain metavariables, unparse_cocci will in fact sometimes call back
1013  * pretty_print_c (which will this time don't call back again unparse_cocci)
1014  *)
1015
1016 let pp_program2 xs outfile  =
1017   Common.with_open_outfile outfile (fun (pr,chan) ->
1018     let pr s =
1019       if !Flag_parsing_c.debug_unparsing
1020       then begin pr2_no_nl s; flush stderr end
1021       else pr s
1022         (* flush chan; *)
1023         (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1024     in
1025
1026     xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
1027       (* here can still work on ast *)
1028       let e = remove_useless_fakeInfo_struct e in
1029
1030       match ppmethod with
1031       | PPnormal ->
1032           (* now work on tokens *)
1033
1034           (* phase1: just get all the tokens, all the information *)
1035           assert(toks_e +> List.for_all (fun t ->
1036             TH.is_origin t or TH.is_expanded t
1037           ));
1038           let toks = get_fakeInfo_and_tokens e toks_e in
1039           let toks = displace_fake_nodes toks in
1040           (* assert Origin;ExpandedTok;Faketok *)
1041           let toks = expand_mcode toks in
1042           (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1043            * and no tag information, just NOTHING. *)
1044
1045           let toks =
1046             if !Flag.sgrep_mode2
1047             then drop_minus toks (* nothing to do for sgrep *)
1048             else
1049               (* phase2: can now start to filter and adjust *)
1050               let toks = adjust_indentation toks in
1051               let toks = adjust_before_semicolon toks in(*before remove minus*)
1052               let toks = remove_minus_and_between_and_expanded_and_fake toks in
1053               (* assert Origin + Cocci + C and no minus *)
1054               let toks = add_space toks in
1055               let toks = fix_tokens toks in
1056               toks in
1057
1058           (* in theory here could reparse and rework the ast! or
1059            * apply some SP. Not before cos julia may have generated
1060            * not parsable file. Need do unparsing_tricks call before being
1061            * ready to reparse. *)
1062           print_all_tokens2 pr toks;
1063
1064       | PPviastr -> pr str
1065     )
1066   )
1067
1068 let pp_program a b =
1069   Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
1070
1071
1072 let pp_program_default xs outfile =
1073   let xs' = xs +> List.map (fun x -> x, PPnormal) in
1074   pp_program xs' outfile