parsing_c/unparse_c.ml

   1 (* Yoann Padioleau, Julia Lawall
   2  *
   3  * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
   4  * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
   5  *
   6  * This program is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU General Public License (GPL)
   8  * version 2 as published by the Free Software Foundation.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * file license.txt for more details.
  14  *
  15  *
  16  * Modifications by Julia Lawall for better newline handling.
  17  *)
  18 open Common
  19
  20 open Ast_c
  21
  22 module TH = Token_helpers
  23
  24
  25 (* should keep comments and directives in between adjacent deleted terms,
  26 but not comments and directives within deleted terms.  should use the
  27 labels found in the control-flow graph *)
  28
  29
  30
  31 (*****************************************************************************)
  32 (* Wrappers *)
  33 (*****************************************************************************)
  34 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
  35
  36 (*****************************************************************************)
  37 (* Types used during the intermediate phases of the unparsing *)
  38 (*****************************************************************************)
  39
  40 type token1 =
  41   | Fake1 of info
  42   | T1 of Parser_c.token
  43
  44 (* The cocci_tag of the token should always be a NOTHING. The mark of
  45  * the token can only be OriginTok or ExpandedTok. Why not get rid of
  46  * token and get something simpler ? because we need to know if the
  47  * info is a TCommentCpp or TCommentSpace, etc for some of the further
  48  * analysis so easier to keep with the token.
  49  *
  50  * This type contains the whole information. Have all the tokens with this
  51  * type.
  52  *)
  53 type min =
  54     Min of (int list (* match numbers from witness trees *) *
  55               Ast_cocci.adjacency (* adjacency information *))
  56   | Ctx
  57
  58 type token2 =
  59   | T2 of Parser_c.token * min *
  60           int option (* orig index, abstracting away comments and space *)
  61   | Fake2 of min
  62   | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
  63         * Unparse_cocci.nlhint option
  64   | C2 of string
  65   | Comma of string
  66   | Indent_cocci2
  67   | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
  68
  69 (* not used yet *)
  70 type token3 =
  71   | T3 of Parser_c.token
  72   | Cocci3 of string
  73   | C3 of string
  74
  75
  76 (* similar to the tech in parsing_hack *)
  77 type token_extended = {
  78   tok2 : token2;
  79   str  : string;
  80   idx: int option; (* to know if 2 tokens were consecutive in orig file *)
  81   mutable new_tokens_before : token2 list;
  82   mutable remove : bool;
  83 }
  84
  85
  86 (*****************************************************************************)
  87 (* Helpers *)
  88 (*****************************************************************************)
  89
  90 let info_of_token1 t =
  91   match t with
  92   | Fake1 info -> info
  93   | T1 tok -> TH.info_of_tok tok
  94
  95 let print_token1 = function
  96     T1 tok -> TH.str_of_tok tok
  97   | Fake1 info -> "fake"
  98
  99 let str_of_token2 = function
 100   | T2 (t,_,_) -> TH.str_of_tok t
 101   | Fake2 _ -> ""
 102   | Cocci2 (s,_,_,_,_) -> s
 103   | C2 s -> s
 104   | Comma s -> s
 105   | Indent_cocci2 -> ""
 106   | Unindent_cocci2 _ -> ""
 107
 108 let print_token2 = function
 109   | T2 (t,b,_) ->
 110       let t_str =
 111         match t with
 112         | Parser_c.TCommentSpace _ -> " sp "
 113         | Parser_c.TCommentNewline _ -> " nl "
 114         | Parser_c.TCommentCpp _ -> " cp "
 115         | Parser_c.TCommentMisc _ -> " misc "
 116         | Parser_c.TComment _ -> " comment "
 117         | _ -> "" in
 118       let b_str =
 119         match b with
 120           Min (index,adj) ->
 121             Printf.sprintf "-%d[%s]"
 122               (match adj with Ast_cocci.ADJ n -> n | _ -> -1)
 123               (String.concat " " (List.map string_of_int index))
 124         | Ctx -> "" in
 125       "T2:"^b_str^t_str^TH.str_of_tok t
 126   | Fake2 b ->
 127       let b_str =
 128         match b with
 129           Min (index,adj) ->
 130             Printf.sprintf "-%d[%s]"
 131               (match adj with Ast_cocci.ADJ n -> n | _ -> -1)
 132               (String.concat " " (List.map string_of_int index))
 133         | Ctx -> "" in
 134       b_str^"fake"
 135   | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
 136   | C2 s -> "C2:"^s
 137   | Comma s -> "Comma:"^s
 138   | Indent_cocci2 -> "Indent"
 139   | Unindent_cocci2 _ -> "Unindent"
 140
 141 let simple_print_all_tokens1 l =
 142   List.iter (function x -> Printf.printf "|%s| " (print_token1 x)) l;
 143   Printf.printf "\n"
 144
 145 let simple_print_all_tokens2 l =
 146   List.iter (function x -> Printf.printf "|%s| " (print_token2 x)) l;
 147   Printf.printf "\n"
 148
 149 let str_of_token3 = function
 150   | T3 t -> TH.str_of_tok t
 151   | Cocci3 s | C3 s -> s
 152
 153
 154
 155 let mk_token_extended x =
 156   let origidx =
 157     match x with
 158     | T2 (_,_, idx) -> idx
 159     | _ -> None
 160   in
 161   { tok2 = x;
 162     str = str_of_token2 x;
 163     idx = origidx;
 164     new_tokens_before = [];
 165     remove = false;
 166   }
 167
 168 let rebuild_tokens_extented toks_ext =
 169   let _tokens = ref [] in
 170   toks_ext +> List.iter (fun tok ->
 171     tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
 172     if not tok.remove then push2 tok.tok2 _tokens;
 173   );
 174   let tokens = List.rev !_tokens in
 175   (tokens +> List.map mk_token_extended)
 176
 177
 178 let mcode_contain_plus = function
 179   | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
 180   | Ast_cocci.CONTEXT _ -> true
 181 (* patch: when need full coccinelle transformation *)
 182   | Ast_cocci.MINUS (_,_,_,Ast_cocci.NOREPLACEMENT) -> false
 183   | Ast_cocci.MINUS (_,_,_,Ast_cocci.REPLACEMENT _) -> true(*REPL is not empty*)
 184   | Ast_cocci.PLUS _ -> raise Impossible
 185
 186 let contain_plus info =
 187   let mck = Ast_c.mcode_of_info info in
 188   mcode_contain_plus mck
 189
 190 (*****************************************************************************)
 191 (* Last fix on the ast *)
 192 (*****************************************************************************)
 193
 194 (* Because of the ugly trick to handle initialiser, I generate fake ','
 195  * for the last initializer element, but if there is nothing around it,
 196  * I don't want in the end to print it.
 197  *)
 198
 199 let remove_useless_fakeInfo_struct program =
 200   let bigf = { Visitor_c.default_visitor_c_s with
 201     Visitor_c.kini_s = (fun (k,bigf) ini ->
 202       match k ini with
 203       | InitList args, ii ->
 204           (match ii with
 205           | [_i1;_i2] -> ini
 206           | [i1;i2;iicommaopt] ->
 207               if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
 208                  && (Ast_c.is_fake iicommaopt)
 209                  (* sometimes the guy put a normal iicommaopt *)
 210               then InitList args, [i1;i2]
 211               else InitList args, [i1;i2;iicommaopt]
 212           | [i1;i2;iicommaopt;end_comma_opt] ->
 213               (* only in #define. end_comma_opt canot be fake *)
 214               (* not sure if this will be considered ambiguous with a previous
 215                  case? *)
 216               if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
 217                  && (Ast_c.is_fake iicommaopt)
 218                  (* sometimes the guy put a normal iicommaopt *)
 219               then InitList args, [i1;i2;end_comma_opt]
 220               else InitList args, [i1;i2;iicommaopt;end_comma_opt]
 221           | _ -> raise Impossible
 222           )
 223       | x -> x
 224     )
 225   } in
 226   Visitor_c.vk_toplevel_s bigf program
 227
 228
 229 (*****************************************************************************)
 230 (* Tokens1 generation *)
 231 (*****************************************************************************)
 232
 233 let get_fakeInfo_and_tokens celem toks =
 234   let toks_in  = ref toks in
 235   let toks_out = ref [] in
 236
 237   (* todo? verify good order of position ? *)
 238   let pr_elem info =
 239     match Ast_c.pinfo_of_info info with
 240     | FakeTok _ ->
 241         Common.push2 (Fake1 info) toks_out
 242     | OriginTok _ | ExpandedTok _ ->
 243         (* get the associated comments/space/cppcomment tokens *)
 244         let (before, x, after) =
 245           !toks_in +> Common.split_when (fun tok ->
 246           info =*= TH.info_of_tok tok)
 247         in
 248         assert(info =*= TH.info_of_tok x);
 249         (*old: assert(before +> List.for_all (TH.is_comment)); *)
 250         before +> List.iter (fun x ->
 251           if not (TH.is_comment x)
 252           then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
 253           (* case such as  int asm d3("x"); not yet in ast *)
 254         );
 255         before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
 256         push2 (T1 x) toks_out;
 257         toks_in := after;
 258     | AbstractLineTok _ ->
 259         (* can be called on type info when for instance use -type_c *)
 260         if !Flag_parsing_c.pretty_print_type_info
 261         then Common.push2 (Fake1 info) toks_out
 262         else raise Impossible (* at this stage *)
 263   in
 264
 265   let pr_space _ = () in (* use the spacing that is there already *)
 266
 267   Pretty_print_c.pp_program_gen pr_elem pr_space celem;
 268
 269   if not (null !toks_in)
 270   then failwith "WEIRD: unparsing not finished";
 271
 272   List.rev !toks_out
 273
 274 (* Fake nodes that have BEFORE code or are - should be moved over any subsequent
 275 whitespace and newlines, but not any comments, to get as close to the affected
 276 code as possible.  Similarly, fake nodes that have AFTER code should be moved
 277 backwards.  No fake nodes should have both before and after code. *)
 278
 279 let displace_fake_nodes toks =
 280   let is_fake = function Fake1 _ -> true | _ -> false in
 281   let is_whitespace = function
 282       T1(Parser_c.TCommentSpace _)
 283       (* patch: cocci    *)
 284     | T1(Parser_c.TCommentNewline _) -> true
 285     | _ -> false in
 286   let rec loop toks =
 287     let fake_info =
 288       try Some (Common.split_when is_fake toks)
 289       with Not_found -> None in
 290     match fake_info with
 291       Some(bef,((Fake1 info) as fake),aft) ->
 292         (match !(info.cocci_tag) with
 293         | Some x ->
 294           (match x with
 295             (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_)
 296           | (Ast_cocci.MINUS(_,_,_,Ast_cocci.REPLACEMENT _),_) ->
 297             (* move the fake node forwards *)
 298               let (whitespace,rest) = Common.span is_whitespace aft in
 299               bef @ whitespace @ fake :: (loop rest)
 300           | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
 301             (* move the fake node backwards *)
 302               let revbef = List.rev bef in
 303               let (revwhitespace,revprev) = Common.span is_whitespace revbef in
 304               let whitespace = List.rev revwhitespace in
 305               let prev = List.rev revprev in
 306               prev @ fake :: (loop (whitespace @ aft))
 307           | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
 308               bef @ fake :: (loop aft)
 309           | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
 310               failwith "fake node should not be before-after"
 311           | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
 312         )
 313         | None ->
 314             bef @ fake :: (loop aft)
 315         )
 316     | None -> toks
 317     | _ -> raise Impossible in
 318   loop toks
 319
 320 (*****************************************************************************)
 321 (* Tokens2 generation *)
 322 (*****************************************************************************)
 323
 324 let comment2t2 = function
 325     (Token_c.TCommentCpp
 326        (* not sure iif the following list is exhaustive or complete *)
 327        (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError),
 328      (info : Token_c.info)) ->
 329       C2(info.Common.str)
 330   | (Token_c.TCommentCpp x,(info : Token_c.info)) ->
 331       C2("\n"^info.Common.str^"\n")
 332   | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
 333
 334 let expand_mcode toks =
 335   let toks_out = ref [] in
 336
 337   let index = ref 0 in
 338
 339   let add_elem t minus =
 340     match t with
 341     | Fake1 info ->
 342         let str = Ast_c.str_of_info info in
 343         if str =$= ""
 344         then push2 (Fake2 minus) toks_out
 345         (* fx the fake "," at the end of a structure or enum.
 346            no idea what other fake info there can be... *)
 347         else push2 (Comma str) toks_out
 348
 349
 350     | T1 tok ->
 351         (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
 352         (* no tag on expandedTok ! *)
 353         let modified = function
 354             None -> false
 355           | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
 356           | _ -> true in
 357         (if (TH.is_expanded tok &&
 358              modified !((TH.info_of_tok tok).cocci_tag)
 359             (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*))
 360         then
 361           failwith
 362             (Printf.sprintf
 363                "expanded token %s on line %d is either modified or stored in a metavariable"
 364                (TH.str_of_tok tok) (TH.line_of_tok tok)));
 365
 366         let tok' = tok +> TH.visitor_info_of_tok (fun i ->
 367           { i with cocci_tag = ref Ast_c.emptyAnnot; }
 368         ) in
 369
 370         let optindex =
 371           if TH.is_origin tok && not (TH.is_real_comment tok)
 372           then begin
 373               incr index;
 374               Some !index
 375           end
 376           else None
 377         in
 378
 379         push2 (T2 (tok', minus, optindex)) toks_out
 380   in
 381
 382   let expand_info t =
 383     let (mcode,env) =
 384       Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
 385
 386     let pr_cocci s ln col rcol hint =
 387       push2 (Cocci2(s,ln,col,rcol,hint)) toks_out  in
 388     let pr_c info =
 389       (match Ast_c.pinfo_of_info info with
 390         Ast_c.AbstractLineTok _ ->
 391           push2 (C2 (Ast_c.str_of_info info)) toks_out
 392       | Ast_c.FakeTok (s,_) ->
 393           push2 (C2 s) toks_out
 394       | _ ->
 395           Printf.printf "line: %s\n" (Common.dump info);
 396           failwith "not an abstract line");
 397       (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
 398       List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
 399
 400     let pr_barrier ln col = (* marks a position, used around C code *)
 401       push2 (Cocci2("",ln,col,col,None)) toks_out  in
 402     let pr_nobarrier ln col = () in (* not needed for linux spacing *)
 403
 404     let pr_cspace _ = push2 (C2 " ") toks_out in
 405
 406     let pr_space _ = () (* rely on add_space in cocci code *) in
 407     let pr_arity _ = () (* not interested *) in
 408
 409     let indent _   = push2 Indent_cocci2 toks_out in
 410     let unindent x = push2 (Unindent_cocci2 x) toks_out in
 411
 412     let args_pp =
 413       (env, pr_cocci, pr_c, pr_cspace,
 414        (match !Flag_parsing_c.spacing with
 415          Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
 416        pr_arity,
 417        (match !Flag_parsing_c.spacing with
 418          Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
 419        indent, unindent) in
 420
 421     (* old: when for yacfe with partial cocci:
 422      *    add_elem t false;
 423     *)
 424
 425     (* patch: when need full coccinelle transformation *)
 426     let unparser = Unparse_cocci.pp_list_list_any args_pp false in
 427     match mcode with
 428     | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
 429         (* Why adding ? because I want to have all the information, the whole
 430          * set of tokens, so I can then process and remove the
 431          * is_between_two_minus for instance *)
 432         add_elem t (Min (inst,adj));
 433         (match any_xxs with
 434           Ast_cocci.NOREPLACEMENT -> ()
 435         | Ast_cocci.REPLACEMENT(any_xxs,_) ->
 436             unparser any_xxs Unparse_cocci.InPlace)
 437     | Ast_cocci.CONTEXT (_,any_befaft) ->
 438         (match any_befaft with
 439         | Ast_cocci.NOTHING ->
 440             add_elem t Ctx
 441         | Ast_cocci.BEFORE (xxs,_) ->
 442             unparser xxs Unparse_cocci.Before;
 443             add_elem t Ctx
 444         | Ast_cocci.AFTER (xxs,_) ->
 445             add_elem t Ctx;
 446             unparser xxs Unparse_cocci.After;
 447         | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
 448             unparser xxs Unparse_cocci.Before;
 449             add_elem t Ctx;
 450             unparser yys Unparse_cocci.After;
 451         )
 452     | Ast_cocci.PLUS _ -> raise Impossible
 453
 454   in
 455
 456   toks +> List.iter expand_info;
 457   List.rev !toks_out
 458
 459
 460 (*****************************************************************************)
 461 (* Tokens2 processing, filtering, adjusting *)
 462 (*****************************************************************************)
 463
 464 let is_space = function
 465   | T2(Parser_c.TCommentSpace _,_b,_i) -> true  (* only whitespace *)
 466   | _ -> false
 467
 468 let is_newline = function
 469   | T2(Parser_c.TCommentNewline _,_b,_i) -> true
 470   | _ -> false
 471
 472 let is_whitespace = function
 473   | (T2 (t,_b,_i)) ->
 474       (match t with
 475       | Parser_c.TCommentSpace _ -> true  (* only whitespace *)
 476       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
 477       | _ -> false
 478       )
 479   | _ -> false
 480
 481 let is_minusable_comment = function
 482   | (T2 (t,_b,_i)) ->
 483       (match t with
 484       | Parser_c.TCommentSpace _   (* only whitespace *)
 485       (* patch: coccinelle *)
 486       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
 487       | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
 488       | Parser_c.TComment _
 489       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 490       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 491       | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
 492         -> true
 493
 494       | Parser_c.TCommentMisc _
 495       | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
 496         -> false
 497
 498       | _ -> false
 499       )
 500   | _ -> false
 501
 502 let is_minusable_comment_nocpp = function
 503   | (T2 (t,_b,_i)) ->
 504       (match t with
 505       | Parser_c.TCommentSpace _   (* only whitespace *)
 506       (* patch: coccinelle *)
 507       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
 508       | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
 509       | Parser_c.TComment _ -> true
 510       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 511       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 512       | Parser_c.TCommentCpp (Token_c.CppDirective, _)
 513         -> false
 514
 515       | Parser_c.TCommentMisc _
 516       | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
 517         -> false
 518
 519       | _ -> false
 520       )
 521   | _ -> false
 522
 523 let all_coccis = function
 524     Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ -> true
 525   | _ -> false
 526
 527 (*previously gave up if the first character was a newline, but not clear why*)
 528 let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
 529
 530 let set_minus_comment adj = function
 531   | T2 (t,Ctx,idx) ->
 532       let str = TH.str_of_tok t in
 533       (match t with
 534       | Parser_c.TCommentSpace _
 535 (* patch: coccinelle *)
 536       | Parser_c.TCommentNewline _ -> ()
 537
 538       | Parser_c.TComment _
 539       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 540       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 541       | Parser_c.TCommentCpp (Token_c.CppDirective, _)
 542         ->
 543           pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
 544                  (TH.line_of_tok t) str)
 545       | _ -> raise Impossible
 546       );
 547       T2 (t, Min adj, idx)
 548 (* patch: coccinelle *)
 549   | T2 (t,Min adj,idx) as x -> x
 550   | Fake2 _ as x -> x
 551   | _ -> raise Impossible
 552
 553 let set_minus_comment_or_plus adj = function
 554     Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ as x -> x
 555   | x -> set_minus_comment adj x
 556
 557 let drop_minus xs =
 558   xs +> Common.exclude (function
 559     | T2 (t,Min adj,_) -> true
 560     | _ -> false
 561   )
 562
 563 let drop_expanded xs =
 564   xs +> Common.exclude (function
 565     | T2 (t,_,_) when TH.is_expanded t -> true
 566     | _ -> false
 567   )
 568
 569 let drop_fake xs =
 570   xs +> Common.exclude (function
 571     | Fake2 _ -> true
 572     | _ -> false
 573   )
 574
 575 let remove_minus_and_between_and_expanded_and_fake xs =
 576
 577   (* get rid of expanded tok *)
 578   let xs = drop_expanded xs in
 579
 580   let minus_or_comment = function
 581       T2(_,Min adj,_) -> true
 582     | x -> is_minusable_comment x in
 583
 584   let minus_or_comment_nocpp = function
 585       T2(_,Min adj,_) -> true
 586     | x -> is_minusable_comment_nocpp x in
 587
 588   let common_adj (index1,adj1) (index2,adj2) =
 589     let same_adj = (* same adjacency info *)
 590       match (adj1,adj2) with
 591         (Ast_cocci.ADJ adj1,Ast_cocci.ADJ adj2) -> adj1 = adj2
 592       | (Ast_cocci.ALLMINUS,_) | (_,Ast_cocci.ALLMINUS) -> true in
 593     same_adj &&
 594     (* non-empty intersection of witness trees *)
 595     not ((Common.inter_set index1 index2) = []) in
 596
 597   (* new idea: collects regions not containing non-space context code
 598      if two adjacent adjacent minus tokens satisfy common_adj then delete
 599      all spaces, comments etc between them
 600      if two adjacent minus tokens do not satisfy common_adj only delete
 601      the spaces between them if there are no comments, etc.
 602      if the region contain no plus code and is both preceded and followed
 603      by a newline, delete the initial newline. *)
 604
 605   let rec adjust_around_minus = function
 606       [] -> []
 607     | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
 608       ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs ->
 609         let (minus_list,rest) = Common.span not_context (t1::xs) in
 610         let contains_plus = List.exists is_plus minus_list in
 611         let x =
 612           match List.rev minus_list with
 613             (T2(Parser_c.TCommentNewline c,_b,_i))::rest
 614             when List.for_all minus_or_comment minus_list ->
 615               set_minus_comment_or_plus adj1 x
 616           | _ -> x in
 617         x :: adjust_within_minus contains_plus minus_list @
 618         adjust_around_minus rest
 619     | ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs ->
 620         let (minus_list,rest) = Common.span not_context (t1::xs) in
 621         let contains_plus = List.exists is_plus minus_list in
 622         adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
 623     | x::xs ->
 624         x :: adjust_around_minus xs
 625   and adjust_within_minus cp (* contains plus *) = function
 626       ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs ->
 627         let not_minus = function T2(_,Min _,_) -> false | _ -> true in
 628         let (not_minus_list,rest) = Common.span not_minus xs in
 629         t1 ::
 630         (match rest with
 631           ((Fake2(Min adj2) | T2(_,Min adj2,_)) as t2)::xs
 632           when common_adj adj1 adj2 ->
 633             (List.map (set_minus_comment_or_plus adj1) not_minus_list)
 634             @ (adjust_within_minus cp (t2::xs))
 635         | ((Fake2(Min adj2) | T2(_,Min adj2,_)) as t2)::xs ->
 636             if not cp && List.for_all is_whitespace not_minus_list
 637             then
 638               (List.map (set_minus_comment_or_plus adj1) not_minus_list)
 639               @ (adjust_within_minus cp (t2::xs))
 640             else
 641               not_minus_list @ (adjust_within_minus cp (t2::xs))
 642         | _ ->
 643             if cp
 644             then xs
 645             else
 646               (* remove spaces after removed stuff, eg a comma after a
 647                  function argument *)
 648               let (spaces,rest) = Common.span is_space xs in
 649               (List.map (set_minus_comment_or_plus adj1) spaces)
 650               @ rest)
 651     | xs -> failwith "should always start with minus"
 652   and not_context = function
 653       (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false
 654     | _ -> true
 655   and is_plus = function
 656       C2 _ | Comma _ | Cocci2 _ -> true
 657     | _ -> false in
 658
 659   let xs = adjust_around_minus xs in
 660
 661   (* get rid of fake tok *)
 662   let xs = drop_fake xs in
 663
 664   (* this drops blank lines after a brace introduced by removing code *)
 665   let minus_or_comment_nonl = function
 666       T2(_,Min adj,_) -> true
 667     | T2(Parser_c.TCommentNewline _,_b,_i) -> false
 668     | x -> is_minusable_comment x in
 669
 670   let rec adjust_after_brace = function
 671       [] -> []
 672     | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs)
 673        when str_of_token2 x =$= "{" ->
 674          let (between_minus,rest) = Common.span minus_or_comment_nonl xs in
 675          let is_whitespace = function
 676              T2(Parser_c.TCommentSpace _,_b,_i)
 677              (* patch: cocci    *)
 678            | T2(Parser_c.TCommentNewline _,_b,_i) -> true
 679            | _ -> false in
 680          let (newlines,rest) = Common.span is_whitespace rest in
 681          let (drop_newlines,last_newline) =
 682            let rec loop = function
 683                [] -> ([],[])
 684              | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
 685                  (List.rev rest,[x])
 686              | x::xs ->
 687                  let (drop_newlines,last_newline) = loop xs in
 688                  (drop_newlines,x::last_newline) in
 689            loop (List.rev newlines) in
 690          x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@
 691          last_newline@
 692          adjust_after_brace rest
 693     | x::xs -> x::adjust_after_brace xs in
 694
 695   let xs = adjust_after_brace xs in
 696
 697   (* search backwards from context } over spaces until reaching a newline.
 698      then go back over all minus code until reaching some context or + code.
 699      get rid of all intervening spaces, newlines, and comments
 700      input is reversed *)
 701   let rec adjust_before_brace = function
 702       [] -> []
 703     | ((T2(t,Ctx,_)) as x)::xs when str_of_token2 x =$= "}" or is_newline x ->
 704         let (outer_spaces,rest) = Common.span is_space xs in
 705         x :: outer_spaces @
 706         (match rest with
 707           ((T2 (Parser_c.TCommentNewline _,Ctx,_i)) as h) ::
 708           (* the rest of this code is the same as from_newline below
 709              but merging them seems to be error prone... *)
 710           ((T2 (t, Min adj, idx)) as m) :: rest ->
 711             let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
 712             h :: m ::
 713             (List.map (set_minus_comment adj) spaces) @
 714             (adjust_before_brace rest)
 715         | _ -> adjust_before_brace rest)
 716     | x::xs -> x :: (adjust_before_brace xs) in
 717
 718   let from_newline = function
 719       ((T2 (t, Min adj, idx)) as m) :: rest ->
 720         let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
 721         m ::
 722         (List.map (set_minus_comment adj) spaces) @
 723         (adjust_before_brace rest)
 724     | ((T2 (t0, Ctx, idx0)) as m0) :: ((T2 (t, Min adj, idx)) as m) :: rest
 725         when TH.str_of_tok t0 = "" ->
 726           (* This is for the case of a #define that is completely deleted,
 727              because a #define has a strange EOL token at the end.
 728              We hope there i no other kind of token that is represented by
 729              "", but it seems like changing the kind of token might break
 730              the end of entity recognition in the C parser.
 731              See parsing_hacks.ml *)
 732           let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
 733           m0 :: m ::
 734           (List.map (set_minus_comment adj) spaces) @
 735           (adjust_before_brace rest)
 736     | rest -> adjust_before_brace rest in
 737
 738   let xs = List.rev (from_newline (List.rev xs)) in
 739   let xs = drop_minus xs in
 740   xs
 741
 742 (* normally, in C code, a semicolon is not preceded by a space or newline *)
 743 let adjust_before_semicolon toks =
 744   let toks = List.rev toks in
 745   let rec search_semic = function
 746       [] -> []
 747     | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
 748         if List.mem (str_of_token2 x) [";";")";","]
 749         then x :: search_semic (search_minus false xs)
 750         else x :: search_semic xs
 751     | x::xs -> x :: search_semic xs
 752   and search_minus seen_minus xs =
 753     let (spaces, rest) = Common.span is_space xs in
 754     (* only delete spaces if something is actually deleted *)
 755     match rest with
 756       ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest
 757     | _ -> if seen_minus then rest else xs in
 758   List.rev (search_semic toks)
 759
 760 (* normally, in C code, a ( is not followed by a space or newline *)
 761 let adjust_after_paren toks =
 762   let rec search_paren = function
 763       [] -> []
 764     | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
 765         if List.mem (str_of_token2 x) ["("] (* other things? *)
 766         then x :: search_paren(search_minus false xs)
 767         else x :: search_paren xs
 768     | x::xs -> x :: search_paren xs
 769   and search_minus seen_minus xs =
 770     let (spaces, rest) = Common.span is_whitespace xs in
 771     (* only delete spaces if something is actually deleted *)
 772     match rest with
 773       ((T2(_,Min _,_)) as a)::rerest -> (* minus *)
 774         a :: search_minus true rerest
 775     | ((T2(_,Ctx,_)) as a)::rerest when str_of_token2 a = "," ->
 776         (* comma after ( will be deleted, so consider it as minus code
 777            already *)
 778         a :: search_minus true rerest
 779     | _ -> if seen_minus then rest else xs in (* drop trailing space *)
 780   search_paren toks
 781
 782 (* this is for the case where braces are added around an if branch *)
 783 let paren_then_brace toks =
 784   let rec search_paren = function
 785       [] -> []
 786     | ((T2(_,Ctx,_)) as x)::xs ->
 787         if List.mem (str_of_token2 x) [")"]
 788         then x :: search_paren(search_plus xs)
 789         else x :: search_paren xs
 790     | x::xs -> x :: search_paren xs
 791   and search_plus xs =
 792     let (spaces, rest) = Common.span is_whitespace xs in
 793     match rest with
 794       (* move the brace up to the previous line *)
 795       ((Cocci2("{",_,_,_,_)) as x) :: (((Cocci2 _) :: _) as rest) ->
 796         (C2 " ") :: x :: spaces @ rest
 797     | _ -> xs in
 798   search_paren toks
 799
 800 let is_ident_like s = s ==~ Common.regexp_alpha
 801
 802 let rec drop_space_at_endline = function
 803     [] -> []
 804   | [x] -> [x]
 805   | (C2 " ") ::
 806     ((((T2(Parser_c.TCommentSpace _,Ctx,_)) | Cocci2("\n",_,_,_,_) |
 807     (T2(Parser_c.TCommentNewline _,Ctx,_))) :: _) as rest) ->
 808       (* when unparse_cocci doesn't know whether space is needed *)
 809       drop_space_at_endline rest
 810   | ((T2(Parser_c.TCommentSpace _,Ctx,_i)) as a)::rest ->
 811       let (outer_spaces,rest) = Common.span is_space rest in
 812       let minus_or_comment_or_space_nocpp = function
 813           T2(_,Min adj,_) -> true
 814         | (T2(Parser_c.TCommentSpace _,Ctx,_i)) -> true
 815         | (T2(Parser_c.TCommentNewline _,Ctx,_i)) -> false
 816         | x -> false in
 817       let (minus,rest) = Common.span minus_or_comment_or_space_nocpp rest in
 818       let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in
 819       if List.exists (function T2(_,Min adj,_) -> true | _ -> false) minus
 820       then
 821         match rest with
 822           ((T2(Parser_c.TCommentNewline _,Ctx,_i)) as a)::rest ->
 823             (* drop trailing spaces *)
 824             minus@a::(drop_space_at_endline rest)
 825         | _ -> fail()
 826       else fail()
 827   | a :: rest ->
 828       a :: drop_space_at_endline rest
 829
 830 (* if a removed ( is between two tokens, then add a space *)
 831 let rec paren_to_space = function
 832     [] -> []
 833   | [x] -> [x]
 834   | [x;y] -> [x;y]
 835   | ((T2(_,Ctx,_)) as a)::((T2(t,Min _,_)) as b)::((T2(_,Ctx,_)) as c)::rest
 836     when not (is_whitespace a) && TH.str_of_tok t = "(" ->
 837       a :: b :: (C2 " ") :: (paren_to_space (c :: rest))
 838   | a :: rest -> a :: (paren_to_space rest)
 839
 840 let rec add_space xs =
 841   match xs with
 842   | [] -> []
 843   | [x] -> [x]
 844   | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
 845     when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
 846       not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
 847         (* this only works within a line.  could consider whether
 848            something should be done to add newlines too, rather than
 849            printing them explicitly in unparse_cocci. *)
 850         x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
 851   | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
 852     when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
 853       not (lnx = -1) && lnx < lny && not (rcolx = -1) ->
 854         (* this only works within a line.  could consider whether
 855            something should be done to add newlines too, rather than
 856            printing them explicitly in unparse_cocci. *)
 857         x::C2 (String.make (lny-lnx) '\n')::
 858         C2 (String.make (lcoly-1) ' '):: (* -1 is for the + *)
 859         add_space (y::xs)
 860   | ((T2(_,Ctx,_)) as x)::((Cocci2 _) as y)::xs -> (* add space on boundary *)
 861       let sx = str_of_token2 x in
 862       let sy = str_of_token2 y in
 863       if is_ident_like sx && (is_ident_like sy or List.mem sy ["="])
 864       then x::C2 " "::(add_space (y::xs))
 865       else x::(add_space (y::xs))
 866   | x::y::xs -> (* not boundary, not sure if it is possible *)
 867       let sx = str_of_token2 x in
 868       let sy = str_of_token2 y in
 869       if is_ident_like sx && is_ident_like sy
 870       then x::C2 " "::(add_space (y::xs))
 871       else x::(add_space (y::xs))
 872
 873 (* A fake comma is added at the end of an unordered initlist or a enum
 874 decl, if the initlist or enum doesn't already end in a comma.  This is only
 875 needed if there is + code, ie if we see Cocci after it in the code sequence *)
 876
 877 let rec drop_end_comma = function
 878     [] -> []
 879   | [x] -> [x]
 880   | ((Comma ",") as x) :: rest ->
 881       let (newlines,rest2) = Common.span is_whitespace rest in
 882       (match rest2 with
 883         (Cocci2 _) :: _ -> x :: drop_end_comma rest
 884       | _ -> drop_end_comma rest)
 885   | x :: xs -> x :: drop_end_comma xs
 886
 887 (* The following only works for the outermost function call.  Stack records
 888 the column of all open parentheses.  Space_cell contains the most recent
 889 comma in the outermost function call.  The goal is to decide whether this
 890 should be followed by a space or a newline and indent. *)
 891 let add_newlines toks tabbing_unit =
 892   let create_indent n =
 893     let (tu,tlen) =
 894       match tabbing_unit with
 895         Some ("\t",_) -> ("\t",8)
 896       | Some ("",_) -> ("\t",8) (* not sure why... *)
 897       | Some (s,_) -> (s,String.length s) (* assuming only spaces *)
 898       | None -> ("\t",8) in
 899     let rec loop seen =
 900       if seen + tlen <= n
 901       then tu ^ loop (seen + tlen)
 902       else String.make (n-seen) ' ' in
 903     loop 0 in
 904   let check_for_newline count x = function
 905       Some (start,space_cell) when count > Flag_parsing_c.max_width ->
 906         space_cell := "\n"^(create_indent x);
 907         Some (x + (count - start))
 908     | _ -> None in
 909   (* the following is for strings that may contain newline *)
 910   let string_length s count =
 911     let l = list_of_string s in
 912     List.fold_left
 913       (function count ->
 914         function
 915             '\t' -> count + 8
 916           | '\n' -> 0
 917           | c -> count + 1)
 918       count l in
 919   let rec loop info count = function
 920       [] -> []
 921     | ((T2(tok,_,_)) as a)::xs ->
 922         a :: loop info (string_length (TH.str_of_tok tok) count) xs
 923     | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs ->
 924         let (stack,space_cell) = info in
 925         let rest =
 926           match hint with
 927             None -> loop info (string_length s count) xs
 928           | Some Unparse_cocci.StartBox ->
 929               let count = string_length s count in
 930               loop (count::stack,space_cell) count xs
 931           | Some Unparse_cocci.EndBox ->
 932               let count = string_length s count in
 933               (match stack with
 934                 [x] ->
 935                   (match check_for_newline count x space_cell with
 936                     Some count -> loop ([],None) count xs
 937                   | None -> loop ([],None) count xs)
 938               | _ -> loop (List.tl stack,space_cell) count xs)
 939           | Some (Unparse_cocci.SpaceOrNewline sp) ->
 940               let count = string_length s (count + 1 (*space*)) in
 941               (match stack with
 942                 [x] ->
 943                   (match check_for_newline count x space_cell with
 944                     Some count -> loop (stack,Some (x,sp)) count xs
 945                   | None -> loop (stack,Some (count,sp)) count xs)
 946               | _ -> loop info count xs) in
 947         a :: rest
 948     | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs
 949     | ((Comma(s)) as a)::xs -> a :: loop info (string_length s count) xs
 950     | Fake2 _ :: _ | Indent_cocci2 :: _
 951     | Unindent_cocci2 _::_ ->
 952         failwith "unexpected fake, indent, or unindent" in
 953   let redo_spaces prev = function
 954       Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) ->
 955         C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev
 956     | t -> t::prev in
 957   (match !Flag_parsing_c.spacing with
 958     Flag_parsing_c.SMPL -> toks
 959   | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks)))
 960
 961 (* When insert some new code, because of a + in a SP, we must add this
 962  * code at the right place, with the good indentation. So each time we
 963  * encounter some spacing info, with some newline, we maintain the
 964  * current indentation level used.
 965  *
 966  * TODO problems: not accurate. ex: TODO
 967  *
 968  * TODO: if in #define region, should add a \ \n
 969  *)
 970 let new_tabbing2 space =
 971   (list_of_string space)
 972     +> List.rev
 973     +> Common.take_until (fun c -> c =<= '\n')
 974     +> List.rev
 975     +> List.map string_of_char
 976     +> String.concat ""
 977
 978 let new_tabbing a =
 979   Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
 980
 981
 982 let rec adjust_indentation xs =
 983
 984   let _current_tabbing = ref ([] : string list) in
 985   let tabbing_unit = ref None in
 986
 987   let string_of_list l = String.concat "" (List.map string_of_char l) in
 988
 989   (* try to pick a tabbing unit for the plus code *)
 990   let adjust_tabbing_unit old_tab new_tab =
 991     if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
 992     then
 993       let old_tab = list_of_string old_tab in
 994       let new_tab = list_of_string new_tab in
 995       let rec loop = function
 996           ([],new_tab) ->
 997             tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
 998         | (_,[]) -> failwith "not possible"
 999         | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
1000       loop (old_tab,new_tab) in
1001
1002 (*
1003   let remtab tu current_tab =
1004     let current_tab = List.rev(list_of_string current_tab) in
1005     let rec loop = function
1006         ([],new_tab) -> string_of_list (List.rev new_tab)
1007       | (_,[]) -> (-*weird; tabbing unit used up more than the current tab*-)
1008         ""
1009       | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
1010       | (_,ns) -> (-* mismatch; remove what we can *-)
1011           string_of_list (List.rev ns) in
1012     loop (tu,current_tab) in
1013 *)
1014
1015   let rec find_first_tab started = function
1016       [] -> ()
1017     | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
1018         find_first_tab true xs
1019 (* patch: coccinelle *)
1020     | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
1021       when started ->
1022         let s = str_of_token2 x +> new_tabbing in
1023         tabbing_unit := Some (s,List.rev (list_of_string s))
1024     | x::xs -> find_first_tab started xs in
1025   find_first_tab false xs;
1026
1027   let rec balanced ct = function
1028       [] -> ct >= 0
1029     | ((T2(tok,_,_)) as x)::xs ->
1030         (match str_of_token2 x with
1031           "(" -> balanced (ct+1) xs
1032         | ")" -> balanced (ct-1) xs
1033         | _ -> balanced ct xs)
1034     | x::xs -> balanced ct xs in
1035
1036   let rec aux started xs =
1037     match xs with
1038     | [] ->  []
1039 (* patch: coccinelle *)
1040     | (T2 (Parser_c.TCommentNewline _,_,_))::Unindent_cocci2(false)::xs
1041     | (Cocci2("\n",_,_,_,_))::Unindent_cocci2(false)::xs ->
1042         (C2 "\n")::aux started xs
1043     | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
1044       ((Cocci2 ("{",_,_,_,_)) as a)::xs
1045       when started && str_of_token2 x =$= ")" ->
1046         (* to be done for if, etc, but not for a function header *)
1047         x::(C2 " ")::a::(aux started xs)
1048     | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs
1049       when balanced 0 (fst(Common.span (function x -> not(is_newline x)) xs)) ->
1050         let old_tabbing = !_current_tabbing in
1051         str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := [s]);
1052         (* only trust the indentation after the first { *)
1053         (if started
1054         then
1055           adjust_tabbing_unit
1056             (String.concat "" old_tabbing)
1057             (String.concat "" !_current_tabbing));
1058         let coccis_rest = Common.span all_coccis xs in
1059         (match coccis_rest with
1060           (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
1061             (* the case where cocci code has been added before a close } *)
1062             x::aux started (Indent_cocci2::xs)
1063         | _ -> x::aux started xs)
1064     | Indent_cocci2::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
1065       when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL ->
1066         let tu = String.make (lcoly-1) ' ' in
1067         _current_tabbing := tu::(!_current_tabbing);
1068         C2 (tu)::aux started (y::xs)
1069     | Indent_cocci2::xs ->
1070         (match !tabbing_unit with
1071           None -> aux started xs
1072         | Some (tu,_) ->
1073             _current_tabbing := tu::(!_current_tabbing);
1074              (* can't be C2, for later phases *)
1075              Cocci2 (tu,-1,-1,-1,None)::aux started xs)
1076     | Unindent_cocci2(permanent)::xs ->
1077         (match !_current_tabbing with
1078           [] -> aux started xs
1079         | _::new_tabbing ->
1080             _current_tabbing := new_tabbing;
1081             aux started xs)
1082     (* border between existing code and cocci code *)
1083     | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs
1084       when str_of_token2 x =$= "{" ->
1085         x::aux true (y::Indent_cocci2::xs)
1086     | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
1087       when str_of_token2 y =$= "}" ->
1088         x::aux started (y::Unindent_cocci2 true::xs)
1089     (* starting the body of the function *)
1090     | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->  x::aux true xs
1091     | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs
1092     | ((Cocci2("\n",_,_,_,_)) as x)::xs ->
1093             (* dont inline in expr because of weird eval order of ocaml *)
1094         let s = String.concat "" !_current_tabbing in
1095         (* can't be C2, for later phases *)
1096         x::Cocci2 (s,-1,-1,-1,None)::aux started xs
1097     | x::xs -> x::aux started xs in
1098   (aux false xs,!tabbing_unit)
1099
1100
1101 let rec find_paren_comma = function
1102   | [] -> ()
1103
1104   (* do nothing if was like this in original file *)
1105   | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
1106     ::xs when p2 =|= p1 + 1 ->
1107       find_paren_comma (x2::xs)
1108
1109   | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
1110     ::xs when p2 =|= p1 + 1 ->
1111       find_paren_comma (x2::xs)
1112
1113   | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
1114     ::xs when p2 =|= p1 + 1 ->
1115       find_paren_comma (x2::xs)
1116
1117   (* otherwise yes can adjust *)
1118   | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
1119       x2.remove <- true;
1120       find_paren_comma (x2::xs)
1121   | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
1122       x1.remove <- true;
1123       find_paren_comma (x2::xs)
1124
1125   | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
1126       x1.remove <- true;
1127       find_paren_comma (x2::xs)
1128
1129   | x::xs ->
1130       find_paren_comma xs
1131
1132
1133 let fix_tokens toks =
1134   let toks = toks +> List.map mk_token_extended in
1135
1136   let cleaner = toks +> Common.exclude (function
1137     | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
1138     | _ -> false
1139   ) in
1140   find_paren_comma cleaner;
1141
1142   let toks = rebuild_tokens_extented toks in
1143   toks +> List.map (fun x -> x.tok2)
1144
1145
1146
1147 (*****************************************************************************)
1148 (* Final unparsing (and debugging support) *)
1149 (*****************************************************************************)
1150
1151 (* for debugging *)
1152 type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
1153
1154 let kind_of_token2 = function
1155   | Fake2 _ -> KFake
1156   | Cocci2 _ -> KCocci
1157   | C2 _ -> KC
1158   | Comma _ -> KC
1159   | T2 (t,_,_) ->
1160       (match TH.pinfo_of_tok t with
1161       | ExpandedTok _ -> KExpanded
1162       | OriginTok _ -> KOrigin
1163       | FakeTok _ -> raise Impossible (* now a Fake2 *)
1164       | AbstractLineTok _ -> raise Impossible (* now a KC *)
1165       )
1166   | Unindent_cocci2 _ | Indent_cocci2 -> raise Impossible
1167
1168 let end_mark = "!"
1169
1170 let start_mark = function
1171   | KFake -> "!F!"
1172   | KCocci -> "!S!"
1173   | KC -> "!A!"
1174   | KExpanded -> "!E!"
1175   | KOrigin -> ""
1176
1177 let print_all_tokens2 pr xs =
1178   if !Flag_parsing_c.debug_unparsing
1179   then
1180     let current_kind = ref KOrigin in
1181     xs +> List.iter (fun t ->
1182       let newkind = kind_of_token2 t in
1183       if newkind =*= !current_kind
1184       then pr (str_of_token2 t)
1185       else begin
1186         pr (end_mark);
1187         pr (start_mark newkind);
1188         pr (str_of_token2 t);
1189         current_kind := newkind
1190       end
1191     );
1192   else
1193     xs +> List.iter (fun x -> pr (str_of_token2 x))
1194
1195
1196
1197
1198 (*****************************************************************************)
1199 (* Entry points *)
1200 (*****************************************************************************)
1201
1202 (* old: PPviatok was made in the beginning to allow to pretty print a
1203  * complete C file, including a modified C file by transformation.ml,
1204  * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
1205  * maybe flow_to_ast) all the cases. Indeed we don't need to do some
1206  * fancy stuff when a function was not modified at all. Just need to
1207  * print the list of token as-is. But now pretty_print_c.ml handles
1208  * almost everything so maybe less useful. Maybe PPviatok allows to
1209  * optimize a little the pretty printing.
1210  *
1211  * update: now have PPviastr which goes even faster than PPviatok, so
1212  * PPviatok has disappeared.
1213  *)
1214
1215 type ppmethod = PPnormal | PPviastr
1216
1217
1218
1219
1220 (* The pp_program function will call pretty_print_c.ml with a special
1221  * function to print the leaf components, the tokens. When we want to
1222  * print a token, we need to print also maybe the space and comments that
1223  * were close to it in the original file (and that was omitted during the
1224  * parsing phase), and honor what the cocci-info attached to the token says.
1225  * Maybe we will not print the token if it's a MINUS-token, and maybe we will
1226  * print it and also print some cocci-code attached in a PLUS to it.
1227  * So we will also maybe call unparse_cocci. Because the cocci-code may
1228  * contain metavariables, unparse_cocci will in fact sometimes call back
1229  * pretty_print_c (which will this time don't call back again unparse_cocci)
1230  *)
1231
1232 let pp_program2 xs outfile  =
1233   Common.with_open_outfile outfile (fun (pr,chan) ->
1234     let pr s =
1235       if !Flag_parsing_c.debug_unparsing
1236       then begin pr2_no_nl s; flush stderr end
1237       else pr s
1238         (* flush chan; *)
1239         (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1240     in
1241
1242     xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
1243       (* here can still work on ast *)
1244       let e = remove_useless_fakeInfo_struct e in
1245
1246       match ppmethod with
1247       | PPnormal ->
1248           (* now work on tokens *)
1249           (* phase1: just get all the tokens, all the information *)
1250           assert(toks_e +> List.for_all (fun t ->
1251             TH.is_origin t or TH.is_expanded t
1252           ));
1253           let toks = get_fakeInfo_and_tokens e toks_e in
1254           let toks = displace_fake_nodes toks in
1255           (* assert Origin;ExpandedTok;Faketok *)
1256           let toks = expand_mcode toks in
1257
1258           (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1259            * and no tag information, just NOTHING. *)
1260
1261           let toks =
1262             if !Flag.sgrep_mode2
1263             then
1264               (* nothing else to do for sgrep *)
1265               drop_expanded(drop_fake(drop_minus toks))
1266             else
1267               (* phase2: can now start to filter and adjust *)
1268                (let (toks,tu) = adjust_indentation toks in
1269               let toks = adjust_before_semicolon toks in(*before remove minus*)
1270               let toks = adjust_after_paren toks in(*also before remove minus*)
1271               let toks = drop_space_at_endline toks in
1272               let toks = paren_to_space toks in
1273               let toks = drop_end_comma toks in
1274               let toks = remove_minus_and_between_and_expanded_and_fake toks in
1275               (* assert Origin + Cocci + C and no minus *)
1276               let toks = add_space toks in
1277               let toks = add_newlines toks tu in
1278               let toks = paren_then_brace toks in
1279               let toks = fix_tokens toks in
1280                toks) in
1281
1282           (* in theory here could reparse and rework the ast! or
1283            * apply some SP. Not before cos julia may have generated
1284            * not parsable file. Need do unparsing_tricks call before being
1285            * ready to reparse. *)
1286           print_all_tokens2 pr toks;
1287
1288       | PPviastr -> pr str
1289     )
1290   )
1291
1292 let pp_program a b =
1293   Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
1294
1295
1296 let pp_program_default xs outfile =
1297   let xs' = xs +> List.map (fun x -> x, PPnormal) in
1298   pp_program xs' outfile