parsing_c/unparse_c.ml

   1 (* Yoann Padioleau, Julia Lawall
   2  *
   3  * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
   4  * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
   5  *
   6  * This program is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU General Public License (GPL)
   8  * version 2 as published by the Free Software Foundation.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * file license.txt for more details.
  14  *
  15  *
  16  * Modifications by Julia Lawall for better newline handling.
  17  *)
  18 open Common
  19
  20 open Ast_c
  21
  22 module TH = Token_helpers
  23
  24
  25 (* should keep comments and directives in between adjacent deleted terms,
  26 but not comments and directives within deleted terms.  should use the
  27 labels found in the control-flow graph *)
  28
  29
  30
  31 (*****************************************************************************)
  32 (* Wrappers *)
  33 (*****************************************************************************)
  34 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
  35
  36 (*****************************************************************************)
  37 (* Types used during the intermediate phases of the unparsing *)
  38 (*****************************************************************************)
  39
  40 type token1 =
  41   | Fake1 of info
  42   | T1 of Parser_c.token
  43
  44 (* The cocci_tag of the token should always be a NOTHING. The mark of
  45  * the token can only be OriginTok or ExpandedTok. Why not get rid of
  46  * token and get something simpler ? because we need to know if the
  47  * info is a TCommentCpp or TCommentSpace, etc for some of the further
  48  * analysis so easier to keep with the token.
  49  *
  50  * This type contains the whole information. Have all the tokens with this
  51  * type.
  52  *)
  53 type min =
  54     Min of (int list (* match numbers from witness trees *) *
  55               Ast_cocci.adjacency (* adjacency information *))
  56   | Ctx
  57
  58 type token2 =
  59   | T2 of Parser_c.token * min *
  60           int option (* orig index, abstracting away comments and space *)
  61   | Fake2 of min
  62   | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
  63         * Unparse_cocci.nlhint option
  64   | C2 of string
  65   | Comma of string
  66   | Indent_cocci2
  67   | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
  68   | EatSpace2
  69
  70 (* not used yet *)
  71 type token3 =
  72   | T3 of Parser_c.token
  73   | Cocci3 of string
  74   | C3 of string
  75
  76
  77 (* similar to the tech in parsing_hack *)
  78 type token_extended = {
  79   tok2 : token2;
  80   str  : string;
  81   idx: int option; (* to know if 2 tokens were consecutive in orig file *)
  82   mutable new_tokens_before : token2 list;
  83   mutable remove : bool;
  84 }
  85
  86
  87 (*****************************************************************************)
  88 (* Helpers *)
  89 (*****************************************************************************)
  90
  91 let info_of_token1 t =
  92   match t with
  93   | Fake1 info -> info
  94   | T1 tok -> TH.info_of_tok tok
  95
  96 let print_token1 = function
  97     T1 tok -> TH.str_of_tok tok
  98   | Fake1 info -> "fake"
  99
 100 let str_of_token2 = function
 101   | T2 (t,_,_) -> TH.str_of_tok t
 102   | Fake2 _ -> ""
 103   | Cocci2 (s,_,_,_,_) -> s
 104   | C2 s -> s
 105   | Comma s -> s
 106   | Indent_cocci2 -> ""
 107   | Unindent_cocci2 _ -> ""
 108   | EatSpace2 -> ""
 109
 110 let print_token2 = function
 111   | T2 (t,b,_) ->
 112       let t_str =
 113         match t with
 114         | Parser_c.TCommentSpace _ -> " sp "
 115         | Parser_c.TCommentNewline _ -> " nl "
 116         | Parser_c.TCommentCpp _ -> " cp "
 117         | Parser_c.TCommentMisc _ -> " misc "
 118         | Parser_c.TComment _ -> " comment "
 119         | _ -> "" in
 120       let b_str =
 121         match b with
 122           Min (index,adj) ->
 123             Printf.sprintf "-%d[%s]"
 124               (match adj with Ast_cocci.ADJ n -> n | _ -> -1)
 125               (String.concat " " (List.map string_of_int index))
 126         | Ctx -> "" in
 127       "T2:"^b_str^t_str^TH.str_of_tok t
 128   | Fake2 b ->
 129       let b_str =
 130         match b with
 131           Min (index,adj) ->
 132             Printf.sprintf "-%d[%s]"
 133               (match adj with Ast_cocci.ADJ n -> n | _ -> -1)
 134               (String.concat " " (List.map string_of_int index))
 135         | Ctx -> "" in
 136       b_str^"fake"
 137   | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
 138   | C2 s -> "C2:"^s
 139   | Comma s -> "Comma:"^s
 140   | Indent_cocci2 -> "Indent"
 141   | Unindent_cocci2 _ -> "Unindent"
 142   | EatSpace2 -> "EatSpace"
 143
 144 let simple_print_all_tokens1 l =
 145   List.iter (function x -> Printf.printf "|%s| " (print_token1 x)) l;
 146   Printf.printf "\n"
 147
 148 let simple_print_all_tokens2 l =
 149   List.iter (function x -> Printf.printf "|%s| " (print_token2 x)) l;
 150   Printf.printf "\n"
 151
 152 let str_of_token3 = function
 153   | T3 t -> TH.str_of_tok t
 154   | Cocci3 s | C3 s -> s
 155
 156
 157
 158 let mk_token_extended x =
 159   let origidx =
 160     match x with
 161     | T2 (_,_, idx) -> idx
 162     | _ -> None
 163   in
 164   { tok2 = x;
 165     str = str_of_token2 x;
 166     idx = origidx;
 167     new_tokens_before = [];
 168     remove = false;
 169   }
 170
 171 let rebuild_tokens_extented toks_ext =
 172   let _tokens = ref [] in
 173   toks_ext +> List.iter (fun tok ->
 174     tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
 175     if not tok.remove then push2 tok.tok2 _tokens;
 176   );
 177   let tokens = List.rev !_tokens in
 178   (tokens +> List.map mk_token_extended)
 179
 180
 181 let mcode_contain_plus = function
 182   | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
 183   | Ast_cocci.CONTEXT _ -> true
 184 (* patch: when need full coccinelle transformation *)
 185   | Ast_cocci.MINUS (_,_,_,Ast_cocci.NOREPLACEMENT) -> false
 186   | Ast_cocci.MINUS (_,_,_,Ast_cocci.REPLACEMENT _) -> true(*REPL is not empty*)
 187   | Ast_cocci.PLUS _ -> raise Impossible
 188
 189 let contain_plus info =
 190   let mck = Ast_c.mcode_of_info info in
 191   mcode_contain_plus mck
 192
 193 (*****************************************************************************)
 194 (* Last fix on the ast *)
 195 (*****************************************************************************)
 196
 197 (* Because of the ugly trick to handle initialiser, I generate fake ','
 198  * for the last initializer element, but if there is nothing around it,
 199  * I don't want in the end to print it.
 200  *)
 201
 202 let remove_useless_fakeInfo_struct program =
 203   let bigf = { Visitor_c.default_visitor_c_s with
 204     Visitor_c.kini_s = (fun (k,bigf) ini ->
 205       match k ini with
 206       | InitList args, ii ->
 207           (match ii with
 208           | [_i1;_i2] -> ini
 209           | [i1;i2;iicommaopt] ->
 210               if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
 211                  && (Ast_c.is_fake iicommaopt)
 212                  (* sometimes the guy put a normal iicommaopt *)
 213               then InitList args, [i1;i2]
 214               else InitList args, [i1;i2;iicommaopt]
 215           | [i1;i2;iicommaopt;end_comma_opt] ->
 216               (* only in #define. end_comma_opt canot be fake *)
 217               (* not sure if this will be considered ambiguous with a previous
 218                  case? *)
 219               if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
 220                  && (Ast_c.is_fake iicommaopt)
 221                  (* sometimes the guy put a normal iicommaopt *)
 222               then InitList args, [i1;i2;end_comma_opt]
 223               else InitList args, [i1;i2;iicommaopt;end_comma_opt]
 224           | _ -> raise Impossible
 225           )
 226       | x -> x
 227     )
 228   } in
 229   Visitor_c.vk_toplevel_s bigf program
 230
 231
 232 (*****************************************************************************)
 233 (* Tokens1 generation *)
 234 (*****************************************************************************)
 235
 236 let get_fakeInfo_and_tokens celem toks =
 237   let toks_in  = ref toks in
 238   let toks_out = ref [] in
 239
 240   (* todo? verify good order of position ? *)
 241   let pr_elem info =
 242     match Ast_c.pinfo_of_info info with
 243     | FakeTok _ ->
 244         Common.push2 (Fake1 info) toks_out
 245     | OriginTok _ | ExpandedTok _ ->
 246         (* get the associated comments/space/cppcomment tokens *)
 247         let (before, x, after) =
 248           !toks_in +> Common.split_when (fun tok ->
 249             info =*= TH.info_of_tok tok)
 250         in
 251         assert(info =*= TH.info_of_tok x);
 252         (*old: assert(before +> List.for_all (TH.is_comment)); *)
 253         before +> List.iter (fun x ->
 254           if not (TH.is_comment x)
 255           then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
 256           (* case such as  int asm d3("x"); not yet in ast *)
 257         );
 258         before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
 259         push2 (T1 x) toks_out;
 260         toks_in := after;
 261     | AbstractLineTok _ ->
 262         (* can be called on type info when for instance use -type_c *)
 263         if !Flag_parsing_c.pretty_print_type_info
 264         then Common.push2 (Fake1 info) toks_out
 265         else raise Impossible (* at this stage *)
 266   in
 267
 268   let pr_space _ = () in (* use the spacing that is there already *)
 269
 270   Pretty_print_c.pp_program_gen pr_elem pr_space celem;
 271
 272   if not (null !toks_in)
 273   then failwith "WEIRD: unparsing not finished";
 274
 275   List.rev !toks_out
 276
 277 (* Fake nodes that have BEFORE code or are - should be moved over any subsequent
 278 whitespace and newlines, but not any comments, to get as close to the affected
 279 code as possible.  Similarly, fake nodes that have AFTER code should be moved
 280 backwards.  No fake nodes should have both before and after code. *)
 281
 282 let displace_fake_nodes toks =
 283   let is_fake = function Fake1 _ -> true | _ -> false in
 284   let is_whitespace = function
 285       T1(Parser_c.TCommentSpace _)
 286       (* patch: cocci    *)
 287     | T1(Parser_c.TCommentNewline _) -> true
 288     | _ -> false in
 289   let rec loop toks =
 290     let fake_info =
 291       try Some (Common.split_when is_fake toks)
 292       with Not_found -> None in
 293     match fake_info with
 294       Some(bef,((Fake1 info) as fake),aft) ->
 295         (match !(info.cocci_tag) with
 296         | Some x ->
 297           (match x with
 298             (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_)
 299           | (Ast_cocci.MINUS(_,_,_,Ast_cocci.REPLACEMENT _),_) ->
 300             (* move the fake node forwards *)
 301               let (whitespace,rest) = Common.span is_whitespace aft in
 302               bef @ whitespace @ fake :: (loop rest)
 303           | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
 304             (* move the fake node backwards *)
 305               let revbef = List.rev bef in
 306               let (revwhitespace,revprev) = Common.span is_whitespace revbef in
 307               let whitespace = List.rev revwhitespace in
 308               let prev = List.rev revprev in
 309               prev @ fake :: (loop (whitespace @ aft))
 310           | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
 311               bef @ fake :: (loop aft)
 312           | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
 313               failwith "fake node should not be before-after"
 314           | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
 315         )
 316         | None ->
 317             bef @ fake :: (loop aft)
 318         )
 319     | None -> toks
 320     | _ -> raise Impossible in
 321   loop toks
 322
 323 (*****************************************************************************)
 324 (* Tokens2 generation *)
 325 (*****************************************************************************)
 326
 327 let comment2t2 = function
 328     (Token_c.TCommentCpp
 329        (* not sure iif the following list is exhaustive or complete *)
 330        (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError),
 331      (info : Token_c.info)) ->
 332       C2(info.Common.str)
 333   | (Token_c.TCommentCpp x,(info : Token_c.info)) ->
 334       C2("\n"^info.Common.str^"\n")
 335   | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
 336
 337 let expand_mcode toks =
 338   let toks_out = ref [] in
 339
 340   let index = ref 0 in
 341
 342   let add_elem t minus =
 343     match t with
 344     | Fake1 info ->
 345         let str = Ast_c.str_of_info info in
 346         if str =$= ""
 347         then push2 (Fake2 minus) toks_out
 348         (* fx the fake "," at the end of a structure or enum.
 349            no idea what other fake info there can be... *)
 350         else push2 (Comma str) toks_out
 351
 352
 353     | T1 tok ->
 354         (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
 355         (* no tag on expandedTok ! *)
 356         let modified = function
 357             None -> false
 358           | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
 359           | _ -> true in
 360         (if (TH.is_expanded tok &&
 361              modified !((TH.info_of_tok tok).cocci_tag)
 362             (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*))
 363         then
 364           failwith
 365             (Printf.sprintf
 366                "expanded token %s on line %d is either modified or stored in a metavariable"
 367                (TH.str_of_tok tok) (TH.line_of_tok tok)));
 368
 369         let tok' = tok +> TH.visitor_info_of_tok (fun i ->
 370           { i with cocci_tag = ref Ast_c.emptyAnnot; }
 371         ) in
 372
 373         let optindex =
 374           if TH.is_origin tok && not (TH.is_real_comment tok)
 375           then begin
 376               incr index;
 377               Some !index
 378           end
 379           else None
 380         in
 381
 382         push2 (T2 (tok', minus, optindex)) toks_out
 383   in
 384
 385   let expand_info t =
 386     let (mcode,env) =
 387       Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
 388
 389     let pr_cocci s ln col rcol hint =
 390       push2 (Cocci2(s,ln,col,rcol,hint)) toks_out  in
 391     let pr_c info =
 392       (match Ast_c.pinfo_of_info info with
 393         Ast_c.AbstractLineTok _ ->
 394           push2 (C2 (Ast_c.str_of_info info)) toks_out
 395       | Ast_c.FakeTok (s,_) ->
 396           push2 (C2 s) toks_out
 397       | _ ->
 398           Printf.printf "line: %s\n" (Common.dump info);
 399           failwith "not an abstract line");
 400       (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
 401       List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
 402
 403     let pr_barrier ln col = (* marks a position, used around C code *)
 404       push2 (Cocci2("",ln,col,col,None)) toks_out  in
 405     let pr_nobarrier ln col = () in (* not needed for linux spacing *)
 406
 407     let pr_cspace _ = push2 (C2 " ") toks_out in
 408
 409     let pr_space _ = () (* rely on add_space in cocci code *) in
 410     let pr_arity _ = () (* not interested *) in
 411
 412     let indent _   = push2 Indent_cocci2 toks_out in
 413     let unindent x = push2 (Unindent_cocci2 x) toks_out in
 414     let eat_space _   = push2 EatSpace2 toks_out in
 415
 416     let args_pp =
 417       (env, pr_cocci, pr_c, pr_cspace,
 418        (match !Flag_parsing_c.spacing with
 419          Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
 420        pr_arity,
 421        (match !Flag_parsing_c.spacing with
 422          Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
 423        indent, unindent, eat_space) in
 424
 425     (* old: when for yacfe with partial cocci:
 426      *    add_elem t false;
 427     *)
 428
 429     (* patch: when need full coccinelle transformation *)
 430     let unparser = Unparse_cocci.pp_list_list_any args_pp false in
 431     match mcode with
 432     | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
 433         (* Why adding ? because I want to have all the information, the whole
 434          * set of tokens, so I can then process and remove the
 435          * is_between_two_minus for instance *)
 436         add_elem t (Min (inst,adj));
 437         (match any_xxs with
 438           Ast_cocci.NOREPLACEMENT -> ()
 439         | Ast_cocci.REPLACEMENT(any_xxs,_) ->
 440             unparser any_xxs Unparse_cocci.InPlace)
 441     | Ast_cocci.CONTEXT (_,any_befaft) ->
 442         (match any_befaft with
 443         | Ast_cocci.NOTHING ->
 444             add_elem t Ctx
 445         | Ast_cocci.BEFORE (xxs,_) ->
 446             unparser xxs Unparse_cocci.Before;
 447             add_elem t Ctx
 448         | Ast_cocci.AFTER (xxs,_) ->
 449             add_elem t Ctx;
 450             unparser xxs Unparse_cocci.After;
 451         | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
 452             unparser xxs Unparse_cocci.Before;
 453             add_elem t Ctx;
 454             unparser yys Unparse_cocci.After;
 455         )
 456     | Ast_cocci.PLUS _ -> raise Impossible
 457
 458   in
 459
 460   toks +> List.iter expand_info;
 461   List.rev !toks_out
 462
 463
 464 (*****************************************************************************)
 465 (* Tokens2 processing, filtering, adjusting *)
 466 (*****************************************************************************)
 467
 468 let is_space = function
 469   | T2(Parser_c.TCommentSpace _,_b,_i) -> true  (* only whitespace *)
 470   | _ -> false
 471
 472 let is_newline = function
 473   | T2(Parser_c.TCommentNewline _,_b,_i) -> true
 474   | _ -> false
 475
 476 let is_whitespace = function
 477   | (T2 (t,_b,_i)) ->
 478       (match t with
 479       | Parser_c.TCommentSpace _ -> true  (* only whitespace *)
 480       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
 481       | _ -> false
 482       )
 483   | _ -> false
 484
 485 let is_minusable_comment = function
 486   | (T2 (t,_b,_i)) ->
 487       (match t with
 488       | Parser_c.TCommentSpace _   (* only whitespace *)
 489       (* patch: coccinelle *)
 490       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
 491       | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
 492       | Parser_c.TComment _
 493       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 494       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 495       | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
 496         -> true
 497
 498       | Parser_c.TCommentMisc _
 499       | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
 500         -> false
 501
 502       | _ -> false
 503       )
 504   | _ -> false
 505
 506 let is_minusable_comment_nocpp = function
 507   | (T2 (t,_b,_i)) ->
 508       (match t with
 509       | Parser_c.TCommentSpace _   (* only whitespace *)
 510       (* patch: coccinelle *)
 511       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
 512       | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
 513       | Parser_c.TComment _ -> true
 514       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 515       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 516       | Parser_c.TCommentCpp (Token_c.CppDirective, _)
 517         -> false
 518
 519       | Parser_c.TCommentMisc _
 520       | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
 521         -> false
 522
 523       | _ -> false
 524       )
 525   | _ -> false
 526
 527 let all_coccis = function
 528     Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _
 529   | EatSpace2 -> true
 530   | _ -> false
 531
 532 (*previously gave up if the first character was a newline, but not clear why*)
 533 let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
 534
 535 let set_minus_comment adj = function
 536   | T2 (t,Ctx,idx) ->
 537       let str = TH.str_of_tok t in
 538       (match t with
 539       | Parser_c.TCommentSpace _
 540 (* patch: coccinelle *)
 541       | Parser_c.TCommentNewline _ -> ()
 542
 543       | Parser_c.TComment _
 544       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 545       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 546       | Parser_c.TCommentCpp (Token_c.CppDirective, _)
 547         ->
 548           pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
 549                  (TH.line_of_tok t) str)
 550       | _ -> raise Impossible
 551       );
 552       T2 (t, Min adj, idx)
 553 (* patch: coccinelle *)
 554   | T2 (t,Min adj,idx) as x -> x
 555   | Fake2 _ as x -> x
 556   | _ -> raise Impossible
 557
 558 let set_minus_comment_or_plus adj = function
 559     Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _
 560   | EatSpace2 as x -> x
 561   | x -> set_minus_comment adj x
 562
 563 let drop_minus xs =
 564   xs +> Common.exclude (function
 565     | T2 (t,Min adj,_) -> true
 566     | _ -> false
 567   )
 568
 569 let drop_expanded xs =
 570   xs +> Common.exclude (function
 571     | T2 (t,_,_) when TH.is_expanded t -> true
 572     | _ -> false
 573   )
 574
 575 let drop_fake xs =
 576   xs +> Common.exclude (function
 577     | Fake2 _ -> true
 578     | _ -> false
 579   )
 580
 581 let remove_minus_and_between_and_expanded_and_fake xs =
 582
 583   (* get rid of expanded tok *)
 584   let xs = drop_expanded xs in
 585
 586   let minus_or_comment = function
 587       T2(_,Min adj,_) -> true
 588     | x -> is_minusable_comment x in
 589
 590   let minus_or_comment_nocpp = function
 591       T2(_,Min adj,_) -> true
 592     | x -> is_minusable_comment_nocpp x in
 593
 594   let common_adj (index1,adj1) (index2,adj2) =
 595     let same_adj = (* same adjacency info *)
 596       match (adj1,adj2) with
 597         (Ast_cocci.ADJ adj1,Ast_cocci.ADJ adj2) -> adj1 = adj2
 598       | (Ast_cocci.ALLMINUS,_) | (_,Ast_cocci.ALLMINUS) -> true in
 599     same_adj &&
 600     (* non-empty intersection of witness trees *)
 601     not ((Common.inter_set index1 index2) = []) in
 602
 603   (* new idea: collects regions not containing non-space context code
 604      if two adjacent adjacent minus tokens satisfy common_adj then delete
 605      all spaces, comments etc between them
 606      if two adjacent minus tokens do not satisfy common_adj only delete
 607      the spaces between them if there are no comments, etc.
 608      if the region contain no plus code and is both preceded and followed
 609      by a newline, delete the initial newline. *)
 610
 611   let rec adjust_around_minus = function
 612       [] -> []
 613     | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
 614       ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs ->
 615         let (minus_list,rest) = Common.span not_context (t1::xs) in
 616         let contains_plus = List.exists is_plus minus_list in
 617         let x =
 618           match List.rev minus_list with
 619             (T2(Parser_c.TCommentNewline c,_b,_i))::rest
 620             when List.for_all minus_or_comment minus_list ->
 621               set_minus_comment_or_plus adj1 x
 622           | _ -> x in
 623         x :: adjust_within_minus contains_plus minus_list @
 624         adjust_around_minus rest
 625     | ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs ->
 626         let (minus_list,rest) = Common.span not_context (t1::xs) in
 627         let contains_plus = List.exists is_plus minus_list in
 628         adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
 629     | x::xs ->
 630         x :: adjust_around_minus xs
 631   and adjust_within_minus cp (* contains plus *) = function
 632       ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs ->
 633         let not_minus = function T2(_,Min _,_) -> false | _ -> true in
 634         let (not_minus_list,rest) = Common.span not_minus xs in
 635         t1 ::
 636         (match rest with
 637           ((Fake2(Min adj2) | T2(_,Min adj2,_)) as t2)::xs
 638           when common_adj adj1 adj2 ->
 639             (List.map (set_minus_comment_or_plus adj1) not_minus_list)
 640             @ (adjust_within_minus cp (t2::xs))
 641         | ((Fake2(Min adj2) | T2(_,Min adj2,_)) as t2)::xs ->
 642             if not cp && List.for_all is_whitespace not_minus_list
 643             then
 644               (List.map (set_minus_comment_or_plus adj1) not_minus_list)
 645               @ (adjust_within_minus cp (t2::xs))
 646             else
 647               not_minus_list @ (adjust_within_minus cp (t2::xs))
 648         | _ ->
 649             if cp
 650             then xs
 651             else
 652               (* remove spaces after removed stuff, eg a comma after a
 653                  function argument *)
 654               let (spaces,rest) = Common.span is_space xs in
 655               (List.map (set_minus_comment_or_plus adj1) spaces)
 656               @ rest)
 657     | xs -> failwith "should always start with minus"
 658   and not_context = function
 659       (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false
 660     | _ -> true
 661   and is_plus = function
 662       C2 _ | Comma _ | Cocci2 _ -> true
 663     | _ -> false in
 664
 665   let xs = adjust_around_minus xs in
 666
 667   (* get rid of fake tok *)
 668   let xs = drop_fake xs in
 669
 670   (* this drops blank lines after a brace introduced by removing code *)
 671   let minus_or_comment_nonl = function
 672       T2(_,Min adj,_) -> true
 673     | T2(Parser_c.TCommentNewline _,_b,_i) -> false
 674     | x -> is_minusable_comment x in
 675
 676   let rec adjust_after_brace = function
 677       [] -> []
 678     | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs)
 679        when str_of_token2 x =$= "{" ->
 680          let (between_minus,rest) = Common.span minus_or_comment_nonl xs in
 681          let is_whitespace = function
 682              T2(Parser_c.TCommentSpace _,_b,_i)
 683              (* patch: cocci    *)
 684            | T2(Parser_c.TCommentNewline _,_b,_i) -> true
 685            | _ -> false in
 686          let (newlines,rest) = Common.span is_whitespace rest in
 687          let (drop_newlines,last_newline) =
 688            let rec loop = function
 689                [] -> ([],[])
 690              | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
 691                  (List.rev rest,[x])
 692              | x::xs ->
 693                  let (drop_newlines,last_newline) = loop xs in
 694                  (drop_newlines,x::last_newline) in
 695            loop (List.rev newlines) in
 696          x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@
 697          last_newline@
 698          adjust_after_brace rest
 699     | x::xs -> x::adjust_after_brace xs in
 700
 701   let xs = adjust_after_brace xs in
 702
 703   (* search backwards from context } over spaces until reaching a newline.
 704      then go back over all minus code until reaching some context or + code.
 705      get rid of all intervening spaces, newlines, and comments
 706      input is reversed *)
 707   let rec adjust_before_brace = function
 708       [] -> []
 709     | ((T2(t,Ctx,_)) as x)::xs when str_of_token2 x =$= "}" or is_newline x ->
 710         let (outer_spaces,rest) = Common.span is_space xs in
 711         x :: outer_spaces @
 712         (match rest with
 713           ((T2 (Parser_c.TCommentNewline _,Ctx,_i)) as h) ::
 714           (* the rest of this code is the same as from_newline below
 715              but merging them seems to be error prone... *)
 716           ((T2 (t, Min adj, idx)) as m) :: rest ->
 717             let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
 718             h :: m ::
 719             (List.map (set_minus_comment adj) spaces) @
 720             (adjust_before_brace rest)
 721         | _ -> adjust_before_brace rest)
 722     | x::xs -> x :: (adjust_before_brace xs) in
 723
 724   let from_newline = function
 725       ((T2 (t, Min adj, idx)) as m) :: rest ->
 726         let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
 727         m ::
 728         (List.map (set_minus_comment adj) spaces) @
 729         (adjust_before_brace rest)
 730     | ((T2 (t0, Ctx, idx0)) as m0) :: ((T2 (t, Min adj, idx)) as m) :: rest
 731         when TH.str_of_tok t0 = "" ->
 732           (* This is for the case of a #define that is completely deleted,
 733              because a #define has a strange EOL token at the end.
 734              We hope there i no other kind of token that is represented by
 735              "", but it seems like changing the kind of token might break
 736              the end of entity recognition in the C parser.
 737              See parsing_hacks.ml *)
 738           let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
 739           m0 :: m ::
 740           (List.map (set_minus_comment adj) spaces) @
 741           (adjust_before_brace rest)
 742     | rest -> adjust_before_brace rest in
 743
 744   let xs = List.rev (from_newline (List.rev xs)) in
 745   let xs = drop_minus xs in
 746   xs
 747
 748 (* things that should not be followed by space - boundary between SmPL
 749    code and C code *)
 750 let adjust_eat_space toks =
 751   let rec loop = function
 752       [] -> []
 753     | EatSpace2 :: x :: rest when is_space x -> loop rest
 754     | EatSpace2 :: rest -> loop rest
 755     | x :: xs -> x :: loop xs in
 756   loop toks
 757
 758 (* normally, in C code, a semicolon is not preceded by a space or newline *)
 759 let adjust_before_semicolon toks =
 760   let toks = List.rev toks in
 761   let rec search_semic = function
 762       [] -> []
 763     | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
 764         if List.mem (str_of_token2 x) [";";")";","]
 765         then x :: search_semic (search_minus false xs)
 766         else x :: search_semic xs
 767     | x::xs -> x :: search_semic xs
 768   and search_minus seen_minus xs =
 769     let (spaces, rest) = Common.span is_space xs in
 770     (* only delete spaces if something is actually deleted *)
 771     match rest with
 772       ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest
 773     | _ -> if seen_minus then rest else xs in
 774   List.rev (search_semic toks)
 775
 776 (* normally, in C code, a ( is not followed by a space or newline *)
 777 let adjust_after_paren toks =
 778   let rec search_paren = function
 779       [] -> []
 780     | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
 781         if List.mem (str_of_token2 x) ["("] (* other things? *)
 782         then x :: search_paren(search_minus false xs)
 783         else x :: search_paren xs
 784     | x::xs -> x :: search_paren xs
 785   and search_minus seen_minus xs =
 786     let (spaces, rest) = Common.span is_whitespace xs in
 787     (* only delete spaces if something is actually deleted *)
 788     match rest with
 789       ((T2(_,Min _,_)) as a)::rerest -> (* minus *)
 790         a :: search_minus true rerest
 791     | ((T2(_,Ctx,_)) as a)::rerest when str_of_token2 a = "," ->
 792         (* comma after ( will be deleted, so consider it as minus code
 793            already *)
 794         a :: search_minus true rerest
 795     | _ -> if seen_minus then rest else xs in (* drop trailing space *)
 796   search_paren toks
 797
 798 (* this is for the case where braces are added around an if branch *)
 799 let paren_then_brace toks =
 800   let rec search_paren = function
 801       [] -> []
 802     | ((T2(_,Ctx,_)) as x)::xs ->
 803         if List.mem (str_of_token2 x) [")"]
 804         then x :: search_paren(search_plus xs)
 805         else x :: search_paren xs
 806     | x::xs -> x :: search_paren xs
 807   and search_plus xs =
 808     let (spaces, rest) = Common.span is_whitespace xs in
 809     match rest with
 810       (* move the brace up to the previous line *)
 811       ((Cocci2("{",_,_,_,_)) as x) :: (((Cocci2 _) :: _) as rest) ->
 812         (C2 " ") :: x :: spaces @ rest
 813     | _ -> xs in
 814   search_paren toks
 815
 816 let is_ident_like s = s ==~ Common.regexp_alpha
 817
 818 let rec drop_space_at_endline = function
 819     [] -> []
 820   | [x] -> [x]
 821   | (C2 " ") ::
 822     ((((T2(Parser_c.TCommentSpace _,Ctx,_)) | Cocci2("\n",_,_,_,_) |
 823     (T2(Parser_c.TCommentNewline _,Ctx,_))) :: _) as rest) ->
 824       (* when unparse_cocci doesn't know whether space is needed *)
 825       drop_space_at_endline rest
 826   | ((T2(Parser_c.TCommentSpace _,Ctx,_i)) as a)::rest ->
 827       let (outer_spaces,rest) = Common.span is_space rest in
 828       let minus_or_comment_or_space_nocpp = function
 829           T2(_,Min adj,_) -> true
 830         | (T2(Parser_c.TCommentSpace _,Ctx,_i)) -> true
 831         | (T2(Parser_c.TCommentNewline _,Ctx,_i)) -> false
 832         | x -> false in
 833       let (minus,rest) = Common.span minus_or_comment_or_space_nocpp rest in
 834       let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in
 835       if List.exists (function T2(_,Min adj,_) -> true | _ -> false) minus
 836       then
 837         match rest with
 838           ((T2(Parser_c.TCommentNewline _,Ctx,_i)) as a)::rest ->
 839             (* drop trailing spaces *)
 840             minus@a::(drop_space_at_endline rest)
 841         | _ -> fail()
 842       else fail()
 843   | a :: rest ->
 844       a :: drop_space_at_endline rest
 845
 846 (* if a removed ( is between two tokens, then add a space *)
 847 let rec paren_to_space = function
 848     [] -> []
 849   | [x] -> [x]
 850   | [x;y] -> [x;y]
 851   | ((T2(_,Ctx,_)) as a)::((T2(t,Min _,_)) as b)::((T2(_,Ctx,_)) as c)::rest
 852     when not (is_whitespace a) && TH.str_of_tok t = "(" ->
 853       a :: b :: (C2 " ") :: (paren_to_space (c :: rest))
 854   | a :: rest -> a :: (paren_to_space rest)
 855
 856 let rec add_space xs =
 857   match xs with
 858   | [] -> []
 859   | [x] -> [x]
 860   | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
 861     when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
 862       not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
 863         (* this only works within a line.  could consider whether
 864            something should be done to add newlines too, rather than
 865            printing them explicitly in unparse_cocci. *)
 866         x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
 867   | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
 868     when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
 869       not (lnx = -1) && lnx < lny && not (rcolx = -1) ->
 870         (* this only works within a line.  could consider whether
 871            something should be done to add newlines too, rather than
 872            printing them explicitly in unparse_cocci. *)
 873         x::C2 (String.make (lny-lnx) '\n')::
 874         C2 (String.make (lcoly-1) ' '):: (* -1 is for the + *)
 875         add_space (y::xs)
 876   | ((T2(_,Ctx,_)) as x)::((Cocci2 _) as y)::xs -> (* add space on boundary *)
 877       let sx = str_of_token2 x in
 878       let sy = str_of_token2 y in
 879       if is_ident_like sx && (is_ident_like sy or List.mem sy ["="])
 880       then x::C2 " "::(add_space (y::xs))
 881       else x::(add_space (y::xs))
 882   | x::y::xs -> (* not boundary, not sure if it is possible *)
 883       let sx = str_of_token2 x in
 884       let sy = str_of_token2 y in
 885       if is_ident_like sx && is_ident_like sy
 886       then x::C2 " "::(add_space (y::xs))
 887       else x::(add_space (y::xs))
 888
 889 (* A fake comma is added at the end of an unordered initlist or a enum
 890 decl, if the initlist or enum doesn't already end in a comma.  This is only
 891 needed if there is + code, ie if we see Cocci after it in the code sequence *)
 892
 893 let rec drop_end_comma = function
 894     [] -> []
 895   | [x] -> [x]
 896   | ((Comma ",") as x) :: rest ->
 897       let (newlines,rest2) = Common.span is_whitespace rest in
 898       (match rest2 with
 899         (Cocci2 _) :: _ -> x :: drop_end_comma rest
 900       | _ -> drop_end_comma rest)
 901   | x :: xs -> x :: drop_end_comma xs
 902
 903 (* The following only works for the outermost function call.  Stack records
 904 the column of all open parentheses.  Space_cell contains the most recent
 905 comma in the outermost function call.  The goal is to decide whether this
 906 should be followed by a space or a newline and indent. *)
 907 let add_newlines toks tabbing_unit =
 908   let create_indent n =
 909     let (tu,tlen) =
 910       match tabbing_unit with
 911         Some ("\t",_) -> ("\t",8)
 912       | Some ("",_) -> ("\t",8) (* not sure why... *)
 913       | Some (s,_) -> (s,String.length s) (* assuming only spaces *)
 914       | None -> ("\t",8) in
 915     let rec loop seen =
 916       if seen + tlen <= n
 917       then tu ^ loop (seen + tlen)
 918       else String.make (n-seen) ' ' in
 919     loop 0 in
 920   let check_for_newline count x = function
 921       Some (start,space_cell) when count > Flag_parsing_c.max_width ->
 922         space_cell := "\n"^(create_indent x);
 923         Some (x + (count - start))
 924     | _ -> None in
 925   (* the following is for strings that may contain newline *)
 926   let string_length s count =
 927     let l = list_of_string s in
 928     List.fold_left
 929       (function count ->
 930         function
 931             '\t' -> count + 8
 932           | '\n' -> 0
 933           | c -> count + 1)
 934       count l in
 935   let rec loop info count = function
 936       [] -> []
 937     | ((T2(tok,_,_)) as a)::xs ->
 938         a :: loop info (string_length (TH.str_of_tok tok) count) xs
 939     | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs ->
 940         let (stack,space_cell) = info in
 941         let rest =
 942           match hint with
 943             None -> loop info (string_length s count) xs
 944           | Some Unparse_cocci.StartBox ->
 945               let count = string_length s count in
 946               loop (count::stack,space_cell) count xs
 947           | Some Unparse_cocci.EndBox ->
 948               let count = string_length s count in
 949               (match stack with
 950                 [x] ->
 951                   (match check_for_newline count x space_cell with
 952                     Some count -> loop ([],None) count xs
 953                   | None -> loop ([],None) count xs)
 954               | _ -> loop (List.tl stack,space_cell) count xs)
 955           | Some (Unparse_cocci.SpaceOrNewline sp) ->
 956               let count = string_length s (count + 1 (*space*)) in
 957               (match stack with
 958                 [x] ->
 959                   (match check_for_newline count x space_cell with
 960                     Some count -> loop (stack,Some (x,sp)) count xs
 961                   | None -> loop (stack,Some (count,sp)) count xs)
 962               | _ -> loop info count xs) in
 963         a :: rest
 964     | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs
 965     | ((Comma(s)) as a)::xs -> a :: loop info (string_length s count) xs
 966     | Fake2 _ :: _ | Indent_cocci2 :: _
 967     | Unindent_cocci2 _::_ | EatSpace2::_ ->
 968         failwith "unexpected fake, indent, unindent, or eatspace" in
 969   let redo_spaces prev = function
 970       Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) ->
 971         C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev
 972     | t -> t::prev in
 973   (match !Flag_parsing_c.spacing with
 974     Flag_parsing_c.SMPL -> toks
 975   | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks)))
 976
 977 (* When insert some new code, because of a + in a SP, we must add this
 978  * code at the right place, with the good indentation. So each time we
 979  * encounter some spacing info, with some newline, we maintain the
 980  * current indentation level used.
 981  *
 982  * TODO problems: not accurate. ex: TODO
 983  *
 984  * TODO: if in #define region, should add a \ \n
 985  *)
 986 let new_tabbing2 space =
 987   (list_of_string space)
 988     +> List.rev
 989     +> Common.take_until (fun c -> c =<= '\n')
 990     +> List.rev
 991     +> List.map string_of_char
 992     +> String.concat ""
 993
 994 let new_tabbing a =
 995   Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
 996
 997
 998 let rec adjust_indentation xs =
 999
1000   let _current_tabbing = ref ([] : string list) in
1001   let tabbing_unit = ref None in
1002
1003   let string_of_list l = String.concat "" (List.map string_of_char l) in
1004
1005   (* try to pick a tabbing unit for the plus code *)
1006   let adjust_tabbing_unit old_tab new_tab =
1007     if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
1008     then
1009       let old_tab = list_of_string old_tab in
1010       let new_tab = list_of_string new_tab in
1011       let rec loop = function
1012           ([],new_tab) ->
1013             tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
1014         | (_,[]) -> failwith "not possible"
1015         | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
1016       loop (old_tab,new_tab) in
1017
1018 (*
1019   let remtab tu current_tab =
1020     let current_tab = List.rev(list_of_string current_tab) in
1021     let rec loop = function
1022         ([],new_tab) -> string_of_list (List.rev new_tab)
1023       | (_,[]) -> (-*weird; tabbing unit used up more than the current tab*-)
1024         ""
1025       | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
1026       | (_,ns) -> (-* mismatch; remove what we can *-)
1027           string_of_list (List.rev ns) in
1028     loop (tu,current_tab) in
1029 *)
1030
1031   let rec find_first_tab started = function
1032       [] -> ()
1033     | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
1034         find_first_tab true xs
1035 (* patch: coccinelle *)
1036     | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
1037       when started ->
1038         let s = str_of_token2 x +> new_tabbing in
1039         tabbing_unit := Some (s,List.rev (list_of_string s))
1040     | x::xs -> find_first_tab started xs in
1041   find_first_tab false xs;
1042
1043   let rec balanced ct = function
1044       [] -> ct >= 0
1045     | ((T2(tok,_,_)) as x)::xs ->
1046         (match str_of_token2 x with
1047           "(" -> balanced (ct+1) xs
1048         | ")" -> balanced (ct-1) xs
1049         | _ -> balanced ct xs)
1050     | x::xs -> balanced ct xs in
1051
1052   let rec aux started xs =
1053     match xs with
1054     | [] ->  []
1055 (* patch: coccinelle *)
1056     | (T2 (Parser_c.TCommentNewline _,_,_))::Unindent_cocci2(false)::xs
1057     | (Cocci2("\n",_,_,_,_))::Unindent_cocci2(false)::xs ->
1058         (C2 "\n")::aux started xs
1059     | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
1060       ((Cocci2 ("{",_,_,_,_)) as a)::xs
1061       when started && str_of_token2 x =$= ")" ->
1062         (* to be done for if, etc, but not for a function header *)
1063         x::(C2 " ")::a::(aux started xs)
1064     | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs
1065       when balanced 0 (fst(Common.span (function x -> not(is_newline x)) xs)) ->
1066         let old_tabbing = !_current_tabbing in
1067         str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := [s]);
1068         (* only trust the indentation after the first { *)
1069         (if started
1070         then
1071           adjust_tabbing_unit
1072             (String.concat "" old_tabbing)
1073             (String.concat "" !_current_tabbing));
1074         let coccis_rest = Common.span all_coccis xs in
1075         (match coccis_rest with
1076           (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
1077             (* the case where cocci code has been added before a close } *)
1078             x::aux started (Indent_cocci2::xs)
1079         | _ -> x::aux started xs)
1080     | Indent_cocci2::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
1081       when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL ->
1082         let tu = String.make (lcoly-1) ' ' in
1083         _current_tabbing := tu::(!_current_tabbing);
1084         C2 (tu)::aux started (y::xs)
1085     | Indent_cocci2::xs ->
1086         (match !tabbing_unit with
1087           None -> aux started xs
1088         | Some (tu,_) ->
1089             _current_tabbing := tu::(!_current_tabbing);
1090              (* can't be C2, for later phases *)
1091              Cocci2 (tu,-1,-1,-1,None)::aux started xs)
1092     | Unindent_cocci2(permanent)::xs ->
1093         (match !_current_tabbing with
1094           [] -> aux started xs
1095         | _::new_tabbing ->
1096             _current_tabbing := new_tabbing;
1097             aux started xs)
1098     (* border between existing code and cocci code *)
1099     | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs
1100       when str_of_token2 x =$= "{" ->
1101         x::aux true (y::Indent_cocci2::xs)
1102     | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
1103       when str_of_token2 y =$= "}" ->
1104         x::aux started (y::Unindent_cocci2 true::xs)
1105     (* starting the body of the function *)
1106     | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->  x::aux true xs
1107     | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs
1108     | ((Cocci2("\n",_,_,_,_)) as x)::xs ->
1109             (* dont inline in expr because of weird eval order of ocaml *)
1110         let s = String.concat "" !_current_tabbing in
1111         (* can't be C2, for later phases *)
1112         x::Cocci2 (s,-1,-1,-1,None)::aux started xs
1113     | x::xs -> x::aux started xs in
1114   (aux false xs,!tabbing_unit)
1115
1116
1117 let rec find_paren_comma = function
1118   | [] -> ()
1119
1120   (* do nothing if was like this in original file *)
1121   | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
1122     ::xs when p2 =|= p1 + 1 ->
1123       find_paren_comma (x2::xs)
1124
1125   | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
1126     ::xs when p2 =|= p1 + 1 ->
1127       find_paren_comma (x2::xs)
1128
1129   | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
1130     ::xs when p2 =|= p1 + 1 ->
1131       find_paren_comma (x2::xs)
1132
1133   (* otherwise yes can adjust *)
1134   | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
1135       x2.remove <- true;
1136       find_paren_comma (x2::xs)
1137   | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
1138       x1.remove <- true;
1139       find_paren_comma (x2::xs)
1140
1141   | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
1142       x1.remove <- true;
1143       find_paren_comma (x2::xs)
1144
1145   | x::xs ->
1146       find_paren_comma xs
1147
1148
1149 let fix_tokens toks =
1150   let toks = toks +> List.map mk_token_extended in
1151
1152   let cleaner = toks +> Common.exclude (function
1153     | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
1154     | _ -> false
1155   ) in
1156   find_paren_comma cleaner;
1157
1158   let toks = rebuild_tokens_extented toks in
1159   toks +> List.map (fun x -> x.tok2)
1160
1161
1162
1163 (*****************************************************************************)
1164 (* Final unparsing (and debugging support) *)
1165 (*****************************************************************************)
1166
1167 (* for debugging *)
1168 type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
1169
1170 let kind_of_token2 = function
1171   | Fake2 _ -> KFake
1172   | Cocci2 _ -> KCocci
1173   | C2 _ -> KC
1174   | Comma _ -> KC
1175   | T2 (t,_,_) ->
1176       (match TH.pinfo_of_tok t with
1177       | ExpandedTok _ -> KExpanded
1178       | OriginTok _ -> KOrigin
1179       | FakeTok _ -> raise Impossible (* now a Fake2 *)
1180       | AbstractLineTok _ -> raise Impossible (* now a KC *)
1181       )
1182   | Unindent_cocci2 _ | Indent_cocci2 | EatSpace2 -> raise Impossible
1183
1184 let end_mark = "!"
1185
1186 let start_mark = function
1187   | KFake -> "!F!"
1188   | KCocci -> "!S!"
1189   | KC -> "!A!"
1190   | KExpanded -> "!E!"
1191   | KOrigin -> ""
1192
1193 let print_all_tokens2 pr xs =
1194   if !Flag_parsing_c.debug_unparsing
1195   then
1196     let current_kind = ref KOrigin in
1197     xs +> List.iter (fun t ->
1198       let newkind = kind_of_token2 t in
1199       if newkind =*= !current_kind
1200       then pr (str_of_token2 t)
1201       else begin
1202         pr (end_mark);
1203         pr (start_mark newkind);
1204         pr (str_of_token2 t);
1205         current_kind := newkind
1206       end
1207     );
1208   else
1209     xs +> List.iter (fun x -> pr (str_of_token2 x))
1210
1211
1212
1213
1214 (*****************************************************************************)
1215 (* Entry points *)
1216 (*****************************************************************************)
1217
1218 (* old: PPviatok was made in the beginning to allow to pretty print a
1219  * complete C file, including a modified C file by transformation.ml,
1220  * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
1221  * maybe flow_to_ast) all the cases. Indeed we don't need to do some
1222  * fancy stuff when a function was not modified at all. Just need to
1223  * print the list of token as-is. But now pretty_print_c.ml handles
1224  * almost everything so maybe less useful. Maybe PPviatok allows to
1225  * optimize a little the pretty printing.
1226  *
1227  * update: now have PPviastr which goes even faster than PPviatok, so
1228  * PPviatok has disappeared.
1229  *)
1230
1231 type ppmethod = PPnormal | PPviastr
1232
1233
1234
1235
1236 (* The pp_program function will call pretty_print_c.ml with a special
1237  * function to print the leaf components, the tokens. When we want to
1238  * print a token, we need to print also maybe the space and comments that
1239  * were close to it in the original file (and that was omitted during the
1240  * parsing phase), and honor what the cocci-info attached to the token says.
1241  * Maybe we will not print the token if it's a MINUS-token, and maybe we will
1242  * print it and also print some cocci-code attached in a PLUS to it.
1243  * So we will also maybe call unparse_cocci. Because the cocci-code may
1244  * contain metavariables, unparse_cocci will in fact sometimes call back
1245  * pretty_print_c (which will this time don't call back again unparse_cocci)
1246  *)
1247
1248 let pp_program2 xs outfile  =
1249   Common.with_open_outfile outfile (fun (pr,chan) ->
1250     let pr s =
1251       if !Flag_parsing_c.debug_unparsing
1252       then begin pr2_no_nl s; flush stderr end
1253       else pr s
1254         (* flush chan; *)
1255         (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1256     in
1257
1258     xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
1259       (* here can still work on ast *)
1260       let e = remove_useless_fakeInfo_struct e in
1261
1262       match ppmethod with
1263       | PPnormal ->
1264           (* now work on tokens *)
1265           (* phase1: just get all the tokens, all the information *)
1266           assert(toks_e +> List.for_all (fun t ->
1267             TH.is_origin t or TH.is_expanded t
1268           ));
1269           let toks = get_fakeInfo_and_tokens e toks_e in
1270           let toks = displace_fake_nodes toks in
1271           (* assert Origin;ExpandedTok;Faketok *)
1272           let toks = expand_mcode toks in
1273
1274           (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1275            * and no tag information, just NOTHING. *)
1276
1277           let toks =
1278             if !Flag.sgrep_mode2
1279             then
1280               (* nothing else to do for sgrep *)
1281               drop_expanded(drop_fake(drop_minus toks))
1282             else
1283               (* phase2: can now start to filter and adjust *)
1284                (let (toks,tu) = adjust_indentation toks in
1285               let toks = adjust_eat_space toks in
1286               let toks = adjust_before_semicolon toks in(*before remove minus*)
1287               let toks = adjust_after_paren toks in(*also before remove minus*)
1288               let toks = drop_space_at_endline toks in
1289               let toks = paren_to_space toks in
1290               let toks = drop_end_comma toks in
1291               let toks = remove_minus_and_between_and_expanded_and_fake toks in
1292               (* assert Origin + Cocci + C and no minus *)
1293               let toks = add_space toks in
1294               let toks = add_newlines toks tu in
1295               let toks = paren_then_brace toks in
1296               let toks = fix_tokens toks in
1297                toks) in
1298
1299           (* in theory here could reparse and rework the ast! or
1300            * apply some SP. Not before cos julia may have generated
1301            * not parsable file. Need do unparsing_tricks call before being
1302            * ready to reparse. *)
1303           print_all_tokens2 pr toks;
1304
1305       | PPviastr -> pr str
1306     )
1307   )
1308
1309 let pp_program a b =
1310   Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
1311
1312
1313 let pp_program_default xs outfile =
1314   let xs' = xs +> List.map (fun x -> x, PPnormal) in
1315   pp_program xs' outfile