parsing_c/unparse_c.ml

   1 (* Yoann Padioleau, Julia Lawall
   2  *
   3  * Copyright (C) 2012, INRIA.
   4  * Copyright (C) 2010, 2011, University of Copenhagen DIKU and INRIA.
   5  * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
   6  *
   7  * This program is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License (GPL)
   9  * version 2 as published by the Free Software Foundation.
  10  *
  11  * This program is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * file license.txt for more details.
  15  *
  16  *
  17  * Modifications by Julia Lawall for better newline handling.
  18  *)
  19 open Common
  20
  21 open Ast_c
  22
  23 module TH = Token_helpers
  24
  25
  26 (* should keep comments and directives in between adjacent deleted terms,
  27 but not comments and directives within deleted terms.  should use the
  28 labels found in the control-flow graph *)
  29
  30
  31
  32 (*****************************************************************************)
  33 (* Wrappers *)
  34 (*****************************************************************************)
  35 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
  36
  37 (*****************************************************************************)
  38 (* Types used during the intermediate phases of the unparsing *)
  39 (*****************************************************************************)
  40
  41 type token1 =
  42   | Fake1 of info
  43   | T1 of Parser_c.token
  44
  45 (* The cocci_tag of the token should always be a NOTHING. The mark of
  46  * the token can only be OriginTok or ExpandedTok. Why not get rid of
  47  * token and get something simpler ? because we need to know if the
  48  * info is a TCommentCpp or TCommentSpace, etc for some of the further
  49  * analysis so easier to keep with the token.
  50  *
  51  * This type contains the whole information. Have all the tokens with this
  52  * type.
  53  *)
  54 type min =
  55     Min of (int list (* match numbers from witness trees *) *
  56               Ast_cocci.adjacency (* adjacency information *))
  57   | Ctx
  58
  59 type token2 =
  60   | T2 of Parser_c.token * min *
  61           int option (* orig index, abstracting away comments and space *)
  62   | Fake2 of min
  63   | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
  64         * Unparse_cocci.nlhint option
  65   | C2 of string
  66   | Comma of string
  67   | Indent_cocci2
  68   | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
  69   | EatSpace2
  70
  71 (* not used yet *)
  72 type token3 =
  73   | T3 of Parser_c.token
  74   | Cocci3 of string
  75   | C3 of string
  76
  77
  78 (* similar to the tech in parsing_hack *)
  79 type token_extended = {
  80   tok2 : token2;
  81   str  : string;
  82   idx: int option; (* to know if 2 tokens were consecutive in orig file *)
  83   mutable new_tokens_before : token2 list;
  84   mutable remove : bool;
  85 }
  86
  87
  88 (*****************************************************************************)
  89 (* Helpers *)
  90 (*****************************************************************************)
  91
  92 let info_of_token1 t =
  93   match t with
  94   | Fake1 info -> info
  95   | T1 tok -> TH.info_of_tok tok
  96
  97 let print_token1 = function
  98     T1 tok -> TH.str_of_tok tok
  99   | Fake1 info -> "fake"
 100
 101 let str_of_token2 = function
 102   | T2 (t,_,_) -> TH.str_of_tok t
 103   | Fake2 _ -> ""
 104   | Cocci2 (s,_,_,_,_) -> s
 105   | C2 s -> s
 106   | Comma s -> s
 107   | Indent_cocci2 -> ""
 108   | Unindent_cocci2 _ -> ""
 109   | EatSpace2 -> ""
 110
 111 let print_token2 = function
 112   | T2 (t,b,_) ->
 113       let t_str =
 114         match t with
 115         | Parser_c.TCommentSpace _ -> " sp "
 116         | Parser_c.TCommentNewline _ -> " nl "
 117         | Parser_c.TCommentCpp _ -> " cp "
 118         | Parser_c.TCommentMisc _ -> " misc "
 119         | Parser_c.TComment _ -> " comment "
 120         | _ -> "" in
 121       let b_str =
 122         match b with
 123           Min (index,adj) ->
 124             Printf.sprintf "-%d[%s]"
 125               (match adj with Ast_cocci.ADJ n -> n | _ -> -1)
 126               (String.concat " " (List.map string_of_int index))
 127         | Ctx -> "" in
 128       "T2:"^b_str^t_str^TH.str_of_tok t
 129   | Fake2 b ->
 130       let b_str =
 131         match b with
 132           Min (index,adj) ->
 133             Printf.sprintf "-%d[%s]"
 134               (match adj with Ast_cocci.ADJ n -> n | _ -> -1)
 135               (String.concat " " (List.map string_of_int index))
 136         | Ctx -> "" in
 137       b_str^"fake"
 138   | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
 139   | C2 s -> "C2:"^s
 140   | Comma s -> "Comma:"^s
 141   | Indent_cocci2 -> "Indent"
 142   | Unindent_cocci2 _ -> "Unindent"
 143   | EatSpace2 -> "EatSpace"
 144
 145 let simple_print_all_tokens1 l =
 146   List.iter (function x -> Printf.printf "|%s| " (print_token1 x)) l;
 147   Printf.printf "\n"
 148
 149 let simple_print_all_tokens2 l =
 150   List.iter (function x -> Printf.printf "|%s| " (print_token2 x)) l;
 151   Printf.printf "\n"
 152
 153 let str_of_token3 = function
 154   | T3 t -> TH.str_of_tok t
 155   | Cocci3 s | C3 s -> s
 156
 157
 158
 159 let mk_token_extended x =
 160   let origidx =
 161     match x with
 162     | T2 (_,_, idx) -> idx
 163     | _ -> None
 164   in
 165   { tok2 = x;
 166     str = str_of_token2 x;
 167     idx = origidx;
 168     new_tokens_before = [];
 169     remove = false;
 170   }
 171
 172 let rebuild_tokens_extented toks_ext =
 173   let _tokens = ref [] in
 174   toks_ext +> List.iter (fun tok ->
 175     tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
 176     if not tok.remove then push2 tok.tok2 _tokens;
 177   );
 178   let tokens = List.rev !_tokens in
 179   (tokens +> List.map mk_token_extended)
 180
 181
 182 let mcode_contain_plus = function
 183   | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
 184   | Ast_cocci.CONTEXT _ -> true
 185 (* patch: when need full coccinelle transformation *)
 186   | Ast_cocci.MINUS (_,_,_,Ast_cocci.NOREPLACEMENT) -> false
 187   | Ast_cocci.MINUS (_,_,_,Ast_cocci.REPLACEMENT _) -> true(*REPL is not empty*)
 188   | Ast_cocci.PLUS _ -> raise Impossible
 189
 190 let contain_plus info =
 191   let mck = Ast_c.mcode_of_info info in
 192   mcode_contain_plus mck
 193
 194 (*****************************************************************************)
 195 (* Last fix on the ast *)
 196 (*****************************************************************************)
 197
 198 (* Because of the ugly trick to handle initialiser, I generate fake ','
 199  * for the last initializer element, but if there is nothing around it,
 200  * I don't want in the end to print it.
 201  *)
 202
 203 let remove_useless_fakeInfo_struct program =
 204   let bigf = { Visitor_c.default_visitor_c_s with
 205     Visitor_c.kini_s = (fun (k,bigf) ini ->
 206       match k ini with
 207       | InitList args, ii ->
 208           (match ii with
 209           | [_i1;_i2] -> ini
 210           | [i1;i2;iicommaopt] ->
 211               if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
 212                  && (Ast_c.is_fake iicommaopt)
 213                  (* sometimes the guy put a normal iicommaopt *)
 214               then InitList args, [i1;i2]
 215               else InitList args, [i1;i2;iicommaopt]
 216           | [i1;i2;iicommaopt;end_comma_opt] ->
 217               (* only in #define. end_comma_opt canot be fake *)
 218               (* not sure if this will be considered ambiguous with a previous
 219                  case? *)
 220               if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
 221                  && (Ast_c.is_fake iicommaopt)
 222                  (* sometimes the guy put a normal iicommaopt *)
 223               then InitList args, [i1;i2;end_comma_opt]
 224               else InitList args, [i1;i2;iicommaopt;end_comma_opt]
 225           | _ -> raise Impossible
 226           )
 227       | x -> x
 228     )
 229   } in
 230   Visitor_c.vk_toplevel_s bigf program
 231
 232
 233 (*****************************************************************************)
 234 (* Tokens1 generation *)
 235 (*****************************************************************************)
 236
 237 let get_fakeInfo_and_tokens celem toks =
 238   let toks_in  = ref toks in
 239   let toks_out = ref [] in
 240
 241   (* todo? verify good order of position ? *)
 242   let pr_elem info =
 243     match Ast_c.pinfo_of_info info with
 244     | FakeTok _ ->
 245         Common.push2 (Fake1 info) toks_out
 246     | OriginTok _ | ExpandedTok _ ->
 247         (* get the associated comments/space/cppcomment tokens *)
 248         let (before, x, after) =
 249           !toks_in +> Common.split_when (fun tok ->
 250             info =*= TH.info_of_tok tok)
 251         in
 252         assert(info =*= TH.info_of_tok x);
 253         (*old: assert(before +> List.for_all (TH.is_comment)); *)
 254         before +> List.iter (fun x ->
 255           if not (TH.is_comment x)
 256           then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
 257           (* case such as  int asm d3("x"); not yet in ast *)
 258         );
 259         before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
 260         push2 (T1 x) toks_out;
 261         toks_in := after;
 262     | AbstractLineTok _ ->
 263         (* can be called on type info when for instance use -type_c *)
 264         if !Flag_parsing_c.pretty_print_type_info
 265         then Common.push2 (Fake1 info) toks_out
 266         else raise Impossible (* at this stage *)
 267   in
 268
 269   let pr_space _ = () in (* use the spacing that is there already *)
 270
 271   Pretty_print_c.pp_program_gen pr_elem pr_space celem;
 272
 273   if not (null !toks_in)
 274   then failwith "WEIRD: unparsing not finished";
 275
 276   List.rev !toks_out
 277
 278 (* Fake nodes that have BEFORE code or are - should be moved over any subsequent
 279 whitespace and newlines, but not any comments, to get as close to the affected
 280 code as possible.  Similarly, fake nodes that have AFTER code should be moved
 281 backwards.  No fake nodes should have both before and after code. *)
 282
 283 let displace_fake_nodes toks =
 284   let is_fake = function Fake1 _ -> true | _ -> false in
 285   let is_whitespace = function
 286       T1(Parser_c.TCommentSpace _)
 287       (* patch: cocci    *)
 288     | T1(Parser_c.TCommentNewline _) -> true
 289     | _ -> false in
 290   let rec loop toks =
 291     let fake_info =
 292       try Some (Common.split_when is_fake toks)
 293       with Not_found -> None in
 294     match fake_info with
 295       Some(bef,((Fake1 info) as fake),aft) ->
 296         (match !(info.cocci_tag) with
 297         | Some x ->
 298           (match x with
 299             (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_)
 300           | (Ast_cocci.MINUS(_,_,_,Ast_cocci.REPLACEMENT _),_) ->
 301             (* move the fake node forwards *)
 302               let (whitespace,rest) = Common.span is_whitespace aft in
 303               bef @ whitespace @ fake :: (loop rest)
 304           | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
 305             (* move the fake node backwards *)
 306               let revbef = List.rev bef in
 307               let (revwhitespace,revprev) = Common.span is_whitespace revbef in
 308               let whitespace = List.rev revwhitespace in
 309               let prev = List.rev revprev in
 310               prev @ fake :: (loop (whitespace @ aft))
 311           | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
 312               bef @ fake :: (loop aft)
 313           | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
 314               failwith "fake node should not be before-after"
 315           | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
 316         )
 317         | None ->
 318             bef @ fake :: (loop aft)
 319         )
 320     | None -> toks
 321     | _ -> raise Impossible in
 322   loop toks
 323
 324 (*****************************************************************************)
 325 (* Tokens2 generation *)
 326 (*****************************************************************************)
 327
 328 let comment2t2 = function
 329     (Token_c.TCommentCpp
 330        (* not sure iif the following list is exhaustive or complete *)
 331        (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError),
 332      (info : Token_c.info)) ->
 333       C2(info.Common.str)
 334   | (Token_c.TCommentCpp x,(info : Token_c.info)) ->
 335       C2("\n"^info.Common.str^"\n")
 336   | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
 337
 338 let expand_mcode toks =
 339   let toks_out = ref [] in
 340
 341   let index = ref 0 in
 342
 343   let add_elem t minus =
 344     match t with
 345     | Fake1 info ->
 346         let str = Ast_c.str_of_info info in
 347         if str =$= ""
 348         then push2 (Fake2 minus) toks_out
 349         (* fx the fake "," at the end of a structure or enum.
 350            no idea what other fake info there can be... *)
 351         else push2 (Comma str) toks_out
 352
 353
 354     | T1 tok ->
 355         (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
 356         (* no tag on expandedTok ! *)
 357         let modified = function
 358             None -> false
 359           | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
 360           | _ -> true in
 361         (if (TH.is_expanded tok &&
 362              modified !((TH.info_of_tok tok).cocci_tag)
 363             (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*))
 364         then
 365           failwith
 366             (Printf.sprintf
 367                "expanded token %s on line %d is either modified or stored in a metavariable"
 368                (TH.str_of_tok tok) (TH.line_of_tok tok)));
 369
 370         let tok' = tok +> TH.visitor_info_of_tok (fun i ->
 371           { i with cocci_tag = ref Ast_c.emptyAnnot; }
 372         ) in
 373
 374         let optindex =
 375           if TH.is_origin tok && not (TH.is_real_comment tok)
 376           then begin
 377               incr index;
 378               Some !index
 379           end
 380           else None
 381         in
 382
 383         push2 (T2 (tok', minus, optindex)) toks_out
 384   in
 385
 386   let expand_info t =
 387     let (mcode,env) =
 388       Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
 389
 390     let pr_cocci s ln col rcol hint =
 391       push2 (Cocci2(s,ln,col,rcol,hint)) toks_out  in
 392     let pr_c info =
 393       (match Ast_c.pinfo_of_info info with
 394         Ast_c.AbstractLineTok _ ->
 395           push2 (C2 (Ast_c.str_of_info info)) toks_out
 396       | Ast_c.FakeTok (s,_) ->
 397           push2 (C2 s) toks_out
 398       | _ ->
 399           Printf.printf "line: %s\n" (Common.dump info);
 400           failwith "not an abstract line");
 401       (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
 402       List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
 403
 404     let pr_barrier ln col = (* marks a position, used around C code *)
 405       push2 (Cocci2("",ln,col,col,None)) toks_out  in
 406     let pr_nobarrier ln col = () in (* not needed for linux spacing *)
 407
 408     let pr_cspace _ = push2 (C2 " ") toks_out in
 409
 410     let pr_space _ = () (* rely on add_space in cocci code *) in
 411     let pr_arity _ = () (* not interested *) in
 412
 413     let indent _   = push2 Indent_cocci2 toks_out in
 414     let unindent x = push2 (Unindent_cocci2 x) toks_out in
 415     let eat_space _   = push2 EatSpace2 toks_out in
 416
 417     let args_pp =
 418       (env, pr_cocci, pr_c, pr_cspace,
 419        (match !Flag_parsing_c.spacing with
 420          Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
 421        pr_arity,
 422        (match !Flag_parsing_c.spacing with
 423          Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
 424        indent, unindent, eat_space) in
 425
 426     (* old: when for yacfe with partial cocci:
 427      *    add_elem t false;
 428     *)
 429
 430     (* patch: when need full coccinelle transformation *)
 431     let unparser = Unparse_cocci.pp_list_list_any args_pp false in
 432     match mcode with
 433     | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
 434         (* Why adding ? because I want to have all the information, the whole
 435          * set of tokens, so I can then process and remove the
 436          * is_between_two_minus for instance *)
 437         add_elem t (Min (inst,adj));
 438         (match any_xxs with
 439           Ast_cocci.NOREPLACEMENT -> ()
 440         | Ast_cocci.REPLACEMENT(any_xxs,_) ->
 441             unparser any_xxs Unparse_cocci.InPlace)
 442     | Ast_cocci.CONTEXT (_,any_befaft) ->
 443         (match any_befaft with
 444         | Ast_cocci.NOTHING ->
 445             add_elem t Ctx
 446         | Ast_cocci.BEFORE (xxs,_) ->
 447             unparser xxs Unparse_cocci.Before;
 448             add_elem t Ctx
 449         | Ast_cocci.AFTER (xxs,_) ->
 450             add_elem t Ctx;
 451             unparser xxs Unparse_cocci.After;
 452         | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
 453             unparser xxs Unparse_cocci.Before;
 454             add_elem t Ctx;
 455             unparser yys Unparse_cocci.After;
 456         )
 457     | Ast_cocci.PLUS _ -> raise Impossible
 458
 459   in
 460
 461   toks +> List.iter expand_info;
 462   List.rev !toks_out
 463
 464
 465 (*****************************************************************************)
 466 (* Tokens2 processing, filtering, adjusting *)
 467 (*****************************************************************************)
 468
 469 let is_space = function
 470   | T2(Parser_c.TCommentSpace _,_b,_i) -> true  (* only whitespace *)
 471   | _ -> false
 472
 473 let is_newline = function
 474   | T2(Parser_c.TCommentNewline _,_b,_i) -> true
 475   | _ -> false
 476
 477 let is_whitespace = function
 478   | (T2 (t,_b,_i)) ->
 479       (match t with
 480       | Parser_c.TCommentSpace _ -> true  (* only whitespace *)
 481       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
 482       | _ -> false
 483       )
 484   | _ -> false
 485
 486 let is_minusable_comment = function
 487   | (T2 (t,_b,_i)) ->
 488       (match t with
 489       | Parser_c.TCommentSpace _   (* only whitespace *)
 490       (* patch: coccinelle *)
 491       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
 492       | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
 493       | Parser_c.TComment _
 494       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 495       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 496       | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
 497         -> true
 498
 499       | Parser_c.TCommentMisc _
 500       | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
 501         -> false
 502
 503       | _ -> false
 504       )
 505   | _ -> false
 506
 507 let is_minusable_comment_nocpp = function
 508   | (T2 (t,_b,_i)) ->
 509       (match t with
 510       | Parser_c.TCommentSpace _   (* only whitespace *)
 511       (* patch: coccinelle *)
 512       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
 513       | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
 514       | Parser_c.TComment _ -> true
 515       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 516       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 517       | Parser_c.TCommentCpp (Token_c.CppDirective, _)
 518         -> false
 519
 520       | Parser_c.TCommentMisc _
 521       | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
 522         -> false
 523
 524       | _ -> false
 525       )
 526   | _ -> false
 527
 528 let all_coccis = function
 529     Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _
 530   | EatSpace2 -> true
 531   | _ -> false
 532
 533 (*previously gave up if the first character was a newline, but not clear why*)
 534 let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
 535
 536 let set_minus_comment adj = function
 537   | T2 (t,Ctx,idx) ->
 538       let str = TH.str_of_tok t in
 539       (match t with
 540       | Parser_c.TCommentSpace _
 541 (* patch: coccinelle *)
 542       | Parser_c.TCommentNewline _ -> ()
 543
 544       | Parser_c.TComment _
 545       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 546       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 547       | Parser_c.TCommentCpp (Token_c.CppDirective, _)
 548         ->
 549           pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
 550                  (TH.line_of_tok t) str)
 551       | _ -> raise Impossible
 552       );
 553       T2 (t, Min adj, idx)
 554 (* patch: coccinelle *)
 555   | T2 (t,Min adj,idx) as x -> x
 556   | Fake2 _ as x -> x
 557   | _ -> raise Impossible
 558
 559 let set_minus_comment_or_plus adj = function
 560     Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _
 561   | EatSpace2 as x -> x
 562   | x -> set_minus_comment adj x
 563
 564 let drop_minus xs =
 565   xs +> Common.exclude (function
 566     | T2 (t,Min adj,_) -> true
 567     | _ -> false
 568   )
 569
 570 let drop_expanded xs =
 571   xs +> Common.exclude (function
 572     | T2 (t,_,_) when TH.is_expanded t -> true
 573     | _ -> false
 574   )
 575
 576 let drop_fake xs =
 577   xs +> Common.exclude (function
 578     | Fake2 _ -> true
 579     | _ -> false
 580   )
 581
 582 let remove_minus_and_between_and_expanded_and_fake xs =
 583
 584   (* get rid of expanded tok *)
 585   let xs = drop_expanded xs in
 586
 587   let minus_or_comment = function
 588       T2(_,Min adj,_) -> true
 589     | x -> is_minusable_comment x in
 590
 591   let minus_or_comment_nocpp = function
 592       T2(_,Min adj,_) -> true
 593     | x -> is_minusable_comment_nocpp x in
 594
 595   let common_adj (index1,adj1) (index2,adj2) =
 596     let same_adj = (* same adjacency info *)
 597       match (adj1,adj2) with
 598         (Ast_cocci.ADJ adj1,Ast_cocci.ADJ adj2) -> adj1 = adj2
 599       | (Ast_cocci.ALLMINUS,_) | (_,Ast_cocci.ALLMINUS) -> true in
 600     same_adj &&
 601     (* non-empty intersection of witness trees *)
 602     not ((Common.inter_set index1 index2) = []) in
 603
 604   (* new idea: collects regions not containing non-space context code
 605      if two adjacent adjacent minus tokens satisfy common_adj then delete
 606      all spaces, comments etc between them
 607      if two adjacent minus tokens do not satisfy common_adj only delete
 608      the spaces between them if there are no comments, etc.
 609      if the region contain no plus code and is both preceded and followed
 610      by a newline, delete the initial newline. *)
 611
 612   let rec adjust_around_minus = function
 613       [] -> []
 614     | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
 615       ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs ->
 616         let (minus_list,rest) = Common.span not_context (t1::xs) in
 617         let contains_plus = List.exists is_plus minus_list in
 618         let x =
 619           match List.rev minus_list with
 620             (T2(Parser_c.TCommentNewline c,_b,_i))::rest
 621             when List.for_all minus_or_comment minus_list ->
 622               set_minus_comment_or_plus adj1 x
 623           | _ -> x in
 624         x :: adjust_within_minus contains_plus minus_list @
 625         adjust_around_minus rest
 626     | ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs ->
 627         let (minus_list,rest) = Common.span not_context (t1::xs) in
 628         let contains_plus = List.exists is_plus minus_list in
 629         adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
 630     | x::xs ->
 631         x :: adjust_around_minus xs
 632   and adjust_within_minus cp (* contains plus *) = function
 633       ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs ->
 634         let not_minus = function T2(_,Min _,_) -> false | _ -> true in
 635         let (not_minus_list,rest) = Common.span not_minus xs in
 636         t1 ::
 637         (match rest with
 638           ((Fake2(Min adj2) | T2(_,Min adj2,_)) as t2)::xs
 639           when common_adj adj1 adj2 ->
 640             (List.map (set_minus_comment_or_plus adj1) not_minus_list)
 641             @ (adjust_within_minus cp (t2::xs))
 642         | ((Fake2(Min adj2) | T2(_,Min adj2,_)) as t2)::xs ->
 643             if not cp && List.for_all is_whitespace not_minus_list
 644             then
 645               (List.map (set_minus_comment_or_plus adj1) not_minus_list)
 646               @ (adjust_within_minus cp (t2::xs))
 647             else
 648               not_minus_list @ (adjust_within_minus cp (t2::xs))
 649         | _ ->
 650             if cp
 651             then xs
 652             else
 653               (* remove spaces after removed stuff, eg a comma after a
 654                  function argument *)
 655               let (spaces,rest) = Common.span is_space xs in
 656               (List.map (set_minus_comment_or_plus adj1) spaces)
 657               @ rest)
 658     | xs -> failwith "should always start with minus"
 659   and not_context = function
 660       (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false
 661     | _ -> true
 662   and is_plus = function
 663       C2 _ | Comma _ | Cocci2 _ -> true
 664     | _ -> false in
 665
 666   let xs = adjust_around_minus xs in
 667
 668   (* get rid of fake tok *)
 669   let xs = drop_fake xs in
 670
 671   (* this drops blank lines after a brace introduced by removing code *)
 672   let minus_or_comment_nonl = function
 673       T2(_,Min adj,_) -> true
 674     | T2(Parser_c.TCommentNewline _,_b,_i) -> false
 675     | x -> is_minusable_comment x in
 676
 677   let rec adjust_after_brace = function
 678       [] -> []
 679     | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs)
 680        when str_of_token2 x =$= "{" ->
 681          let (between_minus,rest) = Common.span minus_or_comment_nonl xs in
 682          let is_whitespace = function
 683              T2(Parser_c.TCommentSpace _,_b,_i)
 684              (* patch: cocci    *)
 685            | T2(Parser_c.TCommentNewline _,_b,_i) -> true
 686            | _ -> false in
 687          let (newlines,rest) = Common.span is_whitespace rest in
 688          let (drop_newlines,last_newline) =
 689            let rec loop = function
 690                [] -> ([],[])
 691              | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
 692                  (List.rev rest,[x])
 693              | x::xs ->
 694                  let (drop_newlines,last_newline) = loop xs in
 695                  (drop_newlines,x::last_newline) in
 696            loop (List.rev newlines) in
 697          x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@
 698          last_newline@
 699          adjust_after_brace rest
 700     | x::xs -> x::adjust_after_brace xs in
 701
 702   let xs = adjust_after_brace xs in
 703
 704   (* search backwards from context } over spaces until reaching a newline.
 705      then go back over all minus code until reaching some context or + code.
 706      get rid of all intervening spaces, newlines, and comments
 707      input is reversed *)
 708   let rec adjust_before_brace = function
 709       [] -> []
 710     | ((T2(t,Ctx,_)) as x)::xs when str_of_token2 x =$= "}" or is_newline x ->
 711         let (outer_spaces,rest) = Common.span is_space xs in
 712         x :: outer_spaces @
 713         (match rest with
 714           ((T2 (Parser_c.TCommentNewline _,Ctx,_i)) as h) ::
 715           (* the rest of this code is the same as from_newline below
 716              but merging them seems to be error prone... *)
 717           ((T2 (t, Min adj, idx)) as m) :: rest ->
 718             let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
 719             h :: m ::
 720             (List.map (set_minus_comment adj) spaces) @
 721             (adjust_before_brace rest)
 722         | _ -> adjust_before_brace rest)
 723     | x::xs -> x :: (adjust_before_brace xs) in
 724
 725   let from_newline = function
 726       ((T2 (t, Min adj, idx)) as m) :: rest ->
 727         let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
 728         m ::
 729         (List.map (set_minus_comment adj) spaces) @
 730         (adjust_before_brace rest)
 731     | ((T2 (t0, Ctx, idx0)) as m0) :: ((T2 (t, Min adj, idx)) as m) :: rest
 732         when TH.str_of_tok t0 = "" ->
 733           (* This is for the case of a #define that is completely deleted,
 734              because a #define has a strange EOL token at the end.
 735              We hope there i no other kind of token that is represented by
 736              "", but it seems like changing the kind of token might break
 737              the end of entity recognition in the C parser.
 738              See parsing_hacks.ml *)
 739           let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
 740           m0 :: m ::
 741           (List.map (set_minus_comment adj) spaces) @
 742           (adjust_before_brace rest)
 743     | rest -> adjust_before_brace rest in
 744
 745   let xs = List.rev (from_newline (List.rev xs)) in
 746   let xs = drop_minus xs in
 747   xs
 748
 749 (* things that should not be followed by space - boundary between SmPL
 750    code and C code *)
 751 let adjust_eat_space toks =
 752   let rec loop = function
 753       [] -> []
 754     | EatSpace2 :: x :: rest when is_space x -> loop rest
 755     | EatSpace2 :: rest -> loop rest
 756     | x :: xs -> x :: loop xs in
 757   loop toks
 758
 759 (* normally, in C code, a semicolon is not preceded by a space or newline *)
 760 let adjust_before_semicolon toks =
 761   let toks = List.rev toks in
 762   let rec search_semic = function
 763       [] -> []
 764     | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
 765         if List.mem (str_of_token2 x) [";";")";","]
 766         then x :: search_semic (search_minus false xs)
 767         else x :: search_semic xs
 768     | x::xs -> x :: search_semic xs
 769   and search_minus seen_minus xs =
 770     let (spaces, rest) = Common.span is_space xs in
 771     (* only delete spaces if something is actually deleted *)
 772     match rest with
 773       ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest
 774     | _ -> if seen_minus then rest else xs in
 775   List.rev (search_semic toks)
 776
 777 (* normally, in C code, a ( is not followed by a space or newline *)
 778 let adjust_after_paren toks =
 779   let rec search_paren = function
 780       [] -> []
 781     | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
 782         if List.mem (str_of_token2 x) ["("] (* other things? *)
 783         then x :: search_paren(search_minus false xs)
 784         else x :: search_paren xs
 785     | x::xs -> x :: search_paren xs
 786   and search_minus seen_minus xs =
 787     let (spaces, rest) = Common.span is_whitespace xs in
 788     (* only delete spaces if something is actually deleted *)
 789     match rest with
 790       ((T2(_,Min _,_)) as a)::rerest -> (* minus *)
 791         a :: search_minus true rerest
 792     | ((T2(_,Ctx,_)) as a)::rerest when seen_minus && str_of_token2 a = "," ->
 793         (* comma after ( will be deleted, so consider it as minus code
 794            already *)
 795         a :: search_minus true rerest
 796     | _ -> if seen_minus then rest else xs in (* drop trailing space *)
 797   search_paren toks
 798
 799 (* this is for the case where braces are added around an if branch *)
 800 let paren_then_brace toks =
 801   let rec search_paren = function
 802       [] -> []
 803     | ((T2(_,Ctx,_)) as x)::xs ->
 804         if List.mem (str_of_token2 x) [")"]
 805         then x :: search_paren(search_plus xs)
 806         else x :: search_paren xs
 807     | x::xs -> x :: search_paren xs
 808   and search_plus xs =
 809     let (spaces, rest) = Common.span is_whitespace xs in
 810     match rest with
 811       (* move the brace up to the previous line *)
 812       ((Cocci2("{",_,_,_,_)) as x) :: (((Cocci2 _) :: _) as rest) ->
 813         (C2 " ") :: x :: spaces @ rest
 814     | _ -> xs in
 815   search_paren toks
 816
 817 let is_ident_like s = s ==~ Common.regexp_alpha
 818
 819 let rec drop_space_at_endline = function
 820     [] -> []
 821   | [x] -> [x]
 822   | (C2 " ") ::
 823     ((((T2(Parser_c.TCommentSpace _,Ctx,_)) | Cocci2("\n",_,_,_,_) |
 824     (T2(Parser_c.TCommentNewline _,Ctx,_))) :: _) as rest) ->
 825       (* when unparse_cocci doesn't know whether space is needed *)
 826       drop_space_at_endline rest
 827   | ((T2(Parser_c.TCommentSpace _,Ctx,_i)) as a)::rest ->
 828       let (outer_spaces,rest) = Common.span is_space rest in
 829       let minus_or_comment_or_space_nocpp = function
 830           T2(_,Min adj,_) -> true
 831         | (T2(Parser_c.TCommentSpace _,Ctx,_i)) -> true
 832         | (T2(Parser_c.TCommentNewline _,Ctx,_i)) -> false
 833         | x -> false in
 834       let (minus,rest) = Common.span minus_or_comment_or_space_nocpp rest in
 835       let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in
 836       if List.exists (function T2(_,Min adj,_) -> true | _ -> false) minus
 837       then
 838         match rest with
 839           ((T2(Parser_c.TCommentNewline _,Ctx,_i)) as a)::rest ->
 840             (* drop trailing spaces *)
 841             minus@a::(drop_space_at_endline rest)
 842         | _ -> fail()
 843       else fail()
 844   | a :: rest ->
 845       a :: drop_space_at_endline rest
 846
 847 (* if a removed ( is between two tokens, then add a space *)
 848 let rec paren_to_space = function
 849     [] -> []
 850   | [x] -> [x]
 851   | [x;y] -> [x;y]
 852   | ((T2(_,Ctx,_)) as a)::((T2(t,Min _,_)) as b)::((T2(_,Ctx,_)) as c)::rest
 853     when not (is_whitespace a) && TH.str_of_tok t = "(" ->
 854       a :: b :: (C2 " ") :: (paren_to_space (c :: rest))
 855   | a :: rest -> a :: (paren_to_space rest)
 856
 857 let rec add_space xs =
 858   match xs with
 859   | [] -> []
 860   | [x] -> [x]
 861   | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
 862     when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
 863       not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
 864         (* this only works within a line.  could consider whether
 865            something should be done to add newlines too, rather than
 866            printing them explicitly in unparse_cocci. *)
 867         x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
 868   | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
 869     when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
 870       not (lnx = -1) && lnx < lny && not (rcolx = -1) ->
 871         (* this only works within a line.  could consider whether
 872            something should be done to add newlines too, rather than
 873            printing them explicitly in unparse_cocci. *)
 874         x::C2 (String.make (lny-lnx) '\n')::
 875         C2 (String.make (lcoly-1) ' '):: (* -1 is for the + *)
 876         add_space (y::xs)
 877   | ((T2(_,Ctx,_)) as x)::((Cocci2 _) as y)::xs -> (* add space on boundary *)
 878       let sx = str_of_token2 x in
 879       let sy = str_of_token2 y in
 880       if is_ident_like sx && (is_ident_like sy or List.mem sy ["="])
 881       then x::C2 " "::(add_space (y::xs))
 882       else x::(add_space (y::xs))
 883   | x::y::xs -> (* not boundary, not sure if it is possible *)
 884       let sx = str_of_token2 x in
 885       let sy = str_of_token2 y in
 886       if is_ident_like sx && is_ident_like sy
 887       then x::C2 " "::(add_space (y::xs))
 888       else x::(add_space (y::xs))
 889
 890 (* A fake comma is added at the end of an unordered initlist or a enum
 891 decl, if the initlist or enum doesn't already end in a comma.  This is only
 892 needed if there is + code, ie if we see Cocci after it in the code sequence *)
 893
 894 let rec drop_end_comma = function
 895     [] -> []
 896   | [x] -> [x]
 897   | ((Comma ",") as x) :: rest ->
 898       let (newlines,rest2) = Common.span is_whitespace rest in
 899       (match rest2 with
 900         (Cocci2 _) :: _ -> x :: drop_end_comma rest
 901       | _ -> drop_end_comma rest)
 902   | x :: xs -> x :: drop_end_comma xs
 903
 904 (* The following only works for the outermost function call.  Stack records
 905 the column of all open parentheses.  Space_cell contains the most recent
 906 comma in the outermost function call.  The goal is to decide whether this
 907 should be followed by a space or a newline and indent. *)
 908 let add_newlines toks tabbing_unit =
 909   let create_indent n =
 910     let (tu,tlen) =
 911       match tabbing_unit with
 912         Some ("\t",_) -> ("\t",8)
 913       | Some ("",_) -> ("\t",8) (* not sure why... *)
 914       | Some (s,_) -> (s,String.length s) (* assuming only spaces *)
 915       | None -> ("\t",8) in
 916     let rec loop seen =
 917       if seen + tlen <= n
 918       then tu ^ loop (seen + tlen)
 919       else String.make (n-seen) ' ' in
 920     loop 0 in
 921   let check_for_newline count x = function
 922       Some (start,space_cell) when count > Flag_parsing_c.max_width ->
 923         space_cell := "\n"^(create_indent x);
 924         Some (x + (count - start))
 925     | _ -> None in
 926   (* the following is for strings that may contain newline *)
 927   let string_length s count =
 928     let l = list_of_string s in
 929     List.fold_left
 930       (function count ->
 931         function
 932             '\t' -> count + 8
 933           | '\n' -> 0
 934           | c -> count + 1)
 935       count l in
 936   let rec loop info count = function
 937       [] -> []
 938     | ((T2(tok,_,_)) as a)::xs ->
 939         a :: loop info (string_length (TH.str_of_tok tok) count) xs
 940     | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs ->
 941         let (stack,space_cell) = info in
 942         let rest =
 943           match hint with
 944             None -> loop info (string_length s count) xs
 945           | Some Unparse_cocci.StartBox ->
 946               let count = string_length s count in
 947               loop (count::stack,space_cell) count xs
 948           | Some Unparse_cocci.EndBox ->
 949               let count = string_length s count in
 950               (match stack with
 951                 [x] ->
 952                   (match check_for_newline count x space_cell with
 953                     Some count -> loop ([],None) count xs
 954                   | None -> loop ([],None) count xs)
 955               | _ -> loop (List.tl stack,space_cell) count xs)
 956           | Some (Unparse_cocci.SpaceOrNewline sp) ->
 957               let count = string_length s (count + 1 (*space*)) in
 958               (match stack with
 959                 [x] ->
 960                   (match check_for_newline count x space_cell with
 961                     Some count -> loop (stack,Some (x,sp)) count xs
 962                   | None -> loop (stack,Some (count,sp)) count xs)
 963               | _ -> loop info count xs) in
 964         a :: rest
 965     | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs
 966     | ((Comma(s)) as a)::xs -> a :: loop info (string_length s count) xs
 967     | Fake2 _ :: _ | Indent_cocci2 :: _
 968     | Unindent_cocci2 _::_ | EatSpace2::_ ->
 969         failwith "unexpected fake, indent, unindent, or eatspace" in
 970   let redo_spaces prev = function
 971       Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) ->
 972         C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev
 973     | t -> t::prev in
 974   (match !Flag_parsing_c.spacing with
 975     Flag_parsing_c.SMPL -> toks
 976   | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks)))
 977
 978 (* When insert some new code, because of a + in a SP, we must add this
 979  * code at the right place, with the good indentation. So each time we
 980  * encounter some spacing info, with some newline, we maintain the
 981  * current indentation level used.
 982  *
 983  * TODO problems: not accurate. ex: TODO
 984  *
 985  * TODO: if in #define region, should add a \ \n
 986  *)
 987 let new_tabbing2 space =
 988   (list_of_string space)
 989     +> List.rev
 990     +> Common.take_until (fun c -> c =<= '\n')
 991     +> List.rev
 992     +> List.map string_of_char
 993     +> String.concat ""
 994
 995 let new_tabbing a =
 996   Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
 997
 998
 999 let rec adjust_indentation xs =
1000
1001   let _current_tabbing = ref ([] : string list) in
1002   let tabbing_unit = ref None in
1003
1004   let string_of_list l = String.concat "" (List.map string_of_char l) in
1005
1006   (* try to pick a tabbing unit for the plus code *)
1007   let adjust_tabbing_unit old_tab new_tab =
1008     if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
1009     then
1010       let old_tab = list_of_string old_tab in
1011       let new_tab = list_of_string new_tab in
1012       let rec loop = function
1013           ([],new_tab) ->
1014             tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
1015         | (_,[]) -> failwith "not possible"
1016         | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
1017       loop (old_tab,new_tab) in
1018
1019 (*
1020   let remtab tu current_tab =
1021     let current_tab = List.rev(list_of_string current_tab) in
1022     let rec loop = function
1023         ([],new_tab) -> string_of_list (List.rev new_tab)
1024       | (_,[]) -> (-*weird; tabbing unit used up more than the current tab*-)
1025         ""
1026       | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
1027       | (_,ns) -> (-* mismatch; remove what we can *-)
1028           string_of_list (List.rev ns) in
1029     loop (tu,current_tab) in
1030 *)
1031
1032   let rec find_first_tab started = function
1033       [] -> ()
1034     | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
1035         find_first_tab true xs
1036 (* patch: coccinelle *)
1037     | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
1038       when started ->
1039         let s = str_of_token2 x +> new_tabbing in
1040         tabbing_unit := Some (s,List.rev (list_of_string s))
1041     | x::xs -> find_first_tab started xs in
1042   find_first_tab false xs;
1043
1044   let rec balanced ct = function
1045       [] -> ct >= 0
1046     | ((T2(tok,_,_)) as x)::xs ->
1047         (match str_of_token2 x with
1048           "(" -> balanced (ct+1) xs
1049         | ")" -> balanced (ct-1) xs
1050         | _ -> balanced ct xs)
1051     | x::xs -> balanced ct xs in
1052
1053   let update_tabbing started s x =
1054     let old_tabbing = !_current_tabbing in
1055     str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := [s]);
1056     (* only trust the indentation after the first { *)
1057     if started
1058     then
1059       adjust_tabbing_unit
1060         (String.concat "" old_tabbing)
1061         (String.concat "" !_current_tabbing) in
1062
1063   let rec aux started xs =
1064     match xs with
1065     | [] ->  []
1066 (* patch: coccinelle *)
1067     | ((T2 (Parser_c.TCommentNewline s,_,_)) as x)::
1068       Unindent_cocci2(false)::xs ->
1069         update_tabbing started s x;
1070         (C2 "\n")::aux started xs
1071     | (Cocci2("\n",_,_,_,_))::Unindent_cocci2(false)::xs ->
1072         (C2 "\n")::aux started xs
1073     | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
1074       ((Cocci2 ("{",_,_,_,_)) as a)::xs
1075       when started && str_of_token2 x =$= ")" ->
1076         (* to be done for if, etc, but not for a function header *)
1077         x::(C2 " ")::a::(aux started xs)
1078     | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs
1079       when
1080         balanced 0 (fst(Common.span (function x -> not(is_newline x)) xs)) ->
1081         update_tabbing started s x;
1082         let coccis_rest = Common.span all_coccis xs in
1083         (match coccis_rest with
1084           (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
1085             (* the case where cocci code has been added before a close } *)
1086             x::aux started (Indent_cocci2::xs)
1087         | _ -> x::aux started xs)
1088     | Indent_cocci2::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
1089       when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL ->
1090         let tu = String.make (lcoly-1) ' ' in
1091         _current_tabbing := tu::(!_current_tabbing);
1092         C2 (tu)::aux started (y::xs)
1093     | Indent_cocci2::xs ->
1094         (match !tabbing_unit with
1095           None -> aux started xs
1096         | Some (tu,_) ->
1097             _current_tabbing := tu::(!_current_tabbing);
1098              (* can't be C2, for later phases *)
1099              Cocci2 (tu,-1,-1,-1,None)::aux started xs)
1100     | Unindent_cocci2(permanent)::xs ->
1101         (match !_current_tabbing with
1102           [] -> aux started xs
1103         | _::new_tabbing ->
1104             let s = String.concat "" new_tabbing in
1105             _current_tabbing := new_tabbing;
1106             Cocci2 (s,-1,-1,-1,None)::aux started xs)
1107     (* border between existing code and cocci code *)
1108     | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs
1109       when str_of_token2 x =$= "{" ->
1110         x::aux true (y::Indent_cocci2::xs)
1111     | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
1112       when str_of_token2 y =$= "}" ->
1113         x::aux started (Unindent_cocci2 true::y::xs)
1114     (* starting the body of the function *)
1115     | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->  x::aux true xs
1116     | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs
1117     | ((Cocci2("\n",_,_,_,_)) as x)::xs ->
1118             (* dont inline in expr because of weird eval order of ocaml *)
1119         let s = String.concat "" !_current_tabbing in
1120         (* can't be C2, for later phases *)
1121         x::Cocci2 (s,-1,-1,-1,None)::aux started xs
1122     | x::xs -> x::aux started xs in
1123   (aux false xs,!tabbing_unit)
1124
1125
1126 let rec find_paren_comma = function
1127   | [] -> ()
1128
1129   (* do nothing if was like this in original file *)
1130   | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
1131     ::xs when p2 =|= p1 + 1 ->
1132       find_paren_comma (x2::xs)
1133
1134   | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
1135     ::xs when p2 =|= p1 + 1 ->
1136       find_paren_comma (x2::xs)
1137
1138   | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
1139     ::xs when p2 =|= p1 + 1 ->
1140       find_paren_comma (x2::xs)
1141
1142   (* otherwise yes can adjust *)
1143   | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
1144       x2.remove <- true;
1145       find_paren_comma (x2::xs)
1146   | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
1147       x1.remove <- true;
1148       find_paren_comma (x2::xs)
1149
1150   | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
1151       x1.remove <- true;
1152       find_paren_comma (x2::xs)
1153
1154   | x::xs ->
1155       find_paren_comma xs
1156
1157
1158 let fix_tokens toks =
1159   let toks = toks +> List.map mk_token_extended in
1160
1161   let cleaner = toks +> Common.exclude (function
1162     | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
1163     | _ -> false
1164   ) in
1165   find_paren_comma cleaner;
1166
1167   let toks = rebuild_tokens_extented toks in
1168   toks +> List.map (fun x -> x.tok2)
1169
1170
1171
1172 (*****************************************************************************)
1173 (* Final unparsing (and debugging support) *)
1174 (*****************************************************************************)
1175
1176 (* for debugging *)
1177 type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
1178
1179 let kind_of_token2 = function
1180   | Fake2 _ -> KFake
1181   | Cocci2 _ -> KCocci
1182   | C2 _ -> KC
1183   | Comma _ -> KC
1184   | T2 (t,_,_) ->
1185       (match TH.pinfo_of_tok t with
1186       | ExpandedTok _ -> KExpanded
1187       | OriginTok _ -> KOrigin
1188       | FakeTok _ -> raise Impossible (* now a Fake2 *)
1189       | AbstractLineTok _ -> raise Impossible (* now a KC *)
1190       )
1191   | Unindent_cocci2 _ | Indent_cocci2 | EatSpace2 -> raise Impossible
1192
1193 let end_mark = "!"
1194
1195 let start_mark = function
1196   | KFake -> "!F!"
1197   | KCocci -> "!S!"
1198   | KC -> "!A!"
1199   | KExpanded -> "!E!"
1200   | KOrigin -> ""
1201
1202 let print_all_tokens2 pr xs =
1203   if !Flag_parsing_c.debug_unparsing
1204   then
1205     let current_kind = ref KOrigin in
1206     xs +> List.iter (fun t ->
1207       let newkind = kind_of_token2 t in
1208       if newkind =*= !current_kind
1209       then pr (str_of_token2 t)
1210       else begin
1211         pr (end_mark);
1212         pr (start_mark newkind);
1213         pr (str_of_token2 t);
1214         current_kind := newkind
1215       end
1216     );
1217   else
1218     xs +> List.iter (fun x -> pr (str_of_token2 x))
1219
1220
1221
1222
1223 (*****************************************************************************)
1224 (* Entry points *)
1225 (*****************************************************************************)
1226
1227 (* old: PPviatok was made in the beginning to allow to pretty print a
1228  * complete C file, including a modified C file by transformation.ml,
1229  * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
1230  * maybe flow_to_ast) all the cases. Indeed we don't need to do some
1231  * fancy stuff when a function was not modified at all. Just need to
1232  * print the list of token as-is. But now pretty_print_c.ml handles
1233  * almost everything so maybe less useful. Maybe PPviatok allows to
1234  * optimize a little the pretty printing.
1235  *
1236  * update: now have PPviastr which goes even faster than PPviatok, so
1237  * PPviatok has disappeared.
1238  *)
1239
1240 type ppmethod = PPnormal | PPviastr
1241
1242
1243
1244
1245 (* The pp_program function will call pretty_print_c.ml with a special
1246  * function to print the leaf components, the tokens. When we want to
1247  * print a token, we need to print also maybe the space and comments that
1248  * were close to it in the original file (and that was omitted during the
1249  * parsing phase), and honor what the cocci-info attached to the token says.
1250  * Maybe we will not print the token if it's a MINUS-token, and maybe we will
1251  * print it and also print some cocci-code attached in a PLUS to it.
1252  * So we will also maybe call unparse_cocci. Because the cocci-code may
1253  * contain metavariables, unparse_cocci will in fact sometimes call back
1254  * pretty_print_c (which will this time don't call back again unparse_cocci)
1255  *)
1256
1257 let pp_program2 xs outfile  =
1258   Common.with_open_outfile outfile (fun (pr,chan) ->
1259     let pr s =
1260       if !Flag_parsing_c.debug_unparsing
1261       then begin pr2_no_nl s; flush stderr end
1262       else pr s
1263         (* flush chan; *)
1264         (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1265     in
1266
1267     xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
1268       (* here can still work on ast *)
1269       let e = remove_useless_fakeInfo_struct e in
1270
1271       match ppmethod with
1272       | PPnormal ->
1273           (* now work on tokens *)
1274           (* phase1: just get all the tokens, all the information *)
1275           assert(toks_e +> List.for_all (fun t ->
1276             TH.is_origin t or TH.is_expanded t
1277           ));
1278           let toks = get_fakeInfo_and_tokens e toks_e in
1279           let toks = displace_fake_nodes toks in
1280           (* assert Origin;ExpandedTok;Faketok *)
1281           let toks = expand_mcode toks in
1282
1283           (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1284            * and no tag information, just NOTHING. *)
1285
1286           let toks =
1287             if !Flag.sgrep_mode2
1288             then
1289               (* nothing else to do for sgrep *)
1290               drop_expanded(drop_fake(drop_minus toks))
1291             else
1292               (* phase2: can now start to filter and adjust *)
1293                (let (toks,tu) = adjust_indentation toks in
1294               let toks = adjust_eat_space toks in
1295               let toks = adjust_before_semicolon toks in(*before remove minus*)
1296               let toks = adjust_after_paren toks in(*also before remove minus*)
1297               let toks = drop_space_at_endline toks in
1298               let toks = paren_to_space toks in
1299               let toks = drop_end_comma toks in
1300               let toks = remove_minus_and_between_and_expanded_and_fake toks in
1301               (* assert Origin + Cocci + C and no minus *)
1302               let toks = add_space toks in
1303               let toks = add_newlines toks tu in
1304               let toks = paren_then_brace toks in
1305               let toks = fix_tokens toks in
1306                toks) in
1307
1308           (* in theory here could reparse and rework the ast! or
1309            * apply some SP. Not before cos julia may have generated
1310            * not parsable file. Need do unparsing_tricks call before being
1311            * ready to reparse. *)
1312           print_all_tokens2 pr toks;
1313
1314       | PPviastr -> pr str
1315     )
1316   )
1317
1318 let pp_program a b =
1319   Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
1320
1321
1322 let pp_program_default xs outfile =
1323   let xs' = xs +> List.map (fun x -> x, PPnormal) in
1324   pp_program xs' outfile