parsing_c/unparse_c.ml

   1 (* Yoann Padioleau, Julia Lawall
   2  *
   3  * Copyright (C) 2012, INRIA.
   4  * Copyright (C) 2010, 2011, University of Copenhagen DIKU and INRIA.
   5  * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
   6  *
   7  * This program is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License (GPL)
   9  * version 2 as published by the Free Software Foundation.
  10  *
  11  * This program is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * file license.txt for more details.
  15  *
  16  *
  17  * Modifications by Julia Lawall for better newline handling.
  18  *)
  19 open Common
  20
  21 open Ast_c
  22
  23 module TH = Token_helpers
  24
  25
  26 (* should keep comments and directives in between adjacent deleted terms,
  27 but not comments and directives within deleted terms.  should use the
  28 labels found in the control-flow graph *)
  29
  30
  31
  32 (*****************************************************************************)
  33 (* Wrappers *)
  34 (*****************************************************************************)
  35 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
  36
  37 (*****************************************************************************)
  38 (* Types used during the intermediate phases of the unparsing *)
  39 (*****************************************************************************)
  40
  41 type token1 =
  42   | Fake1 of info
  43   | T1 of Parser_c.token
  44
  45 (* The cocci_tag of the token should always be a NOTHING. The mark of
  46  * the token can only be OriginTok or ExpandedTok. Why not get rid of
  47  * token and get something simpler ? because we need to know if the
  48  * info is a TCommentCpp or TCommentSpace, etc for some of the further
  49  * analysis so easier to keep with the token.
  50  *
  51  * This type contains the whole information. Have all the tokens with this
  52  * type.
  53  *)
  54 type min =
  55     Min of (int list (* match numbers from witness trees *) *
  56               Ast_cocci.adjacency (* adjacency information *))
  57   | Ctx
  58
  59 type token2 =
  60   | T2 of Parser_c.token * min *
  61           int option (* orig index, abstracting away comments and space *)
  62   | Fake2 of min
  63   | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
  64         * Unparse_cocci.nlhint option
  65   | C2 of string
  66   | Comma of string
  67   | Indent_cocci2
  68   | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
  69   | EatSpace2
  70
  71 (* not used yet *)
  72 type token3 =
  73   | T3 of Parser_c.token
  74   | Cocci3 of string
  75   | C3 of string
  76
  77
  78 (* similar to the tech in parsing_hack *)
  79 type token_extended = {
  80   tok2 : token2;
  81   str  : string;
  82   idx: int option; (* to know if 2 tokens were consecutive in orig file *)
  83   mutable new_tokens_before : token2 list;
  84   mutable remove : bool;
  85 }
  86
  87
  88 (*****************************************************************************)
  89 (* Helpers *)
  90 (*****************************************************************************)
  91
  92 let info_of_token1 t =
  93   match t with
  94   | Fake1 info -> info
  95   | T1 tok -> TH.info_of_tok tok
  96
  97 let print_token1 = function
  98     T1 tok -> TH.str_of_tok tok
  99   | Fake1 info -> "fake"
 100
 101 let str_of_token2 = function
 102   | T2 (t,_,_) -> TH.str_of_tok t
 103   | Fake2 _ -> ""
 104   | Cocci2 (s,_,_,_,_) -> s
 105   | C2 s -> s
 106   | Comma s -> s
 107   | Indent_cocci2 -> ""
 108   | Unindent_cocci2 _ -> ""
 109   | EatSpace2 -> ""
 110
 111 let print_token2 = function
 112   | T2 (t,b,_) ->
 113       let t_str =
 114         match t with
 115         | Parser_c.TCommentSpace _ -> " sp "
 116         | Parser_c.TCommentNewline _ -> " nl "
 117         | Parser_c.TCommentCpp _ -> " cp "
 118         | Parser_c.TCommentMisc _ -> " misc "
 119         | Parser_c.TComment _ -> " comment "
 120         | _ -> "" in
 121       let b_str =
 122         match b with
 123           Min (index,adj) ->
 124             Printf.sprintf "-%d[%s]"
 125               (match adj with Ast_cocci.ADJ n -> n | _ -> -1)
 126               (String.concat " " (List.map string_of_int index))
 127         | Ctx -> "" in
 128       "T2:"^b_str^t_str^TH.str_of_tok t
 129   | Fake2 b ->
 130       let b_str =
 131         match b with
 132           Min (index,adj) ->
 133             Printf.sprintf "-%d[%s]"
 134               (match adj with Ast_cocci.ADJ n -> n | _ -> -1)
 135               (String.concat " " (List.map string_of_int index))
 136         | Ctx -> "" in
 137       b_str^"fake"
 138   | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
 139   | C2 s -> "C2:"^s
 140   | Comma s -> "Comma:"^s
 141   | Indent_cocci2 -> "Indent"
 142   | Unindent_cocci2 _ -> "Unindent"
 143   | EatSpace2 -> "EatSpace"
 144
 145 let simple_print_all_tokens1 l =
 146   List.iter (function x -> Printf.printf "|%s| " (print_token1 x)) l;
 147   Printf.printf "\n"
 148
 149 let simple_print_all_tokens2 l =
 150   List.iter (function x -> Printf.printf "|%s| " (print_token2 x)) l;
 151   Printf.printf "\n"
 152
 153 let str_of_token3 = function
 154   | T3 t -> TH.str_of_tok t
 155   | Cocci3 s | C3 s -> s
 156
 157
 158
 159 let mk_token_extended x =
 160   let origidx =
 161     match x with
 162     | T2 (_,_, idx) -> idx
 163     | _ -> None
 164   in
 165   { tok2 = x;
 166     str = str_of_token2 x;
 167     idx = origidx;
 168     new_tokens_before = [];
 169     remove = false;
 170   }
 171
 172 let rebuild_tokens_extented toks_ext =
 173   let _tokens = ref [] in
 174   toks_ext +> List.iter (fun tok ->
 175     tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
 176     if not tok.remove then push2 tok.tok2 _tokens;
 177   );
 178   let tokens = List.rev !_tokens in
 179   (tokens +> List.map mk_token_extended)
 180
 181
 182 let mcode_contain_plus = function
 183   | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
 184   | Ast_cocci.CONTEXT _ -> true
 185 (* patch: when need full coccinelle transformation *)
 186   | Ast_cocci.MINUS (_,_,_,Ast_cocci.NOREPLACEMENT) -> false
 187   | Ast_cocci.MINUS (_,_,_,Ast_cocci.REPLACEMENT _) -> true(*REPL is not empty*)
 188   | Ast_cocci.PLUS _ -> raise Impossible
 189
 190 let contain_plus info =
 191   let mck = Ast_c.mcode_of_info info in
 192   mcode_contain_plus mck
 193
 194 (*****************************************************************************)
 195 (* Last fix on the ast *)
 196 (*****************************************************************************)
 197
 198 (* Because of the ugly trick to handle initialiser, I generate fake ','
 199  * for the last initializer element, but if there is nothing around it,
 200  * I don't want in the end to print it.
 201  *)
 202
 203 let remove_useless_fakeInfo_struct program =
 204   let bigf = { Visitor_c.default_visitor_c_s with
 205     Visitor_c.kini_s = (fun (k,bigf) ini ->
 206       match k ini with
 207       | InitList args, ii ->
 208           (match ii with
 209           | [_i1;_i2] -> ini
 210           | [i1;i2;iicommaopt] ->
 211               if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
 212                  && (Ast_c.is_fake iicommaopt)
 213                  (* sometimes the guy put a normal iicommaopt *)
 214               then InitList args, [i1;i2]
 215               else InitList args, [i1;i2;iicommaopt]
 216           | [i1;i2;iicommaopt;end_comma_opt] ->
 217               (* only in #define. end_comma_opt canot be fake *)
 218               (* not sure if this will be considered ambiguous with a previous
 219                  case? *)
 220               if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
 221                  && (Ast_c.is_fake iicommaopt)
 222                  (* sometimes the guy put a normal iicommaopt *)
 223               then InitList args, [i1;i2;end_comma_opt]
 224               else InitList args, [i1;i2;iicommaopt;end_comma_opt]
 225           | _ -> raise Impossible
 226           )
 227       | x -> x
 228     )
 229   } in
 230   Visitor_c.vk_toplevel_s bigf program
 231
 232
 233 (*****************************************************************************)
 234 (* Tokens1 generation *)
 235 (*****************************************************************************)
 236
 237 let get_fakeInfo_and_tokens celem toks =
 238   let toks_in  = ref toks in
 239   let toks_out = ref [] in
 240
 241   (* todo? verify good order of position ? *)
 242   let pr_elem info =
 243     match Ast_c.pinfo_of_info info with
 244     | FakeTok _ ->
 245         Common.push2 (Fake1 info) toks_out
 246     | OriginTok _ | ExpandedTok _ ->
 247         (* get the associated comments/space/cppcomment tokens *)
 248         let (before, x, after) =
 249           !toks_in +> Common.split_when (fun tok ->
 250             info =*= TH.info_of_tok tok)
 251         in
 252         assert(info =*= TH.info_of_tok x);
 253         (*old: assert(before +> List.for_all (TH.is_comment)); *)
 254         before +> List.iter (fun x ->
 255           if not (TH.is_comment x)
 256           then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
 257           (* case such as  int asm d3("x"); not yet in ast *)
 258         );
 259         before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
 260         push2 (T1 x) toks_out;
 261         toks_in := after;
 262     | AbstractLineTok _ ->
 263         (* can be called on type info when for instance use -type_c *)
 264         if !Flag_parsing_c.pretty_print_type_info
 265         then Common.push2 (Fake1 info) toks_out
 266         else raise Impossible (* at this stage *)
 267   in
 268
 269   let pr_space _ = () in (* use the spacing that is there already *)
 270
 271   Pretty_print_c.pp_program_gen pr_elem pr_space celem;
 272
 273   if not (null !toks_in)
 274   then failwith "WEIRD: unparsing not finished";
 275
 276   List.rev !toks_out
 277
 278 (* Fake nodes that have BEFORE code or are - should be moved over any subsequent
 279 whitespace and newlines, but not any comments, to get as close to the affected
 280 code as possible.  Similarly, fake nodes that have AFTER code should be moved
 281 backwards.  No fake nodes should have both before and after code. *)
 282
 283 let displace_fake_nodes toks =
 284   let is_fake = function Fake1 _ -> true | _ -> false in
 285   let is_whitespace = function
 286       T1(Parser_c.TCommentSpace _)
 287       (* patch: cocci    *)
 288     | T1(Parser_c.TCommentNewline _) -> true
 289     | _ -> false in
 290   let rec loop toks =
 291     let fake_info =
 292       try Some (Common.split_when is_fake toks)
 293       with Not_found -> None in
 294     match fake_info with
 295       Some(bef,((Fake1 info) as fake),aft) ->
 296         (match !(info.cocci_tag) with
 297         | Some x ->
 298           (match x with
 299             (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_)
 300           | (Ast_cocci.MINUS(_,_,_,Ast_cocci.REPLACEMENT _),_) ->
 301             (* move the fake node forwards *)
 302               let (whitespace,rest) = Common.span is_whitespace aft in
 303               bef @ whitespace @ fake :: (loop rest)
 304           | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
 305             (* move the fake node backwards *)
 306               let revbef = List.rev bef in
 307               let (revwhitespace,revprev) = Common.span is_whitespace revbef in
 308               let whitespace = List.rev revwhitespace in
 309               let prev = List.rev revprev in
 310               prev @ fake :: (loop (whitespace @ aft))
 311           | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
 312               bef @ fake :: (loop aft)
 313           | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
 314               failwith "fake node should not be before-after"
 315           | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
 316         )
 317         | None ->
 318             bef @ fake :: (loop aft)
 319         )
 320     | None -> toks
 321     | _ -> raise Impossible in
 322   loop toks
 323
 324 (*****************************************************************************)
 325 (* Tokens2 generation *)
 326 (*****************************************************************************)
 327
 328 let comment2t2 = function
 329     (Token_c.TCommentCpp
 330        (* not sure iif the following list is exhaustive or complete *)
 331        (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError),
 332      (info : Token_c.info)) ->
 333       C2(info.Common.str)
 334   | (Token_c.TCommentCpp x,(info : Token_c.info)) ->
 335       C2("\n"^info.Common.str^"\n")
 336   | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
 337
 338 let expand_mcode toks =
 339   let toks_out = ref [] in
 340
 341   let index = ref 0 in
 342
 343   let add_elem t minus =
 344     match t with
 345     | Fake1 info ->
 346         let str = Ast_c.str_of_info info in
 347         if str =$= ""
 348         then push2 (Fake2 minus) toks_out
 349         (* fx the fake "," at the end of a structure or enum.
 350            no idea what other fake info there can be... *)
 351         else push2 (Comma str) toks_out
 352
 353
 354     | T1 tok ->
 355         (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
 356         (* no tag on expandedTok ! *)
 357         let modified = function
 358             None -> false
 359           | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
 360           | _ -> true in
 361         (if (TH.is_expanded tok &&
 362              modified !((TH.info_of_tok tok).cocci_tag)
 363             (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*))
 364         then
 365           failwith
 366             (Printf.sprintf
 367                "expanded token %s on line %d is either modified or stored in a metavariable"
 368                (TH.str_of_tok tok) (TH.line_of_tok tok)));
 369
 370         let tok' = tok +> TH.visitor_info_of_tok (fun i ->
 371           { i with cocci_tag = ref Ast_c.emptyAnnot; }
 372         ) in
 373
 374         let optindex =
 375           if TH.is_origin tok && not (TH.is_real_comment tok)
 376           then begin
 377               incr index;
 378               Some !index
 379           end
 380           else None
 381         in
 382
 383         push2 (T2 (tok', minus, optindex)) toks_out
 384   in
 385
 386   let expand_info t =
 387     let (mcode,env) =
 388       Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
 389
 390     let pr_cocci s ln col rcol hint =
 391       push2 (Cocci2(s,ln,col,rcol,hint)) toks_out  in
 392     let pr_c info =
 393       (match Ast_c.pinfo_of_info info with
 394         Ast_c.AbstractLineTok _ ->
 395           push2 (C2 (Ast_c.str_of_info info)) toks_out
 396       | Ast_c.FakeTok (s,_) ->
 397           push2 (C2 s) toks_out
 398       | _ ->
 399           Printf.fprintf stderr "line: %s\n" (Common.dump info);
 400           failwith "not an abstract line");
 401       (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
 402       List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
 403
 404     let pr_barrier ln col = (* marks a position, used around C code *)
 405       push2 (Cocci2("",ln,col,col,None)) toks_out  in
 406     let pr_nobarrier ln col = () in (* not needed for linux spacing *)
 407
 408     let pr_cspace _ = push2 (C2 " ") toks_out in
 409
 410     let pr_space _ = () (* rely on add_space in cocci code *) in
 411     let pr_arity _ = () (* not interested *) in
 412
 413     let indent _   = push2 Indent_cocci2 toks_out in
 414     let unindent x = push2 (Unindent_cocci2 x) toks_out in
 415     let eat_space _   = push2 EatSpace2 toks_out in
 416
 417     let args_pp =
 418       (env, pr_cocci, pr_c, pr_cspace,
 419        (match !Flag_parsing_c.spacing with
 420          Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
 421        pr_arity,
 422        (match !Flag_parsing_c.spacing with
 423          Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
 424        indent, unindent, eat_space) in
 425
 426     (* old: when for yacfe with partial cocci:
 427      *    add_elem t false;
 428     *)
 429
 430     (* patch: when need full coccinelle transformation *)
 431     let unparser = Unparse_cocci.pp_list_list_any args_pp false in
 432     match mcode with
 433     | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
 434         (* Why adding ? because I want to have all the information, the whole
 435          * set of tokens, so I can then process and remove the
 436          * is_between_two_minus for instance *)
 437         add_elem t (Min (inst,adj));
 438         (match any_xxs with
 439           Ast_cocci.NOREPLACEMENT -> ()
 440         | Ast_cocci.REPLACEMENT(any_xxs,_) ->
 441             unparser any_xxs Unparse_cocci.InPlace)
 442     | Ast_cocci.CONTEXT (_,any_befaft) ->
 443         (match any_befaft with
 444         | Ast_cocci.NOTHING ->
 445             add_elem t Ctx
 446         | Ast_cocci.BEFORE (xxs,_) ->
 447             unparser xxs Unparse_cocci.Before;
 448             add_elem t Ctx
 449         | Ast_cocci.AFTER (xxs,_) ->
 450             add_elem t Ctx;
 451             unparser xxs Unparse_cocci.After;
 452         | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
 453             unparser xxs Unparse_cocci.Before;
 454             add_elem t Ctx;
 455             unparser yys Unparse_cocci.After;
 456         )
 457     | Ast_cocci.PLUS _ -> raise Impossible
 458
 459   in
 460
 461   toks +> List.iter expand_info;
 462   List.rev !toks_out
 463
 464
 465 (*****************************************************************************)
 466 (* Tokens2 processing, filtering, adjusting *)
 467 (*****************************************************************************)
 468
 469 let is_space = function
 470   | T2(Parser_c.TCommentSpace _,_b,_i) -> true  (* only whitespace *)
 471   | _ -> false
 472
 473 let is_newline = function
 474   | T2(Parser_c.TCommentNewline _,_b,_i) -> true
 475   | _ -> false
 476
 477 let is_whitespace = function
 478   | (T2 (t,_b,_i)) ->
 479       (match t with
 480       | Parser_c.TCommentSpace _ -> true  (* only whitespace *)
 481       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
 482       | _ -> false
 483       )
 484   | _ -> false
 485
 486 let is_minusable_comment = function
 487   | (T2 (t,_b,_i)) ->
 488       (match t with
 489       | Parser_c.TCommentSpace _   (* only whitespace *)
 490       (* patch: coccinelle *)
 491       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
 492       | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
 493       | Parser_c.TComment _
 494       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 495       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 496       | Parser_c.TCommentCpp (Token_c.CppIfDirective _, _)
 497       | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
 498         -> true
 499
 500       | Parser_c.TCommentMisc _
 501       | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
 502         -> false
 503
 504       | _ -> false
 505       )
 506   | _ -> false
 507
 508 let is_minusable_comment_nocpp = function
 509   | (T2 (t,_b,_i)) ->
 510       (match t with
 511       | Parser_c.TCommentSpace _   (* only whitespace *)
 512       (* patch: coccinelle *)
 513       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
 514       | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
 515       | Parser_c.TComment _ -> true
 516       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 517       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 518       | Parser_c.TCommentCpp (Token_c.CppIfDirective _, _)
 519       | Parser_c.TCommentCpp (Token_c.CppDirective, _)
 520         -> false
 521
 522       | Parser_c.TCommentMisc _
 523       | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
 524         -> false
 525
 526       | _ -> false
 527       )
 528   | _ -> false
 529
 530 let all_coccis = function
 531     Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _
 532   | EatSpace2 -> true
 533   | _ -> false
 534
 535 (*previously gave up if the first character was a newline, but not clear why*)
 536 let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
 537
 538 let set_minus_comment adj = function
 539   | T2 (t,Ctx,idx) ->
 540       let str = TH.str_of_tok t in
 541       (match t with
 542       | Parser_c.TCommentSpace _
 543 (* patch: coccinelle *)
 544       | Parser_c.TCommentNewline _ -> ()
 545
 546       | Parser_c.TComment _
 547       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 548       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 549       | Parser_c.TCommentCpp (Token_c.CppIfDirective _, _)
 550       | Parser_c.TCommentCpp (Token_c.CppDirective, _)
 551         ->
 552           pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
 553                  (TH.line_of_tok t) str)
 554       | _ -> raise Impossible
 555       );
 556       T2 (t, Min adj, idx)
 557 (* patch: coccinelle *)
 558   | T2 (t,Min adj,idx) as x -> x
 559   | Fake2 _ as x -> x
 560   | _ -> raise Impossible
 561
 562 (* don't touch ifdefs, done after *)
 563 let set_minus_comment_or_plus adj = function
 564     Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _
 565   | EatSpace2 as x -> x
 566   | x -> set_minus_comment adj x
 567
 568 let drop_minus xs =
 569   xs +> Common.exclude (function
 570     | T2 (t,Min adj,_) -> true
 571     | _ -> false
 572   )
 573
 574 let drop_expanded xs =
 575   xs +> Common.exclude (function
 576     | T2 (t,_,_) when TH.is_expanded t -> true
 577     | _ -> false
 578   )
 579
 580 let drop_fake xs =
 581   xs +> Common.exclude (function
 582     | Fake2 _ -> true
 583     | _ -> false
 584   )
 585
 586 let remove_minus_and_between_and_expanded_and_fake xs =
 587
 588   (* get rid of expanded tok *)
 589   let xs = drop_expanded xs in
 590
 591   let minus_or_comment = function
 592       T2(_,Min adj,_) -> true
 593     | x -> is_minusable_comment x in
 594
 595   let minus_or_comment_nocpp = function
 596       T2(_,Min adj,_) -> true
 597     | x -> is_minusable_comment_nocpp x in
 598
 599   let common_adj (index1,adj1) (index2,adj2) =
 600     let same_adj = (* same adjacency info *)
 601       match (adj1,adj2) with
 602         (Ast_cocci.ADJ adj1,Ast_cocci.ADJ adj2) -> adj1 = adj2
 603       | (Ast_cocci.ALLMINUS,_) | (_,Ast_cocci.ALLMINUS) -> true in
 604     same_adj &&
 605     (* non-empty intersection of witness trees *)
 606     not ((Common.inter_set index1 index2) = []) in
 607
 608   (* new idea: collects regions not containing non-space context code
 609      if two adjacent adjacent minus tokens satisfy common_adj then delete
 610      all spaces, comments etc between them
 611      if two adjacent minus tokens do not satisfy common_adj only delete
 612      the spaces between them if there are no comments, etc.
 613      if the region contain no plus code and is both preceded and followed
 614      by a newline, delete the initial newline. *)
 615
 616   let rec adjust_around_minus = function
 617       [] -> []
 618     | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
 619       ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs ->
 620         let (minus_list,rest) = Common.span not_context (t1::xs) in
 621         let contains_plus = List.exists is_plus minus_list in
 622         let x =
 623           match List.rev minus_list with
 624             (T2(Parser_c.TCommentNewline c,_b,_i))::rest
 625             when List.for_all minus_or_comment minus_list ->
 626               set_minus_comment_or_plus adj1 x
 627           | _ -> x in
 628         x :: adjust_within_minus contains_plus minus_list @
 629         adjust_around_minus rest
 630     | ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs ->
 631         let (minus_list,rest) = Common.span not_context (t1::xs) in
 632         let contains_plus = List.exists is_plus minus_list in
 633         adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
 634     | x::xs ->
 635         x :: adjust_around_minus xs
 636   and adjust_within_minus cp (* contains plus *) = function
 637       ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs ->
 638         let not_minus = function T2(_,Min _,_) -> false | _ -> true in
 639         let (not_minus_list,rest) = Common.span not_minus xs in
 640         t1 ::
 641         (match rest with
 642           ((Fake2(Min adj2) | T2(_,Min adj2,_)) as t2)::xs
 643           when common_adj adj1 adj2 ->
 644             (List.map (set_minus_comment_or_plus adj1) not_minus_list)
 645             @ (adjust_within_minus cp (t2::xs))
 646         | ((Fake2(Min adj2) | T2(_,Min adj2,_)) as t2)::xs ->
 647             if not cp && List.for_all is_whitespace not_minus_list
 648             then
 649               (List.map (set_minus_comment_or_plus adj1) not_minus_list)
 650               @ (adjust_within_minus cp (t2::xs))
 651             else
 652               not_minus_list @ (adjust_within_minus cp (t2::xs))
 653         | _ ->
 654             if cp
 655             then xs
 656             else
 657               (* remove spaces after removed stuff, eg a comma after a
 658                  function argument *)
 659               (let (spaces,rest) = Common.span is_space xs in
 660               (List.map (set_minus_comment_or_plus adj1) spaces)
 661               @ rest))
 662     | xs -> failwith "should always start with minus"
 663   and not_context = function
 664       (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false
 665     | _ -> true
 666   and is_plus = function
 667       C2 _ | Comma _ | Cocci2 _ -> true
 668     | _ -> false in
 669
 670   let xs = adjust_around_minus xs in
 671
 672   (* get rid of fake tok *)
 673   let xs = drop_fake xs in
 674
 675   (* this drops blank lines after a brace introduced by removing code *)
 676   let minus_or_comment_nonl = function
 677       T2(_,Min adj,_) -> true
 678     | T2(Parser_c.TCommentNewline _,_b,_i) -> false
 679     | x -> is_minusable_comment x in
 680
 681   let rec adjust_after_brace = function
 682       [] -> []
 683     | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs)
 684        when str_of_token2 x =$= "{" ->
 685          let (between_minus,rest) = Common.span minus_or_comment_nonl xs in
 686          let is_whitespace = function
 687              T2(Parser_c.TCommentSpace _,_b,_i)
 688              (* patch: cocci    *)
 689            | T2(Parser_c.TCommentNewline _,_b,_i) -> true
 690            | _ -> false in
 691          let (newlines,rest) = Common.span is_whitespace rest in
 692          let (drop_newlines,last_newline) =
 693            let rec loop = function
 694                [] -> ([],[])
 695              | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
 696                  (List.rev rest,[x])
 697              | x::xs ->
 698                  let (drop_newlines,last_newline) = loop xs in
 699                  (drop_newlines,x::last_newline) in
 700            loop (List.rev newlines) in
 701          x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@
 702          last_newline@
 703          adjust_after_brace rest
 704     | x::xs -> x::adjust_after_brace xs in
 705
 706   let xs = adjust_after_brace xs in
 707
 708   (* search backwards from context } over spaces until reaching a newline.
 709      then go back over all minus code until reaching some context or + code.
 710      get rid of all intervening spaces, newlines, and comments
 711      input is reversed *)
 712   let rec adjust_before_brace = function
 713       [] -> []
 714     | ((T2(t,Ctx,_)) as x)::xs when str_of_token2 x =$= "}" or is_newline x ->
 715         let (outer_spaces,rest) = Common.span is_space xs in
 716         x :: outer_spaces @
 717         (match rest with
 718           ((T2 (Parser_c.TCommentNewline _,Ctx,_i)) as h) ::
 719           (* the rest of this code is the same as from_newline below
 720              but merging them seems to be error prone... *)
 721           ((T2 (t, Min adj, idx)) as m) :: rest ->
 722             let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
 723             h :: m ::
 724             (List.map (set_minus_comment adj) spaces) @
 725             (adjust_before_brace rest)
 726         | _ -> adjust_before_brace rest)
 727     | x::xs -> x :: (adjust_before_brace xs) in
 728
 729   let from_newline = function
 730       ((T2 (t, Min adj, idx)) as m) :: rest ->
 731         let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
 732         m ::
 733         (List.map (set_minus_comment adj) spaces) @
 734         (adjust_before_brace rest)
 735     | ((T2 (t0, Ctx, idx0)) as m0) :: ((T2 (t, Min adj, idx)) as m) :: rest
 736         when TH.str_of_tok t0 = "" ->
 737           (* This is for the case of a #define that is completely deleted,
 738              because a #define has a strange EOL token at the end.
 739              We hope there i no other kind of token that is represented by
 740              "", but it seems like changing the kind of token might break
 741              the end of entity recognition in the C parser.
 742              See parsing_hacks.ml *)
 743           let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
 744           m0 :: m ::
 745           (List.map (set_minus_comment adj) spaces) @
 746           (adjust_before_brace rest)
 747     | rest -> adjust_before_brace rest in
 748
 749   let xs = List.rev (from_newline (List.rev xs)) in
 750
 751   let cleanup_ifdefs toks =
 752   (* TODO: these functions are horrid, but using tokens caused circularity *)
 753     let is_ifdef = function
 754         T2((Parser_c.TCommentCpp
 755               (Token_c.CppIfDirective Token_c.IfDef, _)),m,idx) -> true
 756       | T2((Parser_c.TCommentCpp
 757               (Token_c.CppIfDirective Token_c.IfDef0, _)),m,idx) -> true
 758       | t -> false in
 759     let is_else = function
 760         T2((Parser_c.TCommentCpp
 761               (Token_c.CppIfDirective Token_c.Else, _)),m,idx) -> true
 762       | _ -> false in
 763     let is_endif = function
 764         T2((Parser_c.TCommentCpp
 765               (Token_c.CppIfDirective Token_c.Endif, _)),m,idx) -> true
 766       | _ -> false in
 767     let add t = function
 768         l::rest -> (t::l)::rest
 769       | _ -> failwith "not possible" in
 770     let rec parse_ifdef acc_keywords acc_code stack = function
 771         [] -> (None,acc_keywords,acc_code)
 772       | t::rest when is_else t ->
 773           (match stack with
 774             [] -> parse_ifdef (t::acc_keywords) ([]::acc_code) stack rest
 775           | _ -> parse_ifdef acc_keywords (add t acc_code) stack rest)
 776       | t::rest when is_endif t ->
 777           (match stack with
 778             [] -> ((Some (t,rest)),acc_keywords,acc_code)
 779           | _::stack -> parse_ifdef acc_keywords (add t acc_code) stack rest)
 780       | t::rest when is_ifdef t ->
 781           parse_ifdef acc_keywords (add t acc_code) (()::stack) rest
 782       | t::rest -> parse_ifdef acc_keywords (add t acc_code) stack rest in
 783     let unminus = function
 784         T2 (t,Min adj,idx) -> T2 (t,Ctx,idx)
 785       | x -> x in
 786     let is_minus = function
 787         T2 (t,Min adj,idx) -> true
 788       | x -> false in
 789     let rec loop = function
 790         [] -> []
 791       | t::rest when is_ifdef t ->
 792           let (ender,acc_keywords,acc_code) =
 793             parse_ifdef [t] [[]] [] rest in
 794           let acc_code = List.map loop acc_code in
 795           let merge = (* args reversed *)
 796             List.fold_left2
 797               (fun prev kwd code -> kwd :: (List.rev code) @ prev)
 798               [] in
 799           (match ender with
 800             None -> merge (List.map unminus acc_keywords) acc_code
 801           | Some(endif,rest) ->
 802               let rest = loop rest in
 803               if List.for_all is_minus (endif::acc_keywords)
 804               then (merge acc_keywords acc_code) @ (endif :: rest)
 805               else
 806                 (merge (List.map unminus acc_keywords) acc_code) @
 807                 ((unminus endif) :: rest))
 808       | x::xs -> x :: loop xs in
 809     loop toks in
 810
 811   let xs = cleanup_ifdefs xs in
 812   let xs = drop_minus xs in
 813   xs
 814
 815 (* things that should not be followed by space - boundary between SmPL
 816    code and C code *)
 817 let adjust_eat_space toks =
 818   let rec loop = function
 819       [] -> []
 820     | EatSpace2 :: x :: rest when is_space x -> loop rest
 821     | EatSpace2 :: rest -> loop rest
 822     | x :: xs -> x :: loop xs in
 823   loop toks
 824
 825 (* normally, in C code, a semicolon is not preceded by a space or newline *)
 826 let adjust_before_semicolon toks =
 827   let toks = List.rev toks in
 828   let rec search_semic = function
 829       [] -> []
 830     | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
 831         if List.mem (str_of_token2 x) [";";")";","]
 832         then x :: search_semic (search_minus false xs)
 833         else x :: search_semic xs
 834     | x::xs -> x :: search_semic xs
 835   and search_minus seen_minus xs =
 836     let (spaces, rest) = Common.span is_space xs in
 837     (* only delete spaces if something is actually deleted *)
 838     match rest with
 839       ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest
 840     | _ -> if seen_minus then rest else xs in
 841   List.rev (search_semic toks)
 842
 843 (* normally, in C code, a ( is not followed by a space or newline *)
 844 let adjust_after_paren toks =
 845   let rec search_paren = function
 846       [] -> []
 847     | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
 848         if List.mem (str_of_token2 x) ["("] (* other things? *)
 849         then x :: search_paren(search_minus false xs)
 850         else x :: search_paren xs
 851     | x::xs -> x :: search_paren xs
 852   and search_minus seen_minus xs =
 853     let (spaces, rest) = Common.span is_whitespace xs in
 854     (* only delete spaces if something is actually deleted *)
 855     match rest with
 856       ((T2(_,Min _,_)) as a)::rerest -> (* minus *)
 857         a :: search_minus true rerest
 858     | ((T2(_,Ctx,_)) as a)::rerest when seen_minus && str_of_token2 a = "," ->
 859         (* comma after ( will be deleted, so consider it as minus code
 860            already *)
 861         a :: search_minus true rerest
 862     | _ -> if seen_minus then rest else xs in (* drop trailing space *)
 863   search_paren toks
 864
 865 (* this is for the case where braces are added around an if branch *)
 866 let paren_then_brace toks =
 867   let rec search_paren = function
 868       [] -> []
 869     | ((T2(_,Ctx,_)) as x)::xs ->
 870         if List.mem (str_of_token2 x) [")"]
 871         then x :: search_paren(search_plus xs)
 872         else x :: search_paren xs
 873     | x::xs -> x :: search_paren xs
 874   and search_plus xs =
 875     let (spaces, rest) = Common.span is_whitespace xs in
 876     match rest with
 877       (* move the brace up to the previous line *)
 878       ((Cocci2("{",_,_,_,_)) as x) :: (((Cocci2 _) :: _) as rest) ->
 879         (C2 " ") :: x :: spaces @ rest
 880     | _ -> xs in
 881   search_paren toks
 882
 883 let is_ident_like s = s ==~ Common.regexp_alpha
 884
 885 let rec drop_space_at_endline = function
 886     [] -> []
 887   | [x] -> [x]
 888   | (C2 " ") ::
 889     ((((T2(Parser_c.TCommentSpace _,Ctx,_)) | Cocci2("\n",_,_,_,_) |
 890     (T2(Parser_c.TCommentNewline _,Ctx,_))) :: _) as rest) ->
 891       (* when unparse_cocci doesn't know whether space is needed *)
 892       drop_space_at_endline rest
 893   | ((T2(Parser_c.TCommentSpace _,Ctx,_i)) as a)::rest ->
 894       let (outer_spaces,rest) = Common.span is_space rest in
 895       let minus_or_comment_or_space_nocpp = function
 896           T2(_,Min adj,_) -> true
 897         | (T2(Parser_c.TCommentSpace _,Ctx,_i)) -> true
 898         | (T2(Parser_c.TCommentNewline _,Ctx,_i)) -> false
 899         | x -> false in
 900       let (minus,rest) = Common.span minus_or_comment_or_space_nocpp rest in
 901       let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in
 902       if List.exists (function T2(_,Min adj,_) -> true | _ -> false) minus
 903       then
 904         match rest with
 905           ((T2(Parser_c.TCommentNewline _,Ctx,_i)) as a)::rest ->
 906             (* drop trailing spaces *)
 907             minus@a::(drop_space_at_endline rest)
 908         | _ -> fail()
 909       else fail()
 910   | a :: rest ->
 911       a :: drop_space_at_endline rest
 912
 913 (* if a removed ( is between two tokens, then add a space *)
 914 let rec paren_to_space = function
 915     [] -> []
 916   | [x] -> [x]
 917   | [x;y] -> [x;y]
 918   | ((T2(_,Ctx,_)) as a)::((T2(t,Min _,_)) as b)::((T2(_,Ctx,_)) as c)::rest
 919     when not (is_whitespace a) && TH.str_of_tok t = "(" ->
 920       a :: b :: (C2 " ") :: (paren_to_space (c :: rest))
 921   | a :: rest -> a :: (paren_to_space rest)
 922
 923 let rec add_space xs =
 924   match xs with
 925   | [] -> []
 926   | [x] -> [x]
 927   | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
 928     when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
 929       not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
 930         (* this only works within a line.  could consider whether
 931            something should be done to add newlines too, rather than
 932            printing them explicitly in unparse_cocci. *)
 933         x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
 934   | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
 935     when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
 936       not (lnx = -1) && lnx < lny && not (rcolx = -1) ->
 937         (* this only works within a line.  could consider whether
 938            something should be done to add newlines too, rather than
 939            printing them explicitly in unparse_cocci. *)
 940         x::C2 (String.make (lny-lnx) '\n')::
 941         C2 (String.make (lcoly-1) ' '):: (* -1 is for the + *)
 942         add_space (y::xs)
 943   | ((T2(_,Ctx,_)) as x)::((Cocci2 _) as y)::xs -> (* add space on boundary *)
 944       let sx = str_of_token2 x in
 945       let sy = str_of_token2 y in
 946       if is_ident_like sx && (is_ident_like sy or List.mem sy ["="])
 947       then x::C2 " "::(add_space (y::xs))
 948       else x::(add_space (y::xs))
 949   | x::y::xs -> (* not boundary, not sure if it is possible *)
 950       let sx = str_of_token2 x in
 951       let sy = str_of_token2 y in
 952       if is_ident_like sx && is_ident_like sy
 953       then x::C2 " "::(add_space (y::xs))
 954       else x::(add_space (y::xs))
 955
 956 (* A fake comma is added at the end of an unordered initlist or a enum
 957 decl, if the initlist or enum doesn't already end in a comma.  This is only
 958 needed if there is + code, ie if we see Cocci after it in the code sequence *)
 959
 960 let rec drop_end_comma = function
 961     [] -> []
 962   | [x] -> [x]
 963   | ((Comma ",") as x) :: rest ->
 964       let (newlines,rest2) = Common.span is_whitespace rest in
 965       (match rest2 with
 966         (Cocci2 _) :: _ -> x :: drop_end_comma rest
 967       | _ -> drop_end_comma rest)
 968   | x :: xs -> x :: drop_end_comma xs
 969
 970 (* The following only works for the outermost function call.  Stack records
 971 the column of all open parentheses.  Space_cell contains the most recent
 972 comma in the outermost function call.  The goal is to decide whether this
 973 should be followed by a space or a newline and indent. *)
 974 let add_newlines toks tabbing_unit =
 975   let create_indent n =
 976     let (tu,tlen) =
 977       match tabbing_unit with
 978         Some ("\t",_) -> ("\t",8)
 979       | Some ("",_) -> ("\t",8) (* not sure why... *)
 980       | Some (s,_) -> (s,String.length s) (* assuming only spaces *)
 981       | None -> ("\t",8) in
 982     let rec loop seen =
 983       if seen + tlen <= n
 984       then tu ^ loop (seen + tlen)
 985       else String.make (n-seen) ' ' in
 986     loop 0 in
 987   let check_for_newline count x = function
 988       Some (start,space_cell) when count > Flag_parsing_c.max_width ->
 989         space_cell := "\n"^(create_indent x);
 990         Some (x + (count - start))
 991     | _ -> None in
 992   (* the following is for strings that may contain newline *)
 993   let string_length s count =
 994     let l = list_of_string s in
 995     List.fold_left
 996       (function count ->
 997         function
 998             '\t' -> count + 8
 999           | '\n' -> 0
1000           | c -> count + 1)
1001       count l in
1002   let rec loop info count = function
1003       [] -> []
1004     | ((T2(tok,_,_)) as a)::xs ->
1005         a :: loop info (string_length (TH.str_of_tok tok) count) xs
1006     | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs ->
1007         let (stack,space_cell) = info in
1008         let rest =
1009           match hint with
1010             None -> loop info (string_length s count) xs
1011           | Some Unparse_cocci.StartBox ->
1012               let count = string_length s count in
1013               loop (count::stack,space_cell) count xs
1014           | Some Unparse_cocci.EndBox ->
1015               let count = string_length s count in
1016               (match stack with
1017                 [x] ->
1018                   (match check_for_newline count x space_cell with
1019                     Some count -> loop ([],None) count xs
1020                   | None -> loop ([],None) count xs)
1021               | _ -> loop (List.tl stack,space_cell) count xs)
1022           | Some (Unparse_cocci.SpaceOrNewline sp) ->
1023               let count = string_length s (count + 1 (*space*)) in
1024               (match stack with
1025                 [x] ->
1026                   (match check_for_newline count x space_cell with
1027                     Some count -> loop (stack,Some (x,sp)) count xs
1028                   | None -> loop (stack,Some (count,sp)) count xs)
1029               | _ -> loop info count xs) in
1030         a :: rest
1031     | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs
1032     | ((Comma(s)) as a)::xs -> a :: loop info (string_length s count) xs
1033     | Fake2 _ :: _ | Indent_cocci2 :: _
1034     | Unindent_cocci2 _::_ | EatSpace2::_ ->
1035         failwith "unexpected fake, indent, unindent, or eatspace" in
1036   let redo_spaces prev = function
1037       Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) ->
1038         C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev
1039     | t -> t::prev in
1040   (match !Flag_parsing_c.spacing with
1041     Flag_parsing_c.SMPL -> toks
1042   | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks)))
1043
1044 (* When insert some new code, because of a + in a SP, we must add this
1045  * code at the right place, with the good indentation. So each time we
1046  * encounter some spacing info, with some newline, we maintain the
1047  * current indentation level used.
1048  *
1049  * TODO problems: not accurate. ex: TODO
1050  *
1051  * TODO: if in #define region, should add a \ \n
1052  *)
1053 let new_tabbing2 space =
1054   (list_of_string space)
1055     +> List.rev
1056     +> Common.take_until (fun c -> c =<= '\n')
1057     +> List.rev
1058     +> List.map string_of_char
1059     +> String.concat ""
1060
1061 let new_tabbing a =
1062   Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
1063
1064
1065 let rec adjust_indentation xs =
1066
1067   let _current_tabbing = ref ([] : string list) in
1068   let tabbing_unit = ref None in
1069
1070   let string_of_list l = String.concat "" (List.map string_of_char l) in
1071
1072   (* try to pick a tabbing unit for the plus code *)
1073   let adjust_tabbing_unit old_tab new_tab =
1074     if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
1075     then
1076       let old_tab = list_of_string old_tab in
1077       let new_tab = list_of_string new_tab in
1078       let rec loop = function
1079           ([],new_tab) ->
1080             tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
1081         | (_,[]) -> failwith "not possible"
1082         | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
1083       loop (old_tab,new_tab) in
1084
1085 (*
1086   let remtab tu current_tab =
1087     let current_tab = List.rev(list_of_string current_tab) in
1088     let rec loop = function
1089         ([],new_tab) -> string_of_list (List.rev new_tab)
1090       | (_,[]) -> (-*weird; tabbing unit used up more than the current tab*-)
1091         ""
1092       | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
1093       | (_,ns) -> (-* mismatch; remove what we can *-)
1094           string_of_list (List.rev ns) in
1095     loop (tu,current_tab) in
1096 *)
1097
1098   let rec find_first_tab started = function
1099       [] -> ()
1100     | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
1101         find_first_tab true xs
1102 (* patch: coccinelle *)
1103     | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
1104       when started ->
1105         let s = str_of_token2 x +> new_tabbing in
1106         tabbing_unit := Some (s,List.rev (list_of_string s))
1107     | x::xs -> find_first_tab started xs in
1108   find_first_tab false xs;
1109
1110   let rec balanced ct = function
1111       [] -> ct >= 0
1112     | ((T2(tok,_,_)) as x)::xs ->
1113         (match str_of_token2 x with
1114           "(" -> balanced (ct+1) xs
1115         | ")" -> balanced (ct-1) xs
1116         | _ -> balanced ct xs)
1117     | x::xs -> balanced ct xs in
1118
1119   let update_tabbing started s x =
1120     let old_tabbing = !_current_tabbing in
1121     str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := [s]);
1122     (* only trust the indentation after the first { *)
1123     if started
1124     then
1125       adjust_tabbing_unit
1126         (String.concat "" old_tabbing)
1127         (String.concat "" !_current_tabbing) in
1128
1129   let rec aux started xs =
1130     match xs with
1131     | [] ->  []
1132 (* patch: coccinelle *)
1133     | ((T2 (Parser_c.TCommentNewline s,_,_)) as x)::
1134       Unindent_cocci2(false)::xs ->
1135         update_tabbing started s x;
1136         (C2 "\n")::aux started xs
1137     | (Cocci2("\n",_,_,_,_))::Unindent_cocci2(false)::xs ->
1138         (C2 "\n")::aux started xs
1139     | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
1140       ((Cocci2 ("{",_,_,_,_)) as a)::xs
1141       when started && str_of_token2 x =$= ")" ->
1142         (* to be done for if, etc, but not for a function header *)
1143         x::(C2 " ")::a::(aux started xs)
1144     | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs
1145       when
1146         balanced 0 (fst(Common.span (function x -> not(is_newline x)) xs)) ->
1147         update_tabbing started s x;
1148         let coccis_rest = Common.span all_coccis xs in
1149         (match coccis_rest with
1150           (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
1151             (* the case where cocci code has been added before a close } *)
1152             x::aux started (Indent_cocci2::xs)
1153         | _ -> x::aux started xs)
1154     | Indent_cocci2::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
1155       when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL ->
1156         let tu = String.make (lcoly-1) ' ' in
1157         _current_tabbing := tu::(!_current_tabbing);
1158         C2 (tu)::aux started (y::xs)
1159     | Indent_cocci2::xs ->
1160         (match !tabbing_unit with
1161           None -> aux started xs
1162         | Some (tu,_) ->
1163             _current_tabbing := tu::(!_current_tabbing);
1164              (* can't be C2, for later phases *)
1165              Cocci2 (tu,-1,-1,-1,None)::aux started xs)
1166     | Unindent_cocci2(permanent)::xs ->
1167         (match !_current_tabbing with
1168           [] -> aux started xs
1169         | _::new_tabbing ->
1170             let s = String.concat "" new_tabbing in
1171             _current_tabbing := new_tabbing;
1172             Cocci2 (s,-1,-1,-1,None)::aux started xs)
1173     (* border between existing code and cocci code *)
1174     | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs
1175       when str_of_token2 x =$= "{" ->
1176         x::aux true (y::Indent_cocci2::xs)
1177     | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
1178       when str_of_token2 y =$= "}" ->
1179         x::aux started (Unindent_cocci2 true::y::xs)
1180     (* starting the body of the function *)
1181     | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->  x::aux true xs
1182     | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs
1183     | ((Cocci2("\n",_,_,_,_)) as x)::xs ->
1184             (* dont inline in expr because of weird eval order of ocaml *)
1185         let s = String.concat "" !_current_tabbing in
1186         (* can't be C2, for later phases *)
1187         x::Cocci2 (s,-1,-1,-1,None)::aux started xs
1188     | x::xs -> x::aux started xs in
1189   (aux false xs,!tabbing_unit)
1190
1191
1192 let rec find_paren_comma = function
1193   | [] -> ()
1194
1195   (* do nothing if was like this in original file *)
1196   | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
1197     ::xs when p2 =|= p1 + 1 ->
1198       find_paren_comma (x2::xs)
1199
1200   | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
1201     ::xs when p2 =|= p1 + 1 ->
1202       find_paren_comma (x2::xs)
1203
1204   | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
1205     ::xs when p2 =|= p1 + 1 ->
1206       find_paren_comma (x2::xs)
1207
1208   (* otherwise yes can adjust *)
1209   | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
1210       x2.remove <- true;
1211       find_paren_comma (x2::xs)
1212   | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
1213       x1.remove <- true;
1214       find_paren_comma (x2::xs)
1215
1216   | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
1217       x1.remove <- true;
1218       find_paren_comma (x2::xs)
1219
1220   | x::xs ->
1221       find_paren_comma xs
1222
1223
1224 let fix_tokens toks =
1225   let toks = toks +> List.map mk_token_extended in
1226
1227   let cleaner = toks +> Common.exclude (function
1228     | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
1229     | _ -> false
1230   ) in
1231   find_paren_comma cleaner;
1232
1233   let toks = rebuild_tokens_extented toks in
1234   toks +> List.map (fun x -> x.tok2)
1235
1236
1237
1238 (*****************************************************************************)
1239 (* Final unparsing (and debugging support) *)
1240 (*****************************************************************************)
1241
1242 (* for debugging *)
1243 type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
1244
1245 let kind_of_token2 = function
1246   | Fake2 _ -> KFake
1247   | Cocci2 _ -> KCocci
1248   | C2 _ -> KC
1249   | Comma _ -> KC
1250   | T2 (t,_,_) ->
1251       (match TH.pinfo_of_tok t with
1252       | ExpandedTok _ -> KExpanded
1253       | OriginTok _ -> KOrigin
1254       | FakeTok _ -> raise Impossible (* now a Fake2 *)
1255       | AbstractLineTok _ -> raise Impossible (* now a KC *)
1256       )
1257   | Unindent_cocci2 _ | Indent_cocci2 | EatSpace2 -> raise Impossible
1258
1259 let end_mark = "!"
1260
1261 let start_mark = function
1262   | KFake -> "!F!"
1263   | KCocci -> "!S!"
1264   | KC -> "!A!"
1265   | KExpanded -> "!E!"
1266   | KOrigin -> ""
1267
1268 let print_all_tokens2 pr xs =
1269   if !Flag_parsing_c.debug_unparsing
1270   then
1271     let current_kind = ref KOrigin in
1272     xs +> List.iter (fun t ->
1273       let newkind = kind_of_token2 t in
1274       if newkind =*= !current_kind
1275       then pr (str_of_token2 t)
1276       else begin
1277         pr (end_mark);
1278         pr (start_mark newkind);
1279         pr (str_of_token2 t);
1280         current_kind := newkind
1281       end
1282     );
1283   else
1284     xs +> List.iter (fun x -> pr (str_of_token2 x))
1285
1286
1287
1288
1289 (*****************************************************************************)
1290 (* Entry points *)
1291 (*****************************************************************************)
1292
1293 (* old: PPviatok was made in the beginning to allow to pretty print a
1294  * complete C file, including a modified C file by transformation.ml,
1295  * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
1296  * maybe flow_to_ast) all the cases. Indeed we don't need to do some
1297  * fancy stuff when a function was not modified at all. Just need to
1298  * print the list of token as-is. But now pretty_print_c.ml handles
1299  * almost everything so maybe less useful. Maybe PPviatok allows to
1300  * optimize a little the pretty printing.
1301  *
1302  * update: now have PPviastr which goes even faster than PPviatok, so
1303  * PPviatok has disappeared.
1304  *)
1305
1306 type ppmethod = PPnormal | PPviastr
1307
1308
1309
1310
1311 (* The pp_program function will call pretty_print_c.ml with a special
1312  * function to print the leaf components, the tokens. When we want to
1313  * print a token, we need to print also maybe the space and comments that
1314  * were close to it in the original file (and that was omitted during the
1315  * parsing phase), and honor what the cocci-info attached to the token says.
1316  * Maybe we will not print the token if it's a MINUS-token, and maybe we will
1317  * print it and also print some cocci-code attached in a PLUS to it.
1318  * So we will also maybe call unparse_cocci. Because the cocci-code may
1319  * contain metavariables, unparse_cocci will in fact sometimes call back
1320  * pretty_print_c (which will this time don't call back again unparse_cocci)
1321  *)
1322
1323 let pp_program2 xs outfile  =
1324   Common.with_open_outfile outfile (fun (pr,chan) ->
1325     let pr s =
1326       if !Flag_parsing_c.debug_unparsing
1327       then begin pr2_no_nl s; flush stderr end
1328       else pr s
1329         (* flush chan; *)
1330         (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1331     in
1332
1333     xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
1334       (* here can still work on ast *)
1335       let e = remove_useless_fakeInfo_struct e in
1336
1337       match ppmethod with
1338       | PPnormal ->
1339           (* now work on tokens *)
1340           (* phase1: just get all the tokens, all the information *)
1341           assert(toks_e +> List.for_all (fun t ->
1342             TH.is_origin t or TH.is_expanded t
1343           ));
1344           let toks = get_fakeInfo_and_tokens e toks_e in
1345           let toks = displace_fake_nodes toks in
1346           (* assert Origin;ExpandedTok;Faketok *)
1347           let toks = expand_mcode toks in
1348
1349           (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1350            * and no tag information, just NOTHING. *)
1351
1352           let toks =
1353             if !Flag.sgrep_mode2
1354             then
1355               (* nothing else to do for sgrep *)
1356               drop_expanded(drop_fake(drop_minus toks))
1357             else
1358               (* phase2: can now start to filter and adjust *)
1359                (let (toks,tu) = adjust_indentation toks in
1360               let toks = adjust_eat_space toks in
1361               let toks = adjust_before_semicolon toks in(*before remove minus*)
1362               let toks = adjust_after_paren toks in(*also before remove minus*)
1363               let toks = drop_space_at_endline toks in
1364               let toks = paren_to_space toks in
1365               let toks = drop_end_comma toks in
1366               let toks = remove_minus_and_between_and_expanded_and_fake toks in
1367               (* assert Origin + Cocci + C and no minus *)
1368               let toks = add_space toks in
1369               let toks = add_newlines toks tu in
1370               let toks = paren_then_brace toks in
1371               let toks = fix_tokens toks in
1372                toks) in
1373
1374           (* in theory here could reparse and rework the ast! or
1375            * apply some SP. Not before cos julia may have generated
1376            * not parsable file. Need do unparsing_tricks call before being
1377            * ready to reparse. *)
1378           print_all_tokens2 pr toks;
1379
1380       | PPviastr -> pr str
1381     )
1382   )
1383
1384 let pp_program a b =
1385   Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
1386
1387
1388 let pp_program_default xs outfile =
1389   let xs' = xs +> List.map (fun x -> x, PPnormal) in
1390   pp_program xs' outfile