parsing_c/unparse_c.ml

   1 (* Yoann Padioleau, Julia Lawall
   2  *
   3  * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
   4  *
   5  * This program is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU General Public License (GPL)
   7  * version 2 as published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * file license.txt for more details.
  13  *
  14  *
  15  * Modifications by Julia Lawall for better newline handling.
  16  *)
  17 open Common
  18
  19 open Ast_c
  20
  21 module TH = Token_helpers
  22
  23
  24 (* should keep comments and directives in between adjacent deleted terms,
  25 but not comments and directives within deleted terms.  should use the
  26 labels found in the control-flow graph *)
  27
  28
  29
  30 (*****************************************************************************)
  31 (* Types used during the intermediate phases of the unparsing *)
  32 (*****************************************************************************)
  33
  34 type token1 =
  35   | Fake1 of info
  36   | T1 of Parser_c.token
  37
  38 (* The cocci_tag of the token should always be a NOTHING. The mark of
  39  * the token can only be OriginTok or ExpandedTok. Why not get rid of
  40  * token and get something simpler ? because we need to know if the
  41  * info is a TCommentCpp or TCommentSpace, etc for some of the further
  42  * analysis so easier to keep with the token.
  43  *
  44  * This type contains the whole information. Have all the tokens with this
  45  * type.
  46  *)
  47 type token2 =
  48   | T2 of Parser_c.token * bool (* minus *) *
  49           int option (* orig index, abstracting away comments and space *)
  50   | Fake2
  51   | Cocci2 of string
  52   | C2 of string
  53   | Indent_cocci2
  54   | Unindent_cocci2
  55
  56 (* not used yet *)
  57 type token3 =
  58   | T3 of Parser_c.token
  59   | Cocci3 of string
  60   | C3 of string
  61
  62
  63 (* similar to the tech in parsing_hack *)
  64 type token_extended = {
  65   tok2 : token2;
  66   str  : string;
  67   idx: int option; (* to know if 2 tokens were consecutive in orig file *)
  68   mutable new_tokens_before : token2 list;
  69   mutable remove : bool;
  70 }
  71
  72
  73 (*****************************************************************************)
  74 (* Helpers *)
  75 (*****************************************************************************)
  76
  77 let info_of_token1 t =
  78   match t with
  79   | Fake1 info -> info
  80   | T1 tok -> TH.info_of_tok tok
  81
  82 let str_of_token2 = function
  83   | T2 (t,_,_) -> TH.str_of_tok t
  84   | Fake2 -> ""
  85   | Cocci2 s -> s
  86   | C2 s -> s
  87   | Indent_cocci2 -> ""
  88   | Unindent_cocci2 -> ""
  89
  90 let print_token2 = function
  91   | T2 (t,b,_) -> "T2:"^(if b then "-" else "")^TH.str_of_tok t
  92   | Fake2 -> ""
  93   | Cocci2 s -> "Cocci2:"^s
  94   | C2 s -> "C2:"^s
  95   | Indent_cocci2 -> "Indent"
  96   | Unindent_cocci2 -> "Unindent"
  97
  98 let print_all_tokens2 l =
  99   List.iter (function x -> Printf.printf "%s " (print_token2 x)) l;
 100   Printf.printf "\n"
 101
 102 let str_of_token3 = function
 103   | T3 t -> TH.str_of_tok t
 104   | Cocci3 s | C3 s -> s
 105
 106
 107
 108 let mk_token_extended x =
 109   let origidx =
 110     match x with
 111     | T2 (_,_, idx) -> idx
 112     | _ -> None
 113   in
 114   { tok2 = x;
 115     str = str_of_token2 x;
 116     idx = origidx;
 117     new_tokens_before = [];
 118     remove = false;
 119   }
 120
 121 let rebuild_tokens_extented toks_ext =
 122   let _tokens = ref [] in
 123   toks_ext +> List.iter (fun tok ->
 124     tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
 125     if not tok.remove then push2 tok.tok2 _tokens;
 126   );
 127   let tokens = List.rev !_tokens in
 128   (tokens +> List.map mk_token_extended)
 129
 130
 131 let mcode_contain_plus = function
 132   | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
 133   | Ast_cocci.CONTEXT _ -> true
 134 (* patch: when need full coccinelle transformation *)
 135   | Ast_cocci.MINUS (_,[]) -> false
 136   | Ast_cocci.MINUS (_,x::xs) -> true
 137   | Ast_cocci.PLUS -> raise Impossible
 138
 139 let contain_plus info =
 140   let mck = Ast_c.mcode_of_info info in
 141   mcode_contain_plus mck
 142
 143 (*****************************************************************************)
 144 (* Last fix on the ast *)
 145 (*****************************************************************************)
 146
 147 (* Because of the ugly trick to handle initialiser, I generate fake ','
 148  * for the last initializer element, but if there is nothing around it,
 149  * I don't want in the end to print it.
 150  *)
 151
 152 let remove_useless_fakeInfo_struct program =
 153   let bigf = { Visitor_c.default_visitor_c_s with
 154     Visitor_c.kini_s = (fun (k,bigf) ini ->
 155       match k ini with
 156       | InitList args, ii ->
 157           (match ii with
 158           | [_i1;_i2] -> ini
 159           | [i1;i2;iicommaopt] ->
 160               if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
 161                  && (Ast_c.is_fake iicommaopt)
 162                  (* sometimes the guy put a normal iicommaopt *)
 163               then InitList args, [i1;i2]
 164               else InitList args, [i1;i2;iicommaopt]
 165           | [i1;i2;iicommaopt;end_comma_opt] ->
 166               (* only in #define. end_comma_opt canot be fake *)
 167               (* not sure if this will be considered ambiguous with a previous
 168                  case? *)
 169               if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
 170                  && (Ast_c.is_fake iicommaopt)
 171                  (* sometimes the guy put a normal iicommaopt *)
 172               then InitList args, [i1;i2;end_comma_opt]
 173               else InitList args, [i1;i2;iicommaopt;end_comma_opt]
 174           | _ -> raise Impossible
 175           )
 176       | x -> x
 177     )
 178   } in
 179   Visitor_c.vk_toplevel_s bigf program
 180
 181
 182 (*****************************************************************************)
 183 (* Tokens1 generation *)
 184 (*****************************************************************************)
 185
 186 let get_fakeInfo_and_tokens celem toks =
 187   let toks_in  = ref toks in
 188   let toks_out = ref [] in
 189
 190   (* todo? verify good order of position ? *)
 191   let pr_elem info =
 192     match Ast_c.pinfo_of_info info with
 193     | FakeTok _ ->
 194         Common.push2 (Fake1 info) toks_out
 195     | OriginTok _ | ExpandedTok _ ->
 196         (* get the associated comments/space/cppcomment tokens *)
 197         let (before, x, after) = !toks_in +> Common.split_when (fun tok ->
 198           info =*= TH.info_of_tok tok)
 199         in
 200         assert(info =*= TH.info_of_tok x);
 201         (*old: assert(before +> List.for_all (TH.is_comment)); *)
 202         before +> List.iter (fun x ->
 203           if not (TH.is_comment x)
 204           then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
 205           (* case such as  int asm d3("x"); not yet in ast *)
 206         );
 207         before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
 208         push2 (T1 x) toks_out;
 209         toks_in := after;
 210     | AbstractLineTok _ ->
 211         (* can be called on type info when for instance use -type_c *)
 212         if !Flag_parsing_c.pretty_print_type_info
 213         then Common.push2 (Fake1 info) toks_out
 214         else raise Impossible (* at this stage *)
 215   in
 216
 217   let pr_space _ = () in (* use the spacing that is there already *)
 218
 219   Pretty_print_c.pp_program_gen pr_elem pr_space celem;
 220
 221   if not (null !toks_in)
 222   then failwith "WEIRD: unparsing not finished";
 223
 224   List.rev !toks_out
 225
 226 (* Fake nodes that have BEFORE code should be moved over any subsequent
 227 whitespace and newlines, but not any comments, to get as close to the affected
 228 code as possible.  Similarly, face nodes that have AFTER code should be moved
 229 backwards.  No fake nodes should have both before and after code. *)
 230
 231 let displace_fake_nodes toks =
 232   let is_fake = function Fake1 _ -> true | _ -> false in
 233   let is_whitespace = function
 234       T1(Parser_c.TCommentSpace _)
 235       (* patch: cocci    *)
 236     | T1(Parser_c.TCommentNewline _) -> true
 237     | _ -> false in
 238   let rec loop toks =
 239     let fake_info =
 240       try Some (Common.split_when is_fake toks)
 241       with Not_found -> None in
 242     match fake_info with
 243       Some(bef,((Fake1 info) as fake),aft) ->
 244         (match !(info.cocci_tag) with
 245           (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
 246             (* move the fake node forwards *)
 247             let (whitespace,rest) = Common.span is_whitespace aft in
 248             bef @ whitespace @ fake :: (loop rest)
 249         | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
 250             (* move the fake node backwards *)
 251             let revbef = List.rev bef in
 252             let (revwhitespace,revprev) = Common.span is_whitespace revbef in
 253             let whitespace = List.rev revwhitespace in
 254             let prev = List.rev revprev in
 255             prev @ fake :: (loop (whitespace @ aft))
 256         | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
 257             bef @ fake :: (loop aft)
 258         | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
 259             failwith "fake node should not be before-after"
 260         | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
 261         )
 262     | None -> toks
 263     | _ -> raise Impossible in
 264   loop toks
 265
 266 (*****************************************************************************)
 267 (* Tokens2 generation *)
 268 (*****************************************************************************)
 269
 270 let comment2t2 = function
 271     (Token_c.TCommentCpp x,(info : Token_c.info)) ->
 272       C2("\n"^info.Common.str^"\n")
 273   | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
 274
 275 let expand_mcode toks =
 276   let toks_out = ref [] in
 277
 278   let index = ref 0 in
 279
 280   let add_elem t minus =
 281     match t with
 282     | Fake1 info ->
 283         let str = Ast_c.str_of_info info in
 284         if str =$= ""
 285         then push2 (Fake2) toks_out
 286         (* perhaps the fake ',' *)
 287         else push2 (C2 str) toks_out
 288
 289
 290     | T1 tok ->
 291         (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
 292         (* no tag on expandedTok ! *)
 293         (if (TH.is_expanded tok &&
 294             !((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot)
 295         then
 296           failwith
 297             (Printf.sprintf
 298                "expanded token %s on line %d is either modified or stored in a metavariable"
 299                (TH.str_of_tok tok) (TH.line_of_tok tok)));
 300
 301         let tok' = tok +> TH.visitor_info_of_tok (fun i ->
 302           { i with cocci_tag = ref Ast_c.emptyAnnot; }
 303         ) in
 304
 305         let optindex =
 306           if TH.is_origin tok && not (TH.is_real_comment tok)
 307           then begin
 308               incr index;
 309               Some !index
 310           end
 311           else None
 312         in
 313
 314         push2 (T2 (tok', minus, optindex)) toks_out
 315   in
 316
 317   let expand_info t =
 318     let (mcode,env) = !((info_of_token1 t).cocci_tag) in
 319
 320     let pr_cocci s =
 321       push2 (Cocci2 s) toks_out
 322     in
 323     let pr_c info =
 324       (match Ast_c.pinfo_of_info info with
 325         Ast_c.AbstractLineTok _ ->
 326           push2 (C2 (Ast_c.str_of_info info)) toks_out
 327       | Ast_c.FakeTok (s,_) ->
 328           push2 (C2 s) toks_out
 329       | _ ->
 330           Printf.printf "line: %s\n" (Common.dump info);
 331           failwith "not an abstract line");
 332       (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
 333       List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
 334
 335
 336
 337     let pr_space _ = push2 (C2 " ") toks_out in
 338
 339     let indent _   = push2 Indent_cocci2 toks_out in
 340     let unindent _ = push2 Unindent_cocci2 toks_out in
 341
 342     let args_pp = (env, pr_cocci, pr_c, pr_space, indent, unindent) in
 343
 344     (* old: when for yacfe with partial cocci:
 345      *    add_elem t false;
 346     *)
 347
 348     (* patch: when need full coccinelle transformation *)
 349     let unparser = Unparse_cocci.pp_list_list_any args_pp false in
 350     match mcode with
 351     | Ast_cocci.MINUS (_,any_xxs) ->
 352         (* Why adding ? because I want to have all the information, the whole
 353          * set of tokens, so I can then process and remove the
 354          * is_between_two_minus for instance *)
 355         add_elem t true;
 356         unparser any_xxs Unparse_cocci.InPlace
 357     | Ast_cocci.CONTEXT (_,any_befaft) ->
 358         (match any_befaft with
 359         | Ast_cocci.NOTHING ->
 360             add_elem t false
 361         | Ast_cocci.BEFORE xxs ->
 362             unparser xxs Unparse_cocci.Before;
 363             add_elem t false
 364         | Ast_cocci.AFTER xxs ->
 365             add_elem t false;
 366             unparser xxs Unparse_cocci.After;
 367         | Ast_cocci.BEFOREAFTER (xxs, yys) ->
 368             unparser xxs Unparse_cocci.Before;
 369             add_elem t false;
 370             unparser yys Unparse_cocci.After;
 371         )
 372     | Ast_cocci.PLUS -> raise Impossible
 373
 374   in
 375
 376   toks +> List.iter expand_info;
 377   List.rev !toks_out
 378
 379
 380 (*****************************************************************************)
 381 (* Tokens2 processing, filtering, adjusting *)
 382 (*****************************************************************************)
 383
 384 let is_minusable_comment = function
 385   | (T2 (t,_b,_i)) ->
 386       (match t with
 387       | Parser_c.TCommentSpace _   (* only whitespace *)
 388       (* patch: coccinelle *)
 389       | Parser_c.TCommentNewline _ (* newline plus whitespace *)
 390       | Parser_c.TComment _
 391       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 392       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 393       | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
 394         -> true
 395
 396       | Parser_c.TCommentMisc _
 397       | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
 398         -> false
 399
 400       | _ -> false
 401       )
 402   | _ -> false
 403
 404 let all_coccis = function
 405     Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 -> true
 406   | _ -> false
 407
 408 (*previously gave up if the first character was a newline, but not clear why*)
 409 let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
 410
 411 let set_minus_comment = function
 412   | T2 (t,false,idx) ->
 413       let str = TH.str_of_tok t in
 414       (match t with
 415       | Parser_c.TCommentSpace _
 416 (* patch: coccinelle *)
 417       | Parser_c.TCommentNewline _ -> ()
 418
 419       | Parser_c.TComment _
 420       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 421       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 422       | Parser_c.TCommentCpp (Token_c.CppDirective, _)
 423         ->
 424           pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
 425                  (TH.line_of_tok t) str)
 426       | _ -> raise Impossible
 427       );
 428       T2 (t, true, idx)
 429 (* patch: coccinelle *)
 430   | T2 (Parser_c.TCommentNewline _,true,idx) as x -> x
 431   | _ -> raise Impossible
 432
 433 let set_minus_comment_or_plus = function
 434     Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 as x -> x
 435   | x -> set_minus_comment x
 436
 437 let remove_minus_and_between_and_expanded_and_fake xs =
 438
 439   (* get rid of exampled and fake tok *)
 440   let xs = xs +> Common.exclude (function
 441     | T2 (t,_,_) when TH.is_expanded t -> true
 442     | Fake2 -> true
 443
 444     | _ -> false
 445   )
 446   in
 447
 448   (*This drops the space before each completely minused block (no plus code).*)
 449   let minus_or_comment = function
 450       T2(_,true,_) -> true
 451     | T2(Parser_c.TCommentNewline _,_b,_i) -> false
 452     | x -> is_minusable_comment x in
 453
 454   let rec adjust_before_minus = function
 455       [] -> []
 456 (* patch: coccinelle  *)
 457     | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::((T2(_,true,_)::_) as xs) ->
 458         let (between_minus,rest) = Common.span minus_or_comment xs in
 459         (match rest with
 460           [] -> (set_minus_comment x) :: between_minus
 461         | T2(Parser_c.TCommentNewline _,_b,_i)::_ ->
 462             (set_minus_comment x) :: between_minus @
 463             (adjust_before_minus rest)
 464         | _ -> x :: between_minus @ (adjust_before_minus rest))
 465     | x::xs -> x::adjust_before_minus xs in
 466
 467   let xs = adjust_before_minus xs in
 468
 469   (* this drops blank lines after a brace introduced by removing code *)
 470   let rec adjust_after_brace = function
 471       [] -> []
 472     | ((T2(_,false,_)) as x)::((T2(_,true,_)::_) as xs)
 473        when str_of_token2 x =$= "{" ->
 474          let (between_minus,rest) = Common.span minus_or_comment xs in
 475          let is_whitespace = function
 476              T2(Parser_c.TCommentSpace _,_b,_i)
 477              (* patch: cocci    *)
 478            | T2(Parser_c.TCommentNewline _,_b,_i) -> true
 479            | _ -> false in
 480          let (newlines,rest) = Common.span is_whitespace rest in
 481          let (drop_newlines,last_newline) =
 482            let rec loop = function
 483                [] -> ([],[])
 484              | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
 485                  (List.rev rest,[x])
 486              | x::xs ->
 487                  let (drop_newlines,last_newline) = loop xs in
 488                  (drop_newlines,x::last_newline) in
 489            loop (List.rev newlines) in
 490          x::between_minus@(List.map set_minus_comment drop_newlines)@
 491          last_newline@
 492          adjust_after_brace rest
 493     | x::xs -> x::adjust_after_brace xs in
 494
 495   let xs = adjust_after_brace xs in
 496
 497   (* this deals with any stuff that is between the minused code, eg
 498      spaces, comments, attributes, etc. *)
 499   (* The use of is_minusable_comment_or_plus and set_minus_comment_or_plus
 500      is because the + code can end up anywhere in the middle of the - code;
 501      it is not necessarily to the far left *)
 502   let rec adjust_between_minus xs =
 503     match xs with
 504     | [] -> []
 505     | (T2 (t1,true,idx1))::xs ->
 506
 507         let (between_comments, rest) =
 508           Common.span is_minusable_comment_or_plus xs in
 509         (match rest with
 510         | [] -> [(T2 (t1, true,idx1))]
 511
 512         | (T2 (t2, true,idx2))::rest ->
 513             (T2 (t1, true,idx1))::
 514             (List.map set_minus_comment_or_plus between_comments @
 515              adjust_between_minus ((T2 (t2, true, idx2))::rest))
 516         | x::xs ->
 517             (T2 (t1, true, idx1))::
 518             (between_comments @ adjust_between_minus (x::xs))
 519         )
 520
 521     | x::xs -> x::adjust_between_minus xs in
 522
 523   let xs = adjust_between_minus xs in
 524
 525   let xs = xs +> Common.exclude (function
 526     | T2 (t,true,_) -> true
 527     | _ -> false
 528   ) in
 529   xs
 530
 531 (* normally, in C code, a semicolon is not preceded by a space or newline *)
 532 let adjust_before_semicolon toks =
 533   let toks = List.rev toks in
 534   let rec loop = function
 535       [] -> []
 536     | ((T2(_,false,_)) as x)::xs ->
 537         if List.mem (str_of_token2 x) [";";")";","]
 538         then
 539           let (spaces, rest) = Common.span is_minusable_comment xs in
 540           (match rest with
 541             (T2(_,true,_))::_ | (Cocci2 _)::_ ->
 542               (* only drop spaces if something was actually changed before *)
 543               x :: loop rest
 544           | _ -> x :: loop xs)
 545         else x :: loop xs
 546     | x::xs -> x :: loop xs in
 547   List.rev (loop toks)
 548
 549 let is_ident_like s = s ==~ Common.regexp_alpha
 550
 551 let rec add_space xs =
 552   match xs with
 553   | [] -> []
 554   | [x] -> [x]
 555   | x::y::xs ->
 556       let sx = str_of_token2 x in
 557       let sy = str_of_token2 y in
 558       if is_ident_like sx && is_ident_like sy
 559       then x::C2 " "::(add_space (y::xs))
 560       else x::(add_space (y::xs))
 561
 562
 563
 564 (* When insert some new code, because of a + in a SP, we must add this
 565  * code at the right place, with the good indentation. So each time we
 566  * encounter some spacing info, with some newline, we maintain the
 567  * current indentation level used.
 568  *
 569  * TODO problems: not accurate. ex: TODO
 570  *
 571  * TODO: if in #define region, should add a \ \n
 572  *)
 573 let new_tabbing2 space =
 574   (list_of_string space)
 575     +> List.rev
 576     +> Common.take_until (fun c -> c =<= '\n')
 577     +> List.rev
 578     +> List.map string_of_char
 579     +> String.concat ""
 580
 581 let new_tabbing a =
 582   Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
 583
 584
 585 let rec adjust_indentation xs =
 586   let _current_tabbing = ref "" in
 587   let tabbing_unit = ref None in
 588
 589   let string_of_list l = String.concat "" (List.map string_of_char l) in
 590
 591   (* try to pick a tabbing unit for the plus code *)
 592   let adjust_tabbing_unit old_tab new_tab =
 593     if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
 594     then
 595       let old_tab = list_of_string old_tab in
 596       let new_tab = list_of_string new_tab in
 597       let rec loop = function
 598           ([],new_tab) ->
 599             tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
 600         | (_,[]) -> failwith "not possible"
 601         | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
 602       loop (old_tab,new_tab) in
 603
 604   let remtab tu current_tab =
 605     let current_tab = List.rev(list_of_string current_tab) in
 606     let rec loop = function
 607         ([],new_tab) -> string_of_list (List.rev new_tab)
 608       | (_,[]) -> "" (*weird; tabbing unit used up more than the current tab*)
 609       | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
 610       | (_,ns) -> (* mismatch; remove what we can *)
 611           string_of_list (List.rev ns) in
 612     loop (tu,current_tab) in
 613
 614   let rec find_first_tab started = function
 615       [] -> ()
 616     | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
 617         find_first_tab true xs
 618 (* patch: coccinelle *)
 619     | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
 620       when started ->
 621         let s = str_of_token2 x +> new_tabbing in
 622         tabbing_unit := Some (s,List.rev (list_of_string s))
 623     | x::xs -> find_first_tab started xs in
 624   find_first_tab false xs;
 625
 626   let rec aux started xs =
 627     match xs with
 628     | [] ->  []
 629 (* patch: coccinelle *)
 630     | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
 631       (Cocci2 "{")::xs when started && str_of_token2 x =$= ")" ->
 632         (* to be done for if, etc, but not for a function header *)
 633         x::(Cocci2 " {")::(aux started xs)
 634     | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs ->
 635         let old_tabbing = !_current_tabbing in
 636         str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
 637         (* only trust the indentation after the first { *)
 638         (if started then adjust_tabbing_unit old_tabbing !_current_tabbing);
 639         let coccis_rest = Common.span all_coccis xs in
 640         (match coccis_rest with
 641           (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
 642             (* the case where cocci code has been added before a close } *)
 643             x::aux started (Indent_cocci2::xs)
 644         | _ -> x::aux started xs)
 645     | Indent_cocci2::xs ->
 646         (match !tabbing_unit with
 647           None -> aux started xs
 648         | Some (tu,_) ->
 649             _current_tabbing := (!_current_tabbing)^tu;
 650             Cocci2 (tu)::aux started xs)
 651     | Unindent_cocci2::xs ->
 652         (match !tabbing_unit with
 653           None -> aux started xs
 654         | Some (_,tu) ->
 655             _current_tabbing := remtab tu (!_current_tabbing);
 656             aux started xs)
 657     (* border between existing code and cocci code *)
 658     | ((T2 (tok,_,_)) as x)::((Cocci2 "\n") as y)::xs
 659       when str_of_token2 x =$= "{" ->
 660         x::aux true (y::Indent_cocci2::xs)
 661     | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
 662       when str_of_token2 y =$= "}" ->
 663         x::aux started (y::Unindent_cocci2::xs)
 664     (* starting the body of the function *)
 665     | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->  x::aux true xs
 666     | (Cocci2 "{")::xs -> (Cocci2 "{")::aux true xs
 667     | ((Cocci2 "\n") as x)::xs ->
 668             (* dont inline in expr because of weird eval order of ocaml *)
 669         let s = !_current_tabbing in
 670         x::Cocci2 (s)::aux started xs
 671     | x::xs -> x::aux started xs in
 672   aux false xs
 673
 674
 675 let rec find_paren_comma = function
 676   | [] -> ()
 677
 678   (* do nothing if was like this in original file *)
 679   | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
 680     ::xs when p2 =|= p1 + 1 ->
 681       find_paren_comma (x2::xs)
 682
 683   | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
 684     ::xs when p2 =|= p1 + 1 ->
 685       find_paren_comma (x2::xs)
 686
 687   | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
 688     ::xs when p2 =|= p1 + 1 ->
 689       find_paren_comma (x2::xs)
 690
 691   (* otherwise yes can adjust *)
 692   | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
 693       x2.remove <- true;
 694       find_paren_comma (x2::xs)
 695   | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
 696       x1.remove <- true;
 697       find_paren_comma (x2::xs)
 698
 699   | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
 700       x1.remove <- true;
 701       find_paren_comma (x2::xs)
 702
 703   | x::xs ->
 704       find_paren_comma xs
 705
 706
 707 let fix_tokens toks =
 708   let toks = toks +> List.map mk_token_extended in
 709
 710   let cleaner = toks +> Common.exclude (function
 711     | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
 712     | _ -> false
 713   ) in
 714   find_paren_comma cleaner;
 715
 716   let toks = rebuild_tokens_extented toks in
 717   toks +> List.map (fun x -> x.tok2)
 718
 719
 720
 721 (*****************************************************************************)
 722 (* Final unparsing (and debugging support) *)
 723 (*****************************************************************************)
 724
 725 (* for debugging *)
 726 type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
 727
 728 let kind_of_token2 = function
 729   | Fake2 -> KFake
 730   | Cocci2 _ -> KCocci
 731   | C2 _ -> KC
 732   | T2 (t,_,_) ->
 733       (match TH.pinfo_of_tok t with
 734       | ExpandedTok _ -> KExpanded
 735       | OriginTok _ -> KOrigin
 736       | FakeTok _ -> raise Impossible (* now a Fake2 *)
 737       | AbstractLineTok _ -> raise Impossible (* now a KC *)
 738       )
 739   | Unindent_cocci2 | Indent_cocci2 -> raise Impossible
 740
 741 let end_mark = "!"
 742
 743 let start_mark = function
 744   | KFake -> "!F!"
 745   | KCocci -> "!S!"
 746   | KC -> "!A!"
 747   | KExpanded -> "!E!"
 748   | KOrigin -> ""
 749
 750 let print_all_tokens2 pr xs =
 751   if !Flag_parsing_c.debug_unparsing
 752   then
 753     let current_kind = ref KOrigin in
 754     xs +> List.iter (fun t ->
 755       let newkind = kind_of_token2 t in
 756       if newkind =*= !current_kind
 757       then pr (str_of_token2 t)
 758       else begin
 759         pr (end_mark);
 760         pr (start_mark newkind);
 761         pr (str_of_token2 t);
 762         current_kind := newkind
 763       end
 764     );
 765   else
 766     xs +> List.iter (fun x -> pr (str_of_token2 x))
 767
 768
 769
 770
 771 (*****************************************************************************)
 772 (* Entry points *)
 773 (*****************************************************************************)
 774
 775 (* old: PPviatok was made in the beginning to allow to pretty print a
 776  * complete C file, including a modified C file by transformation.ml,
 777  * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
 778  * maybe flow_to_ast) all the cases. Indeed we don't need to do some
 779  * fancy stuff when a function was not modified at all. Just need to
 780  * print the list of token as-is. But now pretty_print_c.ml handles
 781  * almost everything so maybe less useful. Maybe PPviatok allows to
 782  * optimize a little the pretty printing.
 783  *
 784  * update: now have PPviastr which goes even faster than PPviatok, so
 785  * PPviatok has disappeared.
 786  *)
 787
 788 type ppmethod = PPnormal | PPviastr
 789
 790
 791
 792
 793 (* The pp_program function will call pretty_print_c.ml with a special
 794  * function to print the leaf components, the tokens. When we want to
 795  * print a token, we need to print also maybe the space and comments that
 796  * were close to it in the original file (and that was omitted during the
 797  * parsing phase), and honor what the cocci-info attached to the token says.
 798  * Maybe we will not print the token if it's a MINUS-token, and maybe we will
 799  * print it and also print some cocci-code attached in a PLUS to it.
 800  * So we will also maybe call unparse_cocci. Because the cocci-code may
 801  * contain metavariables, unparse_cocci will in fact sometimes call back
 802  * pretty_print_c (which will this time don't call back again unparse_cocci)
 803  *)
 804
 805 let pp_program2 xs outfile  =
 806   Common.with_open_outfile outfile (fun (pr,chan) ->
 807     let pr s =
 808       if !Flag_parsing_c.debug_unparsing
 809       then begin pr2_no_nl s; flush stderr end
 810       else pr s
 811         (* flush chan; *)
 812         (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
 813     in
 814
 815     xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
 816
 817       (* here can still work on ast *)
 818       let e = remove_useless_fakeInfo_struct e in
 819
 820       match ppmethod with
 821       | PPnormal ->
 822           (* now work on tokens *)
 823
 824           (* phase1: just get all the tokens, all the information *)
 825           assert(toks_e +> List.for_all (fun t ->
 826             TH.is_origin t or TH.is_expanded t
 827           ));
 828           let toks = get_fakeInfo_and_tokens e toks_e in
 829           let toks = displace_fake_nodes toks in
 830           (* assert Origin;ExpandedTok;Faketok *)
 831           let toks = expand_mcode toks in
 832           (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
 833            * and no tag information, just NOTHING. *)
 834
 835           (* phase2: can now start to filter and adjust *)
 836           let toks = adjust_indentation toks in
 837           let toks = remove_minus_and_between_and_expanded_and_fake toks in
 838           let toks = adjust_before_semicolon toks in
 839           (* assert Origin + Cocci + C and no minus *)
 840           let toks = add_space toks in
 841           let toks = fix_tokens toks in
 842
 843           (* in theory here could reparse and rework the ast! or
 844            * apply some SP. Not before cos julia may have generated
 845            * not parsable file. Need do unparsing_tricks call before being
 846            * ready to reparse. *)
 847           print_all_tokens2 pr toks;
 848
 849       | PPviastr -> pr str
 850     )
 851   )
 852
 853 let pp_program a b =
 854   Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
 855
 856
 857 let pp_program_default xs outfile =
 858   let xs' = xs +> List.map (fun x -> x, PPnormal) in
 859   pp_program xs' outfile