parsing_c/unparse_c.ml

   1 (* Yoann Padioleau, Julia Lawall
   2  *
   3  * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
   4  *
   5  * This program is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU General Public License (GPL)
   7  * version 2 as published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * file license.txt for more details.
  13  *
  14  *
  15  * Modifications by Julia Lawall for better newline handling.
  16  *)
  17 open Common
  18
  19 open Ast_c
  20
  21 module TH = Token_helpers
  22
  23
  24 (* should keep comments and directives in between adjacent deleted terms,
  25 but not comments and directives within deleted terms.  should use the
  26 labels found in the control-flow graph *)
  27
  28
  29
  30 (*****************************************************************************)
  31 (* Wrappers *)
  32 (*****************************************************************************)
  33 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
  34
  35 (*****************************************************************************)
  36 (* Types used during the intermediate phases of the unparsing *)
  37 (*****************************************************************************)
  38
  39 type token1 =
  40   | Fake1 of info
  41   | T1 of Parser_c.token
  42
  43 (* The cocci_tag of the token should always be a NOTHING. The mark of
  44  * the token can only be OriginTok or ExpandedTok. Why not get rid of
  45  * token and get something simpler ? because we need to know if the
  46  * info is a TCommentCpp or TCommentSpace, etc for some of the further
  47  * analysis so easier to keep with the token.
  48  *
  49  * This type contains the whole information. Have all the tokens with this
  50  * type.
  51  *)
  52 type min =
  53     Min of (int list (* match numbers *) * int (* adjacency information *))
  54   | Ctx
  55
  56 type token2 =
  57   | T2 of Parser_c.token * min *
  58           int option (* orig index, abstracting away comments and space *)
  59   | Fake2
  60   | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
  61   | C2 of string
  62   | Indent_cocci2
  63   | Unindent_cocci2
  64
  65 (* not used yet *)
  66 type token3 =
  67   | T3 of Parser_c.token
  68   | Cocci3 of string
  69   | C3 of string
  70
  71
  72 (* similar to the tech in parsing_hack *)
  73 type token_extended = {
  74   tok2 : token2;
  75   str  : string;
  76   idx: int option; (* to know if 2 tokens were consecutive in orig file *)
  77   mutable new_tokens_before : token2 list;
  78   mutable remove : bool;
  79 }
  80
  81
  82 (*****************************************************************************)
  83 (* Helpers *)
  84 (*****************************************************************************)
  85
  86 let info_of_token1 t =
  87   match t with
  88   | Fake1 info -> info
  89   | T1 tok -> TH.info_of_tok tok
  90
  91 let str_of_token2 = function
  92   | T2 (t,_,_) -> TH.str_of_tok t
  93   | Fake2 -> ""
  94   | Cocci2 (s,_,_,_) -> s
  95   | C2 s -> s
  96   | Indent_cocci2 -> ""
  97   | Unindent_cocci2 -> ""
  98
  99 let print_token2 = function
 100   | T2 (t,b,_) ->
 101       let b_str =
 102         match b with
 103           Min (index,adj) ->
 104             Printf.sprintf "-%d[%s]" adj
 105               (String.concat " " (List.map string_of_int index))
 106         | Ctx -> "" in
 107       "T2:"^b_str^TH.str_of_tok t
 108   | Fake2 -> ""
 109   | Cocci2 (s,_,lc,rc) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
 110   | C2 s -> "C2:"^s
 111   | Indent_cocci2 -> "Indent"
 112   | Unindent_cocci2 -> "Unindent"
 113
 114 let simple_print_all_tokens2 l =
 115   List.iter (function x -> Printf.printf "%s " (print_token2 x)) l;
 116   Printf.printf "\n"
 117
 118 let str_of_token3 = function
 119   | T3 t -> TH.str_of_tok t
 120   | Cocci3 s | C3 s -> s
 121
 122
 123
 124 let mk_token_extended x =
 125   let origidx =
 126     match x with
 127     | T2 (_,_, idx) -> idx
 128     | _ -> None
 129   in
 130   { tok2 = x;
 131     str = str_of_token2 x;
 132     idx = origidx;
 133     new_tokens_before = [];
 134     remove = false;
 135   }
 136
 137 let rebuild_tokens_extented toks_ext =
 138   let _tokens = ref [] in
 139   toks_ext +> List.iter (fun tok ->
 140     tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
 141     if not tok.remove then push2 tok.tok2 _tokens;
 142   );
 143   let tokens = List.rev !_tokens in
 144   (tokens +> List.map mk_token_extended)
 145
 146
 147 let mcode_contain_plus = function
 148   | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
 149   | Ast_cocci.CONTEXT _ -> true
 150 (* patch: when need full coccinelle transformation *)
 151   | Ast_cocci.MINUS (_,_,_,[]) -> false
 152   | Ast_cocci.MINUS (_,_,_,x::xs) -> true
 153   | Ast_cocci.PLUS -> raise Impossible
 154
 155 let contain_plus info =
 156   let mck = Ast_c.mcode_of_info info in
 157   mcode_contain_plus mck
 158
 159 (*****************************************************************************)
 160 (* Last fix on the ast *)
 161 (*****************************************************************************)
 162
 163 (* Because of the ugly trick to handle initialiser, I generate fake ','
 164  * for the last initializer element, but if there is nothing around it,
 165  * I don't want in the end to print it.
 166  *)
 167
 168 let remove_useless_fakeInfo_struct program =
 169   let bigf = { Visitor_c.default_visitor_c_s with
 170     Visitor_c.kini_s = (fun (k,bigf) ini ->
 171       match k ini with
 172       | InitList args, ii ->
 173           (match ii with
 174           | [_i1;_i2] -> ini
 175           | [i1;i2;iicommaopt] ->
 176               if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
 177                  && (Ast_c.is_fake iicommaopt)
 178                  (* sometimes the guy put a normal iicommaopt *)
 179               then InitList args, [i1;i2]
 180               else InitList args, [i1;i2;iicommaopt]
 181           | [i1;i2;iicommaopt;end_comma_opt] ->
 182               (* only in #define. end_comma_opt canot be fake *)
 183               (* not sure if this will be considered ambiguous with a previous
 184                  case? *)
 185               if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
 186                  && (Ast_c.is_fake iicommaopt)
 187                  (* sometimes the guy put a normal iicommaopt *)
 188               then InitList args, [i1;i2;end_comma_opt]
 189               else InitList args, [i1;i2;iicommaopt;end_comma_opt]
 190           | _ -> raise Impossible
 191           )
 192       | x -> x
 193     )
 194   } in
 195   Visitor_c.vk_toplevel_s bigf program
 196
 197
 198 (*****************************************************************************)
 199 (* Tokens1 generation *)
 200 (*****************************************************************************)
 201
 202 let get_fakeInfo_and_tokens celem toks =
 203   let toks_in  = ref toks in
 204   let toks_out = ref [] in
 205
 206   (* todo? verify good order of position ? *)
 207   let pr_elem info =
 208     match Ast_c.pinfo_of_info info with
 209     | FakeTok _ ->
 210         Common.push2 (Fake1 info) toks_out
 211     | OriginTok _ | ExpandedTok _ ->
 212         (* get the associated comments/space/cppcomment tokens *)
 213         let (before, x, after) = !toks_in +> Common.split_when (fun tok ->
 214           info =*= TH.info_of_tok tok)
 215         in
 216         assert(info =*= TH.info_of_tok x);
 217         (*old: assert(before +> List.for_all (TH.is_comment)); *)
 218         before +> List.iter (fun x ->
 219           if not (TH.is_comment x)
 220           then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
 221           (* case such as  int asm d3("x"); not yet in ast *)
 222         );
 223         before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
 224         push2 (T1 x) toks_out;
 225         toks_in := after;
 226     | AbstractLineTok _ ->
 227         (* can be called on type info when for instance use -type_c *)
 228         if !Flag_parsing_c.pretty_print_type_info
 229         then Common.push2 (Fake1 info) toks_out
 230         else raise Impossible (* at this stage *)
 231   in
 232
 233   let pr_space _ = () in (* use the spacing that is there already *)
 234
 235   Pretty_print_c.pp_program_gen pr_elem pr_space celem;
 236
 237   if not (null !toks_in)
 238   then failwith "WEIRD: unparsing not finished";
 239
 240   List.rev !toks_out
 241
 242 (* Fake nodes that have BEFORE code should be moved over any subsequent
 243 whitespace and newlines, but not any comments, to get as close to the affected
 244 code as possible.  Similarly, face nodes that have AFTER code should be moved
 245 backwards.  No fake nodes should have both before and after code. *)
 246
 247 let displace_fake_nodes toks =
 248   let is_fake = function Fake1 _ -> true | _ -> false in
 249   let is_whitespace = function
 250       T1(Parser_c.TCommentSpace _)
 251       (* patch: cocci    *)
 252     | T1(Parser_c.TCommentNewline _) -> true
 253     | _ -> false in
 254   let rec loop toks =
 255     let fake_info =
 256       try Some (Common.split_when is_fake toks)
 257       with Not_found -> None in
 258     match fake_info with
 259       Some(bef,((Fake1 info) as fake),aft) ->
 260         (match !(info.cocci_tag) with
 261         | Some x ->
 262           (match x with
 263           (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
 264             (* move the fake node forwards *)
 265             let (whitespace,rest) = Common.span is_whitespace aft in
 266             bef @ whitespace @ fake :: (loop rest)
 267         | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
 268             (* move the fake node backwards *)
 269             let revbef = List.rev bef in
 270             let (revwhitespace,revprev) = Common.span is_whitespace revbef in
 271             let whitespace = List.rev revwhitespace in
 272             let prev = List.rev revprev in
 273             prev @ fake :: (loop (whitespace @ aft))
 274         | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
 275             bef @ fake :: (loop aft)
 276         | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
 277             failwith "fake node should not be before-after"
 278         | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
 279         )
 280         | None ->
 281             bef @ fake :: (loop aft)
 282         )
 283     | None -> toks
 284     | _ -> raise Impossible in
 285   loop toks
 286
 287 (*****************************************************************************)
 288 (* Tokens2 generation *)
 289 (*****************************************************************************)
 290
 291 let comment2t2 = function
 292     (Token_c.TCommentCpp x,(info : Token_c.info)) ->
 293       C2("\n"^info.Common.str^"\n")
 294   | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
 295
 296 let expand_mcode toks =
 297   let toks_out = ref [] in
 298
 299   let index = ref 0 in
 300
 301   let add_elem t minus =
 302     match t with
 303     | Fake1 info ->
 304         let str = Ast_c.str_of_info info in
 305         if str =$= ""
 306         then push2 (Fake2) toks_out
 307         (* perhaps the fake ',' *)
 308         else push2 (C2 str) toks_out
 309
 310
 311     | T1 tok ->
 312         (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
 313         (* no tag on expandedTok ! *)
 314         (if (TH.is_expanded tok &&
 315             !((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot)
 316         then
 317           failwith
 318             (Printf.sprintf
 319                "expanded token %s on line %d is either modified or stored in a metavariable"
 320                (TH.str_of_tok tok) (TH.line_of_tok tok)));
 321
 322         let tok' = tok +> TH.visitor_info_of_tok (fun i ->
 323           { i with cocci_tag = ref Ast_c.emptyAnnot; }
 324         ) in
 325
 326         let optindex =
 327           if TH.is_origin tok && not (TH.is_real_comment tok)
 328           then begin
 329               incr index;
 330               Some !index
 331           end
 332           else None
 333         in
 334
 335         push2 (T2 (tok', minus, optindex)) toks_out
 336   in
 337
 338   let expand_info t =
 339     let (mcode,env) =
 340       Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
 341
 342     let pr_cocci s ln col rcol =
 343       push2 (Cocci2(s,ln,col,rcol)) toks_out  in
 344     let pr_c info =
 345       (match Ast_c.pinfo_of_info info with
 346         Ast_c.AbstractLineTok _ ->
 347           push2 (C2 (Ast_c.str_of_info info)) toks_out
 348       | Ast_c.FakeTok (s,_) ->
 349           push2 (C2 s) toks_out
 350       | _ ->
 351           Printf.printf "line: %s\n" (Common.dump info);
 352           failwith "not an abstract line");
 353       (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
 354       List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
 355
 356     let pr_barrier ln col = (* marks a position, used around C code *)
 357       push2 (Cocci2("",ln,col,col)) toks_out  in
 358     let pr_nobarrier ln col = () in (* not needed for linux spacing *)
 359
 360     let pr_cspace _ = push2 (C2 " ") toks_out in
 361
 362     let pr_space _ = () (* rely on add_space in cocci code *) in
 363     let pr_arity _ = () (* not interested *) in
 364
 365     let indent _   = push2 Indent_cocci2 toks_out in
 366     let unindent _ = push2 Unindent_cocci2 toks_out in
 367
 368     let args_pp =
 369       (env, pr_cocci, pr_c, pr_cspace,
 370        (match !Flag_parsing_c.spacing with
 371          Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
 372        pr_arity,
 373        (match !Flag_parsing_c.spacing with
 374          Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
 375        indent, unindent) in
 376
 377     (* old: when for yacfe with partial cocci:
 378      *    add_elem t false;
 379     *)
 380
 381     (* patch: when need full coccinelle transformation *)
 382     let unparser = Unparse_cocci.pp_list_list_any args_pp false in
 383     match mcode with
 384     | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
 385         (* Why adding ? because I want to have all the information, the whole
 386          * set of tokens, so I can then process and remove the
 387          * is_between_two_minus for instance *)
 388         add_elem t (Min (inst,adj));
 389         unparser any_xxs Unparse_cocci.InPlace
 390     | Ast_cocci.CONTEXT (_,any_befaft) ->
 391         (match any_befaft with
 392         | Ast_cocci.NOTHING ->
 393             add_elem t Ctx
 394         | Ast_cocci.BEFORE xxs ->
 395             unparser xxs Unparse_cocci.Before;
 396             add_elem t Ctx
 397         | Ast_cocci.AFTER xxs ->
 398             add_elem t Ctx;
 399             unparser xxs Unparse_cocci.After;
 400         | Ast_cocci.BEFOREAFTER (xxs, yys) ->
 401             unparser xxs Unparse_cocci.Before;
 402             add_elem t Ctx;
 403             unparser yys Unparse_cocci.After;
 404         )
 405     | Ast_cocci.PLUS -> raise Impossible
 406
 407   in
 408
 409   toks +> List.iter expand_info;
 410   List.rev !toks_out
 411
 412
 413 (*****************************************************************************)
 414 (* Tokens2 processing, filtering, adjusting *)
 415 (*****************************************************************************)
 416
 417 let is_minusable_comment = function
 418   | (T2 (t,_b,_i)) ->
 419       (match t with
 420       | Parser_c.TCommentSpace _   (* only whitespace *)
 421       (* patch: coccinelle *)
 422       | Parser_c.TCommentNewline _ (* newline plus whitespace *)
 423       | Parser_c.TComment _
 424       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 425       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 426       | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
 427         -> true
 428
 429       | Parser_c.TCommentMisc _
 430       | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
 431         -> false
 432
 433       | _ -> false
 434       )
 435   | _ -> false
 436
 437 let all_coccis = function
 438     Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 -> true
 439   | _ -> false
 440
 441 (*previously gave up if the first character was a newline, but not clear why*)
 442 let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
 443
 444 let set_minus_comment adj = function
 445   | T2 (t,Ctx,idx) ->
 446       let str = TH.str_of_tok t in
 447       (match t with
 448       | Parser_c.TCommentSpace _
 449 (* patch: coccinelle *)
 450       | Parser_c.TCommentNewline _ -> ()
 451
 452       | Parser_c.TComment _
 453       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 454       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 455       | Parser_c.TCommentCpp (Token_c.CppDirective, _)
 456         ->
 457           pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
 458                  (TH.line_of_tok t) str)
 459       | _ -> raise Impossible
 460       );
 461       T2 (t, Min adj, idx)
 462 (* patch: coccinelle *)
 463   | T2 (Parser_c.TCommentNewline _,Min adj,idx) as x -> x
 464   | _ -> raise Impossible
 465
 466 let set_minus_comment_or_plus adj = function
 467     Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 as x -> x
 468   | x -> set_minus_comment adj x
 469
 470 let remove_minus_and_between_and_expanded_and_fake xs =
 471
 472   (* get rid of exampled and fake tok *)
 473   let xs = xs +> Common.exclude (function
 474     | T2 (t,_,_) when TH.is_expanded t -> true
 475     | Fake2 -> true
 476
 477     | _ -> false
 478   )
 479   in
 480
 481   (*This drops the space before each completely minused block (no plus code).*)
 482   let minus_or_comment = function
 483       T2(_,Min adj,_) -> true
 484     | T2(Parser_c.TCommentNewline _,_b,_i) -> false
 485     | x -> is_minusable_comment x in
 486
 487   let rec adjust_before_minus = function
 488       [] -> []
 489 (* patch: coccinelle  *)
 490     | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
 491       ((T2(_,Min adj,_)::_) as xs) ->
 492         let (between_minus,rest) = Common.span minus_or_comment xs in
 493         (match rest with
 494           [] -> (set_minus_comment adj x) :: between_minus
 495         | T2(Parser_c.TCommentNewline _,_b,_i)::_ ->
 496             (set_minus_comment adj x) :: between_minus @
 497             (adjust_before_minus rest)
 498         | _ -> x :: between_minus @ (adjust_before_minus rest))
 499     | x::xs -> x::adjust_before_minus xs in
 500
 501   let xs = adjust_before_minus xs in
 502
 503   (* this drops blank lines after a brace introduced by removing code *)
 504   let rec adjust_after_brace = function
 505       [] -> []
 506     | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs)
 507        when str_of_token2 x =$= "{" ->
 508          let (between_minus,rest) = Common.span minus_or_comment xs in
 509          let is_whitespace = function
 510              T2(Parser_c.TCommentSpace _,_b,_i)
 511              (* patch: cocci    *)
 512            | T2(Parser_c.TCommentNewline _,_b,_i) -> true
 513            | _ -> false in
 514          let (newlines,rest) = Common.span is_whitespace rest in
 515          let (drop_newlines,last_newline) =
 516            let rec loop = function
 517                [] -> ([],[])
 518              | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
 519                  (List.rev rest,[x])
 520              | x::xs ->
 521                  let (drop_newlines,last_newline) = loop xs in
 522                  (drop_newlines,x::last_newline) in
 523            loop (List.rev newlines) in
 524          x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@
 525          last_newline@
 526          adjust_after_brace rest
 527     | x::xs -> x::adjust_after_brace xs in
 528
 529   let xs = adjust_after_brace xs in
 530
 531   (* this deals with any stuff that is between the minused code, eg
 532      spaces, comments, attributes, etc. *)
 533   (* The use of is_minusable_comment_or_plus and set_minus_comment_or_plus
 534      is because the + code can end up anywhere in the middle of the - code;
 535      it is not necessarily to the far left *)
 536
 537   let common_adj (index1,adj1) (index2,adj2) =
 538     adj1 = adj2 (* same adjacency info *) &&
 539     (* non-empty intersection of witness trees *)
 540     not ((Common.inter_set index1 index2) = []) in
 541
 542   let rec adjust_between_minus xs =
 543     match xs with
 544     | [] -> []
 545     | ((T2 (_,Min adj1,_)) as t1)::xs ->
 546         let (between_comments, rest) =
 547           Common.span is_minusable_comment_or_plus xs in
 548         (match rest with
 549         | [] -> [t1]
 550
 551         | ((T2 (_,Min adj2,_)) as t2)::rest when common_adj adj1 adj2 ->
 552             t1::
 553             (List.map (set_minus_comment_or_plus adj1) between_comments @
 554              adjust_between_minus (t2::rest))
 555         | x::xs ->
 556             t1::(between_comments @ adjust_between_minus (x::xs))
 557         )
 558
 559     | x::xs -> x::adjust_between_minus xs in
 560
 561   let xs = adjust_between_minus xs in
 562
 563   let xs = xs +> Common.exclude (function
 564     | T2 (t,Min adj,_) -> true
 565     | _ -> false
 566   ) in
 567   xs
 568
 569 (* normally, in C code, a semicolon is not preceded by a space or newline *)
 570 let adjust_before_semicolon toks =
 571   let toks = List.rev toks in
 572   let rec loop = function
 573       [] -> []
 574     | ((T2(_,Ctx,_)) as x)::xs ->
 575         if List.mem (str_of_token2 x) [";";")";","]
 576         then
 577           let (spaces, rest) = Common.span is_minusable_comment xs in
 578           (match rest with
 579             (T2(_,Min _,_))::_ | (Cocci2 _)::_ ->
 580               (* only drop spaces if something was actually changed before *)
 581               x :: loop rest
 582           | _ -> x :: loop xs)
 583         else x :: loop xs
 584     | x::xs -> x :: loop xs in
 585   List.rev (loop toks)
 586
 587 let is_ident_like s = s ==~ Common.regexp_alpha
 588
 589 let rec add_space xs =
 590   match xs with
 591   | [] -> []
 592   | [x] -> [x]
 593   | (Cocci2(sx,lnx,_,rcolx) as x)::((Cocci2(sy,lny,lcoly,_)) as y)::xs
 594     when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
 595       not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
 596         (* this only works within a line.  could consider whether
 597            something should be done to add newlines too, rather than
 598            printing them explicitly in unparse_cocci. *)
 599         x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
 600   | x::y::xs ->
 601       let sx = str_of_token2 x in
 602       let sy = str_of_token2 y in
 603       if is_ident_like sx && is_ident_like sy
 604       then x::C2 " "::(add_space (y::xs))
 605       else x::(add_space (y::xs))
 606
 607
 608
 609 (* When insert some new code, because of a + in a SP, we must add this
 610  * code at the right place, with the good indentation. So each time we
 611  * encounter some spacing info, with some newline, we maintain the
 612  * current indentation level used.
 613  *
 614  * TODO problems: not accurate. ex: TODO
 615  *
 616  * TODO: if in #define region, should add a \ \n
 617  *)
 618 let new_tabbing2 space =
 619   (list_of_string space)
 620     +> List.rev
 621     +> Common.take_until (fun c -> c =<= '\n')
 622     +> List.rev
 623     +> List.map string_of_char
 624     +> String.concat ""
 625
 626 let new_tabbing a =
 627   Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
 628
 629
 630 let rec adjust_indentation xs =
 631   let _current_tabbing = ref "" in
 632   let tabbing_unit = ref None in
 633
 634   let string_of_list l = String.concat "" (List.map string_of_char l) in
 635
 636   (* try to pick a tabbing unit for the plus code *)
 637   let adjust_tabbing_unit old_tab new_tab =
 638     if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
 639     then
 640       let old_tab = list_of_string old_tab in
 641       let new_tab = list_of_string new_tab in
 642       let rec loop = function
 643           ([],new_tab) ->
 644             tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
 645         | (_,[]) -> failwith "not possible"
 646         | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
 647       loop (old_tab,new_tab) in
 648
 649   let remtab tu current_tab =
 650     let current_tab = List.rev(list_of_string current_tab) in
 651     let rec loop = function
 652         ([],new_tab) -> string_of_list (List.rev new_tab)
 653       | (_,[]) -> "" (*weird; tabbing unit used up more than the current tab*)
 654       | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
 655       | (_,ns) -> (* mismatch; remove what we can *)
 656           string_of_list (List.rev ns) in
 657     loop (tu,current_tab) in
 658
 659   let rec find_first_tab started = function
 660       [] -> ()
 661     | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
 662         find_first_tab true xs
 663 (* patch: coccinelle *)
 664     | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
 665       when started ->
 666         let s = str_of_token2 x +> new_tabbing in
 667         tabbing_unit := Some (s,List.rev (list_of_string s))
 668     | x::xs -> find_first_tab started xs in
 669   find_first_tab false xs;
 670
 671   let rec aux started xs =
 672     match xs with
 673     | [] ->  []
 674 (* patch: coccinelle *)
 675     | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
 676       ((Cocci2 ("{",_,_,_)) as a)::xs
 677       when started && str_of_token2 x =$= ")" ->
 678         (* to be done for if, etc, but not for a function header *)
 679         x::(C2 " ")::a::(aux started xs)
 680     | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs ->
 681         let old_tabbing = !_current_tabbing in
 682         str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
 683         (* only trust the indentation after the first { *)
 684         (if started then adjust_tabbing_unit old_tabbing !_current_tabbing);
 685         let coccis_rest = Common.span all_coccis xs in
 686         (match coccis_rest with
 687           (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
 688             (* the case where cocci code has been added before a close } *)
 689             x::aux started (Indent_cocci2::xs)
 690         | _ -> x::aux started xs)
 691     | Indent_cocci2::xs ->
 692         (match !tabbing_unit with
 693           None -> aux started xs
 694         | Some (tu,_) ->
 695             _current_tabbing := (!_current_tabbing)^tu;
 696             Cocci2 (tu,-1,-1,-1)::aux started xs)
 697     | Unindent_cocci2::xs ->
 698         (match !tabbing_unit with
 699           None -> aux started xs
 700         | Some (_,tu) ->
 701             _current_tabbing := remtab tu (!_current_tabbing);
 702             aux started xs)
 703     (* border between existing code and cocci code *)
 704     | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_)) as y)::xs
 705       when str_of_token2 x =$= "{" ->
 706         x::aux true (y::Indent_cocci2::xs)
 707     | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
 708       when str_of_token2 y =$= "}" ->
 709         x::aux started (y::Unindent_cocci2::xs)
 710     (* starting the body of the function *)
 711     | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->  x::aux true xs
 712     | ((Cocci2("{",_,_,_)) as a)::xs -> a::aux true xs
 713     | ((Cocci2("\n",_,_,_)) as x)::xs ->
 714             (* dont inline in expr because of weird eval order of ocaml *)
 715         let s = !_current_tabbing in
 716         x::Cocci2 (s,-1,-1,-1)::aux started xs
 717     | x::xs -> x::aux started xs in
 718   aux false xs
 719
 720
 721 let rec find_paren_comma = function
 722   | [] -> ()
 723
 724   (* do nothing if was like this in original file *)
 725   | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
 726     ::xs when p2 =|= p1 + 1 ->
 727       find_paren_comma (x2::xs)
 728
 729   | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
 730     ::xs when p2 =|= p1 + 1 ->
 731       find_paren_comma (x2::xs)
 732
 733   | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
 734     ::xs when p2 =|= p1 + 1 ->
 735       find_paren_comma (x2::xs)
 736
 737   (* otherwise yes can adjust *)
 738   | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
 739       x2.remove <- true;
 740       find_paren_comma (x2::xs)
 741   | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
 742       x1.remove <- true;
 743       find_paren_comma (x2::xs)
 744
 745   | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
 746       x1.remove <- true;
 747       find_paren_comma (x2::xs)
 748
 749   | x::xs ->
 750       find_paren_comma xs
 751
 752
 753 let fix_tokens toks =
 754   let toks = toks +> List.map mk_token_extended in
 755
 756   let cleaner = toks +> Common.exclude (function
 757     | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
 758     | _ -> false
 759   ) in
 760   find_paren_comma cleaner;
 761
 762   let toks = rebuild_tokens_extented toks in
 763   toks +> List.map (fun x -> x.tok2)
 764
 765
 766
 767 (*****************************************************************************)
 768 (* Final unparsing (and debugging support) *)
 769 (*****************************************************************************)
 770
 771 (* for debugging *)
 772 type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
 773
 774 let kind_of_token2 = function
 775   | Fake2 -> KFake
 776   | Cocci2 _ -> KCocci
 777   | C2 _ -> KC
 778   | T2 (t,_,_) ->
 779       (match TH.pinfo_of_tok t with
 780       | ExpandedTok _ -> KExpanded
 781       | OriginTok _ -> KOrigin
 782       | FakeTok _ -> raise Impossible (* now a Fake2 *)
 783       | AbstractLineTok _ -> raise Impossible (* now a KC *)
 784       )
 785   | Unindent_cocci2 | Indent_cocci2 -> raise Impossible
 786
 787 let end_mark = "!"
 788
 789 let start_mark = function
 790   | KFake -> "!F!"
 791   | KCocci -> "!S!"
 792   | KC -> "!A!"
 793   | KExpanded -> "!E!"
 794   | KOrigin -> ""
 795
 796 let print_all_tokens2 pr xs =
 797   if !Flag_parsing_c.debug_unparsing
 798   then
 799     let current_kind = ref KOrigin in
 800     xs +> List.iter (fun t ->
 801       let newkind = kind_of_token2 t in
 802       if newkind =*= !current_kind
 803       then pr (str_of_token2 t)
 804       else begin
 805         pr (end_mark);
 806         pr (start_mark newkind);
 807         pr (str_of_token2 t);
 808         current_kind := newkind
 809       end
 810     );
 811   else
 812     xs +> List.iter (fun x -> pr (str_of_token2 x))
 813
 814
 815
 816
 817 (*****************************************************************************)
 818 (* Entry points *)
 819 (*****************************************************************************)
 820
 821 (* old: PPviatok was made in the beginning to allow to pretty print a
 822  * complete C file, including a modified C file by transformation.ml,
 823  * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
 824  * maybe flow_to_ast) all the cases. Indeed we don't need to do some
 825  * fancy stuff when a function was not modified at all. Just need to
 826  * print the list of token as-is. But now pretty_print_c.ml handles
 827  * almost everything so maybe less useful. Maybe PPviatok allows to
 828  * optimize a little the pretty printing.
 829  *
 830  * update: now have PPviastr which goes even faster than PPviatok, so
 831  * PPviatok has disappeared.
 832  *)
 833
 834 type ppmethod = PPnormal | PPviastr
 835
 836
 837
 838
 839 (* The pp_program function will call pretty_print_c.ml with a special
 840  * function to print the leaf components, the tokens. When we want to
 841  * print a token, we need to print also maybe the space and comments that
 842  * were close to it in the original file (and that was omitted during the
 843  * parsing phase), and honor what the cocci-info attached to the token says.
 844  * Maybe we will not print the token if it's a MINUS-token, and maybe we will
 845  * print it and also print some cocci-code attached in a PLUS to it.
 846  * So we will also maybe call unparse_cocci. Because the cocci-code may
 847  * contain metavariables, unparse_cocci will in fact sometimes call back
 848  * pretty_print_c (which will this time don't call back again unparse_cocci)
 849  *)
 850
 851 let pp_program2 xs outfile  =
 852   Common.with_open_outfile outfile (fun (pr,chan) ->
 853     let pr s =
 854       if !Flag_parsing_c.debug_unparsing
 855       then begin pr2_no_nl s; flush stderr end
 856       else pr s
 857         (* flush chan; *)
 858         (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
 859     in
 860
 861     xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
 862
 863       (* here can still work on ast *)
 864       let e = remove_useless_fakeInfo_struct e in
 865
 866       match ppmethod with
 867       | PPnormal ->
 868           (* now work on tokens *)
 869
 870           (* phase1: just get all the tokens, all the information *)
 871           assert(toks_e +> List.for_all (fun t ->
 872             TH.is_origin t or TH.is_expanded t
 873           ));
 874           let toks = get_fakeInfo_and_tokens e toks_e in
 875           let toks = displace_fake_nodes toks in
 876           (* assert Origin;ExpandedTok;Faketok *)
 877           let toks = expand_mcode toks in
 878           (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
 879            * and no tag information, just NOTHING. *)
 880
 881           (* phase2: can now start to filter and adjust *)
 882           let toks = adjust_indentation toks in
 883           let toks = remove_minus_and_between_and_expanded_and_fake toks in
 884           let toks = adjust_before_semicolon toks in
 885           (* assert Origin + Cocci + C and no minus *)
 886           let toks = add_space toks in
 887           let toks = fix_tokens toks in
 888
 889           (* in theory here could reparse and rework the ast! or
 890            * apply some SP. Not before cos julia may have generated
 891            * not parsable file. Need do unparsing_tricks call before being
 892            * ready to reparse. *)
 893           print_all_tokens2 pr toks;
 894
 895       | PPviastr -> pr str
 896     )
 897   )
 898
 899 let pp_program a b =
 900   Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
 901
 902
 903 let pp_program_default xs outfile =
 904   let xs' = xs +> List.map (fun x -> x, PPnormal) in
 905   pp_program xs' outfile