parsing_c/unparse_c2.ml

   1 (* Copyright (C) 2002-2008 Yoann Padioleau
   2  *
   3  * This program is free software; you can redistribute it and/or
   4  * modify it under the terms of the GNU General Public License (GPL)
   5  * version 2 as published by the Free Software Foundation.
   6  *
   7  * This program is distributed in the hope that it will be useful,
   8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  10  * file license.txt for more details.
  11  *)
  12 open Common
  13
  14 open Ast_c
  15
  16 module TH = Token_helpers
  17
  18
  19
  20
  21 (*****************************************************************************)
  22 (* Types used during the intermediate phases of the unparsing *)
  23 (*****************************************************************************)
  24
  25 type token1 =
  26   | Fake1 of info
  27   | T1 of Parser_c.token
  28
  29 (* The cocci_tag of the token should always be a NOTHING. The mark of
  30  * the token can only be OriginTok or ExpandedTok. Why not get rid of
  31  * token and get something simpler ? because we need to know if the
  32  * info is a TCommentCpp or TCommentSpace, etc for some of the further
  33  * analysis so easier to keep with the token.
  34  *
  35  * This type contains the whole information. Have all the tokens with this
  36  * type.
  37  *)
  38 type token2 =
  39   | T2 of Parser_c.token * bool (* minus *) *
  40           int option (* orig index, abstracting away comments and space *)
  41   | Fake2
  42   | Cocci2 of string
  43   | C2 of string
  44   | Indent_cocci2
  45   | Unindent_cocci2
  46
  47 (* not used yet *)
  48 type token3 =
  49   | T3 of Parser_c.token
  50   | Cocci3 of string
  51   | C3 of string
  52
  53
  54 (* similar to the tech in parsing_hack *)
  55 type token_extended = {
  56   tok2 : token2;
  57   str  : string;
  58   idx: int option; (* to know if 2 tokens were consecutive in orig file *)
  59   mutable new_tokens_before : token2 list;
  60   mutable remove : bool;
  61 }
  62
  63
  64 (*****************************************************************************)
  65 (* Helpers *)
  66 (*****************************************************************************)
  67
  68 let info_of_token1 t =
  69   match t with
  70   | Fake1 info -> info
  71   | T1 tok -> TH.info_of_tok tok
  72
  73 let str_of_token2 = function
  74   | T2 (t,_,_) -> TH.str_of_tok t
  75   | Fake2 -> ""
  76   | Cocci2 s -> s
  77   | C2 s -> s
  78   | Indent_cocci2 -> ""
  79   | Unindent_cocci2 -> ""
  80
  81 let print_token2 = function
  82   | T2 (t,b,_) -> "T2:"^(if b then "-" else "")^TH.str_of_tok t
  83   | Fake2 -> ""
  84   | Cocci2 s -> "Cocci2:"^s
  85   | C2 s -> "C2:"^s
  86   | Indent_cocci2 -> "Indent"
  87   | Unindent_cocci2 -> "Unindent"
  88
  89 let print_all_tokens2 l =
  90   List.iter (function x -> Printf.printf "%s " (print_token2 x)) l;
  91   Printf.printf "\n"
  92
  93 let str_of_token3 = function
  94   | T3 t -> TH.str_of_tok t
  95   | Cocci3 s | C3 s -> s
  96
  97
  98
  99 let mk_token_extended x =
 100   let origidx =
 101     match x with
 102     | T2 (_,_, idx) -> idx
 103     | _ -> None
 104   in
 105   { tok2 = x;
 106     str = str_of_token2 x;
 107     idx = origidx;
 108     new_tokens_before = [];
 109     remove = false;
 110   }
 111
 112 let rebuild_tokens_extented toks_ext =
 113   let _tokens = ref [] in
 114   toks_ext +> List.iter (fun tok ->
 115     tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
 116     if not tok.remove then push2 tok.tok2 _tokens;
 117   );
 118   let tokens = List.rev !_tokens in
 119   (tokens +> List.map mk_token_extended)
 120
 121
 122 let mcode_contain_plus = function
 123   | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
 124   | Ast_cocci.CONTEXT _ -> true
 125   | Ast_cocci.MINUS (_,[]) -> false
 126   | Ast_cocci.MINUS (_,x::xs) -> true
 127   | Ast_cocci.PLUS -> raise Impossible
 128
 129 let contain_plus info =
 130   let mck = Ast_c.mcode_of_info info in
 131   mcode_contain_plus mck
 132
 133 (*****************************************************************************)
 134 (* Last fix on the ast *)
 135 (*****************************************************************************)
 136
 137 (* Because of the ugly trick to handle initialiser, I generate fake ','
 138  * for the last initializer element, but if there is nothing around it,
 139  * I don't want in the end to print it.
 140  *)
 141
 142 let remove_useless_fakeInfo_struct program =
 143   let bigf = { Visitor_c.default_visitor_c_s with
 144     Visitor_c.kini_s = (fun (k,bigf) ini ->
 145       match k ini with
 146       | InitList args, ii ->
 147           (match ii with
 148           | [_i1;_i2] -> ini
 149           | [i1;i2;iicommaopt] ->
 150               if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
 151                  && (Ast_c.is_fake iicommaopt)
 152                  (* sometimes the guy put a normal iicommaopt *)
 153
 154               then InitList args, [i1;i2]
 155               else InitList args, [i1;i2;iicommaopt]
 156           | _ -> raise Impossible
 157           )
 158       | x -> x
 159     )
 160   } in
 161   Visitor_c.vk_toplevel_s bigf program
 162
 163
 164 (*****************************************************************************)
 165 (* Tokens1 generation *)
 166 (*****************************************************************************)
 167
 168 let get_fakeInfo_and_tokens celem toks =
 169   let toks_in  = ref toks in
 170   let toks_out = ref [] in
 171
 172   (* todo? verify good order of position ? *)
 173   let pr_elem info =
 174     match Ast_c.pinfo_of_info info with
 175     | FakeTok _ ->
 176         Common.push2 (Fake1 info) toks_out
 177     | OriginTok _ | ExpandedTok _ ->
 178         (* get the associated comments/space/cppcomment tokens *)
 179         let (before, x, after) = !toks_in +> Common.split_when (fun tok ->
 180           info =*= TH.info_of_tok tok)
 181         in
 182         assert(info = TH.info_of_tok x);
 183         (*old: assert(before +> List.for_all (TH.is_comment)); *)
 184         before +> List.iter (fun x ->
 185           if not (TH.is_comment x)
 186           then pr2 ("WIERD: not a comment:" ^ TH.str_of_tok x)
 187           (* case such as  int asm d3("x"); not yet in ast *)
 188         );
 189         before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
 190         push2 (T1 x) toks_out;
 191         toks_in := after;
 192     | AbstractLineTok _ ->
 193         (* can be called on type info when for instance use -type_c *)
 194         if !Flag_parsing_c.pretty_print_type_info
 195         then Common.push2 (Fake1 info) toks_out
 196         else raise Impossible (* at this stage *)
 197   in
 198
 199   let pr_space _ = () in (* use the spacing that is there already *)
 200
 201   Pretty_print_c.pp_program_gen pr_elem pr_space celem;
 202
 203   if not (null !toks_in)
 204   then failwith "WIERD: unparsing not finished";
 205
 206   List.rev !toks_out
 207
 208 (* Fake nodes that have BEFORE code should be moved over any subsequent
 209 whitespace and newlines, but not any comments, to get as close to the affected
 210 code as possible.  Similarly, face nodes that have AFTER code should be moved
 211 backwards.  No fake nodes should have both before and after code. *)
 212
 213 let displace_fake_nodes toks =
 214   let is_fake = function Fake1 _ -> true | _ -> false in
 215   let is_whitespace = function
 216       T1(Parser_c.TCommentSpace _)
 217     | T1(Parser_c.TCommentNewline _) -> true
 218     | _ -> false in
 219   let rec loop toks =
 220     let fake_info =
 221       try Some (Common.split_when is_fake toks)
 222       with Not_found -> None in
 223     match fake_info with
 224       Some(bef,((Fake1 info) as fake),aft) ->
 225         (match !(info.cocci_tag) with
 226           (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
 227             (* move the fake node forwards *)
 228             let (whitespace,rest) = Common.span is_whitespace aft in
 229             bef @ whitespace @ fake :: (loop rest)
 230         | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
 231             (* move the fake node backwards *)
 232             let revbef = List.rev bef in
 233             let (revwhitespace,revprev) = Common.span is_whitespace revbef in
 234             let whitespace = List.rev revwhitespace in
 235             let prev = List.rev revprev in
 236             prev @ fake :: (loop (whitespace @ aft))
 237         | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
 238             bef @ fake :: (loop aft)
 239         | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
 240             failwith "fake node should not be before-after"
 241         | _ -> bef @ fake :: (loop aft))
 242     | None -> toks
 243     | _ -> raise Impossible in
 244   loop toks
 245
 246 (*****************************************************************************)
 247 (* Tokens2 generation *)
 248 (*****************************************************************************)
 249
 250 let expand_mcode toks =
 251   let toks_out = ref [] in
 252
 253   let index = ref 0 in
 254
 255   let add_elem t minus =
 256     match t with
 257     | Fake1 info ->
 258         let str = Ast_c.str_of_info info in
 259         if str = ""
 260         then push2 (Fake2) toks_out
 261         (* perhaps the fake ',' *)
 262         else push2 (C2 str) toks_out
 263
 264
 265     | T1 tok ->
 266         (* no tag on expandedTok ! *)
 267         assert(not (TH.is_expanded tok &&
 268             !((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot));
 269
 270         let tok' = tok +> TH.visitor_info_of_tok (fun i ->
 271           { i with cocci_tag = ref Ast_c.emptyAnnot; }
 272         ) in
 273
 274         let optindex =
 275           if TH.is_origin tok && not (TH.is_real_comment tok)
 276           then begin
 277               incr index;
 278               Some !index
 279           end
 280           else None
 281         in
 282
 283         push2 (T2 (tok', minus, optindex)) toks_out
 284   in
 285
 286   let expand_info t =
 287     let (mcode,env) = !((info_of_token1 t).cocci_tag) in
 288
 289     let pr_cocci s =
 290       push2 (Cocci2 s) toks_out
 291     in
 292     let pr_c info =
 293       match Ast_c.pinfo_of_info info with
 294         Ast_c.AbstractLineTok _ -> push2 (C2 (Ast_c.str_of_info info)) toks_out
 295       | Ast_c.FakeTok (s,_) -> push2 (C2 s) toks_out
 296       | _ ->
 297           Printf.printf "line: %s\n" (Common.dump info);
 298           failwith "not an abstract line" in
 299
 300     let pr_space _ = push2 (C2 " ") toks_out in
 301
 302     let indent _   = push2 Indent_cocci2 toks_out in
 303     let unindent _ = push2 Unindent_cocci2 toks_out in
 304
 305     let args_pp = (env, pr_cocci, pr_c, pr_space, indent, unindent) in
 306
 307
 308     match mcode with
 309     | Ast_cocci.MINUS (_,any_xxs) ->
 310         (* Why adding ? because I want to have all the information, the whole
 311          * set of tokens, so I can then process and remove the
 312          * is_between_two_minus for instance *)
 313         add_elem t true;
 314         Unparse_cocci2.pp_list_list_any args_pp any_xxs Unparse_cocci2.InPlace
 315     | Ast_cocci.CONTEXT (_,any_befaft) ->
 316         (match any_befaft with
 317         | Ast_cocci.NOTHING ->
 318             add_elem t false
 319         | Ast_cocci.BEFORE xxs ->
 320             Unparse_cocci2.pp_list_list_any args_pp xxs Unparse_cocci2.Before;
 321             add_elem t false
 322         | Ast_cocci.AFTER xxs ->
 323             add_elem t false;
 324             Unparse_cocci2.pp_list_list_any args_pp xxs Unparse_cocci2.After;
 325         | Ast_cocci.BEFOREAFTER (xxs, yys) ->
 326             Unparse_cocci2.pp_list_list_any args_pp xxs Unparse_cocci2.Before;
 327             add_elem t false;
 328             Unparse_cocci2.pp_list_list_any args_pp yys Unparse_cocci2.After;
 329         )
 330     | Ast_cocci.PLUS -> raise Impossible
 331
 332   in
 333
 334   toks +> List.iter expand_info;
 335   List.rev !toks_out
 336
 337
 338 (*****************************************************************************)
 339 (* Tokens2 processing, filtering, adjusting *)
 340 (*****************************************************************************)
 341
 342 let is_minusable_comment = function
 343   | T2 (t,_b,_i) ->
 344       (match t with
 345       | Parser_c.TCommentSpace _   (* only whitespace *)
 346       | Parser_c.TCommentNewline _ (* newline plus whitespace *)
 347       | Parser_c.TComment _
 348       | Parser_c.TCommentCpp (Ast_c.CppAttr, _)
 349       | Parser_c.TCommentCpp (Ast_c.CppMacro, _)
 350         -> true
 351
 352       | Parser_c.TCommentMisc _
 353       | Parser_c.TCommentCpp (Ast_c.CppDirective, _)
 354       | Parser_c.TCommentCpp (Ast_c.CppOther, _)
 355         -> false
 356
 357       | _ -> false
 358       )
 359   | _ -> false
 360
 361 let all_coccis = function
 362     Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 -> true
 363   | _ -> false
 364
 365 let is_minusable_comment_or_plus = function
 366     T2(Parser_c.TCommentNewline _,_b,_i) -> false
 367   | x -> is_minusable_comment x or all_coccis x
 368
 369 let set_minus_comment = function
 370   | T2 (t,false,idx) ->
 371       let str = TH.str_of_tok t in
 372       (match t with
 373       | Parser_c.TCommentSpace _
 374       | Parser_c.TCommentNewline _ -> ()
 375
 376       | Parser_c.TComment _
 377       | Parser_c.TCommentCpp (Ast_c.CppAttr, _)
 378       | Parser_c.TCommentCpp (Ast_c.CppMacro, _)
 379         ->
 380           pr2 ("ERASING_COMMENTS: " ^ str)
 381       | _ -> raise Impossible
 382       );
 383       T2 (t, true, idx)
 384   | T2 (Parser_c.TCommentNewline _,true,idx) as x -> x
 385   | _ -> raise Impossible
 386
 387 let set_minus_comment_or_plus = function
 388     Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 as x -> x
 389   | x -> set_minus_comment x
 390
 391
 392 let remove_minus_and_between_and_expanded_and_fake xs =
 393
 394   (* get rid of exampled and fake tok *)
 395   let xs = xs +> Common.exclude (function
 396     | T2 (t,_,_) when TH.is_expanded t -> true
 397     | Fake2 -> true
 398
 399     | _ -> false
 400   )
 401   in
 402
 403   (*This drops the space before each completely minused block (no plus code).*)
 404   let rec adjust_before_minus = function
 405       [] -> []
 406     | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::((T2(_,true,_)::_) as xs) ->
 407         let minus_or_comment = function
 408             T2(_,true,_) -> true
 409           | T2(Parser_c.TCommentNewline _,_b,_i) -> false
 410           | x -> is_minusable_comment x in
 411         let (between_minus,rest) = Common.span minus_or_comment xs in
 412         (match rest with
 413           [] -> (set_minus_comment x) :: between_minus
 414         | T2(Parser_c.TCommentNewline _,_b,_i)::_ ->
 415             (set_minus_comment x) :: between_minus @
 416             (adjust_before_minus rest)
 417         | _ -> x :: between_minus @ (adjust_before_minus rest))
 418     | x::xs -> x::adjust_before_minus xs in
 419
 420   let xs = adjust_before_minus xs in
 421
 422   (* this deals with any stuff that is between the minused code, eg
 423      spaces, comments, attributes, etc. *)
 424   (* The use of is_minusable_comment_or_plus and set_minus_comment_or_plus
 425      is because the + code can end up anywhere in the middle of the - code;
 426      it is not necessarily to the far left *)
 427   let rec adjust_between_minus xs =
 428     match xs with
 429     | [] -> []
 430     | (T2 (t1,true,idx1))::xs ->
 431
 432         let (between_comments, rest) =
 433           Common.span is_minusable_comment_or_plus xs in
 434         (match rest with
 435         | [] -> [(T2 (t1, true,idx1))]
 436
 437         | (T2 (t2, true,idx2))::rest ->
 438              (T2 (t1, true,idx1))::
 439                (List.map set_minus_comment_or_plus between_comments @
 440                adjust_between_minus ((T2 (t2, true, idx2))::rest))
 441         | x::xs ->
 442              (T2 (t1, true, idx1))::
 443                (between_comments @ adjust_between_minus (x::xs))
 444         )
 445
 446     | x::xs -> x::adjust_between_minus xs in
 447
 448   let xs = adjust_between_minus xs in
 449
 450   let xs = xs +> Common.exclude (function
 451     | T2 (t,true,_) -> true
 452     | _ -> false
 453   ) in
 454   xs
 455
 456
 457 let is_ident_like s = s ==~ Common.regexp_alpha
 458
 459 let rec add_space xs =
 460   match xs with
 461   | [] -> []
 462   | [x] -> [x]
 463   | x::y::xs ->
 464       let sx = str_of_token2 x in
 465       let sy = str_of_token2 y in
 466       if is_ident_like sx && is_ident_like sy
 467       then x::C2 " "::(add_space (y::xs))
 468       else x::(add_space (y::xs))
 469
 470
 471
 472 (* When insert some new code, because of a + in a SP, we must add this
 473  * code at the right place, with the good indentation. So each time we
 474  * encounter some spacing info, with some newline, we maintain the
 475  * current indentation level used.
 476  *
 477  * TODO problems: not accurate. ex: TODO
 478  *
 479  * TODO: if in #define region, should add a \ \n
 480  *)
 481 let new_tabbing2 space =
 482   (list_of_string space)
 483     +> List.rev
 484     +> Common.take_until (fun c -> c = '\n')
 485     +> List.rev
 486     +> List.map string_of_char
 487     +> String.concat ""
 488
 489 let new_tabbing a =
 490   Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
 491
 492
 493 let rec adjust_indentation xs =
 494   let _current_tabbing = ref "" in
 495   let tabbing_unit = ref None in
 496
 497   let string_of_list l = String.concat "" (List.map string_of_char l) in
 498
 499   (* try to pick a tabbing unit for the plus code *)
 500   let adjust_tabbing_unit old_tab new_tab =
 501     if !tabbing_unit = None && String.length new_tab > String.length old_tab
 502     then
 503       let old_tab = list_of_string old_tab in
 504       let new_tab = list_of_string new_tab in
 505       let rec loop = function
 506           ([],new_tab) ->
 507             tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
 508         | (_,[]) -> failwith "not possible"
 509         | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
 510       loop (old_tab,new_tab) in
 511
 512   let remtab tu current_tab =
 513     let current_tab = List.rev(list_of_string current_tab) in
 514     let rec loop = function
 515         ([],new_tab) -> string_of_list (List.rev new_tab)
 516       | (_,[]) -> "" (*wierd; tabbing unit used up more than the current tab*)
 517       | (t::ts,n::ns) when t = n -> loop (ts,ns)
 518       | (_,ns) -> (* mismatch; remove what we can *)
 519           string_of_list (List.rev ns) in
 520     loop (tu,current_tab) in
 521
 522   let rec find_first_tab started = function
 523       [] -> ()
 524     | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x = "{" ->
 525         find_first_tab true xs
 526     | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
 527       when started ->
 528         let s = str_of_token2 x +> new_tabbing in
 529         tabbing_unit := Some (s,List.rev (list_of_string s))
 530     | x::xs -> find_first_tab started xs in
 531   find_first_tab false xs;
 532
 533   let rec aux started xs =
 534     match xs with
 535     | [] ->  []
 536     | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
 537       (Cocci2 "{")::xs when started && str_of_token2 x = ")" ->
 538         (* to be done for if, etc, but not for a function header *)
 539         x::(Cocci2 " {")::(aux started xs)
 540     | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs ->
 541         let old_tabbing = !_current_tabbing in
 542         str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
 543         (* only trust the indentation after the first { *)
 544         (if started then adjust_tabbing_unit old_tabbing !_current_tabbing);
 545         let coccis_rest = Common.span all_coccis xs in
 546         (match coccis_rest with
 547           (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y = "}" ->
 548             (* the case where cocci code has been added before a close } *)
 549             x::aux started (Indent_cocci2::xs)
 550         | _ -> x::aux started xs)
 551     | Indent_cocci2::xs ->
 552         (match !tabbing_unit with
 553           None -> aux started xs
 554         | Some (tu,_) ->
 555             _current_tabbing := (!_current_tabbing)^tu;
 556             Cocci2 (tu)::aux started xs)
 557     | Unindent_cocci2::xs ->
 558         (match !tabbing_unit with
 559           None -> aux started xs
 560         | Some (_,tu) ->
 561             _current_tabbing := remtab tu (!_current_tabbing);
 562             aux started xs)
 563     (* border between existing code and cocci code *)
 564     | ((T2 (tok,_,_)) as x)::((Cocci2 "\n") as y)::xs
 565       when str_of_token2 x = "{" ->
 566         x::aux true (y::Indent_cocci2::xs)
 567     | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
 568       when str_of_token2 y = "}" ->
 569         x::aux started (y::Unindent_cocci2::xs)
 570     (* starting the body of the function *)
 571     | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x = "{" ->  x::aux true xs
 572     | (Cocci2 "{")::xs -> (Cocci2 "{")::aux true xs
 573     | ((Cocci2 "\n") as x)::xs ->
 574             (* dont inline in expr because of wierd eval order of ocaml *)
 575         let s = !_current_tabbing in
 576         x::Cocci2 (s)::aux started xs
 577     | x::xs -> x::aux started xs in
 578   aux false xs
 579
 580
 581 let rec find_paren_comma = function
 582   | [] -> ()
 583
 584   (* do nothing if was like this in original file *)
 585   | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
 586     ::xs when p2 = p1 + 1 ->
 587       find_paren_comma (x2::xs)
 588
 589   | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
 590     ::xs when p2 = p1 + 1 ->
 591       find_paren_comma (x2::xs)
 592
 593   | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
 594     ::xs when p2 = p1 + 1 ->
 595       find_paren_comma (x2::xs)
 596
 597   (* otherwise yes can adjust *)
 598   | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
 599       x2.remove <- true;
 600       find_paren_comma (x2::xs)
 601   | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
 602       x1.remove <- true;
 603       find_paren_comma (x2::xs)
 604
 605   | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
 606       x1.remove <- true;
 607       find_paren_comma (x2::xs)
 608
 609   | x::xs ->
 610       find_paren_comma xs
 611
 612
 613 let fix_tokens toks =
 614   let toks = toks +> List.map mk_token_extended in
 615
 616   let cleaner = toks +> Common.exclude (function
 617     | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
 618     | _ -> false
 619   ) in
 620   find_paren_comma cleaner;
 621
 622   let toks = rebuild_tokens_extented toks in
 623   toks +> List.map (fun x -> x.tok2)
 624
 625
 626
 627 (*****************************************************************************)
 628 (* Final unparsing (and debugging support) *)
 629 (*****************************************************************************)
 630
 631 (* for debugging *)
 632 type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
 633
 634 let kind_of_token2 = function
 635   | Fake2 -> KFake
 636   | Cocci2 _ -> KCocci
 637   | C2 _ -> KC
 638   | T2 (t,_,_) ->
 639       (match TH.pinfo_of_tok t with
 640       | ExpandedTok _ -> KExpanded
 641       | OriginTok _ -> KOrigin
 642       | FakeTok _ -> raise Impossible (* now a Fake2 *)
 643       | AbstractLineTok _ -> raise Impossible (* now a KC *)
 644       )
 645   | Unindent_cocci2 | Indent_cocci2 -> raise Impossible
 646
 647 let end_mark = "!"
 648
 649 let start_mark = function
 650   | KFake -> "!F!"
 651   | KCocci -> "!S!"
 652   | KC -> "!A!"
 653   | KExpanded -> "!E!"
 654   | KOrigin -> ""
 655
 656 let print_all_tokens2 pr xs =
 657   if !Flag_parsing_c.debug_unparsing
 658   then
 659     let current_kind = ref KOrigin in
 660     xs +> List.iter (fun t ->
 661       let newkind = kind_of_token2 t in
 662       if newkind = !current_kind
 663       then pr (str_of_token2 t)
 664       else begin
 665         pr (end_mark);
 666         pr (start_mark newkind);
 667         pr (str_of_token2 t);
 668         current_kind := newkind
 669       end
 670     );
 671   else
 672     xs +> List.iter (fun x -> pr (str_of_token2 x))
 673
 674
 675
 676
 677 (*****************************************************************************)
 678 (* Entry points *)
 679 (*****************************************************************************)
 680
 681 (* old: PPviatok was made in the beginning to allow to pretty print a
 682  * complete C file, including a modified C file by transformation.ml,
 683  * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
 684  * maybe flow_to_ast) all the cases. Indeed we don't need to do some
 685  * fancy stuff when a function was not modified at all. Just need to
 686  * print the list of token as-is. But now pretty_print_c.ml handles
 687  * almost everything so maybe less useful. Maybe PPviatok allows to
 688  * optimize a little the pretty printing.
 689  *
 690  * update: now have PPviastr which goes even faster than PPviatok, so
 691  * PPviatok has disappeared.
 692  *)
 693
 694 type ppmethod = PPnormal | PPviastr
 695
 696
 697
 698
 699 (* The pp_program function will call pretty_print_c.ml with a special
 700  * function to print the leaf components, the tokens. When we want to
 701  * print a token, we need to print also maybe the space and comments that
 702  * were close to it in the original file (and that was omitted during the
 703  * parsing phase), and honor what the cocci-info attached to the token says.
 704  * Maybe we will not print the token if it's a MINUS-token, and maybe we will
 705  * print it and also print some cocci-code attached in a PLUS to it.
 706  * So we will also maybe call unparse_cocci. Because the cocci-code may
 707  * contain metavariables, unparse_cocci will in fact sometimes call back
 708  * pretty_print_c (which will this time don't call back again unparse_cocci)
 709  *)
 710
 711 let pp_program2 xs outfile  =
 712   Common.with_open_outfile outfile (fun (pr,chan) ->
 713     let pr s =
 714       if !Flag_parsing_c.debug_unparsing
 715       then begin pr2_no_nl s; flush stderr end
 716       else pr s
 717         (* flush chan; *)
 718         (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
 719     in
 720
 721     xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
 722
 723       (* here can still work on ast *)
 724       let e = remove_useless_fakeInfo_struct e in
 725
 726       match ppmethod with
 727       | PPnormal ->
 728           (* now work on tokens *)
 729
 730           (* phase1: just get all the tokens, all the information *)
 731           assert(toks_e +> List.for_all (fun t ->
 732             TH.is_origin t or TH.is_expanded t
 733           ));
 734           let toks = get_fakeInfo_and_tokens e toks_e in
 735           let toks = displace_fake_nodes toks in
 736           (* assert Origin;ExpandedTok;Faketok *)
 737           let toks = expand_mcode toks in
 738           (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
 739            * and no tag information, just NOTHING. *)
 740
 741           (* phase2: can now start to filter and adjust *)
 742           let toks = adjust_indentation toks in
 743           let toks = remove_minus_and_between_and_expanded_and_fake toks in
 744           (* assert Origin + Cocci + C and no minus *)
 745           let toks = add_space toks in
 746           let toks = fix_tokens toks in
 747
 748           (* in theory here could reparse and rework the ast! or
 749            * apply some SP. Not before cos julia may have generated
 750            * not parsable file. Need do unparsing_tricks call before being
 751            * ready to reparse. *)
 752           print_all_tokens2 pr toks;
 753
 754       | PPviastr -> pr str
 755     )
 756   )
 757
 758 let pp_program a b =
 759   Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
 760