parsing_c/unparse_c.ml

   1 (* Yoann Padioleau, Julia Lawall
   2  *
   3  * Copyright (C) 2012, INRIA.
   4  * Copyright (C) 2010, 2011, University of Copenhagen DIKU and INRIA.
   5  * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
   6  *
   7  * This program is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License (GPL)
   9  * version 2 as published by the Free Software Foundation.
  10  *
  11  * This program is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * file license.txt for more details.
  15  *
  16  *
  17  * Modifications by Julia Lawall for better newline handling.
  18  *)
  19 open Common
  20
  21 module TH = Token_helpers
  22
  23 (* should keep comments and directives in between adjacent deleted terms,
  24 but not comments and directives within deleted terms.  should use the
  25 labels found in the control-flow graph *)
  26
  27
  28
  29 (*****************************************************************************)
  30 (* Wrappers *)
  31 (*****************************************************************************)
   (* Printing helpers for this module, built by [mk_pr2_wrappers] from the
    * [Flag_parsing_c.verbose_unparsing] flag.
    * NOTE(review): presumably they print only when that flag is set, and
    * [pr2_once] prints a given message only once -- confirm in Common. *)
   32 let pr2, pr2_once = mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
  33
  34 (*****************************************************************************)
  35 (* Types used during the intermediate phases of the unparsing *)
  36 (*****************************************************************************)
  37
   (* Tokens of the first phase (see get_fakeInfo_and_tokens): either a
    * token coming from the C parser, or a fake node for which only an
    * [Ast_c.info] is available. *)
   38 type token1 =
   39   | Fake1 of Ast_c.info
   40   | T1 of Parser_c.token
  41
  42 (* The cocci_tag of the token should always be a NOTHING. The mark of
  43  * the token can only be OriginTok or ExpandedTok. Why not get rid of
  44  * token and get something simpler ? because we need to know if the
  45  * info is a TCommentCpp or TCommentSpace, etc for some of the further
  46  * analysis so easier to keep with the token.
  47  *
  48  * This type contains the whole information. Have all the tokens with this
  49  * type.
  50  *)
   (* Status attached to a phase-2 token. expand_mcode uses [Min] for a
    * token whose mcode is MINUS (keeping the match numbers and adjacency of
    * that mcode) and [Ctx] for a token whose mcode is CONTEXT. *)
   51 type min =
   52   | Min of (int list (* match numbers from witness trees *) *
   53             Ast_cocci.adjacency (* adjacency information *))
   54   | Ctx
  55
   (* Tokens of the second phase, as produced by expand_mcode:
    * - T2: a token of the C file with its minus/context status, its
    *   optional original index and an optional Unparse_cocci.nlhint;
    * - Fake2: what remains of a fake node whose string is empty or that is
    *   minus;
    * - Cocci2: a string emitted by the SmPL unparser, with line and
    *   left/right column;
    * - C2: a plain string (C code, comment text or a single space);
    * - Comma: the non-empty string of a fake node that is kept;
    * - Indent_cocci2, Unindent_cocci2, EatSpace2: layout markers pushed by
    *   the indent, unindent and eat_space callbacks. *)
   56 type token2 =
   57   | T2 of Parser_c.token * min
   58         * int option (* orig index, abstracting away comments and space *)
   59         * Unparse_cocci.nlhint option
   60   | Fake2 of min
   61   | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
   62             * Unparse_cocci.nlhint option
   63   | C2 of string
   64   | Comma of string
   65   | Indent_cocci2
   66   | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
   67   | EatSpace2
  68
   69 (* not used yet: a simplified token with no minus/context status and
    * no position; only str_of_token3 refers to it in the visible code *)
   70 type token3 =
   71   | T3 of Parser_c.token
   72   | Cocci3 of string
   73   | C3 of string
  74
  75
   76 (* similar to the tech in parsing_hack
    *
    * A token2 wrapped with cached data and mutable edit marks:
    * - str is the string of tok2 (filled with str_of_token2 by
    *   mk_token_extended);
    * - idx is the original index of a T2 token, None for other tokens;
    * - new_tokens_before lists tokens that rebuild_tokens_extented inserts
    *   just before this token;
    * - remove tells rebuild_tokens_extented to leave this token out. *)
   77 type token_extended =
   78   { tok2 : token2;
   79     str  : string;
   80     idx  : int option; (* to know if 2 tokens were consecutive in orig file *)
   81     mutable new_tokens_before : token2 list;
   82     mutable remove : bool;
   83   }
  84
  85
  86 (*****************************************************************************)
  87 (* Helpers *)
  88 (*****************************************************************************)
  89
  90 let info_of_token1 t =
  91   match t with
  92   | Fake1 info -> info
  93   | T1 tok -> TH.info_of_tok tok
  94
  95 let print_token1 = function
  96   | T1 tok -> TH.str_of_tok tok
  97   | Fake1 info -> "fake"
  98
  99 let str_of_token2 = function
 100   | T2 (t,_,_,_) -> TH.str_of_tok t
 101   | Cocci2 (s,_,_,_,_)
 102   | C2 s
 103   | Comma s -> s
 104   | Fake2 _
 105   | Indent_cocci2
 106   | Unindent_cocci2 _
 107   | EatSpace2 -> ""
 108
 109 let print_token2 = function
 110   | T2 (t,b,_,_) ->
 111     let t_str =
 112       match t with
 113       | Parser_c.TCommentSpace _ -> " sp "
 114       | Parser_c.TCommentNewline _ -> " nl "
 115       | Parser_c.TCommentCpp _ -> " cp "
 116       | Parser_c.TCommentMisc _ -> " misc "
 117       | Parser_c.TComment _ -> " comment "
 118       | _ -> "" in
 119     let b_str =
 120       match b with
 121       | Min (index,adj) ->
 122         Printf.sprintf "-.%d[%s]"
 123           (match adj with Ast_cocci.ADJ n -> n | _ -> -1)
 124           (String.concat " " (List.map string_of_int index))
 125       | Ctx -> "" in
 126     "T2:"^b_str^t_str^TH.str_of_tok t
 127   | Fake2 b ->
 128     let b_str =
 129       match b with
 130       | Min (index,adj) ->
 131         Printf.sprintf "-%d[%s]"
 132           (match adj with Ast_cocci.ADJ n -> n | _ -> -1)
 133           (String.concat " " (List.map string_of_int index))
 134       | Ctx -> "" in
 135     b_str^"fake"
 136   | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
 137   | C2 s -> "C2:"^s
 138   | Comma s -> "Comma:"^s
 139   | Indent_cocci2 -> "Indent"
 140   | Unindent_cocci2 _ -> "Unindent"
 141   | EatSpace2 -> "EatSpace"
 142
 143 (*
 144 let simple_print_all_tokens pr l =
 145   List.iter (function x -> Printf.printf "|%s| " (pr x)) l;
 146   Printf.printf "\n"
 147 *)
 148
 149 let str_of_token3 = function
 150   | T3 t -> TH.str_of_tok t
 151   | Cocci3 s | C3 s -> s
 152
 153
 154
 155 let mk_token_extended x =
 156   let origidx =
 157     match x with
 158     | T2 (_,_,idx,_) -> idx
 159     | _ -> None in
 160   { tok2 = x;
 161     str = str_of_token2 x;
 162     idx = origidx;
 163     new_tokens_before = [];
 164     remove = false;
 165   }
 166
 167 let rebuild_tokens_extented toks_ext =
 168   let _tokens = ref [] in
 169   toks_ext +> List.iter (fun tok ->
 170     tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
 171     if not tok.remove then push2 tok.tok2 _tokens;
 172     );
 173   let tokens = List.rev !_tokens in
 174   (tokens +> List.map mk_token_extended)
 175
 176
 177 let mcode_contain_plus = function
 178   | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
 179   | Ast_cocci.CONTEXT _ -> true
 180   (* patch: when need full coccinelle transformation *)
 181   | Ast_cocci.MINUS (_,_,_,Ast_cocci.NOREPLACEMENT) -> false
 182   | Ast_cocci.MINUS (_,_,_,Ast_cocci.REPLACEMENT _) -> true(*REPL is not empty*)
 183   | Ast_cocci.PLUS _ -> raise (Impossible 132)
 184
 185 let contain_plus info =
 186   let mck = Ast_c.mcode_of_info info in
 187   mcode_contain_plus mck
 188
 189 (*****************************************************************************)
 190 (* Last fix on the ast *)
 191 (*****************************************************************************)
 192
 193 (* Because of the ugly trick to handle initialiser, I generate fake ','
 194  * for the last initializer element, but if there is nothing around it,
 195  * I don't want in the end to print it.
 196  *)
 197
 198 let remove_useless_fakeInfo_struct program =
 199   let bigf = { Visitor_c.default_visitor_c_s with
 200     Visitor_c.kini_s = (fun (k,bigf) ini ->
 201       match k ini with
 202       | Ast_c.InitList args, ii ->
 203         (match ii with
 204         | [_;_] -> ini
 205         | i1 :: i2 :: iicommaopt :: tl when
 206            (not (contain_plus iicommaopt))
 207         && (not (contain_plus i2))
 208         && (Ast_c.is_fake iicommaopt) ->
 209           (* sometimes the guy put a normal iicommaopt *)
 210           Ast_c.InitList args, (i1 :: i2 :: tl)
 211         | ii -> Ast_c.InitList args, ii
 212         )
 213       | x -> x)
 214     } in
 215   Visitor_c.vk_toplevel_s bigf program
 216
 217
 218 (*****************************************************************************)
 219 (* Tokens1 generation *)
 220 (*****************************************************************************)
 221
 222 let get_fakeInfo_and_tokens celem toks =
 223
 224   let toks_in  = ref toks in
 225   let toks_out = ref [] in
 226
 227   (* todo? verify good order of position ? *)
 228   let pr_elem info =
 229     match Ast_c.pinfo_of_info info with
 230     | Ast_c.FakeTok _ ->
 231       push2 (Fake1 info) toks_out
 232     | Ast_c.OriginTok _ | Ast_c.ExpandedTok _ ->
 233
 234       (* get the associated comments/space/cppcomment tokens *)
 235       let (before, x, after) =
 236         !toks_in +> split_when (fun tok ->
 237           info =*= TH.info_of_tok tok)
 238       in
 239       assert(info =*= TH.info_of_tok x);
 240       (*old: assert(before +> List.for_all (TH.is_comment)); *)
 241       before +> List.iter (fun x ->
 242         if not (TH.is_comment x)
 243         then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
 244         (* case such as  int asm d3("x"); not yet in ast *)
 245         );
 246       before +> List.iter (fun x -> push2 (T1 x) toks_out);
 247
 248       push2 (T1 x) toks_out;
 249       toks_in := after;
 250     | Ast_c.AbstractLineTok _ ->
 251       (* can be called on type info when for instance use -type_c *)
 252       if !Flag_parsing_c.pretty_print_type_info
 253       then push2 (Fake1 info) toks_out
 254       else raise (Impossible 134) (* at this stage *)
 255   in
 256
 257   let pr_space _ = () in (* use the spacing that is there already *)
 258
 259   Pretty_print_c.pp_program_gen pr_elem pr_space celem;
 260
 261   if not (null !toks_in)
 262   then failwith "WEIRD: unparsing not finished";
 263
 264   List.rev !toks_out
 265
 266 (* Fake nodes that have BEFORE code or are - should be moved over any subsequent
 267 whitespace and newlines, but not any comments, to get as close to the affected
 268 code as possible.  Similarly, fake nodes that have AFTER code should be moved
 269 backwards.  No fake nodes should have both before and after code. *)
 270
 271 let displace_fake_nodes toks =
 272   let is_fake = function Fake1 _ -> true | _ -> false in
 273   let is_whitespace = function
 274     | T1(Parser_c.TCommentSpace _)
 275   (* patch: cocci    *)
 276     | T1(Parser_c.TCommentNewline _) -> true
 277     | _ -> false in
 278   let rec loop toks =
 279     let fake_info =
 280       try Some (split_when is_fake toks)
 281       with Not_found -> None in
 282     match fake_info with
 283     | Some(bef,((Fake1 info) as fake),aft) ->
 284       (match !(info.Ast_c.cocci_tag) with
 285       | Some x ->
 286         (match x with
 287         | (Ast_cocci.MINUS(_,_,_,Ast_cocci.REPLACEMENT _),_)
 288           (* for , replacement is more likely to be like after, but not clear...
 289              but treating it as after breaks a lot of tests. *)
 290
 291         | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
 292           (* move the fake node forwards *)
 293           let (whitespace,rest) = span is_whitespace aft in
 294           bef @ whitespace @ fake :: (loop rest)
 295
 296         | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
 297           (* move the fake node backwards *)
 298           let revbef = List.rev bef in
 299           let (revwhitespace,revprev) = span is_whitespace revbef in
 300           let whitespace = List.rev revwhitespace in
 301           let prev = List.rev revprev in
 302           prev @ fake :: (loop (whitespace @ aft))
 303         | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
 304           failwith "fake node should not be before-after"
 305         | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_)
 306         | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
 307         )
 308       | None ->
 309         bef @ fake :: (loop aft)
 310       )
 311     | None -> toks
 312     | _ -> raise (Impossible 135) in
 313   loop toks
 314
 315 (*****************************************************************************)
 316 (* Tokens2 generation *)
 317 (*****************************************************************************)
 318
 319 let comment2t2 = function
 320   | (Token_c.TCommentCpp
 321   (* not sure iif the following list is exhaustive or complete *)
 322     (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError),
 323     (info : Token_c.info)) ->
 324     C2(info.Common.str)
 325   | (Token_c.TCommentCpp x,(info : Token_c.info)) ->
 326     C2("\n"^info.Common.str^"\n")
 327   | x -> failwith (Printf.sprintf "unexpected comment %s" (Dumper.dump x))
 328
 329 let expand_mcode toks =
 330   let toks_out = ref [] in
 331
 332   let index = ref 0 in
 333
 334   let add_elem t minus =
 335     match t with
 336     | Fake1 info ->
 337       let str = Ast_c.str_of_info info in
 338       let isminus = match minus with Min _ -> true | Ctx -> false in
 339       (* don't add fake string if the thing should be removed *)
 340       if str =$= "" or isminus
 341       then push2 (Fake2 minus) toks_out
 342       (* fx the fake "," at the end of a structure or enum.
 343       no idea what other fake info there can be... *)
 344       else push2 (Comma str) toks_out
 345
 346     | T1 tok ->
 347       (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
 348       (* no tag on expandedTok ! *)
 349       let modified = function
 350         | None -> false
 351         | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
 352         | _ -> true in
 353       (if TH.is_expanded tok &&
 354         modified !((TH.info_of_tok tok).Ast_c.cocci_tag)
 355         (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*)
 356       then
 357         failwith
 358           (Printf.sprintf
 359             "expanded token %s on line %d is either modified or stored in a metavariable"
 360             (TH.str_of_tok tok) (TH.line_of_tok tok)));
 361
 362       let tok' = tok +> TH.visitor_info_of_tok (fun i ->
 363         { i with Ast_c.cocci_tag = ref Ast_c.emptyAnnot; }
 364       ) in
 365
 366       let optindex =
 367         if TH.is_origin tok && not (TH.is_real_comment tok)
 368         then
 369           begin
 370             incr index;
 371             Some !index
 372           end
 373         else None
 374       in
 375
 376       push2 (T2 (tok', minus, optindex, None)) toks_out
 377   in
 378
 379   let expand_info t =
 380     let (mcode,env) =
 381       Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).Ast_c.cocci_tag) in
 382
 383     let pr_cocci s ln col rcol hint =
 384       push2 (Cocci2 (s,ln,col,rcol,hint)) toks_out  in
 385     let pr_c info =
 386       (match Ast_c.pinfo_of_info info with
 387       | Ast_c.AbstractLineTok _ ->
 388         push2 (C2 (Ast_c.str_of_info info)) toks_out
 389       | Ast_c.FakeTok (s,_) ->
 390         push2 (C2 s) toks_out
 391       | _ ->
 392         Printf.fprintf stderr "line: %s\n" (Dumper.dump info);
 393         failwith "not an abstract line"
 394       );
 395       (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
 396       List.iter (fun x -> push2 (comment2t2 x) toks_out) in
 397
 398     let pr_barrier ln col = (* marks a position, used around C code *)
 399       push2 (Cocci2 ("",ln,col,col,None)) toks_out in
 400     let pr_nobarrier ln col = () in (* not needed for linux spacing *)
 401
 402     let pr_cspace _ = push2 (C2 " ") toks_out in
 403
 404     let pr_space _ = () (* rely on add_space in cocci code *) in
 405     let pr_arity _ = () (* not interested *) in
 406
 407     let indent _ = push2 Indent_cocci2 toks_out in
 408     let unindent x = push2 (Unindent_cocci2 x) toks_out in
 409     let eat_space _ = push2 EatSpace2 toks_out in
 410
 411     let args_pp =
 412       (env, pr_cocci, pr_c, pr_cspace,
 413         (match !Flag_parsing_c.spacing with
 414         | Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
 415         pr_arity,
 416         (match !Flag_parsing_c.spacing with
 417         | Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
 418         indent, unindent, eat_space) in
 419
 420 (* old: when for yacfe with partial cocci:
 421  *    add_elem t false;
 422  *)
 423
 424 (* patch: when need full coccinelle transformation *)
 425     let unparser = Unparse_cocci.pp_list_list_any args_pp false in
 426     match mcode with
 427     | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
 428     (* Why adding ? because I want to have all the information, the whole
 429      * set of tokens, so I can then process and remove the
 430      * is_between_two_minus for instance *)
 431       add_elem t (Min (inst,adj));
 432       (match any_xxs with
 433       | Ast_cocci.NOREPLACEMENT -> ()
 434       | Ast_cocci.REPLACEMENT(any_xxs,_) ->
 435         unparser any_xxs Unparse_cocci.InPlace
 436       )
 437     | Ast_cocci.CONTEXT (_,any_befaft) ->
 438       (match any_befaft with
 439       | Ast_cocci.NOTHING ->
 440         add_elem t Ctx
 441       | Ast_cocci.BEFORE (xxs,_) ->
 442         unparser xxs Unparse_cocci.Before;
 443         add_elem t Ctx
 444       | Ast_cocci.AFTER (xxs,_) ->
 445         add_elem t Ctx;
 446         unparser xxs Unparse_cocci.After;
 447       | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
 448         unparser xxs Unparse_cocci.Before;
 449         add_elem t Ctx;
 450         unparser yys Unparse_cocci.After;
 451       )
 452     | Ast_cocci.PLUS _ -> raise (Impossible 136)
 453   in
 454
 455   toks +> List.iter expand_info;
 456   List.rev !toks_out
 457
 458
 459 (*****************************************************************************)
 460 (* Tokens2 processing, filtering, adjusting *)
 461 (*****************************************************************************)
 462
 463 let is_space = function
 464   | T2(Parser_c.TCommentSpace _,_b,_i,_h) -> true (* only whitespace *)
 465   | _ -> false
 466
 467 let is_newline = function
 468   | T2(Parser_c.TCommentNewline _,_b,_i,_h) -> true
 469   | _ -> false
 470
 471 let is_whitespace x =
 472   is_space x or is_newline x
 473
 474 let is_minusable_comment = function
 475   | (T2 (t,_b,_i,_h)) ->
 476     (match t with
 477     | Parser_c.TCommentSpace _   (* only whitespace *)
 478     (* patch: coccinelle *)
 479     | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
 480     | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
 481     | Parser_c.TComment _
 482     | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 483     | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 484     | Parser_c.TCommentCpp (Token_c.CppIfDirective _, _)
 485     | Parser_c.TCommentCpp (Token_c.CppDirective, _) -> (* result was false *)
 486       true
 487     (*
 488     | Parser_c.TCommentMisc _
 489     | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _) ->
 490       false
 491     *)
 492     | _ -> false
 493     )
 494   | _ -> false
 495
 496 let is_minusable_comment_nocpp = function
 497   | (T2 (t,_b,_i,_h)) ->
 498     (match t with
 499     | Parser_c.TCommentSpace _   (* only whitespace *)
 500     (* patch: coccinelle *)
 501     | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
 502     | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
 503     | Parser_c.TComment _ -> true
 504     (*
 505     | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 506     | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 507     | Parser_c.TCommentCpp (Token_c.CppIfDirective _, _)
 508     | Parser_c.TCommentCpp (Token_c.CppDirective, _) ->
 509       false
 510
 511     | Parser_c.TCommentMisc _
 512     | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _) ->
 513       false
 514     *)
 515     | _ -> false
 516     )
 517   | _ -> false
 518
 519 let all_coccis = function
 520   | Cocci2 _ | C2 _ | Comma _ | Indent_cocci2
 521   | Unindent_cocci2 _ | EatSpace2 -> true
 522   | _ -> false
 523
 524 (* previously gave up if the first character was a newline, but not clear why *)
 525 let is_minusable_comment_or_plus x =
 526   is_minusable_comment x or all_coccis x
 527
 528 let set_minus_comment adj = function
 529   | T2 (t,Ctx,idx,hint) ->
 530     let str = TH.str_of_tok t in
 531     (match t with
 532     | Parser_c.TCommentSpace _
 533     (* patch: coccinelle *)
 534     | Parser_c.TCommentNewline _ -> ()
 535
 536     | Parser_c.TComment _
 537     | Parser_c.TCommentCpp (Token_c.CppAttr, _)
 538     | Parser_c.TCommentCpp (Token_c.CppMacro, _)
 539     | Parser_c.TCommentCpp (Token_c.CppIfDirective _, _)
 540     | Parser_c.TCommentCpp (Token_c.CppDirective, _) ->
 541       pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
 542         (TH.line_of_tok t) str)
 543     | _ -> raise (Impossible 137)
 544     );
 545     T2 (t, Min adj, idx, hint)
 546     (* patch: coccinelle *)
 547   | T2 (t, Min adj, idx, hint) as x -> x
 548   | Fake2 _ as x -> x
 549   | _ -> raise (Impossible 138)
 550
 551 (* don't touch ifdefs, done after *)
 552 let set_minus_comment_or_plus adj = function
 553   | Cocci2 _ | C2 _ | Comma _ | Indent_cocci2
 554   | Unindent_cocci2 _ | EatSpace2 as x -> x
 555   | x -> set_minus_comment adj x
 556
 557 let is_minus = function
 558   | T2 (_, Min _, _, _) -> true
 559   | _ -> false
 560
 561 let drop_minus xs =
 562   xs +> exclude is_minus
 563
 564 let drop_expanded xs =
 565   xs +> exclude (function
 566     | T2 (t,_,_,_) when TH.is_expanded t -> true
 567     | _ -> false
 568   )
 569
 570 let drop_fake xs =
 571   xs +> exclude (function
 572     | Fake2 _ -> true
 573     | _ -> false
 574   )
 575
 576 let remove_minus_and_between_and_expanded_and_fake xs =
 577
 578   (* get rid of expanded tok *)
 579   let xs = drop_expanded xs in
 580
 581   let minus_or_comment x =
 582     is_minus x or is_minusable_comment x in
 583
 584   let minus_or_comment_nocpp x =
 585     is_minus x or is_minusable_comment_nocpp x in
 586
 587   let common_adj (index1,adj1) (index2,adj2) =
 588     let same_adj = (* same adjacency info *)
 589       match (adj1,adj2) with
 590       | (Ast_cocci.ADJ adj1,Ast_cocci.ADJ adj2) -> adj1 = adj2
 591       | (Ast_cocci.ALLMINUS,_) | (_,Ast_cocci.ALLMINUS) -> true in
 592     same_adj &&
 593     (* non-empty intersection of witness trees *)
 594     not ((inter_set index1 index2) = []) in
 595
 596   (* new idea: collects regions not containing non-space context code
 597   if two adjacent adjacent minus tokens satisfy common_adj then delete
 598   all spaces, comments etc between them
 599   if two adjacent minus tokens do not satisfy common_adj only delete
 600   the spaces between them if there are no comments, etc.
 601   if the region contain no plus code and is both preceded and followed
 602   by a newline, delete the initial newline. *)
 603
 604   let rec adjust_around_minus = function
 605     | [] -> []
 606     | (T2(Parser_c.TCommentNewline c,_b,_i,_h) as x)::
 607       ((Fake2(Min adj1) | T2(_,Min adj1,_,_)) as t1)::xs ->
 608       let (minus_list,rest) = span not_context (t1::xs) in
 609       let contains_plus = List.exists is_plus minus_list in
 610       let x =
 611         match List.rev minus_list with
 612         | (T2(Parser_c.TCommentNewline c,_b,_i,_h))::rest
 613           when List.for_all minus_or_comment minus_list ->
 614           set_minus_comment_or_plus adj1 x
 615         | _ -> x in
 616       x :: adjust_within_minus contains_plus minus_list
 617          @ adjust_around_minus rest
 618     | ((Fake2(Min adj1) | T2(_,Min adj1,_,_)) as t1)::xs ->
 619       let (minus_list,rest) = span not_context (t1::xs) in
 620       let contains_plus = List.exists is_plus minus_list in
 621         adjust_within_minus contains_plus minus_list
 622       @ adjust_around_minus rest
 623     | x::xs ->
 624       x :: adjust_around_minus xs
 625   and adjust_within_minus cp (* contains plus *) = function
 626     | ((Fake2(Min adj1) | T2(_,Min adj1,_,_)) as t1)::xs ->
 627       let not_minus = function T2(_,Min _,_,_) -> false | _ -> true in
 628       let (not_minus_list,rest) = span not_minus xs in
 629       t1 ::
 630       (match rest with
 631       | ((Fake2(Min adj2) | T2(_,Min adj2,_,_)) as t2)::xs ->
 632         if common_adj adj1 adj2
 633         || not cp && List.for_all is_whitespace not_minus_list
 634         then
 635           (List.map (set_minus_comment_or_plus adj1) not_minus_list)
 636           @ (adjust_within_minus cp (t2::xs))
 637         else
 638           not_minus_list
 639           @ (adjust_within_minus cp (t2::xs))
 640       | _ ->
 641         if cp
 642         then xs
 643         else
 644           (* remove spaces after removed stuff, eg a comma after a
 645           function argument *)
 646           (let (spaces,rest) = span is_space xs in
 647           (List.map (set_minus_comment_or_plus adj1) spaces)
 648           @ rest)
 649       )
 650     | xs -> failwith "should always start with minus"
 651   and not_context = function
 652     | (T2(_,Ctx,_,_) as x) when not (is_minusable_comment x) -> false
 653     | _ -> true
 654   and is_plus = function
 655     | C2 _ | Comma _ | Cocci2 _ -> true
 656     | _ -> false in
 657
 658   let xs = adjust_around_minus xs in
 659
 660   (* get rid of fake tok *)
 661   let xs = drop_fake xs in
 662
 663   (* this drops blank lines after a brace introduced by removing code *)
 664   let minus_or_comment_nonl = function
 665     | T2(_,Min adj,_,_) -> true
 666     | T2(Parser_c.TCommentNewline _,_b,_i,_h) -> false
 667     | x -> is_minusable_comment x in
 668
 669   let rec adjust_after_brace = function
 670     | [] -> []
 671     | ((T2(_,Ctx,_,_)) as x)::((T2(_,Min adj,_,_)::_) as xs)
 672       when str_of_token2 x =$= "{" ->
 673       let (between_minus,rest) = span minus_or_comment_nonl xs in
 674       let (newlines,rest) = span is_whitespace rest in
 675       let (drop_newlines,last_newline) =
 676         let rec loop = function
 677           | [] -> ([],[])
 678           | ((T2(Parser_c.TCommentNewline _,_b,_i,_h)) as x) :: rest ->
 679             (List.rev rest,[x])
 680           | x::xs ->
 681             let (drop_newlines,last_newline) = loop xs in
 682             (drop_newlines,x::last_newline) in
 683         loop (List.rev newlines) in
 684       x :: between_minus
 685          @ List.map (set_minus_comment adj) drop_newlines
 686          @ last_newline
 687          @ adjust_after_brace rest
 688     | x::xs -> x :: (adjust_after_brace xs) in
 689
 690   let xs = adjust_after_brace xs in
 691
 692   (* search backwards from context } over spaces until reaching a newline.
 693   then go back over all minus code until reaching some context or + code.
 694   get rid of all intervening spaces, newlines, and comments
 695   input is reversed *)
 696   let rec adjust_before_brace = function
 697     | [] -> []
 698     | ((T2(t,Ctx,_,_)) as x)::xs
 699       when str_of_token2 x =$= "}" or is_newline x ->
 700       let (outer_spaces,rest) = span is_space xs in
 701       x :: outer_spaces @
 702       (match rest with
 703       | ((T2 (Parser_c.TCommentNewline _,Ctx,_i,_h)) as h) ::
 704         (* the rest of this code is the same as from_newline below
 705         but merging them seems to be error prone... *)
 706         ((T2 (t, Min adj, idx, hint)) as m) :: rest ->
 707         let (spaces,rest) = span minus_or_comment_nocpp rest in
 708         h :: m ::
 709         (List.map (set_minus_comment adj) spaces) @
 710         (adjust_before_brace rest)
 711       | _ -> adjust_before_brace rest
 712       )
 713     | x::xs -> x :: (adjust_before_brace xs) in
 714
 715   let from_newline = function
 716     | ((T2 (t, Min adj, idx, hint)) as m) :: rest ->
 717       let (spaces,rest) = span minus_or_comment_nocpp rest in
 718       m ::
 719       (List.map (set_minus_comment adj) spaces) @
 720       (adjust_before_brace rest)
 721     | ((T2 (t0,Ctx, idx0,h0)) as m0) ::
 722       ((T2 (t,Min adj,idx,h)) as m) :: rest
 723       when TH.str_of_tok t0 = "" ->
 724       (* This is for the case of a #define that is completely deleted,
 725       because a #define has a strange EOL token at the end.
 726       We hope there i no other kind of token that is represented by
 727       "", but it seems like changing the kind of token might break
 728       the end of entity recognition in the C parser.
 729       See parsing_hacks.ml *)
 730       let (spaces,rest) = span minus_or_comment_nocpp rest in
 731       m0 :: m ::
 732       (List.map (set_minus_comment adj) spaces) @
 733       (adjust_before_brace rest)
 734     | rest -> adjust_before_brace rest in
 735
 736   let xs = List.rev (from_newline (List.rev xs)) in
 737
 738   let cleanup_ifdefs toks =
 739     (* TODO: these functions are horrid, but using tokens caused circularity *)
 740     let is_ifdef = function
 741       | T2((Parser_c.TCommentCpp
 742         (Token_c.CppIfDirective Token_c.IfDef, _)),m,idx,_) -> true
 743       | T2((Parser_c.TCommentCpp
 744         (Token_c.CppIfDirective Token_c.IfDef0, _)),m,idx,_) -> true
 745       | t -> false in
 746     let is_else = function
 747       | T2((Parser_c.TCommentCpp
 748         (Token_c.CppIfDirective Token_c.Else, _)),m,idx,_) -> true
 749       | _ -> false in
 750     let is_endif = function
 751       | T2((Parser_c.TCommentCpp
 752         (Token_c.CppIfDirective Token_c.Endif, _)),m,idx,_) -> true
 753       | _ -> false in
 754     let add t = function
 755       | l::rest -> (t::l)::rest
 756       | _ -> failwith "not possible" in
 757     let rec parse_ifdef acc_keywords acc_code stack = function
 758       | [] -> (None,acc_keywords,acc_code)
 759       | t::rest when is_else t ->
 760         (match stack with
 761         | [] -> parse_ifdef (t::acc_keywords) ([]::acc_code) stack rest
 762         | _ -> parse_ifdef acc_keywords (add t acc_code) stack rest
 763         )
 764       | t::rest when is_endif t ->
 765         (match stack with
 766         | [] -> ((Some (t,rest)),acc_keywords,acc_code)
 767         | _::stack -> parse_ifdef acc_keywords (add t acc_code) stack rest
 768         )
 769       | t::rest when is_ifdef t ->
 770         parse_ifdef acc_keywords (add t acc_code) (()::stack) rest
 771       | t::rest -> parse_ifdef acc_keywords (add t acc_code) stack rest in
 772     let unminus = function
 773       | T2 (t,Min adj,idx,hint) -> T2 (t,Ctx,idx,hint)
 774       | x -> x in
 775     let rec loop = function
 776       | [] -> []
 777       | t::rest when is_ifdef t ->
 778         let (ender,acc_keywords,acc_code) =
 779           parse_ifdef [t] [[]] [] rest in
 780         let acc_code = List.map loop acc_code in
 781         let merge = (* args reversed *)
 782           List.fold_left2
 783             (fun prev kwd code -> kwd :: (List.rev code) @ prev)
 784             [] in
 785         (match ender with
 786         | None -> merge (List.map unminus acc_keywords) acc_code
 787         | Some(endif,rest) ->
 788           let rest = loop rest in
 789           if List.for_all is_minus (endif :: acc_keywords)
 790           then (merge acc_keywords acc_code) @ (endif :: rest)
 791           else
 792             (merge (List.map unminus acc_keywords) acc_code) @
 793             ((unminus endif) :: rest)
 794         )
 795       | x::xs -> x :: loop xs in
 796     loop toks in
 797
 798   let xs = cleanup_ifdefs xs in
 799   let xs = drop_minus xs in
 800   xs
 801
 802 (* things that should not be followed by space - boundary between SmPL
 803 code and C code *)
 804 let adjust_eat_space toks =
 805   let rec loop = function
 806     | [] -> []
 807     | EatSpace2 :: x :: rest when is_space x -> loop rest
 808     | EatSpace2 :: rest -> loop rest
 809     | x :: xs -> x :: loop xs in
 810   loop toks
 811
 812 (* normally, in C code, a semicolon is not preceded by a space or newline *)
 813 let adjust_before_semicolon toks =
 814   let toks = List.rev toks in
 815   let rec search_semic = function
 816     | [] -> []
 817     | ((T2(_,Ctx,_,_) | Cocci2 _) as x)::xs
 818       when List.mem (str_of_token2 x) [";";")";","] ->
 819       x :: search_semic (search_minus false xs)
 820     | x::xs -> x :: search_semic xs
 821   and search_minus seen_minus xs =
 822     let (spaces, rest) = span is_space xs in
 823     (* only delete spaces if something is actually deleted *)
 824     match rest with
 825     | ((T2(_,Min _,_,_)) as a)::rerest -> a :: search_minus true rerest
 826     | _ -> if seen_minus then rest else xs in
 827   List.rev (search_semic toks)
 828
 829 (* normally, in C code, a ( is not followed by a space or newline *)
 830 let adjust_after_paren toks =
 831   let rec search_paren = function
 832     | [] -> []
 833     | ((T2(_,Ctx,_,_) | Cocci2 _) as x)::xs
 834       when List.mem (str_of_token2 x) ["("] (* other things? *) ->
 835       x :: search_paren (search_minus false xs)
 836     | x::xs -> x :: search_paren xs
 837   and search_minus seen_minus xs =
 838     let (spaces, rest) = span is_whitespace xs in
 839     (* only delete spaces if something is actually deleted *)
 840     match rest with
 841     | ((T2(_,Min _,_,_)) as a)::rerest -> (* minus *)
 842       a :: search_minus true rerest
 843     | ((T2(_,Ctx,_,_)) as a)::rerest
 844       when seen_minus && str_of_token2 a = "," ->
 845       (* comma after ( will be deleted, so consider it as minus code
 846       already *)
 847       a :: search_minus true rerest
 848     | _ -> if seen_minus then rest else xs in (* drop trailing space *)
 849   search_paren toks
 850
 851 (* this is for the case where braces are added around an if branch *)
 852 let paren_then_brace toks =
 853   let rec search_paren = function
 854     | [] -> []
 855     | ((T2(_,Ctx,_,_)) as x)::xs
 856       when List.mem (str_of_token2 x) [")"] ->
 857       x :: search_paren (search_plus xs)
 858     | x::xs -> x :: search_paren xs
 859   and search_plus xs =
 860     let (spaces, rest) = span is_space xs in
 861     let (nls, rest) = span is_newline rest in
 862     match rest with
 863     (* move the brace up to the previous line *)
 864     | ((Cocci2("{",_,_,_,_)) as x) :: (((Cocci2 _) :: _) as rest) ->
 865       spaces @ x :: nls @ rest
 866     | _ -> xs in
 867   search_paren toks
 868
 869 let is_ident_like s = s ==~ regexp_alpha
 870
 871 let rec drop_space_at_endline = function
 872   | [] -> []
 873   | [x] -> [x]
 874   | (C2 " ") ::
 875     ((((T2(Parser_c.TCommentSpace _,Ctx,_,_)) | Cocci2("\n",_,_,_,_) |
 876     (T2(Parser_c.TCommentNewline _,Ctx,_,_))) :: _) as rest) ->
 877     (* when unparse_cocci doesn't know whether space is needed *)
 878     drop_space_at_endline rest
 879   | ((T2(Parser_c.TCommentSpace _,Ctx,_i,_h)) as a)::rest ->
 880     let (outer_spaces,rest) = span is_space rest in
 881     let minus_or_comment_or_space_nocpp = function
 882       | T2(_,Min adj,_,_) -> true
 883       | (T2(Parser_c.TCommentSpace _,Ctx,_i,_)) -> true
 884       | (T2(Parser_c.TCommentNewline _,Ctx,_i,_)) -> false
 885       | x -> false in
 886     let (minus,rest) = span minus_or_comment_or_space_nocpp rest in
 887     let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in
 888     if List.exists is_minus minus
 889     then
 890       match rest with
 891       | ((T2(Parser_c.TCommentNewline _,Ctx,_i,_h)) as a)::rest ->
 892         (* drop trailing spaces *)
 893         minus @ a :: (drop_space_at_endline rest)
 894       | _ -> fail ()
 895     else fail ()
 896   | a :: rest ->
 897     a :: drop_space_at_endline rest
 898
 899 (* if a removed ( is between two tokens, then add a space *)
 900 let rec paren_to_space = function
 901   | [] -> []
 902   | [x] -> [x]
 903   | [x;y] -> [x;y]
 904   | ((T2(_,Ctx,_,_)) as a)::
 905     ((T2(t,Min _,_,_)) as b)::
 906     ((T2(_,Ctx,_,_)) as c)::rest
 907     when not (is_whitespace a) && TH.str_of_tok t = "(" ->
 908     a :: b :: (C2 " ") :: (paren_to_space (c :: rest))
 909   | a :: rest -> a :: (paren_to_space rest)
 910
 911 let rec add_space xs =
 912   match xs with
 913   | [] -> []
 914   | [x] -> [x]
 915   | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
 916     when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
 917     not (lnx = -1) && not (rcolx = -1) && lnx = lny && rcolx < lcoly ->
 918     (* this only works within a line.  could consider whether
 919     something should be done to add newlines too, rather than
 920     printing them explicitly in unparse_cocci. *)
 921     x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
 922   | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
 923     when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
 924     not (lnx = -1) && not (rcolx = -1) && lnx < lny ->
 925     (* this only works within a line.  could consider whether
 926     something should be done to add newlines too, rather than
 927     printing them explicitly in unparse_cocci. *)
 928     x::C2 (String.make (lny-lnx) '\n')::
 929     C2 (String.make (lcoly-1) ' '):: (* -1 is for the + *)
 930     add_space (y::xs)
 931   | ((T2(_,Ctx,_,_)) as x)::((Cocci2 _) as y)::xs -> (* add space on boundary *)
 932     let sx = str_of_token2 x in
 933     let sy = str_of_token2 y in
 934     if is_ident_like sx && (is_ident_like sy or List.mem sy ["="])
 935     then x::C2 " "::(add_space (y::xs))
 936     else x::(add_space (y::xs))
 937   | x::y::xs -> (* not boundary, not sure if it is possible *)
 938     let sx = str_of_token2 x in
 939     let sy = str_of_token2 y in
 940     if is_ident_like sx && is_ident_like sy
 941     then x::C2 " "::(add_space (y::xs))
 942     else x::(add_space (y::xs))
 943
 944 (* A fake comma is added at the end of an unordered initlist or a enum
 945 decl, if the initlist or enum doesn't already end in a comma.  This is only
 946 needed if there is + code, ie if we see Cocci after it in the code sequence *)
 947
 948 let rec drop_end_comma = function
 949   | [] -> []
 950   | [x] -> [x]
 951   | ((Comma ",") as x) :: rest ->
 952     let (newlines,rest2) = span is_whitespace rest in
 953     (match rest2 with
 954     | (Cocci2 _) :: _ -> x :: drop_end_comma rest
 955     |   _ -> drop_end_comma rest
 956     )
 957   | x :: xs -> x :: drop_end_comma xs
 958
 959 (* The following only works for the outermost function call.  Stack records
 960 the column of all open parentheses.  Space_cell contains the most recent
 961 comma in the outermost function call.  The goal is to decide whether this
 962 should be followed by a space or a newline and indent. *)
 963 let add_newlines toks tabbing_unit =
 964   (* the following is for strings that may contain newline or tabs *)
 965   let string_length s count =
 966     let l = list_of_string s in
 967     List.fold_left
 968       (function count ->
 969         function
 970         | '\t' -> count + 8
 971         | '\n' -> 0
 972         | c -> count + 1)
 973       count l in
 974   let create_indent n =
 975     let (tu,tlen) =
 976       match tabbing_unit with
 977       | Some ("\t",_) -> ("\t",8)
 978       | Some ("",_) -> ("\t",8) (* not sure why... *)
 979       | Some (s,_) -> (s,string_length s 0) (* assuming only tabs or spaces *)
 980       | None -> ("\t",8) in
 981     let rec loop seen =
 982       if seen + tlen <= n
 983       then tu ^ loop (seen + tlen)
 984       else String.make (n-seen) ' ' in
 985     loop 0 in
 986   let check_for_newline count x = function
 987     | Some (start,space_cell) when count > Flag_parsing_c.max_width ->
 988       space_cell := "\n"^(create_indent x);
 989       Some (x + (count - start))
 990     | _ -> None in
 991   let start_box stack space_cell count s =
 992     let count = string_length s count in
 993     (count,count::stack,space_cell) in
 994   let end_box stack space_cell count s =
 995     (* this assumes that start_box and end_box are matched, but this is not
 996     necessarily the case, if ( is modified and ) is context code *)
 997     let count = string_length s count in
 998     match stack with
 999     | [x] ->
1000       (match check_for_newline count x space_cell with
1001       | Some count -> (count,[],None)
1002       | None -> (count,[],None)
1003       )
1004     | [] -> (count,stack,space_cell)
1005     | _ -> (count,List.tl stack,space_cell) in
1006   let rec loop ((stack,space_cell) as info) count = function
1007     | [] -> []
1008     | ((Cocci2(s,line,lcol,rcol,hint)) as a):: (* hint can't be start *)
1009       (T2(commatok,Ctx,idx,_))::
1010       (T2(((Parser_c.TCommentSpace _) as sptok),Ctx,_,_)) ::
1011       (((T2(codetok,Ctx,_,_)) :: _) as xs)
1012       when
1013       (TH.str_of_tok commatok) = "," &&
1014       (TH.str_of_tok sptok) = " " &&
1015       ((List.length stack = 1) or (* not super elegant... *)
1016       (* check if the Cocci2 token is a ), need double treatment *)
1017       (List.length stack = 2) && (hint = Some Unparse_cocci.EndBox)) ->
1018       (* something large added before a comma *)
1019       let stack = (* do the work of end_box in the length 2 case *)
1020         if List.length stack = 2
1021         then List.tl stack
1022         else stack in
1023       let x = List.hd stack in
1024       let sp = ref " " in
1025       let cocci_count = string_length s count in
1026       let space_cell = Some (cocci_count+1,sp) in (* count before space *)
1027       let newcount = cocci_count + 2 in (* count incuding space *)
1028       let future_count =
1029         string_length (TH.str_of_tok codetok) newcount in
1030       let b = T2(commatok,Ctx,idx,
1031         Some (Unparse_cocci.SpaceOrNewline sp)) in
1032       (match check_for_newline future_count x space_cell with
1033       | Some count -> a :: b :: loop (stack,Some (x,sp)) count xs
1034       | None -> a :: b :: loop (stack,Some (newcount,sp)) newcount xs
1035       )
1036     | (T2(commatok,Ctx,_,_))::
1037       (T2(((Parser_c.TCommentSpace _) as sptok),Ctx,idx,_)) ::
1038       (((Cocci2(s,line,lcol,rcol,hint))::_) as xs)
1039       when (TH.str_of_tok commatok) = "," && (TH.str_of_tok sptok) = " " &&
1040       List.length stack = 1 (* not super elegant... *) ->
1041       (* something large added after a comma *)
1042       let x = List.hd stack in
1043       let sp = ref " " in
1044       let space_cell = Some (count+1,sp) in (* count before space *)
1045       let newcount = count + 2 in (* count incuding space *)
1046       let future_count = string_length s newcount in
1047       let a = T2(commatok,Ctx,idx,
1048         Some (Unparse_cocci.SpaceOrNewline sp)) in
1049       (match check_for_newline future_count x space_cell with
1050       | Some count -> a :: loop (stack,Some (x,sp)) count xs
1051       | None -> a :: loop (stack,Some (newcount,sp)) newcount xs
1052       )
1053     | ((T2(tok,Ctx,idx,_)) as a)::xs ->
1054     (*  let (stack,space_cell) = info in *)
1055       (match TH.str_of_tok tok with
1056       | "=" as s ->
1057         let (spaces,rest) = span is_space xs in
1058         (match rest with
1059         | ((T2(tok,Ctx,_,_)) as b)::ixs ->
1060           (match TH.str_of_tok tok with
1061           | "{" ->
1062             let newcount =
1063               List.fold_left
1064                 (function prev ->
1065                   function
1066                   | (T2(tok,_b,_i,_h)) ->
1067                     string_length (TH.str_of_tok tok) prev
1068                   | _ -> failwith "not possible")
1069                 count spaces in
1070             let front = a :: spaces @ [b] in
1071             let (newcount,newstack,newspacecell) =
1072               start_box stack space_cell newcount "{" in
1073             front @ loop (newstack,newspacecell) newcount ixs
1074           | s -> a :: loop info (string_length s count) xs
1075           )
1076         | _ -> a :: loop info (string_length s count) xs
1077         )
1078       | "(" as s ->
1079         let (newcount,newstack,newspacecell) =
1080           start_box stack space_cell count s in
1081         a :: loop (newstack,newspacecell) newcount xs
1082       | ")" as s ->
1083         let (newcount,newstack,newspacecell) =
1084           end_box stack space_cell count s in
1085         a :: loop (newstack,newspacecell) newcount xs
1086       | "{" as s when not (stack = []) ->
1087         (* [] case means statement braces *)
1088         let (newcount,newstack,newspacecell) =
1089           start_box stack space_cell count s in
1090         a :: loop (newstack,newspacecell) newcount xs
1091       | "}" as s when not (stack = []) ->
1092         (* [] case means statement braces *)
1093         let (newcount,newstack,newspacecell) =
1094           end_box stack space_cell count s in
1095         a :: loop (newstack,newspacecell) newcount xs
1096       | s -> a :: loop info (string_length s count) xs
1097       )
1098     | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs ->
1099       let (stack,space_cell) = info in
1100       let rest =
1101         match hint with
1102         | None -> loop info (string_length s count) xs
1103         | Some Unparse_cocci.StartBox ->
1104           let (newcount,newstack,newspacecell) =
1105             start_box stack space_cell count s in
1106           loop (newstack,newspacecell) newcount xs
1107         | Some Unparse_cocci.EndBox ->
1108           let (newcount,newstack,newspacecell) =
1109             end_box stack space_cell count s in
1110           loop (newstack,newspacecell) newcount xs
1111         | Some (Unparse_cocci.SpaceOrNewline sp) ->
1112           let count = string_length s (count + 1 (*space*)) in
1113           (match stack with
1114           | [x] ->
1115             (match check_for_newline count x space_cell with
1116             | Some count -> loop (stack,Some (x,sp)) count xs
1117             | None -> loop (stack,Some (count,sp)) count xs
1118             )
1119           | _ -> loop info count xs
1120           ) in
1121       a :: rest
1122     | ((T2(tok,_,_,_)) as a)::xs ->
1123       a :: loop info (string_length (TH.str_of_tok tok) count) xs
1124     | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs
1125     | ((Comma(s)) as a)::xs -> a :: loop info (string_length s count) xs
1126     | Fake2 _ :: _ | Indent_cocci2 :: _
1127     | Unindent_cocci2 _::_ | EatSpace2::_ ->
1128       failwith "unexpected fake, indent, unindent, or eatspace" in
1129   let redo_spaces prev = function
1130     | Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) ->
1131       C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev
1132     | T2(tok,min,idx,Some (Unparse_cocci.SpaceOrNewline sp)) ->
1133       C2 !sp :: T2(tok,min,idx,None) :: prev
1134     | t -> t::prev in
1135   (match !Flag_parsing_c.spacing with
1136   | Flag_parsing_c.SMPL -> toks
1137   | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks))
1138   )
1139
(* When inserting new code, because of a + in a SP, we must add this
 * code at the right place, with the right indentation. So each time we
 * encounter some spacing info containing a newline, we record the
 * indentation level currently in use.
 *
 * TODO problems: not accurate. ex: TODO
 *
 * TODO: if in #define region, should add a \ \n
 *)
(* [new_tabbing2 space] returns the characters of [space] selected by
[take_until] when scanning backwards from the end of the string for a
newline character, put back in their original order.
NOTE(review): presumably this is the text after the last newline, i.e.
the indentation of the next line - confirm against [Common.take_until]. *)
let new_tabbing2 space =
  let reversed = List.rev (list_of_string space) in
  let last_line = List.rev (take_until (fun c -> c =<= '\n') reversed) in
  String.concat "" (List.map string_of_char last_line)
1156
(* Same as [new_tabbing2], run under [profile_code] with the label
"C unparsing.new_tabbing". *)
let new_tabbing a =
  let compute () = new_tabbing2 a in
  profile_code "C unparsing.new_tabbing" compute
1159
1160
(* [adjust_indentation xs] returns a pair [(toks, tabbing_unit)].

[toks] is [xs] where the [Indent_cocci2] and [Unindent_cocci2] markers
have been consumed and where each cocci newline is followed by a cocci
token carrying the current indentation.  The current indentation is kept
as a stack of strings; it is reset from each context newline seen in the
original code, and extended or reduced by the markers.

[tabbing_unit] is the indentation unit that was guessed, if any: either
the indentation of the first newline found after the first "{", or the
first growth of indentation observed after a "{".

The order of the side effects on the two references below matters; the
statements are kept in their original order. *)
let adjust_indentation xs =

  let _current_tabbing = ref ([] : string list) in
  let tabbing_unit = ref None in

  let string_of_list l = String.concat "" (List.map string_of_char l) in

  (* try to pick a tabbing unit for the plus code: what remains of the
  new tabbing after skipping as many characters as the old one has *)
  let adjust_tabbing_unit old_tab new_tab =
    if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
    then
      let old_tab = list_of_string old_tab in
      let new_tab = list_of_string new_tab in
      let rec loop = function
        | ([],new_tab) ->
          tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
        | (_,[]) -> failwith "not possible"
        | (_::os,_::ns) -> loop (os,ns) in (* could check for equality *)
      loop (old_tab,new_tab) in

(*
let remtab tu current_tab =
let current_tab = List.rev(list_of_string current_tab) in
let rec loop = function
([],new_tab) -> string_of_list (List.rev new_tab)
|       (_,[]) -> (-*weird; tabbing unit used up more than the current tab*-)
""
|       (t::ts,n::ns) when t =<= n -> loop (ts,ns)
|       (_,ns) -> (-* mismatch; remove what we can *-)
string_of_list (List.rev ns) in
loop (tu,current_tab) in
*)

  (* initialise the tabbing unit from the first newline token found after
  the first "{" *)
  let rec find_first_tab started = function
    | [] -> ()
    | ((T2 (_,_,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
      find_first_tab true xs
    (* patch: coccinelle *)
    | ((T2 (Parser_c.TCommentNewline _, _, _, _)) as x)::_
      when started ->
      let s = str_of_token2 x +> new_tabbing in
      tabbing_unit := Some (s,List.rev (list_of_string s))
    | _::xs -> find_first_tab started xs in
  find_first_tab false xs;

  (* true when the number of "(" minus the number of ")" among the T2
  tokens of the list, added to ct, is not negative *)
  let rec balanced ct = function
    | [] -> ct >= 0
    | ((T2(_,_,_,_)) as x)::xs ->
      (match str_of_token2 x with
      | "(" -> balanced (ct+1) xs
      | ")" -> balanced (ct-1) xs
      | _ -> balanced ct xs
      )
    | _::xs -> balanced ct xs in

  (* reset the current tabbing from the newline token x *)
  let update_tabbing started _s x =
    let old_tabbing = !_current_tabbing in
    str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := [s]);
    (* only trust the indentation after the first { *)
    if started
    then
      adjust_tabbing_unit
        (String.concat "" old_tabbing)
        (String.concat "" !_current_tabbing) in

  let rec aux started xs =
    match xs with
    | [] -> []
    (* patch: coccinelle *)
    | ((T2 (Parser_c.TCommentNewline s,_,_,_)) as x)::
      Unindent_cocci2(false)::xs ->
      update_tabbing started s x;
      (C2 "\n")::aux started xs
    | (Cocci2("\n",_,_,_,_))::Unindent_cocci2(false)::xs ->
      (C2 "\n")::aux started xs
    | ((T2 (_,_,_,_)) as x)::(T2 (Parser_c.TCommentNewline _, _, _, _))::
      ((Cocci2 ("{",_,_,_,_)) as a)::xs
      when started && str_of_token2 x =$= ")" ->
      (* to be done for if, etc, but not for a function header *)
      x::(C2 " ")::a::(aux started xs)
    | ((T2 (Parser_c.TCommentNewline s, _, _, _)) as x)::xs
      when
      balanced 0 (fst(span (function x -> not(is_newline x)) xs)) ->
      update_tabbing started s x;
      let coccis_rest = span all_coccis xs in
      (match coccis_rest with
      | (_::_,((T2 (_,_,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
        (* the case where cocci code has been added before a close } *)
        x::aux started (Indent_cocci2::xs)
      | _ -> x::aux started xs
      )
    | Indent_cocci2::((Cocci2(_,_,lcoly,_,_)) as y)::xs
      when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL ->
      let tu = String.make (lcoly-1) ' ' in
      _current_tabbing := tu::(!_current_tabbing);
      C2 (tu)::aux started (y::xs)
    | Indent_cocci2::xs ->
      (match !tabbing_unit with
      | None -> aux started xs
      | Some (tu,_) ->
        _current_tabbing := tu::(!_current_tabbing);
        (* can't be C2, for later phases *)
        Cocci2 (tu,-1,-1,-1,None)::aux started xs
      )
    | Unindent_cocci2(_)::((Cocci2("\n",_,_,_,_)) as x)::xs ->
      (* seems only relevant if there is a following cocci newline *)
      (match !_current_tabbing with
      (* NOTE(review): the cocci newline x is not emitted in this case -
      confirm that this is intended *)
      | [] -> aux started xs
      | _::new_tabbing ->
        let s = String.concat "" new_tabbing in
        _current_tabbing := new_tabbing;
        x::Cocci2 (s,-1,-1,-1,None)::aux started xs
      )
    | Unindent_cocci2(_)::xs -> aux started xs
    (* border between existing code and cocci code *)
    | ((T2 (_,_,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs
      when str_of_token2 x =$= "{" ->
      x::aux true (y::Indent_cocci2::xs)
    | ((Cocci2 _) as x)::((T2 (_,_,_,_)) as y)::xs
      when str_of_token2 y =$= "}" ->
      x::aux started (Unindent_cocci2 true::y::xs)
    (* starting the body of the function *)
    | ((T2 (_,_,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
      x::aux true xs
    | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs
    | ((Cocci2("\n",_,_,_,_)) as x)::xs ->
      (* dont inline in expr because of weird eval order of ocaml *)
      let s = String.concat "" !_current_tabbing in
      (* can't be C2, for later phases *)
      x::Cocci2 (s,-1,-1,-1,None)::aux started xs
    | x::xs -> x::aux started xs in
  (aux false xs,!tabbing_unit)
1293
1294
(* [find_paren_comma toks] scans the adjacent pairs of [toks] looking for
the sequences "(" ",", "," "," and "," ")".  When both tokens of such a
pair carry an index and the indices are consecutive, the pair was like
this in the original file and nothing is done.  Otherwise a comma of the
pair is marked for removal through its mutable [remove] field: the
second token for "(" ",", the first token in the other two cases. *)
let rec find_paren_comma tokens =
  match tokens with
  | [] | [_] -> ()
  | first :: ((second :: _) as tail) ->
    let adjacent_in_original =
      match first.idx, second.idx with
      | Some p1, Some p2 -> p2 =|= p1 + 1
      | _ -> false in
    (match first.str, second.str with
    (* do nothing if was like this in original file *)
    | ("(", ",") | (",", (","|")")) when adjacent_in_original -> ()
    (* otherwise yes can adjust *)
    | ("(", ",") -> second.remove <- true
    | (",", (","|")")) -> first.remove <- true
    | _ -> ());
    find_paren_comma tail
1314
1315
(* [fix_tokens toks] wraps each token with [mk_token_extended], runs
[find_paren_comma] on the wrapped tokens that are not real comments,
rebuilds the list with [rebuild_tokens_extented] and returns the plain
tokens again. *)
let fix_tokens toks =
  let extended = List.map mk_token_extended toks in
  let is_comment ext =
    match ext.tok2 with
    | T2 (t,_,_,_) -> TH.is_real_comment t (* I want the ifdef *)
    | _ -> false in
  find_paren_comma (exclude is_comment extended);
  List.map (fun ext -> ext.tok2) (rebuild_tokens_extented extended)
1327
(* If we have to remove a "}" that is alone on a line, remove the line too.

[drop_line toks] tries, at each position of [toks], to match a run of
whitespace, a minus token printed as "}", and then only minus tokens,
spaces and fake tokens up to a newline.  On success the leading
whitespace and the spaces and fake tokens after the brace are dropped,
while the minus tokens and the final newline are kept.

NOTE(review): when the position starts directly at the removed "}" and
the scan after it stops on something other than a newline, the spaces and
fake tokens skipped by the scan are dropped anyway - confirm that this is
intended. *)
let drop_line toks =
  (* (true, rest) when only minus tokens, spaces and fake tokens occur
  before the next newline; spaces and fake tokens are not part of rest *)
  let rec blank_until_newline tokens =
    match tokens with
    | (T2(_, Min _, _, _) as removed) :: rest ->
      let (line_is_blank, kept) = blank_until_newline rest in
      (line_is_blank, removed :: kept)
    | tok :: rest when is_space tok -> blank_until_newline rest
    | Fake2 _ :: rest -> blank_until_newline rest
    | tok :: _ when is_newline tok -> (true, tokens)
    | _ -> (false, tokens) in
  let rec try_drop tokens =
    match tokens with
    | (T2(_, Min _, _, _) as brace) :: rest
      when str_of_token2 brace =$= "}" ->
      let (line_is_blank, kept) = blank_until_newline rest in
      (line_is_blank, brace :: kept)
    | tok :: rest when is_whitespace tok ->
      (match try_drop rest with
      | (true, kept) -> (true, kept)
      | (false, _) -> (false, tokens))
    | _ -> (false, tokens) in
  let rec scan tokens =
    match snd (try_drop tokens) with
    | [] -> []
    | tok :: rest -> tok :: scan rest in
  scan toks
1363
1364 (*****************************************************************************)
1365 (* Final unparsing (and debugging support) *)
1366 (*****************************************************************************)
1367
1368 (* for debugging *)
(* Origin of a token, used to mark the debugging output. *)
type kind_token2 =
  | KFake      (* a Fake2 token *)
  | KCocci     (* a Cocci2 token *)
  | KC         (* a C2 or Comma token *)
  | KExpanded  (* a T2 token whose position info is an ExpandedTok *)
  | KOrigin    (* a T2 token whose position info is an OriginTok *)

(* [kind_of_token2 tok] classifies [tok].  Raises [Impossible] for a T2
token whose position info is a FakeTok or an AbstractLineTok, and for the
Indent_cocci2, Unindent_cocci2 and EatSpace2 markers. *)
let kind_of_token2 tok =
  match tok with
  | Fake2 _ -> KFake
  | Cocci2 _ -> KCocci
  | C2 _ | Comma _ -> KC
  | T2 (t,_,_,_) ->
    (match TH.pinfo_of_tok t with
    | Ast_c.ExpandedTok _ -> KExpanded
    | Ast_c.OriginTok _ -> KOrigin
    | Ast_c.FakeTok _ -> raise (Impossible 139) (* now a Fake2 *)
    | Ast_c.AbstractLineTok _ -> raise (Impossible 140) (* now a KC *)
    )
  | Unindent_cocci2 _ | Indent_cocci2 | EatSpace2 -> raise (Impossible 141)
1384
(* Marker printed in debugging mode before each change of token kind. *)
let end_mark = "!"

(* [start_mark kind] is the marker printed in debugging mode when a run
of tokens of the given kind starts; it is empty for original tokens. *)
let start_mark kind =
  match kind with
  | KOrigin -> ""
  | KFake -> "!F!"
  | KCocci -> "!S!"
  | KC -> "!A!"
  | KExpanded -> "!E!"
1393
(* [print_all_tokens2 pr xs] prints the tokens [xs] with the printing
function [pr].

When [Flag_parsing_c.debug_unparsing] is set, the string of each token is
printed, and each change of token kind is announced by [end_mark]
followed by the [start_mark] of the new kind.

Otherwise the string of each token is printed, except inside regions
delimited by the [Token_annot.Exclude_start] and [Token_annot.Exclude_end]
annotations, where every character other than a space, tab, carriage
return or newline is replaced by a space.  Such regions may be nested;
the token carrying the end annotation is still hidden. *)
let print_all_tokens2 pr xs =
  if !Flag_parsing_c.debug_unparsing
  then
    let current_kind = ref KOrigin in
    xs +> List.iter (fun t ->
      let newkind = kind_of_token2 t in
      if newkind =*= !current_kind
      then pr (str_of_token2 t)
      else
        begin
          pr (end_mark);
          pr (start_mark newkind);
          pr (str_of_token2 t);
          current_kind := newkind
        end
    )
  else
    (* blank out s while keeping its layout; built with a Buffer because
    strings are not to be mutated in place *)
    let to_whitespace s =
      let buf = Buffer.create (String.length s) in
      String.iter
        (fun c ->
          match c with
          | ' ' | '\t' | '\r' | '\n' -> Buffer.add_char buf c
          | _ -> Buffer.add_char buf ' ')
        s;
      Buffer.contents buf in
    let hiding_level = ref 0 in
    let handle_token t =
      let s = str_of_token2 t in
      let hide_current =
        match t with
        | T2 (t,_,_,_) ->
          let i = TH.info_of_tok t in
          (match Ast_c.get_annot_info i Token_annot.Exclude_start with
          | None   -> ()
          | Some _ -> hiding_level := !hiding_level + 1
          );
          (* decided after the start and before the end annotation, so
          that both delimiting tokens are hidden *)
          let hide_current = !hiding_level > 0 in
          (match Ast_c.get_annot_info i Token_annot.Exclude_end with
          | None   -> ()
          | Some _ -> hiding_level := max (!hiding_level - 1) 0
          );
          hide_current
        | _ -> !hiding_level > 0 in
      if hide_current then to_whitespace s else s in
    xs +> List.iter (fun x -> pr (handle_token x))
1440
1441
1442
1443
1444
1445 (*****************************************************************************)
1446 (* Entry points *)
1447 (*****************************************************************************)
1448
1449 (* old: PPviatok was made in the beginning to allow to pretty print a
1450  * complete C file, including a modified C file by transformation.ml,
1451  * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
1452  * maybe flow_to_ast) all the cases. Indeed we don't need to do some
1453  * fancy stuff when a function was not modified at all. Just need to
1454  * print the list of token as-is. But now pretty_print_c.ml handles
1455  * almost everything so maybe less useful. Maybe PPviatok allows to
1456  * optimize a little the pretty printing.
1457  *
1458  * update: now have PPviastr which goes even faster than PPviatok, so
1459  * PPviatok has disappeared.
1460  *)
1461
(* How a toplevel element is printed by pp_program. *)
type ppmethod =
  | PPnormal  (* go through the token-based unparsing phases *)
  | PPviastr  (* print the string stored with the element as is *)
1463
1464
1465
1466
(* The pp_program function will call pretty_print_c.ml with a special
 * function to print the leaf components, the tokens. When we want to
 * print a token, we may also need to print the spaces and comments that
 * were close to it in the original file (and that were omitted during the
 * parsing phase), and honor what the cocci-info attached to the token says.
 * Maybe we will not print the token if it's a MINUS-token, and maybe we will
 * print it and also print some cocci-code attached to it in a PLUS.
 * So we may also call unparse_cocci. Because the cocci-code may
 * contain metavariables, unparse_cocci will in fact sometimes call back
 * pretty_print_c (which this time will not call back unparse_cocci again).
 *)
1478
(* [pp_program2 xs outfile] prints the program [xs] into [outfile].

Each element of [xs] is a toplevel element, given with its original
string and tokens, paired with a printing method.  With [PPviastr] the
original string is printed as is.  With [PPnormal] the tokens go through
the unparsing phases below and are then printed with [print_all_tokens2].

When [Flag_parsing_c.debug_unparsing] is set, the output goes to stderr
through [pr2_no_nl] instead of the printer of [outfile]. *)
let pp_program2 xs outfile  =
  with_open_outfile outfile (fun (pr,_chan) ->
    let pr s =
      if !Flag_parsing_c.debug_unparsing
      then begin pr2_no_nl s; flush stderr end
      else pr s
(* flush chan; *)
(* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
    in

    xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
      (* here can still work on ast *)
      let e = remove_useless_fakeInfo_struct e in

      match ppmethod with
      | PPnormal ->
        (* now work on tokens *)
        (* phase1: just get all the tokens, all the information *)
        assert(toks_e +> List.for_all (fun t ->
          TH.is_origin t || TH.is_expanded t
        ));
        let toks = get_fakeInfo_and_tokens e toks_e in
        let toks = displace_fake_nodes toks in
        (* assert Origin;ExpandedTok;Faketok *)
        let toks = expand_mcode toks in

        (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
         * and no tag information, just NOTHING. *)

        let toks =
          if !Flag.sgrep_mode2
          then
            (* nothing else to do for sgrep *)
            drop_expanded(drop_fake(drop_minus toks))
          else
            begin
              (* phase2: can now start to filter and adjust *)
              let (toks,tu) = adjust_indentation toks in
              let toks = adjust_eat_space toks in
              let toks = adjust_before_semicolon toks in(*before remove minus*)
              let toks = adjust_after_paren toks in(*also before remove minus*)
              let toks = drop_space_at_endline toks in
              let toks = paren_to_space toks in
              let toks = drop_end_comma toks in

              let toks = drop_line toks in

              let toks = remove_minus_and_between_and_expanded_and_fake toks in
              (* assert Origin + Cocci + C and no minus *)
              let toks = add_space toks in
              let toks = add_newlines toks tu in
              let toks = paren_then_brace toks in
              let toks = fix_tokens toks in
              toks
            end in

        (* in theory here could reparse and rework the ast! or
         * apply some SP. Not before cos julia may have generated
         * not parsable file. Need do unparsing_tricks call before
         * being ready to reparse. *)
        print_all_tokens2 pr toks

      | PPviastr -> pr str
    )
  )
1544
(* Same as [pp_program2], run under [profile_code] with the label
"C unparsing". *)
let pp_program a b =
  let run () = pp_program2 a b in
  profile_code "C unparsing" run
1547
1548
(* [pp_program_default xs outfile] prints every element of [xs] into
[outfile] with the [PPnormal] method. *)
let pp_program_default xs outfile =
  let with_method = List.map (fun elem -> (elem, PPnormal)) xs in
  pp_program with_method outfile