X-Git-Url: http://git.hcoop.net/bpt/coccinelle.git/blobdiff_plain/fc1ad9719a3a6317b3a4749dac68e7272a3617db..18b1275a488603d2b4fcf2a36dd4ac14e0518268:/parsing_c/unparse_c.ml

diff --git a/parsing_c/unparse_c.ml b/parsing_c/unparse_c.ml
index 6d0dc04..15217bd 100644
--- a/parsing_c/unparse_c.ml
+++ b/parsing_c/unparse_c.ml
@@ -1,17 +1,18 @@
 (* Yoann Padioleau, Julia Lawall
- * 
+ *
+ * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
  * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License (GPL)
  * version 2 as published by the Free Software Foundation.
- * 
+ *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * file license.txt for more details.
- * 
- * 
+ *
+ *
  * Modifications by Julia Lawall for better newline handling.
  *)
 open Common
@@ -36,7 +37,7 @@ let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
 (* Types used during the intermediate phases of the unparsing *)
 (*****************************************************************************)
 
-type token1 = 
+type token1 =
   | Fake1 of info
   | T1 of Parser_c.token
 
@@ -45,7 +46,7 @@ type token1 =
  * token and get something simpler ? because we need to know if the
  * info is a TCommentCpp or TCommentSpace, etc for some of the further
  * analysis so easier to keep with the token.
- * 
+ *
  * This type contains the whole information. Have all the tokens with this
  * type.
  *)
@@ -53,17 +54,18 @@ type min =
     Min of (int list (* match numbers *) * int (* adjacency information *))
   | Ctx
 
-type token2 = 
-  | T2 of Parser_c.token * min * 
+type token2 =
+  | T2 of Parser_c.token * min *
           int option (* orig index, abstracting away comments and space *)
   | Fake2
   | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
+	* Unparse_cocci.nlhint option
   | C2 of string
   | Indent_cocci2
-  | Unindent_cocci2
+  | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
 
 (* not used yet *)
-type token3 = 
+type token3 =
   | T3 of Parser_c.token
   | Cocci3 of string
   | C3 of string
@@ -83,7 +85,7 @@ type token_extended = {
 (* Helpers *)
 (*****************************************************************************)
 
-let info_of_token1 t = 
+let info_of_token1 t =
   match t with
   | Fake1 info -> info
   | T1 tok -> TH.info_of_tok tok
@@ -95,10 +97,10 @@ let print_token1 = function
 let str_of_token2 = function
   | T2 (t,_,_) -> TH.str_of_tok t
   | Fake2 -> ""
-  | Cocci2 (s,_,_,_) -> s
+  | Cocci2 (s,_,_,_,_) -> s
   | C2 s -> s
   | Indent_cocci2 -> ""
-  | Unindent_cocci2 -> ""
+  | Unindent_cocci2 _ -> ""
 
 let print_token2 = function
   | T2 (t,b,_) ->
@@ -110,13 +112,13 @@ let print_token2 = function
 	| Ctx -> "" in
       "T2:"^b_str^TH.str_of_tok t
   | Fake2 -> "fake"
-  | Cocci2 (s,_,lc,rc) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
+  | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
   | C2 s -> "C2:"^s
   | Indent_cocci2 -> "Indent"
-  | Unindent_cocci2 -> "Unindent"
+  | Unindent_cocci2 _ -> "Unindent"
 
 let simple_print_all_tokens1 l =
-  List.iter (function x -> Printf.printf "%s " (print_token1 x)) l;
+  List.iter (function x -> Printf.printf "|%s| " (print_token1 x)) l;
   Printf.printf "\n"
 
 let simple_print_all_tokens2 l =
@@ -129,22 +131,22 @@ let str_of_token3 = function
 
 
 
-let mk_token_extended x = 
-  let origidx = 
+let mk_token_extended x =
+  let origidx =
     match x with
-    | T2 (_,_, idx) -> idx 
+    | T2 (_,_, idx) -> idx
     | _ -> None
   in
-  { tok2 = x; 
+  { tok2 = x;
     str = str_of_token2 x;
     idx = origidx;
     new_tokens_before = [];
     remove = false;
   }
 
-let rebuild_tokens_extented toks_ext = 
+let rebuild_tokens_extented toks_ext =
   let _tokens = ref [] in
-  toks_ext +> List.iter (fun tok -> 
+  toks_ext +> List.iter (fun tok ->
     tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
     if not tok.remove then push2 tok.tok2 _tokens;
   );
@@ -158,9 +160,9 @@ let mcode_contain_plus = function
 (* patch: when need full coccinelle transformation *)
   | Ast_cocci.MINUS (_,_,_,[]) -> false
   | Ast_cocci.MINUS (_,_,_,x::xs) -> true
-  | Ast_cocci.PLUS -> raise Impossible
+  | Ast_cocci.PLUS _ -> raise Impossible
 
-let contain_plus info = 
+let contain_plus info =
   let mck = Ast_c.mcode_of_info info in
   mcode_contain_plus mck
 
@@ -168,14 +170,14 @@ let contain_plus info =
 (* Last fix on the ast *)
 (*****************************************************************************)
 
-(* Because of the ugly trick to handle initialiser, I generate fake ',' 
+(* Because of the ugly trick to handle initialiser, I generate fake ','
  * for the last initializer element, but if there is nothing around it,
  * I don't want in the end to print it.
  *)
 
-let remove_useless_fakeInfo_struct program = 
+let remove_useless_fakeInfo_struct program =
   let bigf = { Visitor_c.default_visitor_c_s with
-    Visitor_c.kini_s = (fun (k,bigf) ini -> 
+    Visitor_c.kini_s = (fun (k,bigf) ini ->
       match k ini with
       | InitList args, ii ->
           (match ii with
@@ -186,7 +188,7 @@ let remove_useless_fakeInfo_struct program =
                  (* sometimes the guy put a normal iicommaopt *)
               then InitList args, [i1;i2]
               else InitList args, [i1;i2;iicommaopt]
-          | [i1;i2;iicommaopt;end_comma_opt] -> 
+          | [i1;i2;iicommaopt;end_comma_opt] ->
 	      (* only in #define. end_comma_opt canot be fake *)
 	      (* not sure if this will be considered ambiguous with a previous
 		 case? *)
@@ -207,24 +209,24 @@ let remove_useless_fakeInfo_struct program =
 (* Tokens1 generation *)
 (*****************************************************************************)
 
-let get_fakeInfo_and_tokens celem toks = 
-  let toks_in  = ref toks in 
+let get_fakeInfo_and_tokens celem toks =
+  let toks_in  = ref toks in
   let toks_out = ref [] in
 
   (* todo? verify good order of position ? *)
-  let pr_elem info = 
+  let pr_elem info =
     match Ast_c.pinfo_of_info info with
-    | FakeTok _ -> 
+    | FakeTok _ ->
         Common.push2 (Fake1 info) toks_out
-    | OriginTok _ | ExpandedTok _ -> 
+    | OriginTok _ | ExpandedTok _ ->
         (* get the associated comments/space/cppcomment tokens *)
         let (before, x, after) =
-	  !toks_in +> Common.split_when (fun tok -> 
+	  !toks_in +> Common.split_when (fun tok ->
 	  info =*= TH.info_of_tok tok)
         in
         assert(info =*= TH.info_of_tok x);
         (*old: assert(before +> List.for_all (TH.is_comment)); *)
-        before +> List.iter (fun x -> 
+        before +> List.iter (fun x ->
           if not (TH.is_comment x)
           then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
           (* case such as  int asm d3("x"); not yet in ast *)
@@ -232,11 +234,11 @@ let get_fakeInfo_and_tokens celem toks =
         before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
         push2 (T1 x) toks_out;
         toks_in := after;
-    | AbstractLineTok _ -> 
+    | AbstractLineTok _ ->
         (* can be called on type info when for instance use -type_c *)
         if !Flag_parsing_c.pretty_print_type_info
         then Common.push2 (Fake1 info) toks_out
-        else raise Impossible (* at this stage *) 
+        else raise Impossible (* at this stage *)
   in
 
   let pr_space _ = () in (* use the spacing that is there already *)
@@ -267,7 +269,7 @@ let displace_fake_nodes toks =
     match fake_info with
       Some(bef,((Fake1 info) as fake),aft) ->
 	(match !(info.cocci_tag) with
-        | Some x -> 
+        | Some x ->
           (match x with
 	    (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
 	    (* move the fake node forwards *)
@@ -286,7 +288,7 @@ let displace_fake_nodes toks =
 	      failwith "fake node should not be before-after"
 	  | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
         )
-        | None -> 
+        | None ->
             bef @ fake :: (loop aft)
         )
     | None -> toks
@@ -298,25 +300,30 @@ let displace_fake_nodes toks =
 (*****************************************************************************)
 
 let comment2t2 = function
-    (Token_c.TCommentCpp x,(info : Token_c.info)) ->
+    (Token_c.TCommentCpp
+       (* not sure if the following list is exhaustive or complete *)
+       (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError),
+     (info : Token_c.info)) ->
+      C2(info.Common.str)
+  | (Token_c.TCommentCpp x,(info : Token_c.info)) ->
       C2("\n"^info.Common.str^"\n")
   | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
 
-let expand_mcode toks = 
+let expand_mcode toks =
   let toks_out = ref [] in
 
   let index = ref 0 in
 
-  let add_elem t minus = 
+  let add_elem t minus =
     match t with
-    | Fake1 info -> 
+    | Fake1 info ->
         let str = Ast_c.str_of_info info in
         if str =$= ""
         then push2 (Fake2) toks_out
         (* perhaps the fake ',' *)
         else push2 (C2 str) toks_out
-          
-  
+
+
     | T1 tok ->
 	(*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
         (* no tag on expandedTok ! *)
@@ -333,11 +340,11 @@ let expand_mcode toks =
 	       "expanded token %s on line %d is either modified or stored in a metavariable"
 	       (TH.str_of_tok tok) (TH.line_of_tok tok)));
 
-        let tok' = tok +> TH.visitor_info_of_tok (fun i -> 
+        let tok' = tok +> TH.visitor_info_of_tok (fun i ->
           { i with cocci_tag = ref Ast_c.emptyAnnot; }
         ) in
 
-        let optindex = 
+        let optindex =
           if TH.is_origin tok && not (TH.is_real_comment tok)
           then begin
               incr index;
@@ -349,13 +356,13 @@ let expand_mcode toks =
         push2 (T2 (tok', minus, optindex)) toks_out
   in
 
-  let expand_info t = 
-    let (mcode,env) = 
+  let expand_info t =
+    let (mcode,env) =
       Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
 
-    let pr_cocci s ln col rcol = 
-      push2 (Cocci2(s,ln,col,rcol)) toks_out  in
-    let pr_c info = 
+    let pr_cocci s ln col rcol hint =
+      push2 (Cocci2(s,ln,col,rcol,hint)) toks_out  in
+    let pr_c info =
       (match Ast_c.pinfo_of_info info with
 	Ast_c.AbstractLineTok _ ->
 	  push2 (C2 (Ast_c.str_of_info info)) toks_out
@@ -368,7 +375,7 @@ let expand_mcode toks =
       List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
 
     let pr_barrier ln col = (* marks a position, used around C code *)
-      push2 (Cocci2("",ln,col,col)) toks_out  in
+      push2 (Cocci2("",ln,col,col,None)) toks_out  in
     let pr_nobarrier ln col = () in (* not needed for linux spacing *)
 
     let pr_cspace _ = push2 (C2 " ") toks_out in
@@ -377,7 +384,7 @@ let expand_mcode toks =
     let pr_arity _ = () (* not interested *) in
 
     let indent _   = push2 Indent_cocci2 toks_out in
-    let unindent _ = push2 Unindent_cocci2 toks_out in
+    let unindent x = push2 (Unindent_cocci2 x) toks_out in
 
     let args_pp =
       (env, pr_cocci, pr_c, pr_cspace,
@@ -388,41 +395,41 @@ let expand_mcode toks =
 	 Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
        indent, unindent) in
 
-    (* old: when for yacfe with partial cocci: 
-     *    add_elem t false; 
+    (* old: when for yacfe with partial cocci:
+     *    add_elem t false;
     *)
 
     (* patch: when need full coccinelle transformation *)
     let unparser = Unparse_cocci.pp_list_list_any args_pp false in
     match mcode with
-    | Ast_cocci.MINUS (_,inst,adj,any_xxs) -> 
+    | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
         (* Why adding ? because I want to have all the information, the whole
-         * set of tokens, so I can then process and remove the 
+         * set of tokens, so I can then process and remove the
          * is_between_two_minus for instance *)
         add_elem t (Min (inst,adj));
         unparser any_xxs Unparse_cocci.InPlace
-    | Ast_cocci.CONTEXT (_,any_befaft) -> 
+    | Ast_cocci.CONTEXT (_,any_befaft) ->
         (match any_befaft with
-        | Ast_cocci.NOTHING -> 
+        | Ast_cocci.NOTHING ->
             add_elem t Ctx
-        | Ast_cocci.BEFORE xxs ->
+        | Ast_cocci.BEFORE (xxs,_) ->
             unparser xxs Unparse_cocci.Before;
             add_elem t Ctx
-        | Ast_cocci.AFTER xxs -> 
+        | Ast_cocci.AFTER (xxs,_) ->
             add_elem t Ctx;
             unparser xxs Unparse_cocci.After;
-        | Ast_cocci.BEFOREAFTER (xxs, yys) -> 
+        | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
             unparser xxs Unparse_cocci.Before;
             add_elem t Ctx;
             unparser yys Unparse_cocci.After;
         )
-    | Ast_cocci.PLUS -> raise Impossible
+    | Ast_cocci.PLUS _ -> raise Impossible
 
   in
 
   toks +> List.iter expand_info;
   List.rev !toks_out
-        
+
 
 (*****************************************************************************)
 (* Tokens2 processing, filtering, adjusting *)
@@ -430,27 +437,28 @@ let expand_mcode toks =
 
 let is_space = function
   | T2(Parser_c.TCommentSpace _,_b,_i) -> true  (* only whitespace *)
-  | _ -> false 
+  | _ -> false
 
 let is_newline = function
   | T2(Parser_c.TCommentNewline _,_b,_i) -> true
   | _ -> false
 
 let is_whitespace = function
-  | (T2 (t,_b,_i)) -> 
+  | (T2 (t,_b,_i)) ->
       (match t with
       | Parser_c.TCommentSpace _ -> true  (* only whitespace *)
       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
       | _ -> false
       )
-  | _ -> false 
+  | _ -> false
 
 let is_minusable_comment = function
-  | (T2 (t,_b,_i)) -> 
+  | (T2 (t,_b,_i)) ->
       (match t with
       | Parser_c.TCommentSpace _   (* only whitespace *)
-      (* patch: coccinelle *)      
+      (* patch: coccinelle *)
       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
+      | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
       | Parser_c.TComment _
       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
@@ -463,14 +471,15 @@ let is_minusable_comment = function
 
       | _ -> false
       )
-  | _ -> false 
+  | _ -> false
 
 let is_minusable_comment_nocpp = function
-  | (T2 (t,_b,_i)) -> 
+  | (T2 (t,_b,_i)) ->
       (match t with
       | Parser_c.TCommentSpace _   (* only whitespace *)
-      (* patch: coccinelle *)      
+      (* patch: coccinelle *)
       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
+      | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
       | Parser_c.TComment _ -> true
       | Parser_c.TCommentCpp (Token_c.CppAttr, _)
       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
@@ -483,39 +492,39 @@ let is_minusable_comment_nocpp = function
 
       | _ -> false
       )
-  | _ -> false 
+  | _ -> false
 
 let all_coccis = function
-    Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 -> true
+    Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 _ -> true
   | _ -> false
 
 (*previously gave up if the first character was a newline, but not clear why*)
 let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
 
 let set_minus_comment adj = function
-  | T2 (t,Ctx,idx) -> 
+  | T2 (t,Ctx,idx) ->
       let str = TH.str_of_tok t in
       (match t with
       | Parser_c.TCommentSpace _
-(* patch: coccinelle *)      
+(* patch: coccinelle *)
       | Parser_c.TCommentNewline _ -> ()
 
-      | Parser_c.TComment _ 
-      | Parser_c.TCommentCpp (Token_c.CppAttr, _) 
+      | Parser_c.TComment _
+      | Parser_c.TCommentCpp (Token_c.CppAttr, _)
       | Parser_c.TCommentCpp (Token_c.CppMacro, _)
       | Parser_c.TCommentCpp (Token_c.CppDirective, _)
-        -> 
+        ->
           pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
 		 (TH.line_of_tok t) str)
       | _ -> raise Impossible
       );
       T2 (t, Min adj, idx)
-(* patch: coccinelle *)   
+(* patch: coccinelle *)
   | T2 (t,Min adj,idx) as x -> x
   | _ -> raise Impossible
 
 let set_minus_comment_or_plus adj = function
-    Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 as x -> x
+    Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 _ as x -> x
   | x -> set_minus_comment adj x
 
 let drop_minus xs =
@@ -527,10 +536,9 @@ let drop_minus xs =
 let remove_minus_and_between_and_expanded_and_fake xs =
 
   (* get rid of exampled and fake tok *)
-  let xs = xs +> Common.exclude (function 
+  let xs = xs +> Common.exclude (function
     | T2 (t,_,_) when TH.is_expanded t -> true
     | Fake2 -> true
-
     | _ -> false
   )
   in
@@ -635,7 +643,7 @@ let remove_minus_and_between_and_expanded_and_fake xs =
 	    @ (adjust_within_minus cp (t2::xs))
 	| (T2(_,Min adj2,_) as t2)::xs ->
 	    let is_whitespace_or_plus = function
-		(T2 _) as x -> is_space x
+		(T2 _) as x -> is_whitespace x
 	      | _ -> true (*plus*) in
 	    if List.for_all is_whitespace_or_plus not_minus_list
 	    then
@@ -745,36 +753,140 @@ let adjust_before_semicolon toks =
 
 let is_ident_like s = s ==~ Common.regexp_alpha
 
-let rec add_space xs = 
+let rec drop_space_at_endline = function
+    [] -> []
+  | [x] -> [x]
+  | ((T2(Parser_c.TCommentSpace _,Ctx,_i)) as a)::rest ->
+      let (outer_spaces,rest) = Common.span is_space rest in
+      let minus_or_comment_or_space_nocpp = function
+	  T2(_,Min adj,_) -> true
+	| (T2(Parser_c.TCommentSpace _,Ctx,_i)) -> true
+	| (T2(Parser_c.TCommentNewline _,Ctx,_i)) -> false
+	| x -> false in
+      let (minus,rest) = Common.span minus_or_comment_or_space_nocpp rest in
+      let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in
+      if List.exists (function T2(_,Min adj,_) -> true | _ -> false) minus
+      then
+	match rest with
+	  ((T2(Parser_c.TCommentNewline _,Ctx,_i)) as a)::rest ->
+	    (* drop trailing spaces *)
+	    minus@a::(drop_space_at_endline rest)
+	| _ -> fail()
+      else fail()
+  | a :: rest -> a :: drop_space_at_endline rest
+
+(* if a removed ( is between two tokens, then add a space *)
+let rec paren_to_space = function
+    [] -> []
+  | [x] -> [x]
+  | [x;y] -> [x;y]
+  | ((T2(_,Ctx,_)) as a)::((T2(t,Min _,_)) as b)::((T2(_,Ctx,_)) as c)::rest
+    when not (is_whitespace a) && TH.str_of_tok t = "(" ->
+      simple_print_all_tokens2 [a;b;c];
+      a :: b :: (C2 " ") :: (paren_to_space (c :: rest))
+  | a :: rest -> a :: (paren_to_space rest)
+
+let rec add_space xs =
   match xs with
   | [] -> []
   | [x] -> [x]
-  | (Cocci2(sx,lnx,_,rcolx) as x)::((Cocci2(sy,lny,lcoly,_)) as y)::xs
+  | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
     when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
       not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
 	(* this only works within a line.  could consider whether
 	   something should be done to add newlines too, rather than
 	   printing them explicitly in unparse_cocci. *)
 	x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
-  | x::y::xs -> 
+  | x::y::xs ->
       let sx = str_of_token2 x in
       let sy = str_of_token2 y in
       if is_ident_like sx && is_ident_like sy
       then x::C2 " "::(add_space (y::xs))
       else x::(add_space (y::xs))
 
-
+(* The following only works for the outermost function call.  Stack records
+the column of all open parentheses.  Space_cell contains the most recent
+comma in the outermost function call.  The goal is to decide whether this
+should be followed by a space or a newline and indent. *)
+let add_newlines toks tabbing_unit =
+  let create_indent n =
+    let (tu,tlen) = 
+      match tabbing_unit with
+	Some ("\t",_) -> ("\t",8)
+      | Some ("",_) -> ("\t",8) (* not sure why... *)
+      | Some (s,_) -> (s,String.length s) (* assuming only spaces *)
+      |	None -> ("\t",8) in
+    let rec loop seen =
+      if seen + tlen <= n
+      then tu ^ loop (seen + tlen)
+      else String.make (n-seen) ' ' in
+    loop 0 in
+  let check_for_newline count x = function
+      Some (start,space_cell) when count > Flag_parsing_c.max_width ->
+	space_cell := "\n"^(create_indent x);
+	Some (x + (count - start))
+    | _ -> None in
+  (* the following is for strings that may contain newline *)
+  let string_length s count =
+    let l = list_of_string s in
+    List.fold_left
+      (function count ->
+	function
+	    '\t' -> count + 8
+	  | '\n' -> 0
+	  | c -> count + 1)
+      count l in
+  let rec loop info count = function
+      [] -> []
+    | ((T2(tok,_,_)) as a)::xs ->
+	a :: loop info (string_length (TH.str_of_tok tok) count) xs
+    | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs ->
+	let (stack,space_cell) = info in
+	let rest =
+	  match hint with
+	    None -> loop info (count + (String.length s)) xs
+	  | Some Unparse_cocci.StartBox ->
+	      let count = count + (String.length s) in
+	      loop (count::stack,space_cell) count xs
+	  | Some Unparse_cocci.EndBox ->
+	      let count = count + (String.length s) in
+	      (match stack with
+		[x] ->
+		  (match check_for_newline count x space_cell with
+		    Some count -> loop ([],None) count xs
+		  | None -> loop ([],None) count xs)
+	      | _ -> loop (List.tl stack,space_cell) count xs)
+	  | Some (Unparse_cocci.SpaceOrNewline sp) ->
+	      let count = count + (String.length s) + 1 (*space*) in
+	      (match stack with
+		[x] ->
+		  (match check_for_newline count x space_cell with
+		    Some count -> loop (stack,Some (x,sp)) count xs
+		  | None -> loop (stack,Some (count,sp)) count xs)
+	      | _ -> loop info count xs) in
+	a :: rest
+    | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs
+    | Fake2 :: _ | Indent_cocci2 :: _
+    | Unindent_cocci2 _::_ ->
+	failwith "unexpected fake, indent, or unindent" in
+  let redo_spaces prev = function
+      Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) ->
+        C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev
+    | t -> t::prev in
+  (match !Flag_parsing_c.spacing with
+    Flag_parsing_c.SMPL -> toks
+  | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks)))
 
 (* When insert some new code, because of a + in a SP, we must add this
  * code at the right place, with the good indentation. So each time we
  * encounter some spacing info, with some newline, we maintain the
  * current indentation level used.
- * 
+ *
  * TODO problems: not accurate. ex: TODO
- * 
+ *
  * TODO: if in #define region, should add a \ \n
  *)
-let new_tabbing2 space = 
+let new_tabbing2 space =
   (list_of_string space)
     +> List.rev
     +> Common.take_until (fun c -> c =<= '\n')
@@ -782,11 +894,12 @@ let new_tabbing2 space =
     +> List.map string_of_char
     +> String.concat ""
 
-let new_tabbing a = 
+let new_tabbing a =
   Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
 
 
-let rec adjust_indentation xs = 
+let rec adjust_indentation xs =
+
   let _current_tabbing = ref "" in
   let tabbing_unit = ref None in
 
@@ -827,20 +940,21 @@ let rec adjust_indentation xs =
     | x::xs -> find_first_tab started xs in
   find_first_tab false xs;
 
-  let rec aux started xs = 
+  let rec aux started xs =
     match xs with
     | [] ->  []
 (* patch: coccinelle *)
     | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
-      ((Cocci2 ("{",_,_,_)) as a)::xs
+      ((Cocci2 ("{",_,_,_,_)) as a)::xs
       when started && str_of_token2 x =$= ")" ->
 	(* to be done for if, etc, but not for a function header *)
 	x::(C2 " ")::a::(aux started xs)
     | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs ->
-	let old_tabbing = !_current_tabbing in 
+	let old_tabbing = !_current_tabbing in
         str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
 	(* only trust the indentation after the first { *)
-	(if started then adjust_tabbing_unit old_tabbing !_current_tabbing);
+	(if started
+	then adjust_tabbing_unit old_tabbing !_current_tabbing);
 	let coccis_rest = Common.span all_coccis xs in
 	(match coccis_rest with
 	  (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
@@ -852,29 +966,31 @@ let rec adjust_indentation xs =
 	  None -> aux started xs
 	| Some (tu,_) ->
 	    _current_tabbing := (!_current_tabbing)^tu;
-	    Cocci2 (tu,-1,-1,-1)::aux started xs)
-    | Unindent_cocci2::xs ->
+	    Cocci2 (tu,-1,-1,-1,None)::aux started xs)
+    | Unindent_cocci2(permanent)::xs ->
 	(match !tabbing_unit with
 	  None -> aux started xs
 	| Some (_,tu) ->
 	    _current_tabbing := remtab tu (!_current_tabbing);
 	    aux started xs)
     (* border between existing code and cocci code *)
-    | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_)) as y)::xs
+    | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs
       when str_of_token2 x =$= "{" ->
 	x::aux true (y::Indent_cocci2::xs)
     | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
       when str_of_token2 y =$= "}" ->
-	x::aux started (y::Unindent_cocci2::xs)
+	x::aux started (y::Unindent_cocci2 true::xs)
     (* starting the body of the function *)
     | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->  x::aux true xs
-    | ((Cocci2("{",_,_,_)) as a)::xs -> a::aux true xs
-    | ((Cocci2("\n",_,_,_)) as x)::xs -> 
+    | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs
+    | ((Cocci2("\n",_,_,_,_)) as x)::Unindent_cocci2(false)::xs ->
+        x::aux started xs
+    | ((Cocci2("\n",_,_,_,_)) as x)::xs ->
             (* dont inline in expr because of weird eval order of ocaml *)
-        let s = !_current_tabbing in 
-        x::Cocci2 (s,-1,-1,-1)::aux started xs
+        let s = !_current_tabbing in
+        x::Cocci2 (s,-1,-1,-1,None)::aux started xs
     | x::xs -> x::aux started xs in
-  aux false xs
+  (aux false xs,!tabbing_unit)
 
 
 let rec find_paren_comma = function
@@ -882,34 +998,34 @@ let rec find_paren_comma = function
 
   (* do nothing if was like this in original file *)
   | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
-    ::xs when p2 =|= p1 + 1 -> 
+    ::xs when p2 =|= p1 + 1 ->
       find_paren_comma (x2::xs)
 
   | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
-    ::xs when p2 =|= p1 + 1 -> 
+    ::xs when p2 =|= p1 + 1 ->
       find_paren_comma (x2::xs)
 
   | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
-    ::xs when p2 =|= p1 + 1 -> 
+    ::xs when p2 =|= p1 + 1 ->
       find_paren_comma (x2::xs)
 
   (* otherwise yes can adjust *)
-  | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs -> 
+  | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
       x2.remove <- true;
       find_paren_comma (x2::xs)
-  | ({ str = "," } as x1)::({ str = ","} as x2)::xs -> 
+  | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
       x1.remove <- true;
       find_paren_comma (x2::xs)
 
-  | ({ str = "," } as x1)::({ str = ")"} as x2)::xs -> 
+  | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
       x1.remove <- true;
       find_paren_comma (x2::xs)
 
-  | x::xs -> 
+  | x::xs ->
       find_paren_comma xs
-  
 
-let fix_tokens toks = 
+
+let fix_tokens toks =
   let toks = toks +> List.map mk_token_extended in
 
   let cleaner = toks +> Common.exclude (function
@@ -941,13 +1057,13 @@ let kind_of_token2 = function
       | FakeTok _ -> raise Impossible (* now a Fake2 *)
       | AbstractLineTok _ -> raise Impossible (* now a KC *)
       )
-  | Unindent_cocci2 | Indent_cocci2 -> raise Impossible
-  
+  | Unindent_cocci2 _ | Indent_cocci2 -> raise Impossible
+
 let end_mark = "!"
 
 let start_mark = function
   | KFake -> "!F!"
-  | KCocci -> "!S!" 
+  | KCocci -> "!S!"
   | KC -> "!A!"
   | KExpanded -> "!E!"
   | KOrigin -> ""
@@ -956,7 +1072,7 @@ let print_all_tokens2 pr xs =
   if !Flag_parsing_c.debug_unparsing
   then
     let current_kind = ref KOrigin in
-    xs +> List.iter (fun t -> 
+    xs +> List.iter (fun t ->
       let newkind = kind_of_token2 t in
       if newkind =*= !current_kind
       then pr (str_of_token2 t)
@@ -967,9 +1083,9 @@ let print_all_tokens2 pr xs =
         current_kind := newkind
       end
     );
-  else 
+  else
     xs +> List.iter (fun x -> pr (str_of_token2 x))
-          
+
 
 
 
@@ -984,8 +1100,8 @@ let print_all_tokens2 pr xs =
  * fancy stuff when a function was not modified at all. Just need to
  * print the list of token as-is. But now pretty_print_c.ml handles
  * almost everything so maybe less useful. Maybe PPviatok allows to
- * optimize a little the pretty printing. 
- * 
+ * optimize a little the pretty printing.
+ *
  * update: now have PPviastr which goes even faster than PPviatok, so
  * PPviatok has disappeared.
  *)
@@ -996,37 +1112,37 @@ type ppmethod = PPnormal | PPviastr
 
 
 (* The pp_program function will call pretty_print_c.ml with a special
- * function to print the leaf components, the tokens. When we want to 
+ * function to print the leaf components, the tokens. When we want to
  * print a token, we need to print also maybe the space and comments that
- * were close to it in the original file (and that was omitted during the 
+ * were close to it in the original file (and that was omitted during the
  * parsing phase), and honor what the cocci-info attached to the token says.
  * Maybe we will not print the token if it's a MINUS-token, and maybe we will
- * print it and also print some cocci-code attached in a PLUS to it. 
+ * print it and also print some cocci-code attached in a PLUS to it.
  * So we will also maybe call unparse_cocci. Because the cocci-code may
  * contain metavariables, unparse_cocci will in fact sometimes call back
  * pretty_print_c (which will this time don't call back again unparse_cocci)
  *)
 
-let pp_program2 xs outfile  = 
-  Common.with_open_outfile outfile (fun (pr,chan) -> 
-    let pr s = 
-      if !Flag_parsing_c.debug_unparsing 
+let pp_program2 xs outfile  =
+  Common.with_open_outfile outfile (fun (pr,chan) ->
+    let pr s =
+      if !Flag_parsing_c.debug_unparsing
       then begin pr2_no_nl s; flush stderr end
-      else pr s  
+      else pr s
         (* flush chan; *)
         (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
     in
-    
-    xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) -> 
+
+    xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
       (* here can still work on ast *)
       let e = remove_useless_fakeInfo_struct e in
-      
+
       match ppmethod with
       | PPnormal ->
           (* now work on tokens *)
 
           (* phase1: just get all the tokens, all the information *)
-          assert(toks_e +> List.for_all (fun t -> 
+          assert(toks_e +> List.for_all (fun t ->
 	    TH.is_origin t or TH.is_expanded t
           ));
           let toks = get_fakeInfo_and_tokens e toks_e in
@@ -1041,15 +1157,18 @@ let pp_program2 xs outfile  =
 	    then drop_minus toks (* nothing to do for sgrep *)
 	    else
               (* phase2: can now start to filter and adjust *)
-              let toks = adjust_indentation toks in
+              let (toks,tu) = adjust_indentation toks in
 	      let toks = adjust_before_semicolon toks in(*before remove minus*)
+	      let toks = drop_space_at_endline toks in
+	      let toks = paren_to_space toks in
               let toks = remove_minus_and_between_and_expanded_and_fake toks in
               (* assert Origin + Cocci + C and no minus *)
               let toks = add_space toks in
+	      let toks = add_newlines toks tu in
               let toks = fix_tokens toks in
 	      toks in
 
-          (* in theory here could reparse and rework the ast! or 
+          (* in theory here could reparse and rework the ast! or
            * apply some SP. Not before cos julia may have generated
            * not parsable file. Need do unparsing_tricks call before being
            * ready to reparse. *)
@@ -1059,10 +1178,10 @@ let pp_program2 xs outfile  =
     )
   )
 
-let pp_program a b = 
+let pp_program a b =
   Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
 
 
-let pp_program_default xs outfile = 
+let pp_program_default xs outfile =
   let xs' = xs +> List.map (fun x -> x, PPnormal) in
   pp_program xs' outfile