Release coccinelle-0.2.4rc5

[bpt/coccinelle.git] / parsing_c / unparse_c.ml
diff --git a/parsing_c/unparse_c.ml b/parsing_c/unparse_c.ml

index 80a3a80..82eda1a 100644 (file)
--- a/parsing_c/unparse_c.ml
+++ b/parsing_c/unparse_c.ml
@@ -51,7 +51,8 @@ type token1 =
   * type.
   *)
  type min =
-    Min of (int list (* match numbers *) * int (* adjacency information *))
+    Min of (int list (* match numbers from witness trees *) *
+             int (* adjacency information *))
    | Ctx
  
  type token2 =
@@ -59,7 +60,9 @@ type token2 =
            int option (* orig index, abstracting away comments and space *)
    | Fake2
    | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
+       * Unparse_cocci.nlhint option
    | C2 of string
+  | Comma of string
    | Indent_cocci2
    | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
  
@@ -96,8 +99,9 @@ let print_token1 = function
  let str_of_token2 = function
    | T2 (t,_,_) -> TH.str_of_tok t
    | Fake2 -> ""
-  | Cocci2 (s,_,_,_) -> s
+  | Cocci2 (s,_,_,_,_) -> s
    | C2 s -> s
+  | Comma s -> s
    | Indent_cocci2 -> ""
    | Unindent_cocci2 _ -> ""
  
@@ -111,13 +115,14 @@ let print_token2 = function
         | Ctx -> "" in
        "T2:"^b_str^TH.str_of_tok t
    | Fake2 -> "fake"
-  | Cocci2 (s,_,lc,rc) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
+  | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
    | C2 s -> "C2:"^s
+  | Comma s -> "Comma:"^s
    | Indent_cocci2 -> "Indent"
    | Unindent_cocci2 _ -> "Unindent"
  
  let simple_print_all_tokens1 l =
-  List.iter (function x -> Printf.printf "%s " (print_token1 x)) l;
+  List.iter (function x -> Printf.printf "|%s| " (print_token1 x)) l;
    Printf.printf "\n"
  
  let simple_print_all_tokens2 l =
@@ -299,7 +304,12 @@ let displace_fake_nodes toks =
  (*****************************************************************************)
  
  let comment2t2 = function
-    (Token_c.TCommentCpp x,(info : Token_c.info)) ->
+    (Token_c.TCommentCpp
+       (* not sure iif the following list is exhaustive or complete *)
+       (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError),
+     (info : Token_c.info)) ->
+      C2(info.Common.str)
+  | (Token_c.TCommentCpp x,(info : Token_c.info)) ->
        C2("\n"^info.Common.str^"\n")
    | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
  
@@ -314,8 +324,9 @@ let expand_mcode toks =
          let str = Ast_c.str_of_info info in
          if str =$= ""
          then push2 (Fake2) toks_out
-        (* perhaps the fake ',' *)
-        else push2 (C2 str) toks_out
+       (* fx the fake "," at the end of a structure or enum.
+          no idea what other fake info there can be... *)
+       else push2 (Comma str) toks_out
  
  
      | T1 tok ->
@@ -354,8 +365,8 @@ let expand_mcode toks =
      let (mcode,env) =
        Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
  
-    let pr_cocci s ln col rcol =
-      push2 (Cocci2(s,ln,col,rcol)) toks_out  in
+    let pr_cocci s ln col rcol hint =
+      push2 (Cocci2(s,ln,col,rcol,hint)) toks_out  in
      let pr_c info =
        (match Ast_c.pinfo_of_info info with
         Ast_c.AbstractLineTok _ ->
@@ -369,7 +380,7 @@ let expand_mcode toks =
        List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
  
      let pr_barrier ln col = (* marks a position, used around C code *)
-      push2 (Cocci2("",ln,col,col)) toks_out  in
+      push2 (Cocci2("",ln,col,col,None)) toks_out  in
      let pr_nobarrier ln col = () in (* not needed for linux spacing *)
  
      let pr_cspace _ = push2 (C2 " ") toks_out in
@@ -489,7 +500,7 @@ let is_minusable_comment_nocpp = function
    | _ -> false
  
  let all_coccis = function
-    Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 _ -> true
+    Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ -> true
    | _ -> false
  
  (*previously gave up if the first character was a newline, but not clear why*)
@@ -518,7 +529,7 @@ let set_minus_comment adj = function
    | _ -> raise Impossible
  
  let set_minus_comment_or_plus adj = function
-    Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 _ as x -> x
+    Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ as x -> x
    | x -> set_minus_comment adj x
  
  let drop_minus xs =
@@ -527,16 +538,17 @@ let drop_minus xs =
      | _ -> false
    )
  
-let remove_minus_and_between_and_expanded_and_fake xs =
-
-  (* get rid of exampled and fake tok *)
-  let xs = xs +> Common.exclude (function
+let drop_expanded_and_fake xs =
+  xs +> Common.exclude (function
      | T2 (t,_,_) when TH.is_expanded t -> true
      | Fake2 -> true
-
      | _ -> false
    )
-  in
+
+let remove_minus_and_between_and_expanded_and_fake xs =
+
+  (* get rid of expanded and fake tok *)
+  let xs = drop_expanded_and_fake xs in
  
    let minus_or_comment = function
        T2(_,Min adj,_) -> true
@@ -551,55 +563,6 @@ let remove_minus_and_between_and_expanded_and_fake xs =
      (* non-empty intersection of witness trees *)
      not ((Common.inter_set index1 index2) = []) in
  
-  let rec adjust_around_minus = function
-      [] -> []
-    | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
-      (((T2(_,Min adj,_))::_) as rest) ->
-       (* an initial newline, as in a replaced statement *)
-       let (between_minus,rest) = Common.span minus_or_comment rest in
-       (match rest with
-         [] -> (set_minus_comment adj x) ::
-           (List.map (set_minus_comment adj) between_minus)
-       | T2(_,Ctx,_)::_ when is_newline (List.hd(List.rev between_minus)) ->
-           (set_minus_comment adj x)::(adjust_within_minus between_minus) @
-           (adjust_around_minus rest)
-       | _ ->
-           x :: (adjust_within_minus between_minus) @
-           (adjust_around_minus rest))
-    | ((T2(_,Min adj,_))::_) as rest ->
-       (* no initial newline, as in a replaced expression *)
-       let (between_minus,rest) = Common.span minus_or_comment rest in
-       (match rest with
-         [] ->
-           (List.map (set_minus_comment adj) between_minus)
-       | _ ->
-           (adjust_within_minus between_minus) @
-           (adjust_around_minus rest))
-    | x::xs -> x::adjust_around_minus xs
-  and adjust_within_minus = function
-      [] -> []
-    | (T2(_,Min adj1,_) as t1)::xs ->
-       let (between_minus,rest) = Common.span is_minusable_comment xs in
-       (match rest with
-          [] ->
-           (* keep last newline *)
-           let (drop,keep) =
-             try
-               let (drop,nl,keep) =
-                 Common.split_when is_newline between_minus in
-               (drop, nl :: keep)
-             with Not_found -> (between_minus,[]) in
-           t1 ::
-           List.map (set_minus_comment_or_plus adj1) drop @
-           keep
-       | (T2(_,Min adj2,_) as t2)::rest when common_adj adj1 adj2 ->
-           t1::
-            List.map (set_minus_comment_or_plus adj1) between_minus @
-            adjust_within_minus (t2::rest)
-       | x::xs ->
-           t1::(between_minus @ adjust_within_minus (x::xs)))
-    | _ -> failwith "only minus and space possible" in
-
    (* new idea: collects regions not containing non-space context code
       if two adjacent adjacent minus tokens satisfy common_adj then delete
       all spaces, comments etc between them
@@ -626,7 +589,8 @@ let remove_minus_and_between_and_expanded_and_fake xs =
         let (minus_list,rest) = Common.span not_context (t1::xs) in
         let contains_plus = List.exists is_plus minus_list in
         adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
-    | x::xs -> x :: adjust_around_minus xs
+    | x::xs ->
+       x :: adjust_around_minus xs
    and adjust_within_minus cp (* contains plus *) = function
        (T2(_,Min adj1,_) as t1)::xs ->
         let not_minus = function T2(_,Min _,_) -> false | _ -> true in
@@ -637,14 +601,12 @@ let remove_minus_and_between_and_expanded_and_fake xs =
             (List.map (set_minus_comment_or_plus adj1) not_minus_list)
             @ (adjust_within_minus cp (t2::xs))
         | (T2(_,Min adj2,_) as t2)::xs ->
-           let is_whitespace_or_plus = function
-               (T2 _) as x -> is_space x
-             | _ -> true (*plus*) in
-           if List.for_all is_whitespace_or_plus not_minus_list
+           if not cp && List.for_all is_whitespace not_minus_list
             then
               (List.map (set_minus_comment_or_plus adj1) not_minus_list)
               @ (adjust_within_minus cp (t2::xs))
-           else not_minus_list @ (adjust_within_minus cp (t2::xs))
+           else
+             not_minus_list @ (adjust_within_minus cp (t2::xs))
         | _ ->
             if cp
             then xs
@@ -657,7 +619,7 @@ let remove_minus_and_between_and_expanded_and_fake xs =
        (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false
      | _ -> true
    and is_plus = function
-      C2 _ | Cocci2 _ -> true
+      C2 _ | Comma _ | Cocci2 _ -> true
      | _ -> false in
  
    let xs = adjust_around_minus xs in
@@ -722,6 +684,18 @@ let remove_minus_and_between_and_expanded_and_fake xs =
         m ::
         (List.map (set_minus_comment adj) spaces) @
         (adjust_before_brace rest)
+    | ((T2 (t0, Ctx, idx0)) as m0) :: ((T2 (t, Min adj, idx)) as m) :: rest
+       when TH.str_of_tok t0 = "" ->
+         (* This is for the case of a #define that is completely deleted,
+            because a #define has a strange EOL token at the end.
+            We hope there i no other kind of token that is represented by
+            "", but it seems like changing the kind of token might break
+            the end of entity recognition in the C parser.
+            See parsing_hacks.ml *)
+         let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
+         m0 :: m ::
+         (List.map (set_minus_comment adj) spaces) @
+         (adjust_before_brace rest)
      | rest -> adjust_before_brace rest in
  
    let xs = List.rev (from_newline (List.rev xs)) in
@@ -748,11 +722,43 @@ let adjust_before_semicolon toks =
  
  let is_ident_like s = s ==~ Common.regexp_alpha
  
+let rec drop_space_at_endline = function
+    [] -> []
+  | [x] -> [x]
+  | ((T2(Parser_c.TCommentSpace _,Ctx,_i)) as a)::rest ->
+      let (outer_spaces,rest) = Common.span is_space rest in
+      let minus_or_comment_or_space_nocpp = function
+         T2(_,Min adj,_) -> true
+       | (T2(Parser_c.TCommentSpace _,Ctx,_i)) -> true
+       | (T2(Parser_c.TCommentNewline _,Ctx,_i)) -> false
+       | x -> false in
+      let (minus,rest) = Common.span minus_or_comment_or_space_nocpp rest in
+      let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in
+      if List.exists (function T2(_,Min adj,_) -> true | _ -> false) minus
+      then
+       match rest with
+         ((T2(Parser_c.TCommentNewline _,Ctx,_i)) as a)::rest ->
+           (* drop trailing spaces *)
+           minus@a::(drop_space_at_endline rest)
+       | _ -> fail()
+      else fail()
+  | a :: rest -> a :: drop_space_at_endline rest
+
+(* if a removed ( is between two tokens, then add a space *)
+let rec paren_to_space = function
+    [] -> []
+  | [x] -> [x]
+  | [x;y] -> [x;y]
+  | ((T2(_,Ctx,_)) as a)::((T2(t,Min _,_)) as b)::((T2(_,Ctx,_)) as c)::rest
+    when not (is_whitespace a) && TH.str_of_tok t = "(" ->
+      a :: b :: (C2 " ") :: (paren_to_space (c :: rest))
+  | a :: rest -> a :: (paren_to_space rest)
+
  let rec add_space xs =
    match xs with
    | [] -> []
    | [x] -> [x]
-  | (Cocci2(sx,lnx,_,rcolx) as x)::((Cocci2(sy,lny,lcoly,_)) as y)::xs
+  | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
      when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
        not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
         (* this only works within a line.  could consider whether
@@ -766,7 +772,93 @@ let rec add_space xs =
        then x::C2 " "::(add_space (y::xs))
        else x::(add_space (y::xs))
  
+(* A fake comma is added at the end of an unordered initlist or a enum
+decl, if the initlist or enum doesn't already end in a comma.  This is only
+needed if there is + code, ie if we see Cocci after it in the code sequence *)
  
+let rec drop_end_comma = function
+    [] -> []
+  | [x] -> [x]
+  | ((Comma ",") as x) :: rest ->
+      let (newlines,rest2) = Common.span is_whitespace rest in
+      (match rest2 with
+       (Cocci2 _) :: _ -> x :: drop_end_comma rest
+      |        _ -> drop_end_comma rest)
+  | x :: xs -> x :: drop_end_comma xs
+
+(* The following only works for the outermost function call.  Stack records
+the column of all open parentheses.  Space_cell contains the most recent
+comma in the outermost function call.  The goal is to decide whether this
+should be followed by a space or a newline and indent. *)
+let add_newlines toks tabbing_unit =
+  let create_indent n =
+    let (tu,tlen) = 
+      match tabbing_unit with
+       Some ("\t",_) -> ("\t",8)
+      | Some ("",_) -> ("\t",8) (* not sure why... *)
+      | Some (s,_) -> (s,String.length s) (* assuming only spaces *)
+      |        None -> ("\t",8) in
+    let rec loop seen =
+      if seen + tlen <= n
+      then tu ^ loop (seen + tlen)
+      else String.make (n-seen) ' ' in
+    loop 0 in
+  let check_for_newline count x = function
+      Some (start,space_cell) when count > Flag_parsing_c.max_width ->
+       space_cell := "\n"^(create_indent x);
+       Some (x + (count - start))
+    | _ -> None in
+  (* the following is for strings that may contain newline *)
+  let string_length s count =
+    let l = list_of_string s in
+    List.fold_left
+      (function count ->
+       function
+           '\t' -> count + 8
+         | '\n' -> 0
+         | c -> count + 1)
+      count l in
+  let rec loop info count = function
+      [] -> []
+    | ((T2(tok,_,_)) as a)::xs ->
+       a :: loop info (string_length (TH.str_of_tok tok) count) xs
+    | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs ->
+       let (stack,space_cell) = info in
+       let rest =
+         match hint with
+           None -> loop info (count + (String.length s)) xs
+         | Some Unparse_cocci.StartBox ->
+             let count = count + (String.length s) in
+             loop (count::stack,space_cell) count xs
+         | Some Unparse_cocci.EndBox ->
+             let count = count + (String.length s) in
+             (match stack with
+               [x] ->
+                 (match check_for_newline count x space_cell with
+                   Some count -> loop ([],None) count xs
+                 | None -> loop ([],None) count xs)
+             | _ -> loop (List.tl stack,space_cell) count xs)
+         | Some (Unparse_cocci.SpaceOrNewline sp) ->
+             let count = count + (String.length s) + 1 (*space*) in
+             (match stack with
+               [x] ->
+                 (match check_for_newline count x space_cell with
+                   Some count -> loop (stack,Some (x,sp)) count xs
+                 | None -> loop (stack,Some (count,sp)) count xs)
+             | _ -> loop info count xs) in
+       a :: rest
+    | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs
+    | ((Comma(s)) as a)::xs -> a :: loop info (string_length s count) xs
+    | Fake2 :: _ | Indent_cocci2 :: _
+    | Unindent_cocci2 _::_ ->
+       failwith "unexpected fake, indent, or unindent" in
+  let redo_spaces prev = function
+      Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) ->
+        C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev
+    | t -> t::prev in
+  (match !Flag_parsing_c.spacing with
+    Flag_parsing_c.SMPL -> toks
+  | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks)))
  
  (* When insert some new code, because of a + in a SP, we must add this
   * code at the right place, with the good indentation. So each time we
@@ -790,6 +882,7 @@ let new_tabbing a =
  
  
  let rec adjust_indentation xs =
+
    let _current_tabbing = ref "" in
    let tabbing_unit = ref None in
  
@@ -835,7 +928,7 @@ let rec adjust_indentation xs =
      | [] ->  []
  (* patch: coccinelle *)
      | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
-      ((Cocci2 ("{",_,_,_)) as a)::xs
+      ((Cocci2 ("{",_,_,_,_)) as a)::xs
        when started && str_of_token2 x =$= ")" ->
         (* to be done for if, etc, but not for a function header *)
         x::(C2 " ")::a::(aux started xs)
@@ -856,7 +949,7 @@ let rec adjust_indentation xs =
           None -> aux started xs
         | Some (tu,_) ->
             _current_tabbing := (!_current_tabbing)^tu;
-           Cocci2 (tu,-1,-1,-1)::aux started xs)
+           Cocci2 (tu,-1,-1,-1,None)::aux started xs)
      | Unindent_cocci2(permanent)::xs ->
         (match !tabbing_unit with
           None -> aux started xs
@@ -864,7 +957,7 @@ let rec adjust_indentation xs =
             _current_tabbing := remtab tu (!_current_tabbing);
             aux started xs)
      (* border between existing code and cocci code *)
-    | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_)) as y)::xs
+    | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs
        when str_of_token2 x =$= "{" ->
         x::aux true (y::Indent_cocci2::xs)
      | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
@@ -872,15 +965,15 @@ let rec adjust_indentation xs =
         x::aux started (y::Unindent_cocci2 true::xs)
      (* starting the body of the function *)
      | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->  x::aux true xs
-    | ((Cocci2("{",_,_,_)) as a)::xs -> a::aux true xs
-    | ((Cocci2("\n",_,_,_)) as x)::Unindent_cocci2(false)::xs ->
+    | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs
+    | ((Cocci2("\n",_,_,_,_)) as x)::Unindent_cocci2(false)::xs ->
          x::aux started xs
-    | ((Cocci2("\n",_,_,_)) as x)::xs ->
+    | ((Cocci2("\n",_,_,_,_)) as x)::xs ->
              (* dont inline in expr because of weird eval order of ocaml *)
          let s = !_current_tabbing in
-        x::Cocci2 (s,-1,-1,-1)::aux started xs
+        x::Cocci2 (s,-1,-1,-1,None)::aux started xs
      | x::xs -> x::aux started xs in
-  aux false xs
+  (aux false xs,!tabbing_unit)
  
  
  let rec find_paren_comma = function
@@ -940,6 +1033,7 @@ let kind_of_token2 = function
    | Fake2 -> KFake
    | Cocci2 _ -> KCocci
    | C2 _ -> KC
+  | Comma _ -> KC
    | T2 (t,_,_) ->
        (match TH.pinfo_of_tok t with
        | ExpandedTok _ -> KExpanded
@@ -1039,21 +1133,29 @@ let pp_program2 xs outfile  =
           let toks = displace_fake_nodes toks in
            (* assert Origin;ExpandedTok;Faketok *)
            let toks = expand_mcode toks in
+
            (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
             * and no tag information, just NOTHING. *)
  
+
           let toks =
             if !Flag.sgrep_mode2
-           then drop_minus toks (* nothing to do for sgrep *)
+           then
+             (* nothing else to do for sgrep *)
+             drop_expanded_and_fake (drop_minus toks)
             else
                (* phase2: can now start to filter and adjust *)
-              let toks = adjust_indentation toks in
+             let (toks,tu) = adjust_indentation toks in
               let toks = adjust_before_semicolon toks in(*before remove minus*)
+             let toks = drop_space_at_endline toks in
+             let toks = paren_to_space toks in
+             let toks = drop_end_comma toks in
                let toks = remove_minus_and_between_and_expanded_and_fake toks in
                (* assert Origin + Cocci + C and no minus *)
                let toks = add_space toks in
+             let toks = add_newlines toks tu in
                let toks = fix_tokens toks in
-             toks in
+              toks in
  
            (* in theory here could reparse and rework the ast! or
             * apply some SP. Not before cos julia may have generated