* type.
*)
type min =
- Min of (int list (* match numbers *) * int (* adjacency information *))
+ Min of (int list (* match numbers from witness trees *) *
+ int (* adjacency information *))
| Ctx
type token2 =
| Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
* Unparse_cocci.nlhint option
| C2 of string
+ | Comma of string
| Indent_cocci2
| Unindent_cocci2 of bool (* true for permanent, false for temporary *)
| Fake2 -> ""
| Cocci2 (s,_,_,_,_) -> s
| C2 s -> s
+ | Comma s -> s
| Indent_cocci2 -> ""
| Unindent_cocci2 _ -> ""
| Fake2 -> "fake"
| Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
| C2 s -> "C2:"^s
+ | Comma s -> "Comma:"^s
| Indent_cocci2 -> "Indent"
| Unindent_cocci2 _ -> "Unindent"
let str = Ast_c.str_of_info info in
if str =$= ""
then push2 (Fake2) toks_out
- (* perhaps the fake ',' *)
- else push2 (C2 str) toks_out
+ (* e.g. the fake "," at the end of a structure or enum.
+ no idea what other fake info there can be... *)
+ else push2 (Comma str) toks_out
| T1 tok ->
| _ -> false
let all_coccis = function
- Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 _ -> true
+ Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ -> true
| _ -> false
(*previously gave up if the first character was a newline, but not clear why*)
| _ -> raise Impossible
let set_minus_comment_or_plus adj = function
- Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 _ as x -> x
+ Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ as x -> x
| x -> set_minus_comment adj x
let drop_minus xs =
| _ -> false
)
-let remove_minus_and_between_and_expanded_and_fake xs =
-
- (* get rid of exampled and fake tok *)
- let xs = xs +> Common.exclude (function
+let drop_expanded_and_fake xs = (* Filters xs through Common.exclude.
+   The predicate is true for every T2 token whose underlying token
+   satisfies TH.is_expanded and for every Fake2 token, and false for all
+   other tokens.  Common.exclude is defined elsewhere; presumably it
+   removes exactly the elements for which the predicate is true. *)
+ xs +> Common.exclude (function
| T2 (t,_,_) when TH.is_expanded t -> true
| Fake2 -> true
-
| _ -> false
)
- in
+
+let remove_minus_and_between_and_expanded_and_fake xs =
+
+ (* get rid of expanded and fake tok *)
+ let xs = drop_expanded_and_fake xs in
let minus_or_comment = function
T2(_,Min adj,_) -> true
(* non-empty intersection of witness trees *)
not ((Common.inter_set index1 index2) = []) in
- let rec adjust_around_minus = function
- [] -> []
- | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
- (((T2(_,Min adj,_))::_) as rest) ->
- (* an initial newline, as in a replaced statement *)
- let (between_minus,rest) = Common.span minus_or_comment rest in
- (match rest with
- [] -> (set_minus_comment adj x) ::
- (List.map (set_minus_comment adj) between_minus)
- | T2(_,Ctx,_)::_ when is_newline (List.hd(List.rev between_minus)) ->
- (set_minus_comment adj x)::(adjust_within_minus between_minus) @
- (adjust_around_minus rest)
- | _ ->
- x :: (adjust_within_minus between_minus) @
- (adjust_around_minus rest))
- | ((T2(_,Min adj,_))::_) as rest ->
- (* no initial newline, as in a replaced expression *)
- let (between_minus,rest) = Common.span minus_or_comment rest in
- (match rest with
- [] ->
- (List.map (set_minus_comment adj) between_minus)
- | _ ->
- (adjust_within_minus between_minus) @
- (adjust_around_minus rest))
- | x::xs -> x::adjust_around_minus xs
- and adjust_within_minus = function
- [] -> []
- | (T2(_,Min adj1,_) as t1)::xs ->
- let (between_minus,rest) = Common.span is_minusable_comment xs in
- (match rest with
- [] ->
- (* keep last newline *)
- let (drop,keep) =
- try
- let (drop,nl,keep) =
- Common.split_when is_newline between_minus in
- (drop, nl :: keep)
- with Not_found -> (between_minus,[]) in
- t1 ::
- List.map (set_minus_comment_or_plus adj1) drop @
- keep
- | (T2(_,Min adj2,_) as t2)::rest when common_adj adj1 adj2 ->
- t1::
- List.map (set_minus_comment_or_plus adj1) between_minus @
- adjust_within_minus (t2::rest)
- | x::xs ->
- t1::(between_minus @ adjust_within_minus (x::xs)))
- | _ -> failwith "only minus and space possible" in
-
(* new idea: collects regions not containing non-space context code
if two adjacent adjacent minus tokens satisfy common_adj then delete
all spaces, comments etc between them
let (minus_list,rest) = Common.span not_context (t1::xs) in
let contains_plus = List.exists is_plus minus_list in
adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
- | x::xs -> x :: adjust_around_minus xs
+ | x::xs ->
+ x :: adjust_around_minus xs
and adjust_within_minus cp (* contains plus *) = function
(T2(_,Min adj1,_) as t1)::xs ->
let not_minus = function T2(_,Min _,_) -> false | _ -> true in
(List.map (set_minus_comment_or_plus adj1) not_minus_list)
@ (adjust_within_minus cp (t2::xs))
| (T2(_,Min adj2,_) as t2)::xs ->
- let is_whitespace_or_plus = function
- (T2 _) as x -> is_whitespace x
- | _ -> true (*plus*) in
- if List.for_all is_whitespace_or_plus not_minus_list
+ if not cp && List.for_all is_whitespace not_minus_list
then
(List.map (set_minus_comment_or_plus adj1) not_minus_list)
@ (adjust_within_minus cp (t2::xs))
- else not_minus_list @ (adjust_within_minus cp (t2::xs))
+ else
+ not_minus_list @ (adjust_within_minus cp (t2::xs))
| _ ->
if cp
then xs
(T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false
| _ -> true
and is_plus = function
- C2 _ | Cocci2 _ -> true
+ C2 _ | Comma _ | Cocci2 _ -> true
| _ -> false in
let xs = adjust_around_minus xs in
m ::
(List.map (set_minus_comment adj) spaces) @
(adjust_before_brace rest)
+ | ((T2 (t0, Ctx, idx0)) as m0) :: ((T2 (t, Min adj, idx)) as m) :: rest
+ when TH.str_of_tok t0 = "" ->
+ (* This is for the case of a #define that is completely deleted,
+ because a #define has a strange EOL token at the end.
+ We hope there is no other kind of token that is represented by
+ "", but it seems like changing the kind of token might break
+ the end of entity recognition in the C parser.
+ See parsing_hacks.ml *)
+ let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
+ m0 :: m ::
+ (List.map (set_minus_comment adj) spaces) @
+ (adjust_before_brace rest)
| rest -> adjust_before_brace rest in
let xs = List.rev (from_newline (List.rev xs)) in
let is_ident_like s = s ==~ Common.regexp_alpha
+let rec drop_space_at_endline = function (* Removes the context spaces
+   that precede a run of removed (minus) code when that run is directly
+   followed by a context newline.  In every other situation the tokens
+   are returned as they were.  Lists of length 0 or 1 are returned
+   unchanged. *)
+ [] -> []
+ | [x] -> [x]
+ | ((T2(Parser_c.TCommentSpace _,Ctx,_i)) as a)::rest -> (* a: context space *)
+ let (outer_spaces,rest) = Common.span is_space rest in (* is_space is defined elsewhere; presumably it accepts further space tokens *)
+ let minus_or_comment_or_space_nocpp = function (* true for any minus token and for context spaces, false for everything else *)
+ T2(_,Min adj,_) -> true
+ | (T2(Parser_c.TCommentSpace _,Ctx,_i)) -> true
+ | (T2(Parser_c.TCommentNewline _,Ctx,_i)) -> false (* same answer as the catch-all below *)
+ | x -> false in
+ let (minus,rest) = Common.span minus_or_comment_or_space_nocpp rest in
+ let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in (* fallback: keep every token and continue after the run *)
+ if List.exists (function T2(_,Min adj,_) -> true | _ -> false) minus (* act only if the run contains at least one minus token *)
+ then
+ match rest with
+ ((T2(Parser_c.TCommentNewline _,Ctx,_i)) as a)::rest -> (* this a is the newline; it shadows the space bound above *)
+ (* drop trailing spaces: the leading space and outer_spaces are
+    omitted from the result; the minus run, including any context
+    spaces inside it, and the newline are kept *)
+ minus@a::(drop_space_at_endline rest)
+ | _ -> fail()
+ else fail()
+ | a :: rest -> a :: drop_space_at_endline rest
+
+(* if a removed ( is between two tokens, then add a space.
+   The pattern recognised is three consecutive T2 tokens: a context
+   token a, a minus token b whose text according to TH.str_of_tok is an
+   opening parenthesis, and a context token c.  a must not satisfy
+   is_whitespace; c is not tested for whitespace.  On a match the result
+   keeps a and b, inserts a C2 token holding a single space, and
+   continues from c, so c can itself be the first token of a later
+   match.  Lists with fewer than three tokens are returned unchanged. *)
+let rec paren_to_space = function
+ [] -> []
+ | [x] -> [x]
+ | [x;y] -> [x;y]
+ | ((T2(_,Ctx,_)) as a)::((T2(t,Min _,_)) as b)::((T2(_,Ctx,_)) as c)::rest
+ when not (is_whitespace a) && TH.str_of_tok t = "(" ->
+ a :: b :: (C2 " ") :: (paren_to_space (c :: rest))
+ | a :: rest -> a :: (paren_to_space rest)
+
let rec add_space xs =
match xs with
| [] -> []
something should be done to add newlines too, rather than
printing them explicitly in unparse_cocci. *)
x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
- | x::y::xs ->
+ | ((T2(_,Ctx,_)) as x)::((Cocci2 _) as y)::xs -> (* add space on boundary *)
+ let sx = str_of_token2 x in
+ let sy = str_of_token2 y in
+ if is_ident_like sx && (is_ident_like sy or List.mem sy ["="])
+ then x::C2 " "::(add_space (y::xs))
+ else x::(add_space (y::xs))
+ | x::y::xs -> (* not boundary, not sure if it is possible *)
let sx = str_of_token2 x in
let sy = str_of_token2 y in
if is_ident_like sx && is_ident_like sy
then x::C2 " "::(add_space (y::xs))
else x::(add_space (y::xs))
+(* A fake comma is added at the end of an unordered initlist or an enum
+decl, if the initlist or enum doesn't already end in a comma. This is only
+needed if there is + code, ie if we see Cocci after it in the code sequence.
+Concretely: a Comma token whose string is exactly a comma is kept only when
+the first token after any following whitespace (as judged by is_whitespace)
+is a Cocci2 token; otherwise the Comma is removed.  The whitespace tokens
+themselves are always kept, because the recursion continues on rest.
+A one-element list is returned unchanged, even if that element is a Comma.
+Comma tokens carrying any other string fall to the last case and are kept. *)
+
+let rec drop_end_comma = function
+ [] -> []
+ | [x] -> [x]
+ | ((Comma ",") as x) :: rest ->
+ let (newlines,rest2) = Common.span is_whitespace rest in (* newlines is not used; only rest2 is inspected *)
+ (match rest2 with
+ (Cocci2 _) :: _ -> x :: drop_end_comma rest
+ | _ -> drop_end_comma rest)
+ | x :: xs -> x :: drop_end_comma xs
+
(* The following only works for the outermost function call. Stack records
the column of all open parentheses. Space_cell contains the most recent
comma in the outermost function call. The goal is to decide whether this
| _ -> loop info count xs) in
a :: rest
| ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs
+ | ((Comma(s)) as a)::xs -> a :: loop info (string_length s count) xs
| Fake2 :: _ | Indent_cocci2 :: _
| Unindent_cocci2 _::_ ->
failwith "unexpected fake, indent, or unindent" in
| x::xs -> find_first_tab started xs in
find_first_tab false xs;
+ let rec balanced ct = function (* ct is the running count of opening
+    minus closing parentheses seen so far, judged by str_of_token2 on T2
+    tokens; tokens that are not T2 leave the count unchanged.  The result
+    is true when the count at the end of the list is not negative.  The
+    count is inspected only at the end, so surplus opening parentheses
+    are accepted and a temporary dip below zero is not detected. *)
+ [] -> ct >= 0
+ | ((T2(tok,_,_)) as x)::xs -> (* tok is not used; the text is taken from x *)
+ (match str_of_token2 x with
+ "(" -> balanced (ct+1) xs
+ | ")" -> balanced (ct-1) xs
+ | _ -> balanced ct xs)
+ | x::xs -> balanced ct xs in
+
let rec aux started xs =
match xs with
| [] -> []
when started && str_of_token2 x =$= ")" ->
(* to be done for if, etc, but not for a function header *)
x::(C2 " ")::a::(aux started xs)
- | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs ->
+ | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs
+ when balanced 0 (fst(Common.span (function x -> not(is_newline x)) xs)) ->
let old_tabbing = !_current_tabbing in
str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
(* only trust the indentation after the first { *)
| Fake2 -> KFake
| Cocci2 _ -> KCocci
| C2 _ -> KC
+ | Comma _ -> KC
| T2 (t,_,_) ->
(match TH.pinfo_of_tok t with
| ExpandedTok _ -> KExpanded
let toks = displace_fake_nodes toks in
(* assert Origin;ExpandedTok;Faketok *)
let toks = expand_mcode toks in
+
(* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
* and no tag information, just NOTHING. *)
+
let toks =
if !Flag.sgrep_mode2
- then drop_minus toks (* nothing to do for sgrep *)
+ then
+ (* nothing else to do for sgrep *)
+ drop_expanded_and_fake (drop_minus toks)
else
(* phase2: can now start to filter and adjust *)
- let (toks,tu) = adjust_indentation toks in
+ let (toks,tu) = adjust_indentation toks in
let toks = adjust_before_semicolon toks in(*before remove minus*)
+ let toks = drop_space_at_endline toks in
+ let toks = paren_to_space toks in
+ let toks = drop_end_comma toks in
let toks = remove_minus_and_between_and_expanded_and_fake toks in
(* assert Origin + Cocci + C and no minus *)
let toks = add_space toks in
let toks = add_newlines toks tu in
let toks = fix_tokens toks in
- toks in
+ toks in
(* in theory here could reparse and rework the ast! or
* apply some SP. Not before cos julia may have generated