1 (* Yoann Padioleau, Julia Lawall
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
16 * Modifications by Julia Lawall for better newline handling.
22 module TH
= Token_helpers
25 (* should keep comments and directives in between adjacent deleted terms,
26 but not comments and directives within deleted terms. should use the
27 labels found in the control-flow graph *)
31 (*****************************************************************************)
33 (*****************************************************************************)
34 let pr2, pr2_once
= Common.mk_pr2_wrappers
Flag_parsing_c.verbose_unparsing
36 (*****************************************************************************)
37 (* Types used during the intermediate phases of the unparsing *)
38 (*****************************************************************************)
42 | T1
of Parser_c.token
44 (* The cocci_tag of the token should always be a NOTHING. The mark of
45 * the token can only be OriginTok or ExpandedTok. Why not get rid of
46 * token and get something simpler ? because we need to know if the
47 * info is a TCommentCpp or TCommentSpace, etc for some of the further
48 * analysis so easier to keep with the token.
50 * This type contains the whole information. Have all the tokens with this
54 Min
of (int list
(* match numbers *) * int (* adjacency information *))
58 | T2
of Parser_c.token
* min
*
59 int option (* orig index, abstracting away comments and space *)
61 | Cocci2
of string * int (* line *) * int (* lcol *) * int (* rcol *)
62 * Unparse_cocci.nlhint
option
65 | Unindent_cocci2
of bool (* true for permanent, false for temporary *)
69 | T3
of Parser_c.token
74 (* similar to the tech in parsing_hack *)
75 type token_extended
= {
78 idx
: int option; (* to know if 2 tokens were consecutive in orig file *)
79 mutable new_tokens_before
: token2 list
;
80 mutable remove
: bool;
84 (*****************************************************************************)
86 (*****************************************************************************)
88 let info_of_token1 t
=
91 | T1 tok
-> TH.info_of_tok tok
93 let print_token1 = function
94 T1 tok
-> TH.str_of_tok tok
95 | Fake1 info
-> "fake"
97 let str_of_token2 = function
98 | T2
(t
,_
,_
) -> TH.str_of_tok t
100 | Cocci2
(s
,_
,_
,_
,_
) -> s
102 | Indent_cocci2
-> ""
103 | Unindent_cocci2 _
-> ""
105 let print_token2 = function
110 Printf.sprintf
"-%d[%s]" adj
111 (String.concat
" " (List.map string_of_int index
))
113 "T2:"^
b_str^
TH.str_of_tok t
115 | Cocci2
(s
,_
,lc
,rc
,_
) -> Printf.sprintf
"Cocci2:%d:%d%s" lc rc s
117 | Indent_cocci2
-> "Indent"
118 | Unindent_cocci2 _
-> "Unindent"
120 let simple_print_all_tokens1 l
=
121 List.iter
(function x
-> Printf.printf
"|%s| " (print_token1 x
)) l
;
124 let simple_print_all_tokens2 l
=
125 List.iter
(function x
-> Printf.printf
"|%s| " (print_token2 x
)) l
;
128 let str_of_token3 = function
129 | T3 t
-> TH.str_of_tok t
130 | Cocci3 s
| C3 s
-> s
134 let mk_token_extended x
=
137 | T2
(_
,_
, idx
) -> idx
141 str
= str_of_token2 x
;
143 new_tokens_before
= [];
147 let rebuild_tokens_extented toks_ext
=
148 let _tokens = ref [] in
149 toks_ext
+> List.iter
(fun tok
->
150 tok
.new_tokens_before
+> List.iter
(fun x
-> push2 x
_tokens);
151 if not tok
.remove
then push2 tok
.tok2
_tokens;
153 let tokens = List.rev
!_tokens in
154 (tokens +> List.map
mk_token_extended)
157 let mcode_contain_plus = function
158 | Ast_cocci.CONTEXT
(_
,Ast_cocci.NOTHING
) -> false
159 | Ast_cocci.CONTEXT _
-> true
160 (* patch: when need full coccinelle transformation *)
161 | Ast_cocci.MINUS
(_
,_
,_
,[]) -> false
162 | Ast_cocci.MINUS
(_
,_
,_
,x
::xs
) -> true
163 | Ast_cocci.PLUS _
-> raise Impossible
165 let contain_plus info
=
166 let mck = Ast_c.mcode_of_info info
in
167 mcode_contain_plus mck
169 (*****************************************************************************)
170 (* Last fix on the ast *)
171 (*****************************************************************************)
173 (* Because of the ugly trick to handle initialiser, I generate fake ','
174 * for the last initializer element, but if there is nothing around it,
175 * I don't want in the end to print it.
178 let remove_useless_fakeInfo_struct program
=
179 let bigf = { Visitor_c.default_visitor_c_s
with
180 Visitor_c.kini_s
= (fun (k
,bigf) ini
->
182 | InitList args
, ii
->
185 | [i1
;i2
;iicommaopt
] ->
186 if (not
(contain_plus iicommaopt
)) && (not
(contain_plus i2
))
187 && (Ast_c.is_fake iicommaopt
)
188 (* sometimes the guy put a normal iicommaopt *)
189 then InitList args
, [i1
;i2
]
190 else InitList args
, [i1
;i2
;iicommaopt
]
191 | [i1
;i2
;iicommaopt
;end_comma_opt
] ->
192 (* only in #define. end_comma_opt canot be fake *)
193 (* not sure if this will be considered ambiguous with a previous
195 if (not
(contain_plus iicommaopt
)) && (not
(contain_plus i2
))
196 && (Ast_c.is_fake iicommaopt
)
197 (* sometimes the guy put a normal iicommaopt *)
198 then InitList args
, [i1
;i2
;end_comma_opt
]
199 else InitList args
, [i1
;i2
;iicommaopt
;end_comma_opt
]
200 | _
-> raise Impossible
205 Visitor_c.vk_toplevel_s
bigf program
208 (*****************************************************************************)
209 (* Tokens1 generation *)
210 (*****************************************************************************)
212 let get_fakeInfo_and_tokens celem toks
=
213 let toks_in = ref toks
in
214 let toks_out = ref [] in
216 (* todo? verify good order of position ? *)
218 match Ast_c.pinfo_of_info info
with
220 Common.push2
(Fake1 info
) toks_out
221 | OriginTok _
| ExpandedTok _
->
222 (* get the associated comments/space/cppcomment tokens *)
223 let (before
, x
, after
) =
224 !toks_in +> Common.split_when
(fun tok
->
225 info
=*= TH.info_of_tok tok
)
227 assert(info
=*= TH.info_of_tok x
);
228 (*old: assert(before +> List.for_all (TH.is_comment)); *)
229 before
+> List.iter
(fun x
->
230 if not
(TH.is_comment x
)
231 then pr2 ("WEIRD: not a comment:" ^
TH.str_of_tok x
)
232 (* case such as int asm d3("x"); not yet in ast *)
234 before
+> List.iter
(fun x
-> Common.push2
(T1 x
) toks_out);
235 push2
(T1 x
) toks_out;
237 | AbstractLineTok _
->
238 (* can be called on type info when for instance use -type_c *)
239 if !Flag_parsing_c.pretty_print_type_info
240 then Common.push2
(Fake1 info
) toks_out
241 else raise Impossible
(* at this stage *)
244 let pr_space _
= () in (* use the spacing that is there already *)
246 Pretty_print_c.pp_program_gen
pr_elem pr_space celem
;
248 if not
(null
!toks_in)
249 then failwith
"WEIRD: unparsing not finished";
253 (* Fake nodes that have BEFORE code should be moved over any subsequent
254 whitespace and newlines, but not any comments, to get as close to the affected
255 code as possible. Similarly, fake nodes that have AFTER code should be moved
256 backwards. No fake nodes should have both before and after code. *)
258 let displace_fake_nodes toks
=
259 let is_fake = function Fake1 _
-> true | _
-> false in
260 let is_whitespace = function
261 T1
(Parser_c.TCommentSpace _
)
263 | T1
(Parser_c.TCommentNewline _
) -> true
267 try Some
(Common.split_when
is_fake toks
)
268 with Not_found
-> None
in
270 Some
(bef
,((Fake1 info
) as fake
),aft
) ->
271 (match !(info
.cocci_tag
) with
274 (Ast_cocci.CONTEXT
(_
,Ast_cocci.BEFORE _
),_
) ->
275 (* move the fake node forwards *)
276 let (whitespace
,rest
) = Common.span
is_whitespace aft
in
277 bef
@ whitespace
@ fake
:: (loop rest
)
278 | (Ast_cocci.CONTEXT
(_
,Ast_cocci.AFTER _
),_
) ->
279 (* move the fake node backwards *)
280 let revbef = List.rev bef
in
281 let (revwhitespace
,revprev
) = Common.span
is_whitespace revbef in
282 let whitespace = List.rev revwhitespace
in
283 let prev = List.rev revprev
in
284 prev @ fake
:: (loop (whitespace @ aft
))
285 | (Ast_cocci.CONTEXT
(_
,Ast_cocci.NOTHING
),_
) ->
286 bef
@ fake
:: (loop aft
)
287 | (Ast_cocci.CONTEXT
(_
,Ast_cocci.BEFOREAFTER _
),_
) ->
288 failwith
"fake node should not be before-after"
289 | _
-> bef
@ fake
:: (loop aft
) (* old: was removed when have simpler yacfe *)
292 bef
@ fake
:: (loop aft
)
295 | _
-> raise Impossible
in
298 (*****************************************************************************)
299 (* Tokens2 generation *)
300 (*****************************************************************************)
302 let comment2t2 = function
304 (* not sure iif the following list is exhaustive or complete *)
305 (Token_c.CppAttr
|Token_c.CppMacro
|Token_c.CppPassingCosWouldGetError
),
306 (info
: Token_c.info
)) ->
308 | (Token_c.TCommentCpp x
,(info
: Token_c.info
)) ->
309 C2
("\n"^info
.Common.str^
"\n")
310 | x
-> failwith
(Printf.sprintf
"unexpected comment %s" (Common.dump x
))
312 let expand_mcode toks
=
313 let toks_out = ref [] in
317 let add_elem t minus
=
320 let str = Ast_c.str_of_info info
in
322 then push2
(Fake2
) toks_out
323 (* perhaps the fake ',' *)
324 else push2
(C2
str) toks_out
328 (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
329 (* no tag on expandedTok ! *)
330 let modified = function
332 | Some
(Ast_cocci.CONTEXT
(pos
,Ast_cocci.NOTHING
),l
) -> false
334 (if (TH.is_expanded tok
&&
335 modified !((TH.info_of_tok tok
).cocci_tag
)
336 (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*))
340 "expanded token %s on line %d is either modified or stored in a metavariable"
341 (TH.str_of_tok tok
) (TH.line_of_tok tok
)));
343 let tok'
= tok +> TH.visitor_info_of_tok
(fun i
->
344 { i
with cocci_tag
= ref Ast_c.emptyAnnot
; }
348 if TH.is_origin
tok && not
(TH.is_real_comment
tok)
356 push2
(T2
(tok'
, minus
, optindex)) toks_out
361 Ast_c.mcode_and_env_of_cocciref
((info_of_token1 t
).cocci_tag
) in
363 let pr_cocci s ln col rcol hint
=
364 push2
(Cocci2
(s
,ln
,col
,rcol
,hint
)) toks_out in
366 (match Ast_c.pinfo_of_info info
with
367 Ast_c.AbstractLineTok _
->
368 push2
(C2
(Ast_c.str_of_info info
)) toks_out
369 | Ast_c.FakeTok
(s
,_
) ->
370 push2
(C2 s
) toks_out
372 Printf.printf
"line: %s\n" (Common.dump info
);
373 failwith
"not an abstract line");
374 (!(info
.Ast_c.comments_tag
)).Ast_c.mafter
+>
375 List.iter
(fun x
-> Common.push2
(comment2t2 x
) toks_out) in
377 let pr_barrier ln col
= (* marks a position, used around C code *)
378 push2
(Cocci2
("",ln
,col
,col
,None
)) toks_out in
379 let pr_nobarrier ln col
= () in (* not needed for linux spacing *)
381 let pr_cspace _
= push2
(C2
" ") toks_out in
383 let pr_space _
= () (* rely on add_space in cocci code *) in
384 let pr_arity _
= () (* not interested *) in
386 let indent _
= push2 Indent_cocci2
toks_out in
387 let unindent x
= push2
(Unindent_cocci2 x
) toks_out in
390 (env
, pr_cocci, pr_c, pr_cspace,
391 (match !Flag_parsing_c.spacing
with
392 Flag_parsing_c.SMPL
-> pr_space | _
-> pr_cspace),
394 (match !Flag_parsing_c.spacing
with
395 Flag_parsing_c.SMPL
-> pr_barrier | _
-> pr_nobarrier),
398 (* old: when for yacfe with partial cocci:
402 (* patch: when need full coccinelle transformation *)
403 let unparser = Unparse_cocci.pp_list_list_any
args_pp false in
405 | Ast_cocci.MINUS
(_
,inst
,adj
,any_xxs
) ->
406 (* Why adding ? because I want to have all the information, the whole
407 * set of tokens, so I can then process and remove the
408 * is_between_two_minus for instance *)
409 add_elem t
(Min
(inst
,adj
));
410 unparser any_xxs
Unparse_cocci.InPlace
411 | Ast_cocci.CONTEXT
(_
,any_befaft
) ->
412 (match any_befaft
with
413 | Ast_cocci.NOTHING
->
415 | Ast_cocci.BEFORE
(xxs
,_
) ->
416 unparser xxs
Unparse_cocci.Before
;
418 | Ast_cocci.AFTER
(xxs
,_
) ->
420 unparser xxs
Unparse_cocci.After
;
421 | Ast_cocci.BEFOREAFTER
(xxs
, yys
, _
) ->
422 unparser xxs
Unparse_cocci.Before
;
424 unparser yys
Unparse_cocci.After
;
426 | Ast_cocci.PLUS _
-> raise Impossible
430 toks
+> List.iter
expand_info;
434 (*****************************************************************************)
435 (* Tokens2 processing, filtering, adjusting *)
436 (*****************************************************************************)
438 let is_space = function
439 | T2
(Parser_c.TCommentSpace _
,_b
,_i
) -> true (* only whitespace *)
442 let is_newline = function
443 | T2
(Parser_c.TCommentNewline _
,_b
,_i
) -> true
446 let is_whitespace = function
449 | Parser_c.TCommentSpace _
-> true (* only whitespace *)
450 | Parser_c.TCommentNewline _
(* newline plus whitespace *) -> true
455 let is_minusable_comment = function
458 | Parser_c.TCommentSpace _
(* only whitespace *)
459 (* patch: coccinelle *)
460 | Parser_c.TCommentNewline _
(* newline plus whitespace *) -> true
461 | Parser_c.TComment _
when !Flag_parsing_c.keep_comments
-> false
462 | Parser_c.TComment _
463 | Parser_c.TCommentCpp
(Token_c.CppAttr
, _
)
464 | Parser_c.TCommentCpp
(Token_c.CppMacro
, _
)
465 | Parser_c.TCommentCpp
(Token_c.CppDirective
, _
) (* result was false *)
468 | Parser_c.TCommentMisc _
469 | Parser_c.TCommentCpp
(Token_c.CppPassingCosWouldGetError
, _
)
476 let is_minusable_comment_nocpp = function
479 | Parser_c.TCommentSpace _
(* only whitespace *)
480 (* patch: coccinelle *)
481 | Parser_c.TCommentNewline _
(* newline plus whitespace *) -> true
482 | Parser_c.TComment _
when !Flag_parsing_c.keep_comments
-> false
483 | Parser_c.TComment _
-> true
484 | Parser_c.TCommentCpp
(Token_c.CppAttr
, _
)
485 | Parser_c.TCommentCpp
(Token_c.CppMacro
, _
)
486 | Parser_c.TCommentCpp
(Token_c.CppDirective
, _
)
489 | Parser_c.TCommentMisc _
490 | Parser_c.TCommentCpp
(Token_c.CppPassingCosWouldGetError
, _
)
497 let all_coccis = function
498 Cocci2 _
| C2 _
| Indent_cocci2
| Unindent_cocci2 _
-> true
501 (*previously gave up if the first character was a newline, but not clear why*)
502 let is_minusable_comment_or_plus x
= is_minusable_comment x
or all_coccis x
504 let set_minus_comment adj
= function
506 let str = TH.str_of_tok t
in
508 | Parser_c.TCommentSpace _
509 (* patch: coccinelle *)
510 | Parser_c.TCommentNewline _
-> ()
512 | Parser_c.TComment _
513 | Parser_c.TCommentCpp
(Token_c.CppAttr
, _
)
514 | Parser_c.TCommentCpp
(Token_c.CppMacro
, _
)
515 | Parser_c.TCommentCpp
(Token_c.CppDirective
, _
)
517 pr2 (Printf.sprintf
"%d: ERASING_COMMENTS: %s"
518 (TH.line_of_tok t
) str)
519 | _
-> raise Impossible
522 (* patch: coccinelle *)
523 | T2
(t
,Min adj
,idx
) as x
-> x
524 | _
-> raise Impossible
526 let set_minus_comment_or_plus adj
= function
527 Cocci2 _
| C2 _
| Indent_cocci2
| Unindent_cocci2 _
as x
-> x
528 | x
-> set_minus_comment adj x
531 xs
+> Common.exclude
(function
532 | T2
(t
,Min adj
,_
) -> true
536 let remove_minus_and_between_and_expanded_and_fake xs
=
538 (* get rid of exampled and fake tok *)
539 let xs = xs +> Common.exclude
(function
540 | T2
(t
,_
,_
) when TH.is_expanded t
-> true
546 let minus_or_comment = function
547 T2
(_
,Min adj
,_
) -> true
548 | x
-> is_minusable_comment x
in
550 let minus_or_comment_nocpp = function
551 T2
(_
,Min adj
,_
) -> true
552 | x
-> is_minusable_comment_nocpp x
in
554 let common_adj (index1
,adj1
) (index2
,adj2
) =
555 adj1
= adj2
(* same adjacency info *) &&
556 (* non-empty intersection of witness trees *)
557 not
((Common.inter_set index1 index2
) = []) in
559 let rec adjust_around_minus = function
561 | (T2
(Parser_c.TCommentNewline c
,_b
,_i
) as x
)::
562 (((T2
(_
,Min adj
,_
))::_
) as rest
) ->
563 (* an initial newline, as in a replaced statement *)
564 let (between_minus
,rest
) = Common.span
minus_or_comment rest
in
566 [] -> (set_minus_comment adj x
) ::
567 (List.map
(set_minus_comment adj
) between_minus
)
568 | T2
(_
,Ctx
,_
)::_
when is_newline (List.hd
(List.rev between_minus
)) ->
569 (set_minus_comment adj x
)::(adjust_within_minus between_minus
) @
570 (adjust_around_minus rest
)
572 x
:: (adjust_within_minus between_minus
) @
573 (adjust_around_minus rest
))
574 | ((T2
(_
,Min adj
,_
))::_
) as rest
->
575 (* no initial newline, as in a replaced expression *)
576 let (between_minus
,rest
) = Common.span
minus_or_comment rest
in
579 (List.map
(set_minus_comment adj
) between_minus
)
581 (adjust_within_minus between_minus
) @
582 (adjust_around_minus rest
))
583 | x
::xs -> x
::adjust_around_minus xs
584 and adjust_within_minus
= function
586 | (T2
(_
,Min adj1
,_
) as t1
)::xs ->
587 let (between_minus
,rest
) = Common.span
is_minusable_comment xs in
590 (* keep last newline *)
594 Common.split_when
is_newline between_minus
in
596 with Not_found
-> (between_minus
,[]) in
598 List.map
(set_minus_comment_or_plus adj1
) drop
@
600 | (T2
(_
,Min adj2
,_
) as t2
)::rest
when common_adj adj1 adj2
->
602 List.map
(set_minus_comment_or_plus adj1
) between_minus
@
603 adjust_within_minus
(t2
::rest
)
605 t1
::(between_minus
@ adjust_within_minus
(x
::xs)))
606 | _
-> failwith
"only minus and space possible" in
608 (* new idea: collects regions not containing non-space context code
609 if two adjacent adjacent minus tokens satisfy common_adj then delete
610 all spaces, comments etc between them
611 if two adjacent minus tokens do not satisfy common_adj only delete
612 the spaces between them if there are no comments, etc.
613 if the region contain no plus code and is both preceded and followed
614 by a newline, delete the initial newline. *)
616 let rec adjust_around_minus = function
618 | (T2
(Parser_c.TCommentNewline c
,_b
,_i
) as x
)::
619 (T2
(_
,Min adj1
,_
) as t1
)::xs ->
620 let (minus_list
,rest
) = Common.span not_context
(t1
::xs) in
621 let contains_plus = List.exists is_plus minus_list
in
623 match List.rev minus_list
with
624 (T2
(Parser_c.TCommentNewline c
,_b
,_i
))::rest
625 when List.for_all
minus_or_comment minus_list
->
626 set_minus_comment_or_plus adj1
x
628 x :: adjust_within_minus
contains_plus minus_list
@
629 adjust_around_minus rest
630 | (T2
(_
,Min adj1
,_
) as t1
)::xs ->
631 let (minus_list
,rest
) = Common.span not_context
(t1
::xs) in
632 let contains_plus = List.exists is_plus minus_list
in
633 adjust_within_minus
contains_plus minus_list
@ adjust_around_minus rest
634 | x::xs -> x :: adjust_around_minus xs
635 and adjust_within_minus cp
(* contains plus *) = function
636 (T2
(_
,Min adj1
,_
) as t1
)::xs ->
637 let not_minus = function T2
(_
,Min _
,_
) -> false | _
-> true in
638 let (not_minus_list
,rest
) = Common.span
not_minus xs in
641 (T2
(_
,Min adj2
,_
) as t2
)::xs when common_adj adj1 adj2
->
642 (List.map
(set_minus_comment_or_plus adj1
) not_minus_list
)
643 @ (adjust_within_minus cp
(t2
::xs))
644 | (T2
(_
,Min adj2
,_
) as t2
)::xs ->
645 let is_whitespace_or_plus = function
646 (T2 _
) as x -> is_whitespace x
647 | _
-> true (*plus*) in
648 if List.for_all
is_whitespace_or_plus not_minus_list
650 (List.map
(set_minus_comment_or_plus adj1
) not_minus_list
)
651 @ (adjust_within_minus cp
(t2
::xs))
652 else not_minus_list
@ (adjust_within_minus cp
(t2
::xs))
657 let (spaces
,rest
) = Common.span
is_space xs in
658 (List.map
(set_minus_comment_or_plus adj1
) spaces
)
660 | xs -> failwith
"should always start with minus"
661 and not_context
= function
662 (T2
(_
,Ctx
,_
) as x) when not
(is_minusable_comment x) -> false
664 and is_plus
= function
665 C2 _
| Cocci2 _
-> true
668 let xs = adjust_around_minus xs in
670 (* this drops blank lines after a brace introduced by removing code *)
671 let minus_or_comment_nonl = function
672 T2
(_
,Min adj
,_
) -> true
673 | T2
(Parser_c.TCommentNewline _
,_b
,_i
) -> false
674 | x -> is_minusable_comment x in
676 let rec adjust_after_brace = function
678 | ((T2
(_
,Ctx
,_
)) as x)::((T2
(_
,Min adj
,_
)::_
) as xs)
679 when str_of_token2 x =$
= "{" ->
680 let (between_minus
,rest
) = Common.span
minus_or_comment_nonl xs in
681 let is_whitespace = function
682 T2
(Parser_c.TCommentSpace _
,_b
,_i
)
684 | T2
(Parser_c.TCommentNewline _
,_b
,_i
) -> true
686 let (newlines
,rest
) = Common.span
is_whitespace rest
in
687 let (drop_newlines
,last_newline
) =
688 let rec loop = function
690 | ((T2
(Parser_c.TCommentNewline _
,_b
,_i
)) as x) :: rest
->
693 let (drop_newlines
,last_newline
) = loop xs in
694 (drop_newlines
,x::last_newline
) in
695 loop (List.rev newlines
) in
696 x::between_minus
@(List.map
(set_minus_comment adj
) drop_newlines
)@
698 adjust_after_brace rest
699 | x::xs -> x::adjust_after_brace xs in
701 let xs = adjust_after_brace xs in
703 (* search backwards from context } over spaces until reaching a newline.
704 then go back over all minus code until reaching some context or + code.
705 get rid of all intervening spaces, newlines, and comments
707 let rec adjust_before_brace = function
709 | ((T2
(t
,Ctx
,_
)) as x)::xs when str_of_token2 x =$
= "}" or is_newline x ->
710 let (outer_spaces
,rest
) = Common.span
is_space xs in
713 ((T2
(Parser_c.TCommentNewline _
,Ctx
,_i
)) as h
) ::
714 (* the rest of this code is the same as from_newline below
715 but merging them seems to be error prone... *)
716 ((T2
(t
, Min adj
, idx
)) as m
) :: rest
->
717 let (spaces
,rest
) = Common.span
minus_or_comment_nocpp rest
in
719 (List.map
(set_minus_comment adj
) spaces
) @
720 (adjust_before_brace rest
)
721 | _
-> adjust_before_brace rest
)
722 | x::xs -> x :: (adjust_before_brace xs) in
724 let from_newline = function
725 ((T2
(t
, Min adj
, idx
)) as m
) :: rest
->
726 let (spaces
,rest
) = Common.span
minus_or_comment_nocpp rest
in
728 (List.map
(set_minus_comment adj
) spaces
) @
729 (adjust_before_brace rest
)
730 | rest
-> adjust_before_brace rest
in
732 let xs = List.rev
(from_newline (List.rev
xs)) in
733 let xs = drop_minus xs in
736 (* normally, in C code, a semicolon is not preceded by a space or newline *)
737 let adjust_before_semicolon toks
=
738 let toks = List.rev
toks in
739 let rec search_semic = function
741 | ((T2
(_
,Ctx
,_
)) as x)::xs | ((Cocci2 _
) as x)::xs ->
742 if List.mem
(str_of_token2 x) [";";")";","]
743 then x :: search_minus
false xs
744 else x :: search_semic xs
745 | x::xs -> x :: search_semic xs
746 and search_minus seen_minus
xs =
747 let (spaces
, rest
) = Common.span
is_space xs in
748 (* only delete spaces if something is actually deleted *)
750 ((T2
(_
,Min _
,_
)) as a
)::rerest
-> a
:: search_minus
true rerest
751 | _
-> if seen_minus
then rest
else xs in
752 List.rev
(search_semic toks)
754 let is_ident_like s
= s
==~
Common.regexp_alpha
756 let rec drop_space_at_endline = function
759 | ((T2
(Parser_c.TCommentSpace _
,Ctx
,_i
)) as a
)::rest
->
760 let (outer_spaces
,rest
) = Common.span
is_space rest
in
761 let minus_or_comment_or_space_nocpp = function
762 T2
(_
,Min adj
,_
) -> true
763 | (T2
(Parser_c.TCommentSpace _
,Ctx
,_i
)) -> true
764 | (T2
(Parser_c.TCommentNewline _
,Ctx
,_i
)) -> false
765 | x -> is_minusable_comment_nocpp x in
766 let (minus
,rest
) = Common.span
minus_or_comment_or_space_nocpp rest
in
767 (match (minus
,rest
) with
768 ([],_
) -> a
::outer_spaces
@(drop_space_at_endline rest
)
769 | (_
,(((T2
(Parser_c.TCommentNewline _
,Ctx
,_i
)) as a
) :: rest
)) ->
770 (* drop trailing spaces *)
771 minus
@a
::(drop_space_at_endline rest
)
772 | _
-> a
:: outer_spaces
@ minus
@ (drop_space_at_endline rest
))
773 | a
:: rest
-> a
:: drop_space_at_endline rest
775 (* if a removed ( is between two tokens, then add a space *)
776 let rec paren_to_space = function
780 | ((T2
(_
,Ctx
,_
)) as a
)::((T2
(t
,Min _
,_
)) as b
)::((T2
(_
,Ctx
,_
)) as c
)::rest
781 when not
(is_whitespace a
) && TH.str_of_tok t
= "(" ->
782 simple_print_all_tokens2 [a
;b
;c
];
783 a
:: b
:: (C2
" ") :: (paren_to_space (c
:: rest
))
784 | a
:: rest
-> a
:: (paren_to_space rest
)
786 let rec add_space xs =
790 | (Cocci2
(sx
,lnx
,_
,rcolx
,_
) as x)::((Cocci2
(sy
,lny
,lcoly
,_
,_
)) as y
)::xs
791 when !Flag_parsing_c.spacing
= Flag_parsing_c.SMPL
&&
792 not
(lnx
= -1) && lnx
= lny
&& not
(rcolx
= -1) && rcolx
< lcoly
->
793 (* this only works within a line. could consider whether
794 something should be done to add newlines too, rather than
795 printing them explicitly in unparse_cocci. *)
796 x::C2
(String.make
(lcoly
-rcolx
) ' '
)::add_space (y
::xs)
798 let sx = str_of_token2 x in
799 let sy = str_of_token2 y
in
800 if is_ident_like sx && is_ident_like sy
801 then x::C2
" "::(add_space (y
::xs))
802 else x::(add_space (y
::xs))
804 (* The following only works for the outermost function call. Stack records
805 the column of all open parentheses. Space_cell contains the most recent
806 comma in the outermost function call. The goal is to decide whether this
807 should be followed by a space or a newline and indent. *)
808 let add_newlines toks tabbing_unit
=
809 let create_indent n
=
811 match tabbing_unit
with
812 Some
("\t",_
) -> ("\t",8)
813 | Some
("",_
) -> ("\t",8) (* not sure why... *)
814 | Some
(s
,_
) -> (s
,String.length s
) (* assuming only spaces *)
815 | None
-> ("\t",8) in
818 then tu ^
loop (seen
+ tlen
)
819 else String.make
(n
-seen
) ' '
in
821 let check_for_newline count
x = function
822 Some
(start
,space_cell
) when count
> Flag_parsing_c.max_width
->
823 space_cell
:= "\n"^
(create_indent x);
824 Some
(x + (count
- start
))
826 (* the following is for strings that may contain newline *)
827 let string_length s count
=
828 let l = list_of_string s
in
836 let rec loop info count
= function
838 | ((T2
(tok,_
,_
)) as a
)::xs ->
839 a
:: loop info
(string_length (TH.str_of_tok
tok) count
) xs
840 | ((Cocci2
(s
,line
,lcol
,rcol
,hint
)) as a
)::xs ->
841 let (stack
,space_cell
) = info
in
844 None
-> loop info
(count
+ (String.length s
)) xs
845 | Some
Unparse_cocci.StartBox
->
846 let count = count + (String.length s
) in
847 loop (count::stack
,space_cell
) count xs
848 | Some
Unparse_cocci.EndBox
->
849 let count = count + (String.length s
) in
852 (match check_for_newline count x space_cell
with
853 Some
count -> loop ([],None
) count xs
854 | None
-> loop ([],None
) count xs)
855 | _
-> loop (List.tl stack
,space_cell
) count xs)
856 | Some
(Unparse_cocci.SpaceOrNewline sp
) ->
857 let count = count + (String.length s
) + 1 (*space*) in
860 (match check_for_newline count x space_cell
with
861 Some
count -> loop (stack
,Some
(x,sp
)) count xs
862 | None
-> loop (stack
,Some
(count,sp
)) count xs)
863 | _
-> loop info
count xs) in
865 | ((C2
(s
)) as a
)::xs -> a
:: loop info
(string_length s
count) xs
866 | Fake2
:: _
| Indent_cocci2
:: _
867 | Unindent_cocci2 _
::_
->
868 failwith
"unexpected fake, indent, or unindent" in
869 let redo_spaces prev = function
870 Cocci2
(s
,line
,lcol
,rcol
,Some
(Unparse_cocci.SpaceOrNewline sp
)) ->
871 C2
!sp
:: Cocci2
(s
,line
,lcol
,rcol
,None
) :: prev
873 (match !Flag_parsing_c.spacing
with
874 Flag_parsing_c.SMPL
-> toks
875 | _
-> List.rev
(List.fold_left
redo_spaces [] (loop ([],None
) 0 toks)))
877 (* When insert some new code, because of a + in a SP, we must add this
878 * code at the right place, with the good indentation. So each time we
879 * encounter some spacing info, with some newline, we maintain the
880 * current indentation level used.
882 * TODO problems: not accurate. ex: TODO
884 * TODO: if in #define region, should add a \ \n
886 let new_tabbing2 space
=
887 (list_of_string space
)
889 +> Common.take_until
(fun c
-> c
=<= '
\n'
)
891 +> List.map string_of_char
895 Common.profile_code
"C unparsing.new_tabbing" (fun () -> new_tabbing2 a
)
898 let rec adjust_indentation xs =
900 let _current_tabbing = ref "" in
901 let tabbing_unit = ref None
in
903 let string_of_list l = String.concat
"" (List.map string_of_char
l) in
905 (* try to pick a tabbing unit for the plus code *)
906 let adjust_tabbing_unit old_tab new_tab
=
907 if !tabbing_unit =*= None
&& String.length new_tab
> String.length old_tab
909 let old_tab = list_of_string
old_tab in
910 let new_tab = list_of_string
new_tab in
911 let rec loop = function
913 tabbing_unit := Some
(string_of_list new_tab,List.rev
new_tab)
914 | (_
,[]) -> failwith
"not possible"
915 | (o
::os
,n
::ns
) -> loop (os
,ns
) in (* could check for equality *)
916 loop (old_tab,new_tab) in
918 let remtab tu current_tab
=
919 let current_tab = List.rev
(list_of_string
current_tab) in
920 let rec loop = function
921 ([],new_tab) -> string_of_list (List.rev
new_tab)
922 | (_
,[]) -> "" (*weird; tabbing unit used up more than the current tab*)
923 | (t
::ts
,n
::ns
) when t
=<= n
-> loop (ts
,ns
)
924 | (_
,ns
) -> (* mismatch; remove what we can *)
925 string_of_list (List.rev ns
) in
926 loop (tu
,current_tab) in
928 let rec find_first_tab started
= function
930 | ((T2
(tok,_
,_
)) as x)::xs when str_of_token2 x =$
= "{" ->
931 find_first_tab true xs
932 (* patch: coccinelle *)
933 | ((T2
(Parser_c.TCommentNewline s
, _
, _
)) as x)::_
935 let s = str_of_token2 x +> new_tabbing in
936 tabbing_unit := Some
(s,List.rev
(list_of_string
s))
937 | x::xs -> find_first_tab started
xs in
938 find_first_tab false xs;
940 let rec aux started
xs =
943 (* patch: coccinelle *)
944 | ((T2
(tok,_
,_
)) as x)::(T2
(Parser_c.TCommentNewline
s, _
, _
))::
945 ((Cocci2
("{",_
,_
,_
,_
)) as a
)::xs
946 when started
&& str_of_token2 x =$
= ")" ->
947 (* to be done for if, etc, but not for a function header *)
948 x::(C2
" ")::a
::(aux started
xs)
949 | ((T2
(Parser_c.TCommentNewline
s, _
, _
)) as x)::xs ->
950 let old_tabbing = !_current_tabbing in
951 str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
952 (* only trust the indentation after the first { *)
954 then adjust_tabbing_unit old_tabbing !_current_tabbing);
955 let coccis_rest = Common.span
all_coccis xs in
956 (match coccis_rest with
957 (_
::_
,((T2
(tok,_
,_
)) as y
)::_
) when str_of_token2 y
=$
= "}" ->
958 (* the case where cocci code has been added before a close } *)
959 x::aux started
(Indent_cocci2
::xs)
960 | _
-> x::aux started
xs)
961 | Indent_cocci2
::xs ->
962 (match !tabbing_unit with
963 None
-> aux started
xs
965 _current_tabbing := (!_current_tabbing)^tu
;
966 Cocci2
(tu
,-1,-1,-1,None
)::aux started
xs)
967 | Unindent_cocci2
(permanent
)::xs ->
968 (match !tabbing_unit with
969 None
-> aux started
xs
971 _current_tabbing := remtab tu
(!_current_tabbing);
973 (* border between existing code and cocci code *)
974 | ((T2
(tok,_
,_
)) as x)::((Cocci2
("\n",_
,_
,_
,_
)) as y
)::xs
975 when str_of_token2 x =$
= "{" ->
976 x::aux true (y
::Indent_cocci2
::xs)
977 | ((Cocci2 _
) as x)::((T2
(tok,_
,_
)) as y
)::xs
978 when str_of_token2 y
=$
= "}" ->
979 x::aux started
(y
::Unindent_cocci2
true::xs)
980 (* starting the body of the function *)
981 | ((T2
(tok,_
,_
)) as x)::xs when str_of_token2 x =$
= "{" -> x::aux true xs
982 | ((Cocci2
("{",_
,_
,_
,_
)) as a
)::xs -> a
::aux true xs
983 | ((Cocci2
("\n",_
,_
,_
,_
)) as x)::Unindent_cocci2
(false)::xs ->
985 | ((Cocci2
("\n",_
,_
,_
,_
)) as x)::xs ->
986 (* dont inline in expr because of weird eval order of ocaml *)
987 let s = !_current_tabbing in
988 x::Cocci2
(s,-1,-1,-1,None
)::aux started
xs
989 | x::xs -> x::aux started
xs in
990 (aux false xs,!tabbing_unit)
993 let rec find_paren_comma = function
996 (* do nothing if was like this in original file *)
997 | ({ str = "("; idx
= Some p1
} as _x1
)::({ str = ","; idx
= Some p2
} as x2
)
998 ::xs when p2
=|= p1
+ 1 ->
999 find_paren_comma (x2
::xs)
1001 | ({ str = ","; idx
= Some p1
} as _x1
)::({ str = ","; idx
= Some p2
} as x2
)
1002 ::xs when p2
=|= p1
+ 1 ->
1003 find_paren_comma (x2
::xs)
1005 | ({ str = ","; idx
= Some p1
} as _x1
)::({ str = ")"; idx
= Some p2
} as x2
)
1006 ::xs when p2
=|= p1
+ 1 ->
1007 find_paren_comma (x2
::xs)
1009 (* otherwise yes can adjust *)
1010 | ({ str = "(" } as _x1
)::({ str = ","} as x2
)::xs ->
1012 find_paren_comma (x2
::xs)
1013 | ({ str = "," } as x1
)::({ str = ","} as x2
)::xs ->
1015 find_paren_comma (x2
::xs)
1017 | ({ str = "," } as x1
)::({ str = ")"} as x2
)::xs ->
1019 find_paren_comma (x2
::xs)
1025 let fix_tokens toks =
1026 let toks = toks +> List.map
mk_token_extended in
1028 let cleaner = toks +> Common.exclude
(function
1029 | {tok2
= T2
(t
,_
,_
)} -> TH.is_real_comment t
(* I want the ifdef *)
1032 find_paren_comma cleaner;
1034 let toks = rebuild_tokens_extented toks in
1035 toks +> List.map
(fun x -> x.tok2
)
1039 (*****************************************************************************)
1040 (* Final unparsing (and debugging support) *)
1041 (*****************************************************************************)
1044 type kind_token2
= KFake
| KCocci
| KC
| KExpanded
| KOrigin
1046 let kind_of_token2 = function
1048 | Cocci2 _
-> KCocci
1051 (match TH.pinfo_of_tok t
with
1052 | ExpandedTok _
-> KExpanded
1053 | OriginTok _
-> KOrigin
1054 | FakeTok _
-> raise Impossible
(* now a Fake2 *)
1055 | AbstractLineTok _
-> raise Impossible
(* now a KC *)
1057 | Unindent_cocci2 _
| Indent_cocci2
-> raise Impossible
1061 let start_mark = function
1065 | KExpanded
-> "!E!"
1068 let print_all_tokens2 pr
xs =
1069 if !Flag_parsing_c.debug_unparsing
1071 let current_kind = ref KOrigin
in
1072 xs +> List.iter
(fun t
->
1073 let newkind = kind_of_token2 t
in
1074 if newkind =*= !current_kind
1075 then pr
(str_of_token2 t
)
1078 pr
(start_mark newkind);
1079 pr
(str_of_token2 t
);
1080 current_kind := newkind
1084 xs +> List.iter
(fun x -> pr
(str_of_token2 x))
1089 (*****************************************************************************)
1091 (*****************************************************************************)
1093 (* old: PPviatok was made in the beginning to allow to pretty print a
1094 * complete C file, including a modified C file by transformation.ml,
1095 * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
1096 * maybe flow_to_ast) all the cases. Indeed we don't need to do some
1097 * fancy stuff when a function was not modified at all. Just need to
1098 * print the list of token as-is. But now pretty_print_c.ml handles
1099 * almost everything so maybe less useful. Maybe PPviatok allows to
1100 * optimize a little the pretty printing.
1102 * update: now have PPviastr which goes even faster than PPviatok, so
1103 * PPviatok has disappeared.
1106 type ppmethod
= PPnormal
| PPviastr
1111 (* The pp_program function will call pretty_print_c.ml with a special
1112 * function to print the leaf components, the tokens. When we want to
1113 * print a token, we need to print also maybe the space and comments that
1114 * were close to it in the original file (and that was omitted during the
1115 * parsing phase), and honor what the cocci-info attached to the token says.
1116 * Maybe we will not print the token if it's a MINUS-token, and maybe we will
1117 * print it and also print some cocci-code attached in a PLUS to it.
1118 * So we will also maybe call unparse_cocci. Because the cocci-code may
1119 * contain metavariables, unparse_cocci will in fact sometimes call back
1120 * pretty_print_c (which will this time don't call back again unparse_cocci)
1123 let pp_program2 xs outfile
=
1124 Common.with_open_outfile outfile
(fun (pr
,chan
) ->
1126 if !Flag_parsing_c.debug_unparsing
1127 then begin pr2_no_nl
s; flush stderr
end
1130 (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1133 xs +> List.iter
(fun ((e
,(str, toks_e
)), ppmethod
) ->
1134 (* here can still work on ast *)
1135 let e = remove_useless_fakeInfo_struct e in
1139 (* now work on tokens *)
1141 (* phase1: just get all the tokens, all the information *)
1142 assert(toks_e
+> List.for_all
(fun t
->
1143 TH.is_origin t
or TH.is_expanded t
1145 let toks = get_fakeInfo_and_tokens e toks_e
in
1146 let toks = displace_fake_nodes toks in
1147 (* assert Origin;ExpandedTok;Faketok *)
1148 let toks = expand_mcode toks in
1149 (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1150 * and no tag information, just NOTHING. *)
1153 if !Flag.sgrep_mode2
1154 then drop_minus toks (* nothing to do for sgrep *)
1156 (* phase2: can now start to filter and adjust *)
1157 let (toks,tu
) = adjust_indentation toks in
1158 let toks = adjust_before_semicolon toks in(*before remove minus*)
1159 let toks = drop_space_at_endline toks in
1160 let toks = paren_to_space toks in
1161 let toks = remove_minus_and_between_and_expanded_and_fake toks in
1162 (* assert Origin + Cocci + C and no minus *)
1163 let toks = add_space toks in
1164 let toks = add_newlines toks tu
in
1165 let toks = fix_tokens toks in
1168 (* in theory here could reparse and rework the ast! or
1169 * apply some SP. Not before cos julia may have generated
1170 * not parsable file. Need do unparsing_tricks call before being
1171 * ready to reparse. *)
1172 print_all_tokens2 pr toks;
1174 | PPviastr
-> pr str
1178 let pp_program a b
=
1179 Common.profile_code
"C unparsing" (fun () -> pp_program2 a b
)
1182 let pp_program_default xs outfile
=
1183 let xs'
= xs +> List.map
(fun x -> x, PPnormal
) in
1184 pp_program xs' outfile