1 (* Yoann Padioleau, Julia Lawall
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
16 * Modifications by Julia Lawall for better newline handling.
22 module TH
= Token_helpers
25 (* should keep comments and directives in between adjacent deleted terms,
26 but not comments and directives within deleted terms. should use the
27 labels found in the control-flow graph *)
31 (*****************************************************************************)
33 (*****************************************************************************)
34 let pr2, pr2_once
= Common.mk_pr2_wrappers
Flag_parsing_c.verbose_unparsing
36 (*****************************************************************************)
37 (* Types used during the intermediate phases of the unparsing *)
38 (*****************************************************************************)
42 | T1
of Parser_c.token
44 (* The cocci_tag of the token should always be a NOTHING. The mark of
45 * the token can only be OriginTok or ExpandedTok. Why not get rid of
46 * token and get something simpler ? because we need to know if the
47 * info is a TCommentCpp or TCommentSpace, etc for some of the further
48 * analysis so easier to keep with the token.
50 * This type contains the whole information. Have all the tokens with this
54 Min
of (int list
(* match numbers from witness trees *) *
55 int (* adjacency information *))
59 | T2
of Parser_c.token
* min
*
60 int option (* orig index, abstracting away comments and space *)
62 | Cocci2
of string * int (* line *) * int (* lcol *) * int (* rcol *)
63 * Unparse_cocci.nlhint
option
67 | Unindent_cocci2
of bool (* true for permanent, false for temporary *)
71 | T3
of Parser_c.token
76 (* similar to the tech in parsing_hack *)
77 type token_extended
= {
80 idx
: int option; (* to know if 2 tokens were consecutive in orig file *)
81 mutable new_tokens_before
: token2 list
;
82 mutable remove
: bool;
86 (*****************************************************************************)
88 (*****************************************************************************)
90 let info_of_token1 t
=
93 | T1 tok
-> TH.info_of_tok tok
95 let print_token1 = function
96 T1 tok
-> TH.str_of_tok tok
97 | Fake1 info
-> "fake"
99 let str_of_token2 = function
100 | T2
(t
,_
,_
) -> TH.str_of_tok t
102 | Cocci2
(s
,_
,_
,_
,_
) -> s
105 | Indent_cocci2
-> ""
106 | Unindent_cocci2 _
-> ""
108 let print_token2 = function
113 Printf.sprintf
"-%d[%s]" adj
114 (String.concat
" " (List.map string_of_int index
))
116 "T2:"^
b_str^
TH.str_of_tok t
118 | Cocci2
(s
,_
,lc
,rc
,_
) -> Printf.sprintf
"Cocci2:%d:%d%s" lc rc s
120 | Comma s
-> "Comma:"^s
121 | Indent_cocci2
-> "Indent"
122 | Unindent_cocci2 _
-> "Unindent"
124 let simple_print_all_tokens1 l
=
125 List.iter
(function x
-> Printf.printf
"|%s| " (print_token1 x
)) l
;
128 let simple_print_all_tokens2 l
=
129 List.iter
(function x
-> Printf.printf
"|%s| " (print_token2 x
)) l
;
132 let str_of_token3 = function
133 | T3 t
-> TH.str_of_tok t
134 | Cocci3 s
| C3 s
-> s
138 let mk_token_extended x
=
141 | T2
(_
,_
, idx
) -> idx
145 str
= str_of_token2 x
;
147 new_tokens_before
= [];
151 let rebuild_tokens_extented toks_ext
=
152 let _tokens = ref [] in
153 toks_ext
+> List.iter
(fun tok
->
154 tok
.new_tokens_before
+> List.iter
(fun x
-> push2 x
_tokens);
155 if not tok
.remove
then push2 tok
.tok2
_tokens;
157 let tokens = List.rev
!_tokens in
158 (tokens +> List.map
mk_token_extended)
161 let mcode_contain_plus = function
162 | Ast_cocci.CONTEXT
(_
,Ast_cocci.NOTHING
) -> false
163 | Ast_cocci.CONTEXT _
-> true
164 (* patch: when need full coccinelle transformation *)
165 | Ast_cocci.MINUS
(_
,_
,_
,[]) -> false
166 | Ast_cocci.MINUS
(_
,_
,_
,x
::xs
) -> true
167 | Ast_cocci.PLUS _
-> raise Impossible
169 let contain_plus info
=
170 let mck = Ast_c.mcode_of_info info
in
171 mcode_contain_plus mck
173 (*****************************************************************************)
174 (* Last fix on the ast *)
175 (*****************************************************************************)
177 (* Because of the ugly trick to handle initialiser, I generate fake ','
178 * for the last initializer element, but if there is nothing around it,
179 * I don't want in the end to print it.
182 let remove_useless_fakeInfo_struct program
=
183 let bigf = { Visitor_c.default_visitor_c_s
with
184 Visitor_c.kini_s
= (fun (k
,bigf) ini
->
186 | InitList args
, ii
->
189 | [i1
;i2
;iicommaopt
] ->
190 if (not
(contain_plus iicommaopt
)) && (not
(contain_plus i2
))
191 && (Ast_c.is_fake iicommaopt
)
192 (* sometimes the guy put a normal iicommaopt *)
193 then InitList args
, [i1
;i2
]
194 else InitList args
, [i1
;i2
;iicommaopt
]
195 | [i1
;i2
;iicommaopt
;end_comma_opt
] ->
196 (* only in #define. end_comma_opt canot be fake *)
197 (* not sure if this will be considered ambiguous with a previous
199 if (not
(contain_plus iicommaopt
)) && (not
(contain_plus i2
))
200 && (Ast_c.is_fake iicommaopt
)
201 (* sometimes the guy put a normal iicommaopt *)
202 then InitList args
, [i1
;i2
;end_comma_opt
]
203 else InitList args
, [i1
;i2
;iicommaopt
;end_comma_opt
]
204 | _
-> raise Impossible
209 Visitor_c.vk_toplevel_s
bigf program
212 (*****************************************************************************)
213 (* Tokens1 generation *)
214 (*****************************************************************************)
216 let get_fakeInfo_and_tokens celem toks
=
217 let toks_in = ref toks
in
218 let toks_out = ref [] in
220 (* todo? verify good order of position ? *)
222 match Ast_c.pinfo_of_info info
with
224 Common.push2
(Fake1 info
) toks_out
225 | OriginTok _
| ExpandedTok _
->
226 (* get the associated comments/space/cppcomment tokens *)
227 let (before
, x
, after
) =
228 !toks_in +> Common.split_when
(fun tok
->
229 info
=*= TH.info_of_tok tok
)
231 assert(info
=*= TH.info_of_tok x
);
232 (*old: assert(before +> List.for_all (TH.is_comment)); *)
233 before
+> List.iter
(fun x
->
234 if not
(TH.is_comment x
)
235 then pr2 ("WEIRD: not a comment:" ^
TH.str_of_tok x
)
236 (* case such as int asm d3("x"); not yet in ast *)
238 before
+> List.iter
(fun x
-> Common.push2
(T1 x
) toks_out);
239 push2
(T1 x
) toks_out;
241 | AbstractLineTok _
->
242 (* can be called on type info when for instance use -type_c *)
243 if !Flag_parsing_c.pretty_print_type_info
244 then Common.push2
(Fake1 info
) toks_out
245 else raise Impossible
(* at this stage *)
248 let pr_space _
= () in (* use the spacing that is there already *)
250 Pretty_print_c.pp_program_gen
pr_elem pr_space celem
;
252 if not
(null
!toks_in)
253 then failwith
"WEIRD: unparsing not finished";
257 (* Fake nodes that have BEFORE code should be moved over any subsequent
258 whitespace and newlines, but not any comments, to get as close to the affected
259 code as possible. Similarly, fake nodes that have AFTER code should be moved
260 backwards. No fake nodes should have both before and after code. *)
262 let displace_fake_nodes toks
=
263 let is_fake = function Fake1 _
-> true | _
-> false in
264 let is_whitespace = function
265 T1
(Parser_c.TCommentSpace _
)
267 | T1
(Parser_c.TCommentNewline _
) -> true
271 try Some
(Common.split_when
is_fake toks
)
272 with Not_found
-> None
in
274 Some
(bef
,((Fake1 info
) as fake
),aft
) ->
275 (match !(info
.cocci_tag
) with
278 (Ast_cocci.CONTEXT
(_
,Ast_cocci.BEFORE _
),_
) ->
279 (* move the fake node forwards *)
280 let (whitespace
,rest
) = Common.span
is_whitespace aft
in
281 bef
@ whitespace
@ fake
:: (loop rest
)
282 | (Ast_cocci.CONTEXT
(_
,Ast_cocci.AFTER _
),_
) ->
283 (* move the fake node backwards *)
284 let revbef = List.rev bef
in
285 let (revwhitespace
,revprev
) = Common.span
is_whitespace revbef in
286 let whitespace = List.rev revwhitespace
in
287 let prev = List.rev revprev
in
288 prev @ fake
:: (loop (whitespace @ aft
))
289 | (Ast_cocci.CONTEXT
(_
,Ast_cocci.NOTHING
),_
) ->
290 bef
@ fake
:: (loop aft
)
291 | (Ast_cocci.CONTEXT
(_
,Ast_cocci.BEFOREAFTER _
),_
) ->
292 failwith
"fake node should not be before-after"
293 | _
-> bef
@ fake
:: (loop aft
) (* old: was removed when have simpler yacfe *)
296 bef
@ fake
:: (loop aft
)
299 | _
-> raise Impossible
in
302 (*****************************************************************************)
303 (* Tokens2 generation *)
304 (*****************************************************************************)
306 let comment2t2 = function
308 (* not sure iif the following list is exhaustive or complete *)
309 (Token_c.CppAttr
|Token_c.CppMacro
|Token_c.CppPassingCosWouldGetError
),
310 (info
: Token_c.info
)) ->
312 | (Token_c.TCommentCpp x
,(info
: Token_c.info
)) ->
313 C2
("\n"^info
.Common.str^
"\n")
314 | x
-> failwith
(Printf.sprintf
"unexpected comment %s" (Common.dump x
))
316 let expand_mcode toks
=
317 let toks_out = ref [] in
321 let add_elem t minus
=
324 let str = Ast_c.str_of_info info
in
326 then push2
(Fake2
) toks_out
327 (* fx the fake "," at the end of a structure or enum.
328 no idea what other fake info there can be... *)
329 else push2
(Comma
str) toks_out
333 (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
334 (* no tag on expandedTok ! *)
335 let modified = function
337 | Some
(Ast_cocci.CONTEXT
(pos
,Ast_cocci.NOTHING
),l
) -> false
339 (if (TH.is_expanded tok
&&
340 modified !((TH.info_of_tok tok
).cocci_tag
)
341 (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*))
345 "expanded token %s on line %d is either modified or stored in a metavariable"
346 (TH.str_of_tok tok
) (TH.line_of_tok tok
)));
348 let tok'
= tok +> TH.visitor_info_of_tok
(fun i
->
349 { i
with cocci_tag
= ref Ast_c.emptyAnnot
; }
353 if TH.is_origin
tok && not
(TH.is_real_comment
tok)
361 push2
(T2
(tok'
, minus
, optindex)) toks_out
366 Ast_c.mcode_and_env_of_cocciref
((info_of_token1 t
).cocci_tag
) in
368 let pr_cocci s ln col rcol hint
=
369 push2
(Cocci2
(s
,ln
,col
,rcol
,hint
)) toks_out in
371 (match Ast_c.pinfo_of_info info
with
372 Ast_c.AbstractLineTok _
->
373 push2
(C2
(Ast_c.str_of_info info
)) toks_out
374 | Ast_c.FakeTok
(s
,_
) ->
375 push2
(C2 s
) toks_out
377 Printf.printf
"line: %s\n" (Common.dump info
);
378 failwith
"not an abstract line");
379 (!(info
.Ast_c.comments_tag
)).Ast_c.mafter
+>
380 List.iter
(fun x
-> Common.push2
(comment2t2 x
) toks_out) in
382 let pr_barrier ln col
= (* marks a position, used around C code *)
383 push2
(Cocci2
("",ln
,col
,col
,None
)) toks_out in
384 let pr_nobarrier ln col
= () in (* not needed for linux spacing *)
386 let pr_cspace _
= push2
(C2
" ") toks_out in
388 let pr_space _
= () (* rely on add_space in cocci code *) in
389 let pr_arity _
= () (* not interested *) in
391 let indent _
= push2 Indent_cocci2
toks_out in
392 let unindent x
= push2
(Unindent_cocci2 x
) toks_out in
395 (env
, pr_cocci, pr_c, pr_cspace,
396 (match !Flag_parsing_c.spacing
with
397 Flag_parsing_c.SMPL
-> pr_space | _
-> pr_cspace),
399 (match !Flag_parsing_c.spacing
with
400 Flag_parsing_c.SMPL
-> pr_barrier | _
-> pr_nobarrier),
403 (* old: when for yacfe with partial cocci:
407 (* patch: when need full coccinelle transformation *)
408 let unparser = Unparse_cocci.pp_list_list_any
args_pp false in
410 | Ast_cocci.MINUS
(_
,inst
,adj
,any_xxs
) ->
411 (* Why adding ? because I want to have all the information, the whole
412 * set of tokens, so I can then process and remove the
413 * is_between_two_minus for instance *)
414 add_elem t
(Min
(inst
,adj
));
415 unparser any_xxs
Unparse_cocci.InPlace
416 | Ast_cocci.CONTEXT
(_
,any_befaft
) ->
417 (match any_befaft
with
418 | Ast_cocci.NOTHING
->
420 | Ast_cocci.BEFORE
(xxs
,_
) ->
421 unparser xxs
Unparse_cocci.Before
;
423 | Ast_cocci.AFTER
(xxs
,_
) ->
425 unparser xxs
Unparse_cocci.After
;
426 | Ast_cocci.BEFOREAFTER
(xxs
, yys
, _
) ->
427 unparser xxs
Unparse_cocci.Before
;
429 unparser yys
Unparse_cocci.After
;
431 | Ast_cocci.PLUS _
-> raise Impossible
435 toks
+> List.iter
expand_info;
439 (*****************************************************************************)
440 (* Tokens2 processing, filtering, adjusting *)
441 (*****************************************************************************)
443 let is_space = function
444 | T2
(Parser_c.TCommentSpace _
,_b
,_i
) -> true (* only whitespace *)
447 let is_newline = function
448 | T2
(Parser_c.TCommentNewline _
,_b
,_i
) -> true
451 let is_whitespace = function
454 | Parser_c.TCommentSpace _
-> true (* only whitespace *)
455 | Parser_c.TCommentNewline _
(* newline plus whitespace *) -> true
460 let is_minusable_comment = function
463 | Parser_c.TCommentSpace _
(* only whitespace *)
464 (* patch: coccinelle *)
465 | Parser_c.TCommentNewline _
(* newline plus whitespace *) -> true
466 | Parser_c.TComment _
when !Flag_parsing_c.keep_comments
-> false
467 | Parser_c.TComment _
468 | Parser_c.TCommentCpp
(Token_c.CppAttr
, _
)
469 | Parser_c.TCommentCpp
(Token_c.CppMacro
, _
)
470 | Parser_c.TCommentCpp
(Token_c.CppDirective
, _
) (* result was false *)
473 | Parser_c.TCommentMisc _
474 | Parser_c.TCommentCpp
(Token_c.CppPassingCosWouldGetError
, _
)
481 let is_minusable_comment_nocpp = function
484 | Parser_c.TCommentSpace _
(* only whitespace *)
485 (* patch: coccinelle *)
486 | Parser_c.TCommentNewline _
(* newline plus whitespace *) -> true
487 | Parser_c.TComment _
when !Flag_parsing_c.keep_comments
-> false
488 | Parser_c.TComment _
-> true
489 | Parser_c.TCommentCpp
(Token_c.CppAttr
, _
)
490 | Parser_c.TCommentCpp
(Token_c.CppMacro
, _
)
491 | Parser_c.TCommentCpp
(Token_c.CppDirective
, _
)
494 | Parser_c.TCommentMisc _
495 | Parser_c.TCommentCpp
(Token_c.CppPassingCosWouldGetError
, _
)
502 let all_coccis = function
503 Cocci2 _
| C2 _
| Comma _
| Indent_cocci2
| Unindent_cocci2 _
-> true
506 (*previously gave up if the first character was a newline, but not clear why*)
507 let is_minusable_comment_or_plus x
= is_minusable_comment x
or all_coccis x
509 let set_minus_comment adj
= function
511 let str = TH.str_of_tok t
in
513 | Parser_c.TCommentSpace _
514 (* patch: coccinelle *)
515 | Parser_c.TCommentNewline _
-> ()
517 | Parser_c.TComment _
518 | Parser_c.TCommentCpp
(Token_c.CppAttr
, _
)
519 | Parser_c.TCommentCpp
(Token_c.CppMacro
, _
)
520 | Parser_c.TCommentCpp
(Token_c.CppDirective
, _
)
522 pr2 (Printf.sprintf
"%d: ERASING_COMMENTS: %s"
523 (TH.line_of_tok t
) str)
524 | _
-> raise Impossible
527 (* patch: coccinelle *)
528 | T2
(t
,Min adj
,idx
) as x
-> x
529 | _
-> raise Impossible
531 let set_minus_comment_or_plus adj
= function
532 Cocci2 _
| C2 _
| Comma _
| Indent_cocci2
| Unindent_cocci2 _
as x
-> x
533 | x
-> set_minus_comment adj x
536 xs
+> Common.exclude
(function
537 | T2
(t
,Min adj
,_
) -> true
541 let drop_expanded_and_fake xs
=
542 xs
+> Common.exclude
(function
543 | T2
(t
,_
,_
) when TH.is_expanded t
-> true
548 let remove_minus_and_between_and_expanded_and_fake xs
=
550 (* get rid of expanded and fake tok *)
551 let xs = drop_expanded_and_fake xs in
553 let minus_or_comment = function
554 T2
(_
,Min adj
,_
) -> true
555 | x
-> is_minusable_comment x
in
557 let minus_or_comment_nocpp = function
558 T2
(_
,Min adj
,_
) -> true
559 | x
-> is_minusable_comment_nocpp x
in
561 let common_adj (index1
,adj1
) (index2
,adj2
) =
562 adj1
= adj2
(* same adjacency info *) &&
563 (* non-empty intersection of witness trees *)
564 not
((Common.inter_set index1 index2
) = []) in
566 (* new idea: collects regions not containing non-space context code
567 if two adjacent adjacent minus tokens satisfy common_adj then delete
568 all spaces, comments etc between them
569 if two adjacent minus tokens do not satisfy common_adj only delete
570 the spaces between them if there are no comments, etc.
571 if the region contain no plus code and is both preceded and followed
572 by a newline, delete the initial newline. *)
574 let rec adjust_around_minus = function
576 | (T2
(Parser_c.TCommentNewline c
,_b
,_i
) as x
)::
577 (T2
(_
,Min adj1
,_
) as t1
)::xs ->
578 let (minus_list
,rest
) = Common.span not_context
(t1
::xs) in
579 let contains_plus = List.exists is_plus minus_list
in
581 match List.rev minus_list
with
582 (T2
(Parser_c.TCommentNewline c
,_b
,_i
))::rest
583 when List.for_all
minus_or_comment minus_list
->
584 set_minus_comment_or_plus adj1
x
586 x :: adjust_within_minus
contains_plus minus_list
@
587 adjust_around_minus rest
588 | (T2
(_
,Min adj1
,_
) as t1
)::xs ->
589 let (minus_list
,rest
) = Common.span not_context
(t1
::xs) in
590 let contains_plus = List.exists is_plus minus_list
in
591 adjust_within_minus
contains_plus minus_list
@ adjust_around_minus rest
593 x :: adjust_around_minus xs
594 and adjust_within_minus cp
(* contains plus *) = function
595 (T2
(_
,Min adj1
,_
) as t1
)::xs ->
596 let not_minus = function T2
(_
,Min _
,_
) -> false | _
-> true in
597 let (not_minus_list
,rest
) = Common.span
not_minus xs in
600 (T2
(_
,Min adj2
,_
) as t2
)::xs when common_adj adj1 adj2
->
601 (List.map
(set_minus_comment_or_plus adj1
) not_minus_list
)
602 @ (adjust_within_minus cp
(t2
::xs))
603 | (T2
(_
,Min adj2
,_
) as t2
)::xs ->
604 if not cp
&& List.for_all
is_whitespace not_minus_list
606 (List.map
(set_minus_comment_or_plus adj1
) not_minus_list
)
607 @ (adjust_within_minus cp
(t2
::xs))
609 not_minus_list
@ (adjust_within_minus cp
(t2
::xs))
614 let (spaces
,rest
) = Common.span
is_space xs in
615 (List.map
(set_minus_comment_or_plus adj1
) spaces
)
617 | xs -> failwith
"should always start with minus"
618 and not_context
= function
619 (T2
(_
,Ctx
,_
) as x) when not
(is_minusable_comment x) -> false
621 and is_plus
= function
622 C2 _
| Comma _
| Cocci2 _
-> true
625 let xs = adjust_around_minus xs in
627 (* this drops blank lines after a brace introduced by removing code *)
628 let minus_or_comment_nonl = function
629 T2
(_
,Min adj
,_
) -> true
630 | T2
(Parser_c.TCommentNewline _
,_b
,_i
) -> false
631 | x -> is_minusable_comment x in
633 let rec adjust_after_brace = function
635 | ((T2
(_
,Ctx
,_
)) as x)::((T2
(_
,Min adj
,_
)::_
) as xs)
636 when str_of_token2 x =$
= "{" ->
637 let (between_minus
,rest
) = Common.span
minus_or_comment_nonl xs in
638 let is_whitespace = function
639 T2
(Parser_c.TCommentSpace _
,_b
,_i
)
641 | T2
(Parser_c.TCommentNewline _
,_b
,_i
) -> true
643 let (newlines
,rest
) = Common.span
is_whitespace rest
in
644 let (drop_newlines
,last_newline
) =
645 let rec loop = function
647 | ((T2
(Parser_c.TCommentNewline _
,_b
,_i
)) as x) :: rest
->
650 let (drop_newlines
,last_newline
) = loop xs in
651 (drop_newlines
,x::last_newline
) in
652 loop (List.rev newlines
) in
653 x::between_minus
@(List.map
(set_minus_comment adj
) drop_newlines
)@
655 adjust_after_brace rest
656 | x::xs -> x::adjust_after_brace xs in
658 let xs = adjust_after_brace xs in
660 (* search backwards from context } over spaces until reaching a newline.
661 then go back over all minus code until reaching some context or + code.
662 get rid of all intervening spaces, newlines, and comments
664 let rec adjust_before_brace = function
666 | ((T2
(t
,Ctx
,_
)) as x)::xs when str_of_token2 x =$
= "}" or is_newline x ->
667 let (outer_spaces
,rest
) = Common.span
is_space xs in
670 ((T2
(Parser_c.TCommentNewline _
,Ctx
,_i
)) as h
) ::
671 (* the rest of this code is the same as from_newline below
672 but merging them seems to be error prone... *)
673 ((T2
(t
, Min adj
, idx
)) as m
) :: rest
->
674 let (spaces
,rest
) = Common.span
minus_or_comment_nocpp rest
in
676 (List.map
(set_minus_comment adj
) spaces
) @
677 (adjust_before_brace rest
)
678 | _
-> adjust_before_brace rest
)
679 | x::xs -> x :: (adjust_before_brace xs) in
681 let from_newline = function
682 ((T2
(t
, Min adj
, idx
)) as m
) :: rest
->
683 let (spaces
,rest
) = Common.span
minus_or_comment_nocpp rest
in
685 (List.map
(set_minus_comment adj
) spaces
) @
686 (adjust_before_brace rest
)
687 | ((T2
(t0
, Ctx
, idx0
)) as m0
) :: ((T2
(t
, Min adj
, idx
)) as m
) :: rest
688 when TH.str_of_tok t0
= "" ->
689 (* This is for the case of a #define that is completely deleted,
690 because a #define has a strange EOL token at the end.
691 We hope there i no other kind of token that is represented by
692 "", but it seems like changing the kind of token might break
693 the end of entity recognition in the C parser.
694 See parsing_hacks.ml *)
695 let (spaces
,rest
) = Common.span
minus_or_comment_nocpp rest
in
697 (List.map
(set_minus_comment adj
) spaces
) @
698 (adjust_before_brace rest
)
699 | rest
-> adjust_before_brace rest
in
701 let xs = List.rev
(from_newline (List.rev
xs)) in
702 let xs = drop_minus xs in
705 (* normally, in C code, a semicolon is not preceded by a space or newline *)
706 let adjust_before_semicolon toks
=
707 let toks = List.rev
toks in
708 let rec search_semic = function
710 | ((T2
(_
,Ctx
,_
)) as x)::xs | ((Cocci2 _
) as x)::xs ->
711 if List.mem
(str_of_token2 x) [";";")";","]
712 then x :: search_minus
false xs
713 else x :: search_semic xs
714 | x::xs -> x :: search_semic xs
715 and search_minus seen_minus
xs =
716 let (spaces
, rest
) = Common.span
is_space xs in
717 (* only delete spaces if something is actually deleted *)
719 ((T2
(_
,Min _
,_
)) as a
)::rerest
-> a
:: search_minus
true rerest
720 | _
-> if seen_minus
then rest
else xs in
721 List.rev
(search_semic toks)
723 let is_ident_like s
= s
==~
Common.regexp_alpha
725 let rec drop_space_at_endline = function
729 ((((T2
(Parser_c.TCommentSpace _
,Ctx
,_i
)) |
730 (T2
(Parser_c.TCommentNewline _
,Ctx
,_i
))) :: _
) as rest
) ->
731 (* when unparse_cocci doesn't know whether space is needed *)
732 drop_space_at_endline rest
733 | ((T2
(Parser_c.TCommentSpace _
,Ctx
,_i
)) as a
)::rest
->
734 let (outer_spaces
,rest
) = Common.span
is_space rest
in
735 let minus_or_comment_or_space_nocpp = function
736 T2
(_
,Min adj
,_
) -> true
737 | (T2
(Parser_c.TCommentSpace _
,Ctx
,_i
)) -> true
738 | (T2
(Parser_c.TCommentNewline _
,Ctx
,_i
)) -> false
740 let (minus
,rest
) = Common.span
minus_or_comment_or_space_nocpp rest
in
741 let fail _
= a
:: outer_spaces
@ minus
@ (drop_space_at_endline rest
) in
742 if List.exists
(function T2
(_
,Min adj
,_
) -> true | _
-> false) minus
745 ((T2
(Parser_c.TCommentNewline _
,Ctx
,_i
)) as a
)::rest
->
746 (* drop trailing spaces *)
747 minus
@a
::(drop_space_at_endline rest
)
750 | a
:: rest
-> a
:: drop_space_at_endline rest
752 (* if a removed ( is between two tokens, then add a space *)
753 let rec paren_to_space = function
757 | ((T2
(_
,Ctx
,_
)) as a
)::((T2
(t
,Min _
,_
)) as b
)::((T2
(_
,Ctx
,_
)) as c
)::rest
758 when not
(is_whitespace a
) && TH.str_of_tok t
= "(" ->
759 a
:: b
:: (C2
" ") :: (paren_to_space (c
:: rest
))
760 | a
:: rest
-> a
:: (paren_to_space rest
)
762 let rec add_space xs =
766 | (Cocci2
(sx
,lnx
,_
,rcolx
,_
) as x)::((Cocci2
(sy
,lny
,lcoly
,_
,_
)) as y
)::xs
767 when !Flag_parsing_c.spacing
= Flag_parsing_c.SMPL
&&
768 not
(lnx
= -1) && lnx
= lny
&& not
(rcolx
= -1) && rcolx
< lcoly
->
769 (* this only works within a line. could consider whether
770 something should be done to add newlines too, rather than
771 printing them explicitly in unparse_cocci. *)
772 x::C2
(String.make
(lcoly
-rcolx
) ' '
)::add_space (y
::xs)
773 | ((T2
(_
,Ctx
,_
)) as x)::((Cocci2 _
) as y
)::xs -> (* add space on boundary *)
774 let sx = str_of_token2 x in
775 let sy = str_of_token2 y
in
776 if is_ident_like sx && (is_ident_like sy or List.mem
sy ["="])
777 then x::C2
" "::(add_space (y
::xs))
778 else x::(add_space (y
::xs))
779 | x::y
::xs -> (* not boundary, not sure if it is possible *)
780 let sx = str_of_token2 x in
781 let sy = str_of_token2 y
in
782 if is_ident_like sx && is_ident_like sy
783 then x::C2
" "::(add_space (y
::xs))
784 else x::(add_space (y
::xs))
786 (* A fake comma is added at the end of an unordered initlist or a enum
787 decl, if the initlist or enum doesn't already end in a comma. This is only
788 needed if there is + code, ie if we see Cocci after it in the code sequence *)
790 let rec drop_end_comma = function
793 | ((Comma
",") as x) :: rest
->
794 let (newlines
,rest2
) = Common.span
is_whitespace rest
in
796 (Cocci2 _
) :: _
-> x :: drop_end_comma rest
797 | _
-> drop_end_comma rest
)
798 | x :: xs -> x :: drop_end_comma xs
800 (* The following only works for the outermost function call. Stack records
801 the column of all open parentheses. Space_cell contains the most recent
802 comma in the outermost function call. The goal is to decide whether this
803 should be followed by a space or a newline and indent. *)
804 let add_newlines toks tabbing_unit
=
805 let create_indent n
=
807 match tabbing_unit
with
808 Some
("\t",_
) -> ("\t",8)
809 | Some
("",_
) -> ("\t",8) (* not sure why... *)
810 | Some
(s
,_
) -> (s
,String.length s
) (* assuming only spaces *)
811 | None
-> ("\t",8) in
814 then tu ^
loop (seen
+ tlen
)
815 else String.make
(n
-seen
) ' '
in
817 let check_for_newline count
x = function
818 Some
(start
,space_cell
) when count
> Flag_parsing_c.max_width
->
819 space_cell
:= "\n"^
(create_indent x);
820 Some
(x + (count
- start
))
822 (* the following is for strings that may contain newline *)
823 let string_length s count
=
824 let l = list_of_string s
in
832 let rec loop info count
= function
834 | ((T2
(tok,_
,_
)) as a
)::xs ->
835 a
:: loop info
(string_length (TH.str_of_tok
tok) count
) xs
836 | ((Cocci2
(s
,line
,lcol
,rcol
,hint
)) as a
)::xs ->
837 let (stack
,space_cell
) = info
in
840 None
-> loop info
(count
+ (String.length s
)) xs
841 | Some
Unparse_cocci.StartBox
->
842 let count = count + (String.length s
) in
843 loop (count::stack
,space_cell
) count xs
844 | Some
Unparse_cocci.EndBox
->
845 let count = count + (String.length s
) in
848 (match check_for_newline count x space_cell
with
849 Some
count -> loop ([],None
) count xs
850 | None
-> loop ([],None
) count xs)
851 | _
-> loop (List.tl stack
,space_cell
) count xs)
852 | Some
(Unparse_cocci.SpaceOrNewline sp
) ->
853 let count = count + (String.length s
) + 1 (*space*) in
856 (match check_for_newline count x space_cell
with
857 Some
count -> loop (stack
,Some
(x,sp
)) count xs
858 | None
-> loop (stack
,Some
(count,sp
)) count xs)
859 | _
-> loop info
count xs) in
861 | ((C2
(s
)) as a
)::xs -> a
:: loop info
(string_length s
count) xs
862 | ((Comma
(s
)) as a
)::xs -> a
:: loop info
(string_length s
count) xs
863 | Fake2
:: _
| Indent_cocci2
:: _
864 | Unindent_cocci2 _
::_
->
865 failwith
"unexpected fake, indent, or unindent" in
866 let redo_spaces prev = function
867 Cocci2
(s
,line
,lcol
,rcol
,Some
(Unparse_cocci.SpaceOrNewline sp
)) ->
868 C2
!sp
:: Cocci2
(s
,line
,lcol
,rcol
,None
) :: prev
870 (match !Flag_parsing_c.spacing
with
871 Flag_parsing_c.SMPL
-> toks
872 | _
-> List.rev
(List.fold_left
redo_spaces [] (loop ([],None
) 0 toks)))
874 (* When insert some new code, because of a + in a SP, we must add this
875 * code at the right place, with the good indentation. So each time we
876 * encounter some spacing info, with some newline, we maintain the
877 * current indentation level used.
879 * TODO problems: not accurate. ex: TODO
881 * TODO: if in #define region, should add a \ \n
883 let new_tabbing2 space
=
884 (list_of_string space
)
886 +> Common.take_until
(fun c
-> c
=<= '
\n'
)
888 +> List.map string_of_char
892 Common.profile_code
"C unparsing.new_tabbing" (fun () -> new_tabbing2 a
)
895 let rec adjust_indentation xs =
897 let _current_tabbing = ref "" in
898 let tabbing_unit = ref None
in
900 let string_of_list l = String.concat
"" (List.map string_of_char
l) in
902 (* try to pick a tabbing unit for the plus code *)
903 let adjust_tabbing_unit old_tab new_tab
=
904 if !tabbing_unit =*= None
&& String.length new_tab
> String.length old_tab
906 let old_tab = list_of_string
old_tab in
907 let new_tab = list_of_string
new_tab in
908 let rec loop = function
910 tabbing_unit := Some
(string_of_list new_tab,List.rev
new_tab)
911 | (_
,[]) -> failwith
"not possible"
912 | (o
::os
,n
::ns
) -> loop (os
,ns
) in (* could check for equality *)
913 loop (old_tab,new_tab) in
915 let remtab tu current_tab
=
916 let current_tab = List.rev
(list_of_string
current_tab) in
917 let rec loop = function
918 ([],new_tab) -> string_of_list (List.rev
new_tab)
919 | (_
,[]) -> "" (*weird; tabbing unit used up more than the current tab*)
920 | (t
::ts
,n
::ns
) when t
=<= n
-> loop (ts
,ns
)
921 | (_
,ns
) -> (* mismatch; remove what we can *)
922 string_of_list (List.rev ns
) in
923 loop (tu
,current_tab) in
925 let rec find_first_tab started
= function
927 | ((T2
(tok,_
,_
)) as x)::xs when str_of_token2 x =$
= "{" ->
928 find_first_tab true xs
929 (* patch: coccinelle *)
930 | ((T2
(Parser_c.TCommentNewline s
, _
, _
)) as x)::_
932 let s = str_of_token2 x +> new_tabbing in
933 tabbing_unit := Some
(s,List.rev
(list_of_string
s))
934 | x::xs -> find_first_tab started
xs in
935 find_first_tab false xs;
937 let rec balanced ct
= function
939 | ((T2
(tok,_
,_
)) as x)::xs ->
940 (match str_of_token2 x with
941 "(" -> balanced (ct
+1) xs
942 | ")" -> balanced (ct
-1) xs
943 | _
-> balanced ct
xs)
944 | x::xs -> balanced ct
xs in
946 let rec aux started
xs =
949 (* patch: coccinelle *)
950 | ((T2
(tok,_
,_
)) as x)::(T2
(Parser_c.TCommentNewline
s, _
, _
))::
951 ((Cocci2
("{",_
,_
,_
,_
)) as a
)::xs
952 when started
&& str_of_token2 x =$
= ")" ->
953 (* to be done for if, etc, but not for a function header *)
954 x::(C2
" ")::a
::(aux started
xs)
955 | ((T2
(Parser_c.TCommentNewline
s, _
, _
)) as x)::xs
956 when balanced 0 (fst
(Common.span
(function x -> not
(is_newline x)) xs)) ->
957 let old_tabbing = !_current_tabbing in
958 str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
959 (* only trust the indentation after the first { *)
961 then adjust_tabbing_unit old_tabbing !_current_tabbing);
962 let coccis_rest = Common.span
all_coccis xs in
963 (match coccis_rest with
964 (_
::_
,((T2
(tok,_
,_
)) as y
)::_
) when str_of_token2 y
=$
= "}" ->
965 (* the case where cocci code has been added before a close } *)
966 x::aux started
(Indent_cocci2
::xs)
967 | _
-> x::aux started
xs)
968 | Indent_cocci2
::xs ->
969 (match !tabbing_unit with
970 None
-> aux started
xs
972 _current_tabbing := (!_current_tabbing)^tu
;
973 Cocci2
(tu
,-1,-1,-1,None
)::aux started
xs)
974 | Unindent_cocci2
(permanent
)::xs ->
975 (match !tabbing_unit with
976 None
-> aux started
xs
978 _current_tabbing := remtab tu
(!_current_tabbing);
980 (* border between existing code and cocci code *)
981 | ((T2
(tok,_
,_
)) as x)::((Cocci2
("\n",_
,_
,_
,_
)) as y
)::xs
982 when str_of_token2 x =$
= "{" ->
983 x::aux true (y
::Indent_cocci2
::xs)
984 | ((Cocci2 _
) as x)::((T2
(tok,_
,_
)) as y
)::xs
985 when str_of_token2 y
=$
= "}" ->
986 x::aux started
(y
::Unindent_cocci2
true::xs)
987 (* starting the body of the function *)
988 | ((T2
(tok,_
,_
)) as x)::xs when str_of_token2 x =$
= "{" -> x::aux true xs
989 | ((Cocci2
("{",_
,_
,_
,_
)) as a
)::xs -> a
::aux true xs
990 | ((Cocci2
("\n",_
,_
,_
,_
)) as x)::Unindent_cocci2
(false)::xs ->
992 | ((Cocci2
("\n",_
,_
,_
,_
)) as x)::xs ->
993 (* dont inline in expr because of weird eval order of ocaml *)
994 let s = !_current_tabbing in
995 x::Cocci2
(s,-1,-1,-1,None
)::aux started
xs
996 | x::xs -> x::aux started
xs in
997 (aux false xs,!tabbing_unit)
1000 let rec find_paren_comma = function
1003 (* do nothing if was like this in original file *)
1004 | ({ str = "("; idx
= Some p1
} as _x1
)::({ str = ","; idx
= Some p2
} as x2
)
1005 ::xs when p2
=|= p1
+ 1 ->
1006 find_paren_comma (x2
::xs)
1008 | ({ str = ","; idx
= Some p1
} as _x1
)::({ str = ","; idx
= Some p2
} as x2
)
1009 ::xs when p2
=|= p1
+ 1 ->
1010 find_paren_comma (x2
::xs)
1012 | ({ str = ","; idx
= Some p1
} as _x1
)::({ str = ")"; idx
= Some p2
} as x2
)
1013 ::xs when p2
=|= p1
+ 1 ->
1014 find_paren_comma (x2
::xs)
1016 (* otherwise yes can adjust *)
1017 | ({ str = "(" } as _x1
)::({ str = ","} as x2
)::xs ->
1019 find_paren_comma (x2
::xs)
1020 | ({ str = "," } as x1
)::({ str = ","} as x2
)::xs ->
1022 find_paren_comma (x2
::xs)
1024 | ({ str = "," } as x1
)::({ str = ")"} as x2
)::xs ->
1026 find_paren_comma (x2
::xs)
1032 let fix_tokens toks =
1033 let toks = toks +> List.map
mk_token_extended in
1035 let cleaner = toks +> Common.exclude
(function
1036 | {tok2
= T2
(t
,_
,_
)} -> TH.is_real_comment t
(* I want the ifdef *)
1039 find_paren_comma cleaner;
1041 let toks = rebuild_tokens_extented toks in
1042 toks +> List.map
(fun x -> x.tok2
)
1046 (*****************************************************************************)
1047 (* Final unparsing (and debugging support) *)
1048 (*****************************************************************************)
1051 type kind_token2
= KFake
| KCocci
| KC
| KExpanded
| KOrigin
1053 let kind_of_token2 = function
1055 | Cocci2 _
-> KCocci
1059 (match TH.pinfo_of_tok t
with
1060 | ExpandedTok _
-> KExpanded
1061 | OriginTok _
-> KOrigin
1062 | FakeTok _
-> raise Impossible
(* now a Fake2 *)
1063 | AbstractLineTok _
-> raise Impossible
(* now a KC *)
1065 | Unindent_cocci2 _
| Indent_cocci2
-> raise Impossible
1069 let start_mark = function
1073 | KExpanded
-> "!E!"
1076 let print_all_tokens2 pr
xs =
1077 if !Flag_parsing_c.debug_unparsing
1079 let current_kind = ref KOrigin
in
1080 xs +> List.iter
(fun t
->
1081 let newkind = kind_of_token2 t
in
1082 if newkind =*= !current_kind
1083 then pr
(str_of_token2 t
)
1086 pr
(start_mark newkind);
1087 pr
(str_of_token2 t
);
1088 current_kind := newkind
1092 xs +> List.iter
(fun x -> pr
(str_of_token2 x))
1097 (*****************************************************************************)
1099 (*****************************************************************************)
1101 (* old: PPviatok was made in the beginning to allow to pretty print a
1102 * complete C file, including a modified C file by transformation.ml,
1103 * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
1104 * maybe flow_to_ast) all the cases. Indeed we don't need to do some
1105 * fancy stuff when a function was not modified at all. Just need to
1106 * print the list of token as-is. But now pretty_print_c.ml handles
1107 * almost everything so maybe less useful. Maybe PPviatok allows to
1108 * optimize a little the pretty printing.
1110 * update: now have PPviastr which goes even faster than PPviatok, so
1111 * PPviatok has disappeared.
1114 type ppmethod
= PPnormal
| PPviastr
1119 (* The pp_program function will call pretty_print_c.ml with a special
1120 * function to print the leaf components, the tokens. When we want to
1121 * print a token, we need to print also maybe the space and comments that
1122 * were close to it in the original file (and that was omitted during the
1123 * parsing phase), and honor what the cocci-info attached to the token says.
1124 * Maybe we will not print the token if it's a MINUS-token, and maybe we will
1125 * print it and also print some cocci-code attached in a PLUS to it.
1126 * So we will also maybe call unparse_cocci. Because the cocci-code may
1127 * contain metavariables, unparse_cocci will in fact sometimes call back
1128 * pretty_print_c (which will this time don't call back again unparse_cocci)
1131 let pp_program2 xs outfile
=
1132 Common.with_open_outfile outfile
(fun (pr
,chan
) ->
1134 if !Flag_parsing_c.debug_unparsing
1135 then begin pr2_no_nl
s; flush stderr
end
1138 (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1141 xs +> List.iter
(fun ((e
,(str, toks_e
)), ppmethod
) ->
1142 (* here can still work on ast *)
1143 let e = remove_useless_fakeInfo_struct e in
1147 (* now work on tokens *)
1148 (* phase1: just get all the tokens, all the information *)
1149 assert(toks_e
+> List.for_all
(fun t
->
1150 TH.is_origin t
or TH.is_expanded t
1152 let toks = get_fakeInfo_and_tokens e toks_e
in
1153 let toks = displace_fake_nodes toks in
1154 (* assert Origin;ExpandedTok;Faketok *)
1155 let toks = expand_mcode toks in
1157 (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1158 * and no tag information, just NOTHING. *)
1161 if !Flag.sgrep_mode2
1163 (* nothing else to do for sgrep *)
1164 drop_expanded_and_fake (drop_minus toks)
1166 (* phase2: can now start to filter and adjust *)
1167 let (toks,tu
) = adjust_indentation toks in
1168 let toks = adjust_before_semicolon toks in(*before remove minus*)
1169 let toks = drop_space_at_endline toks in
1170 let toks = paren_to_space toks in
1171 let toks = drop_end_comma toks in
1172 let toks = remove_minus_and_between_and_expanded_and_fake toks in
1173 (* assert Origin + Cocci + C and no minus *)
1174 let toks = add_space toks in
1175 let toks = add_newlines toks tu
in
1176 let toks = fix_tokens toks in
1179 (* in theory here could reparse and rework the ast! or
1180 * apply some SP. Not before cos julia may have generated
1181 * not parsable file. Need do unparsing_tricks call before being
1182 * ready to reparse. *)
1183 print_all_tokens2 pr toks;
1185 | PPviastr
-> pr str
1189 let pp_program a b
=
1190 Common.profile_code
"C unparsing" (fun () -> pp_program2 a b
)
1193 let pp_program_default xs outfile
=
1194 let xs'
= xs +> List.map
(fun x -> x, PPnormal
) in
1195 pp_program xs' outfile