1 (* Yoann Padioleau, Julia Lawall
3 * Copyright (C) 2012, INRIA.
4 * Copyright (C) 2010, 2011, University of Copenhagen DIKU and INRIA.
5 * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License (GPL)
9 * version 2 as published by the Free Software Foundation.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * file license.txt for more details.
17 * Modifications by Julia Lawall for better newline handling.
21 module TH
= Token_helpers
23 (* should keep comments and directives in between adjacent deleted terms,
24 but not comments and directives within deleted terms. should use the
25 labels found in the control-flow graph *)
29 (*****************************************************************************)
31 (*****************************************************************************)
32 let pr2, pr2_once
= mk_pr2_wrappers
Flag_parsing_c.verbose_unparsing
34 (*****************************************************************************)
35 (* Types used during the intermediate phases of the unparsing *)
36 (*****************************************************************************)
40 | T1
of Parser_c.token
42 (* The cocci_tag of the token should always be a NOTHING. The mark of
43 * the token can only be OriginTok or ExpandedTok. Why not get rid of
44 * token and get something simpler ? because we need to know if the
45 * info is a TCommentCpp or TCommentSpace, etc for some of the further
46 * analysis so easier to keep with the token.
48 * This type contains the whole information. Have all the tokens with this
52 | Min
of (int list
(* match numbers from witness trees *) *
53 Ast_cocci.adjacency
(* adjacency information *))
57 | T2
of Parser_c.token
* min
58 * int option (* orig index, abstracting away comments and space *)
59 * Unparse_cocci.nlhint
option
61 | Cocci2
of string * int (* line *) * int (* lcol *) * int (* rcol *)
62 * Unparse_cocci.nlhint
option
66 | Unindent_cocci2
of bool (* true for permanent, false for temporary *)
71 | T3
of Parser_c.token
76 (* similar to the tech in parsing_hack *)
80 idx
: int option; (* to know if 2 tokens were consecutive in orig file *)
81 mutable new_tokens_before
: token2 list
;
82 mutable remove
: bool;
86 (*****************************************************************************)
88 (*****************************************************************************)
90 let info_of_token1 t
=
93 | T1 tok
-> TH.info_of_tok tok
95 let print_token1 = function
96 | T1 tok
-> TH.str_of_tok tok
97 | Fake1 info
-> "fake"
99 let str_of_token2 = function
100 | T2
(t
,_
,_
,_
) -> TH.str_of_tok t
109 let print_token2 = function
113 | Parser_c.TCommentSpace _
-> " sp "
114 | Parser_c.TCommentNewline _
-> " nl "
115 | Parser_c.TCommentCpp _
-> " cp "
116 | Parser_c.TCommentMisc _
-> " misc "
117 | Parser_c.TComment _
-> " comment "
122 Printf.sprintf
"-.%d[%s]"
123 (match adj
with Ast_cocci.ADJ n
-> n
| _
-> -1)
124 (String.concat
" " (List.map string_of_int index
))
126 "T2:"^
b_str^
t_str^
TH.str_of_tok t
131 Printf.sprintf
"-%d[%s]"
132 (match adj
with Ast_cocci.ADJ n
-> n
| _
-> -1)
133 (String.concat
" " (List.map string_of_int index
))
136 | Cocci2
(s
,_
,lc
,rc
,_
) -> Printf.sprintf
"Cocci2:%d:%d%s" lc rc s
138 | Comma s
-> "Comma:"^s
139 | Indent_cocci2
-> "Indent"
140 | Unindent_cocci2 _
-> "Unindent"
141 | EatSpace2
-> "EatSpace"
144 let simple_print_all_tokens pr l =
145 List.iter (function x -> Printf.printf "|%s| " (pr x)) l;
149 let str_of_token3 = function
150 | T3 t
-> TH.str_of_tok t
151 | Cocci3 s
| C3 s
-> s
155 let mk_token_extended x
=
158 | T2
(_
,_
,idx
,_
) -> idx
161 str
= str_of_token2 x
;
163 new_tokens_before
= [];
167 let rebuild_tokens_extented toks_ext
=
168 let _tokens = ref [] in
169 toks_ext
+> List.iter
(fun tok
->
170 tok
.new_tokens_before
+> List.iter
(fun x
-> push2 x
_tokens);
171 if not tok
.remove
then push2 tok
.tok2
_tokens;
173 let tokens = List.rev
!_tokens in
174 (tokens +> List.map
mk_token_extended)
177 let mcode_contain_plus = function
178 | Ast_cocci.CONTEXT
(_
,Ast_cocci.NOTHING
) -> false
179 | Ast_cocci.CONTEXT _
-> true
180 (* patch: when need full coccinelle transformation *)
181 | Ast_cocci.MINUS
(_
,_
,_
,Ast_cocci.NOREPLACEMENT
) -> false
182 | Ast_cocci.MINUS
(_
,_
,_
,Ast_cocci.REPLACEMENT _
) -> true(*REPL is not empty*)
183 | Ast_cocci.PLUS _
-> raise
(Impossible
132)
185 let contain_plus info
=
186 let mck = Ast_c.mcode_of_info info
in
187 mcode_contain_plus mck
189 (*****************************************************************************)
190 (* Last fix on the ast *)
191 (*****************************************************************************)
193 (* Because of the ugly trick to handle initialiser, I generate fake ','
194 * for the last initializer element, but if there is nothing around it,
195 * I don't want in the end to print it.
198 let remove_useless_fakeInfo_struct program
=
199 let bigf = { Visitor_c.default_visitor_c_s
with
200 Visitor_c.kini_s
= (fun (k
,bigf) ini
->
202 | Ast_c.InitList args
, ii
->
205 | i1
:: i2
:: iicommaopt
:: tl
when
206 (not
(contain_plus iicommaopt
))
207 && (not
(contain_plus i2
))
208 && (Ast_c.is_fake iicommaopt
) ->
209 (* sometimes the guy put a normal iicommaopt *)
210 Ast_c.InitList args
, (i1
:: i2
:: tl
)
211 | ii
-> Ast_c.InitList args
, ii
215 Visitor_c.vk_toplevel_s
bigf program
218 (*****************************************************************************)
219 (* Tokens1 generation *)
220 (*****************************************************************************)
222 let get_fakeInfo_and_tokens celem toks
=
224 let toks_in = ref toks
in
225 let toks_out = ref [] in
227 (* todo? verify good order of position ? *)
229 match Ast_c.pinfo_of_info info
with
231 push2
(Fake1 info
) toks_out
232 | Ast_c.OriginTok _
| Ast_c.ExpandedTok _
->
234 (* get the associated comments/space/cppcomment tokens *)
235 let (before
, x
, after
) =
236 !toks_in +> split_when
(fun tok
->
237 info
=*= TH.info_of_tok tok
)
239 assert(info
=*= TH.info_of_tok x
);
240 (*old: assert(before +> List.for_all (TH.is_comment)); *)
241 before
+> List.iter
(fun x
->
242 if not
(TH.is_comment x
)
243 then pr2 ("WEIRD: not a comment:" ^
TH.str_of_tok x
)
244 (* case such as int asm d3("x"); not yet in ast *)
246 before
+> List.iter
(fun x
-> push2
(T1 x
) toks_out);
248 push2
(T1 x
) toks_out;
250 | Ast_c.AbstractLineTok _
->
251 (* can be called on type info when for instance use -type_c *)
252 if !Flag_parsing_c.pretty_print_type_info
253 then push2
(Fake1 info
) toks_out
254 else raise
(Impossible
134) (* at this stage *)
257 let pr_space _
= () in (* use the spacing that is there already *)
259 Pretty_print_c.pp_program_gen
pr_elem pr_space celem
;
261 if not
(null
!toks_in)
262 then failwith
"WEIRD: unparsing not finished";
266 (* Fake nodes that have BEFORE code or are - should be moved over any subsequent
267 whitespace and newlines, but not any comments, to get as close to the affected
268 code as possible. Similarly, fake nodes that have AFTER code should be moved
269 backwards. No fake nodes should have both before and after code. *)
271 let displace_fake_nodes toks
=
272 let is_fake = function Fake1 _
-> true | _
-> false in
273 let is_whitespace = function
274 | T1
(Parser_c.TCommentSpace _
)
276 | T1
(Parser_c.TCommentNewline _
) -> true
280 try Some
(split_when
is_fake toks
)
281 with Not_found
-> None
in
283 | Some
(bef
,((Fake1 info
) as fake
),aft
) ->
284 (match !(info
.Ast_c.cocci_tag
) with
287 | (Ast_cocci.MINUS
(_
,_
,_
,Ast_cocci.REPLACEMENT _
),_
)
288 (* for , replacement is more likely to be like after, but not clear...
289 but treating it as after breaks a lot of tests. *)
291 | (Ast_cocci.CONTEXT
(_
,Ast_cocci.BEFORE _
),_
) ->
292 (* move the fake node forwards *)
293 let (whitespace
,rest
) = span
is_whitespace aft
in
294 bef
@ whitespace
@ fake
:: (loop rest
)
296 | (Ast_cocci.CONTEXT
(_
,Ast_cocci.AFTER _
),_
) ->
297 (* move the fake node backwards *)
298 let revbef = List.rev bef
in
299 let (revwhitespace
,revprev
) = span
is_whitespace revbef in
300 let whitespace = List.rev revwhitespace
in
301 let prev = List.rev revprev
in
302 prev @ fake
:: (loop (whitespace @ aft
))
303 | (Ast_cocci.CONTEXT
(_
,Ast_cocci.BEFOREAFTER _
),_
) ->
304 failwith
"fake node should not be before-after"
305 | (Ast_cocci.CONTEXT
(_
,Ast_cocci.NOTHING
),_
)
306 | _
-> bef
@ fake
:: (loop aft
) (* old: was removed when have simpler yacfe *)
309 bef
@ fake
:: (loop aft
)
312 | _
-> raise
(Impossible
135) in
315 (*****************************************************************************)
316 (* Tokens2 generation *)
317 (*****************************************************************************)
319 let comment2t2 = function
320 | (Token_c.TCommentCpp
321 (* not sure iif the following list is exhaustive or complete *)
322 (Token_c.CppAttr
|Token_c.CppMacro
|Token_c.CppPassingCosWouldGetError
),
323 (info
: Token_c.info
)) ->
325 | (Token_c.TCommentCpp x
,(info
: Token_c.info
)) ->
326 C2
("\n"^info
.Common.str^
"\n")
327 | x
-> failwith
(Printf.sprintf
"unexpected comment %s" (Dumper.dump x
))
329 let expand_mcode toks
=
330 let toks_out = ref [] in
334 let add_elem t minus
=
337 let str = Ast_c.str_of_info info
in
338 let isminus = match minus
with Min _
-> true | Ctx
-> false in
339 (* don't add fake string if the thing should be removed *)
340 if str =$
= "" or isminus
341 then push2
(Fake2 minus
) toks_out
342 (* fx the fake "," at the end of a structure or enum.
343 no idea what other fake info there can be... *)
344 else push2
(Comma
str) toks_out
347 (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
348 (* no tag on expandedTok ! *)
349 let modified = function
351 | Some
(Ast_cocci.CONTEXT
(pos
,Ast_cocci.NOTHING
),l
) -> false
353 (if TH.is_expanded tok
&&
354 modified !((TH.info_of_tok tok
).Ast_c.cocci_tag
)
355 (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*)
359 "expanded token %s on line %d is either modified or stored in a metavariable"
360 (TH.str_of_tok tok
) (TH.line_of_tok tok
)));
362 let tok'
= tok +> TH.visitor_info_of_tok
(fun i
->
363 { i
with Ast_c.cocci_tag
= ref Ast_c.emptyAnnot
; }
367 if TH.is_origin
tok && not
(TH.is_real_comment
tok)
376 push2
(T2
(tok'
, minus
, optindex, None
)) toks_out
381 Ast_c.mcode_and_env_of_cocciref
((info_of_token1 t
).Ast_c.cocci_tag
) in
383 let pr_cocci s ln col rcol hint
=
384 push2
(Cocci2
(s
,ln
,col
,rcol
,hint
)) toks_out in
386 (match Ast_c.pinfo_of_info info
with
387 | Ast_c.AbstractLineTok _
->
388 push2
(C2
(Ast_c.str_of_info info
)) toks_out
389 | Ast_c.FakeTok
(s
,_
) ->
390 push2
(C2 s
) toks_out
392 Printf.fprintf stderr
"line: %s\n" (Dumper.dump info
);
393 failwith
"not an abstract line"
395 (!(info
.Ast_c.comments_tag
)).Ast_c.mafter
+>
396 List.iter
(fun x
-> push2
(comment2t2 x
) toks_out) in
398 let pr_barrier ln col
= (* marks a position, used around C code *)
399 push2
(Cocci2
("",ln
,col
,col
,None
)) toks_out in
400 let pr_nobarrier ln col
= () in (* not needed for linux spacing *)
402 let pr_cspace _
= push2
(C2
" ") toks_out in
404 let pr_space _
= () (* rely on add_space in cocci code *) in
405 let pr_arity _
= () (* not interested *) in
407 let indent _
= push2 Indent_cocci2
toks_out in
408 let unindent x
= push2
(Unindent_cocci2 x
) toks_out in
409 let eat_space _
= push2 EatSpace2
toks_out in
412 (env
, pr_cocci, pr_c, pr_cspace,
413 (match !Flag_parsing_c.spacing
with
414 | Flag_parsing_c.SMPL
-> pr_space | _
-> pr_cspace),
416 (match !Flag_parsing_c.spacing
with
417 | Flag_parsing_c.SMPL
-> pr_barrier | _
-> pr_nobarrier),
418 indent, unindent, eat_space) in
420 (* old: when for yacfe with partial cocci:
424 (* patch: when need full coccinelle transformation *)
425 let unparser = Unparse_cocci.pp_list_list_any
args_pp false in
427 | Ast_cocci.MINUS
(_
,inst
,adj
,any_xxs
) ->
428 (* Why adding ? because I want to have all the information, the whole
429 * set of tokens, so I can then process and remove the
430 * is_between_two_minus for instance *)
431 add_elem t
(Min
(inst
,adj
));
433 | Ast_cocci.NOREPLACEMENT
-> ()
434 | Ast_cocci.REPLACEMENT
(any_xxs
,_
) ->
435 unparser any_xxs
Unparse_cocci.InPlace
437 | Ast_cocci.CONTEXT
(_
,any_befaft
) ->
438 (match any_befaft
with
439 | Ast_cocci.NOTHING
->
441 | Ast_cocci.BEFORE
(xxs
,_
) ->
442 unparser xxs
Unparse_cocci.Before
;
444 | Ast_cocci.AFTER
(xxs
,_
) ->
446 unparser xxs
Unparse_cocci.After
;
447 | Ast_cocci.BEFOREAFTER
(xxs
, yys
, _
) ->
448 unparser xxs
Unparse_cocci.Before
;
450 unparser yys
Unparse_cocci.After
;
452 | Ast_cocci.PLUS _
-> raise
(Impossible
136)
455 toks
+> List.iter
expand_info;
459 (*****************************************************************************)
460 (* Tokens2 processing, filtering, adjusting *)
461 (*****************************************************************************)
463 let is_space = function
464 | T2
(Parser_c.TCommentSpace _
,_b
,_i
,_h
) -> true (* only whitespace *)
467 let is_newline = function
468 | T2
(Parser_c.TCommentNewline _
,_b
,_i
,_h
) -> true
471 let is_whitespace x
=
472 is_space x
or is_newline x
474 let is_minusable_comment = function
475 | (T2
(t
,_b
,_i
,_h
)) ->
477 | Parser_c.TCommentSpace _
(* only whitespace *)
478 (* patch: coccinelle *)
479 | Parser_c.TCommentNewline _
(* newline plus whitespace *) -> true
480 | Parser_c.TComment _
when !Flag_parsing_c.keep_comments
-> false
481 | Parser_c.TComment _
482 | Parser_c.TCommentCpp
(Token_c.CppAttr
, _
)
483 | Parser_c.TCommentCpp
(Token_c.CppMacro
, _
)
484 | Parser_c.TCommentCpp
(Token_c.CppIfDirective _
, _
)
485 | Parser_c.TCommentCpp
(Token_c.CppDirective
, _
) -> (* result was false *)
488 | Parser_c.TCommentMisc _
489 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _) ->
496 let is_minusable_comment_nocpp = function
497 | (T2
(t
,_b
,_i
,_h
)) ->
499 | Parser_c.TCommentSpace _
(* only whitespace *)
500 (* patch: coccinelle *)
501 | Parser_c.TCommentNewline _
(* newline plus whitespace *) -> true
502 | Parser_c.TComment _
when !Flag_parsing_c.keep_comments
-> false
503 | Parser_c.TComment _
-> true
505 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
506 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
507 | Parser_c.TCommentCpp (Token_c.CppIfDirective _, _)
508 | Parser_c.TCommentCpp (Token_c.CppDirective, _) ->
511 | Parser_c.TCommentMisc _
512 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _) ->
519 let all_coccis = function
520 | Cocci2 _
| C2 _
| Comma _
| Indent_cocci2
521 | Unindent_cocci2 _
| EatSpace2
-> true
524 (* previously gave up if the first character was a newline, but not clear why *)
525 let is_minusable_comment_or_plus x
=
526 is_minusable_comment x
or all_coccis x
528 let set_minus_comment adj
= function
529 | T2
(t
,Ctx
,idx
,hint
) ->
530 let str = TH.str_of_tok t
in
532 | Parser_c.TCommentSpace _
533 (* patch: coccinelle *)
534 | Parser_c.TCommentNewline _
-> ()
536 | Parser_c.TComment _
537 | Parser_c.TCommentCpp
(Token_c.CppAttr
, _
)
538 | Parser_c.TCommentCpp
(Token_c.CppMacro
, _
)
539 | Parser_c.TCommentCpp
(Token_c.CppIfDirective _
, _
)
540 | Parser_c.TCommentCpp
(Token_c.CppDirective
, _
) ->
541 pr2 (Printf.sprintf
"%d: ERASING_COMMENTS: %s"
542 (TH.line_of_tok t
) str)
543 | _
-> raise
(Impossible
137)
545 T2
(t
, Min adj
, idx
, hint
)
546 (* patch: coccinelle *)
547 | T2
(t
, Min adj
, idx
, hint
) as x
-> x
549 | _
-> raise
(Impossible
138)
551 (* don't touch ifdefs, done after *)
552 let set_minus_comment_or_plus adj
= function
553 | Cocci2 _
| C2 _
| Comma _
| Indent_cocci2
554 | Unindent_cocci2 _
| EatSpace2
as x
-> x
555 | x
-> set_minus_comment adj x
557 let is_minus = function
558 | T2
(_
, Min _
, _
, _
) -> true
562 xs
+> exclude
is_minus
564 let drop_expanded xs
=
565 xs
+> exclude
(function
566 | T2
(t
,_
,_
,_
) when TH.is_expanded t
-> true
571 xs
+> exclude
(function
576 let remove_minus_and_between_and_expanded_and_fake xs
=
578 (* get rid of expanded tok *)
579 let xs = drop_expanded xs in
581 let minus_or_comment x
=
582 is_minus x
or is_minusable_comment x
in
584 let minus_or_comment_nocpp x
=
585 is_minus x
or is_minusable_comment_nocpp x
in
587 let common_adj (index1
,adj1
) (index2
,adj2
) =
588 let same_adj = (* same adjacency info *)
589 match (adj1
,adj2
) with
590 | (Ast_cocci.ADJ adj1
,Ast_cocci.ADJ adj2
) -> adj1
= adj2
591 | (Ast_cocci.ALLMINUS
,_
) | (_
,Ast_cocci.ALLMINUS
) -> true in
593 (* non-empty intersection of witness trees *)
594 not
((inter_set index1 index2
) = []) in
596 (* new idea: collects regions not containing non-space context code
597 if two adjacent adjacent minus tokens satisfy common_adj then delete
598 all spaces, comments etc between them
599 if two adjacent minus tokens do not satisfy common_adj only delete
600 the spaces between them if there are no comments, etc.
601 if the region contain no plus code and is both preceded and followed
602 by a newline, delete the initial newline. *)
604 let rec adjust_around_minus = function
606 | (T2
(Parser_c.TCommentNewline c
,_b
,_i
,_h
) as x
)::
607 ((Fake2
(Min adj1
) | T2
(_
,Min adj1
,_
,_
)) as t1
)::xs ->
608 let (minus_list
,rest
) = span not_context
(t1
::xs) in
609 let contains_plus = List.exists is_plus minus_list
in
611 match List.rev minus_list
with
612 | (T2
(Parser_c.TCommentNewline c
,_b
,_i
,_h
))::rest
613 when List.for_all
minus_or_comment minus_list
->
614 set_minus_comment_or_plus adj1
x
616 x :: adjust_within_minus
contains_plus minus_list
617 @ adjust_around_minus rest
618 | ((Fake2
(Min adj1
) | T2
(_
,Min adj1
,_
,_
)) as t1
)::xs ->
619 let (minus_list
,rest
) = span not_context
(t1
::xs) in
620 let contains_plus = List.exists is_plus minus_list
in
621 adjust_within_minus
contains_plus minus_list
622 @ adjust_around_minus rest
624 x :: adjust_around_minus xs
625 and adjust_within_minus cp
(* contains plus *) = function
626 | ((Fake2
(Min adj1
) | T2
(_
,Min adj1
,_
,_
)) as t1
)::xs ->
627 let not_minus = function T2
(_
,Min _
,_
,_
) -> false | _
-> true in
628 let (not_minus_list
,rest
) = span
not_minus xs in
631 | ((Fake2
(Min adj2
) | T2
(_
,Min adj2
,_
,_
)) as t2
)::xs ->
632 if common_adj adj1 adj2
633 || not cp
&& List.for_all
is_whitespace not_minus_list
635 (List.map
(set_minus_comment_or_plus adj1
) not_minus_list
)
636 @ (adjust_within_minus cp
(t2
::xs))
639 @ (adjust_within_minus cp
(t2
::xs))
644 (* remove spaces after removed stuff, eg a comma after a
646 (let (spaces
,rest
) = span
is_space xs in
647 (List.map
(set_minus_comment_or_plus adj1
) spaces
)
650 | xs -> failwith
"should always start with minus"
651 and not_context
= function
652 | (T2
(_
,Ctx
,_
,_
) as x) when not
(is_minusable_comment x) -> false
654 and is_plus
= function
655 | C2 _
| Comma _
| Cocci2 _
-> true
658 let xs = adjust_around_minus xs in
660 (* get rid of fake tok *)
661 let xs = drop_fake xs in
663 (* this drops blank lines after a brace introduced by removing code *)
664 let minus_or_comment_nonl = function
665 | T2
(_
,Min adj
,_
,_
) -> true
666 | T2
(Parser_c.TCommentNewline _
,_b
,_i
,_h
) -> false
667 | x -> is_minusable_comment x in
669 let rec adjust_after_brace = function
671 | ((T2
(_
,Ctx
,_
,_
)) as x)::((T2
(_
,Min adj
,_
,_
)::_
) as xs)
672 when str_of_token2 x =$
= "{" ->
673 let (between_minus
,rest
) = span
minus_or_comment_nonl xs in
674 let (newlines
,rest
) = span
is_whitespace rest
in
675 let (drop_newlines
,last_newline
) =
676 let rec loop = function
678 | ((T2
(Parser_c.TCommentNewline _
,_b
,_i
,_h
)) as x) :: rest
->
681 let (drop_newlines
,last_newline
) = loop xs in
682 (drop_newlines
,x::last_newline
) in
683 loop (List.rev newlines
) in
685 @ List.map
(set_minus_comment adj
) drop_newlines
687 @ adjust_after_brace rest
688 | x::xs -> x :: (adjust_after_brace xs) in
690 let xs = adjust_after_brace xs in
692 (* search backwards from context } over spaces until reaching a newline.
693 then go back over all minus code until reaching some context or + code.
694 get rid of all intervening spaces, newlines, and comments
696 let rec adjust_before_brace = function
698 | ((T2
(t
,Ctx
,_
,_
)) as x)::xs
699 when str_of_token2 x =$
= "}" or is_newline x ->
700 let (outer_spaces
,rest
) = span
is_space xs in
703 | ((T2
(Parser_c.TCommentNewline _
,Ctx
,_i
,_h
)) as h
) ::
704 (* the rest of this code is the same as from_newline below
705 but merging them seems to be error prone... *)
706 ((T2
(t
, Min adj
, idx
, hint
)) as m
) :: rest
->
707 let (spaces
,rest
) = span
minus_or_comment_nocpp rest
in
709 (List.map
(set_minus_comment adj
) spaces
) @
710 (adjust_before_brace rest
)
711 | _
-> adjust_before_brace rest
713 | x::xs -> x :: (adjust_before_brace xs) in
715 let from_newline = function
716 | ((T2
(t
, Min adj
, idx
, hint
)) as m
) :: rest
->
717 let (spaces
,rest
) = span
minus_or_comment_nocpp rest
in
719 (List.map
(set_minus_comment adj
) spaces
) @
720 (adjust_before_brace rest
)
721 | ((T2
(t0
,Ctx
, idx0
,h0
)) as m0
) ::
722 ((T2
(t
,Min adj
,idx
,h
)) as m
) :: rest
723 when TH.str_of_tok t0
= "" ->
724 (* This is for the case of a #define that is completely deleted,
725 because a #define has a strange EOL token at the end.
726 We hope there i no other kind of token that is represented by
727 "", but it seems like changing the kind of token might break
728 the end of entity recognition in the C parser.
729 See parsing_hacks.ml *)
730 let (spaces
,rest
) = span
minus_or_comment_nocpp rest
in
732 (List.map
(set_minus_comment adj
) spaces
) @
733 (adjust_before_brace rest
)
734 | rest
-> adjust_before_brace rest
in
736 let xs = List.rev
(from_newline (List.rev
xs)) in
738 let cleanup_ifdefs toks
=
739 (* TODO: these functions are horrid, but using tokens caused circularity *)
740 let is_ifdef = function
741 | T2
((Parser_c.TCommentCpp
742 (Token_c.CppIfDirective
Token_c.IfDef
, _
)),m
,idx
,_
) -> true
743 | T2
((Parser_c.TCommentCpp
744 (Token_c.CppIfDirective
Token_c.IfDef0
, _
)),m
,idx
,_
) -> true
746 let is_else = function
747 | T2
((Parser_c.TCommentCpp
748 (Token_c.CppIfDirective
Token_c.Else
, _
)),m
,idx
,_
) -> true
750 let is_endif = function
751 | T2
((Parser_c.TCommentCpp
752 (Token_c.CppIfDirective
Token_c.Endif
, _
)),m
,idx
,_
) -> true
755 | l
::rest
-> (t
::l
)::rest
756 | _
-> failwith
"not possible" in
757 let rec parse_ifdef acc_keywords acc_code stack
= function
758 | [] -> (None
,acc_keywords
,acc_code
)
759 | t
::rest
when is_else t
->
761 | [] -> parse_ifdef (t
::acc_keywords
) ([]::acc_code
) stack rest
762 | _
-> parse_ifdef acc_keywords
(add t acc_code
) stack rest
764 | t
::rest
when is_endif t
->
766 | [] -> ((Some
(t
,rest
)),acc_keywords
,acc_code
)
767 | _
::stack
-> parse_ifdef acc_keywords
(add t acc_code
) stack rest
769 | t
::rest
when is_ifdef t
->
770 parse_ifdef acc_keywords
(add t acc_code
) (()::stack
) rest
771 | t
::rest
-> parse_ifdef acc_keywords
(add t acc_code
) stack rest
in
772 let unminus = function
773 | T2
(t
,Min adj
,idx
,hint
) -> T2
(t
,Ctx
,idx
,hint
)
775 let rec loop = function
777 | t
::rest
when is_ifdef t
->
778 let (ender
,acc_keywords
,acc_code
) =
779 parse_ifdef [t
] [[]] [] rest
in
780 let acc_code = List.map
loop acc_code in
781 let merge = (* args reversed *)
783 (fun prev kwd code
-> kwd
:: (List.rev code
) @ prev)
786 | None
-> merge (List.map
unminus acc_keywords
) acc_code
787 | Some
(endif
,rest
) ->
788 let rest = loop rest in
789 if List.for_all
is_minus (endif
:: acc_keywords
)
790 then (merge acc_keywords
acc_code) @ (endif
:: rest)
792 (merge (List.map
unminus acc_keywords
) acc_code) @
793 ((unminus endif
) :: rest)
795 | x::xs -> x :: loop xs in
798 let xs = cleanup_ifdefs xs in
799 let xs = drop_minus xs in
802 (* things that should not be followed by space - boundary between SmPL
804 let adjust_eat_space toks
=
805 let rec loop = function
807 | EatSpace2
:: x :: rest when is_space x -> loop rest
808 | EatSpace2
:: rest -> loop rest
809 | x :: xs -> x :: loop xs in
812 (* normally, in C code, a semicolon is not preceded by a space or newline *)
813 let adjust_before_semicolon toks
=
814 let toks = List.rev
toks in
815 let rec search_semic = function
817 | ((T2
(_
,Ctx
,_
,_
) | Cocci2 _
) as x)::xs
818 when List.mem
(str_of_token2 x) [";";")";","] ->
819 x :: search_semic (search_minus
false xs)
820 | x::xs -> x :: search_semic xs
821 and search_minus seen_minus
xs =
822 let (spaces
, rest) = span
is_space xs in
823 (* only delete spaces if something is actually deleted *)
825 | ((T2
(_
,Min _
,_
,_
)) as a
)::rerest
-> a
:: search_minus
true rerest
826 | _
-> if seen_minus
then rest else xs in
827 List.rev
(search_semic toks)
829 (* normally, in C code, a ( is not followed by a space or newline *)
830 let adjust_after_paren toks =
831 let rec search_paren = function
833 | ((T2
(_
,Ctx
,_
,_
) | Cocci2 _
) as x)::xs
834 when List.mem
(str_of_token2 x) ["("] (* other things? *) ->
835 x :: search_paren (search_minus
false xs)
836 | x::xs -> x :: search_paren xs
837 and search_minus seen_minus
xs =
838 let (spaces
, rest) = span
is_whitespace xs in
839 (* only delete spaces if something is actually deleted *)
841 | ((T2
(_
,Min _
,_
,_
)) as a
)::rerest
-> (* minus *)
842 a
:: search_minus
true rerest
843 | ((T2
(_
,Ctx
,_
,_
)) as a
)::rerest
844 when seen_minus
&& str_of_token2 a
= "," ->
845 (* comma after ( will be deleted, so consider it as minus code
847 a
:: search_minus
true rerest
848 | _
-> if seen_minus
then rest else xs in (* drop trailing space *)
851 (* this is for the case where braces are added around an if branch *)
852 let paren_then_brace toks =
853 let rec search_paren = function
855 | ((T2
(_
,Ctx
,_
,_
)) as x)::xs
856 when List.mem
(str_of_token2 x) [")"] ->
857 x :: search_paren (search_plus
xs)
858 | x::xs -> x :: search_paren xs
860 let (spaces
, rest) = span
is_space xs in
861 let (nls
, rest) = span
is_newline rest in
863 (* move the brace up to the previous line *)
864 | ((Cocci2
("{",_
,_
,_
,_
)) as x) :: (((Cocci2 _
) :: _
) as rest) ->
865 spaces
@ x :: nls
@ rest
869 let is_ident_like s
= s
==~ regexp_alpha
871 let rec drop_space_at_endline = function
875 ((((T2
(Parser_c.TCommentSpace _
,Ctx
,_
,_
)) | Cocci2
("\n",_
,_
,_
,_
) |
876 (T2
(Parser_c.TCommentNewline _
,Ctx
,_
,_
))) :: _
) as rest) ->
877 (* when unparse_cocci doesn't know whether space is needed *)
878 drop_space_at_endline rest
879 | ((T2
(Parser_c.TCommentSpace _
,Ctx
,_i
,_h
)) as a
)::rest ->
880 let (outer_spaces
,rest) = span
is_space rest in
881 let minus_or_comment_or_space_nocpp = function
882 | T2
(_
,Min adj
,_
,_
) -> true
883 | (T2
(Parser_c.TCommentSpace _
,Ctx
,_i
,_
)) -> true
884 | (T2
(Parser_c.TCommentNewline _
,Ctx
,_i
,_
)) -> false
886 let (minus
,rest) = span
minus_or_comment_or_space_nocpp rest in
887 let fail _
= a
:: outer_spaces
@ minus
@ (drop_space_at_endline rest) in
888 if List.exists
is_minus minus
891 | ((T2
(Parser_c.TCommentNewline _
,Ctx
,_i
,_h
)) as a
)::rest ->
892 (* drop trailing spaces *)
893 minus
@ a
:: (drop_space_at_endline rest)
897 a
:: drop_space_at_endline rest
899 (* if a removed ( is between two tokens, then add a space *)
900 let rec paren_to_space = function
904 | ((T2
(_
,Ctx
,_
,_
)) as a
)::
905 ((T2
(t
,Min _
,_
,_
)) as b
)::
906 ((T2
(_
,Ctx
,_
,_
)) as c
)::rest
907 when not
(is_whitespace a
) && TH.str_of_tok t
= "(" ->
908 a
:: b
:: (C2
" ") :: (paren_to_space (c
:: rest))
909 | a
:: rest -> a
:: (paren_to_space rest)
911 let rec add_space xs =
915 | (Cocci2
(sx
,lnx
,_
,rcolx
,_
) as x)::((Cocci2
(sy
,lny
,lcoly
,_
,_
)) as y
)::xs
916 when !Flag_parsing_c.spacing
= Flag_parsing_c.SMPL
&&
917 not
(lnx
= -1) && not
(rcolx
= -1) && lnx
= lny
&& rcolx
< lcoly
->
918 (* this only works within a line. could consider whether
919 something should be done to add newlines too, rather than
920 printing them explicitly in unparse_cocci. *)
921 x::C2
(String.make
(lcoly
-rcolx
) ' '
)::add_space (y
::xs)
922 | (Cocci2
(sx
,lnx
,_
,rcolx
,_
) as x)::((Cocci2
(sy
,lny
,lcoly
,_
,_
)) as y
)::xs
923 when !Flag_parsing_c.spacing
= Flag_parsing_c.SMPL
&&
924 not
(lnx
= -1) && not
(rcolx
= -1) && lnx
< lny
->
925 (* this only works within a line. could consider whether
926 something should be done to add newlines too, rather than
927 printing them explicitly in unparse_cocci. *)
928 x::C2
(String.make
(lny
-lnx
) '
\n'
)::
929 C2
(String.make
(lcoly
-1) ' '
):: (* -1 is for the + *)
931 | ((T2
(_
,Ctx
,_
,_
)) as x)::((Cocci2 _
) as y
)::xs -> (* add space on boundary *)
932 let sx = str_of_token2 x in
933 let sy = str_of_token2 y
in
934 if is_ident_like sx && (is_ident_like sy or List.mem
sy ["="])
935 then x::C2
" "::(add_space (y
::xs))
936 else x::(add_space (y
::xs))
937 | x::y
::xs -> (* not boundary, not sure if it is possible *)
938 let sx = str_of_token2 x in
939 let sy = str_of_token2 y
in
940 if is_ident_like sx && is_ident_like sy
941 then x::C2
" "::(add_space (y
::xs))
942 else x::(add_space (y
::xs))
944 (* A fake comma is added at the end of an unordered initlist or a enum
945 decl, if the initlist or enum doesn't already end in a comma. This is only
946 needed if there is + code, ie if we see Cocci after it in the code sequence *)
948 let rec drop_end_comma = function
951 | ((Comma
",") as x) :: rest ->
952 let (newlines
,rest2
) = span
is_whitespace rest in
954 | (Cocci2 _
) :: _
-> x :: drop_end_comma rest
955 | _
-> drop_end_comma rest
957 | x :: xs -> x :: drop_end_comma xs
959 (* The following only works for the outermost function call. Stack records
960 the column of all open parentheses. Space_cell contains the most recent
961 comma in the outermost function call. The goal is to decide whether this
962 should be followed by a space or a newline and indent. *)
963 let add_newlines toks tabbing_unit
=
964 (* the following is for strings that may contain newline or tabs *)
965 let string_length s count
=
966 let l = list_of_string s
in
974 let create_indent n
=
976 match tabbing_unit
with
977 | Some
("\t",_
) -> ("\t",8)
978 | Some
("",_
) -> ("\t",8) (* not sure why... *)
979 | Some
(s
,_
) -> (s
,string_length s
0) (* assuming only tabs or spaces *)
980 | None
-> ("\t",8) in
983 then tu ^
loop (seen
+ tlen
)
984 else String.make
(n
-seen
) ' '
in
986 let check_for_newline count
x = function
987 | Some
(start
,space_cell
) when count
> Flag_parsing_c.max_width
->
988 space_cell
:= "\n"^
(create_indent x);
989 Some
(x + (count
- start
))
991 let start_box stack space_cell count s
=
992 let count = string_length s
count in
993 (count,count::stack
,space_cell
) in
994 let end_box stack space_cell
count s
=
995 (* this assumes that start_box and end_box are matched, but this is not
996 necessarily the case, if ( is modified and ) is context code *)
997 let count = string_length s
count in
1000 (match check_for_newline count x space_cell
with
1001 | Some
count -> (count,[],None
)
1002 | None
-> (count,[],None
)
1004 | [] -> (count,stack
,space_cell
)
1005 | _
-> (count,List.tl stack
,space_cell
) in
1006 let rec loop ((stack
,space_cell
) as info
) count = function
1008 | ((Cocci2
(s
,line
,lcol
,rcol
,hint
)) as a
):: (* hint can't be start *)
1009 (T2
(commatok
,Ctx
,idx
,_
))::
1010 (T2
(((Parser_c.TCommentSpace _
) as sptok
),Ctx
,_
,_
)) ::
1011 (((T2
(codetok
,Ctx
,_
,_
)) :: _
) as xs)
1013 (TH.str_of_tok commatok
) = "," &&
1014 (TH.str_of_tok sptok
) = " " &&
1015 ((List.length stack
= 1) or (* not super elegant... *)
1016 (* check if the Cocci2 token is a ), need double treatment *)
1017 (List.length stack
= 2) && (hint
= Some
Unparse_cocci.EndBox
)) ->
1018 (* something large added before a comma *)
1019 let stack = (* do the work of end_box in the length 2 case *)
1020 if List.length
stack = 2
1023 let x = List.hd
stack in
1025 let cocci_count = string_length s
count in
1026 let space_cell = Some
(cocci_count+1,sp) in (* count before space *)
1027 let newcount = cocci_count + 2 in (* count incuding space *)
1029 string_length (TH.str_of_tok codetok
) newcount in
1030 let b = T2
(commatok
,Ctx
,idx
,
1031 Some
(Unparse_cocci.SpaceOrNewline
sp)) in
1032 (match check_for_newline future_count x space_cell with
1033 | Some
count -> a
:: b :: loop (stack,Some
(x,sp)) count xs
1034 | None
-> a
:: b :: loop (stack,Some
(newcount,sp)) newcount xs
1036 | (T2
(commatok
,Ctx
,_
,_
))::
1037 (T2
(((Parser_c.TCommentSpace _
) as sptok
),Ctx
,idx
,_
)) ::
1038 (((Cocci2
(s
,line
,lcol
,rcol
,hint
))::_
) as xs)
1039 when (TH.str_of_tok commatok
) = "," && (TH.str_of_tok sptok
) = " " &&
1040 List.length
stack = 1 (* not super elegant... *) ->
1041 (* something large added after a comma *)
1042 let x = List.hd
stack in
1044 let space_cell = Some
(count+1,sp) in (* count before space *)
1045 let newcount = count + 2 in (* count incuding space *)
1046 let future_count = string_length s
newcount in
1047 let a = T2
(commatok
,Ctx
,idx
,
1048 Some
(Unparse_cocci.SpaceOrNewline
sp)) in
1049 (match check_for_newline future_count x space_cell with
1050 | Some
count -> a :: loop (stack,Some
(x,sp)) count xs
1051 | None
-> a :: loop (stack,Some
(newcount,sp)) newcount xs
1053 | ((T2
(tok,Ctx
,idx
,_
)) as a)::xs ->
1054 (* let (stack,space_cell) = info in *)
1055 (match TH.str_of_tok
tok with
1057 let (spaces
,rest) = span
is_space xs in
1059 | ((T2
(tok,Ctx
,_
,_
)) as b)::ixs
->
1060 (match TH.str_of_tok
tok with
1066 | (T2
(tok,_b
,_i
,_h
)) ->
1067 string_length (TH.str_of_tok
tok) prev
1068 | _
-> failwith
"not possible")
1070 let front = a :: spaces
@ [b] in
1071 let (newcount,newstack
,newspacecell
) =
1072 start_box stack space_cell newcount "{" in
1073 front @ loop (newstack
,newspacecell
) newcount ixs
1074 | s
-> a :: loop info
(string_length s
count) xs
1076 | _
-> a :: loop info
(string_length s
count) xs
1079 let (newcount,newstack
,newspacecell
) =
1080 start_box stack space_cell count s
in
1081 a :: loop (newstack
,newspacecell
) newcount xs
1083 let (newcount,newstack
,newspacecell
) =
1084 end_box stack space_cell count s
in
1085 a :: loop (newstack
,newspacecell
) newcount xs
1086 | "{" as s
when not
(stack = []) ->
1087 (* [] case means statement braces *)
1088 let (newcount,newstack
,newspacecell
) =
1089 start_box stack space_cell count s
in
1090 a :: loop (newstack
,newspacecell
) newcount xs
1091 | "}" as s
when not
(stack = []) ->
1092 (* [] case means statement braces *)
1093 let (newcount,newstack
,newspacecell
) =
1094 end_box stack space_cell count s
in
1095 a :: loop (newstack
,newspacecell
) newcount xs
1096 | s
-> a :: loop info
(string_length s
count) xs
1098 | ((Cocci2
(s
,line
,lcol
,rcol
,hint
)) as a)::xs ->
1099 let (stack,space_cell) = info
in
1102 | None
-> loop info
(string_length s
count) xs
1103 | Some
Unparse_cocci.StartBox
->
1104 let (newcount,newstack
,newspacecell
) =
1105 start_box stack space_cell count s
in
1106 loop (newstack
,newspacecell
) newcount xs
1107 | Some
Unparse_cocci.EndBox
->
1108 let (newcount,newstack
,newspacecell
) =
1109 end_box stack space_cell count s
in
1110 loop (newstack
,newspacecell
) newcount xs
1111 | Some
(Unparse_cocci.SpaceOrNewline
sp) ->
1112 let count = string_length s
(count + 1 (*space*)) in
1115 (match check_for_newline count x space_cell with
1116 | Some
count -> loop (stack,Some
(x,sp)) count xs
1117 | None
-> loop (stack,Some
(count,sp)) count xs
1119 | _
-> loop info
count xs
1122 | ((T2
(tok,_
,_
,_
)) as a)::xs ->
1123 a :: loop info
(string_length (TH.str_of_tok
tok) count) xs
1124 | ((C2
(s
)) as a)::xs -> a :: loop info
(string_length s
count) xs
1125 | ((Comma
(s
)) as a)::xs -> a :: loop info
(string_length s
count) xs
1126 | Fake2 _
:: _
| Indent_cocci2
:: _
1127 | Unindent_cocci2 _
::_
| EatSpace2
::_
->
1128 failwith
"unexpected fake, indent, unindent, or eatspace" in
1129 let redo_spaces prev = function
1130 | Cocci2
(s
,line
,lcol
,rcol
,Some
(Unparse_cocci.SpaceOrNewline
sp)) ->
1131 C2
!sp :: Cocci2
(s
,line
,lcol
,rcol
,None
) :: prev
1132 | T2
(tok,min
,idx
,Some
(Unparse_cocci.SpaceOrNewline
sp)) ->
1133 C2
!sp :: T2
(tok,min
,idx
,None
) :: prev
1135 (match !Flag_parsing_c.spacing
with
1136 | Flag_parsing_c.SMPL
-> toks
1137 | _
-> List.rev
(List.fold_left
redo_spaces [] (loop ([],None
) 0 toks))
1140 (* When insert some new code, because of a + in a SP, we must add this
1141 * code at the right place, with the good indentation. So each time we
1142 * encounter some spacing info, with some newline, we maintain the
1143 * current indentation level used.
1145 * TODO problems: not accurate. ex: TODO
1147 * TODO: if in #define region, should add a \ \n
1149 let new_tabbing2 space
=
1150 list_of_string space
1152 +> take_until
(fun c
-> c
=<= '
\n'
)
1154 +> List.map string_of_char
1158 profile_code
"C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
1161 let rec adjust_indentation xs =
1163 let _current_tabbing = ref ([] : string list
) in
1164 let tabbing_unit = ref None
in
1166 let string_of_list l = String.concat
"" (List.map string_of_char
l) in
1168 (* try to pick a tabbing unit for the plus code *)
1169 let adjust_tabbing_unit old_tab new_tab
=
1170 if !tabbing_unit =*= None
&& String.length new_tab
> String.length old_tab
1172 let old_tab = list_of_string
old_tab in
1173 let new_tab = list_of_string
new_tab in
1174 let rec loop = function
1176 tabbing_unit := Some
(string_of_list new_tab,List.rev
new_tab)
1177 | (_
,[]) -> failwith
"not possible"
1178 | (o
::os
,n
::ns
) -> loop (os
,ns
) in (* could check for equality *)
1179 loop (old_tab,new_tab) in
1182 let remtab tu current_tab =
1183 let current_tab = List.rev(list_of_string current_tab) in
1184 let rec loop = function
1185 ([],new_tab) -> string_of_list (List.rev new_tab)
1186 | (_,[]) -> (-*weird; tabbing unit used up more than the current tab*-)
1188 | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
1189 | (_,ns) -> (-* mismatch; remove what we can *-)
1190 string_of_list (List.rev ns) in
1191 loop (tu,current_tab) in
1194 let rec find_first_tab started
= function
1196 | ((T2
(tok,_
,_
,_
)) as x)::xs when str_of_token2 x =$
= "{" ->
1197 find_first_tab true xs
1198 (* patch: coccinelle *)
1199 | ((T2
(Parser_c.TCommentNewline s
, _
, _
, _
)) as x)::_
1201 let s = str_of_token2 x +> new_tabbing in
1202 tabbing_unit := Some
(s,List.rev
(list_of_string
s))
1203 | x::xs -> find_first_tab started
xs in
1204 find_first_tab false xs;
1206 let rec balanced ct
= function
1208 | ((T2
(tok,_
,_
,_
)) as x)::xs ->
1209 (match str_of_token2 x with
1210 | "(" -> balanced (ct
+1) xs
1211 | ")" -> balanced (ct
-1) xs
1212 | _
-> balanced ct
xs
1214 | x::xs -> balanced ct
xs in
1216 let update_tabbing started
s x =
1217 let old_tabbing = !_current_tabbing in
1218 str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := [s]);
1219 (* only trust the indentation after the first { *)
1223 (String.concat
"" old_tabbing)
1224 (String.concat
"" !_current_tabbing) in
1226 let rec aux started
xs =
1229 (* patch: coccinelle *)
1230 | ((T2
(Parser_c.TCommentNewline
s,_
,_
,_
)) as x)::
1231 Unindent_cocci2
(false)::xs ->
1232 update_tabbing started
s x;
1233 (C2
"\n")::aux started
xs
1234 | (Cocci2
("\n",_
,_
,_
,_
))::Unindent_cocci2
(false)::xs ->
1235 (C2
"\n")::aux started
xs
1236 | ((T2
(tok,_
,_
,_
)) as x)::(T2
(Parser_c.TCommentNewline
s, _
, _
, _
))::
1237 ((Cocci2
("{",_
,_
,_
,_
)) as a)::xs
1238 when started
&& str_of_token2 x =$
= ")" ->
1239 (* to be done for if, etc, but not for a function header *)
1240 x::(C2
" ")::a::(aux started
xs)
1241 | ((T2
(Parser_c.TCommentNewline
s, _
, _
, _
)) as x)::xs
1243 balanced 0 (fst
(span
(function x -> not
(is_newline x)) xs)) ->
1244 update_tabbing started
s x;
1245 let coccis_rest = span
all_coccis xs in
1246 (match coccis_rest with
1247 | (_
::_
,((T2
(tok,_
,_
,_
)) as y
)::_
) when str_of_token2 y
=$
= "}" ->
1248 (* the case where cocci code has been added before a close } *)
1249 x::aux started
(Indent_cocci2
::xs)
1250 | _
-> x::aux started
xs
1252 | Indent_cocci2
::((Cocci2
(sy,lny
,lcoly
,_
,_
)) as y
)::xs
1253 when !Flag_parsing_c.spacing
= Flag_parsing_c.SMPL
->
1254 let tu = String.make
(lcoly
-1) ' '
in
1255 _current_tabbing := tu::(!_current_tabbing);
1256 C2
(tu)::aux started
(y
::xs)
1257 | Indent_cocci2
::xs ->
1258 (match !tabbing_unit with
1259 | None
-> aux started
xs
1261 _current_tabbing := tu::(!_current_tabbing);
1262 (* can't be C2, for later phases *)
1263 Cocci2
(tu,-1,-1,-1,None
)::aux started
xs
1265 | Unindent_cocci2
(permanent
)::((Cocci2
("\n",_
,_
,_
,_
)) as x)::xs ->
1266 (* seems only relevant if there is a following cocci newline *)
1267 (match !_current_tabbing with
1268 | [] -> aux started
xs
1270 let s = String.concat
"" new_tabbing in
1271 _current_tabbing := new_tabbing;
1272 x::Cocci2
(s,-1,-1,-1,None
)::aux started
xs
1274 | Unindent_cocci2
(permanent
)::xs -> aux started
xs
1275 (* border between existing code and cocci code *)
1276 | ((T2
(tok,_
,_
,_
)) as x)::((Cocci2
("\n",_
,_
,_
,_
)) as y
)::xs
1277 when str_of_token2 x =$
= "{" ->
1278 x::aux true (y
::Indent_cocci2
::xs)
1279 | ((Cocci2 _
) as x)::((T2
(tok,_
,_
,_
)) as y
)::xs
1280 when str_of_token2 y
=$
= "}" ->
1281 x::aux started
(Unindent_cocci2
true::y
::xs)
1282 (* starting the body of the function *)
1283 | ((T2
(tok,_
,_
,_
)) as x)::xs when str_of_token2 x =$
= "{" ->
1285 | ((Cocci2
("{",_
,_
,_
,_
)) as a)::xs -> a::aux true xs
1286 | ((Cocci2
("\n",_
,_
,_
,_
)) as x)::xs ->
1287 (* dont inline in expr because of weird eval order of ocaml *)
1288 let s = String.concat
"" !_current_tabbing in
1289 (* can't be C2, for later phases *)
1290 x::Cocci2
(s,-1,-1,-1,None
)::aux started
xs
1291 | x::xs -> x::aux started
xs in
1292 (aux false xs,!tabbing_unit)
1295 let rec find_paren_comma = function
1298 (* do nothing if was like this in original file *)
1299 | { str = "("; idx
= Some p1
} :: ({ str = ","; idx
= Some p2
} :: _
as xs)
1300 | { str = ","; idx
= Some p1
} :: ({ str = ","; idx
= Some p2
} :: _
as xs)
1301 | { str = ","; idx
= Some p1
} :: ({ str = ")"; idx
= Some p2
} :: _
as xs)
1302 when p2
=|= p1
+ 1 ->
1305 (* otherwise yes can adjust *)
1306 | { str = "(" } :: (({ str = ","} as rem
) :: _
as xs)
1307 | ({ str = "," } as rem
) :: ({ str = ","} :: _
as xs)
1308 | ({ str = "," } as rem
) :: ({ str = ")"} :: _
as xs) ->
1316 let fix_tokens toks =
1317 let toks = toks +> List.map
mk_token_extended in
1319 let cleaner = toks +> exclude
(function
1320 | {tok2
= T2
(t
,_
,_
,_
)} -> TH.is_real_comment t
(* I want the ifdef *)
1323 find_paren_comma cleaner;
1325 let toks = rebuild_tokens_extented toks in
1326 toks +> List.map
(fun x -> x.tok2
)
1328 (* if we have to remove a '}' that is alone on a line, remove the line too *)
1329 let drop_line toks =
1330 let rec space_until_newline toks =
1332 | (T2
(_
, Min _
, _
, _
) as hd
) :: tl
->
1333 let (drop
, tl
) = space_until_newline tl
in
1335 | hd
:: tl
when is_space hd
->
1336 space_until_newline tl
1338 space_until_newline tl
1339 | hd
:: tl
when is_newline hd
->
1345 | (T2
(_
, Min _
, _
, _
) as x) :: tl
1346 when str_of_token2 x =$
= "}" ->
1347 let (drop
, tl
) = space_until_newline tl
in
1349 | hd
:: tl
when is_whitespace hd
->
1350 let (drop
, tl
) = loop tl
in
1355 | _
-> (false, toks) in
1357 let (_
, toks) = loop toks in
1364 (*****************************************************************************)
1365 (* Final unparsing (and debugging support) *)
1366 (*****************************************************************************)
1369 type kind_token2
= KFake
| KCocci
| KC
| KExpanded
| KOrigin
1371 let kind_of_token2 = function
1373 | Cocci2 _
-> KCocci
1377 (match TH.pinfo_of_tok t
with
1378 | Ast_c.ExpandedTok _
-> KExpanded
1379 | Ast_c.OriginTok _
-> KOrigin
1380 | Ast_c.FakeTok _
-> raise
(Impossible
139) (* now a Fake2 *)
1381 | Ast_c.AbstractLineTok _
-> raise
(Impossible
140) (* now a KC *)
1383 | Unindent_cocci2 _
| Indent_cocci2
| EatSpace2
-> raise
(Impossible
141)
1387 let start_mark = function
1391 | KExpanded
-> "!E!"
1394 let print_all_tokens2 pr
xs =
1395 if !Flag_parsing_c.debug_unparsing
1397 let current_kind = ref KOrigin
in
1398 xs +> List.iter
(fun t
->
1399 let newkind = kind_of_token2 t
in
1400 if newkind =*= !current_kind
1401 then pr
(str_of_token2 t
)
1405 pr
(start_mark newkind);
1406 pr
(str_of_token2 t
);
1407 current_kind := newkind
1411 let to_whitespace s =
1412 let r = String.copy
s in
1413 for i
= 1 to String.length
r do
1414 let c = String.get
r (i
-1) in
1416 | ' '
| '
\t'
| '
\r'
| '
\n'
-> ()
1417 | _
-> String.set
r (i
-1) ' '
1420 let hiding_level = ref 0 in
1421 let handle_token t
=
1422 let s = str_of_token2 t
in
1426 let i = TH.info_of_tok t
in
1427 (match Ast_c.get_annot_info
i Token_annot.Exclude_start
with
1429 | Some _
-> hiding_level := !hiding_level + 1
1431 let hide_current = !hiding_level > 0 in
1432 (match Ast_c.get_annot_info
i Token_annot.Exclude_end
with
1434 | Some _
-> hiding_level := max
(!hiding_level - 1) 0
1437 | _
-> !hiding_level > 0 in
1438 if hide_current then to_whitespace s else s in
1439 xs +> List.iter
(fun x -> pr
(handle_token x))
1445 (*****************************************************************************)
1447 (*****************************************************************************)
1449 (* old: PPviatok was made in the beginning to allow to pretty print a
1450 * complete C file, including a modified C file by transformation.ml,
1451 * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
1452 * maybe flow_to_ast) all the cases. Indeed we don't need to do some
1453 * fancy stuff when a function was not modified at all. Just need to
1454 * print the list of token as-is. But now pretty_print_c.ml handles
1455 * almost everything so maybe less useful. Maybe PPviatok allows to
1456 * optimize a little the pretty printing.
1458 * update: now have PPviastr which goes even faster than PPviatok, so
1459 * PPviatok has disappeared.
1462 type ppmethod
= PPnormal
| PPviastr
1467 (* The pp_program function will call pretty_print_c.ml with a special
1468 * function to print the leaf components, the tokens. When we want to
1469 * print a token, we need to print also maybe the space and comments that
1470 * were close to it in the original file (and that was omitted during the
1471 * parsing phase), and honor what the cocci-info attached to the token says.
1472 * Maybe we will not print the token if it's a MINUS-token, and maybe we will
1473 * print it and also print some cocci-code attached in a PLUS to it.
1474 * So we will also maybe call unparse_cocci. Because the cocci-code may
1475 * contain metavariables, unparse_cocci will in fact sometimes call back
1476 * pretty_print_c (which will this time don't call back again unparse_cocci)
1479 let pp_program2 xs outfile
=
1480 with_open_outfile outfile
(fun (pr
,chan
) ->
1482 if !Flag_parsing_c.debug_unparsing
1483 then begin pr2_no_nl
s; flush stderr
end
1486 (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1489 xs +> List.iter
(fun ((e
,(str, toks_e
)), ppmethod
) ->
1490 (* here can still work on ast *)
1491 let e = remove_useless_fakeInfo_struct e in
1495 (* now work on tokens *)
1496 (* phase1: just get all the tokens, all the information *)
1497 assert(toks_e
+> List.for_all
(fun t
->
1498 TH.is_origin t
or TH.is_expanded t
1500 let toks = get_fakeInfo_and_tokens e toks_e
in
1501 let toks = displace_fake_nodes toks in
1502 (* assert Origin;ExpandedTok;Faketok *)
1503 let toks = expand_mcode toks in
1505 (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1506 * and no tag information, just NOTHING. *)
1509 if !Flag.sgrep_mode2
1511 (* nothing else to do for sgrep *)
1512 drop_expanded(drop_fake(drop_minus toks))
1515 (* phase2: can now start to filter and adjust *)
1516 let (toks,tu) = adjust_indentation toks in
1517 let toks = adjust_eat_space toks in
1518 let toks = adjust_before_semicolon toks in(*before remove minus*)
1519 let toks = adjust_after_paren toks in(*also before remove minus*)
1520 let toks = drop_space_at_endline toks in
1521 let toks = paren_to_space toks in
1522 let toks = drop_end_comma toks in
1524 let toks = drop_line toks in
1526 let toks = remove_minus_and_between_and_expanded_and_fake toks in
1527 (* assert Origin + Cocci + C and no minus *)
1528 let toks = add_space toks in
1529 let toks = add_newlines toks tu in
1530 let toks = paren_then_brace toks in
1531 let toks = fix_tokens toks in
1535 (* in theory here could reparse and rework the ast! or
1536 * apply some SP. Not before cos julia may have generated
1537 * not parsable file. Need do unparsing_tricks call before
1538 * being ready to reparse. *)
1539 print_all_tokens2 pr toks;
1541 | PPviastr
-> pr str
1545 let pp_program a b =
1546 profile_code
"C unparsing" (fun () -> pp_program2 a b)
1549 let pp_program_default xs outfile
=
1550 let xs'
= xs +> List.map
(fun x -> x, PPnormal
) in
1551 pp_program xs' outfile