(* parsing_c/unparse_c.ml, from the coccinelle-0.1.10 release *)
1 (* Yoann Padioleau, Julia Lawall
2 *
3 * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
13 *
14 *
15 * Modifications by Julia Lawall for better newline handling.
16 *)
17 open Common
18
19 open Ast_c
20
21 module TH = Token_helpers
22
23
24 (* We should keep comments and directives that lie between adjacent deleted
25 terms, but not comments and directives within deleted terms. We should use
26 the labels found in the control-flow graph for this. *)
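(* Illustration (assumed example, not from the original sources): if two
   consecutive calls are removed,
     - a();
     /* keep me */
     - b();
   the comment between them should survive, whereas a comment inside a single
   removed term should disappear with it. *)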
27
28
29
30 (*****************************************************************************)
31 (* Wrappers *)
32 (*****************************************************************************)
33 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
34
35 (*****************************************************************************)
36 (* Types used during the intermediate phases of the unparsing *)
37 (*****************************************************************************)
38
39 type token1 =
40 | Fake1 of info
41 | T1 of Parser_c.token
42
43 (* The cocci_tag of the token should always be NOTHING. The mark of
44 * the token can only be OriginTok or ExpandedTok. Why not get rid of the
45 * token and use something simpler? Because we need to know whether the
46 * info is a TCommentCpp, a TCommentSpace, etc. for some of the later
47 * analyses, so it is easier to keep the token around.
48 *
49 * This type contains the whole information; all the tokens end up with
50 * this type.
51 *)
52 type min =
53 Min of (int list (* match numbers *) * int (* adjacency information *))
54 | Ctx
55
56 type token2 =
57 | T2 of Parser_c.token * min *
58 int option (* orig index, abstracting away comments and space *)
59 | Fake2
60 | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
61 | C2 of string
62 | Indent_cocci2
63 | Unindent_cocci2
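(* Reading guide (sketch, inferred from the constructors above): a context
   token of the original C file appears as T2 (tok, Ctx, Some idx); a token
   removed by the semantic patch as T2 (tok, Min (matches, adj), idx); code
   added by the semantic patch as Cocci2 strings (with position information)
   or as C2 strings (plain text such as spaces or fake commas). *)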
64
65 (* not used yet *)
66 type token3 =
67 | T3 of Parser_c.token
68 | Cocci3 of string
69 | C3 of string
70
71
72 (* similar to the technique in parsing_hack *)
73 type token_extended = {
74 tok2 : token2;
75 str : string;
76 idx: int option; (* to know if 2 tokens were consecutive in orig file *)
77 mutable new_tokens_before : token2 list;
78 mutable remove : bool;
79 }
80
81
82 (*****************************************************************************)
83 (* Helpers *)
84 (*****************************************************************************)
85
86 let info_of_token1 t =
87 match t with
88 | Fake1 info -> info
89 | T1 tok -> TH.info_of_tok tok
90
91 let print_token1 = function
92 T1 tok -> TH.str_of_tok tok
93 | Fake1 info -> "fake"
94
95 let str_of_token2 = function
96 | T2 (t,_,_) -> TH.str_of_tok t
97 | Fake2 -> ""
98 | Cocci2 (s,_,_,_) -> s
99 | C2 s -> s
100 | Indent_cocci2 -> ""
101 | Unindent_cocci2 -> ""
102
103 let print_token2 = function
104 | T2 (t,b,_) ->
105 let b_str =
106 match b with
107 Min (index,adj) ->
108 Printf.sprintf "-%d[%s]" adj
109 (String.concat " " (List.map string_of_int index))
110 | Ctx -> "" in
111 "T2:"^b_str^TH.str_of_tok t
112 | Fake2 -> "fake"
113 | Cocci2 (s,_,lc,rc) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
114 | C2 s -> "C2:"^s
115 | Indent_cocci2 -> "Indent"
116 | Unindent_cocci2 -> "Unindent"
117
118 let simple_print_all_tokens1 l =
119 List.iter (function x -> Printf.printf "%s " (print_token1 x)) l;
120 Printf.printf "\n"
121
122 let simple_print_all_tokens2 l =
123 List.iter (function x -> Printf.printf "|%s| " (print_token2 x)) l;
124 Printf.printf "\n"
125
126 let str_of_token3 = function
127 | T3 t -> TH.str_of_tok t
128 | Cocci3 s | C3 s -> s
129
130
131
132 let mk_token_extended x =
133 let origidx =
134 match x with
135 | T2 (_,_, idx) -> idx
136 | _ -> None
137 in
138 { tok2 = x;
139 str = str_of_token2 x;
140 idx = origidx;
141 new_tokens_before = [];
142 remove = false;
143 }
144
145 let rebuild_tokens_extented toks_ext =
146 let _tokens = ref [] in
147 toks_ext +> List.iter (fun tok ->
148 tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
149 if not tok.remove then push2 tok.tok2 _tokens;
150 );
151 let tokens = List.rev !_tokens in
152 (tokens +> List.map mk_token_extended)
153
154
155 let mcode_contain_plus = function
156 | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
157 | Ast_cocci.CONTEXT _ -> true
158 (* patch: when need full coccinelle transformation *)
159 | Ast_cocci.MINUS (_,_,_,[]) -> false
160 | Ast_cocci.MINUS (_,_,_,x::xs) -> true
161 | Ast_cocci.PLUS -> raise Impossible
162
163 let contain_plus info =
164 let mck = Ast_c.mcode_of_info info in
165 mcode_contain_plus mck
166
167 (*****************************************************************************)
168 (* Last fix on the ast *)
169 (*****************************************************************************)
170
171 (* Because of the ugly trick used to handle initializers, I generate a fake
172 * ',' for the last initializer element, but if there is nothing attached
173 * around it, I do not want to print it in the end.
174 *)
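(* Illustrative example (assumed): for "struct foo x = { 1, 2 };" a fake ','
   info is attached after the last element so that added code can be printed
   as ", <new element>"; when no plus code ends up next to it, the fake comma
   is dropped below. *)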
175
176 let remove_useless_fakeInfo_struct program =
177 let bigf = { Visitor_c.default_visitor_c_s with
178 Visitor_c.kini_s = (fun (k,bigf) ini ->
179 match k ini with
180 | InitList args, ii ->
181 (match ii with
182 | [_i1;_i2] -> ini
183 | [i1;i2;iicommaopt] ->
184 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
185 && (Ast_c.is_fake iicommaopt)
186 (* sometimes a normal iicommaopt is put there *)
187 then InitList args, [i1;i2]
188 else InitList args, [i1;i2;iicommaopt]
189 | [i1;i2;iicommaopt;end_comma_opt] ->
190 (* only in #define. end_comma_opt cannot be fake *)
191 (* not sure if this will be considered ambiguous with a previous
192 case? *)
193 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
194 && (Ast_c.is_fake iicommaopt)
195 (* sometimes a normal iicommaopt is put there *)
196 then InitList args, [i1;i2;end_comma_opt]
197 else InitList args, [i1;i2;iicommaopt;end_comma_opt]
198 | _ -> raise Impossible
199 )
200 | x -> x
201 )
202 } in
203 Visitor_c.vk_toplevel_s bigf program
204
205
206 (*****************************************************************************)
207 (* Tokens1 generation *)
208 (*****************************************************************************)
209
210 let get_fakeInfo_and_tokens celem toks =
211 let toks_in = ref toks in
212 let toks_out = ref [] in
213
214 (* todo? verify good order of position ? *)
215 let pr_elem info =
216 match Ast_c.pinfo_of_info info with
217 | FakeTok _ ->
218 Common.push2 (Fake1 info) toks_out
219 | OriginTok _ | ExpandedTok _ ->
220 (* get the associated comments/space/cppcomment tokens *)
221 let (before, x, after) =
222 !toks_in +> Common.split_when (fun tok ->
223 info =*= TH.info_of_tok tok)
224 in
225 assert(info =*= TH.info_of_tok x);
226 (*old: assert(before +> List.for_all (TH.is_comment)); *)
227 before +> List.iter (fun x ->
228 if not (TH.is_comment x)
229 then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
230 (* case such as int asm d3("x"); not yet in ast *)
231 );
232 before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
233 push2 (T1 x) toks_out;
234 toks_in := after;
235 | AbstractLineTok _ ->
236 (* can be called on type info when, for instance, using -type_c *)
237 if !Flag_parsing_c.pretty_print_type_info
238 then Common.push2 (Fake1 info) toks_out
239 else raise Impossible (* at this stage *)
240 in
241
242 let pr_space _ = () in (* use the spacing that is there already *)
243
244 Pretty_print_c.pp_program_gen pr_elem pr_space celem;
245
246 if not (null !toks_in)
247 then failwith "WEIRD: unparsing not finished";
248
249 List.rev !toks_out
250
251 (* Fake nodes that have BEFORE code should be moved over any subsequent
252 whitespace and newlines, but not any comments, to get as close to the affected
253 code as possible. Similarly, fake nodes that have AFTER code should be moved
254 backwards. No fake nodes should have both before and after code. *)
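(* Illustration (assumed): a fake node carrying BEFORE code that sits in
   front of "\n    x = 1;" is pushed forward over the newline and the
   indentation so that the added code is printed right next to "x = 1;";
   a fake node carrying AFTER code is moved backwards over whitespace in the
   same way, and never over comments. *)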
255
256 let displace_fake_nodes toks =
257 let is_fake = function Fake1 _ -> true | _ -> false in
258 let is_whitespace = function
259 T1(Parser_c.TCommentSpace _)
260 (* patch: cocci *)
261 | T1(Parser_c.TCommentNewline _) -> true
262 | _ -> false in
263 let rec loop toks =
264 let fake_info =
265 try Some (Common.split_when is_fake toks)
266 with Not_found -> None in
267 match fake_info with
268 Some(bef,((Fake1 info) as fake),aft) ->
269 (match !(info.cocci_tag) with
270 | Some x ->
271 (match x with
272 (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
273 (* move the fake node forwards *)
274 let (whitespace,rest) = Common.span is_whitespace aft in
275 bef @ whitespace @ fake :: (loop rest)
276 | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
277 (* move the fake node backwards *)
278 let revbef = List.rev bef in
279 let (revwhitespace,revprev) = Common.span is_whitespace revbef in
280 let whitespace = List.rev revwhitespace in
281 let prev = List.rev revprev in
282 prev @ fake :: (loop (whitespace @ aft))
283 | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
284 bef @ fake :: (loop aft)
285 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
286 failwith "fake node should not be before-after"
287 | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
288 )
289 | None ->
290 bef @ fake :: (loop aft)
291 )
292 | None -> toks
293 | _ -> raise Impossible in
294 loop toks
295
296 (*****************************************************************************)
297 (* Tokens2 generation *)
298 (*****************************************************************************)
299
300 let comment2t2 = function
301 (Token_c.TCommentCpp x,(info : Token_c.info)) ->
302 C2("\n"^info.Common.str^"\n")
303 | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
304
305 let expand_mcode toks =
306 let toks_out = ref [] in
307
308 let index = ref 0 in
309
310 let add_elem t minus =
311 match t with
312 | Fake1 info ->
313 let str = Ast_c.str_of_info info in
314 if str =$= ""
315 then push2 (Fake2) toks_out
316 (* perhaps the fake ',' *)
317 else push2 (C2 str) toks_out
318
319
320 | T1 tok ->
321 (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
322 (* no tag on expandedTok ! *)
323 let modified = function
324 None -> false
325 | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
326 | _ -> true in
327 (if (TH.is_expanded tok &&
328 modified !((TH.info_of_tok tok).cocci_tag)
329 (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*))
330 then
331 failwith
332 (Printf.sprintf
333 "expanded token %s on line %d is either modified or stored in a metavariable"
334 (TH.str_of_tok tok) (TH.line_of_tok tok)));
335
336 let tok' = tok +> TH.visitor_info_of_tok (fun i ->
337 { i with cocci_tag = ref Ast_c.emptyAnnot; }
338 ) in
339
340 let optindex =
341 if TH.is_origin tok && not (TH.is_real_comment tok)
342 then begin
343 incr index;
344 Some !index
345 end
346 else None
347 in
348
349 push2 (T2 (tok', minus, optindex)) toks_out
350 in
351
352 let expand_info t =
353 let (mcode,env) =
354 Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
355
356 let pr_cocci s ln col rcol =
357 push2 (Cocci2(s,ln,col,rcol)) toks_out in
358 let pr_c info =
359 (match Ast_c.pinfo_of_info info with
360 Ast_c.AbstractLineTok _ ->
361 push2 (C2 (Ast_c.str_of_info info)) toks_out
362 | Ast_c.FakeTok (s,_) ->
363 push2 (C2 s) toks_out
364 | _ ->
365 Printf.printf "line: %s\n" (Common.dump info);
366 failwith "not an abstract line");
367 (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
368 List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
369
370 let pr_barrier ln col = (* marks a position, used around C code *)
371 push2 (Cocci2("",ln,col,col)) toks_out in
372 let pr_nobarrier ln col = () in (* not needed for linux spacing *)
373
374 let pr_cspace _ = push2 (C2 " ") toks_out in
375
376 let pr_space _ = () (* rely on add_space in cocci code *) in
377 let pr_arity _ = () (* not interested *) in
378
379 let indent _ = push2 Indent_cocci2 toks_out in
380 let unindent _ = push2 Unindent_cocci2 toks_out in
381
382 let args_pp =
383 (env, pr_cocci, pr_c, pr_cspace,
384 (match !Flag_parsing_c.spacing with
385 Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
386 pr_arity,
387 (match !Flag_parsing_c.spacing with
388 Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
389 indent, unindent) in
390
391 (* old: when for yacfe with partial cocci:
392 * add_elem t false;
393 *)
394
395 (* patch: when need full coccinelle transformation *)
396 let unparser = Unparse_cocci.pp_list_list_any args_pp false in
397 match mcode with
398 | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
399 (* Why add it? Because I want to have all the information, the whole
400 * set of tokens, so that I can then process them and remove, for
401 * instance, the is_between_two_minus tokens *)
402 add_elem t (Min (inst,adj));
403 unparser any_xxs Unparse_cocci.InPlace
404 | Ast_cocci.CONTEXT (_,any_befaft) ->
405 (match any_befaft with
406 | Ast_cocci.NOTHING ->
407 add_elem t Ctx
408 | Ast_cocci.BEFORE xxs ->
409 unparser xxs Unparse_cocci.Before;
410 add_elem t Ctx
411 | Ast_cocci.AFTER xxs ->
412 add_elem t Ctx;
413 unparser xxs Unparse_cocci.After;
414 | Ast_cocci.BEFOREAFTER (xxs, yys) ->
415 unparser xxs Unparse_cocci.Before;
416 add_elem t Ctx;
417 unparser yys Unparse_cocci.After;
418 )
419 | Ast_cocci.PLUS -> raise Impossible
420
421 in
422
423 toks +> List.iter expand_info;
424 List.rev !toks_out
425
426
427 (*****************************************************************************)
428 (* Tokens2 processing, filtering, adjusting *)
429 (*****************************************************************************)
430
431 let is_space = function
432 | T2(Parser_c.TCommentSpace _,_b,_i) -> true (* only whitespace *)
433 | _ -> false
434
435 let is_newline = function
436 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
437 | _ -> false
438
439 let is_whitespace = function
440 | (T2 (t,_b,_i)) ->
441 (match t with
442 | Parser_c.TCommentSpace _ -> true (* only whitespace *)
443 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
444 | _ -> false
445 )
446 | _ -> false
447
448 let is_minusable_comment = function
449 | (T2 (t,_b,_i)) ->
450 (match t with
451 | Parser_c.TCommentSpace _ (* only whitespace *)
452 (* patch: coccinelle *)
453 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
454 | Parser_c.TComment _
455 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
456 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
457 | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
458 -> true
459
460 | Parser_c.TCommentMisc _
461 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
462 -> false
463
464 | _ -> false
465 )
466 | _ -> false
467
468 let is_minusable_comment_nocpp = function
469 | (T2 (t,_b,_i)) ->
470 (match t with
471 | Parser_c.TCommentSpace _ (* only whitespace *)
472 (* patch: coccinelle *)
473 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
474 | Parser_c.TComment _ -> true
475 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
476 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
477 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
478 -> false
479
480 | Parser_c.TCommentMisc _
481 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
482 -> false
483
484 | _ -> false
485 )
486 | _ -> false
487
488 let all_coccis = function
489 Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 -> true
490 | _ -> false
491
492 (* previously gave up if the first character was a newline, but it is not clear why *)
493 let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
494
495 let set_minus_comment adj = function
496 | T2 (t,Ctx,idx) ->
497 let str = TH.str_of_tok t in
498 (match t with
499 | Parser_c.TCommentSpace _
500 (* patch: coccinelle *)
501 | Parser_c.TCommentNewline _ -> ()
502
503 | Parser_c.TComment _
504 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
505 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
506 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
507 ->
508 pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
509 (TH.line_of_tok t) str)
510 | _ -> raise Impossible
511 );
512 T2 (t, Min adj, idx)
513 (* patch: coccinelle *)
514 | T2 (t,Min adj,idx) as x -> x
515 | _ -> raise Impossible
516
517 let set_minus_comment_or_plus adj = function
518 Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 as x -> x
519 | x -> set_minus_comment adj x
520
521 let drop_minus xs =
522 xs +> Common.exclude (function
523 | T2 (t,Min adj,_) -> true
524 | _ -> false
525 )
526
527 let remove_minus_and_between_and_expanded_and_fake xs =
528
529 (* get rid of expanded and fake tokens *)
530 let xs = xs +> Common.exclude (function
531 | T2 (t,_,_) when TH.is_expanded t -> true
532 | Fake2 -> true
533
534 | _ -> false
535 )
536 in
537
538 let minus_or_comment = function
539 T2(_,Min adj,_) -> true
540 | x -> is_minusable_comment x in
541
542 let minus_or_comment_nocpp = function
543 T2(_,Min adj,_) -> true
544 | x -> is_minusable_comment_nocpp x in
545
546 let common_adj (index1,adj1) (index2,adj2) =
547 adj1 = adj2 (* same adjacency info *) &&
548 (* non-empty intersection of witness trees *)
549 not ((Common.inter_set index1 index2) = []) in
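(* e.g. common_adj ([1;2],4) ([2;3],4) holds: the adjacency numbers agree and
   the match-number sets share the element 2; different adjacency numbers or
   disjoint match-number sets make it false. *)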
550
551 let rec adjust_around_minus = function
552 [] -> []
553 | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
554 (((T2(_,Min adj,_))::_) as rest) ->
555 (* an initial newline, as in a replaced statement *)
556 let (between_minus,rest) = Common.span minus_or_comment rest in
557 (match rest with
558 [] -> (set_minus_comment adj x) ::
559 (List.map (set_minus_comment adj) between_minus)
560 | T2(_,Ctx,_)::_ when is_newline (List.hd(List.rev between_minus)) ->
561 (set_minus_comment adj x)::(adjust_within_minus between_minus) @
562 (adjust_around_minus rest)
563 | _ ->
564 x :: (adjust_within_minus between_minus) @
565 (adjust_around_minus rest))
566 | ((T2(_,Min adj,_))::_) as rest ->
567 (* no initial newline, as in a replaced expression *)
568 let (between_minus,rest) = Common.span minus_or_comment rest in
569 (match rest with
570 [] ->
571 (List.map (set_minus_comment adj) between_minus)
572 | _ ->
573 (adjust_within_minus between_minus) @
574 (adjust_around_minus rest))
575 | x::xs -> x::adjust_around_minus xs
576 and adjust_within_minus = function
577 [] -> []
578 | (T2(_,Min adj1,_) as t1)::xs ->
579 let (between_minus,rest) = Common.span is_minusable_comment xs in
580 (match rest with
581 [] ->
582 (* keep last newline *)
583 let (drop,keep) =
584 try
585 let (drop,nl,keep) =
586 Common.split_when is_newline between_minus in
587 (drop, nl :: keep)
588 with Not_found -> (between_minus,[]) in
589 t1 ::
590 List.map (set_minus_comment_or_plus adj1) drop @
591 keep
592 | (T2(_,Min adj2,_) as t2)::rest when common_adj adj1 adj2 ->
593 t1::
594 List.map (set_minus_comment_or_plus adj1) between_minus @
595 adjust_within_minus (t2::rest)
596 | x::xs ->
597 t1::(between_minus @ adjust_within_minus (x::xs)))
598 | _ -> failwith "only minus and space possible" in
599
600 (* new idea: collect regions not containing non-space context code.
601 If two adjacent minus tokens satisfy common_adj, then delete all
602 spaces, comments, etc. between them.
603 If two adjacent minus tokens do not satisfy common_adj, only delete
604 the spaces between them, and only if there are no comments, etc.
605 If the region contains no plus code and is both preceded and followed
606 by a newline, delete the initial newline. *)
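(* Sketch of the rules above (illustrative): between two removed calls
   "- a();" and "- b();" coming from the same match (common_adj holds), any
   comments and spaces in between are removed as well; if they come from
   unrelated matches, only plain whitespace between them may be removed and
   comments are kept. *)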
607
608 let rec adjust_around_minus = function
609 [] -> []
610 | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
611 (T2(_,Min adj1,_) as t1)::xs ->
612 let (minus_list,rest) = Common.span not_context (t1::xs) in
613 let contains_plus = List.exists is_plus minus_list in
614 let x =
615 match List.rev minus_list with
616 (T2(Parser_c.TCommentNewline c,_b,_i))::rest
617 when List.for_all minus_or_comment minus_list ->
618 set_minus_comment_or_plus adj1 x
619 | _ -> x in
620 x :: adjust_within_minus contains_plus minus_list @
621 adjust_around_minus rest
622 | (T2(_,Min adj1,_) as t1)::xs ->
623 let (minus_list,rest) = Common.span not_context (t1::xs) in
624 let contains_plus = List.exists is_plus minus_list in
625 adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
626 | x::xs -> x :: adjust_around_minus xs
627 and adjust_within_minus cp (* contains plus *) = function
628 (T2(_,Min adj1,_) as t1)::xs ->
629 let not_minus = function T2(_,Min _,_) -> false | _ -> true in
630 let (not_minus_list,rest) = Common.span not_minus xs in
631 t1 ::
632 (match rest with
633 (T2(_,Min adj2,_) as t2)::xs when common_adj adj1 adj2 ->
634 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
635 @ (adjust_within_minus cp (t2::xs))
636 | (T2(_,Min adj2,_) as t2)::xs ->
637 let is_whitespace_or_plus = function
638 (T2 _) as x -> is_space x
639 | _ -> true (*plus*) in
640 if List.for_all is_whitespace_or_plus not_minus_list
641 then
642 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
643 @ (adjust_within_minus cp (t2::xs))
644 else not_minus_list @ (adjust_within_minus cp (t2::xs))
645 | _ ->
646 if cp
647 then xs
648 else
649 let (spaces,rest) = Common.span is_space xs in
650 (List.map (set_minus_comment_or_plus adj1) spaces)
651 @ rest)
652 | xs -> failwith "should always start with minus"
653 and not_context = function
654 (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false
655 | _ -> true
656 and is_plus = function
657 C2 _ | Cocci2 _ -> true
658 | _ -> false in
659
660 let xs = adjust_around_minus xs in
661
662 (* this drops the blank lines that removing code leaves behind after a brace *)
663 let minus_or_comment_nonl = function
664 T2(_,Min adj,_) -> true
665 | T2(Parser_c.TCommentNewline _,_b,_i) -> false
666 | x -> is_minusable_comment x in
667
668 let rec adjust_after_brace = function
669 [] -> []
670 | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs)
671 when str_of_token2 x =$= "{" ->
672 let (between_minus,rest) = Common.span minus_or_comment_nonl xs in
673 let is_whitespace = function
674 T2(Parser_c.TCommentSpace _,_b,_i)
675 (* patch: cocci *)
676 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
677 | _ -> false in
678 let (newlines,rest) = Common.span is_whitespace rest in
679 let (drop_newlines,last_newline) =
680 let rec loop = function
681 [] -> ([],[])
682 | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
683 (List.rev rest,[x])
684 | x::xs ->
685 let (drop_newlines,last_newline) = loop xs in
686 (drop_newlines,x::last_newline) in
687 loop (List.rev newlines) in
688 x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@
689 last_newline@
690 adjust_after_brace rest
691 | x::xs -> x::adjust_after_brace xs in
692
693 let xs = adjust_after_brace xs in
694
695 (* search backwards from a context } over spaces until reaching a newline,
696 then go back over all minus code until reaching some context or + code,
697 and get rid of all the intervening spaces, newlines, and comments.
698 The input list is reversed. *)
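(* Illustration (assumed): when the code just before a closing "}" is
   removed, the surrounding whitespace is cleaned up as well, so that no
   blank, indented line is left in front of the brace. *)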
699 let rec adjust_before_brace = function
700 [] -> []
701 | ((T2(t,Ctx,_)) as x)::xs when str_of_token2 x =$= "}" or is_newline x ->
702 let (outer_spaces,rest) = Common.span is_space xs in
703 x :: outer_spaces @
704 (match rest with
705 ((T2 (Parser_c.TCommentNewline _,Ctx,_i)) as h) ::
706 (* the rest of this code is the same as from_newline below
707 but merging them seems to be error prone... *)
708 ((T2 (t, Min adj, idx)) as m) :: rest ->
709 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
710 h :: m ::
711 (List.map (set_minus_comment adj) spaces) @
712 (adjust_before_brace rest)
713 | _ -> adjust_before_brace rest)
714 | x::xs -> x :: (adjust_before_brace xs) in
715
716 let from_newline = function
717 ((T2 (t, Min adj, idx)) as m) :: rest ->
718 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
719 m ::
720 (List.map (set_minus_comment adj) spaces) @
721 (adjust_before_brace rest)
722 | rest -> adjust_before_brace rest in
723
724 let xs = List.rev (from_newline (List.rev xs)) in
725 let xs = drop_minus xs in
726 xs
727
728 (* normally, in C code, a semicolon is not preceded by a space or newline *)
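(* e.g. when the last argument before a ")" or the expression before a ";"
   is removed, the space that separated it from the ")" or ";" is removed as
   well, but only if something was actually deleted there. *)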
729 let adjust_before_semicolon toks =
730 let toks = List.rev toks in
731 let rec search_semic = function
732 [] -> []
733 | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
734 if List.mem (str_of_token2 x) [";";")";","]
735 then x :: search_minus false xs
736 else x :: search_semic xs
737 | x::xs -> x :: search_semic xs
738 and search_minus seen_minus xs =
739 let (spaces, rest) = Common.span is_space xs in
740 (* only delete spaces if something is actually deleted *)
741 match rest with
742 ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest
743 | _ -> if seen_minus then rest else xs in
744 List.rev (search_semic toks)
745
746 let is_ident_like s = s ==~ Common.regexp_alpha
747
748 let rec add_space xs =
749 match xs with
750 | [] -> []
751 | [x] -> [x]
752 | (Cocci2(sx,lnx,_,rcolx) as x)::((Cocci2(sy,lny,lcoly,_)) as y)::xs
753 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
754 not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
755 (* this only works within a line. could consider whether
756 something should be done to add newlines too, rather than
757 printing them explicitly in unparse_cocci. *)
758 x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
759 | x::y::xs ->
760 let sx = str_of_token2 x in
761 let sy = str_of_token2 y in
762 if is_ident_like sx && is_ident_like sy
763 then x::C2 " "::(add_space (y::xs))
764 else x::(add_space (y::xs))
765
766
767
768 (* When we insert some new code, because of a + in an SP, we must add this
769 * code at the right place, with the proper indentation. So each time we
770 * encounter some spacing info containing a newline, we update the
771 * current indentation level.
772 *
773 * TODO problems: not accurate. ex: TODO
774 *
775 * TODO: if in a #define region, we should add a \ before the \n
776 *)
777 let new_tabbing2 space =
778 (list_of_string space)
779 +> List.rev
780 +> Common.take_until (fun c -> c =<= '\n')
781 +> List.rev
782 +> List.map string_of_char
783 +> String.concat ""
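(* e.g. new_tabbing2 "\n\t  " = "\t  ": keep only the indentation characters
   that follow the last newline of the token's string. *)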
784
785 let new_tabbing a =
786 Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
787
788
789 let rec adjust_indentation xs =
790 let _current_tabbing = ref "" in
791 let tabbing_unit = ref None in
792
793 let string_of_list l = String.concat "" (List.map string_of_char l) in
794
795 (* try to pick a tabbing unit for the plus code *)
796 let adjust_tabbing_unit old_tab new_tab =
797 if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
798 then
799 let old_tab = list_of_string old_tab in
800 let new_tab = list_of_string new_tab in
801 let rec loop = function
802 ([],new_tab) ->
803 tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
804 | (_,[]) -> failwith "not possible"
805 | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
806 loop (old_tab,new_tab) in
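(* e.g. with old_tab = "\t" and new_tab = "\t\t", the extra "\t" becomes the
   tabbing unit used to indent plus code; it is only set if no tabbing unit
   has been chosen yet. *)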
807
808 let remtab tu current_tab =
809 let current_tab = List.rev(list_of_string current_tab) in
810 let rec loop = function
811 ([],new_tab) -> string_of_list (List.rev new_tab)
812 | (_,[]) -> "" (*weird; tabbing unit used up more than the current tab*)
813 | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
814 | (_,ns) -> (* mismatch; remove what we can *)
815 string_of_list (List.rev ns) in
816 loop (tu,current_tab) in
817
818 let rec find_first_tab started = function
819 [] -> ()
820 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
821 find_first_tab true xs
822 (* patch: coccinelle *)
823 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
824 when started ->
825 let s = str_of_token2 x +> new_tabbing in
826 tabbing_unit := Some (s,List.rev (list_of_string s))
827 | x::xs -> find_first_tab started xs in
828 find_first_tab false xs;
829
830 let rec aux started xs =
831 match xs with
832 | [] -> []
833 (* patch: coccinelle *)
834 | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
835 ((Cocci2 ("{",_,_,_)) as a)::xs
836 when started && str_of_token2 x =$= ")" ->
837 (* to be done for if, etc, but not for a function header *)
838 x::(C2 " ")::a::(aux started xs)
839 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs ->
840 let old_tabbing = !_current_tabbing in
841 str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
842 (* only trust the indentation after the first { *)
843 (if started then adjust_tabbing_unit old_tabbing !_current_tabbing);
844 let coccis_rest = Common.span all_coccis xs in
845 (match coccis_rest with
846 (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
847 (* the case where cocci code has been added before a close } *)
848 x::aux started (Indent_cocci2::xs)
849 | _ -> x::aux started xs)
850 | Indent_cocci2::xs ->
851 (match !tabbing_unit with
852 None -> aux started xs
853 | Some (tu,_) ->
854 _current_tabbing := (!_current_tabbing)^tu;
855 Cocci2 (tu,-1,-1,-1)::aux started xs)
856 | Unindent_cocci2::xs ->
857 (match !tabbing_unit with
858 None -> aux started xs
859 | Some (_,tu) ->
860 _current_tabbing := remtab tu (!_current_tabbing);
861 aux started xs)
862 (* border between existing code and cocci code *)
863 | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_)) as y)::xs
864 when str_of_token2 x =$= "{" ->
865 x::aux true (y::Indent_cocci2::xs)
866 | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
867 when str_of_token2 y =$= "}" ->
868 x::aux started (y::Unindent_cocci2::xs)
869 (* starting the body of the function *)
870 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs
871 | ((Cocci2("{",_,_,_)) as a)::xs -> a::aux true xs
872 | ((Cocci2("\n",_,_,_)) as x)::xs ->
873 (* don't inline this in the expression because of OCaml's evaluation order *)
874 let s = !_current_tabbing in
875 x::Cocci2 (s,-1,-1,-1)::aux started xs
876 | x::xs -> x::aux started xs in
877 aux false xs
878
879
880 let rec find_paren_comma = function
881 | [] -> ()
882
883 (* do nothing if it was like this in the original file *)
884 | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
885 ::xs when p2 =|= p1 + 1 ->
886 find_paren_comma (x2::xs)
887
888 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
889 ::xs when p2 =|= p1 + 1 ->
890 find_paren_comma (x2::xs)
891
892 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
893 ::xs when p2 =|= p1 + 1 ->
894 find_paren_comma (x2::xs)
895
896 (* otherwise we can adjust *)
897 | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
898 x2.remove <- true;
899 find_paren_comma (x2::xs)
900 | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
901 x1.remove <- true;
902 find_paren_comma (x2::xs)
903
904 | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
905 x1.remove <- true;
906 find_paren_comma (x2::xs)
907
908 | x::xs ->
909 find_paren_comma xs
910
911
912 let fix_tokens toks =
913 let toks = toks +> List.map mk_token_extended in
914
915 let cleaner = toks +> Common.exclude (function
916 | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* but keep the ifdefs *)
917 | _ -> false
918 ) in
919 find_paren_comma cleaner;
920
921 let toks = rebuild_tokens_extented toks in
922 toks +> List.map (fun x -> x.tok2)
923
924
925
926 (*****************************************************************************)
927 (* Final unparsing (and debugging support) *)
928 (*****************************************************************************)
929
930 (* for debugging *)
931 type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
932
933 let kind_of_token2 = function
934 | Fake2 -> KFake
935 | Cocci2 _ -> KCocci
936 | C2 _ -> KC
937 | T2 (t,_,_) ->
938 (match TH.pinfo_of_tok t with
939 | ExpandedTok _ -> KExpanded
940 | OriginTok _ -> KOrigin
941 | FakeTok _ -> raise Impossible (* now a Fake2 *)
942 | AbstractLineTok _ -> raise Impossible (* now a KC *)
943 )
944 | Unindent_cocci2 | Indent_cocci2 -> raise Impossible
945
946 let end_mark = "!"
947
948 let start_mark = function
949 | KFake -> "!F!"
950 | KCocci -> "!S!"
951 | KC -> "!A!"
952 | KExpanded -> "!E!"
953 | KOrigin -> ""
954
955 let print_all_tokens2 pr xs =
956 if !Flag_parsing_c.debug_unparsing
957 then
958 let current_kind = ref KOrigin in
959 xs +> List.iter (fun t ->
960 let newkind = kind_of_token2 t in
961 if newkind =*= !current_kind
962 then pr (str_of_token2 t)
963 else begin
964 pr (end_mark);
965 pr (start_mark newkind);
966 pr (str_of_token2 t);
967 current_kind := newkind
968 end
969 );
970 else
971 xs +> List.iter (fun x -> pr (str_of_token2 x))
972
973
974
975
976 (*****************************************************************************)
977 (* Entry points *)
978 (*****************************************************************************)
979
980 (* old: PPviatok was introduced at the beginning to allow pretty printing a
981 * complete C file, including a C file modified by transformation.ml,
982 * even though pretty_print_c.ml, ast_to_flow (and maybe flow_to_ast) did
983 * not yet handle all the cases. Indeed, we don't need to do anything
984 * fancy when a function was not modified at all; we just need to
985 * print its list of tokens as-is. But now pretty_print_c.ml handles
986 * almost everything, so it is perhaps less useful. PPviatok may still
987 * optimize the pretty printing a little.
988 *
989 * update: we now have PPviastr, which is even faster than PPviatok, so
990 * PPviatok has disappeared.
991 *)
992
993 type ppmethod = PPnormal | PPviastr
994
995
996
997
998 (* The pp_program function calls pretty_print_c.ml with a special
999 * function to print the leaf components, the tokens. When we want to
1000 * print a token, we may also need to print the spaces and comments that
1001 * were close to it in the original file (and that were omitted during the
1002 * parsing phase), and honor what the cocci-info attached to the token says.
1003 * We may not print the token at all if it is a MINUS token, and we may
1004 * print it together with some cocci code attached to it as a PLUS.
1005 * So we may also call unparse_cocci. Because the cocci code may
1006 * contain metavariables, unparse_cocci will in fact sometimes call back
1007 * pretty_print_c (which this time will not call back unparse_cocci again).
1008 *)
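(* Pipeline sketch (summarizing the phases below): get_fakeInfo_and_tokens
   builds the token1 stream from the AST and the original tokens,
   displace_fake_nodes repositions the fake nodes, expand_mcode produces the
   token2 stream with the cocci additions, and then, unless in sgrep mode,
   the adjust_* passes, remove_minus_and_between_and_expanded_and_fake,
   add_space and fix_tokens clean it up before print_all_tokens2 writes the
   result. *)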
1009
1010 let pp_program2 xs outfile =
1011 Common.with_open_outfile outfile (fun (pr,chan) ->
1012 let pr s =
1013 if !Flag_parsing_c.debug_unparsing
1014 then begin pr2_no_nl s; flush stderr end
1015 else pr s
1016 (* flush chan; *)
1017 (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1018 in
1019
1020 xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
1021 (* here can still work on ast *)
1022 let e = remove_useless_fakeInfo_struct e in
1023
1024 match ppmethod with
1025 | PPnormal ->
1026 (* now work on tokens *)
1027
1028 (* phase1: just get all the tokens, all the information *)
1029 assert(toks_e +> List.for_all (fun t ->
1030 TH.is_origin t or TH.is_expanded t
1031 ));
1032 let toks = get_fakeInfo_and_tokens e toks_e in
1033 let toks = displace_fake_nodes toks in
1034 (* assert Origin;ExpandedTok;Faketok *)
1035 let toks = expand_mcode toks in
1036 (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1037 * and no tag information, just NOTHING. *)
1038
1039 let toks =
1040 if !Flag.sgrep_mode2
1041 then drop_minus toks (* nothing to do for sgrep *)
1042 else
1043 (* phase2: can now start to filter and adjust *)
1044 let toks = adjust_indentation toks in
1045 let toks = adjust_before_semicolon toks in (* before remove minus *)
1046 let toks = remove_minus_and_between_and_expanded_and_fake toks in
1047 (* assert Origin + Cocci + C and no minus *)
1048 let toks = add_space toks in
1049 let toks = fix_tokens toks in
1050 toks in
1051
1052 (* in theory we could reparse and rework the ast here, or
1053 * apply another SP. Not before, because Julia may have generated a
1054 * file that does not parse. We would need to call unparsing_tricks
1055 * before being ready to reparse. *)
1056 print_all_tokens2 pr toks;
1057
1058 | PPviastr -> pr str
1059 )
1060 )
1061
1062 let pp_program a b =
1063 Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
1064
1065
1066 let pp_program_default xs outfile =
1067 let xs' = xs +> List.map (fun x -> x, PPnormal) in
1068 pp_program xs' outfile