Release coccinelle-0.2.3rc6
[bpt/coccinelle.git] / parsing_c / unparse_c.ml
1 (* Yoann Padioleau, Julia Lawall
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *
15 *
16 * Modifications by Julia Lawall for better newline handling.
17 *)
18 open Common
19
20 open Ast_c
21
22 module TH = Token_helpers
23
24
25 (* should keep comments and directives in between adjacent deleted terms,
26 but not comments and directives within deleted terms. should use the
27 labels found in the control-flow graph *)
28
29
30
31 (*****************************************************************************)
32 (* Wrappers *)
33 (*****************************************************************************)
34 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
35
36 (*****************************************************************************)
37 (* Types used during the intermediate phases of the unparsing *)
38 (*****************************************************************************)
39
40 type token1 =
41 | Fake1 of info
42 | T1 of Parser_c.token
43
44 (* The cocci_tag of the token should always be a NOTHING. The mark of
45 * the token can only be OriginTok or ExpandedTok. Why not get rid of
46 * token and get something simpler ? because we need to know if the
47 * info is a TCommentCpp or TCommentSpace, etc for some of the further
48 * analysis so easier to keep with the token.
49 *
50 * This type contains the whole information. Have all the tokens with this
51 * type.
52 *)
53 type min =
54 Min of (int list (* match numbers *) * int (* adjacency information *))
55 | Ctx
56
57 type token2 =
58 | T2 of Parser_c.token * min *
59 int option (* orig index, abstracting away comments and space *)
60 | Fake2
61 | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
62 * Unparse_cocci.nlhint option
63 | C2 of string
64 | Indent_cocci2
65 | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
66
67 (* not used yet *)
68 type token3 =
69 | T3 of Parser_c.token
70 | Cocci3 of string
71 | C3 of string
72
73
74 (* similar to the tech in parsing_hack *)
75 type token_extended = {
76 tok2 : token2;
77 str : string;
78 idx: int option; (* to know if 2 tokens were consecutive in orig file *)
79 mutable new_tokens_before : token2 list;
80 mutable remove : bool;
81 }
82
83
84 (*****************************************************************************)
85 (* Helpers *)
86 (*****************************************************************************)
87
88 let info_of_token1 t =
89 match t with
90 | Fake1 info -> info
91 | T1 tok -> TH.info_of_tok tok
92
93 let print_token1 = function
94 T1 tok -> TH.str_of_tok tok
95 | Fake1 info -> "fake"
96
97 let str_of_token2 = function
98 | T2 (t,_,_) -> TH.str_of_tok t
99 | Fake2 -> ""
100 | Cocci2 (s,_,_,_,_) -> s
101 | C2 s -> s
102 | Indent_cocci2 -> ""
103 | Unindent_cocci2 _ -> ""
104
105 let print_token2 = function
106 | T2 (t,b,_) ->
107 let b_str =
108 match b with
109 Min (index,adj) ->
110 Printf.sprintf "-%d[%s]" adj
111 (String.concat " " (List.map string_of_int index))
112 | Ctx -> "" in
113 "T2:"^b_str^TH.str_of_tok t
114 | Fake2 -> "fake"
115 | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
116 | C2 s -> "C2:"^s
117 | Indent_cocci2 -> "Indent"
118 | Unindent_cocci2 _ -> "Unindent"
119
120 let simple_print_all_tokens1 l =
121 List.iter (function x -> Printf.printf "|%s| " (print_token1 x)) l;
122 Printf.printf "\n"
123
124 let simple_print_all_tokens2 l =
125 List.iter (function x -> Printf.printf "|%s| " (print_token2 x)) l;
126 Printf.printf "\n"
127
128 let str_of_token3 = function
129 | T3 t -> TH.str_of_tok t
130 | Cocci3 s | C3 s -> s
131
132
133
134 let mk_token_extended x =
135 let origidx =
136 match x with
137 | T2 (_,_, idx) -> idx
138 | _ -> None
139 in
140 { tok2 = x;
141 str = str_of_token2 x;
142 idx = origidx;
143 new_tokens_before = [];
144 remove = false;
145 }
146
147 let rebuild_tokens_extented toks_ext =
148 let _tokens = ref [] in
149 toks_ext +> List.iter (fun tok ->
150 tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
151 if not tok.remove then push2 tok.tok2 _tokens;
152 );
153 let tokens = List.rev !_tokens in
154 (tokens +> List.map mk_token_extended)
155
156
157 let mcode_contain_plus = function
158 | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
159 | Ast_cocci.CONTEXT _ -> true
160 (* patch: when need full coccinelle transformation *)
161 | Ast_cocci.MINUS (_,_,_,[]) -> false
162 | Ast_cocci.MINUS (_,_,_,x::xs) -> true
163 | Ast_cocci.PLUS _ -> raise Impossible
164
165 let contain_plus info =
166 let mck = Ast_c.mcode_of_info info in
167 mcode_contain_plus mck
168
169 (*****************************************************************************)
170 (* Last fix on the ast *)
171 (*****************************************************************************)
172
173 (* Because of the ugly trick to handle initialiser, I generate fake ','
174 * for the last initializer element, but if there is nothing around it,
175 * I don't want in the end to print it.
176 *)
177
178 let remove_useless_fakeInfo_struct program =
179 let bigf = { Visitor_c.default_visitor_c_s with
180 Visitor_c.kini_s = (fun (k,bigf) ini ->
181 match k ini with
182 | InitList args, ii ->
183 (match ii with
184 | [_i1;_i2] -> ini
185 | [i1;i2;iicommaopt] ->
186 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
187 && (Ast_c.is_fake iicommaopt)
188 (* sometimes the guy put a normal iicommaopt *)
189 then InitList args, [i1;i2]
190 else InitList args, [i1;i2;iicommaopt]
191 | [i1;i2;iicommaopt;end_comma_opt] ->
192 (* only in #define. end_comma_opt canot be fake *)
193 (* not sure if this will be considered ambiguous with a previous
194 case? *)
195 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
196 && (Ast_c.is_fake iicommaopt)
197 (* sometimes the guy put a normal iicommaopt *)
198 then InitList args, [i1;i2;end_comma_opt]
199 else InitList args, [i1;i2;iicommaopt;end_comma_opt]
200 | _ -> raise Impossible
201 )
202 | x -> x
203 )
204 } in
205 Visitor_c.vk_toplevel_s bigf program
206
207
208 (*****************************************************************************)
209 (* Tokens1 generation *)
210 (*****************************************************************************)
211
212 let get_fakeInfo_and_tokens celem toks =
213 let toks_in = ref toks in
214 let toks_out = ref [] in
215
216 (* todo? verify good order of position ? *)
217 let pr_elem info =
218 match Ast_c.pinfo_of_info info with
219 | FakeTok _ ->
220 Common.push2 (Fake1 info) toks_out
221 | OriginTok _ | ExpandedTok _ ->
222 (* get the associated comments/space/cppcomment tokens *)
223 let (before, x, after) =
224 !toks_in +> Common.split_when (fun tok ->
225 info =*= TH.info_of_tok tok)
226 in
227 assert(info =*= TH.info_of_tok x);
228 (*old: assert(before +> List.for_all (TH.is_comment)); *)
229 before +> List.iter (fun x ->
230 if not (TH.is_comment x)
231 then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
232 (* case such as int asm d3("x"); not yet in ast *)
233 );
234 before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
235 push2 (T1 x) toks_out;
236 toks_in := after;
237 | AbstractLineTok _ ->
238 (* can be called on type info when for instance use -type_c *)
239 if !Flag_parsing_c.pretty_print_type_info
240 then Common.push2 (Fake1 info) toks_out
241 else raise Impossible (* at this stage *)
242 in
243
244 let pr_space _ = () in (* use the spacing that is there already *)
245
246 Pretty_print_c.pp_program_gen pr_elem pr_space celem;
247
248 if not (null !toks_in)
249 then failwith "WEIRD: unparsing not finished";
250
251 List.rev !toks_out
252
253 (* Fake nodes that have BEFORE code should be moved over any subsequent
254 whitespace and newlines, but not any comments, to get as close to the affected
255 code as possible. Similarly, fake nodes that have AFTER code should be moved
256 backwards. No fake nodes should have both before and after code. *)
257
258 let displace_fake_nodes toks =
259 let is_fake = function Fake1 _ -> true | _ -> false in
260 let is_whitespace = function
261 T1(Parser_c.TCommentSpace _)
262 (* patch: cocci *)
263 | T1(Parser_c.TCommentNewline _) -> true
264 | _ -> false in
265 let rec loop toks =
266 let fake_info =
267 try Some (Common.split_when is_fake toks)
268 with Not_found -> None in
269 match fake_info with
270 Some(bef,((Fake1 info) as fake),aft) ->
271 (match !(info.cocci_tag) with
272 | Some x ->
273 (match x with
274 (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
275 (* move the fake node forwards *)
276 let (whitespace,rest) = Common.span is_whitespace aft in
277 bef @ whitespace @ fake :: (loop rest)
278 | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
279 (* move the fake node backwards *)
280 let revbef = List.rev bef in
281 let (revwhitespace,revprev) = Common.span is_whitespace revbef in
282 let whitespace = List.rev revwhitespace in
283 let prev = List.rev revprev in
284 prev @ fake :: (loop (whitespace @ aft))
285 | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
286 bef @ fake :: (loop aft)
287 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
288 failwith "fake node should not be before-after"
289 | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
290 )
291 | None ->
292 bef @ fake :: (loop aft)
293 )
294 | None -> toks
295 | _ -> raise Impossible in
296 loop toks
297
298 (*****************************************************************************)
299 (* Tokens2 generation *)
300 (*****************************************************************************)
301
302 let comment2t2 = function
303 (Token_c.TCommentCpp
304 (* not sure iif the following list is exhaustive or complete *)
305 (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError),
306 (info : Token_c.info)) ->
307 C2(info.Common.str)
308 | (Token_c.TCommentCpp x,(info : Token_c.info)) ->
309 C2("\n"^info.Common.str^"\n")
310 | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
311
312 let expand_mcode toks =
313 let toks_out = ref [] in
314
315 let index = ref 0 in
316
317 let add_elem t minus =
318 match t with
319 | Fake1 info ->
320 let str = Ast_c.str_of_info info in
321 if str =$= ""
322 then push2 (Fake2) toks_out
323 (* perhaps the fake ',' *)
324 else push2 (C2 str) toks_out
325
326
327 | T1 tok ->
328 (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
329 (* no tag on expandedTok ! *)
330 let modified = function
331 None -> false
332 | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
333 | _ -> true in
334 (if (TH.is_expanded tok &&
335 modified !((TH.info_of_tok tok).cocci_tag)
336 (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*))
337 then
338 failwith
339 (Printf.sprintf
340 "expanded token %s on line %d is either modified or stored in a metavariable"
341 (TH.str_of_tok tok) (TH.line_of_tok tok)));
342
343 let tok' = tok +> TH.visitor_info_of_tok (fun i ->
344 { i with cocci_tag = ref Ast_c.emptyAnnot; }
345 ) in
346
347 let optindex =
348 if TH.is_origin tok && not (TH.is_real_comment tok)
349 then begin
350 incr index;
351 Some !index
352 end
353 else None
354 in
355
356 push2 (T2 (tok', minus, optindex)) toks_out
357 in
358
359 let expand_info t =
360 let (mcode,env) =
361 Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
362
363 let pr_cocci s ln col rcol hint =
364 push2 (Cocci2(s,ln,col,rcol,hint)) toks_out in
365 let pr_c info =
366 (match Ast_c.pinfo_of_info info with
367 Ast_c.AbstractLineTok _ ->
368 push2 (C2 (Ast_c.str_of_info info)) toks_out
369 | Ast_c.FakeTok (s,_) ->
370 push2 (C2 s) toks_out
371 | _ ->
372 Printf.printf "line: %s\n" (Common.dump info);
373 failwith "not an abstract line");
374 (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
375 List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
376
377 let pr_barrier ln col = (* marks a position, used around C code *)
378 push2 (Cocci2("",ln,col,col,None)) toks_out in
379 let pr_nobarrier ln col = () in (* not needed for linux spacing *)
380
381 let pr_cspace _ = push2 (C2 " ") toks_out in
382
383 let pr_space _ = () (* rely on add_space in cocci code *) in
384 let pr_arity _ = () (* not interested *) in
385
386 let indent _ = push2 Indent_cocci2 toks_out in
387 let unindent x = push2 (Unindent_cocci2 x) toks_out in
388
389 let args_pp =
390 (env, pr_cocci, pr_c, pr_cspace,
391 (match !Flag_parsing_c.spacing with
392 Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
393 pr_arity,
394 (match !Flag_parsing_c.spacing with
395 Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
396 indent, unindent) in
397
398 (* old: when for yacfe with partial cocci:
399 * add_elem t false;
400 *)
401
402 (* patch: when need full coccinelle transformation *)
403 let unparser = Unparse_cocci.pp_list_list_any args_pp false in
404 match mcode with
405 | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
406 (* Why adding ? because I want to have all the information, the whole
407 * set of tokens, so I can then process and remove the
408 * is_between_two_minus for instance *)
409 add_elem t (Min (inst,adj));
410 unparser any_xxs Unparse_cocci.InPlace
411 | Ast_cocci.CONTEXT (_,any_befaft) ->
412 (match any_befaft with
413 | Ast_cocci.NOTHING ->
414 add_elem t Ctx
415 | Ast_cocci.BEFORE (xxs,_) ->
416 unparser xxs Unparse_cocci.Before;
417 add_elem t Ctx
418 | Ast_cocci.AFTER (xxs,_) ->
419 add_elem t Ctx;
420 unparser xxs Unparse_cocci.After;
421 | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
422 unparser xxs Unparse_cocci.Before;
423 add_elem t Ctx;
424 unparser yys Unparse_cocci.After;
425 )
426 | Ast_cocci.PLUS _ -> raise Impossible
427
428 in
429
430 toks +> List.iter expand_info;
431 List.rev !toks_out
432
433
434 (*****************************************************************************)
435 (* Tokens2 processing, filtering, adjusting *)
436 (*****************************************************************************)
437
438 let is_space = function
439 | T2(Parser_c.TCommentSpace _,_b,_i) -> true (* only whitespace *)
440 | _ -> false
441
442 let is_newline = function
443 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
444 | _ -> false
445
446 let is_whitespace = function
447 | (T2 (t,_b,_i)) ->
448 (match t with
449 | Parser_c.TCommentSpace _ -> true (* only whitespace *)
450 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
451 | _ -> false
452 )
453 | _ -> false
454
455 let is_minusable_comment = function
456 | (T2 (t,_b,_i)) ->
457 (match t with
458 | Parser_c.TCommentSpace _ (* only whitespace *)
459 (* patch: coccinelle *)
460 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
461 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
462 | Parser_c.TComment _
463 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
464 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
465 | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
466 -> true
467
468 | Parser_c.TCommentMisc _
469 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
470 -> false
471
472 | _ -> false
473 )
474 | _ -> false
475
476 let is_minusable_comment_nocpp = function
477 | (T2 (t,_b,_i)) ->
478 (match t with
479 | Parser_c.TCommentSpace _ (* only whitespace *)
480 (* patch: coccinelle *)
481 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
482 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
483 | Parser_c.TComment _ -> true
484 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
485 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
486 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
487 -> false
488
489 | Parser_c.TCommentMisc _
490 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
491 -> false
492
493 | _ -> false
494 )
495 | _ -> false
496
497 let all_coccis = function
498 Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 _ -> true
499 | _ -> false
500
501 (*previously gave up if the first character was a newline, but not clear why*)
502 let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
503
504 let set_minus_comment adj = function
505 | T2 (t,Ctx,idx) ->
506 let str = TH.str_of_tok t in
507 (match t with
508 | Parser_c.TCommentSpace _
509 (* patch: coccinelle *)
510 | Parser_c.TCommentNewline _ -> ()
511
512 | Parser_c.TComment _
513 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
514 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
515 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
516 ->
517 pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
518 (TH.line_of_tok t) str)
519 | _ -> raise Impossible
520 );
521 T2 (t, Min adj, idx)
522 (* patch: coccinelle *)
523 | T2 (t,Min adj,idx) as x -> x
524 | _ -> raise Impossible
525
526 let set_minus_comment_or_plus adj = function
527 Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 _ as x -> x
528 | x -> set_minus_comment adj x
529
530 let drop_minus xs =
531 xs +> Common.exclude (function
532 | T2 (t,Min adj,_) -> true
533 | _ -> false
534 )
535
536 let remove_minus_and_between_and_expanded_and_fake xs =
537
538 (* get rid of exampled and fake tok *)
539 let xs = xs +> Common.exclude (function
540 | T2 (t,_,_) when TH.is_expanded t -> true
541 | Fake2 -> true
542 | _ -> false
543 )
544 in
545
546 let minus_or_comment = function
547 T2(_,Min adj,_) -> true
548 | x -> is_minusable_comment x in
549
550 let minus_or_comment_nocpp = function
551 T2(_,Min adj,_) -> true
552 | x -> is_minusable_comment_nocpp x in
553
554 let common_adj (index1,adj1) (index2,adj2) =
555 adj1 = adj2 (* same adjacency info *) &&
556 (* non-empty intersection of witness trees *)
557 not ((Common.inter_set index1 index2) = []) in
558
559 let rec adjust_around_minus = function
560 [] -> []
561 | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
562 (((T2(_,Min adj,_))::_) as rest) ->
563 (* an initial newline, as in a replaced statement *)
564 let (between_minus,rest) = Common.span minus_or_comment rest in
565 (match rest with
566 [] -> (set_minus_comment adj x) ::
567 (List.map (set_minus_comment adj) between_minus)
568 | T2(_,Ctx,_)::_ when is_newline (List.hd(List.rev between_minus)) ->
569 (set_minus_comment adj x)::(adjust_within_minus between_minus) @
570 (adjust_around_minus rest)
571 | _ ->
572 x :: (adjust_within_minus between_minus) @
573 (adjust_around_minus rest))
574 | ((T2(_,Min adj,_))::_) as rest ->
575 (* no initial newline, as in a replaced expression *)
576 let (between_minus,rest) = Common.span minus_or_comment rest in
577 (match rest with
578 [] ->
579 (List.map (set_minus_comment adj) between_minus)
580 | _ ->
581 (adjust_within_minus between_minus) @
582 (adjust_around_minus rest))
583 | x::xs -> x::adjust_around_minus xs
584 and adjust_within_minus = function
585 [] -> []
586 | (T2(_,Min adj1,_) as t1)::xs ->
587 let (between_minus,rest) = Common.span is_minusable_comment xs in
588 (match rest with
589 [] ->
590 (* keep last newline *)
591 let (drop,keep) =
592 try
593 let (drop,nl,keep) =
594 Common.split_when is_newline between_minus in
595 (drop, nl :: keep)
596 with Not_found -> (between_minus,[]) in
597 t1 ::
598 List.map (set_minus_comment_or_plus adj1) drop @
599 keep
600 | (T2(_,Min adj2,_) as t2)::rest when common_adj adj1 adj2 ->
601 t1::
602 List.map (set_minus_comment_or_plus adj1) between_minus @
603 adjust_within_minus (t2::rest)
604 | x::xs ->
605 t1::(between_minus @ adjust_within_minus (x::xs)))
606 | _ -> failwith "only minus and space possible" in
607
608 (* new idea: collects regions not containing non-space context code
609 if two adjacent adjacent minus tokens satisfy common_adj then delete
610 all spaces, comments etc between them
611 if two adjacent minus tokens do not satisfy common_adj only delete
612 the spaces between them if there are no comments, etc.
613 if the region contain no plus code and is both preceded and followed
614 by a newline, delete the initial newline. *)
615
616 let rec adjust_around_minus = function
617 [] -> []
618 | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
619 (T2(_,Min adj1,_) as t1)::xs ->
620 let (minus_list,rest) = Common.span not_context (t1::xs) in
621 let contains_plus = List.exists is_plus minus_list in
622 let x =
623 match List.rev minus_list with
624 (T2(Parser_c.TCommentNewline c,_b,_i))::rest
625 when List.for_all minus_or_comment minus_list ->
626 set_minus_comment_or_plus adj1 x
627 | _ -> x in
628 x :: adjust_within_minus contains_plus minus_list @
629 adjust_around_minus rest
630 | (T2(_,Min adj1,_) as t1)::xs ->
631 let (minus_list,rest) = Common.span not_context (t1::xs) in
632 let contains_plus = List.exists is_plus minus_list in
633 adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
634 | x::xs -> x :: adjust_around_minus xs
635 and adjust_within_minus cp (* contains plus *) = function
636 (T2(_,Min adj1,_) as t1)::xs ->
637 let not_minus = function T2(_,Min _,_) -> false | _ -> true in
638 let (not_minus_list,rest) = Common.span not_minus xs in
639 t1 ::
640 (match rest with
641 (T2(_,Min adj2,_) as t2)::xs when common_adj adj1 adj2 ->
642 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
643 @ (adjust_within_minus cp (t2::xs))
644 | (T2(_,Min adj2,_) as t2)::xs ->
645 let is_whitespace_or_plus = function
646 (T2 _) as x -> is_whitespace x
647 | _ -> true (*plus*) in
648 if List.for_all is_whitespace_or_plus not_minus_list
649 then
650 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
651 @ (adjust_within_minus cp (t2::xs))
652 else not_minus_list @ (adjust_within_minus cp (t2::xs))
653 | _ ->
654 if cp
655 then xs
656 else
657 let (spaces,rest) = Common.span is_space xs in
658 (List.map (set_minus_comment_or_plus adj1) spaces)
659 @ rest)
660 | xs -> failwith "should always start with minus"
661 and not_context = function
662 (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false
663 | _ -> true
664 and is_plus = function
665 C2 _ | Cocci2 _ -> true
666 | _ -> false in
667
668 let xs = adjust_around_minus xs in
669
670 (* this drops blank lines after a brace introduced by removing code *)
671 let minus_or_comment_nonl = function
672 T2(_,Min adj,_) -> true
673 | T2(Parser_c.TCommentNewline _,_b,_i) -> false
674 | x -> is_minusable_comment x in
675
676 let rec adjust_after_brace = function
677 [] -> []
678 | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs)
679 when str_of_token2 x =$= "{" ->
680 let (between_minus,rest) = Common.span minus_or_comment_nonl xs in
681 let is_whitespace = function
682 T2(Parser_c.TCommentSpace _,_b,_i)
683 (* patch: cocci *)
684 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
685 | _ -> false in
686 let (newlines,rest) = Common.span is_whitespace rest in
687 let (drop_newlines,last_newline) =
688 let rec loop = function
689 [] -> ([],[])
690 | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
691 (List.rev rest,[x])
692 | x::xs ->
693 let (drop_newlines,last_newline) = loop xs in
694 (drop_newlines,x::last_newline) in
695 loop (List.rev newlines) in
696 x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@
697 last_newline@
698 adjust_after_brace rest
699 | x::xs -> x::adjust_after_brace xs in
700
701 let xs = adjust_after_brace xs in
702
703 (* search backwards from context } over spaces until reaching a newline.
704 then go back over all minus code until reaching some context or + code.
705 get rid of all intervening spaces, newlines, and comments
706 input is reversed *)
707 let rec adjust_before_brace = function
708 [] -> []
709 | ((T2(t,Ctx,_)) as x)::xs when str_of_token2 x =$= "}" or is_newline x ->
710 let (outer_spaces,rest) = Common.span is_space xs in
711 x :: outer_spaces @
712 (match rest with
713 ((T2 (Parser_c.TCommentNewline _,Ctx,_i)) as h) ::
714 (* the rest of this code is the same as from_newline below
715 but merging them seems to be error prone... *)
716 ((T2 (t, Min adj, idx)) as m) :: rest ->
717 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
718 h :: m ::
719 (List.map (set_minus_comment adj) spaces) @
720 (adjust_before_brace rest)
721 | _ -> adjust_before_brace rest)
722 | x::xs -> x :: (adjust_before_brace xs) in
723
724 let from_newline = function
725 ((T2 (t, Min adj, idx)) as m) :: rest ->
726 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
727 m ::
728 (List.map (set_minus_comment adj) spaces) @
729 (adjust_before_brace rest)
730 | rest -> adjust_before_brace rest in
731
732 let xs = List.rev (from_newline (List.rev xs)) in
733 let xs = drop_minus xs in
734 xs
735
736 (* normally, in C code, a semicolon is not preceded by a space or newline *)
737 let adjust_before_semicolon toks =
738 let toks = List.rev toks in
739 let rec search_semic = function
740 [] -> []
741 | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
742 if List.mem (str_of_token2 x) [";";")";","]
743 then x :: search_minus false xs
744 else x :: search_semic xs
745 | x::xs -> x :: search_semic xs
746 and search_minus seen_minus xs =
747 let (spaces, rest) = Common.span is_space xs in
748 (* only delete spaces if something is actually deleted *)
749 match rest with
750 ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest
751 | _ -> if seen_minus then rest else xs in
752 List.rev (search_semic toks)
753
754 let is_ident_like s = s ==~ Common.regexp_alpha
755
756 let rec drop_space_at_endline = function
757 [] -> []
758 | [x] -> [x]
759 | ((T2(Parser_c.TCommentSpace _,Ctx,_i)) as a)::rest ->
760 let (outer_spaces,rest) = Common.span is_space rest in
761 let minus_or_comment_or_space_nocpp = function
762 T2(_,Min adj,_) -> true
763 | (T2(Parser_c.TCommentSpace _,Ctx,_i)) -> true
764 | (T2(Parser_c.TCommentNewline _,Ctx,_i)) -> false
765 | x -> false in
766 let (minus,rest) = Common.span minus_or_comment_or_space_nocpp rest in
767 let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in
768 if List.exists (function T2(_,Min adj,_) -> true | _ -> false) minus
769 then
770 match rest with
771 ((T2(Parser_c.TCommentNewline _,Ctx,_i)) as a)::rest ->
772 (* drop trailing spaces *)
773 minus@a::(drop_space_at_endline rest)
774 | _ -> fail()
775 else fail()
776 | a :: rest -> a :: drop_space_at_endline rest
777
778 (* if a removed ( is between two tokens, then add a space *)
779 let rec paren_to_space = function
780 [] -> []
781 | [x] -> [x]
782 | [x;y] -> [x;y]
783 | ((T2(_,Ctx,_)) as a)::((T2(t,Min _,_)) as b)::((T2(_,Ctx,_)) as c)::rest
784 when not (is_whitespace a) && TH.str_of_tok t = "(" ->
785 simple_print_all_tokens2 [a;b;c];
786 a :: b :: (C2 " ") :: (paren_to_space (c :: rest))
787 | a :: rest -> a :: (paren_to_space rest)
788
789 let rec add_space xs =
790 match xs with
791 | [] -> []
792 | [x] -> [x]
793 | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
794 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
795 not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
796 (* this only works within a line. could consider whether
797 something should be done to add newlines too, rather than
798 printing them explicitly in unparse_cocci. *)
799 x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
800 | x::y::xs ->
801 let sx = str_of_token2 x in
802 let sy = str_of_token2 y in
803 if is_ident_like sx && is_ident_like sy
804 then x::C2 " "::(add_space (y::xs))
805 else x::(add_space (y::xs))
806
807 (* The following only works for the outermost function call. Stack records
808 the column of all open parentheses. Space_cell contains the most recent
809 comma in the outermost function call. The goal is to decide whether this
810 should be followed by a space or a newline and indent. *)
811 let add_newlines toks tabbing_unit =
812 let create_indent n =
813 let (tu,tlen) =
814 match tabbing_unit with
815 Some ("\t",_) -> ("\t",8)
816 | Some ("",_) -> ("\t",8) (* not sure why... *)
817 | Some (s,_) -> (s,String.length s) (* assuming only spaces *)
818 | None -> ("\t",8) in
819 let rec loop seen =
820 if seen + tlen <= n
821 then tu ^ loop (seen + tlen)
822 else String.make (n-seen) ' ' in
823 loop 0 in
824 let check_for_newline count x = function
825 Some (start,space_cell) when count > Flag_parsing_c.max_width ->
826 space_cell := "\n"^(create_indent x);
827 Some (x + (count - start))
828 | _ -> None in
829 (* the following is for strings that may contain newline *)
830 let string_length s count =
831 let l = list_of_string s in
832 List.fold_left
833 (function count ->
834 function
835 '\t' -> count + 8
836 | '\n' -> 0
837 | c -> count + 1)
838 count l in
839 let rec loop info count = function
840 [] -> []
841 | ((T2(tok,_,_)) as a)::xs ->
842 a :: loop info (string_length (TH.str_of_tok tok) count) xs
843 | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs ->
844 let (stack,space_cell) = info in
845 let rest =
846 match hint with
847 None -> loop info (count + (String.length s)) xs
848 | Some Unparse_cocci.StartBox ->
849 let count = count + (String.length s) in
850 loop (count::stack,space_cell) count xs
851 | Some Unparse_cocci.EndBox ->
852 let count = count + (String.length s) in
853 (match stack with
854 [x] ->
855 (match check_for_newline count x space_cell with
856 Some count -> loop ([],None) count xs
857 | None -> loop ([],None) count xs)
858 | _ -> loop (List.tl stack,space_cell) count xs)
859 | Some (Unparse_cocci.SpaceOrNewline sp) ->
860 let count = count + (String.length s) + 1 (*space*) in
861 (match stack with
862 [x] ->
863 (match check_for_newline count x space_cell with
864 Some count -> loop (stack,Some (x,sp)) count xs
865 | None -> loop (stack,Some (count,sp)) count xs)
866 | _ -> loop info count xs) in
867 a :: rest
868 | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs
869 | Fake2 :: _ | Indent_cocci2 :: _
870 | Unindent_cocci2 _::_ ->
871 failwith "unexpected fake, indent, or unindent" in
872 let redo_spaces prev = function
873 Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) ->
874 C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev
875 | t -> t::prev in
876 (match !Flag_parsing_c.spacing with
877 Flag_parsing_c.SMPL -> toks
878 | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks)))
879
880 (* When insert some new code, because of a + in a SP, we must add this
881 * code at the right place, with the good indentation. So each time we
882 * encounter some spacing info, with some newline, we maintain the
883 * current indentation level used.
884 *
885 * TODO problems: not accurate. ex: TODO
886 *
887 * TODO: if in #define region, should add a \ \n
888 *)
889 let new_tabbing2 space =
890 (list_of_string space)
891 +> List.rev
892 +> Common.take_until (fun c -> c =<= '\n')
893 +> List.rev
894 +> List.map string_of_char
895 +> String.concat ""
896
897 let new_tabbing a =
898 Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
899
900
901 let rec adjust_indentation xs =
902
903 let _current_tabbing = ref "" in
904 let tabbing_unit = ref None in
905
906 let string_of_list l = String.concat "" (List.map string_of_char l) in
907
908 (* try to pick a tabbing unit for the plus code *)
909 let adjust_tabbing_unit old_tab new_tab =
910 if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
911 then
912 let old_tab = list_of_string old_tab in
913 let new_tab = list_of_string new_tab in
914 let rec loop = function
915 ([],new_tab) ->
916 tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
917 | (_,[]) -> failwith "not possible"
918 | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
919 loop (old_tab,new_tab) in
920
921 let remtab tu current_tab =
922 let current_tab = List.rev(list_of_string current_tab) in
923 let rec loop = function
924 ([],new_tab) -> string_of_list (List.rev new_tab)
925 | (_,[]) -> "" (*weird; tabbing unit used up more than the current tab*)
926 | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
927 | (_,ns) -> (* mismatch; remove what we can *)
928 string_of_list (List.rev ns) in
929 loop (tu,current_tab) in
930
931 let rec find_first_tab started = function
932 [] -> ()
933 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
934 find_first_tab true xs
935 (* patch: coccinelle *)
936 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
937 when started ->
938 let s = str_of_token2 x +> new_tabbing in
939 tabbing_unit := Some (s,List.rev (list_of_string s))
940 | x::xs -> find_first_tab started xs in
941 find_first_tab false xs;
942
943 let rec aux started xs =
944 match xs with
945 | [] -> []
946 (* patch: coccinelle *)
947 | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
948 ((Cocci2 ("{",_,_,_,_)) as a)::xs
949 when started && str_of_token2 x =$= ")" ->
950 (* to be done for if, etc, but not for a function header *)
951 x::(C2 " ")::a::(aux started xs)
952 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs ->
953 let old_tabbing = !_current_tabbing in
954 str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
955 (* only trust the indentation after the first { *)
956 (if started
957 then adjust_tabbing_unit old_tabbing !_current_tabbing);
958 let coccis_rest = Common.span all_coccis xs in
959 (match coccis_rest with
960 (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
961 (* the case where cocci code has been added before a close } *)
962 x::aux started (Indent_cocci2::xs)
963 | _ -> x::aux started xs)
964 | Indent_cocci2::xs ->
965 (match !tabbing_unit with
966 None -> aux started xs
967 | Some (tu,_) ->
968 _current_tabbing := (!_current_tabbing)^tu;
969 Cocci2 (tu,-1,-1,-1,None)::aux started xs)
970 | Unindent_cocci2(permanent)::xs ->
971 (match !tabbing_unit with
972 None -> aux started xs
973 | Some (_,tu) ->
974 _current_tabbing := remtab tu (!_current_tabbing);
975 aux started xs)
976 (* border between existing code and cocci code *)
977 | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs
978 when str_of_token2 x =$= "{" ->
979 x::aux true (y::Indent_cocci2::xs)
980 | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
981 when str_of_token2 y =$= "}" ->
982 x::aux started (y::Unindent_cocci2 true::xs)
983 (* starting the body of the function *)
984 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs
985 | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs
986 | ((Cocci2("\n",_,_,_,_)) as x)::Unindent_cocci2(false)::xs ->
987 x::aux started xs
988 | ((Cocci2("\n",_,_,_,_)) as x)::xs ->
989 (* dont inline in expr because of weird eval order of ocaml *)
990 let s = !_current_tabbing in
991 x::Cocci2 (s,-1,-1,-1,None)::aux started xs
992 | x::xs -> x::aux started xs in
993 (aux false xs,!tabbing_unit)
994
995
996 let rec find_paren_comma = function
997 | [] -> ()
998
999 (* do nothing if was like this in original file *)
1000 | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
1001 ::xs when p2 =|= p1 + 1 ->
1002 find_paren_comma (x2::xs)
1003
1004 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
1005 ::xs when p2 =|= p1 + 1 ->
1006 find_paren_comma (x2::xs)
1007
1008 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
1009 ::xs when p2 =|= p1 + 1 ->
1010 find_paren_comma (x2::xs)
1011
1012 (* otherwise yes can adjust *)
1013 | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
1014 x2.remove <- true;
1015 find_paren_comma (x2::xs)
1016 | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
1017 x1.remove <- true;
1018 find_paren_comma (x2::xs)
1019
1020 | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
1021 x1.remove <- true;
1022 find_paren_comma (x2::xs)
1023
1024 | x::xs ->
1025 find_paren_comma xs
1026
1027
1028 let fix_tokens toks =
1029 let toks = toks +> List.map mk_token_extended in
1030
1031 let cleaner = toks +> Common.exclude (function
1032 | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
1033 | _ -> false
1034 ) in
1035 find_paren_comma cleaner;
1036
1037 let toks = rebuild_tokens_extented toks in
1038 toks +> List.map (fun x -> x.tok2)
1039
1040
1041
1042 (*****************************************************************************)
1043 (* Final unparsing (and debugging support) *)
1044 (*****************************************************************************)
1045
1046 (* for debugging *)
1047 type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
1048
1049 let kind_of_token2 = function
1050 | Fake2 -> KFake
1051 | Cocci2 _ -> KCocci
1052 | C2 _ -> KC
1053 | T2 (t,_,_) ->
1054 (match TH.pinfo_of_tok t with
1055 | ExpandedTok _ -> KExpanded
1056 | OriginTok _ -> KOrigin
1057 | FakeTok _ -> raise Impossible (* now a Fake2 *)
1058 | AbstractLineTok _ -> raise Impossible (* now a KC *)
1059 )
1060 | Unindent_cocci2 _ | Indent_cocci2 -> raise Impossible
1061
1062 let end_mark = "!"
1063
1064 let start_mark = function
1065 | KFake -> "!F!"
1066 | KCocci -> "!S!"
1067 | KC -> "!A!"
1068 | KExpanded -> "!E!"
1069 | KOrigin -> ""
1070
1071 let print_all_tokens2 pr xs =
1072 if !Flag_parsing_c.debug_unparsing
1073 then
1074 let current_kind = ref KOrigin in
1075 xs +> List.iter (fun t ->
1076 let newkind = kind_of_token2 t in
1077 if newkind =*= !current_kind
1078 then pr (str_of_token2 t)
1079 else begin
1080 pr (end_mark);
1081 pr (start_mark newkind);
1082 pr (str_of_token2 t);
1083 current_kind := newkind
1084 end
1085 );
1086 else
1087 xs +> List.iter (fun x -> pr (str_of_token2 x))
1088
1089
1090
1091
1092 (*****************************************************************************)
1093 (* Entry points *)
1094 (*****************************************************************************)
1095
1096 (* old: PPviatok was made in the beginning to allow to pretty print a
1097 * complete C file, including a modified C file by transformation.ml,
1098 * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
1099 * maybe flow_to_ast) all the cases. Indeed we don't need to do some
1100 * fancy stuff when a function was not modified at all. Just need to
1101 * print the list of token as-is. But now pretty_print_c.ml handles
1102 * almost everything so maybe less useful. Maybe PPviatok allows to
1103 * optimize a little the pretty printing.
1104 *
1105 * update: now have PPviastr which goes even faster than PPviatok, so
1106 * PPviatok has disappeared.
1107 *)
1108
1109 type ppmethod = PPnormal | PPviastr
1110
1111
1112
1113
1114 (* The pp_program function will call pretty_print_c.ml with a special
1115 * function to print the leaf components, the tokens. When we want to
1116 * print a token, we need to print also maybe the space and comments that
1117 * were close to it in the original file (and that was omitted during the
1118 * parsing phase), and honor what the cocci-info attached to the token says.
1119 * Maybe we will not print the token if it's a MINUS-token, and maybe we will
1120 * print it and also print some cocci-code attached in a PLUS to it.
1121 * So we will also maybe call unparse_cocci. Because the cocci-code may
1122 * contain metavariables, unparse_cocci will in fact sometimes call back
1123 * pretty_print_c (which will this time don't call back again unparse_cocci)
1124 *)
1125
1126 let pp_program2 xs outfile =
1127 Common.with_open_outfile outfile (fun (pr,chan) ->
1128 let pr s =
1129 if !Flag_parsing_c.debug_unparsing
1130 then begin pr2_no_nl s; flush stderr end
1131 else pr s
1132 (* flush chan; *)
1133 (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1134 in
1135
1136 xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
1137 (* here can still work on ast *)
1138 let e = remove_useless_fakeInfo_struct e in
1139
1140 match ppmethod with
1141 | PPnormal ->
1142 (* now work on tokens *)
1143
1144 (* phase1: just get all the tokens, all the information *)
1145 assert(toks_e +> List.for_all (fun t ->
1146 TH.is_origin t or TH.is_expanded t
1147 ));
1148 let toks = get_fakeInfo_and_tokens e toks_e in
1149 let toks = displace_fake_nodes toks in
1150 (* assert Origin;ExpandedTok;Faketok *)
1151 let toks = expand_mcode toks in
1152 (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1153 * and no tag information, just NOTHING. *)
1154
1155 let toks =
1156 if !Flag.sgrep_mode2
1157 then drop_minus toks (* nothing to do for sgrep *)
1158 else
1159 (* phase2: can now start to filter and adjust *)
1160 let (toks,tu) = adjust_indentation toks in
1161 let toks = adjust_before_semicolon toks in(*before remove minus*)
1162 let toks = drop_space_at_endline toks in
1163 let toks = paren_to_space toks in
1164 let toks = remove_minus_and_between_and_expanded_and_fake toks in
1165 (* assert Origin + Cocci + C and no minus *)
1166 let toks = add_space toks in
1167 let toks = add_newlines toks tu in
1168 let toks = fix_tokens toks in
1169 toks in
1170
1171 (* in theory here could reparse and rework the ast! or
1172 * apply some SP. Not before cos julia may have generated
1173 * not parsable file. Need do unparsing_tricks call before being
1174 * ready to reparse. *)
1175 print_all_tokens2 pr toks;
1176
1177 | PPviastr -> pr str
1178 )
1179 )
1180
1181 let pp_program a b =
1182 Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
1183
1184
1185 let pp_program_default xs outfile =
1186 let xs' = xs +> List.map (fun x -> x, PPnormal) in
1187 pp_program xs' outfile