Release coccinelle-0.2.0rc1
[bpt/coccinelle.git] / parsing_c / unparse_c.ml
CommitLineData
0708f913
C
1(* Yoann Padioleau, Julia Lawall
2 *
3 * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
34e49164
C
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
485bce71
C
13 *
14 *
15 * Modifications by Julia Lawall for better newline handling.
34e49164
C
16 *)
17open Common
18
19open Ast_c
20
21module TH = Token_helpers
22
23
b1b2de81
C
24(* should keep comments and directives in between adjacent deleted terms,
25but not comments and directives within deleted terms. should use the
26labels found in the control-flow graph *)
27
34e49164
C
28
29
708f4980
C
30(*****************************************************************************)
31(* Wrappers *)
32(*****************************************************************************)
33let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
34
34e49164
C
35(*****************************************************************************)
36(* Types used during the intermediate phases of the unparsing *)
37(*****************************************************************************)
38
39type token1 =
40 | Fake1 of info
41 | T1 of Parser_c.token
42
43(* The cocci_tag of the token should always be a NOTHING. The mark of
44 * the token can only be OriginTok or ExpandedTok. Why not get rid of
45 * token and get something simpler ? because we need to know if the
46 * info is a TCommentCpp or TCommentSpace, etc for some of the further
47 * analysis so easier to keep with the token.
48 *
49 * This type contains the whole information. Have all the tokens with this
50 * type.
51 *)
708f4980
C
52type min =
53 Min of (int list (* match numbers *) * int (* adjacency information *))
54 | Ctx
55
34e49164 56type token2 =
708f4980 57 | T2 of Parser_c.token * min *
34e49164
C
58 int option (* orig index, abstracting away comments and space *)
59 | Fake2
708f4980 60 | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
34e49164
C
61 | C2 of string
62 | Indent_cocci2
63 | Unindent_cocci2
64
65(* not used yet *)
66type token3 =
67 | T3 of Parser_c.token
68 | Cocci3 of string
69 | C3 of string
70
71
72(* similar to the tech in parsing_hack *)
73type token_extended = {
74 tok2 : token2;
75 str : string;
76 idx: int option; (* to know if 2 tokens were consecutive in orig file *)
77 mutable new_tokens_before : token2 list;
78 mutable remove : bool;
79}
80
81
82(*****************************************************************************)
83(* Helpers *)
84(*****************************************************************************)
85
86let info_of_token1 t =
87 match t with
88 | Fake1 info -> info
89 | T1 tok -> TH.info_of_tok tok
90
002099fc
C
91let print_token1 = function
92 T1 tok -> TH.str_of_tok tok
93 | Fake1 info -> "fake"
94
34e49164
C
95let str_of_token2 = function
96 | T2 (t,_,_) -> TH.str_of_tok t
97 | Fake2 -> ""
708f4980 98 | Cocci2 (s,_,_,_) -> s
34e49164
C
99 | C2 s -> s
100 | Indent_cocci2 -> ""
101 | Unindent_cocci2 -> ""
102
103let print_token2 = function
708f4980
C
104 | T2 (t,b,_) ->
105 let b_str =
106 match b with
107 Min (index,adj) ->
108 Printf.sprintf "-%d[%s]" adj
109 (String.concat " " (List.map string_of_int index))
110 | Ctx -> "" in
111 "T2:"^b_str^TH.str_of_tok t
fc1ad971 112 | Fake2 -> "fake"
708f4980 113 | Cocci2 (s,_,lc,rc) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
34e49164
C
114 | C2 s -> "C2:"^s
115 | Indent_cocci2 -> "Indent"
116 | Unindent_cocci2 -> "Unindent"
117
002099fc
C
118let simple_print_all_tokens1 l =
119 List.iter (function x -> Printf.printf "%s " (print_token1 x)) l;
120 Printf.printf "\n"
121
708f4980 122let simple_print_all_tokens2 l =
fc1ad971 123 List.iter (function x -> Printf.printf "|%s| " (print_token2 x)) l;
34e49164
C
124 Printf.printf "\n"
125
126let str_of_token3 = function
127 | T3 t -> TH.str_of_tok t
128 | Cocci3 s | C3 s -> s
129
130
131
132let mk_token_extended x =
133 let origidx =
134 match x with
135 | T2 (_,_, idx) -> idx
136 | _ -> None
137 in
138 { tok2 = x;
139 str = str_of_token2 x;
140 idx = origidx;
141 new_tokens_before = [];
142 remove = false;
143 }
144
145let rebuild_tokens_extented toks_ext =
146 let _tokens = ref [] in
147 toks_ext +> List.iter (fun tok ->
148 tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
149 if not tok.remove then push2 tok.tok2 _tokens;
150 );
151 let tokens = List.rev !_tokens in
152 (tokens +> List.map mk_token_extended)
153
154
155let mcode_contain_plus = function
156 | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
157 | Ast_cocci.CONTEXT _ -> true
485bce71 158(* patch: when need full coccinelle transformation *)
708f4980
C
159 | Ast_cocci.MINUS (_,_,_,[]) -> false
160 | Ast_cocci.MINUS (_,_,_,x::xs) -> true
951c7801 161 | Ast_cocci.PLUS _ -> raise Impossible
34e49164
C
162
163let contain_plus info =
164 let mck = Ast_c.mcode_of_info info in
165 mcode_contain_plus mck
166
167(*****************************************************************************)
168(* Last fix on the ast *)
169(*****************************************************************************)
170
171(* Because of the ugly trick to handle initialiser, I generate fake ','
172 * for the last initializer element, but if there is nothing around it,
173 * I don't want in the end to print it.
174 *)
175
176let remove_useless_fakeInfo_struct program =
177 let bigf = { Visitor_c.default_visitor_c_s with
178 Visitor_c.kini_s = (fun (k,bigf) ini ->
179 match k ini with
113803cf 180 | InitList args, ii ->
34e49164
C
181 (match ii with
182 | [_i1;_i2] -> ini
113803cf 183 | [i1;i2;iicommaopt] ->
34e49164
C
184 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
185 && (Ast_c.is_fake iicommaopt)
186 (* sometimes the guy put a normal iicommaopt *)
34e49164
C
187 then InitList args, [i1;i2]
188 else InitList args, [i1;i2;iicommaopt]
113803cf
C
189 | [i1;i2;iicommaopt;end_comma_opt] ->
190 (* only in #define. end_comma_opt canot be fake *)
191 (* not sure if this will be considered ambiguous with a previous
192 case? *)
193 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
194 && (Ast_c.is_fake iicommaopt)
195 (* sometimes the guy put a normal iicommaopt *)
196 then InitList args, [i1;i2;end_comma_opt]
197 else InitList args, [i1;i2;iicommaopt;end_comma_opt]
34e49164
C
198 | _ -> raise Impossible
199 )
200 | x -> x
201 )
202 } in
203 Visitor_c.vk_toplevel_s bigf program
204
205
206(*****************************************************************************)
207(* Tokens1 generation *)
208(*****************************************************************************)
209
210let get_fakeInfo_and_tokens celem toks =
211 let toks_in = ref toks in
212 let toks_out = ref [] in
213
214 (* todo? verify good order of position ? *)
215 let pr_elem info =
216 match Ast_c.pinfo_of_info info with
217 | FakeTok _ ->
218 Common.push2 (Fake1 info) toks_out
219 | OriginTok _ | ExpandedTok _ ->
220 (* get the associated comments/space/cppcomment tokens *)
978fd7e5
C
221 let (before, x, after) =
222 !toks_in +> Common.split_when (fun tok ->
34e49164
C
223 info =*= TH.info_of_tok tok)
224 in
b1b2de81 225 assert(info =*= TH.info_of_tok x);
34e49164
C
226 (*old: assert(before +> List.for_all (TH.is_comment)); *)
227 before +> List.iter (fun x ->
228 if not (TH.is_comment x)
0708f913 229 then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
34e49164
C
230 (* case such as int asm d3("x"); not yet in ast *)
231 );
232 before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
233 push2 (T1 x) toks_out;
234 toks_in := after;
235 | AbstractLineTok _ ->
236 (* can be called on type info when for instance use -type_c *)
237 if !Flag_parsing_c.pretty_print_type_info
238 then Common.push2 (Fake1 info) toks_out
239 else raise Impossible (* at this stage *)
240 in
241
242 let pr_space _ = () in (* use the spacing that is there already *)
243
244 Pretty_print_c.pp_program_gen pr_elem pr_space celem;
245
246 if not (null !toks_in)
0708f913 247 then failwith "WEIRD: unparsing not finished";
34e49164
C
248
249 List.rev !toks_out
250
251(* Fake nodes that have BEFORE code should be moved over any subsequent
252whitespace and newlines, but not any comments, to get as close to the affected
002099fc 253code as possible. Similarly, fake nodes that have AFTER code should be moved
34e49164
C
254backwards. No fake nodes should have both before and after code. *)
255
256let displace_fake_nodes toks =
257 let is_fake = function Fake1 _ -> true | _ -> false in
258 let is_whitespace = function
259 T1(Parser_c.TCommentSpace _)
485bce71 260 (* patch: cocci *)
34e49164
C
261 | T1(Parser_c.TCommentNewline _) -> true
262 | _ -> false in
263 let rec loop toks =
264 let fake_info =
265 try Some (Common.split_when is_fake toks)
266 with Not_found -> None in
267 match fake_info with
268 Some(bef,((Fake1 info) as fake),aft) ->
269 (match !(info.cocci_tag) with
708f4980
C
270 | Some x ->
271 (match x with
002099fc 272 (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
34e49164 273 (* move the fake node forwards *)
002099fc
C
274 let (whitespace,rest) = Common.span is_whitespace aft in
275 bef @ whitespace @ fake :: (loop rest)
276 | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
34e49164 277 (* move the fake node backwards *)
002099fc
C
278 let revbef = List.rev bef in
279 let (revwhitespace,revprev) = Common.span is_whitespace revbef in
280 let whitespace = List.rev revwhitespace in
281 let prev = List.rev revprev in
282 prev @ fake :: (loop (whitespace @ aft))
283 | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
284 bef @ fake :: (loop aft)
285 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
286 failwith "fake node should not be before-after"
287 | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
485bce71 288 )
708f4980
C
289 | None ->
290 bef @ fake :: (loop aft)
291 )
34e49164
C
292 | None -> toks
293 | _ -> raise Impossible in
294 loop toks
295
296(*****************************************************************************)
297(* Tokens2 generation *)
298(*****************************************************************************)
299
b1b2de81
C
300let comment2t2 = function
301 (Token_c.TCommentCpp x,(info : Token_c.info)) ->
302 C2("\n"^info.Common.str^"\n")
303 | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
304
34e49164
C
305let expand_mcode toks =
306 let toks_out = ref [] in
307
308 let index = ref 0 in
309
310 let add_elem t minus =
311 match t with
312 | Fake1 info ->
313 let str = Ast_c.str_of_info info in
b1b2de81 314 if str =$= ""
34e49164
C
315 then push2 (Fake2) toks_out
316 (* perhaps the fake ',' *)
317 else push2 (C2 str) toks_out
318
319
002099fc 320 | T1 tok ->
b1b2de81 321 (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
34e49164 322 (* no tag on expandedTok ! *)
fc1ad971
C
323 let modified = function
324 None -> false
325 | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
326 | _ -> true in
327 (if (TH.is_expanded tok &&
328 modified !((TH.info_of_tok tok).cocci_tag)
329 (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*))
0708f913
C
330 then
331 failwith
332 (Printf.sprintf
333 "expanded token %s on line %d is either modified or stored in a metavariable"
334 (TH.str_of_tok tok) (TH.line_of_tok tok)));
34e49164
C
335
336 let tok' = tok +> TH.visitor_info_of_tok (fun i ->
337 { i with cocci_tag = ref Ast_c.emptyAnnot; }
338 ) in
339
340 let optindex =
341 if TH.is_origin tok && not (TH.is_real_comment tok)
342 then begin
343 incr index;
344 Some !index
345 end
346 else None
347 in
348
349 push2 (T2 (tok', minus, optindex)) toks_out
350 in
351
352 let expand_info t =
708f4980
C
353 let (mcode,env) =
354 Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
34e49164 355
708f4980
C
356 let pr_cocci s ln col rcol =
357 push2 (Cocci2(s,ln,col,rcol)) toks_out in
34e49164 358 let pr_c info =
b1b2de81
C
359 (match Ast_c.pinfo_of_info info with
360 Ast_c.AbstractLineTok _ ->
361 push2 (C2 (Ast_c.str_of_info info)) toks_out
362 | Ast_c.FakeTok (s,_) ->
363 push2 (C2 s) toks_out
34e49164
C
364 | _ ->
365 Printf.printf "line: %s\n" (Common.dump info);
b1b2de81
C
366 failwith "not an abstract line");
367 (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
368 List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
369
708f4980
C
370 let pr_barrier ln col = (* marks a position, used around C code *)
371 push2 (Cocci2("",ln,col,col)) toks_out in
372 let pr_nobarrier ln col = () in (* not needed for linux spacing *)
b1b2de81 373
708f4980 374 let pr_cspace _ = push2 (C2 " ") toks_out in
34e49164 375
708f4980
C
376 let pr_space _ = () (* rely on add_space in cocci code *) in
377 let pr_arity _ = () (* not interested *) in
34e49164
C
378
379 let indent _ = push2 Indent_cocci2 toks_out in
380 let unindent _ = push2 Unindent_cocci2 toks_out in
381
708f4980
C
382 let args_pp =
383 (env, pr_cocci, pr_c, pr_cspace,
384 (match !Flag_parsing_c.spacing with
385 Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
386 pr_arity,
387 (match !Flag_parsing_c.spacing with
388 Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
389 indent, unindent) in
34e49164 390
485bce71
C
391 (* old: when for yacfe with partial cocci:
392 * add_elem t false;
393 *)
34e49164 394
485bce71 395 (* patch: when need full coccinelle transformation *)
faf9a90c 396 let unparser = Unparse_cocci.pp_list_list_any args_pp false in
34e49164 397 match mcode with
708f4980 398 | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
34e49164
C
399 (* Why adding ? because I want to have all the information, the whole
400 * set of tokens, so I can then process and remove the
401 * is_between_two_minus for instance *)
708f4980 402 add_elem t (Min (inst,adj));
faf9a90c 403 unparser any_xxs Unparse_cocci.InPlace
34e49164
C
404 | Ast_cocci.CONTEXT (_,any_befaft) ->
405 (match any_befaft with
406 | Ast_cocci.NOTHING ->
708f4980 407 add_elem t Ctx
951c7801 408 | Ast_cocci.BEFORE (xxs,_) ->
faf9a90c 409 unparser xxs Unparse_cocci.Before;
708f4980 410 add_elem t Ctx
951c7801 411 | Ast_cocci.AFTER (xxs,_) ->
708f4980 412 add_elem t Ctx;
faf9a90c 413 unparser xxs Unparse_cocci.After;
951c7801 414 | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
faf9a90c 415 unparser xxs Unparse_cocci.Before;
708f4980 416 add_elem t Ctx;
faf9a90c 417 unparser yys Unparse_cocci.After;
34e49164 418 )
951c7801 419 | Ast_cocci.PLUS _ -> raise Impossible
34e49164
C
420
421 in
422
423 toks +> List.iter expand_info;
424 List.rev !toks_out
425
426
427(*****************************************************************************)
428(* Tokens2 processing, filtering, adjusting *)
429(*****************************************************************************)
430
002099fc 431let is_space = function
fc1ad971
C
432 | T2(Parser_c.TCommentSpace _,_b,_i) -> true (* only whitespace *)
433 | _ -> false
434
435let is_newline = function
436 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
437 | _ -> false
438
439let is_whitespace = function
002099fc
C
440 | (T2 (t,_b,_i)) ->
441 (match t with
442 | Parser_c.TCommentSpace _ -> true (* only whitespace *)
fc1ad971 443 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
002099fc
C
444 | _ -> false
445 )
446 | _ -> false
447
34e49164 448let is_minusable_comment = function
b1b2de81 449 | (T2 (t,_b,_i)) ->
34e49164
C
450 (match t with
451 | Parser_c.TCommentSpace _ (* only whitespace *)
485bce71 452 (* patch: coccinelle *)
002099fc 453 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
7f004419 454 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
002099fc
C
455 | Parser_c.TComment _
456 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
b1b2de81
C
457 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
458 | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
34e49164
C
459 -> true
460
b1b2de81 461 | Parser_c.TCommentMisc _
0708f913 462 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
34e49164
C
463 -> false
464
465 | _ -> false
466 )
467 | _ -> false
468
fc1ad971
C
469let is_minusable_comment_nocpp = function
470 | (T2 (t,_b,_i)) ->
471 (match t with
472 | Parser_c.TCommentSpace _ (* only whitespace *)
473 (* patch: coccinelle *)
474 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
7f004419 475 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
fc1ad971
C
476 | Parser_c.TComment _ -> true
477 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
478 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
479 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
480 -> false
481
482 | Parser_c.TCommentMisc _
483 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
484 -> false
485
486 | _ -> false
487 )
488 | _ -> false
489
34e49164
C
490let all_coccis = function
491 Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 -> true
492 | _ -> false
493
b1b2de81
C
494(*previously gave up if the first character was a newline, but not clear why*)
495let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
34e49164 496
708f4980
C
497let set_minus_comment adj = function
498 | T2 (t,Ctx,idx) ->
34e49164
C
499 let str = TH.str_of_tok t in
500 (match t with
501 | Parser_c.TCommentSpace _
485bce71 502(* patch: coccinelle *)
34e49164
C
503 | Parser_c.TCommentNewline _ -> ()
504
505 | Parser_c.TComment _
0708f913 506 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
b1b2de81
C
507 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
508 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
34e49164 509 ->
b1b2de81
C
510 pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
511 (TH.line_of_tok t) str)
34e49164
C
512 | _ -> raise Impossible
513 );
708f4980 514 T2 (t, Min adj, idx)
485bce71 515(* patch: coccinelle *)
fc1ad971 516 | T2 (t,Min adj,idx) as x -> x
34e49164
C
517 | _ -> raise Impossible
518
708f4980 519let set_minus_comment_or_plus adj = function
34e49164 520 Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 as x -> x
708f4980 521 | x -> set_minus_comment adj x
34e49164 522
fc1ad971
C
523let drop_minus xs =
524 xs +> Common.exclude (function
525 | T2 (t,Min adj,_) -> true
526 | _ -> false
527 )
528
34e49164
C
529let remove_minus_and_between_and_expanded_and_fake xs =
530
531 (* get rid of exampled and fake tok *)
532 let xs = xs +> Common.exclude (function
533 | T2 (t,_,_) when TH.is_expanded t -> true
534 | Fake2 -> true
535
536 | _ -> false
537 )
538 in
539
113803cf 540 let minus_or_comment = function
708f4980 541 T2(_,Min adj,_) -> true
113803cf
C
542 | x -> is_minusable_comment x in
543
fc1ad971
C
544 let minus_or_comment_nocpp = function
545 T2(_,Min adj,_) -> true
546 | x -> is_minusable_comment_nocpp x in
547
548 let common_adj (index1,adj1) (index2,adj2) =
549 adj1 = adj2 (* same adjacency info *) &&
550 (* non-empty intersection of witness trees *)
551 not ((Common.inter_set index1 index2) = []) in
552
553 let rec adjust_around_minus = function
554 [] -> []
555 | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
556 (((T2(_,Min adj,_))::_) as rest) ->
557 (* an initial newline, as in a replaced statement *)
558 let (between_minus,rest) = Common.span minus_or_comment rest in
559 (match rest with
560 [] -> (set_minus_comment adj x) ::
561 (List.map (set_minus_comment adj) between_minus)
562 | T2(_,Ctx,_)::_ when is_newline (List.hd(List.rev between_minus)) ->
563 (set_minus_comment adj x)::(adjust_within_minus between_minus) @
564 (adjust_around_minus rest)
565 | _ ->
566 x :: (adjust_within_minus between_minus) @
567 (adjust_around_minus rest))
568 | ((T2(_,Min adj,_))::_) as rest ->
569 (* no initial newline, as in a replaced expression *)
570 let (between_minus,rest) = Common.span minus_or_comment rest in
571 (match rest with
572 [] ->
573 (List.map (set_minus_comment adj) between_minus)
574 | _ ->
575 (adjust_within_minus between_minus) @
576 (adjust_around_minus rest))
577 | x::xs -> x::adjust_around_minus xs
578 and adjust_within_minus = function
579 [] -> []
580 | (T2(_,Min adj1,_) as t1)::xs ->
581 let (between_minus,rest) = Common.span is_minusable_comment xs in
582 (match rest with
583 [] ->
584 (* keep last newline *)
585 let (drop,keep) =
586 try
587 let (drop,nl,keep) =
588 Common.split_when is_newline between_minus in
589 (drop, nl :: keep)
590 with Not_found -> (between_minus,[]) in
591 t1 ::
592 List.map (set_minus_comment_or_plus adj1) drop @
593 keep
594 | (T2(_,Min adj2,_) as t2)::rest when common_adj adj1 adj2 ->
595 t1::
596 List.map (set_minus_comment_or_plus adj1) between_minus @
597 adjust_within_minus (t2::rest)
598 | x::xs ->
599 t1::(between_minus @ adjust_within_minus (x::xs)))
600 | _ -> failwith "only minus and space possible" in
601
602 (* new idea: collects regions not containing non-space context code
603 if two adjacent adjacent minus tokens satisfy common_adj then delete
604 all spaces, comments etc between them
605 if two adjacent minus tokens do not satisfy common_adj only delete
606 the spaces between them if there are no comments, etc.
607 if the region contain no plus code and is both preceded and followed
608 by a newline, delete the initial newline. *)
609
610 let rec adjust_around_minus = function
34e49164 611 [] -> []
708f4980 612 | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
fc1ad971
C
613 (T2(_,Min adj1,_) as t1)::xs ->
614 let (minus_list,rest) = Common.span not_context (t1::xs) in
615 let contains_plus = List.exists is_plus minus_list in
616 let x =
617 match List.rev minus_list with
618 (T2(Parser_c.TCommentNewline c,_b,_i))::rest
619 when List.for_all minus_or_comment minus_list ->
620 set_minus_comment_or_plus adj1 x
621 | _ -> x in
622 x :: adjust_within_minus contains_plus minus_list @
623 adjust_around_minus rest
624 | (T2(_,Min adj1,_) as t1)::xs ->
625 let (minus_list,rest) = Common.span not_context (t1::xs) in
626 let contains_plus = List.exists is_plus minus_list in
627 adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
628 | x::xs -> x :: adjust_around_minus xs
629 and adjust_within_minus cp (* contains plus *) = function
630 (T2(_,Min adj1,_) as t1)::xs ->
631 let not_minus = function T2(_,Min _,_) -> false | _ -> true in
632 let (not_minus_list,rest) = Common.span not_minus xs in
633 t1 ::
34e49164 634 (match rest with
fc1ad971
C
635 (T2(_,Min adj2,_) as t2)::xs when common_adj adj1 adj2 ->
636 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
637 @ (adjust_within_minus cp (t2::xs))
638 | (T2(_,Min adj2,_) as t2)::xs ->
639 let is_whitespace_or_plus = function
640 (T2 _) as x -> is_space x
641 | _ -> true (*plus*) in
642 if List.for_all is_whitespace_or_plus not_minus_list
643 then
644 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
645 @ (adjust_within_minus cp (t2::xs))
646 else not_minus_list @ (adjust_within_minus cp (t2::xs))
647 | _ ->
648 if cp
649 then xs
650 else
651 let (spaces,rest) = Common.span is_space xs in
652 (List.map (set_minus_comment_or_plus adj1) spaces)
653 @ rest)
654 | xs -> failwith "should always start with minus"
655 and not_context = function
656 (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false
657 | _ -> true
658 and is_plus = function
659 C2 _ | Cocci2 _ -> true
660 | _ -> false in
34e49164 661
fc1ad971 662 let xs = adjust_around_minus xs in
34e49164 663
113803cf 664 (* this drops blank lines after a brace introduced by removing code *)
fc1ad971
C
665 let minus_or_comment_nonl = function
666 T2(_,Min adj,_) -> true
667 | T2(Parser_c.TCommentNewline _,_b,_i) -> false
668 | x -> is_minusable_comment x in
669
113803cf
C
670 let rec adjust_after_brace = function
671 [] -> []
708f4980 672 | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs)
b1b2de81 673 when str_of_token2 x =$= "{" ->
fc1ad971 674 let (between_minus,rest) = Common.span minus_or_comment_nonl xs in
113803cf
C
675 let is_whitespace = function
676 T2(Parser_c.TCommentSpace _,_b,_i)
677 (* patch: cocci *)
678 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
679 | _ -> false in
680 let (newlines,rest) = Common.span is_whitespace rest in
681 let (drop_newlines,last_newline) =
682 let rec loop = function
683 [] -> ([],[])
684 | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
685 (List.rev rest,[x])
686 | x::xs ->
687 let (drop_newlines,last_newline) = loop xs in
688 (drop_newlines,x::last_newline) in
689 loop (List.rev newlines) in
708f4980 690 x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@
113803cf
C
691 last_newline@
692 adjust_after_brace rest
693 | x::xs -> x::adjust_after_brace xs in
694
695 let xs = adjust_after_brace xs in
696
fc1ad971
C
697 (* search backwards from context } over spaces until reaching a newline.
698 then go back over all minus code until reaching some context or + code.
699 get rid of all intervening spaces, newlines, and comments
700 input is reversed *)
701 let rec adjust_before_brace = function
702 [] -> []
703 | ((T2(t,Ctx,_)) as x)::xs when str_of_token2 x =$= "}" or is_newline x ->
704 let (outer_spaces,rest) = Common.span is_space xs in
705 x :: outer_spaces @
706 (match rest with
707 ((T2 (Parser_c.TCommentNewline _,Ctx,_i)) as h) ::
708 (* the rest of this code is the same as from_newline below
709 but merging them seems to be error prone... *)
710 ((T2 (t, Min adj, idx)) as m) :: rest ->
711 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
712 h :: m ::
713 (List.map (set_minus_comment adj) spaces) @
714 (adjust_before_brace rest)
715 | _ -> adjust_before_brace rest)
716 | x::xs -> x :: (adjust_before_brace xs) in
717
718 let from_newline = function
719 ((T2 (t, Min adj, idx)) as m) :: rest ->
720 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
721 m ::
722 (List.map (set_minus_comment adj) spaces) @
723 (adjust_before_brace rest)
724 | rest -> adjust_before_brace rest in
725
726 let xs = List.rev (from_newline (List.rev xs)) in
727 let xs = drop_minus xs in
34e49164
C
728 xs
729
faf9a90c
C
730(* normally, in C code, a semicolon is not preceded by a space or newline *)
731let adjust_before_semicolon toks =
732 let toks = List.rev toks in
002099fc 733 let rec search_semic = function
faf9a90c 734 [] -> []
002099fc 735 | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
faf9a90c 736 if List.mem (str_of_token2 x) [";";")";","]
002099fc
C
737 then x :: search_minus false xs
738 else x :: search_semic xs
739 | x::xs -> x :: search_semic xs
740 and search_minus seen_minus xs =
741 let (spaces, rest) = Common.span is_space xs in
742 (* only delete spaces if something is actually deleted *)
743 match rest with
744 ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest
745 | _ -> if seen_minus then rest else xs in
746 List.rev (search_semic toks)
34e49164
C
747
748let is_ident_like s = s ==~ Common.regexp_alpha
749
750let rec add_space xs =
751 match xs with
752 | [] -> []
753 | [x] -> [x]
708f4980
C
754 | (Cocci2(sx,lnx,_,rcolx) as x)::((Cocci2(sy,lny,lcoly,_)) as y)::xs
755 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
756 not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
757 (* this only works within a line. could consider whether
758 something should be done to add newlines too, rather than
759 printing them explicitly in unparse_cocci. *)
760 x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
34e49164
C
761 | x::y::xs ->
762 let sx = str_of_token2 x in
763 let sy = str_of_token2 y in
faf9a90c 764 if is_ident_like sx && is_ident_like sy
34e49164
C
765 then x::C2 " "::(add_space (y::xs))
766 else x::(add_space (y::xs))
767
768
769
770(* When insert some new code, because of a + in a SP, we must add this
771 * code at the right place, with the good indentation. So each time we
772 * encounter some spacing info, with some newline, we maintain the
773 * current indentation level used.
774 *
775 * TODO problems: not accurate. ex: TODO
776 *
777 * TODO: if in #define region, should add a \ \n
778 *)
779let new_tabbing2 space =
780 (list_of_string space)
781 +> List.rev
b1b2de81 782 +> Common.take_until (fun c -> c =<= '\n')
34e49164
C
783 +> List.rev
784 +> List.map string_of_char
785 +> String.concat ""
786
787let new_tabbing a =
788 Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
789
790
791let rec adjust_indentation xs =
792 let _current_tabbing = ref "" in
793 let tabbing_unit = ref None in
794
795 let string_of_list l = String.concat "" (List.map string_of_char l) in
796
797 (* try to pick a tabbing unit for the plus code *)
798 let adjust_tabbing_unit old_tab new_tab =
b1b2de81 799 if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
34e49164
C
800 then
801 let old_tab = list_of_string old_tab in
802 let new_tab = list_of_string new_tab in
803 let rec loop = function
804 ([],new_tab) ->
805 tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
806 | (_,[]) -> failwith "not possible"
807 | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
808 loop (old_tab,new_tab) in
809
810 let remtab tu current_tab =
811 let current_tab = List.rev(list_of_string current_tab) in
812 let rec loop = function
813 ([],new_tab) -> string_of_list (List.rev new_tab)
0708f913 814 | (_,[]) -> "" (*weird; tabbing unit used up more than the current tab*)
b1b2de81 815 | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
34e49164
C
816 | (_,ns) -> (* mismatch; remove what we can *)
817 string_of_list (List.rev ns) in
818 loop (tu,current_tab) in
819
820 let rec find_first_tab started = function
821 [] -> ()
b1b2de81 822 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
34e49164 823 find_first_tab true xs
485bce71 824(* patch: coccinelle *)
34e49164
C
825 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
826 when started ->
827 let s = str_of_token2 x +> new_tabbing in
828 tabbing_unit := Some (s,List.rev (list_of_string s))
829 | x::xs -> find_first_tab started xs in
830 find_first_tab false xs;
831
832 let rec aux started xs =
833 match xs with
834 | [] -> []
485bce71 835(* patch: coccinelle *)
34e49164 836 | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
708f4980
C
837 ((Cocci2 ("{",_,_,_)) as a)::xs
838 when started && str_of_token2 x =$= ")" ->
34e49164 839 (* to be done for if, etc, but not for a function header *)
708f4980 840 x::(C2 " ")::a::(aux started xs)
34e49164
C
841 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs ->
842 let old_tabbing = !_current_tabbing in
843 str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
844 (* only trust the indentation after the first { *)
845 (if started then adjust_tabbing_unit old_tabbing !_current_tabbing);
846 let coccis_rest = Common.span all_coccis xs in
847 (match coccis_rest with
b1b2de81 848 (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
34e49164
C
849 (* the case where cocci code has been added before a close } *)
850 x::aux started (Indent_cocci2::xs)
851 | _ -> x::aux started xs)
852 | Indent_cocci2::xs ->
853 (match !tabbing_unit with
854 None -> aux started xs
855 | Some (tu,_) ->
856 _current_tabbing := (!_current_tabbing)^tu;
708f4980 857 Cocci2 (tu,-1,-1,-1)::aux started xs)
34e49164
C
858 | Unindent_cocci2::xs ->
859 (match !tabbing_unit with
860 None -> aux started xs
861 | Some (_,tu) ->
862 _current_tabbing := remtab tu (!_current_tabbing);
863 aux started xs)
864 (* border between existing code and cocci code *)
708f4980 865 | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_)) as y)::xs
b1b2de81 866 when str_of_token2 x =$= "{" ->
34e49164
C
867 x::aux true (y::Indent_cocci2::xs)
868 | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
b1b2de81 869 when str_of_token2 y =$= "}" ->
34e49164
C
870 x::aux started (y::Unindent_cocci2::xs)
871 (* starting the body of the function *)
b1b2de81 872 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs
708f4980
C
873 | ((Cocci2("{",_,_,_)) as a)::xs -> a::aux true xs
874 | ((Cocci2("\n",_,_,_)) as x)::xs ->
0708f913 875 (* dont inline in expr because of weird eval order of ocaml *)
34e49164 876 let s = !_current_tabbing in
708f4980 877 x::Cocci2 (s,-1,-1,-1)::aux started xs
34e49164
C
878 | x::xs -> x::aux started xs in
879 aux false xs
880
881
882let rec find_paren_comma = function
883 | [] -> ()
884
885 (* do nothing if was like this in original file *)
886 | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
b1b2de81 887 ::xs when p2 =|= p1 + 1 ->
34e49164
C
888 find_paren_comma (x2::xs)
889
890 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
b1b2de81 891 ::xs when p2 =|= p1 + 1 ->
34e49164
C
892 find_paren_comma (x2::xs)
893
894 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
b1b2de81 895 ::xs when p2 =|= p1 + 1 ->
34e49164
C
896 find_paren_comma (x2::xs)
897
898 (* otherwise yes can adjust *)
899 | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
900 x2.remove <- true;
901 find_paren_comma (x2::xs)
902 | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
903 x1.remove <- true;
904 find_paren_comma (x2::xs)
905
906 | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
907 x1.remove <- true;
908 find_paren_comma (x2::xs)
909
910 | x::xs ->
911 find_paren_comma xs
912
913
914let fix_tokens toks =
915 let toks = toks +> List.map mk_token_extended in
916
917 let cleaner = toks +> Common.exclude (function
918 | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
919 | _ -> false
920 ) in
921 find_paren_comma cleaner;
922
923 let toks = rebuild_tokens_extented toks in
924 toks +> List.map (fun x -> x.tok2)
925
926
927
928(*****************************************************************************)
929(* Final unparsing (and debugging support) *)
930(*****************************************************************************)
931
932(* for debugging *)
933type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
934
935let kind_of_token2 = function
936 | Fake2 -> KFake
937 | Cocci2 _ -> KCocci
938 | C2 _ -> KC
939 | T2 (t,_,_) ->
940 (match TH.pinfo_of_tok t with
941 | ExpandedTok _ -> KExpanded
942 | OriginTok _ -> KOrigin
943 | FakeTok _ -> raise Impossible (* now a Fake2 *)
944 | AbstractLineTok _ -> raise Impossible (* now a KC *)
945 )
946 | Unindent_cocci2 | Indent_cocci2 -> raise Impossible
947
948let end_mark = "!"
949
950let start_mark = function
951 | KFake -> "!F!"
952 | KCocci -> "!S!"
953 | KC -> "!A!"
954 | KExpanded -> "!E!"
955 | KOrigin -> ""
956
957let print_all_tokens2 pr xs =
958 if !Flag_parsing_c.debug_unparsing
959 then
960 let current_kind = ref KOrigin in
961 xs +> List.iter (fun t ->
962 let newkind = kind_of_token2 t in
b1b2de81 963 if newkind =*= !current_kind
34e49164
C
964 then pr (str_of_token2 t)
965 else begin
966 pr (end_mark);
967 pr (start_mark newkind);
968 pr (str_of_token2 t);
969 current_kind := newkind
970 end
971 );
972 else
973 xs +> List.iter (fun x -> pr (str_of_token2 x))
974
975
976
977
978(*****************************************************************************)
979(* Entry points *)
980(*****************************************************************************)
981
982(* old: PPviatok was made in the beginning to allow to pretty print a
983 * complete C file, including a modified C file by transformation.ml,
984 * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
985 * maybe flow_to_ast) all the cases. Indeed we don't need to do some
986 * fancy stuff when a function was not modified at all. Just need to
987 * print the list of token as-is. But now pretty_print_c.ml handles
988 * almost everything so maybe less useful. Maybe PPviatok allows to
989 * optimize a little the pretty printing.
990 *
991 * update: now have PPviastr which goes even faster than PPviatok, so
992 * PPviatok has disappeared.
993 *)
994
995type ppmethod = PPnormal | PPviastr
996
997
998
999
1000(* The pp_program function will call pretty_print_c.ml with a special
1001 * function to print the leaf components, the tokens. When we want to
1002 * print a token, we need to print also maybe the space and comments that
1003 * were close to it in the original file (and that was omitted during the
1004 * parsing phase), and honor what the cocci-info attached to the token says.
1005 * Maybe we will not print the token if it's a MINUS-token, and maybe we will
1006 * print it and also print some cocci-code attached in a PLUS to it.
1007 * So we will also maybe call unparse_cocci. Because the cocci-code may
1008 * contain metavariables, unparse_cocci will in fact sometimes call back
1009 * pretty_print_c (which will this time don't call back again unparse_cocci)
1010 *)
1011
1012let pp_program2 xs outfile =
1013 Common.with_open_outfile outfile (fun (pr,chan) ->
1014 let pr s =
1015 if !Flag_parsing_c.debug_unparsing
1016 then begin pr2_no_nl s; flush stderr end
1017 else pr s
1018 (* flush chan; *)
1019 (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1020 in
1021
1022 xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
34e49164
C
1023 (* here can still work on ast *)
1024 let e = remove_useless_fakeInfo_struct e in
1025
1026 match ppmethod with
002099fc 1027 | PPnormal ->
34e49164
C
1028 (* now work on tokens *)
1029
1030 (* phase1: just get all the tokens, all the information *)
1031 assert(toks_e +> List.for_all (fun t ->
1032 TH.is_origin t or TH.is_expanded t
1033 ));
1034 let toks = get_fakeInfo_and_tokens e toks_e in
1035 let toks = displace_fake_nodes toks in
1036 (* assert Origin;ExpandedTok;Faketok *)
1037 let toks = expand_mcode toks in
1038 (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1039 * and no tag information, just NOTHING. *)
fc1ad971
C
1040
1041 let toks =
1042 if !Flag.sgrep_mode2
1043 then drop_minus toks (* nothing to do for sgrep *)
1044 else
1045 (* phase2: can now start to filter and adjust *)
1046 let toks = adjust_indentation toks in
1047 let toks = adjust_before_semicolon toks in(*before remove minus*)
1048 let toks = remove_minus_and_between_and_expanded_and_fake toks in
1049 (* assert Origin + Cocci + C and no minus *)
1050 let toks = add_space toks in
1051 let toks = fix_tokens toks in
1052 toks in
34e49164
C
1053
1054 (* in theory here could reparse and rework the ast! or
1055 * apply some SP. Not before cos julia may have generated
1056 * not parsable file. Need do unparsing_tricks call before being
1057 * ready to reparse. *)
1058 print_all_tokens2 pr toks;
1059
1060 | PPviastr -> pr str
1061 )
1062 )
1063
1064let pp_program a b =
1065 Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
1066
485bce71
C
1067
1068let pp_program_default xs outfile =
1069 let xs' = xs +> List.map (fun x -> x, PPnormal) in
1070 pp_program xs' outfile