Release coccinelle-0.2.2-rc1
[bpt/coccinelle.git] / parsing_c / unparse_c.ml
CommitLineData
0708f913 1(* Yoann Padioleau, Julia Lawall
ae4735db
C
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
0708f913 4 * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
34e49164
C
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
34e49164
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
ae4735db
C
14 *
15 *
485bce71 16 * Modifications by Julia Lawall for better newline handling.
34e49164
C
17 *)
18open Common
19
20open Ast_c
21
22module TH = Token_helpers
23
24
b1b2de81
C
25(* should keep comments and directives in between adjacent deleted terms,
26but not comments and directives within deleted terms. should use the
27labels found in the control-flow graph *)
28
34e49164
C
29
30
708f4980
C
31(*****************************************************************************)
32(* Wrappers *)
33(*****************************************************************************)
34let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
35
34e49164
C
36(*****************************************************************************)
37(* Types used during the intermediate phases of the unparsing *)
38(*****************************************************************************)
39
ae4735db 40type token1 =
34e49164
C
41 | Fake1 of info
42 | T1 of Parser_c.token
43
44(* The cocci_tag of the token should always be a NOTHING. The mark of
45 * the token can only be OriginTok or ExpandedTok. Why not get rid of
46 * token and get something simpler ? because we need to know if the
47 * info is a TCommentCpp or TCommentSpace, etc for some of the further
48 * analysis so easier to keep with the token.
ae4735db 49 *
34e49164
C
50 * This type contains the whole information. Have all the tokens with this
51 * type.
52 *)
708f4980
C
53type min =
54 Min of (int list (* match numbers *) * int (* adjacency information *))
55 | Ctx
56
ae4735db
C
57type token2 =
58 | T2 of Parser_c.token * min *
34e49164
C
59 int option (* orig index, abstracting away comments and space *)
60 | Fake2
708f4980 61 | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
34e49164
C
62 | C2 of string
63 | Indent_cocci2
c3e37e97 64 | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
34e49164
C
65
66(* not used yet *)
ae4735db 67type token3 =
34e49164
C
68 | T3 of Parser_c.token
69 | Cocci3 of string
70 | C3 of string
71
72
73(* similar to the tech in parsing_hack *)
74type token_extended = {
75 tok2 : token2;
76 str : string;
77 idx: int option; (* to know if 2 tokens were consecutive in orig file *)
78 mutable new_tokens_before : token2 list;
79 mutable remove : bool;
80}
81
82
83(*****************************************************************************)
84(* Helpers *)
85(*****************************************************************************)
86
ae4735db 87let info_of_token1 t =
34e49164
C
88 match t with
89 | Fake1 info -> info
90 | T1 tok -> TH.info_of_tok tok
91
002099fc
C
92let print_token1 = function
93 T1 tok -> TH.str_of_tok tok
94 | Fake1 info -> "fake"
95
34e49164
C
96let str_of_token2 = function
97 | T2 (t,_,_) -> TH.str_of_tok t
98 | Fake2 -> ""
708f4980 99 | Cocci2 (s,_,_,_) -> s
34e49164
C
100 | C2 s -> s
101 | Indent_cocci2 -> ""
c3e37e97 102 | Unindent_cocci2 _ -> ""
34e49164
C
103
104let print_token2 = function
708f4980
C
105 | T2 (t,b,_) ->
106 let b_str =
107 match b with
108 Min (index,adj) ->
109 Printf.sprintf "-%d[%s]" adj
110 (String.concat " " (List.map string_of_int index))
111 | Ctx -> "" in
112 "T2:"^b_str^TH.str_of_tok t
fc1ad971 113 | Fake2 -> "fake"
708f4980 114 | Cocci2 (s,_,lc,rc) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
34e49164
C
115 | C2 s -> "C2:"^s
116 | Indent_cocci2 -> "Indent"
c3e37e97 117 | Unindent_cocci2 _ -> "Unindent"
34e49164 118
002099fc
C
119let simple_print_all_tokens1 l =
120 List.iter (function x -> Printf.printf "%s " (print_token1 x)) l;
121 Printf.printf "\n"
122
708f4980 123let simple_print_all_tokens2 l =
fc1ad971 124 List.iter (function x -> Printf.printf "|%s| " (print_token2 x)) l;
34e49164
C
125 Printf.printf "\n"
126
127let str_of_token3 = function
128 | T3 t -> TH.str_of_tok t
129 | Cocci3 s | C3 s -> s
130
131
132
ae4735db
C
133let mk_token_extended x =
134 let origidx =
34e49164 135 match x with
ae4735db 136 | T2 (_,_, idx) -> idx
34e49164
C
137 | _ -> None
138 in
ae4735db 139 { tok2 = x;
34e49164
C
140 str = str_of_token2 x;
141 idx = origidx;
142 new_tokens_before = [];
143 remove = false;
144 }
145
ae4735db 146let rebuild_tokens_extented toks_ext =
34e49164 147 let _tokens = ref [] in
ae4735db 148 toks_ext +> List.iter (fun tok ->
34e49164
C
149 tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
150 if not tok.remove then push2 tok.tok2 _tokens;
151 );
152 let tokens = List.rev !_tokens in
153 (tokens +> List.map mk_token_extended)
154
155
156let mcode_contain_plus = function
157 | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
158 | Ast_cocci.CONTEXT _ -> true
485bce71 159(* patch: when need full coccinelle transformation *)
708f4980
C
160 | Ast_cocci.MINUS (_,_,_,[]) -> false
161 | Ast_cocci.MINUS (_,_,_,x::xs) -> true
951c7801 162 | Ast_cocci.PLUS _ -> raise Impossible
34e49164 163
ae4735db 164let contain_plus info =
34e49164
C
165 let mck = Ast_c.mcode_of_info info in
166 mcode_contain_plus mck
167
168(*****************************************************************************)
169(* Last fix on the ast *)
170(*****************************************************************************)
171
ae4735db 172(* Because of the ugly trick to handle initialiser, I generate fake ','
34e49164
C
173 * for the last initializer element, but if there is nothing around it,
174 * I don't want in the end to print it.
175 *)
176
ae4735db 177let remove_useless_fakeInfo_struct program =
34e49164 178 let bigf = { Visitor_c.default_visitor_c_s with
ae4735db 179 Visitor_c.kini_s = (fun (k,bigf) ini ->
34e49164 180 match k ini with
113803cf 181 | InitList args, ii ->
34e49164
C
182 (match ii with
183 | [_i1;_i2] -> ini
113803cf 184 | [i1;i2;iicommaopt] ->
34e49164
C
185 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
186 && (Ast_c.is_fake iicommaopt)
187 (* sometimes the guy put a normal iicommaopt *)
34e49164
C
188 then InitList args, [i1;i2]
189 else InitList args, [i1;i2;iicommaopt]
ae4735db 190 | [i1;i2;iicommaopt;end_comma_opt] ->
113803cf
C
191 (* only in #define. end_comma_opt canot be fake *)
192 (* not sure if this will be considered ambiguous with a previous
193 case? *)
194 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
195 && (Ast_c.is_fake iicommaopt)
196 (* sometimes the guy put a normal iicommaopt *)
197 then InitList args, [i1;i2;end_comma_opt]
198 else InitList args, [i1;i2;iicommaopt;end_comma_opt]
34e49164
C
199 | _ -> raise Impossible
200 )
201 | x -> x
202 )
203 } in
204 Visitor_c.vk_toplevel_s bigf program
205
206
207(*****************************************************************************)
208(* Tokens1 generation *)
209(*****************************************************************************)
210
ae4735db
C
211let get_fakeInfo_and_tokens celem toks =
212 let toks_in = ref toks in
34e49164
C
213 let toks_out = ref [] in
214
215 (* todo? verify good order of position ? *)
ae4735db 216 let pr_elem info =
34e49164 217 match Ast_c.pinfo_of_info info with
ae4735db 218 | FakeTok _ ->
34e49164 219 Common.push2 (Fake1 info) toks_out
ae4735db 220 | OriginTok _ | ExpandedTok _ ->
34e49164 221 (* get the associated comments/space/cppcomment tokens *)
978fd7e5 222 let (before, x, after) =
ae4735db 223 !toks_in +> Common.split_when (fun tok ->
34e49164
C
224 info =*= TH.info_of_tok tok)
225 in
b1b2de81 226 assert(info =*= TH.info_of_tok x);
34e49164 227 (*old: assert(before +> List.for_all (TH.is_comment)); *)
ae4735db 228 before +> List.iter (fun x ->
34e49164 229 if not (TH.is_comment x)
0708f913 230 then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
34e49164
C
231 (* case such as int asm d3("x"); not yet in ast *)
232 );
233 before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
234 push2 (T1 x) toks_out;
235 toks_in := after;
ae4735db 236 | AbstractLineTok _ ->
34e49164
C
237 (* can be called on type info when for instance use -type_c *)
238 if !Flag_parsing_c.pretty_print_type_info
239 then Common.push2 (Fake1 info) toks_out
ae4735db 240 else raise Impossible (* at this stage *)
34e49164
C
241 in
242
243 let pr_space _ = () in (* use the spacing that is there already *)
244
245 Pretty_print_c.pp_program_gen pr_elem pr_space celem;
246
247 if not (null !toks_in)
0708f913 248 then failwith "WEIRD: unparsing not finished";
34e49164
C
249
250 List.rev !toks_out
251
252(* Fake nodes that have BEFORE code should be moved over any subsequent
253whitespace and newlines, but not any comments, to get as close to the affected
002099fc 254code as possible. Similarly, fake nodes that have AFTER code should be moved
34e49164
C
255backwards. No fake nodes should have both before and after code. *)
256
257let displace_fake_nodes toks =
258 let is_fake = function Fake1 _ -> true | _ -> false in
259 let is_whitespace = function
260 T1(Parser_c.TCommentSpace _)
485bce71 261 (* patch: cocci *)
34e49164
C
262 | T1(Parser_c.TCommentNewline _) -> true
263 | _ -> false in
264 let rec loop toks =
265 let fake_info =
266 try Some (Common.split_when is_fake toks)
267 with Not_found -> None in
268 match fake_info with
269 Some(bef,((Fake1 info) as fake),aft) ->
270 (match !(info.cocci_tag) with
ae4735db 271 | Some x ->
708f4980 272 (match x with
002099fc 273 (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
34e49164 274 (* move the fake node forwards *)
002099fc
C
275 let (whitespace,rest) = Common.span is_whitespace aft in
276 bef @ whitespace @ fake :: (loop rest)
277 | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
34e49164 278 (* move the fake node backwards *)
002099fc
C
279 let revbef = List.rev bef in
280 let (revwhitespace,revprev) = Common.span is_whitespace revbef in
281 let whitespace = List.rev revwhitespace in
282 let prev = List.rev revprev in
283 prev @ fake :: (loop (whitespace @ aft))
284 | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
285 bef @ fake :: (loop aft)
286 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
287 failwith "fake node should not be before-after"
288 | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
485bce71 289 )
ae4735db 290 | None ->
708f4980
C
291 bef @ fake :: (loop aft)
292 )
34e49164
C
293 | None -> toks
294 | _ -> raise Impossible in
295 loop toks
296
297(*****************************************************************************)
298(* Tokens2 generation *)
299(*****************************************************************************)
300
b1b2de81
C
301let comment2t2 = function
302 (Token_c.TCommentCpp x,(info : Token_c.info)) ->
303 C2("\n"^info.Common.str^"\n")
304 | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
305
ae4735db 306let expand_mcode toks =
34e49164
C
307 let toks_out = ref [] in
308
309 let index = ref 0 in
310
ae4735db 311 let add_elem t minus =
34e49164 312 match t with
ae4735db 313 | Fake1 info ->
34e49164 314 let str = Ast_c.str_of_info info in
b1b2de81 315 if str =$= ""
34e49164
C
316 then push2 (Fake2) toks_out
317 (* perhaps the fake ',' *)
318 else push2 (C2 str) toks_out
ae4735db
C
319
320
002099fc 321 | T1 tok ->
b1b2de81 322 (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
34e49164 323 (* no tag on expandedTok ! *)
fc1ad971
C
324 let modified = function
325 None -> false
326 | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
327 | _ -> true in
328 (if (TH.is_expanded tok &&
329 modified !((TH.info_of_tok tok).cocci_tag)
330 (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*))
0708f913
C
331 then
332 failwith
333 (Printf.sprintf
334 "expanded token %s on line %d is either modified or stored in a metavariable"
335 (TH.str_of_tok tok) (TH.line_of_tok tok)));
34e49164 336
ae4735db 337 let tok' = tok +> TH.visitor_info_of_tok (fun i ->
34e49164
C
338 { i with cocci_tag = ref Ast_c.emptyAnnot; }
339 ) in
340
ae4735db 341 let optindex =
34e49164
C
342 if TH.is_origin tok && not (TH.is_real_comment tok)
343 then begin
344 incr index;
345 Some !index
346 end
347 else None
348 in
349
350 push2 (T2 (tok', minus, optindex)) toks_out
351 in
352
ae4735db
C
353 let expand_info t =
354 let (mcode,env) =
708f4980 355 Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
34e49164 356
ae4735db 357 let pr_cocci s ln col rcol =
708f4980 358 push2 (Cocci2(s,ln,col,rcol)) toks_out in
ae4735db 359 let pr_c info =
b1b2de81
C
360 (match Ast_c.pinfo_of_info info with
361 Ast_c.AbstractLineTok _ ->
362 push2 (C2 (Ast_c.str_of_info info)) toks_out
363 | Ast_c.FakeTok (s,_) ->
364 push2 (C2 s) toks_out
34e49164
C
365 | _ ->
366 Printf.printf "line: %s\n" (Common.dump info);
b1b2de81
C
367 failwith "not an abstract line");
368 (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
369 List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
370
708f4980
C
371 let pr_barrier ln col = (* marks a position, used around C code *)
372 push2 (Cocci2("",ln,col,col)) toks_out in
373 let pr_nobarrier ln col = () in (* not needed for linux spacing *)
b1b2de81 374
708f4980 375 let pr_cspace _ = push2 (C2 " ") toks_out in
34e49164 376
708f4980
C
377 let pr_space _ = () (* rely on add_space in cocci code *) in
378 let pr_arity _ = () (* not interested *) in
34e49164
C
379
380 let indent _ = push2 Indent_cocci2 toks_out in
c3e37e97 381 let unindent x = push2 (Unindent_cocci2 x) toks_out in
34e49164 382
708f4980
C
383 let args_pp =
384 (env, pr_cocci, pr_c, pr_cspace,
385 (match !Flag_parsing_c.spacing with
386 Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
387 pr_arity,
388 (match !Flag_parsing_c.spacing with
389 Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
390 indent, unindent) in
34e49164 391
ae4735db
C
392 (* old: when for yacfe with partial cocci:
393 * add_elem t false;
485bce71 394 *)
34e49164 395
485bce71 396 (* patch: when need full coccinelle transformation *)
faf9a90c 397 let unparser = Unparse_cocci.pp_list_list_any args_pp false in
34e49164 398 match mcode with
ae4735db 399 | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
34e49164 400 (* Why adding ? because I want to have all the information, the whole
ae4735db 401 * set of tokens, so I can then process and remove the
34e49164 402 * is_between_two_minus for instance *)
708f4980 403 add_elem t (Min (inst,adj));
faf9a90c 404 unparser any_xxs Unparse_cocci.InPlace
ae4735db 405 | Ast_cocci.CONTEXT (_,any_befaft) ->
34e49164 406 (match any_befaft with
ae4735db 407 | Ast_cocci.NOTHING ->
708f4980 408 add_elem t Ctx
951c7801 409 | Ast_cocci.BEFORE (xxs,_) ->
faf9a90c 410 unparser xxs Unparse_cocci.Before;
708f4980 411 add_elem t Ctx
ae4735db 412 | Ast_cocci.AFTER (xxs,_) ->
708f4980 413 add_elem t Ctx;
faf9a90c 414 unparser xxs Unparse_cocci.After;
ae4735db 415 | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
faf9a90c 416 unparser xxs Unparse_cocci.Before;
708f4980 417 add_elem t Ctx;
faf9a90c 418 unparser yys Unparse_cocci.After;
34e49164 419 )
951c7801 420 | Ast_cocci.PLUS _ -> raise Impossible
34e49164
C
421
422 in
423
424 toks +> List.iter expand_info;
425 List.rev !toks_out
ae4735db 426
34e49164
C
427
428(*****************************************************************************)
429(* Tokens2 processing, filtering, adjusting *)
430(*****************************************************************************)
431
002099fc 432let is_space = function
fc1ad971 433 | T2(Parser_c.TCommentSpace _,_b,_i) -> true (* only whitespace *)
ae4735db 434 | _ -> false
fc1ad971
C
435
436let is_newline = function
437 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
438 | _ -> false
439
440let is_whitespace = function
ae4735db 441 | (T2 (t,_b,_i)) ->
002099fc
C
442 (match t with
443 | Parser_c.TCommentSpace _ -> true (* only whitespace *)
fc1ad971 444 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
002099fc
C
445 | _ -> false
446 )
ae4735db 447 | _ -> false
002099fc 448
34e49164 449let is_minusable_comment = function
ae4735db 450 | (T2 (t,_b,_i)) ->
34e49164
C
451 (match t with
452 | Parser_c.TCommentSpace _ (* only whitespace *)
ae4735db 453 (* patch: coccinelle *)
002099fc 454 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
7f004419 455 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
002099fc
C
456 | Parser_c.TComment _
457 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
b1b2de81
C
458 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
459 | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
34e49164
C
460 -> true
461
b1b2de81 462 | Parser_c.TCommentMisc _
0708f913 463 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
34e49164
C
464 -> false
465
466 | _ -> false
467 )
ae4735db 468 | _ -> false
34e49164 469
fc1ad971 470let is_minusable_comment_nocpp = function
ae4735db 471 | (T2 (t,_b,_i)) ->
fc1ad971
C
472 (match t with
473 | Parser_c.TCommentSpace _ (* only whitespace *)
ae4735db 474 (* patch: coccinelle *)
fc1ad971 475 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
7f004419 476 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
fc1ad971
C
477 | Parser_c.TComment _ -> true
478 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
479 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
480 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
481 -> false
482
483 | Parser_c.TCommentMisc _
484 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
485 -> false
486
487 | _ -> false
488 )
ae4735db 489 | _ -> false
fc1ad971 490
34e49164 491let all_coccis = function
c3e37e97 492 Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 _ -> true
34e49164
C
493 | _ -> false
494
b1b2de81
C
495(*previously gave up if the first character was a newline, but not clear why*)
496let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
34e49164 497
708f4980 498let set_minus_comment adj = function
ae4735db 499 | T2 (t,Ctx,idx) ->
34e49164
C
500 let str = TH.str_of_tok t in
501 (match t with
502 | Parser_c.TCommentSpace _
ae4735db 503(* patch: coccinelle *)
34e49164
C
504 | Parser_c.TCommentNewline _ -> ()
505
ae4735db
C
506 | Parser_c.TComment _
507 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
b1b2de81
C
508 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
509 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
ae4735db 510 ->
b1b2de81
C
511 pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
512 (TH.line_of_tok t) str)
34e49164
C
513 | _ -> raise Impossible
514 );
708f4980 515 T2 (t, Min adj, idx)
ae4735db 516(* patch: coccinelle *)
fc1ad971 517 | T2 (t,Min adj,idx) as x -> x
34e49164
C
518 | _ -> raise Impossible
519
708f4980 520let set_minus_comment_or_plus adj = function
c3e37e97 521 Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 _ as x -> x
708f4980 522 | x -> set_minus_comment adj x
34e49164 523
fc1ad971
C
524let drop_minus xs =
525 xs +> Common.exclude (function
526 | T2 (t,Min adj,_) -> true
527 | _ -> false
528 )
529
34e49164
C
530let remove_minus_and_between_and_expanded_and_fake xs =
531
532 (* get rid of exampled and fake tok *)
ae4735db 533 let xs = xs +> Common.exclude (function
34e49164
C
534 | T2 (t,_,_) when TH.is_expanded t -> true
535 | Fake2 -> true
536
537 | _ -> false
538 )
539 in
540
113803cf 541 let minus_or_comment = function
708f4980 542 T2(_,Min adj,_) -> true
113803cf
C
543 | x -> is_minusable_comment x in
544
fc1ad971
C
545 let minus_or_comment_nocpp = function
546 T2(_,Min adj,_) -> true
547 | x -> is_minusable_comment_nocpp x in
548
549 let common_adj (index1,adj1) (index2,adj2) =
550 adj1 = adj2 (* same adjacency info *) &&
551 (* non-empty intersection of witness trees *)
552 not ((Common.inter_set index1 index2) = []) in
553
554 let rec adjust_around_minus = function
555 [] -> []
556 | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
557 (((T2(_,Min adj,_))::_) as rest) ->
558 (* an initial newline, as in a replaced statement *)
559 let (between_minus,rest) = Common.span minus_or_comment rest in
560 (match rest with
561 [] -> (set_minus_comment adj x) ::
562 (List.map (set_minus_comment adj) between_minus)
563 | T2(_,Ctx,_)::_ when is_newline (List.hd(List.rev between_minus)) ->
564 (set_minus_comment adj x)::(adjust_within_minus between_minus) @
565 (adjust_around_minus rest)
566 | _ ->
567 x :: (adjust_within_minus between_minus) @
568 (adjust_around_minus rest))
569 | ((T2(_,Min adj,_))::_) as rest ->
570 (* no initial newline, as in a replaced expression *)
571 let (between_minus,rest) = Common.span minus_or_comment rest in
572 (match rest with
573 [] ->
574 (List.map (set_minus_comment adj) between_minus)
575 | _ ->
576 (adjust_within_minus between_minus) @
577 (adjust_around_minus rest))
578 | x::xs -> x::adjust_around_minus xs
579 and adjust_within_minus = function
580 [] -> []
581 | (T2(_,Min adj1,_) as t1)::xs ->
582 let (between_minus,rest) = Common.span is_minusable_comment xs in
583 (match rest with
584 [] ->
585 (* keep last newline *)
586 let (drop,keep) =
587 try
588 let (drop,nl,keep) =
589 Common.split_when is_newline between_minus in
590 (drop, nl :: keep)
591 with Not_found -> (between_minus,[]) in
592 t1 ::
593 List.map (set_minus_comment_or_plus adj1) drop @
594 keep
595 | (T2(_,Min adj2,_) as t2)::rest when common_adj adj1 adj2 ->
596 t1::
597 List.map (set_minus_comment_or_plus adj1) between_minus @
598 adjust_within_minus (t2::rest)
599 | x::xs ->
600 t1::(between_minus @ adjust_within_minus (x::xs)))
601 | _ -> failwith "only minus and space possible" in
602
603 (* new idea: collects regions not containing non-space context code
604 if two adjacent adjacent minus tokens satisfy common_adj then delete
605 all spaces, comments etc between them
606 if two adjacent minus tokens do not satisfy common_adj only delete
607 the spaces between them if there are no comments, etc.
608 if the region contain no plus code and is both preceded and followed
609 by a newline, delete the initial newline. *)
610
611 let rec adjust_around_minus = function
34e49164 612 [] -> []
708f4980 613 | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
fc1ad971
C
614 (T2(_,Min adj1,_) as t1)::xs ->
615 let (minus_list,rest) = Common.span not_context (t1::xs) in
616 let contains_plus = List.exists is_plus minus_list in
617 let x =
618 match List.rev minus_list with
619 (T2(Parser_c.TCommentNewline c,_b,_i))::rest
620 when List.for_all minus_or_comment minus_list ->
621 set_minus_comment_or_plus adj1 x
622 | _ -> x in
623 x :: adjust_within_minus contains_plus minus_list @
624 adjust_around_minus rest
625 | (T2(_,Min adj1,_) as t1)::xs ->
626 let (minus_list,rest) = Common.span not_context (t1::xs) in
627 let contains_plus = List.exists is_plus minus_list in
628 adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
629 | x::xs -> x :: adjust_around_minus xs
630 and adjust_within_minus cp (* contains plus *) = function
631 (T2(_,Min adj1,_) as t1)::xs ->
632 let not_minus = function T2(_,Min _,_) -> false | _ -> true in
633 let (not_minus_list,rest) = Common.span not_minus xs in
634 t1 ::
34e49164 635 (match rest with
fc1ad971
C
636 (T2(_,Min adj2,_) as t2)::xs when common_adj adj1 adj2 ->
637 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
638 @ (adjust_within_minus cp (t2::xs))
639 | (T2(_,Min adj2,_) as t2)::xs ->
640 let is_whitespace_or_plus = function
641 (T2 _) as x -> is_space x
642 | _ -> true (*plus*) in
643 if List.for_all is_whitespace_or_plus not_minus_list
644 then
645 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
646 @ (adjust_within_minus cp (t2::xs))
647 else not_minus_list @ (adjust_within_minus cp (t2::xs))
648 | _ ->
649 if cp
650 then xs
651 else
652 let (spaces,rest) = Common.span is_space xs in
653 (List.map (set_minus_comment_or_plus adj1) spaces)
654 @ rest)
655 | xs -> failwith "should always start with minus"
656 and not_context = function
657 (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false
658 | _ -> true
659 and is_plus = function
660 C2 _ | Cocci2 _ -> true
661 | _ -> false in
34e49164 662
fc1ad971 663 let xs = adjust_around_minus xs in
34e49164 664
113803cf 665 (* this drops blank lines after a brace introduced by removing code *)
fc1ad971
C
666 let minus_or_comment_nonl = function
667 T2(_,Min adj,_) -> true
668 | T2(Parser_c.TCommentNewline _,_b,_i) -> false
669 | x -> is_minusable_comment x in
670
113803cf
C
671 let rec adjust_after_brace = function
672 [] -> []
708f4980 673 | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs)
b1b2de81 674 when str_of_token2 x =$= "{" ->
fc1ad971 675 let (between_minus,rest) = Common.span minus_or_comment_nonl xs in
113803cf
C
676 let is_whitespace = function
677 T2(Parser_c.TCommentSpace _,_b,_i)
678 (* patch: cocci *)
679 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
680 | _ -> false in
681 let (newlines,rest) = Common.span is_whitespace rest in
682 let (drop_newlines,last_newline) =
683 let rec loop = function
684 [] -> ([],[])
685 | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
686 (List.rev rest,[x])
687 | x::xs ->
688 let (drop_newlines,last_newline) = loop xs in
689 (drop_newlines,x::last_newline) in
690 loop (List.rev newlines) in
708f4980 691 x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@
113803cf
C
692 last_newline@
693 adjust_after_brace rest
694 | x::xs -> x::adjust_after_brace xs in
695
696 let xs = adjust_after_brace xs in
697
fc1ad971
C
698 (* search backwards from context } over spaces until reaching a newline.
699 then go back over all minus code until reaching some context or + code.
700 get rid of all intervening spaces, newlines, and comments
701 input is reversed *)
702 let rec adjust_before_brace = function
703 [] -> []
704 | ((T2(t,Ctx,_)) as x)::xs when str_of_token2 x =$= "}" or is_newline x ->
705 let (outer_spaces,rest) = Common.span is_space xs in
706 x :: outer_spaces @
707 (match rest with
708 ((T2 (Parser_c.TCommentNewline _,Ctx,_i)) as h) ::
709 (* the rest of this code is the same as from_newline below
710 but merging them seems to be error prone... *)
711 ((T2 (t, Min adj, idx)) as m) :: rest ->
712 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
713 h :: m ::
714 (List.map (set_minus_comment adj) spaces) @
715 (adjust_before_brace rest)
716 | _ -> adjust_before_brace rest)
717 | x::xs -> x :: (adjust_before_brace xs) in
718
719 let from_newline = function
720 ((T2 (t, Min adj, idx)) as m) :: rest ->
721 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
722 m ::
723 (List.map (set_minus_comment adj) spaces) @
724 (adjust_before_brace rest)
725 | rest -> adjust_before_brace rest in
726
727 let xs = List.rev (from_newline (List.rev xs)) in
728 let xs = drop_minus xs in
34e49164
C
729 xs
730
faf9a90c
C
731(* normally, in C code, a semicolon is not preceded by a space or newline *)
732let adjust_before_semicolon toks =
733 let toks = List.rev toks in
002099fc 734 let rec search_semic = function
faf9a90c 735 [] -> []
002099fc 736 | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
faf9a90c 737 if List.mem (str_of_token2 x) [";";")";","]
002099fc
C
738 then x :: search_minus false xs
739 else x :: search_semic xs
740 | x::xs -> x :: search_semic xs
741 and search_minus seen_minus xs =
742 let (spaces, rest) = Common.span is_space xs in
743 (* only delete spaces if something is actually deleted *)
744 match rest with
745 ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest
746 | _ -> if seen_minus then rest else xs in
747 List.rev (search_semic toks)
34e49164
C
748
749let is_ident_like s = s ==~ Common.regexp_alpha
750
ae4735db 751let rec add_space xs =
34e49164
C
752 match xs with
753 | [] -> []
754 | [x] -> [x]
708f4980
C
755 | (Cocci2(sx,lnx,_,rcolx) as x)::((Cocci2(sy,lny,lcoly,_)) as y)::xs
756 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
757 not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
758 (* this only works within a line. could consider whether
759 something should be done to add newlines too, rather than
760 printing them explicitly in unparse_cocci. *)
761 x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
ae4735db 762 | x::y::xs ->
34e49164
C
763 let sx = str_of_token2 x in
764 let sy = str_of_token2 y in
faf9a90c 765 if is_ident_like sx && is_ident_like sy
34e49164
C
766 then x::C2 " "::(add_space (y::xs))
767 else x::(add_space (y::xs))
768
769
770
771(* When insert some new code, because of a + in a SP, we must add this
772 * code at the right place, with the good indentation. So each time we
773 * encounter some spacing info, with some newline, we maintain the
774 * current indentation level used.
ae4735db 775 *
34e49164 776 * TODO problems: not accurate. ex: TODO
ae4735db 777 *
34e49164
C
778 * TODO: if in #define region, should add a \ \n
779 *)
ae4735db 780let new_tabbing2 space =
34e49164
C
781 (list_of_string space)
782 +> List.rev
b1b2de81 783 +> Common.take_until (fun c -> c =<= '\n')
34e49164
C
784 +> List.rev
785 +> List.map string_of_char
786 +> String.concat ""
787
ae4735db 788let new_tabbing a =
34e49164
C
789 Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
790
791
ae4735db 792let rec adjust_indentation xs =
34e49164
C
793 let _current_tabbing = ref "" in
794 let tabbing_unit = ref None in
795
796 let string_of_list l = String.concat "" (List.map string_of_char l) in
797
798 (* try to pick a tabbing unit for the plus code *)
799 let adjust_tabbing_unit old_tab new_tab =
b1b2de81 800 if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
34e49164
C
801 then
802 let old_tab = list_of_string old_tab in
803 let new_tab = list_of_string new_tab in
804 let rec loop = function
805 ([],new_tab) ->
806 tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
807 | (_,[]) -> failwith "not possible"
808 | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
809 loop (old_tab,new_tab) in
810
811 let remtab tu current_tab =
812 let current_tab = List.rev(list_of_string current_tab) in
813 let rec loop = function
814 ([],new_tab) -> string_of_list (List.rev new_tab)
0708f913 815 | (_,[]) -> "" (*weird; tabbing unit used up more than the current tab*)
b1b2de81 816 | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
34e49164
C
817 | (_,ns) -> (* mismatch; remove what we can *)
818 string_of_list (List.rev ns) in
819 loop (tu,current_tab) in
820
821 let rec find_first_tab started = function
822 [] -> ()
b1b2de81 823 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
34e49164 824 find_first_tab true xs
485bce71 825(* patch: coccinelle *)
34e49164
C
826 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
827 when started ->
828 let s = str_of_token2 x +> new_tabbing in
829 tabbing_unit := Some (s,List.rev (list_of_string s))
830 | x::xs -> find_first_tab started xs in
831 find_first_tab false xs;
832
ae4735db 833 let rec aux started xs =
34e49164
C
834 match xs with
835 | [] -> []
485bce71 836(* patch: coccinelle *)
34e49164 837 | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
708f4980
C
838 ((Cocci2 ("{",_,_,_)) as a)::xs
839 when started && str_of_token2 x =$= ")" ->
34e49164 840 (* to be done for if, etc, but not for a function header *)
708f4980 841 x::(C2 " ")::a::(aux started xs)
34e49164 842 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs ->
ae4735db 843 let old_tabbing = !_current_tabbing in
34e49164
C
844 str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
845 (* only trust the indentation after the first { *)
c3e37e97
C
846 (if started
847 then adjust_tabbing_unit old_tabbing !_current_tabbing);
34e49164
C
848 let coccis_rest = Common.span all_coccis xs in
849 (match coccis_rest with
b1b2de81 850 (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
34e49164
C
851 (* the case where cocci code has been added before a close } *)
852 x::aux started (Indent_cocci2::xs)
853 | _ -> x::aux started xs)
854 | Indent_cocci2::xs ->
855 (match !tabbing_unit with
856 None -> aux started xs
857 | Some (tu,_) ->
858 _current_tabbing := (!_current_tabbing)^tu;
708f4980 859 Cocci2 (tu,-1,-1,-1)::aux started xs)
c3e37e97 860 | Unindent_cocci2(permanent)::xs ->
34e49164
C
861 (match !tabbing_unit with
862 None -> aux started xs
863 | Some (_,tu) ->
864 _current_tabbing := remtab tu (!_current_tabbing);
865 aux started xs)
866 (* border between existing code and cocci code *)
708f4980 867 | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_)) as y)::xs
b1b2de81 868 when str_of_token2 x =$= "{" ->
34e49164
C
869 x::aux true (y::Indent_cocci2::xs)
870 | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
b1b2de81 871 when str_of_token2 y =$= "}" ->
c3e37e97 872 x::aux started (y::Unindent_cocci2 true::xs)
34e49164 873 (* starting the body of the function *)
b1b2de81 874 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs
708f4980 875 | ((Cocci2("{",_,_,_)) as a)::xs -> a::aux true xs
c3e37e97
C
876 | ((Cocci2("\n",_,_,_)) as x)::Unindent_cocci2(false)::xs ->
877 x::aux started xs
ae4735db 878 | ((Cocci2("\n",_,_,_)) as x)::xs ->
0708f913 879 (* dont inline in expr because of weird eval order of ocaml *)
ae4735db 880 let s = !_current_tabbing in
708f4980 881 x::Cocci2 (s,-1,-1,-1)::aux started xs
34e49164
C
882 | x::xs -> x::aux started xs in
883 aux false xs
884
885
886let rec find_paren_comma = function
887 | [] -> ()
888
889 (* do nothing if was like this in original file *)
890 | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
ae4735db 891 ::xs when p2 =|= p1 + 1 ->
34e49164
C
892 find_paren_comma (x2::xs)
893
894 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
ae4735db 895 ::xs when p2 =|= p1 + 1 ->
34e49164
C
896 find_paren_comma (x2::xs)
897
898 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
ae4735db 899 ::xs when p2 =|= p1 + 1 ->
34e49164
C
900 find_paren_comma (x2::xs)
901
902 (* otherwise yes can adjust *)
ae4735db 903 | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
34e49164
C
904 x2.remove <- true;
905 find_paren_comma (x2::xs)
ae4735db 906 | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
34e49164
C
907 x1.remove <- true;
908 find_paren_comma (x2::xs)
909
ae4735db 910 | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
34e49164
C
911 x1.remove <- true;
912 find_paren_comma (x2::xs)
913
ae4735db 914 | x::xs ->
34e49164 915 find_paren_comma xs
34e49164 916
ae4735db
C
917
918let fix_tokens toks =
34e49164
C
919 let toks = toks +> List.map mk_token_extended in
920
921 let cleaner = toks +> Common.exclude (function
922 | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
923 | _ -> false
924 ) in
925 find_paren_comma cleaner;
926
927 let toks = rebuild_tokens_extented toks in
928 toks +> List.map (fun x -> x.tok2)
929
930
931
932(*****************************************************************************)
933(* Final unparsing (and debugging support) *)
934(*****************************************************************************)
935
936(* for debugging *)
937type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
938
939let kind_of_token2 = function
940 | Fake2 -> KFake
941 | Cocci2 _ -> KCocci
942 | C2 _ -> KC
943 | T2 (t,_,_) ->
944 (match TH.pinfo_of_tok t with
945 | ExpandedTok _ -> KExpanded
946 | OriginTok _ -> KOrigin
947 | FakeTok _ -> raise Impossible (* now a Fake2 *)
948 | AbstractLineTok _ -> raise Impossible (* now a KC *)
949 )
c3e37e97 950 | Unindent_cocci2 _ | Indent_cocci2 -> raise Impossible
ae4735db 951
34e49164
C
952let end_mark = "!"
953
954let start_mark = function
955 | KFake -> "!F!"
ae4735db 956 | KCocci -> "!S!"
34e49164
C
957 | KC -> "!A!"
958 | KExpanded -> "!E!"
959 | KOrigin -> ""
960
961let print_all_tokens2 pr xs =
962 if !Flag_parsing_c.debug_unparsing
963 then
964 let current_kind = ref KOrigin in
ae4735db 965 xs +> List.iter (fun t ->
34e49164 966 let newkind = kind_of_token2 t in
b1b2de81 967 if newkind =*= !current_kind
34e49164
C
968 then pr (str_of_token2 t)
969 else begin
970 pr (end_mark);
971 pr (start_mark newkind);
972 pr (str_of_token2 t);
973 current_kind := newkind
974 end
975 );
ae4735db 976 else
34e49164 977 xs +> List.iter (fun x -> pr (str_of_token2 x))
ae4735db 978
34e49164
C
979
980
981
982(*****************************************************************************)
983(* Entry points *)
984(*****************************************************************************)
985
986(* old: PPviatok was made in the beginning to allow to pretty print a
987 * complete C file, including a modified C file by transformation.ml,
988 * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
989 * maybe flow_to_ast) all the cases. Indeed we don't need to do some
990 * fancy stuff when a function was not modified at all. Just need to
991 * print the list of token as-is. But now pretty_print_c.ml handles
992 * almost everything so maybe less useful. Maybe PPviatok allows to
ae4735db
C
993 * optimize a little the pretty printing.
994 *
34e49164
C
995 * update: now have PPviastr which goes even faster than PPviatok, so
996 * PPviatok has disappeared.
997 *)
998
999type ppmethod = PPnormal | PPviastr
1000
1001
1002
1003
1004(* The pp_program function will call pretty_print_c.ml with a special
ae4735db 1005 * function to print the leaf components, the tokens. When we want to
34e49164 1006 * print a token, we need to print also maybe the space and comments that
ae4735db 1007 * were close to it in the original file (and that was omitted during the
34e49164
C
1008 * parsing phase), and honor what the cocci-info attached to the token says.
1009 * Maybe we will not print the token if it's a MINUS-token, and maybe we will
ae4735db 1010 * print it and also print some cocci-code attached in a PLUS to it.
34e49164
C
1011 * So we will also maybe call unparse_cocci. Because the cocci-code may
1012 * contain metavariables, unparse_cocci will in fact sometimes call back
1013 * pretty_print_c (which will this time don't call back again unparse_cocci)
1014 *)
1015
ae4735db
C
1016let pp_program2 xs outfile =
1017 Common.with_open_outfile outfile (fun (pr,chan) ->
1018 let pr s =
1019 if !Flag_parsing_c.debug_unparsing
34e49164 1020 then begin pr2_no_nl s; flush stderr end
ae4735db 1021 else pr s
34e49164
C
1022 (* flush chan; *)
1023 (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1024 in
ae4735db
C
1025
1026 xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
34e49164
C
1027 (* here can still work on ast *)
1028 let e = remove_useless_fakeInfo_struct e in
ae4735db 1029
34e49164 1030 match ppmethod with
002099fc 1031 | PPnormal ->
34e49164
C
1032 (* now work on tokens *)
1033
1034 (* phase1: just get all the tokens, all the information *)
ae4735db 1035 assert(toks_e +> List.for_all (fun t ->
34e49164
C
1036 TH.is_origin t or TH.is_expanded t
1037 ));
1038 let toks = get_fakeInfo_and_tokens e toks_e in
1039 let toks = displace_fake_nodes toks in
1040 (* assert Origin;ExpandedTok;Faketok *)
1041 let toks = expand_mcode toks in
1042 (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1043 * and no tag information, just NOTHING. *)
fc1ad971
C
1044
1045 let toks =
1046 if !Flag.sgrep_mode2
1047 then drop_minus toks (* nothing to do for sgrep *)
1048 else
1049 (* phase2: can now start to filter and adjust *)
1050 let toks = adjust_indentation toks in
1051 let toks = adjust_before_semicolon toks in(*before remove minus*)
1052 let toks = remove_minus_and_between_and_expanded_and_fake toks in
1053 (* assert Origin + Cocci + C and no minus *)
1054 let toks = add_space toks in
1055 let toks = fix_tokens toks in
1056 toks in
34e49164 1057
ae4735db 1058 (* in theory here could reparse and rework the ast! or
34e49164
C
1059 * apply some SP. Not before cos julia may have generated
1060 * not parsable file. Need do unparsing_tricks call before being
1061 * ready to reparse. *)
1062 print_all_tokens2 pr toks;
1063
1064 | PPviastr -> pr str
1065 )
1066 )
1067
ae4735db 1068let pp_program a b =
34e49164
C
1069 Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
1070
485bce71 1071
ae4735db 1072let pp_program_default xs outfile =
485bce71
C
1073 let xs' = xs +> List.map (fun x -> x, PPnormal) in
1074 pp_program xs' outfile