Release coccinelle-0.2.3rc1
[bpt/coccinelle.git] / parsing_c / unparse_c.ml
CommitLineData
0708f913 1(* Yoann Padioleau, Julia Lawall
ae4735db
C
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
0708f913 4 * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
34e49164
C
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
34e49164
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
ae4735db
C
14 *
15 *
485bce71 16 * Modifications by Julia Lawall for better newline handling.
34e49164
C
17 *)
18open Common
19
20open Ast_c
21
22module TH = Token_helpers
23
24
b1b2de81
C
25(* should keep comments and directives in between adjacent deleted terms,
26but not comments and directives within deleted terms. should use the
27labels found in the control-flow graph *)
28
34e49164
C
29
30
708f4980
C
31(*****************************************************************************)
32(* Wrappers *)
33(*****************************************************************************)
34let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
35
34e49164
C
36(*****************************************************************************)
37(* Types used during the intermediate phases of the unparsing *)
38(*****************************************************************************)
39
ae4735db 40type token1 =
34e49164
C
41 | Fake1 of info
42 | T1 of Parser_c.token
43
44(* The cocci_tag of the token should always be a NOTHING. The mark of
45 * the token can only be OriginTok or ExpandedTok. Why not get rid of
46 * token and get something simpler ? because we need to know if the
47 * info is a TCommentCpp or TCommentSpace, etc for some of the further
48 * analysis so easier to keep with the token.
ae4735db 49 *
34e49164
C
50 * This type contains the whole information. Have all the tokens with this
51 * type.
52 *)
708f4980
C
53type min =
54 Min of (int list (* match numbers *) * int (* adjacency information *))
55 | Ctx
56
ae4735db
C
57type token2 =
58 | T2 of Parser_c.token * min *
34e49164
C
59 int option (* orig index, abstracting away comments and space *)
60 | Fake2
708f4980 61 | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
5636bb2c 62 * Unparse_cocci.nlhint option
34e49164
C
63 | C2 of string
64 | Indent_cocci2
c3e37e97 65 | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
34e49164
C
66
67(* not used yet *)
ae4735db 68type token3 =
34e49164
C
69 | T3 of Parser_c.token
70 | Cocci3 of string
71 | C3 of string
72
73
74(* similar to the tech in parsing_hack *)
75type token_extended = {
76 tok2 : token2;
77 str : string;
78 idx: int option; (* to know if 2 tokens were consecutive in orig file *)
79 mutable new_tokens_before : token2 list;
80 mutable remove : bool;
81}
82
83
84(*****************************************************************************)
85(* Helpers *)
86(*****************************************************************************)
87
ae4735db 88let info_of_token1 t =
34e49164
C
89 match t with
90 | Fake1 info -> info
91 | T1 tok -> TH.info_of_tok tok
92
002099fc
C
93let print_token1 = function
94 T1 tok -> TH.str_of_tok tok
95 | Fake1 info -> "fake"
96
34e49164
C
97let str_of_token2 = function
98 | T2 (t,_,_) -> TH.str_of_tok t
99 | Fake2 -> ""
5636bb2c 100 | Cocci2 (s,_,_,_,_) -> s
34e49164
C
101 | C2 s -> s
102 | Indent_cocci2 -> ""
c3e37e97 103 | Unindent_cocci2 _ -> ""
34e49164
C
104
105let print_token2 = function
708f4980
C
106 | T2 (t,b,_) ->
107 let b_str =
108 match b with
109 Min (index,adj) ->
110 Printf.sprintf "-%d[%s]" adj
111 (String.concat " " (List.map string_of_int index))
112 | Ctx -> "" in
113 "T2:"^b_str^TH.str_of_tok t
fc1ad971 114 | Fake2 -> "fake"
5636bb2c 115 | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
34e49164
C
116 | C2 s -> "C2:"^s
117 | Indent_cocci2 -> "Indent"
c3e37e97 118 | Unindent_cocci2 _ -> "Unindent"
34e49164 119
002099fc
C
120let simple_print_all_tokens1 l =
121 List.iter (function x -> Printf.printf "%s " (print_token1 x)) l;
122 Printf.printf "\n"
123
708f4980 124let simple_print_all_tokens2 l =
fc1ad971 125 List.iter (function x -> Printf.printf "|%s| " (print_token2 x)) l;
34e49164
C
126 Printf.printf "\n"
127
128let str_of_token3 = function
129 | T3 t -> TH.str_of_tok t
130 | Cocci3 s | C3 s -> s
131
132
133
ae4735db
C
134let mk_token_extended x =
135 let origidx =
34e49164 136 match x with
ae4735db 137 | T2 (_,_, idx) -> idx
34e49164
C
138 | _ -> None
139 in
ae4735db 140 { tok2 = x;
34e49164
C
141 str = str_of_token2 x;
142 idx = origidx;
143 new_tokens_before = [];
144 remove = false;
145 }
146
ae4735db 147let rebuild_tokens_extented toks_ext =
34e49164 148 let _tokens = ref [] in
ae4735db 149 toks_ext +> List.iter (fun tok ->
34e49164
C
150 tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
151 if not tok.remove then push2 tok.tok2 _tokens;
152 );
153 let tokens = List.rev !_tokens in
154 (tokens +> List.map mk_token_extended)
155
156
157let mcode_contain_plus = function
158 | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
159 | Ast_cocci.CONTEXT _ -> true
485bce71 160(* patch: when need full coccinelle transformation *)
708f4980
C
161 | Ast_cocci.MINUS (_,_,_,[]) -> false
162 | Ast_cocci.MINUS (_,_,_,x::xs) -> true
951c7801 163 | Ast_cocci.PLUS _ -> raise Impossible
34e49164 164
ae4735db 165let contain_plus info =
34e49164
C
166 let mck = Ast_c.mcode_of_info info in
167 mcode_contain_plus mck
168
169(*****************************************************************************)
170(* Last fix on the ast *)
171(*****************************************************************************)
172
ae4735db 173(* Because of the ugly trick to handle initialiser, I generate fake ','
34e49164
C
174 * for the last initializer element, but if there is nothing around it,
175 * I don't want in the end to print it.
176 *)
177
ae4735db 178let remove_useless_fakeInfo_struct program =
34e49164 179 let bigf = { Visitor_c.default_visitor_c_s with
ae4735db 180 Visitor_c.kini_s = (fun (k,bigf) ini ->
34e49164 181 match k ini with
113803cf 182 | InitList args, ii ->
34e49164
C
183 (match ii with
184 | [_i1;_i2] -> ini
113803cf 185 | [i1;i2;iicommaopt] ->
34e49164
C
186 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
187 && (Ast_c.is_fake iicommaopt)
188 (* sometimes the guy put a normal iicommaopt *)
34e49164
C
189 then InitList args, [i1;i2]
190 else InitList args, [i1;i2;iicommaopt]
ae4735db 191 | [i1;i2;iicommaopt;end_comma_opt] ->
113803cf
C
192 (* only in #define. end_comma_opt canot be fake *)
193 (* not sure if this will be considered ambiguous with a previous
194 case? *)
195 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
196 && (Ast_c.is_fake iicommaopt)
197 (* sometimes the guy put a normal iicommaopt *)
198 then InitList args, [i1;i2;end_comma_opt]
199 else InitList args, [i1;i2;iicommaopt;end_comma_opt]
34e49164
C
200 | _ -> raise Impossible
201 )
202 | x -> x
203 )
204 } in
205 Visitor_c.vk_toplevel_s bigf program
206
207
208(*****************************************************************************)
209(* Tokens1 generation *)
210(*****************************************************************************)
211
ae4735db
C
212let get_fakeInfo_and_tokens celem toks =
213 let toks_in = ref toks in
34e49164
C
214 let toks_out = ref [] in
215
216 (* todo? verify good order of position ? *)
ae4735db 217 let pr_elem info =
34e49164 218 match Ast_c.pinfo_of_info info with
ae4735db 219 | FakeTok _ ->
34e49164 220 Common.push2 (Fake1 info) toks_out
ae4735db 221 | OriginTok _ | ExpandedTok _ ->
34e49164 222 (* get the associated comments/space/cppcomment tokens *)
978fd7e5 223 let (before, x, after) =
ae4735db 224 !toks_in +> Common.split_when (fun tok ->
34e49164
C
225 info =*= TH.info_of_tok tok)
226 in
b1b2de81 227 assert(info =*= TH.info_of_tok x);
34e49164 228 (*old: assert(before +> List.for_all (TH.is_comment)); *)
ae4735db 229 before +> List.iter (fun x ->
34e49164 230 if not (TH.is_comment x)
0708f913 231 then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
34e49164
C
232 (* case such as int asm d3("x"); not yet in ast *)
233 );
234 before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
235 push2 (T1 x) toks_out;
236 toks_in := after;
ae4735db 237 | AbstractLineTok _ ->
34e49164
C
238 (* can be called on type info when for instance use -type_c *)
239 if !Flag_parsing_c.pretty_print_type_info
240 then Common.push2 (Fake1 info) toks_out
ae4735db 241 else raise Impossible (* at this stage *)
34e49164
C
242 in
243
244 let pr_space _ = () in (* use the spacing that is there already *)
245
246 Pretty_print_c.pp_program_gen pr_elem pr_space celem;
247
248 if not (null !toks_in)
0708f913 249 then failwith "WEIRD: unparsing not finished";
34e49164
C
250
251 List.rev !toks_out
252
253(* Fake nodes that have BEFORE code should be moved over any subsequent
254whitespace and newlines, but not any comments, to get as close to the affected
002099fc 255code as possible. Similarly, fake nodes that have AFTER code should be moved
34e49164
C
256backwards. No fake nodes should have both before and after code. *)
257
258let displace_fake_nodes toks =
259 let is_fake = function Fake1 _ -> true | _ -> false in
260 let is_whitespace = function
261 T1(Parser_c.TCommentSpace _)
485bce71 262 (* patch: cocci *)
34e49164
C
263 | T1(Parser_c.TCommentNewline _) -> true
264 | _ -> false in
265 let rec loop toks =
266 let fake_info =
267 try Some (Common.split_when is_fake toks)
268 with Not_found -> None in
269 match fake_info with
270 Some(bef,((Fake1 info) as fake),aft) ->
271 (match !(info.cocci_tag) with
ae4735db 272 | Some x ->
708f4980 273 (match x with
002099fc 274 (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
34e49164 275 (* move the fake node forwards *)
002099fc
C
276 let (whitespace,rest) = Common.span is_whitespace aft in
277 bef @ whitespace @ fake :: (loop rest)
278 | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
34e49164 279 (* move the fake node backwards *)
002099fc
C
280 let revbef = List.rev bef in
281 let (revwhitespace,revprev) = Common.span is_whitespace revbef in
282 let whitespace = List.rev revwhitespace in
283 let prev = List.rev revprev in
284 prev @ fake :: (loop (whitespace @ aft))
285 | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
286 bef @ fake :: (loop aft)
287 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
288 failwith "fake node should not be before-after"
289 | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
485bce71 290 )
ae4735db 291 | None ->
708f4980
C
292 bef @ fake :: (loop aft)
293 )
34e49164
C
294 | None -> toks
295 | _ -> raise Impossible in
296 loop toks
297
298(*****************************************************************************)
299(* Tokens2 generation *)
300(*****************************************************************************)
301
b1b2de81 302let comment2t2 = function
5636bb2c
C
303 (Token_c.TCommentCpp
304 (* not sure iif the following list is exhaustive or complete *)
305 (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError),
306 (info : Token_c.info)) ->
307 C2(info.Common.str)
308 | (Token_c.TCommentCpp x,(info : Token_c.info)) ->
b1b2de81
C
309 C2("\n"^info.Common.str^"\n")
310 | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
311
ae4735db 312let expand_mcode toks =
34e49164
C
313 let toks_out = ref [] in
314
315 let index = ref 0 in
316
ae4735db 317 let add_elem t minus =
34e49164 318 match t with
ae4735db 319 | Fake1 info ->
34e49164 320 let str = Ast_c.str_of_info info in
b1b2de81 321 if str =$= ""
34e49164
C
322 then push2 (Fake2) toks_out
323 (* perhaps the fake ',' *)
324 else push2 (C2 str) toks_out
ae4735db
C
325
326
002099fc 327 | T1 tok ->
b1b2de81 328 (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
34e49164 329 (* no tag on expandedTok ! *)
fc1ad971
C
330 let modified = function
331 None -> false
332 | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
333 | _ -> true in
334 (if (TH.is_expanded tok &&
335 modified !((TH.info_of_tok tok).cocci_tag)
336 (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*))
0708f913
C
337 then
338 failwith
339 (Printf.sprintf
340 "expanded token %s on line %d is either modified or stored in a metavariable"
341 (TH.str_of_tok tok) (TH.line_of_tok tok)));
34e49164 342
ae4735db 343 let tok' = tok +> TH.visitor_info_of_tok (fun i ->
34e49164
C
344 { i with cocci_tag = ref Ast_c.emptyAnnot; }
345 ) in
346
ae4735db 347 let optindex =
34e49164
C
348 if TH.is_origin tok && not (TH.is_real_comment tok)
349 then begin
350 incr index;
351 Some !index
352 end
353 else None
354 in
355
356 push2 (T2 (tok', minus, optindex)) toks_out
357 in
358
ae4735db
C
359 let expand_info t =
360 let (mcode,env) =
708f4980 361 Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
34e49164 362
5636bb2c
C
363 let pr_cocci s ln col rcol hint =
364 push2 (Cocci2(s,ln,col,rcol,hint)) toks_out in
ae4735db 365 let pr_c info =
b1b2de81
C
366 (match Ast_c.pinfo_of_info info with
367 Ast_c.AbstractLineTok _ ->
368 push2 (C2 (Ast_c.str_of_info info)) toks_out
369 | Ast_c.FakeTok (s,_) ->
370 push2 (C2 s) toks_out
34e49164
C
371 | _ ->
372 Printf.printf "line: %s\n" (Common.dump info);
b1b2de81
C
373 failwith "not an abstract line");
374 (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
375 List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
376
708f4980 377 let pr_barrier ln col = (* marks a position, used around C code *)
5636bb2c 378 push2 (Cocci2("",ln,col,col,None)) toks_out in
708f4980 379 let pr_nobarrier ln col = () in (* not needed for linux spacing *)
b1b2de81 380
708f4980 381 let pr_cspace _ = push2 (C2 " ") toks_out in
34e49164 382
708f4980
C
383 let pr_space _ = () (* rely on add_space in cocci code *) in
384 let pr_arity _ = () (* not interested *) in
34e49164
C
385
386 let indent _ = push2 Indent_cocci2 toks_out in
c3e37e97 387 let unindent x = push2 (Unindent_cocci2 x) toks_out in
34e49164 388
708f4980
C
389 let args_pp =
390 (env, pr_cocci, pr_c, pr_cspace,
391 (match !Flag_parsing_c.spacing with
392 Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
393 pr_arity,
394 (match !Flag_parsing_c.spacing with
395 Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
396 indent, unindent) in
34e49164 397
ae4735db
C
398 (* old: when for yacfe with partial cocci:
399 * add_elem t false;
485bce71 400 *)
34e49164 401
485bce71 402 (* patch: when need full coccinelle transformation *)
faf9a90c 403 let unparser = Unparse_cocci.pp_list_list_any args_pp false in
34e49164 404 match mcode with
ae4735db 405 | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
34e49164 406 (* Why adding ? because I want to have all the information, the whole
ae4735db 407 * set of tokens, so I can then process and remove the
34e49164 408 * is_between_two_minus for instance *)
708f4980 409 add_elem t (Min (inst,adj));
faf9a90c 410 unparser any_xxs Unparse_cocci.InPlace
ae4735db 411 | Ast_cocci.CONTEXT (_,any_befaft) ->
34e49164 412 (match any_befaft with
ae4735db 413 | Ast_cocci.NOTHING ->
708f4980 414 add_elem t Ctx
951c7801 415 | Ast_cocci.BEFORE (xxs,_) ->
faf9a90c 416 unparser xxs Unparse_cocci.Before;
708f4980 417 add_elem t Ctx
ae4735db 418 | Ast_cocci.AFTER (xxs,_) ->
708f4980 419 add_elem t Ctx;
faf9a90c 420 unparser xxs Unparse_cocci.After;
ae4735db 421 | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
faf9a90c 422 unparser xxs Unparse_cocci.Before;
708f4980 423 add_elem t Ctx;
faf9a90c 424 unparser yys Unparse_cocci.After;
34e49164 425 )
951c7801 426 | Ast_cocci.PLUS _ -> raise Impossible
34e49164
C
427
428 in
429
430 toks +> List.iter expand_info;
431 List.rev !toks_out
ae4735db 432
34e49164
C
433
434(*****************************************************************************)
435(* Tokens2 processing, filtering, adjusting *)
436(*****************************************************************************)
437
002099fc 438let is_space = function
fc1ad971 439 | T2(Parser_c.TCommentSpace _,_b,_i) -> true (* only whitespace *)
ae4735db 440 | _ -> false
fc1ad971
C
441
442let is_newline = function
443 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
444 | _ -> false
445
446let is_whitespace = function
ae4735db 447 | (T2 (t,_b,_i)) ->
002099fc
C
448 (match t with
449 | Parser_c.TCommentSpace _ -> true (* only whitespace *)
fc1ad971 450 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
002099fc
C
451 | _ -> false
452 )
ae4735db 453 | _ -> false
002099fc 454
34e49164 455let is_minusable_comment = function
ae4735db 456 | (T2 (t,_b,_i)) ->
34e49164
C
457 (match t with
458 | Parser_c.TCommentSpace _ (* only whitespace *)
ae4735db 459 (* patch: coccinelle *)
002099fc 460 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
7f004419 461 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
002099fc
C
462 | Parser_c.TComment _
463 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
b1b2de81
C
464 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
465 | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
34e49164
C
466 -> true
467
b1b2de81 468 | Parser_c.TCommentMisc _
0708f913 469 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
34e49164
C
470 -> false
471
472 | _ -> false
473 )
ae4735db 474 | _ -> false
34e49164 475
fc1ad971 476let is_minusable_comment_nocpp = function
ae4735db 477 | (T2 (t,_b,_i)) ->
fc1ad971
C
478 (match t with
479 | Parser_c.TCommentSpace _ (* only whitespace *)
ae4735db 480 (* patch: coccinelle *)
fc1ad971 481 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
7f004419 482 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
fc1ad971
C
483 | Parser_c.TComment _ -> true
484 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
485 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
486 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
487 -> false
488
489 | Parser_c.TCommentMisc _
490 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
491 -> false
492
493 | _ -> false
494 )
ae4735db 495 | _ -> false
fc1ad971 496
34e49164 497let all_coccis = function
c3e37e97 498 Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 _ -> true
34e49164
C
499 | _ -> false
500
b1b2de81
C
501(*previously gave up if the first character was a newline, but not clear why*)
502let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
34e49164 503
708f4980 504let set_minus_comment adj = function
ae4735db 505 | T2 (t,Ctx,idx) ->
34e49164
C
506 let str = TH.str_of_tok t in
507 (match t with
508 | Parser_c.TCommentSpace _
ae4735db 509(* patch: coccinelle *)
34e49164
C
510 | Parser_c.TCommentNewline _ -> ()
511
ae4735db
C
512 | Parser_c.TComment _
513 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
b1b2de81
C
514 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
515 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
ae4735db 516 ->
b1b2de81
C
517 pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
518 (TH.line_of_tok t) str)
34e49164
C
519 | _ -> raise Impossible
520 );
708f4980 521 T2 (t, Min adj, idx)
ae4735db 522(* patch: coccinelle *)
fc1ad971 523 | T2 (t,Min adj,idx) as x -> x
34e49164
C
524 | _ -> raise Impossible
525
708f4980 526let set_minus_comment_or_plus adj = function
c3e37e97 527 Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 _ as x -> x
708f4980 528 | x -> set_minus_comment adj x
34e49164 529
fc1ad971
C
530let drop_minus xs =
531 xs +> Common.exclude (function
532 | T2 (t,Min adj,_) -> true
533 | _ -> false
534 )
535
34e49164
C
536let remove_minus_and_between_and_expanded_and_fake xs =
537
538 (* get rid of exampled and fake tok *)
ae4735db 539 let xs = xs +> Common.exclude (function
34e49164
C
540 | T2 (t,_,_) when TH.is_expanded t -> true
541 | Fake2 -> true
542
543 | _ -> false
544 )
545 in
546
113803cf 547 let minus_or_comment = function
708f4980 548 T2(_,Min adj,_) -> true
113803cf
C
549 | x -> is_minusable_comment x in
550
fc1ad971
C
551 let minus_or_comment_nocpp = function
552 T2(_,Min adj,_) -> true
553 | x -> is_minusable_comment_nocpp x in
554
555 let common_adj (index1,adj1) (index2,adj2) =
556 adj1 = adj2 (* same adjacency info *) &&
557 (* non-empty intersection of witness trees *)
558 not ((Common.inter_set index1 index2) = []) in
559
560 let rec adjust_around_minus = function
561 [] -> []
562 | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
563 (((T2(_,Min adj,_))::_) as rest) ->
564 (* an initial newline, as in a replaced statement *)
565 let (between_minus,rest) = Common.span minus_or_comment rest in
566 (match rest with
567 [] -> (set_minus_comment adj x) ::
568 (List.map (set_minus_comment adj) between_minus)
569 | T2(_,Ctx,_)::_ when is_newline (List.hd(List.rev between_minus)) ->
570 (set_minus_comment adj x)::(adjust_within_minus between_minus) @
571 (adjust_around_minus rest)
572 | _ ->
573 x :: (adjust_within_minus between_minus) @
574 (adjust_around_minus rest))
575 | ((T2(_,Min adj,_))::_) as rest ->
576 (* no initial newline, as in a replaced expression *)
577 let (between_minus,rest) = Common.span minus_or_comment rest in
578 (match rest with
579 [] ->
580 (List.map (set_minus_comment adj) between_minus)
581 | _ ->
582 (adjust_within_minus between_minus) @
583 (adjust_around_minus rest))
584 | x::xs -> x::adjust_around_minus xs
585 and adjust_within_minus = function
586 [] -> []
587 | (T2(_,Min adj1,_) as t1)::xs ->
588 let (between_minus,rest) = Common.span is_minusable_comment xs in
589 (match rest with
590 [] ->
591 (* keep last newline *)
592 let (drop,keep) =
593 try
594 let (drop,nl,keep) =
595 Common.split_when is_newline between_minus in
596 (drop, nl :: keep)
597 with Not_found -> (between_minus,[]) in
598 t1 ::
599 List.map (set_minus_comment_or_plus adj1) drop @
600 keep
601 | (T2(_,Min adj2,_) as t2)::rest when common_adj adj1 adj2 ->
602 t1::
603 List.map (set_minus_comment_or_plus adj1) between_minus @
604 adjust_within_minus (t2::rest)
605 | x::xs ->
606 t1::(between_minus @ adjust_within_minus (x::xs)))
607 | _ -> failwith "only minus and space possible" in
608
609 (* new idea: collects regions not containing non-space context code
610 if two adjacent adjacent minus tokens satisfy common_adj then delete
611 all spaces, comments etc between them
612 if two adjacent minus tokens do not satisfy common_adj only delete
613 the spaces between them if there are no comments, etc.
614 if the region contain no plus code and is both preceded and followed
615 by a newline, delete the initial newline. *)
616
617 let rec adjust_around_minus = function
34e49164 618 [] -> []
708f4980 619 | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
fc1ad971
C
620 (T2(_,Min adj1,_) as t1)::xs ->
621 let (minus_list,rest) = Common.span not_context (t1::xs) in
622 let contains_plus = List.exists is_plus minus_list in
623 let x =
624 match List.rev minus_list with
625 (T2(Parser_c.TCommentNewline c,_b,_i))::rest
626 when List.for_all minus_or_comment minus_list ->
627 set_minus_comment_or_plus adj1 x
628 | _ -> x in
629 x :: adjust_within_minus contains_plus minus_list @
630 adjust_around_minus rest
631 | (T2(_,Min adj1,_) as t1)::xs ->
632 let (minus_list,rest) = Common.span not_context (t1::xs) in
633 let contains_plus = List.exists is_plus minus_list in
634 adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
635 | x::xs -> x :: adjust_around_minus xs
636 and adjust_within_minus cp (* contains plus *) = function
637 (T2(_,Min adj1,_) as t1)::xs ->
638 let not_minus = function T2(_,Min _,_) -> false | _ -> true in
639 let (not_minus_list,rest) = Common.span not_minus xs in
640 t1 ::
34e49164 641 (match rest with
fc1ad971
C
642 (T2(_,Min adj2,_) as t2)::xs when common_adj adj1 adj2 ->
643 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
644 @ (adjust_within_minus cp (t2::xs))
645 | (T2(_,Min adj2,_) as t2)::xs ->
646 let is_whitespace_or_plus = function
5636bb2c 647 (T2 _) as x -> is_whitespace x
fc1ad971
C
648 | _ -> true (*plus*) in
649 if List.for_all is_whitespace_or_plus not_minus_list
650 then
651 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
652 @ (adjust_within_minus cp (t2::xs))
653 else not_minus_list @ (adjust_within_minus cp (t2::xs))
654 | _ ->
655 if cp
656 then xs
657 else
658 let (spaces,rest) = Common.span is_space xs in
659 (List.map (set_minus_comment_or_plus adj1) spaces)
660 @ rest)
661 | xs -> failwith "should always start with minus"
662 and not_context = function
663 (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false
664 | _ -> true
665 and is_plus = function
666 C2 _ | Cocci2 _ -> true
667 | _ -> false in
34e49164 668
fc1ad971 669 let xs = adjust_around_minus xs in
34e49164 670
113803cf 671 (* this drops blank lines after a brace introduced by removing code *)
fc1ad971
C
672 let minus_or_comment_nonl = function
673 T2(_,Min adj,_) -> true
674 | T2(Parser_c.TCommentNewline _,_b,_i) -> false
675 | x -> is_minusable_comment x in
676
113803cf
C
677 let rec adjust_after_brace = function
678 [] -> []
708f4980 679 | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs)
b1b2de81 680 when str_of_token2 x =$= "{" ->
fc1ad971 681 let (between_minus,rest) = Common.span minus_or_comment_nonl xs in
113803cf
C
682 let is_whitespace = function
683 T2(Parser_c.TCommentSpace _,_b,_i)
684 (* patch: cocci *)
685 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
686 | _ -> false in
687 let (newlines,rest) = Common.span is_whitespace rest in
688 let (drop_newlines,last_newline) =
689 let rec loop = function
690 [] -> ([],[])
691 | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
692 (List.rev rest,[x])
693 | x::xs ->
694 let (drop_newlines,last_newline) = loop xs in
695 (drop_newlines,x::last_newline) in
696 loop (List.rev newlines) in
708f4980 697 x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@
113803cf
C
698 last_newline@
699 adjust_after_brace rest
700 | x::xs -> x::adjust_after_brace xs in
701
702 let xs = adjust_after_brace xs in
703
fc1ad971
C
704 (* search backwards from context } over spaces until reaching a newline.
705 then go back over all minus code until reaching some context or + code.
706 get rid of all intervening spaces, newlines, and comments
707 input is reversed *)
708 let rec adjust_before_brace = function
709 [] -> []
710 | ((T2(t,Ctx,_)) as x)::xs when str_of_token2 x =$= "}" or is_newline x ->
711 let (outer_spaces,rest) = Common.span is_space xs in
712 x :: outer_spaces @
713 (match rest with
714 ((T2 (Parser_c.TCommentNewline _,Ctx,_i)) as h) ::
715 (* the rest of this code is the same as from_newline below
716 but merging them seems to be error prone... *)
717 ((T2 (t, Min adj, idx)) as m) :: rest ->
718 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
719 h :: m ::
720 (List.map (set_minus_comment adj) spaces) @
721 (adjust_before_brace rest)
722 | _ -> adjust_before_brace rest)
723 | x::xs -> x :: (adjust_before_brace xs) in
724
725 let from_newline = function
726 ((T2 (t, Min adj, idx)) as m) :: rest ->
727 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
728 m ::
729 (List.map (set_minus_comment adj) spaces) @
730 (adjust_before_brace rest)
731 | rest -> adjust_before_brace rest in
732
733 let xs = List.rev (from_newline (List.rev xs)) in
734 let xs = drop_minus xs in
34e49164
C
735 xs
736
faf9a90c
C
737(* normally, in C code, a semicolon is not preceded by a space or newline *)
738let adjust_before_semicolon toks =
739 let toks = List.rev toks in
002099fc 740 let rec search_semic = function
faf9a90c 741 [] -> []
002099fc 742 | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
faf9a90c 743 if List.mem (str_of_token2 x) [";";")";","]
002099fc
C
744 then x :: search_minus false xs
745 else x :: search_semic xs
746 | x::xs -> x :: search_semic xs
747 and search_minus seen_minus xs =
748 let (spaces, rest) = Common.span is_space xs in
749 (* only delete spaces if something is actually deleted *)
750 match rest with
751 ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest
752 | _ -> if seen_minus then rest else xs in
753 List.rev (search_semic toks)
34e49164
C
754
755let is_ident_like s = s ==~ Common.regexp_alpha
756
ae4735db 757let rec add_space xs =
34e49164
C
758 match xs with
759 | [] -> []
760 | [x] -> [x]
5636bb2c 761 | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
708f4980
C
762 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
763 not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
764 (* this only works within a line. could consider whether
765 something should be done to add newlines too, rather than
766 printing them explicitly in unparse_cocci. *)
767 x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
ae4735db 768 | x::y::xs ->
34e49164
C
769 let sx = str_of_token2 x in
770 let sy = str_of_token2 y in
faf9a90c 771 if is_ident_like sx && is_ident_like sy
34e49164
C
772 then x::C2 " "::(add_space (y::xs))
773 else x::(add_space (y::xs))
774
5636bb2c
C
775(* The following only works for the outermost function call. Stack records
776the column of all open parentheses. Space_cell contains the most recent
777comma in the outermost function call. The goal is to decide whether this
778should be followed by a space or a newline and indent. *)
779let add_newlines toks tabbing_unit =
780 let create_indent n =
781 let (tu,tlen) =
782 match tabbing_unit with
783 Some ("\t",_) -> ("\t",8)
784 | Some ("",_) -> ("\t",8) (* not sure why... *)
785 | Some (s,_) -> (s,String.length s) (* assuming only spaces *)
786 | None -> ("\t",8) in
787 let rec loop seen =
788 if seen + tlen <= n
789 then tu ^ loop (seen + tlen)
790 else String.make (n-seen) ' ' in
791 loop 0 in
792 let check_for_newline count x = function
793 Some (start,space_cell) when count > Flag_parsing_c.max_width ->
794 space_cell := "\n"^(create_indent x);
795 Some (x + (count - start))
796 | _ -> None in
797 (* the following is for strings that may contain newline *)
798 let string_length s count =
799 let l = list_of_string s in
800 List.fold_left
801 (function count ->
802 function
803 '\t' -> count + 8
804 | '\n' -> 0
805 | c -> count + 1)
806 count l in
807 let rec loop info count = function
808 [] -> []
809 | ((T2(tok,_,_)) as a)::xs ->
810 a :: loop info (string_length (TH.str_of_tok tok) count) xs
811 | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs ->
812 let (stack,space_cell) = info in
813 let rest =
814 match hint with
815 None -> loop info (count + (String.length s)) xs
816 | Some Unparse_cocci.StartBox ->
817 let count = count + (String.length s) in
818 loop (count::stack,space_cell) count xs
819 | Some Unparse_cocci.EndBox ->
820 let count = count + (String.length s) in
821 (match stack with
822 [x] ->
823 (match check_for_newline count x space_cell with
824 Some count -> loop ([],None) count xs
825 | None -> loop ([],None) count xs)
826 | _ -> loop (List.tl stack,space_cell) count xs)
827 | Some (Unparse_cocci.SpaceOrNewline sp) ->
828 let count = count + (String.length s) + 1 (*space*) in
829 (match stack with
830 [x] ->
831 (match check_for_newline count x space_cell with
832 Some count -> loop (stack,Some (x,sp)) count xs
833 | None -> loop (stack,Some (count,sp)) count xs)
834 | _ -> loop info count xs) in
835 a :: rest
836 | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs
837 | Fake2 :: _ | Indent_cocci2 :: _
838 | Unindent_cocci2 _::_ ->
839 failwith "unexpected fake, indent, or unindent" in
840 let redo_spaces prev = function
841 Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) ->
842 C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev
843 | t -> t::prev in
844 (match !Flag_parsing_c.spacing with
845 Flag_parsing_c.SMPL -> toks
846 | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks)))
34e49164
C
847
848(* When insert some new code, because of a + in a SP, we must add this
849 * code at the right place, with the good indentation. So each time we
850 * encounter some spacing info, with some newline, we maintain the
851 * current indentation level used.
ae4735db 852 *
34e49164 853 * TODO problems: not accurate. ex: TODO
ae4735db 854 *
34e49164
C
855 * TODO: if in #define region, should add a \ \n
856 *)
ae4735db 857let new_tabbing2 space =
34e49164
C
858 (list_of_string space)
859 +> List.rev
b1b2de81 860 +> Common.take_until (fun c -> c =<= '\n')
34e49164
C
861 +> List.rev
862 +> List.map string_of_char
863 +> String.concat ""
864
ae4735db 865let new_tabbing a =
34e49164
C
866 Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
867
868
ae4735db 869let rec adjust_indentation xs =
aa721442 870
34e49164
C
871 let _current_tabbing = ref "" in
872 let tabbing_unit = ref None in
873
874 let string_of_list l = String.concat "" (List.map string_of_char l) in
875
876 (* try to pick a tabbing unit for the plus code *)
877 let adjust_tabbing_unit old_tab new_tab =
b1b2de81 878 if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
34e49164
C
879 then
880 let old_tab = list_of_string old_tab in
881 let new_tab = list_of_string new_tab in
882 let rec loop = function
883 ([],new_tab) ->
884 tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
885 | (_,[]) -> failwith "not possible"
886 | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
887 loop (old_tab,new_tab) in
888
889 let remtab tu current_tab =
890 let current_tab = List.rev(list_of_string current_tab) in
891 let rec loop = function
892 ([],new_tab) -> string_of_list (List.rev new_tab)
0708f913 893 | (_,[]) -> "" (*weird; tabbing unit used up more than the current tab*)
b1b2de81 894 | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
34e49164
C
895 | (_,ns) -> (* mismatch; remove what we can *)
896 string_of_list (List.rev ns) in
897 loop (tu,current_tab) in
898
899 let rec find_first_tab started = function
900 [] -> ()
b1b2de81 901 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
34e49164 902 find_first_tab true xs
485bce71 903(* patch: coccinelle *)
34e49164
C
904 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
905 when started ->
906 let s = str_of_token2 x +> new_tabbing in
907 tabbing_unit := Some (s,List.rev (list_of_string s))
908 | x::xs -> find_first_tab started xs in
909 find_first_tab false xs;
910
ae4735db 911 let rec aux started xs =
34e49164
C
912 match xs with
913 | [] -> []
485bce71 914(* patch: coccinelle *)
34e49164 915 | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
5636bb2c 916 ((Cocci2 ("{",_,_,_,_)) as a)::xs
708f4980 917 when started && str_of_token2 x =$= ")" ->
34e49164 918 (* to be done for if, etc, but not for a function header *)
708f4980 919 x::(C2 " ")::a::(aux started xs)
34e49164 920 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs ->
ae4735db 921 let old_tabbing = !_current_tabbing in
34e49164
C
922 str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
923 (* only trust the indentation after the first { *)
c3e37e97
C
924 (if started
925 then adjust_tabbing_unit old_tabbing !_current_tabbing);
34e49164
C
926 let coccis_rest = Common.span all_coccis xs in
927 (match coccis_rest with
b1b2de81 928 (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
34e49164
C
929 (* the case where cocci code has been added before a close } *)
930 x::aux started (Indent_cocci2::xs)
931 | _ -> x::aux started xs)
932 | Indent_cocci2::xs ->
933 (match !tabbing_unit with
934 None -> aux started xs
935 | Some (tu,_) ->
936 _current_tabbing := (!_current_tabbing)^tu;
5636bb2c 937 Cocci2 (tu,-1,-1,-1,None)::aux started xs)
c3e37e97 938 | Unindent_cocci2(permanent)::xs ->
34e49164
C
939 (match !tabbing_unit with
940 None -> aux started xs
941 | Some (_,tu) ->
942 _current_tabbing := remtab tu (!_current_tabbing);
943 aux started xs)
944 (* border between existing code and cocci code *)
5636bb2c 945 | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs
b1b2de81 946 when str_of_token2 x =$= "{" ->
34e49164
C
947 x::aux true (y::Indent_cocci2::xs)
948 | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
b1b2de81 949 when str_of_token2 y =$= "}" ->
c3e37e97 950 x::aux started (y::Unindent_cocci2 true::xs)
34e49164 951 (* starting the body of the function *)
b1b2de81 952 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs
5636bb2c
C
953 | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs
954 | ((Cocci2("\n",_,_,_,_)) as x)::Unindent_cocci2(false)::xs ->
c3e37e97 955 x::aux started xs
5636bb2c 956 | ((Cocci2("\n",_,_,_,_)) as x)::xs ->
0708f913 957 (* dont inline in expr because of weird eval order of ocaml *)
ae4735db 958 let s = !_current_tabbing in
5636bb2c 959 x::Cocci2 (s,-1,-1,-1,None)::aux started xs
34e49164 960 | x::xs -> x::aux started xs in
5636bb2c 961 (aux false xs,!tabbing_unit)
34e49164
C
962
963
964let rec find_paren_comma = function
965 | [] -> ()
966
967 (* do nothing if was like this in original file *)
968 | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
ae4735db 969 ::xs when p2 =|= p1 + 1 ->
34e49164
C
970 find_paren_comma (x2::xs)
971
972 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
ae4735db 973 ::xs when p2 =|= p1 + 1 ->
34e49164
C
974 find_paren_comma (x2::xs)
975
976 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
ae4735db 977 ::xs when p2 =|= p1 + 1 ->
34e49164
C
978 find_paren_comma (x2::xs)
979
980 (* otherwise yes can adjust *)
ae4735db 981 | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
34e49164
C
982 x2.remove <- true;
983 find_paren_comma (x2::xs)
ae4735db 984 | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
34e49164
C
985 x1.remove <- true;
986 find_paren_comma (x2::xs)
987
ae4735db 988 | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
34e49164
C
989 x1.remove <- true;
990 find_paren_comma (x2::xs)
991
ae4735db 992 | x::xs ->
34e49164 993 find_paren_comma xs
34e49164 994
ae4735db
C
995
996let fix_tokens toks =
34e49164
C
997 let toks = toks +> List.map mk_token_extended in
998
999 let cleaner = toks +> Common.exclude (function
1000 | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
1001 | _ -> false
1002 ) in
1003 find_paren_comma cleaner;
1004
1005 let toks = rebuild_tokens_extented toks in
1006 toks +> List.map (fun x -> x.tok2)
1007
1008
1009
1010(*****************************************************************************)
1011(* Final unparsing (and debugging support) *)
1012(*****************************************************************************)
1013
1014(* for debugging *)
1015type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
1016
1017let kind_of_token2 = function
1018 | Fake2 -> KFake
1019 | Cocci2 _ -> KCocci
1020 | C2 _ -> KC
1021 | T2 (t,_,_) ->
1022 (match TH.pinfo_of_tok t with
1023 | ExpandedTok _ -> KExpanded
1024 | OriginTok _ -> KOrigin
1025 | FakeTok _ -> raise Impossible (* now a Fake2 *)
1026 | AbstractLineTok _ -> raise Impossible (* now a KC *)
1027 )
c3e37e97 1028 | Unindent_cocci2 _ | Indent_cocci2 -> raise Impossible
ae4735db 1029
34e49164
C
1030let end_mark = "!"
1031
1032let start_mark = function
1033 | KFake -> "!F!"
ae4735db 1034 | KCocci -> "!S!"
34e49164
C
1035 | KC -> "!A!"
1036 | KExpanded -> "!E!"
1037 | KOrigin -> ""
1038
1039let print_all_tokens2 pr xs =
1040 if !Flag_parsing_c.debug_unparsing
1041 then
1042 let current_kind = ref KOrigin in
ae4735db 1043 xs +> List.iter (fun t ->
34e49164 1044 let newkind = kind_of_token2 t in
b1b2de81 1045 if newkind =*= !current_kind
34e49164
C
1046 then pr (str_of_token2 t)
1047 else begin
1048 pr (end_mark);
1049 pr (start_mark newkind);
1050 pr (str_of_token2 t);
1051 current_kind := newkind
1052 end
1053 );
ae4735db 1054 else
34e49164 1055 xs +> List.iter (fun x -> pr (str_of_token2 x))
ae4735db 1056
34e49164
C
1057
1058
1059
1060(*****************************************************************************)
1061(* Entry points *)
1062(*****************************************************************************)
1063
1064(* old: PPviatok was made in the beginning to allow to pretty print a
1065 * complete C file, including a modified C file by transformation.ml,
1066 * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
1067 * maybe flow_to_ast) all the cases. Indeed we don't need to do some
1068 * fancy stuff when a function was not modified at all. Just need to
1069 * print the list of token as-is. But now pretty_print_c.ml handles
1070 * almost everything so maybe less useful. Maybe PPviatok allows to
ae4735db
C
1071 * optimize a little the pretty printing.
1072 *
34e49164
C
1073 * update: now have PPviastr which goes even faster than PPviatok, so
1074 * PPviatok has disappeared.
1075 *)
1076
1077type ppmethod = PPnormal | PPviastr
1078
1079
1080
1081
1082(* The pp_program function will call pretty_print_c.ml with a special
ae4735db 1083 * function to print the leaf components, the tokens. When we want to
34e49164 1084 * print a token, we need to print also maybe the space and comments that
ae4735db 1085 * were close to it in the original file (and that was omitted during the
34e49164
C
1086 * parsing phase), and honor what the cocci-info attached to the token says.
1087 * Maybe we will not print the token if it's a MINUS-token, and maybe we will
ae4735db 1088 * print it and also print some cocci-code attached in a PLUS to it.
34e49164
C
1089 * So we will also maybe call unparse_cocci. Because the cocci-code may
1090 * contain metavariables, unparse_cocci will in fact sometimes call back
1091 * pretty_print_c (which will this time don't call back again unparse_cocci)
1092 *)
1093
ae4735db
C
1094let pp_program2 xs outfile =
1095 Common.with_open_outfile outfile (fun (pr,chan) ->
1096 let pr s =
1097 if !Flag_parsing_c.debug_unparsing
34e49164 1098 then begin pr2_no_nl s; flush stderr end
ae4735db 1099 else pr s
34e49164
C
1100 (* flush chan; *)
1101 (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1102 in
ae4735db
C
1103
1104 xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
34e49164
C
1105 (* here can still work on ast *)
1106 let e = remove_useless_fakeInfo_struct e in
ae4735db 1107
34e49164 1108 match ppmethod with
002099fc 1109 | PPnormal ->
34e49164
C
1110 (* now work on tokens *)
1111
1112 (* phase1: just get all the tokens, all the information *)
ae4735db 1113 assert(toks_e +> List.for_all (fun t ->
34e49164
C
1114 TH.is_origin t or TH.is_expanded t
1115 ));
1116 let toks = get_fakeInfo_and_tokens e toks_e in
1117 let toks = displace_fake_nodes toks in
1118 (* assert Origin;ExpandedTok;Faketok *)
1119 let toks = expand_mcode toks in
1120 (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1121 * and no tag information, just NOTHING. *)
fc1ad971
C
1122
1123 let toks =
1124 if !Flag.sgrep_mode2
1125 then drop_minus toks (* nothing to do for sgrep *)
1126 else
1127 (* phase2: can now start to filter and adjust *)
5636bb2c 1128 let (toks,tu) = adjust_indentation toks in
fc1ad971
C
1129 let toks = adjust_before_semicolon toks in(*before remove minus*)
1130 let toks = remove_minus_and_between_and_expanded_and_fake toks in
1131 (* assert Origin + Cocci + C and no minus *)
1132 let toks = add_space toks in
5636bb2c 1133 let toks = add_newlines toks tu in
fc1ad971
C
1134 let toks = fix_tokens toks in
1135 toks in
34e49164 1136
ae4735db 1137 (* in theory here could reparse and rework the ast! or
34e49164
C
1138 * apply some SP. Not before cos julia may have generated
1139 * not parsable file. Need do unparsing_tricks call before being
1140 * ready to reparse. *)
1141 print_all_tokens2 pr toks;
1142
1143 | PPviastr -> pr str
1144 )
1145 )
1146
ae4735db 1147let pp_program a b =
34e49164
C
1148 Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
1149
485bce71 1150
ae4735db 1151let pp_program_default xs outfile =
485bce71
C
1152 let xs' = xs +> List.map (fun x -> x, PPnormal) in
1153 pp_program xs' outfile