Coccinelle release 1.0.0-rc3
[bpt/coccinelle.git] / parsing_c / unparse_c.ml
CommitLineData
0708f913 1(* Yoann Padioleau, Julia Lawall
ae4735db
C
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
0708f913 4 * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
34e49164
C
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
34e49164
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
ae4735db
C
14 *
15 *
485bce71 16 * Modifications by Julia Lawall for better newline handling.
34e49164
C
17 *)
18open Common
19
20open Ast_c
21
22module TH = Token_helpers
23
24
b1b2de81
C
25(* should keep comments and directives in between adjacent deleted terms,
26but not comments and directives within deleted terms. should use the
27labels found in the control-flow graph *)
28
34e49164
C
29
30
708f4980
C
31(*****************************************************************************)
32(* Wrappers *)
33(*****************************************************************************)
34let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
35
34e49164
C
36(*****************************************************************************)
37(* Types used during the intermediate phases of the unparsing *)
38(*****************************************************************************)
39
ae4735db 40type token1 =
34e49164
C
41 | Fake1 of info
42 | T1 of Parser_c.token
43
44(* The cocci_tag of the token should always be a NOTHING. The mark of
45 * the token can only be OriginTok or ExpandedTok. Why not get rid of
46 * token and get something simpler ? because we need to know if the
47 * info is a TCommentCpp or TCommentSpace, etc for some of the further
48 * analysis so easier to keep with the token.
ae4735db 49 *
34e49164
C
50 * This type contains the whole information. Have all the tokens with this
51 * type.
52 *)
708f4980 53type min =
5626f154 54 Min of (int list (* match numbers from witness trees *) *
8babbc8f 55 Ast_cocci.adjacency (* adjacency information *))
708f4980
C
56 | Ctx
57
ae4735db
C
58type token2 =
59 | T2 of Parser_c.token * min *
34e49164 60 int option (* orig index, abstracting away comments and space *)
8babbc8f 61 | Fake2 of min
708f4980 62 | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
5636bb2c 63 * Unparse_cocci.nlhint option
34e49164 64 | C2 of string
c491d8ee 65 | Comma of string
34e49164 66 | Indent_cocci2
c3e37e97 67 | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
34e49164
C
68
69(* not used yet *)
ae4735db 70type token3 =
34e49164
C
71 | T3 of Parser_c.token
72 | Cocci3 of string
73 | C3 of string
74
75
76(* similar to the tech in parsing_hack *)
77type token_extended = {
78 tok2 : token2;
79 str : string;
80 idx: int option; (* to know if 2 tokens were consecutive in orig file *)
81 mutable new_tokens_before : token2 list;
82 mutable remove : bool;
83}
84
85
86(*****************************************************************************)
87(* Helpers *)
88(*****************************************************************************)
89
ae4735db 90let info_of_token1 t =
34e49164
C
91 match t with
92 | Fake1 info -> info
93 | T1 tok -> TH.info_of_tok tok
94
002099fc
C
95let print_token1 = function
96 T1 tok -> TH.str_of_tok tok
97 | Fake1 info -> "fake"
98
34e49164
C
99let str_of_token2 = function
100 | T2 (t,_,_) -> TH.str_of_tok t
8babbc8f 101 | Fake2 _ -> ""
5636bb2c 102 | Cocci2 (s,_,_,_,_) -> s
34e49164 103 | C2 s -> s
c491d8ee 104 | Comma s -> s
34e49164 105 | Indent_cocci2 -> ""
c3e37e97 106 | Unindent_cocci2 _ -> ""
34e49164
C
107
108let print_token2 = function
708f4980 109 | T2 (t,b,_) ->
8babbc8f
C
110 let t_str =
111 match t with
112 | Parser_c.TCommentSpace _ -> " sp "
113 | Parser_c.TCommentNewline _ -> " nl "
114 | Parser_c.TCommentCpp _ -> " cp "
115 | Parser_c.TCommentMisc _ -> " misc "
116 | Parser_c.TComment _ -> " comment "
117 | _ -> "" in
708f4980
C
118 let b_str =
119 match b with
120 Min (index,adj) ->
8babbc8f
C
121 Printf.sprintf "-%d[%s]"
122 (match adj with Ast_cocci.ADJ n -> n | _ -> -1)
708f4980
C
123 (String.concat " " (List.map string_of_int index))
124 | Ctx -> "" in
8babbc8f
C
125 "T2:"^b_str^t_str^TH.str_of_tok t
126 | Fake2 b ->
127 let b_str =
128 match b with
129 Min (index,adj) ->
130 Printf.sprintf "-%d[%s]"
131 (match adj with Ast_cocci.ADJ n -> n | _ -> -1)
132 (String.concat " " (List.map string_of_int index))
133 | Ctx -> "" in
134 b_str^"fake"
5636bb2c 135 | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
34e49164 136 | C2 s -> "C2:"^s
c491d8ee 137 | Comma s -> "Comma:"^s
34e49164 138 | Indent_cocci2 -> "Indent"
c3e37e97 139 | Unindent_cocci2 _ -> "Unindent"
34e49164 140
002099fc 141let simple_print_all_tokens1 l =
90aeb998 142 List.iter (function x -> Printf.printf "|%s| " (print_token1 x)) l;
002099fc
C
143 Printf.printf "\n"
144
708f4980 145let simple_print_all_tokens2 l =
fc1ad971 146 List.iter (function x -> Printf.printf "|%s| " (print_token2 x)) l;
34e49164
C
147 Printf.printf "\n"
148
149let str_of_token3 = function
150 | T3 t -> TH.str_of_tok t
151 | Cocci3 s | C3 s -> s
152
153
154
ae4735db
C
155let mk_token_extended x =
156 let origidx =
34e49164 157 match x with
ae4735db 158 | T2 (_,_, idx) -> idx
34e49164
C
159 | _ -> None
160 in
ae4735db 161 { tok2 = x;
34e49164
C
162 str = str_of_token2 x;
163 idx = origidx;
164 new_tokens_before = [];
165 remove = false;
166 }
167
ae4735db 168let rebuild_tokens_extented toks_ext =
34e49164 169 let _tokens = ref [] in
ae4735db 170 toks_ext +> List.iter (fun tok ->
34e49164
C
171 tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
172 if not tok.remove then push2 tok.tok2 _tokens;
173 );
174 let tokens = List.rev !_tokens in
175 (tokens +> List.map mk_token_extended)
176
177
178let mcode_contain_plus = function
179 | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
180 | Ast_cocci.CONTEXT _ -> true
485bce71 181(* patch: when need full coccinelle transformation *)
8babbc8f
C
182 | Ast_cocci.MINUS (_,_,_,Ast_cocci.NOREPLACEMENT) -> false
183 | Ast_cocci.MINUS (_,_,_,Ast_cocci.REPLACEMENT _) -> true(*REPL is not empty*)
951c7801 184 | Ast_cocci.PLUS _ -> raise Impossible
34e49164 185
ae4735db 186let contain_plus info =
34e49164
C
187 let mck = Ast_c.mcode_of_info info in
188 mcode_contain_plus mck
189
190(*****************************************************************************)
191(* Last fix on the ast *)
192(*****************************************************************************)
193
ae4735db 194(* Because of the ugly trick to handle initialiser, I generate fake ','
34e49164
C
195 * for the last initializer element, but if there is nothing around it,
196 * I don't want in the end to print it.
197 *)
198
ae4735db 199let remove_useless_fakeInfo_struct program =
34e49164 200 let bigf = { Visitor_c.default_visitor_c_s with
ae4735db 201 Visitor_c.kini_s = (fun (k,bigf) ini ->
34e49164 202 match k ini with
113803cf 203 | InitList args, ii ->
34e49164
C
204 (match ii with
205 | [_i1;_i2] -> ini
113803cf 206 | [i1;i2;iicommaopt] ->
34e49164
C
207 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
208 && (Ast_c.is_fake iicommaopt)
209 (* sometimes the guy put a normal iicommaopt *)
34e49164
C
210 then InitList args, [i1;i2]
211 else InitList args, [i1;i2;iicommaopt]
ae4735db 212 | [i1;i2;iicommaopt;end_comma_opt] ->
113803cf
C
213 (* only in #define. end_comma_opt canot be fake *)
214 (* not sure if this will be considered ambiguous with a previous
215 case? *)
216 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
217 && (Ast_c.is_fake iicommaopt)
218 (* sometimes the guy put a normal iicommaopt *)
219 then InitList args, [i1;i2;end_comma_opt]
220 else InitList args, [i1;i2;iicommaopt;end_comma_opt]
34e49164
C
221 | _ -> raise Impossible
222 )
223 | x -> x
224 )
225 } in
226 Visitor_c.vk_toplevel_s bigf program
227
228
229(*****************************************************************************)
230(* Tokens1 generation *)
231(*****************************************************************************)
232
ae4735db
C
233let get_fakeInfo_and_tokens celem toks =
234 let toks_in = ref toks in
34e49164
C
235 let toks_out = ref [] in
236
237 (* todo? verify good order of position ? *)
ae4735db 238 let pr_elem info =
34e49164 239 match Ast_c.pinfo_of_info info with
ae4735db 240 | FakeTok _ ->
34e49164 241 Common.push2 (Fake1 info) toks_out
ae4735db 242 | OriginTok _ | ExpandedTok _ ->
34e49164 243 (* get the associated comments/space/cppcomment tokens *)
978fd7e5 244 let (before, x, after) =
ae4735db 245 !toks_in +> Common.split_when (fun tok ->
34e49164
C
246 info =*= TH.info_of_tok tok)
247 in
b1b2de81 248 assert(info =*= TH.info_of_tok x);
34e49164 249 (*old: assert(before +> List.for_all (TH.is_comment)); *)
ae4735db 250 before +> List.iter (fun x ->
34e49164 251 if not (TH.is_comment x)
0708f913 252 then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
34e49164
C
253 (* case such as int asm d3("x"); not yet in ast *)
254 );
255 before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
256 push2 (T1 x) toks_out;
257 toks_in := after;
ae4735db 258 | AbstractLineTok _ ->
34e49164
C
259 (* can be called on type info when for instance use -type_c *)
260 if !Flag_parsing_c.pretty_print_type_info
261 then Common.push2 (Fake1 info) toks_out
ae4735db 262 else raise Impossible (* at this stage *)
34e49164
C
263 in
264
265 let pr_space _ = () in (* use the spacing that is there already *)
266
267 Pretty_print_c.pp_program_gen pr_elem pr_space celem;
268
269 if not (null !toks_in)
0708f913 270 then failwith "WEIRD: unparsing not finished";
34e49164
C
271
272 List.rev !toks_out
273
8babbc8f 274(* Fake nodes that have BEFORE code or are - should be moved over any subsequent
34e49164 275whitespace and newlines, but not any comments, to get as close to the affected
002099fc 276code as possible. Similarly, fake nodes that have AFTER code should be moved
34e49164
C
277backwards. No fake nodes should have both before and after code. *)
278
279let displace_fake_nodes toks =
280 let is_fake = function Fake1 _ -> true | _ -> false in
281 let is_whitespace = function
282 T1(Parser_c.TCommentSpace _)
485bce71 283 (* patch: cocci *)
34e49164
C
284 | T1(Parser_c.TCommentNewline _) -> true
285 | _ -> false in
286 let rec loop toks =
287 let fake_info =
288 try Some (Common.split_when is_fake toks)
289 with Not_found -> None in
290 match fake_info with
291 Some(bef,((Fake1 info) as fake),aft) ->
292 (match !(info.cocci_tag) with
ae4735db 293 | Some x ->
708f4980 294 (match x with
8babbc8f
C
295 (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_)
296 | (Ast_cocci.MINUS(_,_,_,Ast_cocci.REPLACEMENT _),_) ->
34e49164 297 (* move the fake node forwards *)
002099fc
C
298 let (whitespace,rest) = Common.span is_whitespace aft in
299 bef @ whitespace @ fake :: (loop rest)
300 | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
34e49164 301 (* move the fake node backwards *)
002099fc
C
302 let revbef = List.rev bef in
303 let (revwhitespace,revprev) = Common.span is_whitespace revbef in
304 let whitespace = List.rev revwhitespace in
305 let prev = List.rev revprev in
306 prev @ fake :: (loop (whitespace @ aft))
307 | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
308 bef @ fake :: (loop aft)
309 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
310 failwith "fake node should not be before-after"
311 | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
485bce71 312 )
ae4735db 313 | None ->
708f4980
C
314 bef @ fake :: (loop aft)
315 )
34e49164
C
316 | None -> toks
317 | _ -> raise Impossible in
318 loop toks
319
320(*****************************************************************************)
321(* Tokens2 generation *)
322(*****************************************************************************)
323
b1b2de81 324let comment2t2 = function
5636bb2c
C
325 (Token_c.TCommentCpp
326 (* not sure iif the following list is exhaustive or complete *)
327 (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError),
328 (info : Token_c.info)) ->
329 C2(info.Common.str)
330 | (Token_c.TCommentCpp x,(info : Token_c.info)) ->
b1b2de81
C
331 C2("\n"^info.Common.str^"\n")
332 | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
333
ae4735db 334let expand_mcode toks =
34e49164
C
335 let toks_out = ref [] in
336
337 let index = ref 0 in
338
ae4735db 339 let add_elem t minus =
34e49164 340 match t with
ae4735db 341 | Fake1 info ->
34e49164 342 let str = Ast_c.str_of_info info in
b1b2de81 343 if str =$= ""
8babbc8f 344 then push2 (Fake2 minus) toks_out
c491d8ee
C
345 (* fx the fake "," at the end of a structure or enum.
346 no idea what other fake info there can be... *)
347 else push2 (Comma str) toks_out
ae4735db
C
348
349
002099fc 350 | T1 tok ->
b1b2de81 351 (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
34e49164 352 (* no tag on expandedTok ! *)
fc1ad971
C
353 let modified = function
354 None -> false
355 | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
356 | _ -> true in
357 (if (TH.is_expanded tok &&
358 modified !((TH.info_of_tok tok).cocci_tag)
359 (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*))
0708f913
C
360 then
361 failwith
362 (Printf.sprintf
363 "expanded token %s on line %d is either modified or stored in a metavariable"
364 (TH.str_of_tok tok) (TH.line_of_tok tok)));
34e49164 365
ae4735db 366 let tok' = tok +> TH.visitor_info_of_tok (fun i ->
34e49164
C
367 { i with cocci_tag = ref Ast_c.emptyAnnot; }
368 ) in
369
ae4735db 370 let optindex =
34e49164
C
371 if TH.is_origin tok && not (TH.is_real_comment tok)
372 then begin
373 incr index;
374 Some !index
375 end
376 else None
377 in
378
379 push2 (T2 (tok', minus, optindex)) toks_out
380 in
381
ae4735db
C
382 let expand_info t =
383 let (mcode,env) =
708f4980 384 Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
34e49164 385
5636bb2c
C
386 let pr_cocci s ln col rcol hint =
387 push2 (Cocci2(s,ln,col,rcol,hint)) toks_out in
ae4735db 388 let pr_c info =
b1b2de81
C
389 (match Ast_c.pinfo_of_info info with
390 Ast_c.AbstractLineTok _ ->
391 push2 (C2 (Ast_c.str_of_info info)) toks_out
392 | Ast_c.FakeTok (s,_) ->
393 push2 (C2 s) toks_out
34e49164
C
394 | _ ->
395 Printf.printf "line: %s\n" (Common.dump info);
b1b2de81
C
396 failwith "not an abstract line");
397 (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
398 List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
399
708f4980 400 let pr_barrier ln col = (* marks a position, used around C code *)
5636bb2c 401 push2 (Cocci2("",ln,col,col,None)) toks_out in
708f4980 402 let pr_nobarrier ln col = () in (* not needed for linux spacing *)
b1b2de81 403
708f4980 404 let pr_cspace _ = push2 (C2 " ") toks_out in
34e49164 405
708f4980
C
406 let pr_space _ = () (* rely on add_space in cocci code *) in
407 let pr_arity _ = () (* not interested *) in
34e49164
C
408
409 let indent _ = push2 Indent_cocci2 toks_out in
c3e37e97 410 let unindent x = push2 (Unindent_cocci2 x) toks_out in
34e49164 411
708f4980
C
412 let args_pp =
413 (env, pr_cocci, pr_c, pr_cspace,
414 (match !Flag_parsing_c.spacing with
415 Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
416 pr_arity,
417 (match !Flag_parsing_c.spacing with
418 Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
419 indent, unindent) in
34e49164 420
ae4735db
C
421 (* old: when for yacfe with partial cocci:
422 * add_elem t false;
485bce71 423 *)
34e49164 424
485bce71 425 (* patch: when need full coccinelle transformation *)
faf9a90c 426 let unparser = Unparse_cocci.pp_list_list_any args_pp false in
34e49164 427 match mcode with
ae4735db 428 | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
34e49164 429 (* Why adding ? because I want to have all the information, the whole
ae4735db 430 * set of tokens, so I can then process and remove the
34e49164 431 * is_between_two_minus for instance *)
708f4980 432 add_elem t (Min (inst,adj));
8babbc8f
C
433 (match any_xxs with
434 Ast_cocci.NOREPLACEMENT -> ()
435 | Ast_cocci.REPLACEMENT(any_xxs,_) ->
436 unparser any_xxs Unparse_cocci.InPlace)
ae4735db 437 | Ast_cocci.CONTEXT (_,any_befaft) ->
34e49164 438 (match any_befaft with
ae4735db 439 | Ast_cocci.NOTHING ->
708f4980 440 add_elem t Ctx
951c7801 441 | Ast_cocci.BEFORE (xxs,_) ->
faf9a90c 442 unparser xxs Unparse_cocci.Before;
708f4980 443 add_elem t Ctx
ae4735db 444 | Ast_cocci.AFTER (xxs,_) ->
708f4980 445 add_elem t Ctx;
faf9a90c 446 unparser xxs Unparse_cocci.After;
ae4735db 447 | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
faf9a90c 448 unparser xxs Unparse_cocci.Before;
708f4980 449 add_elem t Ctx;
faf9a90c 450 unparser yys Unparse_cocci.After;
34e49164 451 )
951c7801 452 | Ast_cocci.PLUS _ -> raise Impossible
34e49164
C
453
454 in
455
456 toks +> List.iter expand_info;
457 List.rev !toks_out
ae4735db 458
34e49164
C
459
460(*****************************************************************************)
461(* Tokens2 processing, filtering, adjusting *)
462(*****************************************************************************)
463
002099fc 464let is_space = function
fc1ad971 465 | T2(Parser_c.TCommentSpace _,_b,_i) -> true (* only whitespace *)
ae4735db 466 | _ -> false
fc1ad971
C
467
468let is_newline = function
469 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
470 | _ -> false
471
472let is_whitespace = function
ae4735db 473 | (T2 (t,_b,_i)) ->
002099fc
C
474 (match t with
475 | Parser_c.TCommentSpace _ -> true (* only whitespace *)
fc1ad971 476 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
002099fc
C
477 | _ -> false
478 )
ae4735db 479 | _ -> false
002099fc 480
34e49164 481let is_minusable_comment = function
ae4735db 482 | (T2 (t,_b,_i)) ->
34e49164
C
483 (match t with
484 | Parser_c.TCommentSpace _ (* only whitespace *)
ae4735db 485 (* patch: coccinelle *)
002099fc 486 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
7f004419 487 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
002099fc
C
488 | Parser_c.TComment _
489 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
b1b2de81
C
490 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
491 | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
34e49164
C
492 -> true
493
b1b2de81 494 | Parser_c.TCommentMisc _
0708f913 495 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
34e49164
C
496 -> false
497
498 | _ -> false
499 )
ae4735db 500 | _ -> false
34e49164 501
fc1ad971 502let is_minusable_comment_nocpp = function
ae4735db 503 | (T2 (t,_b,_i)) ->
fc1ad971
C
504 (match t with
505 | Parser_c.TCommentSpace _ (* only whitespace *)
ae4735db 506 (* patch: coccinelle *)
fc1ad971 507 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
7f004419 508 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
fc1ad971
C
509 | Parser_c.TComment _ -> true
510 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
511 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
512 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
513 -> false
514
515 | Parser_c.TCommentMisc _
516 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
517 -> false
518
519 | _ -> false
520 )
ae4735db 521 | _ -> false
fc1ad971 522
34e49164 523let all_coccis = function
c491d8ee 524 Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ -> true
34e49164
C
525 | _ -> false
526
b1b2de81
C
527(*previously gave up if the first character was a newline, but not clear why*)
528let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
34e49164 529
708f4980 530let set_minus_comment adj = function
ae4735db 531 | T2 (t,Ctx,idx) ->
34e49164
C
532 let str = TH.str_of_tok t in
533 (match t with
534 | Parser_c.TCommentSpace _
ae4735db 535(* patch: coccinelle *)
34e49164
C
536 | Parser_c.TCommentNewline _ -> ()
537
ae4735db
C
538 | Parser_c.TComment _
539 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
b1b2de81
C
540 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
541 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
ae4735db 542 ->
b1b2de81
C
543 pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
544 (TH.line_of_tok t) str)
34e49164
C
545 | _ -> raise Impossible
546 );
708f4980 547 T2 (t, Min adj, idx)
ae4735db 548(* patch: coccinelle *)
fc1ad971 549 | T2 (t,Min adj,idx) as x -> x
8babbc8f 550 | Fake2 _ as x -> x
34e49164
C
551 | _ -> raise Impossible
552
708f4980 553let set_minus_comment_or_plus adj = function
c491d8ee 554 Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ as x -> x
708f4980 555 | x -> set_minus_comment adj x
34e49164 556
fc1ad971
C
557let drop_minus xs =
558 xs +> Common.exclude (function
559 | T2 (t,Min adj,_) -> true
560 | _ -> false
561 )
562
8babbc8f 563let drop_expanded xs =
5626f154 564 xs +> Common.exclude (function
34e49164 565 | T2 (t,_,_) when TH.is_expanded t -> true
8babbc8f
C
566 | _ -> false
567 )
568
569let drop_fake xs =
570 xs +> Common.exclude (function
571 | Fake2 _ -> true
34e49164
C
572 | _ -> false
573 )
5626f154
C
574
575let remove_minus_and_between_and_expanded_and_fake xs =
576
8babbc8f
C
577 (* get rid of expanded tok *)
578 let xs = drop_expanded xs in
34e49164 579
113803cf 580 let minus_or_comment = function
708f4980 581 T2(_,Min adj,_) -> true
113803cf
C
582 | x -> is_minusable_comment x in
583
fc1ad971
C
584 let minus_or_comment_nocpp = function
585 T2(_,Min adj,_) -> true
586 | x -> is_minusable_comment_nocpp x in
587
588 let common_adj (index1,adj1) (index2,adj2) =
8babbc8f
C
589 let same_adj = (* same adjacency info *)
590 match (adj1,adj2) with
591 (Ast_cocci.ADJ adj1,Ast_cocci.ADJ adj2) -> adj1 = adj2
592 | (Ast_cocci.ALLMINUS,_) | (_,Ast_cocci.ALLMINUS) -> true in
593 same_adj &&
fc1ad971
C
594 (* non-empty intersection of witness trees *)
595 not ((Common.inter_set index1 index2) = []) in
596
fc1ad971
C
597 (* new idea: collects regions not containing non-space context code
598 if two adjacent adjacent minus tokens satisfy common_adj then delete
599 all spaces, comments etc between them
600 if two adjacent minus tokens do not satisfy common_adj only delete
601 the spaces between them if there are no comments, etc.
602 if the region contain no plus code and is both preceded and followed
603 by a newline, delete the initial newline. *)
604
605 let rec adjust_around_minus = function
34e49164 606 [] -> []
708f4980 607 | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
8babbc8f 608 ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs ->
fc1ad971
C
609 let (minus_list,rest) = Common.span not_context (t1::xs) in
610 let contains_plus = List.exists is_plus minus_list in
611 let x =
612 match List.rev minus_list with
613 (T2(Parser_c.TCommentNewline c,_b,_i))::rest
614 when List.for_all minus_or_comment minus_list ->
615 set_minus_comment_or_plus adj1 x
616 | _ -> x in
617 x :: adjust_within_minus contains_plus minus_list @
618 adjust_around_minus rest
8babbc8f 619 | ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs ->
fc1ad971
C
620 let (minus_list,rest) = Common.span not_context (t1::xs) in
621 let contains_plus = List.exists is_plus minus_list in
622 adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
c491d8ee
C
623 | x::xs ->
624 x :: adjust_around_minus xs
fc1ad971 625 and adjust_within_minus cp (* contains plus *) = function
8babbc8f 626 ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs ->
fc1ad971
C
627 let not_minus = function T2(_,Min _,_) -> false | _ -> true in
628 let (not_minus_list,rest) = Common.span not_minus xs in
629 t1 ::
34e49164 630 (match rest with
8babbc8f
C
631 ((Fake2(Min adj2) | T2(_,Min adj2,_)) as t2)::xs
632 when common_adj adj1 adj2 ->
fc1ad971
C
633 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
634 @ (adjust_within_minus cp (t2::xs))
8babbc8f 635 | ((Fake2(Min adj2) | T2(_,Min adj2,_)) as t2)::xs ->
5626f154 636 if not cp && List.for_all is_whitespace not_minus_list
fc1ad971
C
637 then
638 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
639 @ (adjust_within_minus cp (t2::xs))
5626f154
C
640 else
641 not_minus_list @ (adjust_within_minus cp (t2::xs))
fc1ad971
C
642 | _ ->
643 if cp
644 then xs
645 else
8babbc8f
C
646 (* remove spaces after removed stuff, eg a comma after a
647 function argument *)
fc1ad971
C
648 let (spaces,rest) = Common.span is_space xs in
649 (List.map (set_minus_comment_or_plus adj1) spaces)
650 @ rest)
651 | xs -> failwith "should always start with minus"
652 and not_context = function
653 (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false
654 | _ -> true
655 and is_plus = function
c491d8ee 656 C2 _ | Comma _ | Cocci2 _ -> true
fc1ad971 657 | _ -> false in
34e49164 658
fc1ad971 659 let xs = adjust_around_minus xs in
34e49164 660
8babbc8f
C
661 (* get rid of fake tok *)
662 let xs = drop_fake xs in
663
113803cf 664 (* this drops blank lines after a brace introduced by removing code *)
fc1ad971
C
665 let minus_or_comment_nonl = function
666 T2(_,Min adj,_) -> true
667 | T2(Parser_c.TCommentNewline _,_b,_i) -> false
668 | x -> is_minusable_comment x in
669
113803cf
C
670 let rec adjust_after_brace = function
671 [] -> []
708f4980 672 | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs)
b1b2de81 673 when str_of_token2 x =$= "{" ->
fc1ad971 674 let (between_minus,rest) = Common.span minus_or_comment_nonl xs in
113803cf
C
675 let is_whitespace = function
676 T2(Parser_c.TCommentSpace _,_b,_i)
677 (* patch: cocci *)
678 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
679 | _ -> false in
680 let (newlines,rest) = Common.span is_whitespace rest in
681 let (drop_newlines,last_newline) =
682 let rec loop = function
683 [] -> ([],[])
684 | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
685 (List.rev rest,[x])
686 | x::xs ->
687 let (drop_newlines,last_newline) = loop xs in
688 (drop_newlines,x::last_newline) in
689 loop (List.rev newlines) in
708f4980 690 x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@
113803cf
C
691 last_newline@
692 adjust_after_brace rest
693 | x::xs -> x::adjust_after_brace xs in
694
695 let xs = adjust_after_brace xs in
696
fc1ad971
C
697 (* search backwards from context } over spaces until reaching a newline.
698 then go back over all minus code until reaching some context or + code.
699 get rid of all intervening spaces, newlines, and comments
700 input is reversed *)
701 let rec adjust_before_brace = function
702 [] -> []
703 | ((T2(t,Ctx,_)) as x)::xs when str_of_token2 x =$= "}" or is_newline x ->
704 let (outer_spaces,rest) = Common.span is_space xs in
705 x :: outer_spaces @
706 (match rest with
707 ((T2 (Parser_c.TCommentNewline _,Ctx,_i)) as h) ::
708 (* the rest of this code is the same as from_newline below
709 but merging them seems to be error prone... *)
710 ((T2 (t, Min adj, idx)) as m) :: rest ->
711 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
712 h :: m ::
713 (List.map (set_minus_comment adj) spaces) @
714 (adjust_before_brace rest)
715 | _ -> adjust_before_brace rest)
716 | x::xs -> x :: (adjust_before_brace xs) in
717
718 let from_newline = function
719 ((T2 (t, Min adj, idx)) as m) :: rest ->
720 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
721 m ::
722 (List.map (set_minus_comment adj) spaces) @
723 (adjust_before_brace rest)
c491d8ee
C
724 | ((T2 (t0, Ctx, idx0)) as m0) :: ((T2 (t, Min adj, idx)) as m) :: rest
725 when TH.str_of_tok t0 = "" ->
726 (* This is for the case of a #define that is completely deleted,
727 because a #define has a strange EOL token at the end.
728 We hope there i no other kind of token that is represented by
729 "", but it seems like changing the kind of token might break
730 the end of entity recognition in the C parser.
731 See parsing_hacks.ml *)
732 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
733 m0 :: m ::
734 (List.map (set_minus_comment adj) spaces) @
735 (adjust_before_brace rest)
fc1ad971
C
736 | rest -> adjust_before_brace rest in
737
738 let xs = List.rev (from_newline (List.rev xs)) in
739 let xs = drop_minus xs in
34e49164
C
740 xs
741
faf9a90c
C
742(* normally, in C code, a semicolon is not preceded by a space or newline *)
743let adjust_before_semicolon toks =
744 let toks = List.rev toks in
002099fc 745 let rec search_semic = function
faf9a90c 746 [] -> []
002099fc 747 | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
faf9a90c 748 if List.mem (str_of_token2 x) [";";")";","]
8babbc8f 749 then x :: search_semic (search_minus false xs)
002099fc
C
750 else x :: search_semic xs
751 | x::xs -> x :: search_semic xs
752 and search_minus seen_minus xs =
753 let (spaces, rest) = Common.span is_space xs in
754 (* only delete spaces if something is actually deleted *)
755 match rest with
756 ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest
757 | _ -> if seen_minus then rest else xs in
758 List.rev (search_semic toks)
34e49164 759
8babbc8f
C
760(* normally, in C code, a ( is not followed by a space or newline *)
761let adjust_after_paren toks =
762 let rec search_paren = function
763 [] -> []
764 | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
765 if List.mem (str_of_token2 x) ["("] (* other things? *)
766 then x :: search_paren(search_minus false xs)
767 else x :: search_paren xs
768 | x::xs -> x :: search_paren xs
769 and search_minus seen_minus xs =
65038c61 770 let (spaces, rest) = Common.span is_whitespace xs in
8babbc8f
C
771 (* only delete spaces if something is actually deleted *)
772 match rest with
773 ((T2(_,Min _,_)) as a)::rerest -> (* minus *)
774 a :: search_minus true rerest
775 | ((T2(_,Ctx,_)) as a)::rerest when str_of_token2 a = "," ->
776 (* comma after ( will be deleted, so consider it as minus code
777 already *)
778 a :: search_minus true rerest
779 | _ -> if seen_minus then rest else xs in (* drop trailing space *)
780 search_paren toks
781
65038c61
C
782(* this is for the case where braces are added around an if branch *)
783let paren_then_brace toks =
784 let rec search_paren = function
785 [] -> []
786 | ((T2(_,Ctx,_)) as x)::xs ->
787 if List.mem (str_of_token2 x) [")"]
788 then x :: search_paren(search_plus xs)
789 else x :: search_paren xs
790 | x::xs -> x :: search_paren xs
791 and search_plus xs =
792 let (spaces, rest) = Common.span is_whitespace xs in
793 match rest with
794 (* move the brace up to the previous line *)
795 ((Cocci2("{",_,_,_,_)) as x) :: (((Cocci2 _) :: _) as rest) ->
796 (C2 " ") :: x :: spaces @ rest
797 | _ -> xs in
798 search_paren toks
799
34e49164
C
800let is_ident_like s = s ==~ Common.regexp_alpha
801
aba5c457
C
802let rec drop_space_at_endline = function
803 [] -> []
804 | [x] -> [x]
190f1acf 805 | (C2 " ") ::
8f657093
C
806 ((((T2(Parser_c.TCommentSpace _,Ctx,_)) | Cocci2("\n",_,_,_,_) |
807 (T2(Parser_c.TCommentNewline _,Ctx,_))) :: _) as rest) ->
190f1acf
C
808 (* when unparse_cocci doesn't know whether space is needed *)
809 drop_space_at_endline rest
aba5c457
C
810 | ((T2(Parser_c.TCommentSpace _,Ctx,_i)) as a)::rest ->
811 let (outer_spaces,rest) = Common.span is_space rest in
812 let minus_or_comment_or_space_nocpp = function
813 T2(_,Min adj,_) -> true
814 | (T2(Parser_c.TCommentSpace _,Ctx,_i)) -> true
18b1275a
C
815 | (T2(Parser_c.TCommentNewline _,Ctx,_i)) -> false
816 | x -> false in
aba5c457 817 let (minus,rest) = Common.span minus_or_comment_or_space_nocpp rest in
18b1275a
C
818 let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in
819 if List.exists (function T2(_,Min adj,_) -> true | _ -> false) minus
820 then
821 match rest with
822 ((T2(Parser_c.TCommentNewline _,Ctx,_i)) as a)::rest ->
823 (* drop trailing spaces *)
824 minus@a::(drop_space_at_endline rest)
825 | _ -> fail()
826 else fail()
8f657093
C
827 | a :: rest ->
828 a :: drop_space_at_endline rest
aba5c457
C
829
830(* if a removed ( is between two tokens, then add a space *)
831let rec paren_to_space = function
832 [] -> []
833 | [x] -> [x]
834 | [x;y] -> [x;y]
835 | ((T2(_,Ctx,_)) as a)::((T2(t,Min _,_)) as b)::((T2(_,Ctx,_)) as c)::rest
836 when not (is_whitespace a) && TH.str_of_tok t = "(" ->
aba5c457
C
837 a :: b :: (C2 " ") :: (paren_to_space (c :: rest))
838 | a :: rest -> a :: (paren_to_space rest)
839
ae4735db 840let rec add_space xs =
34e49164
C
841 match xs with
842 | [] -> []
843 | [x] -> [x]
5636bb2c 844 | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
708f4980
C
845 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
846 not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
847 (* this only works within a line. could consider whether
848 something should be done to add newlines too, rather than
849 printing them explicitly in unparse_cocci. *)
850 x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
65038c61
C
851 | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
852 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
853 not (lnx = -1) && lnx < lny && not (rcolx = -1) ->
854 (* this only works within a line. could consider whether
855 something should be done to add newlines too, rather than
856 printing them explicitly in unparse_cocci. *)
857 x::C2 (String.make (lny-lnx) '\n')::
858 C2 (String.make (lcoly-1) ' '):: (* -1 is for the + *)
859 add_space (y::xs)
1eddfd50
C
860 | ((T2(_,Ctx,_)) as x)::((Cocci2 _) as y)::xs -> (* add space on boundary *)
861 let sx = str_of_token2 x in
862 let sy = str_of_token2 y in
863 if is_ident_like sx && (is_ident_like sy or List.mem sy ["="])
864 then x::C2 " "::(add_space (y::xs))
865 else x::(add_space (y::xs))
866 | x::y::xs -> (* not boundary, not sure if it is possible *)
34e49164
C
867 let sx = str_of_token2 x in
868 let sy = str_of_token2 y in
faf9a90c 869 if is_ident_like sx && is_ident_like sy
34e49164
C
870 then x::C2 " "::(add_space (y::xs))
871 else x::(add_space (y::xs))
872
c491d8ee
C
873(* A fake comma is added at the end of an unordered initlist or a enum
874decl, if the initlist or enum doesn't already end in a comma. This is only
875needed if there is + code, ie if we see Cocci after it in the code sequence *)
876
877let rec drop_end_comma = function
878 [] -> []
879 | [x] -> [x]
880 | ((Comma ",") as x) :: rest ->
881 let (newlines,rest2) = Common.span is_whitespace rest in
882 (match rest2 with
883 (Cocci2 _) :: _ -> x :: drop_end_comma rest
884 | _ -> drop_end_comma rest)
885 | x :: xs -> x :: drop_end_comma xs
886
5636bb2c
C
887(* The following only works for the outermost function call. Stack records
888the column of all open parentheses. Space_cell contains the most recent
889comma in the outermost function call. The goal is to decide whether this
890should be followed by a space or a newline and indent. *)
891let add_newlines toks tabbing_unit =
892 let create_indent n =
893 let (tu,tlen) =
894 match tabbing_unit with
895 Some ("\t",_) -> ("\t",8)
896 | Some ("",_) -> ("\t",8) (* not sure why... *)
897 | Some (s,_) -> (s,String.length s) (* assuming only spaces *)
898 | None -> ("\t",8) in
899 let rec loop seen =
900 if seen + tlen <= n
901 then tu ^ loop (seen + tlen)
902 else String.make (n-seen) ' ' in
903 loop 0 in
904 let check_for_newline count x = function
905 Some (start,space_cell) when count > Flag_parsing_c.max_width ->
906 space_cell := "\n"^(create_indent x);
907 Some (x + (count - start))
908 | _ -> None in
909 (* the following is for strings that may contain newline *)
910 let string_length s count =
911 let l = list_of_string s in
912 List.fold_left
913 (function count ->
914 function
915 '\t' -> count + 8
916 | '\n' -> 0
917 | c -> count + 1)
918 count l in
919 let rec loop info count = function
920 [] -> []
921 | ((T2(tok,_,_)) as a)::xs ->
922 a :: loop info (string_length (TH.str_of_tok tok) count) xs
923 | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs ->
924 let (stack,space_cell) = info in
925 let rest =
926 match hint with
f537ebc4 927 None -> loop info (string_length s count) xs
5636bb2c 928 | Some Unparse_cocci.StartBox ->
f537ebc4 929 let count = string_length s count in
5636bb2c
C
930 loop (count::stack,space_cell) count xs
931 | Some Unparse_cocci.EndBox ->
f537ebc4 932 let count = string_length s count in
5636bb2c
C
933 (match stack with
934 [x] ->
935 (match check_for_newline count x space_cell with
936 Some count -> loop ([],None) count xs
937 | None -> loop ([],None) count xs)
938 | _ -> loop (List.tl stack,space_cell) count xs)
939 | Some (Unparse_cocci.SpaceOrNewline sp) ->
f537ebc4 940 let count = string_length s (count + 1 (*space*)) in
5636bb2c
C
941 (match stack with
942 [x] ->
943 (match check_for_newline count x space_cell with
944 Some count -> loop (stack,Some (x,sp)) count xs
945 | None -> loop (stack,Some (count,sp)) count xs)
946 | _ -> loop info count xs) in
947 a :: rest
948 | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs
c491d8ee 949 | ((Comma(s)) as a)::xs -> a :: loop info (string_length s count) xs
8babbc8f 950 | Fake2 _ :: _ | Indent_cocci2 :: _
5636bb2c
C
951 | Unindent_cocci2 _::_ ->
952 failwith "unexpected fake, indent, or unindent" in
953 let redo_spaces prev = function
954 Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) ->
955 C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev
956 | t -> t::prev in
957 (match !Flag_parsing_c.spacing with
958 Flag_parsing_c.SMPL -> toks
959 | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks)))
34e49164
C
960
961(* When insert some new code, because of a + in a SP, we must add this
962 * code at the right place, with the good indentation. So each time we
963 * encounter some spacing info, with some newline, we maintain the
964 * current indentation level used.
ae4735db 965 *
34e49164 966 * TODO problems: not accurate. ex: TODO
ae4735db 967 *
34e49164
C
968 * TODO: if in #define region, should add a \ \n
969 *)
ae4735db 970let new_tabbing2 space =
34e49164
C
971 (list_of_string space)
972 +> List.rev
b1b2de81 973 +> Common.take_until (fun c -> c =<= '\n')
34e49164
C
974 +> List.rev
975 +> List.map string_of_char
976 +> String.concat ""
977
ae4735db 978let new_tabbing a =
34e49164
C
979 Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
980
981
ae4735db 982let rec adjust_indentation xs =
aa721442 983
65038c61 984 let _current_tabbing = ref ([] : string list) in
34e49164
C
985 let tabbing_unit = ref None in
986
987 let string_of_list l = String.concat "" (List.map string_of_char l) in
988
989 (* try to pick a tabbing unit for the plus code *)
990 let adjust_tabbing_unit old_tab new_tab =
b1b2de81 991 if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
34e49164
C
992 then
993 let old_tab = list_of_string old_tab in
994 let new_tab = list_of_string new_tab in
995 let rec loop = function
996 ([],new_tab) ->
997 tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
998 | (_,[]) -> failwith "not possible"
999 | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
1000 loop (old_tab,new_tab) in
1001
65038c61 1002(*
34e49164
C
1003 let remtab tu current_tab =
1004 let current_tab = List.rev(list_of_string current_tab) in
1005 let rec loop = function
1006 ([],new_tab) -> string_of_list (List.rev new_tab)
65038c61
C
1007 | (_,[]) -> (-*weird; tabbing unit used up more than the current tab*-)
1008 ""
b1b2de81 1009 | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
65038c61 1010 | (_,ns) -> (-* mismatch; remove what we can *-)
34e49164
C
1011 string_of_list (List.rev ns) in
1012 loop (tu,current_tab) in
65038c61 1013*)
34e49164
C
1014
1015 let rec find_first_tab started = function
1016 [] -> ()
b1b2de81 1017 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
34e49164 1018 find_first_tab true xs
485bce71 1019(* patch: coccinelle *)
34e49164
C
1020 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
1021 when started ->
1022 let s = str_of_token2 x +> new_tabbing in
1023 tabbing_unit := Some (s,List.rev (list_of_string s))
1024 | x::xs -> find_first_tab started xs in
1025 find_first_tab false xs;
1026
1eddfd50
C
1027 let rec balanced ct = function
1028 [] -> ct >= 0
1029 | ((T2(tok,_,_)) as x)::xs ->
1030 (match str_of_token2 x with
1031 "(" -> balanced (ct+1) xs
1032 | ")" -> balanced (ct-1) xs
1033 | _ -> balanced ct xs)
1034 | x::xs -> balanced ct xs in
1035
ae4735db 1036 let rec aux started xs =
34e49164
C
1037 match xs with
1038 | [] -> []
485bce71 1039(* patch: coccinelle *)
8f657093
C
1040 | (T2 (Parser_c.TCommentNewline _,_,_))::Unindent_cocci2(false)::xs
1041 | (Cocci2("\n",_,_,_,_))::Unindent_cocci2(false)::xs ->
1042 (C2 "\n")::aux started xs
34e49164 1043 | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
5636bb2c 1044 ((Cocci2 ("{",_,_,_,_)) as a)::xs
708f4980 1045 when started && str_of_token2 x =$= ")" ->
34e49164 1046 (* to be done for if, etc, but not for a function header *)
708f4980 1047 x::(C2 " ")::a::(aux started xs)
1eddfd50
C
1048 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs
1049 when balanced 0 (fst(Common.span (function x -> not(is_newline x)) xs)) ->
ae4735db 1050 let old_tabbing = !_current_tabbing in
65038c61 1051 str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := [s]);
34e49164 1052 (* only trust the indentation after the first { *)
c3e37e97 1053 (if started
65038c61
C
1054 then
1055 adjust_tabbing_unit
1056 (String.concat "" old_tabbing)
1057 (String.concat "" !_current_tabbing));
34e49164
C
1058 let coccis_rest = Common.span all_coccis xs in
1059 (match coccis_rest with
b1b2de81 1060 (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
34e49164
C
1061 (* the case where cocci code has been added before a close } *)
1062 x::aux started (Indent_cocci2::xs)
1063 | _ -> x::aux started xs)
65038c61
C
1064 | Indent_cocci2::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
1065 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL ->
1066 let tu = String.make (lcoly-1) ' ' in
1067 _current_tabbing := tu::(!_current_tabbing);
1068 C2 (tu)::aux started (y::xs)
34e49164
C
1069 | Indent_cocci2::xs ->
1070 (match !tabbing_unit with
1071 None -> aux started xs
1072 | Some (tu,_) ->
65038c61
C
1073 _current_tabbing := tu::(!_current_tabbing);
1074 (* can't be C2, for later phases *)
1075 Cocci2 (tu,-1,-1,-1,None)::aux started xs)
c3e37e97 1076 | Unindent_cocci2(permanent)::xs ->
65038c61
C
1077 (match !_current_tabbing with
1078 [] -> aux started xs
1079 | _::new_tabbing ->
1080 _current_tabbing := new_tabbing;
34e49164
C
1081 aux started xs)
1082 (* border between existing code and cocci code *)
5636bb2c 1083 | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs
b1b2de81 1084 when str_of_token2 x =$= "{" ->
34e49164
C
1085 x::aux true (y::Indent_cocci2::xs)
1086 | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
b1b2de81 1087 when str_of_token2 y =$= "}" ->
c3e37e97 1088 x::aux started (y::Unindent_cocci2 true::xs)
34e49164 1089 (* starting the body of the function *)
b1b2de81 1090 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs
5636bb2c 1091 | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs
5636bb2c 1092 | ((Cocci2("\n",_,_,_,_)) as x)::xs ->
0708f913 1093 (* dont inline in expr because of weird eval order of ocaml *)
65038c61
C
1094 let s = String.concat "" !_current_tabbing in
1095 (* can't be C2, for later phases *)
5636bb2c 1096 x::Cocci2 (s,-1,-1,-1,None)::aux started xs
34e49164 1097 | x::xs -> x::aux started xs in
5636bb2c 1098 (aux false xs,!tabbing_unit)
34e49164
C
1099
1100
1101let rec find_paren_comma = function
1102 | [] -> ()
1103
1104 (* do nothing if was like this in original file *)
1105 | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
ae4735db 1106 ::xs when p2 =|= p1 + 1 ->
34e49164
C
1107 find_paren_comma (x2::xs)
1108
1109 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
ae4735db 1110 ::xs when p2 =|= p1 + 1 ->
34e49164
C
1111 find_paren_comma (x2::xs)
1112
1113 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
ae4735db 1114 ::xs when p2 =|= p1 + 1 ->
34e49164
C
1115 find_paren_comma (x2::xs)
1116
1117 (* otherwise yes can adjust *)
ae4735db 1118 | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
34e49164
C
1119 x2.remove <- true;
1120 find_paren_comma (x2::xs)
ae4735db 1121 | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
34e49164
C
1122 x1.remove <- true;
1123 find_paren_comma (x2::xs)
1124
ae4735db 1125 | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
34e49164
C
1126 x1.remove <- true;
1127 find_paren_comma (x2::xs)
1128
ae4735db 1129 | x::xs ->
34e49164 1130 find_paren_comma xs
34e49164 1131
ae4735db
C
1132
1133let fix_tokens toks =
34e49164
C
1134 let toks = toks +> List.map mk_token_extended in
1135
1136 let cleaner = toks +> Common.exclude (function
1137 | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
1138 | _ -> false
1139 ) in
1140 find_paren_comma cleaner;
1141
1142 let toks = rebuild_tokens_extented toks in
1143 toks +> List.map (fun x -> x.tok2)
1144
1145
1146
1147(*****************************************************************************)
1148(* Final unparsing (and debugging support) *)
1149(*****************************************************************************)
1150
1151(* for debugging *)
1152type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
1153
1154let kind_of_token2 = function
8babbc8f 1155 | Fake2 _ -> KFake
34e49164
C
1156 | Cocci2 _ -> KCocci
1157 | C2 _ -> KC
c491d8ee 1158 | Comma _ -> KC
34e49164
C
1159 | T2 (t,_,_) ->
1160 (match TH.pinfo_of_tok t with
1161 | ExpandedTok _ -> KExpanded
1162 | OriginTok _ -> KOrigin
1163 | FakeTok _ -> raise Impossible (* now a Fake2 *)
1164 | AbstractLineTok _ -> raise Impossible (* now a KC *)
1165 )
c3e37e97 1166 | Unindent_cocci2 _ | Indent_cocci2 -> raise Impossible
ae4735db 1167
34e49164
C
1168let end_mark = "!"
1169
1170let start_mark = function
1171 | KFake -> "!F!"
ae4735db 1172 | KCocci -> "!S!"
34e49164
C
1173 | KC -> "!A!"
1174 | KExpanded -> "!E!"
1175 | KOrigin -> ""
1176
1177let print_all_tokens2 pr xs =
1178 if !Flag_parsing_c.debug_unparsing
1179 then
1180 let current_kind = ref KOrigin in
ae4735db 1181 xs +> List.iter (fun t ->
34e49164 1182 let newkind = kind_of_token2 t in
b1b2de81 1183 if newkind =*= !current_kind
34e49164
C
1184 then pr (str_of_token2 t)
1185 else begin
1186 pr (end_mark);
1187 pr (start_mark newkind);
1188 pr (str_of_token2 t);
1189 current_kind := newkind
1190 end
1191 );
ae4735db 1192 else
34e49164 1193 xs +> List.iter (fun x -> pr (str_of_token2 x))
ae4735db 1194
34e49164
C
1195
1196
1197
1198(*****************************************************************************)
1199(* Entry points *)
1200(*****************************************************************************)
1201
1202(* old: PPviatok was made in the beginning to allow to pretty print a
1203 * complete C file, including a modified C file by transformation.ml,
1204 * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
1205 * maybe flow_to_ast) all the cases. Indeed we don't need to do some
1206 * fancy stuff when a function was not modified at all. Just need to
1207 * print the list of token as-is. But now pretty_print_c.ml handles
1208 * almost everything so maybe less useful. Maybe PPviatok allows to
ae4735db
C
1209 * optimize a little the pretty printing.
1210 *
34e49164
C
1211 * update: now have PPviastr which goes even faster than PPviatok, so
1212 * PPviatok has disappeared.
1213 *)
1214
1215type ppmethod = PPnormal | PPviastr
1216
1217
1218
1219
1220(* The pp_program function will call pretty_print_c.ml with a special
ae4735db 1221 * function to print the leaf components, the tokens. When we want to
34e49164 1222 * print a token, we need to print also maybe the space and comments that
ae4735db 1223 * were close to it in the original file (and that was omitted during the
34e49164
C
1224 * parsing phase), and honor what the cocci-info attached to the token says.
1225 * Maybe we will not print the token if it's a MINUS-token, and maybe we will
ae4735db 1226 * print it and also print some cocci-code attached in a PLUS to it.
34e49164
C
1227 * So we will also maybe call unparse_cocci. Because the cocci-code may
1228 * contain metavariables, unparse_cocci will in fact sometimes call back
1229 * pretty_print_c (which will this time don't call back again unparse_cocci)
1230 *)
1231
ae4735db
C
1232let pp_program2 xs outfile =
1233 Common.with_open_outfile outfile (fun (pr,chan) ->
1234 let pr s =
1235 if !Flag_parsing_c.debug_unparsing
34e49164 1236 then begin pr2_no_nl s; flush stderr end
ae4735db 1237 else pr s
34e49164
C
1238 (* flush chan; *)
1239 (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1240 in
ae4735db
C
1241
1242 xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
34e49164
C
1243 (* here can still work on ast *)
1244 let e = remove_useless_fakeInfo_struct e in
ae4735db 1245
34e49164 1246 match ppmethod with
002099fc 1247 | PPnormal ->
34e49164 1248 (* now work on tokens *)
34e49164 1249 (* phase1: just get all the tokens, all the information *)
ae4735db 1250 assert(toks_e +> List.for_all (fun t ->
34e49164
C
1251 TH.is_origin t or TH.is_expanded t
1252 ));
1253 let toks = get_fakeInfo_and_tokens e toks_e in
1254 let toks = displace_fake_nodes toks in
1255 (* assert Origin;ExpandedTok;Faketok *)
1256 let toks = expand_mcode toks in
c491d8ee 1257
34e49164
C
1258 (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1259 * and no tag information, just NOTHING. *)
fc1ad971
C
1260
1261 let toks =
1262 if !Flag.sgrep_mode2
5626f154
C
1263 then
1264 (* nothing else to do for sgrep *)
8babbc8f 1265 drop_expanded(drop_fake(drop_minus toks))
fc1ad971
C
1266 else
1267 (* phase2: can now start to filter and adjust *)
65038c61 1268 (let (toks,tu) = adjust_indentation toks in
fc1ad971 1269 let toks = adjust_before_semicolon toks in(*before remove minus*)
8babbc8f 1270 let toks = adjust_after_paren toks in(*also before remove minus*)
aba5c457
C
1271 let toks = drop_space_at_endline toks in
1272 let toks = paren_to_space toks in
c491d8ee 1273 let toks = drop_end_comma toks in
fc1ad971
C
1274 let toks = remove_minus_and_between_and_expanded_and_fake toks in
1275 (* assert Origin + Cocci + C and no minus *)
1276 let toks = add_space toks in
5636bb2c 1277 let toks = add_newlines toks tu in
65038c61 1278 let toks = paren_then_brace toks in
fc1ad971 1279 let toks = fix_tokens toks in
65038c61 1280 toks) in
34e49164 1281
ae4735db 1282 (* in theory here could reparse and rework the ast! or
34e49164
C
1283 * apply some SP. Not before cos julia may have generated
1284 * not parsable file. Need do unparsing_tricks call before being
1285 * ready to reparse. *)
1286 print_all_tokens2 pr toks;
1287
1288 | PPviastr -> pr str
1289 )
1290 )
1291
ae4735db 1292let pp_program a b =
34e49164
C
1293 Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
1294
485bce71 1295
ae4735db 1296let pp_program_default xs outfile =
485bce71
C
1297 let xs' = xs +> List.map (fun x -> x, PPnormal) in
1298 pp_program xs' outfile