Coccinelle release-1.0.0-rc11
[bpt/coccinelle.git] / parsing_c / unparse_c.ml
CommitLineData
0708f913 1(* Yoann Padioleau, Julia Lawall
ae4735db 2 *
17ba0788
C
3 * Copyright (C) 2012, INRIA.
4 * Copyright (C) 2010, 2011, University of Copenhagen DIKU and INRIA.
0708f913 5 * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
34e49164
C
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License (GPL)
9 * version 2 as published by the Free Software Foundation.
ae4735db 10 *
34e49164
C
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * file license.txt for more details.
ae4735db
C
15 *
16 *
485bce71 17 * Modifications by Julia Lawall for better newline handling.
34e49164
C
18 *)
19open Common
20
21open Ast_c
22
23module TH = Token_helpers
24
25
b1b2de81
C
26(* should keep comments and directives in between adjacent deleted terms,
27but not comments and directives within deleted terms. should use the
28labels found in the control-flow graph *)
29
34e49164
C
30
31
708f4980
C
32(*****************************************************************************)
33(* Wrappers *)
34(*****************************************************************************)
35let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
36
34e49164
C
37(*****************************************************************************)
38(* Types used during the intermediate phases of the unparsing *)
39(*****************************************************************************)
40
ae4735db 41type token1 =
34e49164
C
42 | Fake1 of info
43 | T1 of Parser_c.token
44
45(* The cocci_tag of the token should always be a NOTHING. The mark of
46 * the token can only be OriginTok or ExpandedTok. Why not get rid of
47 * token and get something simpler ? because we need to know if the
48 * info is a TCommentCpp or TCommentSpace, etc for some of the further
49 * analysis so easier to keep with the token.
ae4735db 50 *
34e49164
C
51 * This type contains the whole information. Have all the tokens with this
52 * type.
53 *)
708f4980 54type min =
5626f154 55 Min of (int list (* match numbers from witness trees *) *
8babbc8f 56 Ast_cocci.adjacency (* adjacency information *))
708f4980
C
57 | Ctx
58
ae4735db
C
59type token2 =
60 | T2 of Parser_c.token * min *
34e49164 61 int option (* orig index, abstracting away comments and space *)
8babbc8f 62 | Fake2 of min
708f4980 63 | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
5636bb2c 64 * Unparse_cocci.nlhint option
34e49164 65 | C2 of string
c491d8ee 66 | Comma of string
34e49164 67 | Indent_cocci2
c3e37e97 68 | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
993936c0 69 | EatSpace2
34e49164
C
70
71(* not used yet *)
ae4735db 72type token3 =
34e49164
C
73 | T3 of Parser_c.token
74 | Cocci3 of string
75 | C3 of string
76
77
78(* similar to the tech in parsing_hack *)
79type token_extended = {
80 tok2 : token2;
81 str : string;
82 idx: int option; (* to know if 2 tokens were consecutive in orig file *)
83 mutable new_tokens_before : token2 list;
84 mutable remove : bool;
85}
86
87
88(*****************************************************************************)
89(* Helpers *)
90(*****************************************************************************)
91
ae4735db 92let info_of_token1 t =
34e49164
C
93 match t with
94 | Fake1 info -> info
95 | T1 tok -> TH.info_of_tok tok
96
002099fc
C
97let print_token1 = function
98 T1 tok -> TH.str_of_tok tok
99 | Fake1 info -> "fake"
100
34e49164
C
101let str_of_token2 = function
102 | T2 (t,_,_) -> TH.str_of_tok t
8babbc8f 103 | Fake2 _ -> ""
5636bb2c 104 | Cocci2 (s,_,_,_,_) -> s
34e49164 105 | C2 s -> s
c491d8ee 106 | Comma s -> s
34e49164 107 | Indent_cocci2 -> ""
c3e37e97 108 | Unindent_cocci2 _ -> ""
993936c0 109 | EatSpace2 -> ""
34e49164
C
110
111let print_token2 = function
708f4980 112 | T2 (t,b,_) ->
8babbc8f
C
113 let t_str =
114 match t with
115 | Parser_c.TCommentSpace _ -> " sp "
116 | Parser_c.TCommentNewline _ -> " nl "
117 | Parser_c.TCommentCpp _ -> " cp "
118 | Parser_c.TCommentMisc _ -> " misc "
119 | Parser_c.TComment _ -> " comment "
120 | _ -> "" in
708f4980
C
121 let b_str =
122 match b with
123 Min (index,adj) ->
8babbc8f
C
124 Printf.sprintf "-%d[%s]"
125 (match adj with Ast_cocci.ADJ n -> n | _ -> -1)
708f4980
C
126 (String.concat " " (List.map string_of_int index))
127 | Ctx -> "" in
8babbc8f
C
128 "T2:"^b_str^t_str^TH.str_of_tok t
129 | Fake2 b ->
130 let b_str =
131 match b with
132 Min (index,adj) ->
133 Printf.sprintf "-%d[%s]"
134 (match adj with Ast_cocci.ADJ n -> n | _ -> -1)
135 (String.concat " " (List.map string_of_int index))
136 | Ctx -> "" in
137 b_str^"fake"
5636bb2c 138 | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
34e49164 139 | C2 s -> "C2:"^s
c491d8ee 140 | Comma s -> "Comma:"^s
34e49164 141 | Indent_cocci2 -> "Indent"
c3e37e97 142 | Unindent_cocci2 _ -> "Unindent"
993936c0 143 | EatSpace2 -> "EatSpace"
34e49164 144
002099fc 145let simple_print_all_tokens1 l =
90aeb998 146 List.iter (function x -> Printf.printf "|%s| " (print_token1 x)) l;
002099fc
C
147 Printf.printf "\n"
148
708f4980 149let simple_print_all_tokens2 l =
fc1ad971 150 List.iter (function x -> Printf.printf "|%s| " (print_token2 x)) l;
34e49164
C
151 Printf.printf "\n"
152
153let str_of_token3 = function
154 | T3 t -> TH.str_of_tok t
155 | Cocci3 s | C3 s -> s
156
157
158
ae4735db
C
159let mk_token_extended x =
160 let origidx =
34e49164 161 match x with
ae4735db 162 | T2 (_,_, idx) -> idx
34e49164
C
163 | _ -> None
164 in
ae4735db 165 { tok2 = x;
34e49164
C
166 str = str_of_token2 x;
167 idx = origidx;
168 new_tokens_before = [];
169 remove = false;
170 }
171
ae4735db 172let rebuild_tokens_extented toks_ext =
34e49164 173 let _tokens = ref [] in
ae4735db 174 toks_ext +> List.iter (fun tok ->
34e49164
C
175 tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
176 if not tok.remove then push2 tok.tok2 _tokens;
177 );
178 let tokens = List.rev !_tokens in
179 (tokens +> List.map mk_token_extended)
180
181
182let mcode_contain_plus = function
183 | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
184 | Ast_cocci.CONTEXT _ -> true
485bce71 185(* patch: when need full coccinelle transformation *)
8babbc8f
C
186 | Ast_cocci.MINUS (_,_,_,Ast_cocci.NOREPLACEMENT) -> false
187 | Ast_cocci.MINUS (_,_,_,Ast_cocci.REPLACEMENT _) -> true(*REPL is not empty*)
951c7801 188 | Ast_cocci.PLUS _ -> raise Impossible
34e49164 189
ae4735db 190let contain_plus info =
34e49164
C
191 let mck = Ast_c.mcode_of_info info in
192 mcode_contain_plus mck
193
194(*****************************************************************************)
195(* Last fix on the ast *)
196(*****************************************************************************)
197
ae4735db 198(* Because of the ugly trick to handle initialiser, I generate fake ','
34e49164
C
199 * for the last initializer element, but if there is nothing around it,
200 * I don't want in the end to print it.
201 *)
202
ae4735db 203let remove_useless_fakeInfo_struct program =
34e49164 204 let bigf = { Visitor_c.default_visitor_c_s with
ae4735db 205 Visitor_c.kini_s = (fun (k,bigf) ini ->
34e49164 206 match k ini with
113803cf 207 | InitList args, ii ->
34e49164
C
208 (match ii with
209 | [_i1;_i2] -> ini
113803cf 210 | [i1;i2;iicommaopt] ->
34e49164
C
211 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
212 && (Ast_c.is_fake iicommaopt)
213 (* sometimes the guy put a normal iicommaopt *)
34e49164
C
214 then InitList args, [i1;i2]
215 else InitList args, [i1;i2;iicommaopt]
ae4735db 216 | [i1;i2;iicommaopt;end_comma_opt] ->
113803cf
C
217 (* only in #define. end_comma_opt canot be fake *)
218 (* not sure if this will be considered ambiguous with a previous
219 case? *)
220 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
221 && (Ast_c.is_fake iicommaopt)
222 (* sometimes the guy put a normal iicommaopt *)
223 then InitList args, [i1;i2;end_comma_opt]
224 else InitList args, [i1;i2;iicommaopt;end_comma_opt]
34e49164
C
225 | _ -> raise Impossible
226 )
227 | x -> x
228 )
229 } in
230 Visitor_c.vk_toplevel_s bigf program
231
232
233(*****************************************************************************)
234(* Tokens1 generation *)
235(*****************************************************************************)
236
ae4735db
C
237let get_fakeInfo_and_tokens celem toks =
238 let toks_in = ref toks in
34e49164
C
239 let toks_out = ref [] in
240
241 (* todo? verify good order of position ? *)
ae4735db 242 let pr_elem info =
34e49164 243 match Ast_c.pinfo_of_info info with
ae4735db 244 | FakeTok _ ->
34e49164 245 Common.push2 (Fake1 info) toks_out
ae4735db 246 | OriginTok _ | ExpandedTok _ ->
34e49164 247 (* get the associated comments/space/cppcomment tokens *)
978fd7e5 248 let (before, x, after) =
ae4735db 249 !toks_in +> Common.split_when (fun tok ->
993936c0 250 info =*= TH.info_of_tok tok)
34e49164 251 in
b1b2de81 252 assert(info =*= TH.info_of_tok x);
34e49164 253 (*old: assert(before +> List.for_all (TH.is_comment)); *)
ae4735db 254 before +> List.iter (fun x ->
34e49164 255 if not (TH.is_comment x)
0708f913 256 then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
34e49164
C
257 (* case such as int asm d3("x"); not yet in ast *)
258 );
259 before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
260 push2 (T1 x) toks_out;
261 toks_in := after;
ae4735db 262 | AbstractLineTok _ ->
34e49164
C
263 (* can be called on type info when for instance use -type_c *)
264 if !Flag_parsing_c.pretty_print_type_info
265 then Common.push2 (Fake1 info) toks_out
ae4735db 266 else raise Impossible (* at this stage *)
34e49164
C
267 in
268
269 let pr_space _ = () in (* use the spacing that is there already *)
270
271 Pretty_print_c.pp_program_gen pr_elem pr_space celem;
272
273 if not (null !toks_in)
0708f913 274 then failwith "WEIRD: unparsing not finished";
34e49164
C
275
276 List.rev !toks_out
277
8babbc8f 278(* Fake nodes that have BEFORE code or are - should be moved over any subsequent
34e49164 279whitespace and newlines, but not any comments, to get as close to the affected
002099fc 280code as possible. Similarly, fake nodes that have AFTER code should be moved
34e49164
C
281backwards. No fake nodes should have both before and after code. *)
282
283let displace_fake_nodes toks =
284 let is_fake = function Fake1 _ -> true | _ -> false in
285 let is_whitespace = function
286 T1(Parser_c.TCommentSpace _)
485bce71 287 (* patch: cocci *)
34e49164
C
288 | T1(Parser_c.TCommentNewline _) -> true
289 | _ -> false in
290 let rec loop toks =
291 let fake_info =
292 try Some (Common.split_when is_fake toks)
293 with Not_found -> None in
294 match fake_info with
295 Some(bef,((Fake1 info) as fake),aft) ->
296 (match !(info.cocci_tag) with
ae4735db 297 | Some x ->
708f4980 298 (match x with
8babbc8f
C
299 (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_)
300 | (Ast_cocci.MINUS(_,_,_,Ast_cocci.REPLACEMENT _),_) ->
34e49164 301 (* move the fake node forwards *)
002099fc
C
302 let (whitespace,rest) = Common.span is_whitespace aft in
303 bef @ whitespace @ fake :: (loop rest)
304 | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
34e49164 305 (* move the fake node backwards *)
002099fc
C
306 let revbef = List.rev bef in
307 let (revwhitespace,revprev) = Common.span is_whitespace revbef in
308 let whitespace = List.rev revwhitespace in
309 let prev = List.rev revprev in
310 prev @ fake :: (loop (whitespace @ aft))
311 | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
312 bef @ fake :: (loop aft)
313 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
314 failwith "fake node should not be before-after"
315 | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
485bce71 316 )
ae4735db 317 | None ->
708f4980
C
318 bef @ fake :: (loop aft)
319 )
34e49164
C
320 | None -> toks
321 | _ -> raise Impossible in
322 loop toks
323
324(*****************************************************************************)
325(* Tokens2 generation *)
326(*****************************************************************************)
327
b1b2de81 328let comment2t2 = function
5636bb2c
C
329 (Token_c.TCommentCpp
330 (* not sure iif the following list is exhaustive or complete *)
331 (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError),
332 (info : Token_c.info)) ->
333 C2(info.Common.str)
334 | (Token_c.TCommentCpp x,(info : Token_c.info)) ->
b1b2de81
C
335 C2("\n"^info.Common.str^"\n")
336 | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
337
ae4735db 338let expand_mcode toks =
34e49164
C
339 let toks_out = ref [] in
340
341 let index = ref 0 in
342
ae4735db 343 let add_elem t minus =
34e49164 344 match t with
ae4735db 345 | Fake1 info ->
34e49164 346 let str = Ast_c.str_of_info info in
b1b2de81 347 if str =$= ""
8babbc8f 348 then push2 (Fake2 minus) toks_out
c491d8ee
C
349 (* fx the fake "," at the end of a structure or enum.
350 no idea what other fake info there can be... *)
351 else push2 (Comma str) toks_out
ae4735db
C
352
353
002099fc 354 | T1 tok ->
b1b2de81 355 (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
34e49164 356 (* no tag on expandedTok ! *)
fc1ad971
C
357 let modified = function
358 None -> false
359 | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
360 | _ -> true in
361 (if (TH.is_expanded tok &&
362 modified !((TH.info_of_tok tok).cocci_tag)
363 (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*))
0708f913
C
364 then
365 failwith
366 (Printf.sprintf
367 "expanded token %s on line %d is either modified or stored in a metavariable"
368 (TH.str_of_tok tok) (TH.line_of_tok tok)));
34e49164 369
ae4735db 370 let tok' = tok +> TH.visitor_info_of_tok (fun i ->
34e49164
C
371 { i with cocci_tag = ref Ast_c.emptyAnnot; }
372 ) in
373
ae4735db 374 let optindex =
34e49164
C
375 if TH.is_origin tok && not (TH.is_real_comment tok)
376 then begin
377 incr index;
378 Some !index
379 end
380 else None
381 in
382
383 push2 (T2 (tok', minus, optindex)) toks_out
384 in
385
ae4735db
C
386 let expand_info t =
387 let (mcode,env) =
708f4980 388 Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
34e49164 389
5636bb2c
C
390 let pr_cocci s ln col rcol hint =
391 push2 (Cocci2(s,ln,col,rcol,hint)) toks_out in
ae4735db 392 let pr_c info =
b1b2de81
C
393 (match Ast_c.pinfo_of_info info with
394 Ast_c.AbstractLineTok _ ->
395 push2 (C2 (Ast_c.str_of_info info)) toks_out
396 | Ast_c.FakeTok (s,_) ->
397 push2 (C2 s) toks_out
34e49164
C
398 | _ ->
399 Printf.printf "line: %s\n" (Common.dump info);
b1b2de81
C
400 failwith "not an abstract line");
401 (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
402 List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
403
708f4980 404 let pr_barrier ln col = (* marks a position, used around C code *)
5636bb2c 405 push2 (Cocci2("",ln,col,col,None)) toks_out in
708f4980 406 let pr_nobarrier ln col = () in (* not needed for linux spacing *)
b1b2de81 407
708f4980 408 let pr_cspace _ = push2 (C2 " ") toks_out in
34e49164 409
708f4980
C
410 let pr_space _ = () (* rely on add_space in cocci code *) in
411 let pr_arity _ = () (* not interested *) in
34e49164
C
412
413 let indent _ = push2 Indent_cocci2 toks_out in
c3e37e97 414 let unindent x = push2 (Unindent_cocci2 x) toks_out in
993936c0 415 let eat_space _ = push2 EatSpace2 toks_out in
34e49164 416
708f4980
C
417 let args_pp =
418 (env, pr_cocci, pr_c, pr_cspace,
419 (match !Flag_parsing_c.spacing with
420 Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
421 pr_arity,
422 (match !Flag_parsing_c.spacing with
423 Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
993936c0 424 indent, unindent, eat_space) in
34e49164 425
ae4735db
C
426 (* old: when for yacfe with partial cocci:
427 * add_elem t false;
485bce71 428 *)
34e49164 429
485bce71 430 (* patch: when need full coccinelle transformation *)
faf9a90c 431 let unparser = Unparse_cocci.pp_list_list_any args_pp false in
34e49164 432 match mcode with
ae4735db 433 | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
34e49164 434 (* Why adding ? because I want to have all the information, the whole
ae4735db 435 * set of tokens, so I can then process and remove the
34e49164 436 * is_between_two_minus for instance *)
708f4980 437 add_elem t (Min (inst,adj));
8babbc8f
C
438 (match any_xxs with
439 Ast_cocci.NOREPLACEMENT -> ()
440 | Ast_cocci.REPLACEMENT(any_xxs,_) ->
441 unparser any_xxs Unparse_cocci.InPlace)
ae4735db 442 | Ast_cocci.CONTEXT (_,any_befaft) ->
34e49164 443 (match any_befaft with
ae4735db 444 | Ast_cocci.NOTHING ->
708f4980 445 add_elem t Ctx
951c7801 446 | Ast_cocci.BEFORE (xxs,_) ->
faf9a90c 447 unparser xxs Unparse_cocci.Before;
708f4980 448 add_elem t Ctx
ae4735db 449 | Ast_cocci.AFTER (xxs,_) ->
708f4980 450 add_elem t Ctx;
faf9a90c 451 unparser xxs Unparse_cocci.After;
ae4735db 452 | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
faf9a90c 453 unparser xxs Unparse_cocci.Before;
708f4980 454 add_elem t Ctx;
faf9a90c 455 unparser yys Unparse_cocci.After;
34e49164 456 )
951c7801 457 | Ast_cocci.PLUS _ -> raise Impossible
34e49164
C
458
459 in
460
461 toks +> List.iter expand_info;
462 List.rev !toks_out
ae4735db 463
34e49164
C
464
465(*****************************************************************************)
466(* Tokens2 processing, filtering, adjusting *)
467(*****************************************************************************)
468
002099fc 469let is_space = function
fc1ad971 470 | T2(Parser_c.TCommentSpace _,_b,_i) -> true (* only whitespace *)
ae4735db 471 | _ -> false
fc1ad971
C
472
473let is_newline = function
474 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
475 | _ -> false
476
477let is_whitespace = function
ae4735db 478 | (T2 (t,_b,_i)) ->
002099fc
C
479 (match t with
480 | Parser_c.TCommentSpace _ -> true (* only whitespace *)
fc1ad971 481 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
002099fc
C
482 | _ -> false
483 )
ae4735db 484 | _ -> false
002099fc 485
34e49164 486let is_minusable_comment = function
ae4735db 487 | (T2 (t,_b,_i)) ->
34e49164
C
488 (match t with
489 | Parser_c.TCommentSpace _ (* only whitespace *)
ae4735db 490 (* patch: coccinelle *)
002099fc 491 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
7f004419 492 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
002099fc
C
493 | Parser_c.TComment _
494 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
b1b2de81
C
495 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
496 | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
34e49164
C
497 -> true
498
b1b2de81 499 | Parser_c.TCommentMisc _
0708f913 500 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
34e49164
C
501 -> false
502
503 | _ -> false
504 )
ae4735db 505 | _ -> false
34e49164 506
fc1ad971 507let is_minusable_comment_nocpp = function
ae4735db 508 | (T2 (t,_b,_i)) ->
fc1ad971
C
509 (match t with
510 | Parser_c.TCommentSpace _ (* only whitespace *)
ae4735db 511 (* patch: coccinelle *)
fc1ad971 512 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
7f004419 513 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
fc1ad971
C
514 | Parser_c.TComment _ -> true
515 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
516 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
517 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
518 -> false
519
520 | Parser_c.TCommentMisc _
521 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
522 -> false
523
524 | _ -> false
525 )
ae4735db 526 | _ -> false
fc1ad971 527
34e49164 528let all_coccis = function
993936c0
C
529 Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _
530 | EatSpace2 -> true
34e49164
C
531 | _ -> false
532
b1b2de81
C
533(*previously gave up if the first character was a newline, but not clear why*)
534let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
34e49164 535
708f4980 536let set_minus_comment adj = function
ae4735db 537 | T2 (t,Ctx,idx) ->
34e49164
C
538 let str = TH.str_of_tok t in
539 (match t with
540 | Parser_c.TCommentSpace _
ae4735db 541(* patch: coccinelle *)
34e49164
C
542 | Parser_c.TCommentNewline _ -> ()
543
ae4735db
C
544 | Parser_c.TComment _
545 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
b1b2de81
C
546 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
547 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
ae4735db 548 ->
b1b2de81
C
549 pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
550 (TH.line_of_tok t) str)
34e49164
C
551 | _ -> raise Impossible
552 );
708f4980 553 T2 (t, Min adj, idx)
ae4735db 554(* patch: coccinelle *)
fc1ad971 555 | T2 (t,Min adj,idx) as x -> x
8babbc8f 556 | Fake2 _ as x -> x
34e49164
C
557 | _ -> raise Impossible
558
708f4980 559let set_minus_comment_or_plus adj = function
993936c0
C
560 Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _
561 | EatSpace2 as x -> x
708f4980 562 | x -> set_minus_comment adj x
34e49164 563
fc1ad971
C
564let drop_minus xs =
565 xs +> Common.exclude (function
566 | T2 (t,Min adj,_) -> true
567 | _ -> false
568 )
569
8babbc8f 570let drop_expanded xs =
5626f154 571 xs +> Common.exclude (function
34e49164 572 | T2 (t,_,_) when TH.is_expanded t -> true
8babbc8f
C
573 | _ -> false
574 )
575
576let drop_fake xs =
577 xs +> Common.exclude (function
578 | Fake2 _ -> true
34e49164
C
579 | _ -> false
580 )
5626f154
C
581
582let remove_minus_and_between_and_expanded_and_fake xs =
583
8babbc8f
C
584 (* get rid of expanded tok *)
585 let xs = drop_expanded xs in
34e49164 586
113803cf 587 let minus_or_comment = function
708f4980 588 T2(_,Min adj,_) -> true
113803cf
C
589 | x -> is_minusable_comment x in
590
fc1ad971
C
591 let minus_or_comment_nocpp = function
592 T2(_,Min adj,_) -> true
593 | x -> is_minusable_comment_nocpp x in
594
595 let common_adj (index1,adj1) (index2,adj2) =
8babbc8f
C
596 let same_adj = (* same adjacency info *)
597 match (adj1,adj2) with
598 (Ast_cocci.ADJ adj1,Ast_cocci.ADJ adj2) -> adj1 = adj2
599 | (Ast_cocci.ALLMINUS,_) | (_,Ast_cocci.ALLMINUS) -> true in
600 same_adj &&
fc1ad971
C
601 (* non-empty intersection of witness trees *)
602 not ((Common.inter_set index1 index2) = []) in
603
fc1ad971
C
604 (* new idea: collects regions not containing non-space context code
605 if two adjacent adjacent minus tokens satisfy common_adj then delete
606 all spaces, comments etc between them
607 if two adjacent minus tokens do not satisfy common_adj only delete
608 the spaces between them if there are no comments, etc.
609 if the region contain no plus code and is both preceded and followed
610 by a newline, delete the initial newline. *)
611
612 let rec adjust_around_minus = function
34e49164 613 [] -> []
708f4980 614 | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
8babbc8f 615 ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs ->
fc1ad971
C
616 let (minus_list,rest) = Common.span not_context (t1::xs) in
617 let contains_plus = List.exists is_plus minus_list in
618 let x =
619 match List.rev minus_list with
620 (T2(Parser_c.TCommentNewline c,_b,_i))::rest
621 when List.for_all minus_or_comment minus_list ->
622 set_minus_comment_or_plus adj1 x
623 | _ -> x in
624 x :: adjust_within_minus contains_plus minus_list @
625 adjust_around_minus rest
8babbc8f 626 | ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs ->
fc1ad971
C
627 let (minus_list,rest) = Common.span not_context (t1::xs) in
628 let contains_plus = List.exists is_plus minus_list in
629 adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
c491d8ee
C
630 | x::xs ->
631 x :: adjust_around_minus xs
fc1ad971 632 and adjust_within_minus cp (* contains plus *) = function
8babbc8f 633 ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs ->
fc1ad971
C
634 let not_minus = function T2(_,Min _,_) -> false | _ -> true in
635 let (not_minus_list,rest) = Common.span not_minus xs in
636 t1 ::
34e49164 637 (match rest with
8babbc8f
C
638 ((Fake2(Min adj2) | T2(_,Min adj2,_)) as t2)::xs
639 when common_adj adj1 adj2 ->
fc1ad971
C
640 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
641 @ (adjust_within_minus cp (t2::xs))
8babbc8f 642 | ((Fake2(Min adj2) | T2(_,Min adj2,_)) as t2)::xs ->
5626f154 643 if not cp && List.for_all is_whitespace not_minus_list
fc1ad971
C
644 then
645 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
646 @ (adjust_within_minus cp (t2::xs))
5626f154
C
647 else
648 not_minus_list @ (adjust_within_minus cp (t2::xs))
fc1ad971
C
649 | _ ->
650 if cp
651 then xs
652 else
8babbc8f
C
653 (* remove spaces after removed stuff, eg a comma after a
654 function argument *)
fc1ad971
C
655 let (spaces,rest) = Common.span is_space xs in
656 (List.map (set_minus_comment_or_plus adj1) spaces)
657 @ rest)
658 | xs -> failwith "should always start with minus"
659 and not_context = function
660 (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false
661 | _ -> true
662 and is_plus = function
c491d8ee 663 C2 _ | Comma _ | Cocci2 _ -> true
fc1ad971 664 | _ -> false in
34e49164 665
fc1ad971 666 let xs = adjust_around_minus xs in
34e49164 667
8babbc8f
C
668 (* get rid of fake tok *)
669 let xs = drop_fake xs in
670
113803cf 671 (* this drops blank lines after a brace introduced by removing code *)
fc1ad971
C
672 let minus_or_comment_nonl = function
673 T2(_,Min adj,_) -> true
674 | T2(Parser_c.TCommentNewline _,_b,_i) -> false
675 | x -> is_minusable_comment x in
676
113803cf
C
677 let rec adjust_after_brace = function
678 [] -> []
708f4980 679 | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs)
b1b2de81 680 when str_of_token2 x =$= "{" ->
fc1ad971 681 let (between_minus,rest) = Common.span minus_or_comment_nonl xs in
113803cf
C
682 let is_whitespace = function
683 T2(Parser_c.TCommentSpace _,_b,_i)
684 (* patch: cocci *)
685 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
686 | _ -> false in
687 let (newlines,rest) = Common.span is_whitespace rest in
688 let (drop_newlines,last_newline) =
689 let rec loop = function
690 [] -> ([],[])
691 | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
692 (List.rev rest,[x])
693 | x::xs ->
694 let (drop_newlines,last_newline) = loop xs in
695 (drop_newlines,x::last_newline) in
696 loop (List.rev newlines) in
708f4980 697 x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@
113803cf
C
698 last_newline@
699 adjust_after_brace rest
700 | x::xs -> x::adjust_after_brace xs in
701
702 let xs = adjust_after_brace xs in
703
fc1ad971
C
704 (* search backwards from context } over spaces until reaching a newline.
705 then go back over all minus code until reaching some context or + code.
706 get rid of all intervening spaces, newlines, and comments
707 input is reversed *)
708 let rec adjust_before_brace = function
709 [] -> []
710 | ((T2(t,Ctx,_)) as x)::xs when str_of_token2 x =$= "}" or is_newline x ->
711 let (outer_spaces,rest) = Common.span is_space xs in
712 x :: outer_spaces @
713 (match rest with
714 ((T2 (Parser_c.TCommentNewline _,Ctx,_i)) as h) ::
715 (* the rest of this code is the same as from_newline below
716 but merging them seems to be error prone... *)
717 ((T2 (t, Min adj, idx)) as m) :: rest ->
718 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
719 h :: m ::
720 (List.map (set_minus_comment adj) spaces) @
721 (adjust_before_brace rest)
722 | _ -> adjust_before_brace rest)
723 | x::xs -> x :: (adjust_before_brace xs) in
724
725 let from_newline = function
726 ((T2 (t, Min adj, idx)) as m) :: rest ->
727 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
728 m ::
729 (List.map (set_minus_comment adj) spaces) @
730 (adjust_before_brace rest)
c491d8ee
C
731 | ((T2 (t0, Ctx, idx0)) as m0) :: ((T2 (t, Min adj, idx)) as m) :: rest
732 when TH.str_of_tok t0 = "" ->
733 (* This is for the case of a #define that is completely deleted,
734 because a #define has a strange EOL token at the end.
735 We hope there i no other kind of token that is represented by
736 "", but it seems like changing the kind of token might break
737 the end of entity recognition in the C parser.
738 See parsing_hacks.ml *)
739 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
740 m0 :: m ::
741 (List.map (set_minus_comment adj) spaces) @
742 (adjust_before_brace rest)
fc1ad971
C
743 | rest -> adjust_before_brace rest in
744
745 let xs = List.rev (from_newline (List.rev xs)) in
746 let xs = drop_minus xs in
34e49164
C
747 xs
748
993936c0
C
749(* things that should not be followed by space - boundary between SmPL
750 code and C code *)
751let adjust_eat_space toks =
752 let rec loop = function
753 [] -> []
754 | EatSpace2 :: x :: rest when is_space x -> loop rest
755 | EatSpace2 :: rest -> loop rest
756 | x :: xs -> x :: loop xs in
757 loop toks
758
faf9a90c
C
759(* normally, in C code, a semicolon is not preceded by a space or newline *)
760let adjust_before_semicolon toks =
761 let toks = List.rev toks in
002099fc 762 let rec search_semic = function
faf9a90c 763 [] -> []
002099fc 764 | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
faf9a90c 765 if List.mem (str_of_token2 x) [";";")";","]
8babbc8f 766 then x :: search_semic (search_minus false xs)
002099fc
C
767 else x :: search_semic xs
768 | x::xs -> x :: search_semic xs
769 and search_minus seen_minus xs =
770 let (spaces, rest) = Common.span is_space xs in
771 (* only delete spaces if something is actually deleted *)
772 match rest with
773 ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest
774 | _ -> if seen_minus then rest else xs in
775 List.rev (search_semic toks)
34e49164 776
8babbc8f
C
777(* normally, in C code, a ( is not followed by a space or newline *)
778let adjust_after_paren toks =
779 let rec search_paren = function
780 [] -> []
781 | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
782 if List.mem (str_of_token2 x) ["("] (* other things? *)
783 then x :: search_paren(search_minus false xs)
784 else x :: search_paren xs
785 | x::xs -> x :: search_paren xs
786 and search_minus seen_minus xs =
65038c61 787 let (spaces, rest) = Common.span is_whitespace xs in
8babbc8f
C
788 (* only delete spaces if something is actually deleted *)
789 match rest with
790 ((T2(_,Min _,_)) as a)::rerest -> (* minus *)
791 a :: search_minus true rerest
17ba0788 792 | ((T2(_,Ctx,_)) as a)::rerest when seen_minus && str_of_token2 a = "," ->
8babbc8f
C
793 (* comma after ( will be deleted, so consider it as minus code
794 already *)
795 a :: search_minus true rerest
796 | _ -> if seen_minus then rest else xs in (* drop trailing space *)
797 search_paren toks
798
65038c61
C
799(* this is for the case where braces are added around an if branch *)
800let paren_then_brace toks =
801 let rec search_paren = function
802 [] -> []
803 | ((T2(_,Ctx,_)) as x)::xs ->
804 if List.mem (str_of_token2 x) [")"]
805 then x :: search_paren(search_plus xs)
806 else x :: search_paren xs
807 | x::xs -> x :: search_paren xs
808 and search_plus xs =
809 let (spaces, rest) = Common.span is_whitespace xs in
810 match rest with
811 (* move the brace up to the previous line *)
812 ((Cocci2("{",_,_,_,_)) as x) :: (((Cocci2 _) :: _) as rest) ->
813 (C2 " ") :: x :: spaces @ rest
814 | _ -> xs in
815 search_paren toks
816
34e49164
C
817let is_ident_like s = s ==~ Common.regexp_alpha
818
aba5c457
C
819let rec drop_space_at_endline = function
820 [] -> []
821 | [x] -> [x]
190f1acf 822 | (C2 " ") ::
8f657093
C
823 ((((T2(Parser_c.TCommentSpace _,Ctx,_)) | Cocci2("\n",_,_,_,_) |
824 (T2(Parser_c.TCommentNewline _,Ctx,_))) :: _) as rest) ->
190f1acf
C
825 (* when unparse_cocci doesn't know whether space is needed *)
826 drop_space_at_endline rest
aba5c457
C
827 | ((T2(Parser_c.TCommentSpace _,Ctx,_i)) as a)::rest ->
828 let (outer_spaces,rest) = Common.span is_space rest in
829 let minus_or_comment_or_space_nocpp = function
830 T2(_,Min adj,_) -> true
831 | (T2(Parser_c.TCommentSpace _,Ctx,_i)) -> true
18b1275a
C
832 | (T2(Parser_c.TCommentNewline _,Ctx,_i)) -> false
833 | x -> false in
aba5c457 834 let (minus,rest) = Common.span minus_or_comment_or_space_nocpp rest in
18b1275a
C
835 let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in
836 if List.exists (function T2(_,Min adj,_) -> true | _ -> false) minus
837 then
838 match rest with
839 ((T2(Parser_c.TCommentNewline _,Ctx,_i)) as a)::rest ->
840 (* drop trailing spaces *)
841 minus@a::(drop_space_at_endline rest)
842 | _ -> fail()
843 else fail()
8f657093
C
844 | a :: rest ->
845 a :: drop_space_at_endline rest
aba5c457
C
846
847(* if a removed ( is between two tokens, then add a space *)
848let rec paren_to_space = function
849 [] -> []
850 | [x] -> [x]
851 | [x;y] -> [x;y]
852 | ((T2(_,Ctx,_)) as a)::((T2(t,Min _,_)) as b)::((T2(_,Ctx,_)) as c)::rest
853 when not (is_whitespace a) && TH.str_of_tok t = "(" ->
aba5c457
C
854 a :: b :: (C2 " ") :: (paren_to_space (c :: rest))
855 | a :: rest -> a :: (paren_to_space rest)
856
ae4735db 857let rec add_space xs =
34e49164
C
858 match xs with
859 | [] -> []
860 | [x] -> [x]
5636bb2c 861 | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
708f4980
C
862 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
863 not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
864 (* this only works within a line. could consider whether
865 something should be done to add newlines too, rather than
866 printing them explicitly in unparse_cocci. *)
867 x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
65038c61
C
868 | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
869 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
870 not (lnx = -1) && lnx < lny && not (rcolx = -1) ->
871 (* this only works within a line. could consider whether
872 something should be done to add newlines too, rather than
873 printing them explicitly in unparse_cocci. *)
874 x::C2 (String.make (lny-lnx) '\n')::
875 C2 (String.make (lcoly-1) ' '):: (* -1 is for the + *)
876 add_space (y::xs)
1eddfd50
C
877 | ((T2(_,Ctx,_)) as x)::((Cocci2 _) as y)::xs -> (* add space on boundary *)
878 let sx = str_of_token2 x in
879 let sy = str_of_token2 y in
880 if is_ident_like sx && (is_ident_like sy or List.mem sy ["="])
881 then x::C2 " "::(add_space (y::xs))
882 else x::(add_space (y::xs))
883 | x::y::xs -> (* not boundary, not sure if it is possible *)
34e49164
C
884 let sx = str_of_token2 x in
885 let sy = str_of_token2 y in
faf9a90c 886 if is_ident_like sx && is_ident_like sy
34e49164
C
887 then x::C2 " "::(add_space (y::xs))
888 else x::(add_space (y::xs))
889
c491d8ee
C
890(* A fake comma is added at the end of an unordered initlist or a enum
891decl, if the initlist or enum doesn't already end in a comma. This is only
892needed if there is + code, ie if we see Cocci after it in the code sequence *)
893
894let rec drop_end_comma = function
895 [] -> []
896 | [x] -> [x]
897 | ((Comma ",") as x) :: rest ->
898 let (newlines,rest2) = Common.span is_whitespace rest in
899 (match rest2 with
900 (Cocci2 _) :: _ -> x :: drop_end_comma rest
901 | _ -> drop_end_comma rest)
902 | x :: xs -> x :: drop_end_comma xs
903
5636bb2c
C
904(* The following only works for the outermost function call. Stack records
905the column of all open parentheses. Space_cell contains the most recent
906comma in the outermost function call. The goal is to decide whether this
907should be followed by a space or a newline and indent. *)
908let add_newlines toks tabbing_unit =
909 let create_indent n =
910 let (tu,tlen) =
911 match tabbing_unit with
912 Some ("\t",_) -> ("\t",8)
913 | Some ("",_) -> ("\t",8) (* not sure why... *)
914 | Some (s,_) -> (s,String.length s) (* assuming only spaces *)
915 | None -> ("\t",8) in
916 let rec loop seen =
917 if seen + tlen <= n
918 then tu ^ loop (seen + tlen)
919 else String.make (n-seen) ' ' in
920 loop 0 in
921 let check_for_newline count x = function
922 Some (start,space_cell) when count > Flag_parsing_c.max_width ->
923 space_cell := "\n"^(create_indent x);
924 Some (x + (count - start))
925 | _ -> None in
926 (* the following is for strings that may contain newline *)
927 let string_length s count =
928 let l = list_of_string s in
929 List.fold_left
930 (function count ->
931 function
932 '\t' -> count + 8
933 | '\n' -> 0
934 | c -> count + 1)
935 count l in
936 let rec loop info count = function
937 [] -> []
938 | ((T2(tok,_,_)) as a)::xs ->
939 a :: loop info (string_length (TH.str_of_tok tok) count) xs
940 | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs ->
941 let (stack,space_cell) = info in
942 let rest =
943 match hint with
f537ebc4 944 None -> loop info (string_length s count) xs
5636bb2c 945 | Some Unparse_cocci.StartBox ->
f537ebc4 946 let count = string_length s count in
5636bb2c
C
947 loop (count::stack,space_cell) count xs
948 | Some Unparse_cocci.EndBox ->
f537ebc4 949 let count = string_length s count in
5636bb2c
C
950 (match stack with
951 [x] ->
952 (match check_for_newline count x space_cell with
953 Some count -> loop ([],None) count xs
954 | None -> loop ([],None) count xs)
955 | _ -> loop (List.tl stack,space_cell) count xs)
956 | Some (Unparse_cocci.SpaceOrNewline sp) ->
f537ebc4 957 let count = string_length s (count + 1 (*space*)) in
5636bb2c
C
958 (match stack with
959 [x] ->
960 (match check_for_newline count x space_cell with
961 Some count -> loop (stack,Some (x,sp)) count xs
962 | None -> loop (stack,Some (count,sp)) count xs)
963 | _ -> loop info count xs) in
964 a :: rest
965 | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs
c491d8ee 966 | ((Comma(s)) as a)::xs -> a :: loop info (string_length s count) xs
8babbc8f 967 | Fake2 _ :: _ | Indent_cocci2 :: _
993936c0
C
968 | Unindent_cocci2 _::_ | EatSpace2::_ ->
969 failwith "unexpected fake, indent, unindent, or eatspace" in
5636bb2c
C
970 let redo_spaces prev = function
971 Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) ->
972 C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev
973 | t -> t::prev in
974 (match !Flag_parsing_c.spacing with
975 Flag_parsing_c.SMPL -> toks
976 | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks)))
34e49164
C
977
978(* When insert some new code, because of a + in a SP, we must add this
979 * code at the right place, with the good indentation. So each time we
980 * encounter some spacing info, with some newline, we maintain the
981 * current indentation level used.
ae4735db 982 *
34e49164 983 * TODO problems: not accurate. ex: TODO
ae4735db 984 *
34e49164
C
985 * TODO: if in #define region, should add a \ \n
986 *)
ae4735db 987let new_tabbing2 space =
34e49164
C
988 (list_of_string space)
989 +> List.rev
b1b2de81 990 +> Common.take_until (fun c -> c =<= '\n')
34e49164
C
991 +> List.rev
992 +> List.map string_of_char
993 +> String.concat ""
994
ae4735db 995let new_tabbing a =
34e49164
C
996 Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
997
998
ae4735db 999let rec adjust_indentation xs =
aa721442 1000
65038c61 1001 let _current_tabbing = ref ([] : string list) in
34e49164
C
1002 let tabbing_unit = ref None in
1003
1004 let string_of_list l = String.concat "" (List.map string_of_char l) in
1005
1006 (* try to pick a tabbing unit for the plus code *)
1007 let adjust_tabbing_unit old_tab new_tab =
b1b2de81 1008 if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
34e49164
C
1009 then
1010 let old_tab = list_of_string old_tab in
1011 let new_tab = list_of_string new_tab in
1012 let rec loop = function
1013 ([],new_tab) ->
1014 tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
1015 | (_,[]) -> failwith "not possible"
1016 | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
1017 loop (old_tab,new_tab) in
1018
65038c61 1019(*
34e49164
C
1020 let remtab tu current_tab =
1021 let current_tab = List.rev(list_of_string current_tab) in
1022 let rec loop = function
1023 ([],new_tab) -> string_of_list (List.rev new_tab)
65038c61
C
1024 | (_,[]) -> (-*weird; tabbing unit used up more than the current tab*-)
1025 ""
b1b2de81 1026 | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
65038c61 1027 | (_,ns) -> (-* mismatch; remove what we can *-)
34e49164
C
1028 string_of_list (List.rev ns) in
1029 loop (tu,current_tab) in
65038c61 1030*)
34e49164
C
1031
1032 let rec find_first_tab started = function
1033 [] -> ()
b1b2de81 1034 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
34e49164 1035 find_first_tab true xs
485bce71 1036(* patch: coccinelle *)
34e49164
C
1037 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
1038 when started ->
1039 let s = str_of_token2 x +> new_tabbing in
1040 tabbing_unit := Some (s,List.rev (list_of_string s))
1041 | x::xs -> find_first_tab started xs in
1042 find_first_tab false xs;
1043
1eddfd50
C
1044 let rec balanced ct = function
1045 [] -> ct >= 0
1046 | ((T2(tok,_,_)) as x)::xs ->
1047 (match str_of_token2 x with
1048 "(" -> balanced (ct+1) xs
1049 | ")" -> balanced (ct-1) xs
1050 | _ -> balanced ct xs)
1051 | x::xs -> balanced ct xs in
1052
97111a47
C
1053 let update_tabbing started s x =
1054 let old_tabbing = !_current_tabbing in
1055 str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := [s]);
1056 (* only trust the indentation after the first { *)
1057 if started
1058 then
1059 adjust_tabbing_unit
1060 (String.concat "" old_tabbing)
1061 (String.concat "" !_current_tabbing) in
1062
ae4735db 1063 let rec aux started xs =
34e49164
C
1064 match xs with
1065 | [] -> []
485bce71 1066(* patch: coccinelle *)
97111a47
C
1067 | ((T2 (Parser_c.TCommentNewline s,_,_)) as x)::
1068 Unindent_cocci2(false)::xs ->
1069 update_tabbing started s x;
1070 (C2 "\n")::aux started xs
8f657093
C
1071 | (Cocci2("\n",_,_,_,_))::Unindent_cocci2(false)::xs ->
1072 (C2 "\n")::aux started xs
34e49164 1073 | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
5636bb2c 1074 ((Cocci2 ("{",_,_,_,_)) as a)::xs
708f4980 1075 when started && str_of_token2 x =$= ")" ->
34e49164 1076 (* to be done for if, etc, but not for a function header *)
708f4980 1077 x::(C2 " ")::a::(aux started xs)
1eddfd50 1078 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs
97111a47
C
1079 when
1080 balanced 0 (fst(Common.span (function x -> not(is_newline x)) xs)) ->
1081 update_tabbing started s x;
34e49164
C
1082 let coccis_rest = Common.span all_coccis xs in
1083 (match coccis_rest with
b1b2de81 1084 (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
34e49164
C
1085 (* the case where cocci code has been added before a close } *)
1086 x::aux started (Indent_cocci2::xs)
1087 | _ -> x::aux started xs)
65038c61
C
1088 | Indent_cocci2::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
1089 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL ->
1090 let tu = String.make (lcoly-1) ' ' in
1091 _current_tabbing := tu::(!_current_tabbing);
1092 C2 (tu)::aux started (y::xs)
34e49164
C
1093 | Indent_cocci2::xs ->
1094 (match !tabbing_unit with
1095 None -> aux started xs
1096 | Some (tu,_) ->
65038c61
C
1097 _current_tabbing := tu::(!_current_tabbing);
1098 (* can't be C2, for later phases *)
1099 Cocci2 (tu,-1,-1,-1,None)::aux started xs)
c3e37e97 1100 | Unindent_cocci2(permanent)::xs ->
65038c61
C
1101 (match !_current_tabbing with
1102 [] -> aux started xs
1103 | _::new_tabbing ->
17ba0788 1104 let s = String.concat "" new_tabbing in
65038c61 1105 _current_tabbing := new_tabbing;
17ba0788 1106 Cocci2 (s,-1,-1,-1,None)::aux started xs)
34e49164 1107 (* border between existing code and cocci code *)
5636bb2c 1108 | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs
b1b2de81 1109 when str_of_token2 x =$= "{" ->
34e49164
C
1110 x::aux true (y::Indent_cocci2::xs)
1111 | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
b1b2de81 1112 when str_of_token2 y =$= "}" ->
17ba0788 1113 x::aux started (Unindent_cocci2 true::y::xs)
34e49164 1114 (* starting the body of the function *)
b1b2de81 1115 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs
5636bb2c 1116 | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs
5636bb2c 1117 | ((Cocci2("\n",_,_,_,_)) as x)::xs ->
0708f913 1118 (* dont inline in expr because of weird eval order of ocaml *)
65038c61
C
1119 let s = String.concat "" !_current_tabbing in
1120 (* can't be C2, for later phases *)
5636bb2c 1121 x::Cocci2 (s,-1,-1,-1,None)::aux started xs
34e49164 1122 | x::xs -> x::aux started xs in
5636bb2c 1123 (aux false xs,!tabbing_unit)
34e49164
C
1124
1125
1126let rec find_paren_comma = function
1127 | [] -> ()
1128
1129 (* do nothing if was like this in original file *)
1130 | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
ae4735db 1131 ::xs when p2 =|= p1 + 1 ->
34e49164
C
1132 find_paren_comma (x2::xs)
1133
1134 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
ae4735db 1135 ::xs when p2 =|= p1 + 1 ->
34e49164
C
1136 find_paren_comma (x2::xs)
1137
1138 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
ae4735db 1139 ::xs when p2 =|= p1 + 1 ->
34e49164
C
1140 find_paren_comma (x2::xs)
1141
1142 (* otherwise yes can adjust *)
ae4735db 1143 | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
34e49164
C
1144 x2.remove <- true;
1145 find_paren_comma (x2::xs)
ae4735db 1146 | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
34e49164
C
1147 x1.remove <- true;
1148 find_paren_comma (x2::xs)
1149
ae4735db 1150 | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
34e49164
C
1151 x1.remove <- true;
1152 find_paren_comma (x2::xs)
1153
ae4735db 1154 | x::xs ->
34e49164 1155 find_paren_comma xs
34e49164 1156
ae4735db
C
1157
1158let fix_tokens toks =
34e49164
C
1159 let toks = toks +> List.map mk_token_extended in
1160
1161 let cleaner = toks +> Common.exclude (function
1162 | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
1163 | _ -> false
1164 ) in
1165 find_paren_comma cleaner;
1166
1167 let toks = rebuild_tokens_extented toks in
1168 toks +> List.map (fun x -> x.tok2)
1169
1170
1171
1172(*****************************************************************************)
1173(* Final unparsing (and debugging support) *)
1174(*****************************************************************************)
1175
1176(* for debugging *)
1177type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
1178
1179let kind_of_token2 = function
8babbc8f 1180 | Fake2 _ -> KFake
34e49164
C
1181 | Cocci2 _ -> KCocci
1182 | C2 _ -> KC
c491d8ee 1183 | Comma _ -> KC
34e49164
C
1184 | T2 (t,_,_) ->
1185 (match TH.pinfo_of_tok t with
1186 | ExpandedTok _ -> KExpanded
1187 | OriginTok _ -> KOrigin
1188 | FakeTok _ -> raise Impossible (* now a Fake2 *)
1189 | AbstractLineTok _ -> raise Impossible (* now a KC *)
1190 )
993936c0 1191 | Unindent_cocci2 _ | Indent_cocci2 | EatSpace2 -> raise Impossible
ae4735db 1192
34e49164
C
1193let end_mark = "!"
1194
1195let start_mark = function
1196 | KFake -> "!F!"
ae4735db 1197 | KCocci -> "!S!"
34e49164
C
1198 | KC -> "!A!"
1199 | KExpanded -> "!E!"
1200 | KOrigin -> ""
1201
1202let print_all_tokens2 pr xs =
1203 if !Flag_parsing_c.debug_unparsing
1204 then
1205 let current_kind = ref KOrigin in
ae4735db 1206 xs +> List.iter (fun t ->
34e49164 1207 let newkind = kind_of_token2 t in
b1b2de81 1208 if newkind =*= !current_kind
34e49164
C
1209 then pr (str_of_token2 t)
1210 else begin
1211 pr (end_mark);
1212 pr (start_mark newkind);
1213 pr (str_of_token2 t);
1214 current_kind := newkind
1215 end
1216 );
ae4735db 1217 else
34e49164 1218 xs +> List.iter (fun x -> pr (str_of_token2 x))
ae4735db 1219
34e49164
C
1220
1221
1222
1223(*****************************************************************************)
1224(* Entry points *)
1225(*****************************************************************************)
1226
1227(* old: PPviatok was made in the beginning to allow to pretty print a
1228 * complete C file, including a modified C file by transformation.ml,
1229 * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
1230 * maybe flow_to_ast) all the cases. Indeed we don't need to do some
1231 * fancy stuff when a function was not modified at all. Just need to
1232 * print the list of token as-is. But now pretty_print_c.ml handles
1233 * almost everything so maybe less useful. Maybe PPviatok allows to
ae4735db
C
1234 * optimize a little the pretty printing.
1235 *
34e49164
C
1236 * update: now have PPviastr which goes even faster than PPviatok, so
1237 * PPviatok has disappeared.
1238 *)
1239
1240type ppmethod = PPnormal | PPviastr
1241
1242
1243
1244
1245(* The pp_program function will call pretty_print_c.ml with a special
ae4735db 1246 * function to print the leaf components, the tokens. When we want to
34e49164 1247 * print a token, we need to print also maybe the space and comments that
ae4735db 1248 * were close to it in the original file (and that was omitted during the
34e49164
C
1249 * parsing phase), and honor what the cocci-info attached to the token says.
1250 * Maybe we will not print the token if it's a MINUS-token, and maybe we will
ae4735db 1251 * print it and also print some cocci-code attached in a PLUS to it.
34e49164
C
1252 * So we will also maybe call unparse_cocci. Because the cocci-code may
1253 * contain metavariables, unparse_cocci will in fact sometimes call back
1254 * pretty_print_c (which will this time don't call back again unparse_cocci)
1255 *)
1256
ae4735db
C
1257let pp_program2 xs outfile =
1258 Common.with_open_outfile outfile (fun (pr,chan) ->
1259 let pr s =
1260 if !Flag_parsing_c.debug_unparsing
34e49164 1261 then begin pr2_no_nl s; flush stderr end
ae4735db 1262 else pr s
34e49164
C
1263 (* flush chan; *)
1264 (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1265 in
ae4735db
C
1266
1267 xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
34e49164
C
1268 (* here can still work on ast *)
1269 let e = remove_useless_fakeInfo_struct e in
ae4735db 1270
34e49164 1271 match ppmethod with
002099fc 1272 | PPnormal ->
34e49164 1273 (* now work on tokens *)
34e49164 1274 (* phase1: just get all the tokens, all the information *)
ae4735db 1275 assert(toks_e +> List.for_all (fun t ->
34e49164
C
1276 TH.is_origin t or TH.is_expanded t
1277 ));
1278 let toks = get_fakeInfo_and_tokens e toks_e in
1279 let toks = displace_fake_nodes toks in
1280 (* assert Origin;ExpandedTok;Faketok *)
1281 let toks = expand_mcode toks in
c491d8ee 1282
34e49164
C
1283 (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1284 * and no tag information, just NOTHING. *)
fc1ad971
C
1285
1286 let toks =
1287 if !Flag.sgrep_mode2
5626f154
C
1288 then
1289 (* nothing else to do for sgrep *)
8babbc8f 1290 drop_expanded(drop_fake(drop_minus toks))
fc1ad971
C
1291 else
1292 (* phase2: can now start to filter and adjust *)
65038c61 1293 (let (toks,tu) = adjust_indentation toks in
993936c0 1294 let toks = adjust_eat_space toks in
fc1ad971 1295 let toks = adjust_before_semicolon toks in(*before remove minus*)
8babbc8f 1296 let toks = adjust_after_paren toks in(*also before remove minus*)
aba5c457
C
1297 let toks = drop_space_at_endline toks in
1298 let toks = paren_to_space toks in
c491d8ee 1299 let toks = drop_end_comma toks in
fc1ad971
C
1300 let toks = remove_minus_and_between_and_expanded_and_fake toks in
1301 (* assert Origin + Cocci + C and no minus *)
1302 let toks = add_space toks in
5636bb2c 1303 let toks = add_newlines toks tu in
65038c61 1304 let toks = paren_then_brace toks in
fc1ad971 1305 let toks = fix_tokens toks in
65038c61 1306 toks) in
34e49164 1307
ae4735db 1308 (* in theory here could reparse and rework the ast! or
34e49164
C
1309 * apply some SP. Not before cos julia may have generated
1310 * not parsable file. Need do unparsing_tricks call before being
1311 * ready to reparse. *)
1312 print_all_tokens2 pr toks;
1313
1314 | PPviastr -> pr str
1315 )
1316 )
1317
ae4735db 1318let pp_program a b =
34e49164
C
1319 Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
1320
485bce71 1321
ae4735db 1322let pp_program_default xs outfile =
485bce71
C
1323 let xs' = xs +> List.map (fun x -> x, PPnormal) in
1324 pp_program xs' outfile