permit multiline comments and strings in macros
[bpt/coccinelle.git] / parsing_c / unparse_c.ml
CommitLineData
0708f913 1(* Yoann Padioleau, Julia Lawall
ae4735db 2 *
17ba0788
C
3 * Copyright (C) 2012, INRIA.
4 * Copyright (C) 2010, 2011, University of Copenhagen DIKU and INRIA.
0708f913 5 * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
34e49164
C
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License (GPL)
9 * version 2 as published by the Free Software Foundation.
ae4735db 10 *
34e49164
C
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * file license.txt for more details.
ae4735db
C
15 *
16 *
485bce71 17 * Modifications by Julia Lawall for better newline handling.
34e49164
C
18 *)
19open Common
20
34e49164
C
21module TH = Token_helpers
22
b1b2de81
C
23(* should keep comments and directives in between adjacent deleted terms,
24but not comments and directives within deleted terms. should use the
25labels found in the control-flow graph *)
26
34e49164
C
27
28
708f4980
C
29(*****************************************************************************)
30(* Wrappers *)
31(*****************************************************************************)
abad11c5 32let pr2, pr2_once = mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
708f4980 33
34e49164
C
34(*****************************************************************************)
35(* Types used during the intermediate phases of the unparsing *)
36(*****************************************************************************)
37
ae4735db 38type token1 =
abad11c5 39 | Fake1 of Ast_c.info
34e49164
C
40 | T1 of Parser_c.token
41
42(* The cocci_tag of the token should always be a NOTHING. The mark of
43 * the token can only be OriginTok or ExpandedTok. Why not get rid of
44 * token and get something simpler ? because we need to know if the
45 * info is a TCommentCpp or TCommentSpace, etc for some of the further
46 * analysis so easier to keep with the token.
ae4735db 47 *
34e49164
C
48 * This type contains the whole information. Have all the tokens with this
49 * type.
50 *)
708f4980 51type min =
abad11c5
C
52 | Min of (int list (* match numbers from witness trees *) *
53 Ast_cocci.adjacency (* adjacency information *))
708f4980
C
54 | Ctx
55
ae4735db 56type token2 =
abad11c5
C
57 | T2 of Parser_c.token * min
58 * int option (* orig index, abstracting away comments and space *)
59 * Unparse_cocci.nlhint option
8babbc8f 60 | Fake2 of min
708f4980 61 | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
abad11c5 62 * Unparse_cocci.nlhint option
34e49164 63 | C2 of string
c491d8ee 64 | Comma of string
34e49164 65 | Indent_cocci2
c3e37e97 66 | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
993936c0 67 | EatSpace2
34e49164
C
68
69(* not used yet *)
ae4735db 70type token3 =
34e49164
C
71 | T3 of Parser_c.token
72 | Cocci3 of string
73 | C3 of string
74
75
76(* similar to the tech in parsing_hack *)
abad11c5
C
77type token_extended =
78 { tok2 : token2;
79 str : string;
80 idx : int option; (* to know if 2 tokens were consecutive in orig file *)
81 mutable new_tokens_before : token2 list;
82 mutable remove : bool;
83 }
34e49164
C
84
85
86(*****************************************************************************)
87(* Helpers *)
88(*****************************************************************************)
89
ae4735db 90let info_of_token1 t =
34e49164
C
91 match t with
92 | Fake1 info -> info
93 | T1 tok -> TH.info_of_tok tok
94
002099fc 95let print_token1 = function
abad11c5 96 | T1 tok -> TH.str_of_tok tok
002099fc
C
97 | Fake1 info -> "fake"
98
34e49164 99let str_of_token2 = function
abad11c5
C
100 | T2 (t,_,_,_) -> TH.str_of_tok t
101 | Cocci2 (s,_,_,_,_)
102 | C2 s
c491d8ee 103 | Comma s -> s
abad11c5
C
104 | Fake2 _
105 | Indent_cocci2
106 | Unindent_cocci2 _
993936c0 107 | EatSpace2 -> ""
34e49164
C
108
109let print_token2 = function
abad11c5
C
110 | T2 (t,b,_,_) ->
111 let t_str =
112 match t with
113 | Parser_c.TCommentSpace _ -> " sp "
114 | Parser_c.TCommentNewline _ -> " nl "
115 | Parser_c.TCommentCpp _ -> " cp "
116 | Parser_c.TCommentMisc _ -> " misc "
117 | Parser_c.TComment _ -> " comment "
118 | _ -> "" in
119 let b_str =
120 match b with
121 | Min (index,adj) ->
122 Printf.sprintf "-.%d[%s]"
123 (match adj with Ast_cocci.ADJ n -> n | _ -> -1)
124 (String.concat " " (List.map string_of_int index))
125 | Ctx -> "" in
126 "T2:"^b_str^t_str^TH.str_of_tok t
8babbc8f 127 | Fake2 b ->
abad11c5
C
128 let b_str =
129 match b with
130 | Min (index,adj) ->
131 Printf.sprintf "-%d[%s]"
132 (match adj with Ast_cocci.ADJ n -> n | _ -> -1)
133 (String.concat " " (List.map string_of_int index))
134 | Ctx -> "" in
135 b_str^"fake"
5636bb2c 136 | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
34e49164 137 | C2 s -> "C2:"^s
c491d8ee 138 | Comma s -> "Comma:"^s
34e49164 139 | Indent_cocci2 -> "Indent"
c3e37e97 140 | Unindent_cocci2 _ -> "Unindent"
993936c0 141 | EatSpace2 -> "EatSpace"
34e49164 142
abad11c5
C
143(*
144let simple_print_all_tokens pr l =
145 List.iter (function x -> Printf.printf "|%s| " (pr x)) l;
34e49164 146 Printf.printf "\n"
abad11c5 147*)
34e49164
C
148
149let str_of_token3 = function
150 | T3 t -> TH.str_of_tok t
151 | Cocci3 s | C3 s -> s
152
153
154
ae4735db
C
155let mk_token_extended x =
156 let origidx =
34e49164 157 match x with
abad11c5
C
158 | T2 (_,_,idx,_) -> idx
159 | _ -> None in
ae4735db 160 { tok2 = x;
34e49164
C
161 str = str_of_token2 x;
162 idx = origidx;
163 new_tokens_before = [];
164 remove = false;
165 }
166
ae4735db 167let rebuild_tokens_extented toks_ext =
34e49164 168 let _tokens = ref [] in
ae4735db 169 toks_ext +> List.iter (fun tok ->
34e49164
C
170 tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
171 if not tok.remove then push2 tok.tok2 _tokens;
abad11c5 172 );
34e49164
C
173 let tokens = List.rev !_tokens in
174 (tokens +> List.map mk_token_extended)
175
176
177let mcode_contain_plus = function
178 | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
179 | Ast_cocci.CONTEXT _ -> true
abad11c5 180 (* patch: when need full coccinelle transformation *)
8babbc8f
C
181 | Ast_cocci.MINUS (_,_,_,Ast_cocci.NOREPLACEMENT) -> false
182 | Ast_cocci.MINUS (_,_,_,Ast_cocci.REPLACEMENT _) -> true(*REPL is not empty*)
abad11c5 183 | Ast_cocci.PLUS _ -> raise (Impossible 132)
34e49164 184
ae4735db 185let contain_plus info =
34e49164
C
186 let mck = Ast_c.mcode_of_info info in
187 mcode_contain_plus mck
188
189(*****************************************************************************)
190(* Last fix on the ast *)
191(*****************************************************************************)
192
ae4735db 193(* Because of the ugly trick to handle initialiser, I generate fake ','
34e49164
C
194 * for the last initializer element, but if there is nothing around it,
195 * I don't want in the end to print it.
196 *)
197
ae4735db 198let remove_useless_fakeInfo_struct program =
34e49164 199 let bigf = { Visitor_c.default_visitor_c_s with
ae4735db 200 Visitor_c.kini_s = (fun (k,bigf) ini ->
34e49164 201 match k ini with
abad11c5
C
202 | Ast_c.InitList args, ii ->
203 (match ii with
204 | [_;_] -> ini
205 | i1 :: i2 :: iicommaopt :: tl when
206 (not (contain_plus iicommaopt))
207 && (not (contain_plus i2))
208 && (Ast_c.is_fake iicommaopt) ->
209 (* sometimes the guy put a normal iicommaopt *)
210 Ast_c.InitList args, (i1 :: i2 :: tl)
211 | ii -> Ast_c.InitList args, ii
212 )
213 | x -> x)
214 } in
34e49164
C
215 Visitor_c.vk_toplevel_s bigf program
216
217
218(*****************************************************************************)
219(* Tokens1 generation *)
220(*****************************************************************************)
221
ae4735db 222let get_fakeInfo_and_tokens celem toks =
1b9ae606 223
ae4735db 224 let toks_in = ref toks in
34e49164
C
225 let toks_out = ref [] in
226
227 (* todo? verify good order of position ? *)
ae4735db 228 let pr_elem info =
34e49164 229 match Ast_c.pinfo_of_info info with
abad11c5
C
230 | Ast_c.FakeTok _ ->
231 push2 (Fake1 info) toks_out
232 | Ast_c.OriginTok _ | Ast_c.ExpandedTok _ ->
1b9ae606 233
abad11c5
C
234 (* get the associated comments/space/cppcomment tokens *)
235 let (before, x, after) =
236 !toks_in +> split_when (fun tok ->
237 info =*= TH.info_of_tok tok)
238 in
239 assert(info =*= TH.info_of_tok x);
240 (*old: assert(before +> List.for_all (TH.is_comment)); *)
241 before +> List.iter (fun x ->
242 if not (TH.is_comment x)
243 then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
244 (* case such as int asm d3("x"); not yet in ast *)
34e49164 245 );
abad11c5 246 before +> List.iter (fun x -> push2 (T1 x) toks_out);
1b9ae606 247
abad11c5
C
248 push2 (T1 x) toks_out;
249 toks_in := after;
250 | Ast_c.AbstractLineTok _ ->
251 (* can be called on type info when for instance use -type_c *)
252 if !Flag_parsing_c.pretty_print_type_info
253 then push2 (Fake1 info) toks_out
254 else raise (Impossible 134) (* at this stage *)
34e49164
C
255 in
256
257 let pr_space _ = () in (* use the spacing that is there already *)
258
259 Pretty_print_c.pp_program_gen pr_elem pr_space celem;
260
261 if not (null !toks_in)
0708f913 262 then failwith "WEIRD: unparsing not finished";
34e49164
C
263
264 List.rev !toks_out
265
8babbc8f 266(* Fake nodes that have BEFORE code or are - should be moved over any subsequent
34e49164 267whitespace and newlines, but not any comments, to get as close to the affected
002099fc 268code as possible. Similarly, fake nodes that have AFTER code should be moved
34e49164
C
269backwards. No fake nodes should have both before and after code. *)
270
271let displace_fake_nodes toks =
272 let is_fake = function Fake1 _ -> true | _ -> false in
273 let is_whitespace = function
abad11c5
C
274 | T1(Parser_c.TCommentSpace _)
275 (* patch: cocci *)
34e49164
C
276 | T1(Parser_c.TCommentNewline _) -> true
277 | _ -> false in
278 let rec loop toks =
279 let fake_info =
abad11c5 280 try Some (split_when is_fake toks)
34e49164
C
281 with Not_found -> None in
282 match fake_info with
abad11c5
C
283 | Some(bef,((Fake1 info) as fake),aft) ->
284 (match !(info.Ast_c.cocci_tag) with
285 | Some x ->
286 (match x with
287 | (Ast_cocci.MINUS(_,_,_,Ast_cocci.REPLACEMENT _),_)
288 (* for , replacement is more likely to be like after, but not clear...
289 but treating it as after breaks a lot of tests. *)
290
291 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
292 (* move the fake node forwards *)
293 let (whitespace,rest) = span is_whitespace aft in
294 bef @ whitespace @ fake :: (loop rest)
295
296 | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
297 (* move the fake node backwards *)
298 let revbef = List.rev bef in
299 let (revwhitespace,revprev) = span is_whitespace revbef in
300 let whitespace = List.rev revwhitespace in
301 let prev = List.rev revprev in
302 prev @ fake :: (loop (whitespace @ aft))
303 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
304 failwith "fake node should not be before-after"
305 | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_)
306 | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
708f4980 307 )
abad11c5
C
308 | None ->
309 bef @ fake :: (loop aft)
310 )
34e49164 311 | None -> toks
abad11c5 312 | _ -> raise (Impossible 135) in
34e49164
C
313 loop toks
314
315(*****************************************************************************)
316(* Tokens2 generation *)
317(*****************************************************************************)
318
b1b2de81 319let comment2t2 = function
abad11c5
C
320 | (Token_c.TCommentCpp
321 (* not sure iif the following list is exhaustive or complete *)
322 (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError),
323 (info : Token_c.info)) ->
324 C2(info.Common.str)
5636bb2c 325 | (Token_c.TCommentCpp x,(info : Token_c.info)) ->
abad11c5
C
326 C2("\n"^info.Common.str^"\n")
327 | x -> failwith (Printf.sprintf "unexpected comment %s" (Dumper.dump x))
b1b2de81 328
ae4735db 329let expand_mcode toks =
34e49164
C
330 let toks_out = ref [] in
331
332 let index = ref 0 in
333
ae4735db 334 let add_elem t minus =
34e49164 335 match t with
ae4735db 336 | Fake1 info ->
abad11c5
C
337 let str = Ast_c.str_of_info info in
338 let isminus = match minus with Min _ -> true | Ctx -> false in
339 (* don't add fake string if the thing should be removed *)
340 if str =$= "" or isminus
341 then push2 (Fake2 minus) toks_out
342 (* fx the fake "," at the end of a structure or enum.
343 no idea what other fake info there can be... *)
344 else push2 (Comma str) toks_out
ae4735db 345
002099fc 346 | T1 tok ->
abad11c5
C
347 (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
348 (* no tag on expandedTok ! *)
349 let modified = function
350 | None -> false
351 | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
352 | _ -> true in
353 (if TH.is_expanded tok &&
354 modified !((TH.info_of_tok tok).Ast_c.cocci_tag)
355 (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*)
356 then
357 failwith
358 (Printf.sprintf
359 "expanded token %s on line %d is either modified or stored in a metavariable"
360 (TH.str_of_tok tok) (TH.line_of_tok tok)));
361
362 let tok' = tok +> TH.visitor_info_of_tok (fun i ->
363 { i with Ast_c.cocci_tag = ref Ast_c.emptyAnnot; }
364 ) in
365
366 let optindex =
367 if TH.is_origin tok && not (TH.is_real_comment tok)
368 then
369 begin
370 incr index;
371 Some !index
34e49164 372 end
abad11c5
C
373 else None
374 in
34e49164 375
abad11c5 376 push2 (T2 (tok', minus, optindex, None)) toks_out
34e49164
C
377 in
378
ae4735db
C
379 let expand_info t =
380 let (mcode,env) =
abad11c5 381 Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).Ast_c.cocci_tag) in
34e49164 382
5636bb2c 383 let pr_cocci s ln col rcol hint =
abad11c5 384 push2 (Cocci2 (s,ln,col,rcol,hint)) toks_out in
ae4735db 385 let pr_c info =
b1b2de81 386 (match Ast_c.pinfo_of_info info with
abad11c5
C
387 | Ast_c.AbstractLineTok _ ->
388 push2 (C2 (Ast_c.str_of_info info)) toks_out
b1b2de81 389 | Ast_c.FakeTok (s,_) ->
abad11c5 390 push2 (C2 s) toks_out
34e49164 391 | _ ->
abad11c5
C
392 Printf.fprintf stderr "line: %s\n" (Dumper.dump info);
393 failwith "not an abstract line"
394 );
b1b2de81 395 (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
abad11c5 396 List.iter (fun x -> push2 (comment2t2 x) toks_out) in
b1b2de81 397
708f4980 398 let pr_barrier ln col = (* marks a position, used around C code *)
abad11c5 399 push2 (Cocci2 ("",ln,col,col,None)) toks_out in
708f4980 400 let pr_nobarrier ln col = () in (* not needed for linux spacing *)
b1b2de81 401
708f4980 402 let pr_cspace _ = push2 (C2 " ") toks_out in
34e49164 403
708f4980
C
404 let pr_space _ = () (* rely on add_space in cocci code *) in
405 let pr_arity _ = () (* not interested *) in
34e49164 406
abad11c5 407 let indent _ = push2 Indent_cocci2 toks_out in
c3e37e97 408 let unindent x = push2 (Unindent_cocci2 x) toks_out in
abad11c5 409 let eat_space _ = push2 EatSpace2 toks_out in
34e49164 410
708f4980
C
411 let args_pp =
412 (env, pr_cocci, pr_c, pr_cspace,
abad11c5
C
413 (match !Flag_parsing_c.spacing with
414 | Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
415 pr_arity,
416 (match !Flag_parsing_c.spacing with
417 | Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
418 indent, unindent, eat_space) in
419
420(* old: when for yacfe with partial cocci:
421 * add_elem t false;
422 *)
34e49164 423
abad11c5 424(* patch: when need full coccinelle transformation *)
faf9a90c 425 let unparser = Unparse_cocci.pp_list_list_any args_pp false in
34e49164 426 match mcode with
ae4735db 427 | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
abad11c5
C
428 (* Why adding ? because I want to have all the information, the whole
429 * set of tokens, so I can then process and remove the
430 * is_between_two_minus for instance *)
431 add_elem t (Min (inst,adj));
432 (match any_xxs with
433 | Ast_cocci.NOREPLACEMENT -> ()
434 | Ast_cocci.REPLACEMENT(any_xxs,_) ->
435 unparser any_xxs Unparse_cocci.InPlace
436 )
ae4735db 437 | Ast_cocci.CONTEXT (_,any_befaft) ->
abad11c5
C
438 (match any_befaft with
439 | Ast_cocci.NOTHING ->
440 add_elem t Ctx
441 | Ast_cocci.BEFORE (xxs,_) ->
442 unparser xxs Unparse_cocci.Before;
443 add_elem t Ctx
444 | Ast_cocci.AFTER (xxs,_) ->
445 add_elem t Ctx;
446 unparser xxs Unparse_cocci.After;
447 | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
448 unparser xxs Unparse_cocci.Before;
449 add_elem t Ctx;
450 unparser yys Unparse_cocci.After;
451 )
452 | Ast_cocci.PLUS _ -> raise (Impossible 136)
34e49164
C
453 in
454
455 toks +> List.iter expand_info;
456 List.rev !toks_out
ae4735db 457
34e49164
C
458
459(*****************************************************************************)
460(* Tokens2 processing, filtering, adjusting *)
461(*****************************************************************************)
462
002099fc 463let is_space = function
abad11c5 464 | T2(Parser_c.TCommentSpace _,_b,_i,_h) -> true (* only whitespace *)
ae4735db 465 | _ -> false
fc1ad971
C
466
467let is_newline = function
abad11c5 468 | T2(Parser_c.TCommentNewline _,_b,_i,_h) -> true
fc1ad971
C
469 | _ -> false
470
abad11c5
C
471let is_whitespace x =
472 is_space x or is_newline x
002099fc 473
34e49164 474let is_minusable_comment = function
abad11c5
C
475 | (T2 (t,_b,_i,_h)) ->
476 (match t with
477 | Parser_c.TCommentSpace _ (* only whitespace *)
478 (* patch: coccinelle *)
479 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
480 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
481 | Parser_c.TComment _
482 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
483 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
484 | Parser_c.TCommentCpp (Token_c.CppIfDirective _, _)
485 | Parser_c.TCommentCpp (Token_c.CppDirective, _) -> (* result was false *)
486 true
487 (*
488 | Parser_c.TCommentMisc _
489 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _) ->
490 false
491 *)
492 | _ -> false
493 )
ae4735db 494 | _ -> false
34e49164 495
fc1ad971 496let is_minusable_comment_nocpp = function
abad11c5
C
497 | (T2 (t,_b,_i,_h)) ->
498 (match t with
499 | Parser_c.TCommentSpace _ (* only whitespace *)
500 (* patch: coccinelle *)
501 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
502 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
503 | Parser_c.TComment _ -> true
504 (*
505 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
506 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
507 | Parser_c.TCommentCpp (Token_c.CppIfDirective _, _)
508 | Parser_c.TCommentCpp (Token_c.CppDirective, _) ->
509 false
510
511 | Parser_c.TCommentMisc _
512 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _) ->
513 false
514 *)
515 | _ -> false
516 )
ae4735db 517 | _ -> false
fc1ad971 518
34e49164 519let all_coccis = function
abad11c5
C
520 | Cocci2 _ | C2 _ | Comma _ | Indent_cocci2
521 | Unindent_cocci2 _ | EatSpace2 -> true
34e49164
C
522 | _ -> false
523
abad11c5
C
524(* previously gave up if the first character was a newline, but not clear why *)
525let is_minusable_comment_or_plus x =
526 is_minusable_comment x or all_coccis x
34e49164 527
708f4980 528let set_minus_comment adj = function
abad11c5
C
529 | T2 (t,Ctx,idx,hint) ->
530 let str = TH.str_of_tok t in
531 (match t with
532 | Parser_c.TCommentSpace _
533 (* patch: coccinelle *)
534 | Parser_c.TCommentNewline _ -> ()
535
536 | Parser_c.TComment _
537 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
538 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
539 | Parser_c.TCommentCpp (Token_c.CppIfDirective _, _)
540 | Parser_c.TCommentCpp (Token_c.CppDirective, _) ->
541 pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
542 (TH.line_of_tok t) str)
543 | _ -> raise (Impossible 137)
544 );
545 T2 (t, Min adj, idx, hint)
546 (* patch: coccinelle *)
547 | T2 (t, Min adj, idx, hint) as x -> x
8babbc8f 548 | Fake2 _ as x -> x
abad11c5 549 | _ -> raise (Impossible 138)
34e49164 550
feec80c3 551(* don't touch ifdefs, done after *)
708f4980 552let set_minus_comment_or_plus adj = function
abad11c5
C
553 | Cocci2 _ | C2 _ | Comma _ | Indent_cocci2
554 | Unindent_cocci2 _ | EatSpace2 as x -> x
708f4980 555 | x -> set_minus_comment adj x
34e49164 556
abad11c5
C
557let is_minus = function
558 | T2 (_, Min _, _, _) -> true
559 | _ -> false
560
fc1ad971 561let drop_minus xs =
abad11c5 562 xs +> exclude is_minus
fc1ad971 563
8babbc8f 564let drop_expanded xs =
abad11c5
C
565 xs +> exclude (function
566 | T2 (t,_,_,_) when TH.is_expanded t -> true
8babbc8f
C
567 | _ -> false
568 )
569
570let drop_fake xs =
abad11c5 571 xs +> exclude (function
8babbc8f 572 | Fake2 _ -> true
34e49164
C
573 | _ -> false
574 )
5626f154
C
575
576let remove_minus_and_between_and_expanded_and_fake xs =
577
8babbc8f
C
578 (* get rid of expanded tok *)
579 let xs = drop_expanded xs in
34e49164 580
abad11c5
C
581 let minus_or_comment x =
582 is_minus x or is_minusable_comment x in
113803cf 583
abad11c5
C
584 let minus_or_comment_nocpp x =
585 is_minus x or is_minusable_comment_nocpp x in
fc1ad971
C
586
587 let common_adj (index1,adj1) (index2,adj2) =
8babbc8f
C
588 let same_adj = (* same adjacency info *)
589 match (adj1,adj2) with
abad11c5 590 | (Ast_cocci.ADJ adj1,Ast_cocci.ADJ adj2) -> adj1 = adj2
8babbc8f
C
591 | (Ast_cocci.ALLMINUS,_) | (_,Ast_cocci.ALLMINUS) -> true in
592 same_adj &&
fc1ad971 593 (* non-empty intersection of witness trees *)
abad11c5 594 not ((inter_set index1 index2) = []) in
fc1ad971 595
fc1ad971 596 (* new idea: collects regions not containing non-space context code
abad11c5
C
597 if two adjacent adjacent minus tokens satisfy common_adj then delete
598 all spaces, comments etc between them
599 if two adjacent minus tokens do not satisfy common_adj only delete
600 the spaces between them if there are no comments, etc.
601 if the region contain no plus code and is both preceded and followed
602 by a newline, delete the initial newline. *)
fc1ad971
C
603
604 let rec adjust_around_minus = function
abad11c5
C
605 | [] -> []
606 | (T2(Parser_c.TCommentNewline c,_b,_i,_h) as x)::
607 ((Fake2(Min adj1) | T2(_,Min adj1,_,_)) as t1)::xs ->
608 let (minus_list,rest) = span not_context (t1::xs) in
609 let contains_plus = List.exists is_plus minus_list in
610 let x =
611 match List.rev minus_list with
612 | (T2(Parser_c.TCommentNewline c,_b,_i,_h))::rest
613 when List.for_all minus_or_comment minus_list ->
614 set_minus_comment_or_plus adj1 x
615 | _ -> x in
616 x :: adjust_within_minus contains_plus minus_list
617 @ adjust_around_minus rest
618 | ((Fake2(Min adj1) | T2(_,Min adj1,_,_)) as t1)::xs ->
619 let (minus_list,rest) = span not_context (t1::xs) in
620 let contains_plus = List.exists is_plus minus_list in
621 adjust_within_minus contains_plus minus_list
622 @ adjust_around_minus rest
c491d8ee 623 | x::xs ->
abad11c5 624 x :: adjust_around_minus xs
fc1ad971 625 and adjust_within_minus cp (* contains plus *) = function
abad11c5
C
626 | ((Fake2(Min adj1) | T2(_,Min adj1,_,_)) as t1)::xs ->
627 let not_minus = function T2(_,Min _,_,_) -> false | _ -> true in
628 let (not_minus_list,rest) = span not_minus xs in
629 t1 ::
630 (match rest with
631 | ((Fake2(Min adj2) | T2(_,Min adj2,_,_)) as t2)::xs ->
632 if common_adj adj1 adj2
633 || not cp && List.for_all is_whitespace not_minus_list
634 then
635 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
636 @ (adjust_within_minus cp (t2::xs))
637 else
638 not_minus_list
639 @ (adjust_within_minus cp (t2::xs))
640 | _ ->
641 if cp
642 then xs
643 else
644 (* remove spaces after removed stuff, eg a comma after a
645 function argument *)
646 (let (spaces,rest) = span is_space xs in
647 (List.map (set_minus_comment_or_plus adj1) spaces)
648 @ rest)
649 )
fc1ad971
C
650 | xs -> failwith "should always start with minus"
651 and not_context = function
abad11c5 652 | (T2(_,Ctx,_,_) as x) when not (is_minusable_comment x) -> false
fc1ad971
C
653 | _ -> true
654 and is_plus = function
abad11c5 655 | C2 _ | Comma _ | Cocci2 _ -> true
fc1ad971 656 | _ -> false in
34e49164 657
fc1ad971 658 let xs = adjust_around_minus xs in
34e49164 659
8babbc8f
C
660 (* get rid of fake tok *)
661 let xs = drop_fake xs in
662
113803cf 663 (* this drops blank lines after a brace introduced by removing code *)
fc1ad971 664 let minus_or_comment_nonl = function
abad11c5
C
665 | T2(_,Min adj,_,_) -> true
666 | T2(Parser_c.TCommentNewline _,_b,_i,_h) -> false
fc1ad971
C
667 | x -> is_minusable_comment x in
668
113803cf 669 let rec adjust_after_brace = function
abad11c5
C
670 | [] -> []
671 | ((T2(_,Ctx,_,_)) as x)::((T2(_,Min adj,_,_)::_) as xs)
672 when str_of_token2 x =$= "{" ->
673 let (between_minus,rest) = span minus_or_comment_nonl xs in
674 let (newlines,rest) = span is_whitespace rest in
675 let (drop_newlines,last_newline) =
676 let rec loop = function
677 | [] -> ([],[])
678 | ((T2(Parser_c.TCommentNewline _,_b,_i,_h)) as x) :: rest ->
679 (List.rev rest,[x])
680 | x::xs ->
681 let (drop_newlines,last_newline) = loop xs in
682 (drop_newlines,x::last_newline) in
683 loop (List.rev newlines) in
684 x :: between_minus
685 @ List.map (set_minus_comment adj) drop_newlines
686 @ last_newline
687 @ adjust_after_brace rest
688 | x::xs -> x :: (adjust_after_brace xs) in
113803cf
C
689
690 let xs = adjust_after_brace xs in
691
fc1ad971 692 (* search backwards from context } over spaces until reaching a newline.
abad11c5
C
693 then go back over all minus code until reaching some context or + code.
694 get rid of all intervening spaces, newlines, and comments
695 input is reversed *)
fc1ad971 696 let rec adjust_before_brace = function
abad11c5
C
697 | [] -> []
698 | ((T2(t,Ctx,_,_)) as x)::xs
699 when str_of_token2 x =$= "}" or is_newline x ->
700 let (outer_spaces,rest) = span is_space xs in
701 x :: outer_spaces @
702 (match rest with
703 | ((T2 (Parser_c.TCommentNewline _,Ctx,_i,_h)) as h) ::
704 (* the rest of this code is the same as from_newline below
705 but merging them seems to be error prone... *)
706 ((T2 (t, Min adj, idx, hint)) as m) :: rest ->
707 let (spaces,rest) = span minus_or_comment_nocpp rest in
708 h :: m ::
709 (List.map (set_minus_comment adj) spaces) @
710 (adjust_before_brace rest)
711 | _ -> adjust_before_brace rest
712 )
fc1ad971
C
713 | x::xs -> x :: (adjust_before_brace xs) in
714
715 let from_newline = function
abad11c5
C
716 | ((T2 (t, Min adj, idx, hint)) as m) :: rest ->
717 let (spaces,rest) = span minus_or_comment_nocpp rest in
718 m ::
719 (List.map (set_minus_comment adj) spaces) @
720 (adjust_before_brace rest)
721 | ((T2 (t0,Ctx, idx0,h0)) as m0) ::
722 ((T2 (t,Min adj,idx,h)) as m) :: rest
723 when TH.str_of_tok t0 = "" ->
724 (* This is for the case of a #define that is completely deleted,
725 because a #define has a strange EOL token at the end.
726 We hope there i no other kind of token that is represented by
727 "", but it seems like changing the kind of token might break
728 the end of entity recognition in the C parser.
729 See parsing_hacks.ml *)
730 let (spaces,rest) = span minus_or_comment_nocpp rest in
731 m0 :: m ::
732 (List.map (set_minus_comment adj) spaces) @
733 (adjust_before_brace rest)
fc1ad971
C
734 | rest -> adjust_before_brace rest in
735
736 let xs = List.rev (from_newline (List.rev xs)) in
34e49164 737
d6ce1786 738 let cleanup_ifdefs toks =
abad11c5 739 (* TODO: these functions are horrid, but using tokens caused circularity *)
d6ce1786 740 let is_ifdef = function
d6ce1786 741 | T2((Parser_c.TCommentCpp
abad11c5
C
742 (Token_c.CppIfDirective Token_c.IfDef, _)),m,idx,_) -> true
743 | T2((Parser_c.TCommentCpp
744 (Token_c.CppIfDirective Token_c.IfDef0, _)),m,idx,_) -> true
d6ce1786
C
745 | t -> false in
746 let is_else = function
abad11c5
C
747 | T2((Parser_c.TCommentCpp
748 (Token_c.CppIfDirective Token_c.Else, _)),m,idx,_) -> true
d6ce1786
C
749 | _ -> false in
750 let is_endif = function
abad11c5
C
751 | T2((Parser_c.TCommentCpp
752 (Token_c.CppIfDirective Token_c.Endif, _)),m,idx,_) -> true
d6ce1786
C
753 | _ -> false in
754 let add t = function
abad11c5 755 | l::rest -> (t::l)::rest
d6ce1786
C
756 | _ -> failwith "not possible" in
757 let rec parse_ifdef acc_keywords acc_code stack = function
abad11c5 758 | [] -> (None,acc_keywords,acc_code)
d6ce1786 759 | t::rest when is_else t ->
abad11c5
C
760 (match stack with
761 | [] -> parse_ifdef (t::acc_keywords) ([]::acc_code) stack rest
762 | _ -> parse_ifdef acc_keywords (add t acc_code) stack rest
763 )
d6ce1786 764 | t::rest when is_endif t ->
abad11c5
C
765 (match stack with
766 | [] -> ((Some (t,rest)),acc_keywords,acc_code)
767 | _::stack -> parse_ifdef acc_keywords (add t acc_code) stack rest
768 )
d6ce1786 769 | t::rest when is_ifdef t ->
abad11c5 770 parse_ifdef acc_keywords (add t acc_code) (()::stack) rest
d6ce1786
C
771 | t::rest -> parse_ifdef acc_keywords (add t acc_code) stack rest in
772 let unminus = function
abad11c5 773 | T2 (t,Min adj,idx,hint) -> T2 (t,Ctx,idx,hint)
d6ce1786 774 | x -> x in
d6ce1786 775 let rec loop = function
abad11c5 776 | [] -> []
d6ce1786 777 | t::rest when is_ifdef t ->
abad11c5
C
778 let (ender,acc_keywords,acc_code) =
779 parse_ifdef [t] [[]] [] rest in
780 let acc_code = List.map loop acc_code in
781 let merge = (* args reversed *)
782 List.fold_left2
783 (fun prev kwd code -> kwd :: (List.rev code) @ prev)
784 [] in
785 (match ender with
786 | None -> merge (List.map unminus acc_keywords) acc_code
787 | Some(endif,rest) ->
788 let rest = loop rest in
789 if List.for_all is_minus (endif :: acc_keywords)
790 then (merge acc_keywords acc_code) @ (endif :: rest)
791 else
792 (merge (List.map unminus acc_keywords) acc_code) @
793 ((unminus endif) :: rest)
794 )
d6ce1786
C
795 | x::xs -> x :: loop xs in
796 loop toks in
abad11c5 797
d6ce1786
C
798 let xs = cleanup_ifdefs xs in
799 let xs = drop_minus xs in
800 xs
feec80c3 801
993936c0 802(* things that should not be followed by space - boundary between SmPL
abad11c5 803code and C code *)
993936c0
C
804let adjust_eat_space toks =
805 let rec loop = function
abad11c5 806 | [] -> []
993936c0
C
807 | EatSpace2 :: x :: rest when is_space x -> loop rest
808 | EatSpace2 :: rest -> loop rest
809 | x :: xs -> x :: loop xs in
810 loop toks
811
faf9a90c
C
812(* normally, in C code, a semicolon is not preceded by a space or newline *)
813let adjust_before_semicolon toks =
814 let toks = List.rev toks in
002099fc 815 let rec search_semic = function
abad11c5
C
816 | [] -> []
817 | ((T2(_,Ctx,_,_) | Cocci2 _) as x)::xs
818 when List.mem (str_of_token2 x) [";";")";","] ->
819 x :: search_semic (search_minus false xs)
002099fc
C
820 | x::xs -> x :: search_semic xs
821 and search_minus seen_minus xs =
abad11c5 822 let (spaces, rest) = span is_space xs in
002099fc
C
823 (* only delete spaces if something is actually deleted *)
824 match rest with
abad11c5 825 | ((T2(_,Min _,_,_)) as a)::rerest -> a :: search_minus true rerest
002099fc
C
826 | _ -> if seen_minus then rest else xs in
827 List.rev (search_semic toks)
34e49164 828
8babbc8f
C
829(* normally, in C code, a ( is not followed by a space or newline *)
830let adjust_after_paren toks =
831 let rec search_paren = function
abad11c5
C
832 | [] -> []
833 | ((T2(_,Ctx,_,_) | Cocci2 _) as x)::xs
834 when List.mem (str_of_token2 x) ["("] (* other things? *) ->
835 x :: search_paren (search_minus false xs)
8babbc8f
C
836 | x::xs -> x :: search_paren xs
837 and search_minus seen_minus xs =
abad11c5 838 let (spaces, rest) = span is_whitespace xs in
8babbc8f
C
839 (* only delete spaces if something is actually deleted *)
840 match rest with
abad11c5
C
841 | ((T2(_,Min _,_,_)) as a)::rerest -> (* minus *)
842 a :: search_minus true rerest
843 | ((T2(_,Ctx,_,_)) as a)::rerest
844 when seen_minus && str_of_token2 a = "," ->
845 (* comma after ( will be deleted, so consider it as minus code
846 already *)
847 a :: search_minus true rerest
8babbc8f
C
848 | _ -> if seen_minus then rest else xs in (* drop trailing space *)
849 search_paren toks
850
65038c61
C
851(* this is for the case where braces are added around an if branch *)
852let paren_then_brace toks =
853 let rec search_paren = function
abad11c5
C
854 | [] -> []
855 | ((T2(_,Ctx,_,_)) as x)::xs
856 when List.mem (str_of_token2 x) [")"] ->
857 x :: search_paren (search_plus xs)
65038c61
C
858 | x::xs -> x :: search_paren xs
859 and search_plus xs =
abad11c5
C
860 let (spaces, rest) = span is_space xs in
861 let (nls, rest) = span is_newline rest in
65038c61 862 match rest with
abad11c5
C
863 (* move the brace up to the previous line *)
864 | ((Cocci2("{",_,_,_,_)) as x) :: (((Cocci2 _) :: _) as rest) ->
865 spaces @ x :: nls @ rest
65038c61
C
866 | _ -> xs in
867 search_paren toks
868
abad11c5 869let is_ident_like s = s ==~ regexp_alpha
34e49164 870
aba5c457 871let rec drop_space_at_endline = function
abad11c5 872 | [] -> []
aba5c457 873 | [x] -> [x]
190f1acf 874 | (C2 " ") ::
abad11c5
C
875 ((((T2(Parser_c.TCommentSpace _,Ctx,_,_)) | Cocci2("\n",_,_,_,_) |
876 (T2(Parser_c.TCommentNewline _,Ctx,_,_))) :: _) as rest) ->
877 (* when unparse_cocci doesn't know whether space is needed *)
878 drop_space_at_endline rest
879 | ((T2(Parser_c.TCommentSpace _,Ctx,_i,_h)) as a)::rest ->
880 let (outer_spaces,rest) = span is_space rest in
881 let minus_or_comment_or_space_nocpp = function
882 | T2(_,Min adj,_,_) -> true
883 | (T2(Parser_c.TCommentSpace _,Ctx,_i,_)) -> true
884 | (T2(Parser_c.TCommentNewline _,Ctx,_i,_)) -> false
885 | x -> false in
886 let (minus,rest) = span minus_or_comment_or_space_nocpp rest in
887 let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in
888 if List.exists is_minus minus
889 then
890 match rest with
891 | ((T2(Parser_c.TCommentNewline _,Ctx,_i,_h)) as a)::rest ->
892 (* drop trailing spaces *)
893 minus @ a :: (drop_space_at_endline rest)
894 | _ -> fail ()
895 else fail ()
8f657093 896 | a :: rest ->
abad11c5 897 a :: drop_space_at_endline rest
aba5c457
C
898
899(* if a removed ( is between two tokens, then add a space *)
900let rec paren_to_space = function
abad11c5 901 | [] -> []
aba5c457
C
902 | [x] -> [x]
903 | [x;y] -> [x;y]
abad11c5
C
904 | ((T2(_,Ctx,_,_)) as a)::
905 ((T2(t,Min _,_,_)) as b)::
906 ((T2(_,Ctx,_,_)) as c)::rest
aba5c457 907 when not (is_whitespace a) && TH.str_of_tok t = "(" ->
abad11c5 908 a :: b :: (C2 " ") :: (paren_to_space (c :: rest))
aba5c457
C
909 | a :: rest -> a :: (paren_to_space rest)
910
ae4735db 911let rec add_space xs =
34e49164
C
912 match xs with
913 | [] -> []
914 | [x] -> [x]
5636bb2c 915 | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
708f4980 916 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
abad11c5
C
917 not (lnx = -1) && not (rcolx = -1) && lnx = lny && rcolx < lcoly ->
918 (* this only works within a line. could consider whether
919 something should be done to add newlines too, rather than
920 printing them explicitly in unparse_cocci. *)
921 x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
65038c61
C
922 | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
923 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
abad11c5
C
924 not (lnx = -1) && not (rcolx = -1) && lnx < lny ->
925 (* this only works within a line. could consider whether
926 something should be done to add newlines too, rather than
927 printing them explicitly in unparse_cocci. *)
928 x::C2 (String.make (lny-lnx) '\n')::
929 C2 (String.make (lcoly-1) ' '):: (* -1 is for the + *)
930 add_space (y::xs)
931 | ((T2(_,Ctx,_,_)) as x)::((Cocci2 _) as y)::xs -> (* add space on boundary *)
932 let sx = str_of_token2 x in
933 let sy = str_of_token2 y in
934 if is_ident_like sx && (is_ident_like sy or List.mem sy ["="])
935 then x::C2 " "::(add_space (y::xs))
936 else x::(add_space (y::xs))
1eddfd50 937 | x::y::xs -> (* not boundary, not sure if it is possible *)
abad11c5
C
938 let sx = str_of_token2 x in
939 let sy = str_of_token2 y in
940 if is_ident_like sx && is_ident_like sy
941 then x::C2 " "::(add_space (y::xs))
942 else x::(add_space (y::xs))
34e49164 943
c491d8ee
C
944(* A fake comma is added at the end of an unordered initlist or a enum
945decl, if the initlist or enum doesn't already end in a comma. This is only
946needed if there is + code, ie if we see Cocci after it in the code sequence *)
947
948let rec drop_end_comma = function
abad11c5 949 | [] -> []
c491d8ee
C
950 | [x] -> [x]
951 | ((Comma ",") as x) :: rest ->
abad11c5
C
952 let (newlines,rest2) = span is_whitespace rest in
953 (match rest2 with
954 | (Cocci2 _) :: _ -> x :: drop_end_comma rest
955 | _ -> drop_end_comma rest
956 )
c491d8ee
C
957 | x :: xs -> x :: drop_end_comma xs
958
5636bb2c
C
959(* The following only works for the outermost function call. Stack records
960the column of all open parentheses. Space_cell contains the most recent
961comma in the outermost function call. The goal is to decide whether this
962should be followed by a space or a newline and indent. *)
963let add_newlines toks tabbing_unit =
abad11c5
C
964 (* the following is for strings that may contain newline or tabs *)
965 let string_length s count =
966 let l = list_of_string s in
967 List.fold_left
968 (function count ->
969 function
970 | '\t' -> count + 8
971 | '\n' -> 0
972 | c -> count + 1)
973 count l in
5636bb2c
C
974 let create_indent n =
975 let (tu,tlen) =
976 match tabbing_unit with
abad11c5 977 | Some ("\t",_) -> ("\t",8)
5636bb2c 978 | Some ("",_) -> ("\t",8) (* not sure why... *)
abad11c5 979 | Some (s,_) -> (s,string_length s 0) (* assuming only tabs or spaces *)
5636bb2c
C
980 | None -> ("\t",8) in
981 let rec loop seen =
982 if seen + tlen <= n
983 then tu ^ loop (seen + tlen)
984 else String.make (n-seen) ' ' in
985 loop 0 in
986 let check_for_newline count x = function
abad11c5
C
987 | Some (start,space_cell) when count > Flag_parsing_c.max_width ->
988 space_cell := "\n"^(create_indent x);
989 Some (x + (count - start))
5636bb2c 990 | _ -> None in
abad11c5
C
991 let start_box stack space_cell count s =
992 let count = string_length s count in
993 (count,count::stack,space_cell) in
994 let end_box stack space_cell count s =
995 (* this assumes that start_box and end_box are matched, but this is not
996 necessarily the case, if ( is modified and ) is context code *)
997 let count = string_length s count in
998 match stack with
999 | [x] ->
1000 (match check_for_newline count x space_cell with
1001 | Some count -> (count,[],None)
1002 | None -> (count,[],None)
1003 )
1004 | [] -> (count,stack,space_cell)
1005 | _ -> (count,List.tl stack,space_cell) in
1006 let rec loop ((stack,space_cell) as info) count = function
1007 | [] -> []
1008 | ((Cocci2(s,line,lcol,rcol,hint)) as a):: (* hint can't be start *)
1009 (T2(commatok,Ctx,idx,_))::
1010 (T2(((Parser_c.TCommentSpace _) as sptok),Ctx,_,_)) ::
1011 (((T2(codetok,Ctx,_,_)) :: _) as xs)
1012 when
1013 (TH.str_of_tok commatok) = "," &&
1014 (TH.str_of_tok sptok) = " " &&
1015 ((List.length stack = 1) or (* not super elegant... *)
1016 (* check if the Cocci2 token is a ), need double treatment *)
1017 (List.length stack = 2) && (hint = Some Unparse_cocci.EndBox)) ->
1018 (* something large added before a comma *)
1019 let stack = (* do the work of end_box in the length 2 case *)
1020 if List.length stack = 2
1021 then List.tl stack
1022 else stack in
1023 let x = List.hd stack in
1024 let sp = ref " " in
1025 let cocci_count = string_length s count in
1026 let space_cell = Some (cocci_count+1,sp) in (* count before space *)
1027 let newcount = cocci_count + 2 in (* count incuding space *)
1028 let future_count =
1029 string_length (TH.str_of_tok codetok) newcount in
1030 let b = T2(commatok,Ctx,idx,
1031 Some (Unparse_cocci.SpaceOrNewline sp)) in
1032 (match check_for_newline future_count x space_cell with
1033 | Some count -> a :: b :: loop (stack,Some (x,sp)) count xs
1034 | None -> a :: b :: loop (stack,Some (newcount,sp)) newcount xs
1035 )
1036 | (T2(commatok,Ctx,_,_))::
1037 (T2(((Parser_c.TCommentSpace _) as sptok),Ctx,idx,_)) ::
1038 (((Cocci2(s,line,lcol,rcol,hint))::_) as xs)
1039 when (TH.str_of_tok commatok) = "," && (TH.str_of_tok sptok) = " " &&
1040 List.length stack = 1 (* not super elegant... *) ->
1041 (* something large added after a comma *)
1042 let x = List.hd stack in
1043 let sp = ref " " in
1044 let space_cell = Some (count+1,sp) in (* count before space *)
1045 let newcount = count + 2 in (* count incuding space *)
1046 let future_count = string_length s newcount in
1047 let a = T2(commatok,Ctx,idx,
1048 Some (Unparse_cocci.SpaceOrNewline sp)) in
1049 (match check_for_newline future_count x space_cell with
1050 | Some count -> a :: loop (stack,Some (x,sp)) count xs
1051 | None -> a :: loop (stack,Some (newcount,sp)) newcount xs
1052 )
1053 | ((T2(tok,Ctx,idx,_)) as a)::xs ->
1054 (* let (stack,space_cell) = info in *)
1055 (match TH.str_of_tok tok with
1056 | "=" as s ->
1057 let (spaces,rest) = span is_space xs in
1058 (match rest with
1059 | ((T2(tok,Ctx,_,_)) as b)::ixs ->
1060 (match TH.str_of_tok tok with
1061 | "{" ->
1062 let newcount =
1063 List.fold_left
1064 (function prev ->
1065 function
1066 | (T2(tok,_b,_i,_h)) ->
1067 string_length (TH.str_of_tok tok) prev
1068 | _ -> failwith "not possible")
1069 count spaces in
1070 let front = a :: spaces @ [b] in
1071 let (newcount,newstack,newspacecell) =
1072 start_box stack space_cell newcount "{" in
1073 front @ loop (newstack,newspacecell) newcount ixs
1074 | s -> a :: loop info (string_length s count) xs
1075 )
1076 | _ -> a :: loop info (string_length s count) xs
1077 )
1078 | "(" as s ->
1079 let (newcount,newstack,newspacecell) =
1080 start_box stack space_cell count s in
1081 a :: loop (newstack,newspacecell) newcount xs
1082 | ")" as s ->
1083 let (newcount,newstack,newspacecell) =
1084 end_box stack space_cell count s in
1085 a :: loop (newstack,newspacecell) newcount xs
1086 | "{" as s when not (stack = []) ->
1087 (* [] case means statement braces *)
1088 let (newcount,newstack,newspacecell) =
1089 start_box stack space_cell count s in
1090 a :: loop (newstack,newspacecell) newcount xs
1091 | "}" as s when not (stack = []) ->
1092 (* [] case means statement braces *)
1093 let (newcount,newstack,newspacecell) =
1094 end_box stack space_cell count s in
1095 a :: loop (newstack,newspacecell) newcount xs
1096 | s -> a :: loop info (string_length s count) xs
1097 )
5636bb2c 1098 | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs ->
abad11c5
C
1099 let (stack,space_cell) = info in
1100 let rest =
1101 match hint with
1102 | None -> loop info (string_length s count) xs
1103 | Some Unparse_cocci.StartBox ->
1104 let (newcount,newstack,newspacecell) =
1105 start_box stack space_cell count s in
1106 loop (newstack,newspacecell) newcount xs
1107 | Some Unparse_cocci.EndBox ->
1108 let (newcount,newstack,newspacecell) =
1109 end_box stack space_cell count s in
1110 loop (newstack,newspacecell) newcount xs
1111 | Some (Unparse_cocci.SpaceOrNewline sp) ->
1112 let count = string_length s (count + 1 (*space*)) in
1113 (match stack with
1114 | [x] ->
1115 (match check_for_newline count x space_cell with
1116 | Some count -> loop (stack,Some (x,sp)) count xs
1117 | None -> loop (stack,Some (count,sp)) count xs
1118 )
1119 | _ -> loop info count xs
1120 ) in
1121 a :: rest
1122 | ((T2(tok,_,_,_)) as a)::xs ->
1123 a :: loop info (string_length (TH.str_of_tok tok) count) xs
5636bb2c 1124 | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs
c491d8ee 1125 | ((Comma(s)) as a)::xs -> a :: loop info (string_length s count) xs
8babbc8f 1126 | Fake2 _ :: _ | Indent_cocci2 :: _
993936c0 1127 | Unindent_cocci2 _::_ | EatSpace2::_ ->
abad11c5 1128 failwith "unexpected fake, indent, unindent, or eatspace" in
5636bb2c 1129 let redo_spaces prev = function
abad11c5
C
1130 | Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) ->
1131 C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev
1132 | T2(tok,min,idx,Some (Unparse_cocci.SpaceOrNewline sp)) ->
1133 C2 !sp :: T2(tok,min,idx,None) :: prev
5636bb2c
C
1134 | t -> t::prev in
1135 (match !Flag_parsing_c.spacing with
abad11c5
C
1136 | Flag_parsing_c.SMPL -> toks
1137 | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks))
1138 )
34e49164
C
1139
1140(* When insert some new code, because of a + in a SP, we must add this
abad11c5
C
1141* code at the right place, with the good indentation. So each time we
1142* encounter some spacing info, with some newline, we maintain the
1143* current indentation level used.
1144*
1145* TODO problems: not accurate. ex: TODO
1146*
1147* TODO: if in #define region, should add a \ \n
1148*)
ae4735db 1149let new_tabbing2 space =
abad11c5 1150 list_of_string space
34e49164 1151 +> List.rev
abad11c5 1152 +> take_until (fun c -> c =<= '\n')
34e49164
C
1153 +> List.rev
1154 +> List.map string_of_char
1155 +> String.concat ""
1156
ae4735db 1157let new_tabbing a =
abad11c5 1158 profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
34e49164
C
1159
1160
ae4735db 1161let rec adjust_indentation xs =
aa721442 1162
65038c61 1163 let _current_tabbing = ref ([] : string list) in
34e49164
C
1164 let tabbing_unit = ref None in
1165
1166 let string_of_list l = String.concat "" (List.map string_of_char l) in
1167
1168 (* try to pick a tabbing unit for the plus code *)
1169 let adjust_tabbing_unit old_tab new_tab =
b1b2de81 1170 if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
34e49164
C
1171 then
1172 let old_tab = list_of_string old_tab in
1173 let new_tab = list_of_string new_tab in
1174 let rec loop = function
abad11c5
C
1175 | ([],new_tab) ->
1176 tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
1177 | (_,[]) -> failwith "not possible"
1178 | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
34e49164
C
1179 loop (old_tab,new_tab) in
1180
65038c61 1181(*
abad11c5
C
1182let remtab tu current_tab =
1183let current_tab = List.rev(list_of_string current_tab) in
1184let rec loop = function
1185([],new_tab) -> string_of_list (List.rev new_tab)
1186| (_,[]) -> (-*weird; tabbing unit used up more than the current tab*-)
1187""
1188| (t::ts,n::ns) when t =<= n -> loop (ts,ns)
1189| (_,ns) -> (-* mismatch; remove what we can *-)
1190string_of_list (List.rev ns) in
1191loop (tu,current_tab) in
65038c61 1192*)
34e49164
C
1193
1194 let rec find_first_tab started = function
abad11c5
C
1195 | [] -> ()
1196 | ((T2 (tok,_,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
1197 find_first_tab true xs
1198 (* patch: coccinelle *)
1199 | ((T2 (Parser_c.TCommentNewline s, _, _, _)) as x)::_
34e49164 1200 when started ->
abad11c5
C
1201 let s = str_of_token2 x +> new_tabbing in
1202 tabbing_unit := Some (s,List.rev (list_of_string s))
34e49164
C
1203 | x::xs -> find_first_tab started xs in
1204 find_first_tab false xs;
1205
1eddfd50 1206 let rec balanced ct = function
abad11c5
C
1207 | [] -> ct >= 0
1208 | ((T2(tok,_,_,_)) as x)::xs ->
1209 (match str_of_token2 x with
1210 | "(" -> balanced (ct+1) xs
1211 | ")" -> balanced (ct-1) xs
1212 | _ -> balanced ct xs
1213 )
1eddfd50
C
1214 | x::xs -> balanced ct xs in
1215
97111a47
C
1216 let update_tabbing started s x =
1217 let old_tabbing = !_current_tabbing in
1218 str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := [s]);
1219 (* only trust the indentation after the first { *)
1220 if started
1221 then
1222 adjust_tabbing_unit
abad11c5
C
1223 (String.concat "" old_tabbing)
1224 (String.concat "" !_current_tabbing) in
97111a47 1225
ae4735db 1226 let rec aux started xs =
34e49164 1227 match xs with
abad11c5
C
1228 | [] -> []
1229 (* patch: coccinelle *)
1230 | ((T2 (Parser_c.TCommentNewline s,_,_,_)) as x)::
97111a47 1231 Unindent_cocci2(false)::xs ->
abad11c5
C
1232 update_tabbing started s x;
1233 (C2 "\n")::aux started xs
8f657093 1234 | (Cocci2("\n",_,_,_,_))::Unindent_cocci2(false)::xs ->
abad11c5
C
1235 (C2 "\n")::aux started xs
1236 | ((T2 (tok,_,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _, _))::
5636bb2c 1237 ((Cocci2 ("{",_,_,_,_)) as a)::xs
708f4980 1238 when started && str_of_token2 x =$= ")" ->
abad11c5
C
1239 (* to be done for if, etc, but not for a function header *)
1240 x::(C2 " ")::a::(aux started xs)
1241 | ((T2 (Parser_c.TCommentNewline s, _, _, _)) as x)::xs
97111a47 1242 when
abad11c5
C
1243 balanced 0 (fst(span (function x -> not(is_newline x)) xs)) ->
1244 update_tabbing started s x;
1245 let coccis_rest = span all_coccis xs in
1246 (match coccis_rest with
1247 | (_::_,((T2 (tok,_,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
1248 (* the case where cocci code has been added before a close } *)
1249 x::aux started (Indent_cocci2::xs)
1250 | _ -> x::aux started xs
1251 )
65038c61
C
1252 | Indent_cocci2::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
1253 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL ->
abad11c5
C
1254 let tu = String.make (lcoly-1) ' ' in
1255 _current_tabbing := tu::(!_current_tabbing);
1256 C2 (tu)::aux started (y::xs)
34e49164 1257 | Indent_cocci2::xs ->
abad11c5
C
1258 (match !tabbing_unit with
1259 | None -> aux started xs
1260 | Some (tu,_) ->
1261 _current_tabbing := tu::(!_current_tabbing);
1262 (* can't be C2, for later phases *)
1263 Cocci2 (tu,-1,-1,-1,None)::aux started xs
1264 )
1265 | Unindent_cocci2(permanent)::((Cocci2("\n",_,_,_,_)) as x)::xs ->
1266 (* seems only relevant if there is a following cocci newline *)
1267 (match !_current_tabbing with
1268 | [] -> aux started xs
1269 | _::new_tabbing ->
1270 let s = String.concat "" new_tabbing in
1271 _current_tabbing := new_tabbing;
1272 x::Cocci2 (s,-1,-1,-1,None)::aux started xs
1273 )
1274 | Unindent_cocci2(permanent)::xs -> aux started xs
34e49164 1275 (* border between existing code and cocci code *)
abad11c5 1276 | ((T2 (tok,_,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs
b1b2de81 1277 when str_of_token2 x =$= "{" ->
abad11c5
C
1278 x::aux true (y::Indent_cocci2::xs)
1279 | ((Cocci2 _) as x)::((T2 (tok,_,_,_)) as y)::xs
b1b2de81 1280 when str_of_token2 y =$= "}" ->
abad11c5 1281 x::aux started (Unindent_cocci2 true::y::xs)
34e49164 1282 (* starting the body of the function *)
abad11c5
C
1283 | ((T2 (tok,_,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
1284 x::aux true xs
5636bb2c 1285 | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs
5636bb2c 1286 | ((Cocci2("\n",_,_,_,_)) as x)::xs ->
abad11c5
C
1287 (* dont inline in expr because of weird eval order of ocaml *)
1288 let s = String.concat "" !_current_tabbing in
1289 (* can't be C2, for later phases *)
1290 x::Cocci2 (s,-1,-1,-1,None)::aux started xs
34e49164 1291 | x::xs -> x::aux started xs in
5636bb2c 1292 (aux false xs,!tabbing_unit)
34e49164
C
1293
1294
1295let rec find_paren_comma = function
1296 | [] -> ()
1297
1298 (* do nothing if was like this in original file *)
abad11c5
C
1299 | { str = "("; idx = Some p1 } :: ({ str = ","; idx = Some p2} :: _ as xs)
1300 | { str = ","; idx = Some p1 } :: ({ str = ","; idx = Some p2} :: _ as xs)
1301 | { str = ","; idx = Some p1 } :: ({ str = ")"; idx = Some p2} :: _ as xs)
1302 when p2 =|= p1 + 1 ->
1303 find_paren_comma xs
34e49164
C
1304
1305 (* otherwise yes can adjust *)
abad11c5
C
1306 | { str = "(" } :: (({ str = ","} as rem) :: _ as xs)
1307 | ({ str = "," } as rem) :: ({ str = ","} :: _ as xs)
1308 | ({ str = "," } as rem) :: ({ str = ")"} :: _ as xs) ->
1309 rem.remove <- true;
1310 find_paren_comma xs
34e49164 1311
ae4735db 1312 | x::xs ->
abad11c5 1313 find_paren_comma xs
34e49164 1314
ae4735db
C
1315
1316let fix_tokens toks =
34e49164
C
1317 let toks = toks +> List.map mk_token_extended in
1318
abad11c5
C
1319 let cleaner = toks +> exclude (function
1320 | {tok2 = T2 (t,_,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
34e49164
C
1321 | _ -> false
1322 ) in
1323 find_paren_comma cleaner;
1324
1325 let toks = rebuild_tokens_extented toks in
1326 toks +> List.map (fun x -> x.tok2)
1327
1b9ae606
C
1328(* if we have to remove a '}' that is alone on a line, remove the line too *)
1329let drop_line toks =
1330 let rec space_until_newline toks =
1331 match toks with
1332 | (T2(_, Min _, _, _) as hd) :: tl ->
1333 let (drop, tl) = space_until_newline tl in
1334 (drop, hd :: tl)
1335 | hd :: tl when is_space hd ->
1336 space_until_newline tl
1337 | Fake2 _ :: tl ->
1338 space_until_newline tl
1339 | hd :: tl when is_newline hd ->
1340 (true, toks)
1341 | _ ->
1342 (false, toks) in
1343 let rec loop toks =
1344 match toks with
1345 | (T2(_, Min _, _, _) as x) :: tl
1346 when str_of_token2 x =$= "}" ->
1347 let (drop, tl) = space_until_newline tl in
1348 (drop, x :: tl)
1349 | hd :: tl when is_whitespace hd ->
1350 let (drop, tl) = loop tl in
1351 if drop then
1352 (true, tl)
1353 else
1354 (false, toks)
1355 | _ -> (false, toks) in
1356 let rec find toks =
1357 let (_, toks) = loop toks in
1358 match toks with
1359 | [] -> []
1360 | hd :: tl ->
1361 hd :: find tl in
1362 find toks
34e49164 1363
34e49164
C
1364(*****************************************************************************)
1365(* Final unparsing (and debugging support) *)
1366(*****************************************************************************)
1367
1368(* for debugging *)
1369type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
1370
1371let kind_of_token2 = function
8babbc8f 1372 | Fake2 _ -> KFake
34e49164
C
1373 | Cocci2 _ -> KCocci
1374 | C2 _ -> KC
c491d8ee 1375 | Comma _ -> KC
abad11c5
C
1376 | T2 (t,_,_,_) ->
1377 (match TH.pinfo_of_tok t with
1378 | Ast_c.ExpandedTok _ -> KExpanded
1379 | Ast_c.OriginTok _ -> KOrigin
1380 | Ast_c.FakeTok _ -> raise (Impossible 139) (* now a Fake2 *)
1381 | Ast_c.AbstractLineTok _ -> raise (Impossible 140) (* now a KC *)
1382 )
1383 | Unindent_cocci2 _ | Indent_cocci2 | EatSpace2 -> raise (Impossible 141)
ae4735db 1384
34e49164
C
1385let end_mark = "!"
1386
1387let start_mark = function
1388 | KFake -> "!F!"
ae4735db 1389 | KCocci -> "!S!"
34e49164
C
1390 | KC -> "!A!"
1391 | KExpanded -> "!E!"
1392 | KOrigin -> ""
1393
1394let print_all_tokens2 pr xs =
1395 if !Flag_parsing_c.debug_unparsing
1396 then
1397 let current_kind = ref KOrigin in
ae4735db 1398 xs +> List.iter (fun t ->
34e49164 1399 let newkind = kind_of_token2 t in
b1b2de81 1400 if newkind =*= !current_kind
34e49164 1401 then pr (str_of_token2 t)
abad11c5
C
1402 else
1403 begin
1404 pr (end_mark);
1405 pr (start_mark newkind);
1406 pr (str_of_token2 t);
1407 current_kind := newkind
1408 end
34e49164 1409 );
ae4735db 1410 else
abad11c5
C
1411 let to_whitespace s =
1412 let r = String.copy s in
1413 for i = 1 to String.length r do
1414 let c = String.get r (i-1) in
1415 match c with
1416 | ' ' | '\t' | '\r' | '\n' -> ()
1417 | _ -> String.set r (i-1) ' '
1418 done;
1419 r in
1420 let hiding_level = ref 0 in
1421 let handle_token t =
1422 let s = str_of_token2 t in
1423 let hide_current =
1424 match t with
1425 | T2 (t,_,_,_) ->
1426 let i = TH.info_of_tok t in
1427 (match Ast_c.get_annot_info i Token_annot.Exclude_start with
1428 | None -> ()
1429 | Some _ -> hiding_level := !hiding_level + 1
1430 );
1431 let hide_current = !hiding_level > 0 in
1432 (match Ast_c.get_annot_info i Token_annot.Exclude_end with
1433 | None -> ()
1434 | Some _ -> hiding_level := max (!hiding_level - 1) 0
1435 );
1436 hide_current
1437 | _ -> !hiding_level > 0 in
1438 if hide_current then to_whitespace s else s in
1439 xs +> List.iter (fun x -> pr (handle_token x))
1440
ae4735db 1441
34e49164
C
1442
1443
1444
1445(*****************************************************************************)
1446(* Entry points *)
1447(*****************************************************************************)
1448
1449(* old: PPviatok was made in the beginning to allow to pretty print a
1450 * complete C file, including a modified C file by transformation.ml,
1451 * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
1452 * maybe flow_to_ast) all the cases. Indeed we don't need to do some
1453 * fancy stuff when a function was not modified at all. Just need to
1454 * print the list of token as-is. But now pretty_print_c.ml handles
1455 * almost everything so maybe less useful. Maybe PPviatok allows to
ae4735db
C
1456 * optimize a little the pretty printing.
1457 *
34e49164
C
1458 * update: now have PPviastr which goes even faster than PPviatok, so
1459 * PPviatok has disappeared.
1460 *)
1461
1462type ppmethod = PPnormal | PPviastr
1463
1464
1465
1466
1467(* The pp_program function will call pretty_print_c.ml with a special
ae4735db 1468 * function to print the leaf components, the tokens. When we want to
34e49164 1469 * print a token, we need to print also maybe the space and comments that
ae4735db 1470 * were close to it in the original file (and that was omitted during the
34e49164
C
1471 * parsing phase), and honor what the cocci-info attached to the token says.
1472 * Maybe we will not print the token if it's a MINUS-token, and maybe we will
ae4735db 1473 * print it and also print some cocci-code attached in a PLUS to it.
34e49164
C
1474 * So we will also maybe call unparse_cocci. Because the cocci-code may
1475 * contain metavariables, unparse_cocci will in fact sometimes call back
1476 * pretty_print_c (which will this time don't call back again unparse_cocci)
1477 *)
1478
ae4735db 1479let pp_program2 xs outfile =
abad11c5 1480 with_open_outfile outfile (fun (pr,chan) ->
ae4735db
C
1481 let pr s =
1482 if !Flag_parsing_c.debug_unparsing
34e49164 1483 then begin pr2_no_nl s; flush stderr end
ae4735db 1484 else pr s
abad11c5
C
1485(* flush chan; *)
1486(* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
34e49164 1487 in
ae4735db
C
1488
1489 xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
34e49164
C
1490 (* here can still work on ast *)
1491 let e = remove_useless_fakeInfo_struct e in
ae4735db 1492
34e49164 1493 match ppmethod with
002099fc 1494 | PPnormal ->
abad11c5
C
1495 (* now work on tokens *)
1496 (* phase1: just get all the tokens, all the information *)
1497 assert(toks_e +> List.for_all (fun t ->
1498 TH.is_origin t or TH.is_expanded t
1499 ));
1500 let toks = get_fakeInfo_and_tokens e toks_e in
1501 let toks = displace_fake_nodes toks in
1502 (* assert Origin;ExpandedTok;Faketok *)
1503 let toks = expand_mcode toks in
1504
1505 (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1506 * and no tag information, just NOTHING. *)
1507
1508 let toks =
1509 if !Flag.sgrep_mode2
1510 then
1511 (* nothing else to do for sgrep *)
1512 drop_expanded(drop_fake(drop_minus toks))
1513 else
1514 begin
fc1ad971 1515 (* phase2: can now start to filter and adjust *)
abad11c5
C
1516 let (toks,tu) = adjust_indentation toks in
1517 let toks = adjust_eat_space toks in
1518 let toks = adjust_before_semicolon toks in(*before remove minus*)
1519 let toks = adjust_after_paren toks in(*also before remove minus*)
1520 let toks = drop_space_at_endline toks in
1521 let toks = paren_to_space toks in
1522 let toks = drop_end_comma toks in
1b9ae606
C
1523
1524 let toks = drop_line toks in
1525
abad11c5 1526 let toks = remove_minus_and_between_and_expanded_and_fake toks in
fc1ad971 1527 (* assert Origin + Cocci + C and no minus *)
abad11c5
C
1528 let toks = add_space toks in
1529 let toks = add_newlines toks tu in
1530 let toks = paren_then_brace toks in
fc1ad971 1531 let toks = fix_tokens toks in
abad11c5
C
1532 toks
1533 end in
34e49164 1534
abad11c5
C
1535 (* in theory here could reparse and rework the ast! or
1536 * apply some SP. Not before cos julia may have generated
1537 * not parsable file. Need do unparsing_tricks call before
1538 * being ready to reparse. *)
1539 print_all_tokens2 pr toks;
34e49164
C
1540
1541 | PPviastr -> pr str
1542 )
1543 )
1544
ae4735db 1545let pp_program a b =
abad11c5 1546 profile_code "C unparsing" (fun () -> pp_program2 a b)
34e49164 1547
485bce71 1548
ae4735db 1549let pp_program_default xs outfile =
485bce71
C
1550 let xs' = xs +> List.map (fun x -> x, PPnormal) in
1551 pp_program xs' outfile