coccinelle release 0.2.5
[bpt/coccinelle.git] / parsing_c / unparse_c.ml
CommitLineData
0708f913 1(* Yoann Padioleau, Julia Lawall
ae4735db
C
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
0708f913 4 * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
34e49164
C
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
34e49164
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
ae4735db
C
14 *
15 *
485bce71 16 * Modifications by Julia Lawall for better newline handling.
34e49164
C
17 *)
18open Common
19
20open Ast_c
21
22module TH = Token_helpers
23
24
b1b2de81
C
25(* should keep comments and directives in between adjacent deleted terms,
26but not comments and directives within deleted terms. should use the
27labels found in the control-flow graph *)
28
34e49164
C
29
30
708f4980
C
31(*****************************************************************************)
32(* Wrappers *)
33(*****************************************************************************)
34let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
35
34e49164
C
36(*****************************************************************************)
37(* Types used during the intermediate phases of the unparsing *)
38(*****************************************************************************)
39
ae4735db 40type token1 =
34e49164
C
41 | Fake1 of info
42 | T1 of Parser_c.token
43
44(* The cocci_tag of the token should always be a NOTHING. The mark of
45 * the token can only be OriginTok or ExpandedTok. Why not get rid of
46 * token and get something simpler ? because we need to know if the
47 * info is a TCommentCpp or TCommentSpace, etc for some of the further
48 * analysis so easier to keep with the token.
ae4735db 49 *
34e49164
C
50 * This type contains the whole information. Have all the tokens with this
51 * type.
52 *)
708f4980 53type min =
5626f154
C
54 Min of (int list (* match numbers from witness trees *) *
55 int (* adjacency information *))
708f4980
C
56 | Ctx
57
ae4735db
C
58type token2 =
59 | T2 of Parser_c.token * min *
34e49164
C
60 int option (* orig index, abstracting away comments and space *)
61 | Fake2
708f4980 62 | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
5636bb2c 63 * Unparse_cocci.nlhint option
34e49164 64 | C2 of string
c491d8ee 65 | Comma of string
34e49164 66 | Indent_cocci2
c3e37e97 67 | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
34e49164
C
68
69(* not used yet *)
ae4735db 70type token3 =
34e49164
C
71 | T3 of Parser_c.token
72 | Cocci3 of string
73 | C3 of string
74
75
76(* similar to the tech in parsing_hack *)
77type token_extended = {
78 tok2 : token2;
79 str : string;
80 idx: int option; (* to know if 2 tokens were consecutive in orig file *)
81 mutable new_tokens_before : token2 list;
82 mutable remove : bool;
83}
84
85
86(*****************************************************************************)
87(* Helpers *)
88(*****************************************************************************)
89
ae4735db 90let info_of_token1 t =
34e49164
C
91 match t with
92 | Fake1 info -> info
93 | T1 tok -> TH.info_of_tok tok
94
002099fc
C
95let print_token1 = function
96 T1 tok -> TH.str_of_tok tok
97 | Fake1 info -> "fake"
98
34e49164
C
99let str_of_token2 = function
100 | T2 (t,_,_) -> TH.str_of_tok t
101 | Fake2 -> ""
5636bb2c 102 | Cocci2 (s,_,_,_,_) -> s
34e49164 103 | C2 s -> s
c491d8ee 104 | Comma s -> s
34e49164 105 | Indent_cocci2 -> ""
c3e37e97 106 | Unindent_cocci2 _ -> ""
34e49164
C
107
108let print_token2 = function
708f4980
C
109 | T2 (t,b,_) ->
110 let b_str =
111 match b with
112 Min (index,adj) ->
113 Printf.sprintf "-%d[%s]" adj
114 (String.concat " " (List.map string_of_int index))
115 | Ctx -> "" in
116 "T2:"^b_str^TH.str_of_tok t
fc1ad971 117 | Fake2 -> "fake"
5636bb2c 118 | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
34e49164 119 | C2 s -> "C2:"^s
c491d8ee 120 | Comma s -> "Comma:"^s
34e49164 121 | Indent_cocci2 -> "Indent"
c3e37e97 122 | Unindent_cocci2 _ -> "Unindent"
34e49164 123
002099fc 124let simple_print_all_tokens1 l =
90aeb998 125 List.iter (function x -> Printf.printf "|%s| " (print_token1 x)) l;
002099fc
C
126 Printf.printf "\n"
127
708f4980 128let simple_print_all_tokens2 l =
fc1ad971 129 List.iter (function x -> Printf.printf "|%s| " (print_token2 x)) l;
34e49164
C
130 Printf.printf "\n"
131
132let str_of_token3 = function
133 | T3 t -> TH.str_of_tok t
134 | Cocci3 s | C3 s -> s
135
136
137
ae4735db
C
138let mk_token_extended x =
139 let origidx =
34e49164 140 match x with
ae4735db 141 | T2 (_,_, idx) -> idx
34e49164
C
142 | _ -> None
143 in
ae4735db 144 { tok2 = x;
34e49164
C
145 str = str_of_token2 x;
146 idx = origidx;
147 new_tokens_before = [];
148 remove = false;
149 }
150
ae4735db 151let rebuild_tokens_extented toks_ext =
34e49164 152 let _tokens = ref [] in
ae4735db 153 toks_ext +> List.iter (fun tok ->
34e49164
C
154 tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
155 if not tok.remove then push2 tok.tok2 _tokens;
156 );
157 let tokens = List.rev !_tokens in
158 (tokens +> List.map mk_token_extended)
159
160
161let mcode_contain_plus = function
162 | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
163 | Ast_cocci.CONTEXT _ -> true
485bce71 164(* patch: when need full coccinelle transformation *)
708f4980
C
165 | Ast_cocci.MINUS (_,_,_,[]) -> false
166 | Ast_cocci.MINUS (_,_,_,x::xs) -> true
951c7801 167 | Ast_cocci.PLUS _ -> raise Impossible
34e49164 168
ae4735db 169let contain_plus info =
34e49164
C
170 let mck = Ast_c.mcode_of_info info in
171 mcode_contain_plus mck
172
173(*****************************************************************************)
174(* Last fix on the ast *)
175(*****************************************************************************)
176
ae4735db 177(* Because of the ugly trick to handle initialiser, I generate fake ','
34e49164
C
178 * for the last initializer element, but if there is nothing around it,
179 * I don't want in the end to print it.
180 *)
181
ae4735db 182let remove_useless_fakeInfo_struct program =
34e49164 183 let bigf = { Visitor_c.default_visitor_c_s with
ae4735db 184 Visitor_c.kini_s = (fun (k,bigf) ini ->
34e49164 185 match k ini with
113803cf 186 | InitList args, ii ->
34e49164
C
187 (match ii with
188 | [_i1;_i2] -> ini
113803cf 189 | [i1;i2;iicommaopt] ->
34e49164
C
190 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
191 && (Ast_c.is_fake iicommaopt)
192 (* sometimes the guy put a normal iicommaopt *)
34e49164
C
193 then InitList args, [i1;i2]
194 else InitList args, [i1;i2;iicommaopt]
ae4735db 195 | [i1;i2;iicommaopt;end_comma_opt] ->
113803cf
C
196 (* only in #define. end_comma_opt canot be fake *)
197 (* not sure if this will be considered ambiguous with a previous
198 case? *)
199 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
200 && (Ast_c.is_fake iicommaopt)
201 (* sometimes the guy put a normal iicommaopt *)
202 then InitList args, [i1;i2;end_comma_opt]
203 else InitList args, [i1;i2;iicommaopt;end_comma_opt]
34e49164
C
204 | _ -> raise Impossible
205 )
206 | x -> x
207 )
208 } in
209 Visitor_c.vk_toplevel_s bigf program
210
211
212(*****************************************************************************)
213(* Tokens1 generation *)
214(*****************************************************************************)
215
ae4735db
C
216let get_fakeInfo_and_tokens celem toks =
217 let toks_in = ref toks in
34e49164
C
218 let toks_out = ref [] in
219
220 (* todo? verify good order of position ? *)
ae4735db 221 let pr_elem info =
34e49164 222 match Ast_c.pinfo_of_info info with
ae4735db 223 | FakeTok _ ->
34e49164 224 Common.push2 (Fake1 info) toks_out
ae4735db 225 | OriginTok _ | ExpandedTok _ ->
34e49164 226 (* get the associated comments/space/cppcomment tokens *)
978fd7e5 227 let (before, x, after) =
ae4735db 228 !toks_in +> Common.split_when (fun tok ->
34e49164
C
229 info =*= TH.info_of_tok tok)
230 in
b1b2de81 231 assert(info =*= TH.info_of_tok x);
34e49164 232 (*old: assert(before +> List.for_all (TH.is_comment)); *)
ae4735db 233 before +> List.iter (fun x ->
34e49164 234 if not (TH.is_comment x)
0708f913 235 then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
34e49164
C
236 (* case such as int asm d3("x"); not yet in ast *)
237 );
238 before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
239 push2 (T1 x) toks_out;
240 toks_in := after;
ae4735db 241 | AbstractLineTok _ ->
34e49164
C
242 (* can be called on type info when for instance use -type_c *)
243 if !Flag_parsing_c.pretty_print_type_info
244 then Common.push2 (Fake1 info) toks_out
ae4735db 245 else raise Impossible (* at this stage *)
34e49164
C
246 in
247
248 let pr_space _ = () in (* use the spacing that is there already *)
249
250 Pretty_print_c.pp_program_gen pr_elem pr_space celem;
251
252 if not (null !toks_in)
0708f913 253 then failwith "WEIRD: unparsing not finished";
34e49164
C
254
255 List.rev !toks_out
256
257(* Fake nodes that have BEFORE code should be moved over any subsequent
258whitespace and newlines, but not any comments, to get as close to the affected
002099fc 259code as possible. Similarly, fake nodes that have AFTER code should be moved
34e49164
C
260backwards. No fake nodes should have both before and after code. *)
261
262let displace_fake_nodes toks =
263 let is_fake = function Fake1 _ -> true | _ -> false in
264 let is_whitespace = function
265 T1(Parser_c.TCommentSpace _)
485bce71 266 (* patch: cocci *)
34e49164
C
267 | T1(Parser_c.TCommentNewline _) -> true
268 | _ -> false in
269 let rec loop toks =
270 let fake_info =
271 try Some (Common.split_when is_fake toks)
272 with Not_found -> None in
273 match fake_info with
274 Some(bef,((Fake1 info) as fake),aft) ->
275 (match !(info.cocci_tag) with
ae4735db 276 | Some x ->
708f4980 277 (match x with
002099fc 278 (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
34e49164 279 (* move the fake node forwards *)
002099fc
C
280 let (whitespace,rest) = Common.span is_whitespace aft in
281 bef @ whitespace @ fake :: (loop rest)
282 | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
34e49164 283 (* move the fake node backwards *)
002099fc
C
284 let revbef = List.rev bef in
285 let (revwhitespace,revprev) = Common.span is_whitespace revbef in
286 let whitespace = List.rev revwhitespace in
287 let prev = List.rev revprev in
288 prev @ fake :: (loop (whitespace @ aft))
289 | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
290 bef @ fake :: (loop aft)
291 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
292 failwith "fake node should not be before-after"
293 | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
485bce71 294 )
ae4735db 295 | None ->
708f4980
C
296 bef @ fake :: (loop aft)
297 )
34e49164
C
298 | None -> toks
299 | _ -> raise Impossible in
300 loop toks
301
302(*****************************************************************************)
303(* Tokens2 generation *)
304(*****************************************************************************)
305
b1b2de81 306let comment2t2 = function
5636bb2c
C
307 (Token_c.TCommentCpp
308 (* not sure iif the following list is exhaustive or complete *)
309 (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError),
310 (info : Token_c.info)) ->
311 C2(info.Common.str)
312 | (Token_c.TCommentCpp x,(info : Token_c.info)) ->
b1b2de81
C
313 C2("\n"^info.Common.str^"\n")
314 | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
315
ae4735db 316let expand_mcode toks =
34e49164
C
317 let toks_out = ref [] in
318
319 let index = ref 0 in
320
ae4735db 321 let add_elem t minus =
34e49164 322 match t with
ae4735db 323 | Fake1 info ->
34e49164 324 let str = Ast_c.str_of_info info in
b1b2de81 325 if str =$= ""
34e49164 326 then push2 (Fake2) toks_out
c491d8ee
C
327 (* fx the fake "," at the end of a structure or enum.
328 no idea what other fake info there can be... *)
329 else push2 (Comma str) toks_out
ae4735db
C
330
331
002099fc 332 | T1 tok ->
b1b2de81 333 (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
34e49164 334 (* no tag on expandedTok ! *)
fc1ad971
C
335 let modified = function
336 None -> false
337 | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
338 | _ -> true in
339 (if (TH.is_expanded tok &&
340 modified !((TH.info_of_tok tok).cocci_tag)
341 (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*))
0708f913
C
342 then
343 failwith
344 (Printf.sprintf
345 "expanded token %s on line %d is either modified or stored in a metavariable"
346 (TH.str_of_tok tok) (TH.line_of_tok tok)));
34e49164 347
ae4735db 348 let tok' = tok +> TH.visitor_info_of_tok (fun i ->
34e49164
C
349 { i with cocci_tag = ref Ast_c.emptyAnnot; }
350 ) in
351
ae4735db 352 let optindex =
34e49164
C
353 if TH.is_origin tok && not (TH.is_real_comment tok)
354 then begin
355 incr index;
356 Some !index
357 end
358 else None
359 in
360
361 push2 (T2 (tok', minus, optindex)) toks_out
362 in
363
ae4735db
C
364 let expand_info t =
365 let (mcode,env) =
708f4980 366 Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
34e49164 367
5636bb2c
C
368 let pr_cocci s ln col rcol hint =
369 push2 (Cocci2(s,ln,col,rcol,hint)) toks_out in
ae4735db 370 let pr_c info =
b1b2de81
C
371 (match Ast_c.pinfo_of_info info with
372 Ast_c.AbstractLineTok _ ->
373 push2 (C2 (Ast_c.str_of_info info)) toks_out
374 | Ast_c.FakeTok (s,_) ->
375 push2 (C2 s) toks_out
34e49164
C
376 | _ ->
377 Printf.printf "line: %s\n" (Common.dump info);
b1b2de81
C
378 failwith "not an abstract line");
379 (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
380 List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
381
708f4980 382 let pr_barrier ln col = (* marks a position, used around C code *)
5636bb2c 383 push2 (Cocci2("",ln,col,col,None)) toks_out in
708f4980 384 let pr_nobarrier ln col = () in (* not needed for linux spacing *)
b1b2de81 385
708f4980 386 let pr_cspace _ = push2 (C2 " ") toks_out in
34e49164 387
708f4980
C
388 let pr_space _ = () (* rely on add_space in cocci code *) in
389 let pr_arity _ = () (* not interested *) in
34e49164
C
390
391 let indent _ = push2 Indent_cocci2 toks_out in
c3e37e97 392 let unindent x = push2 (Unindent_cocci2 x) toks_out in
34e49164 393
708f4980
C
394 let args_pp =
395 (env, pr_cocci, pr_c, pr_cspace,
396 (match !Flag_parsing_c.spacing with
397 Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
398 pr_arity,
399 (match !Flag_parsing_c.spacing with
400 Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
401 indent, unindent) in
34e49164 402
ae4735db
C
403 (* old: when for yacfe with partial cocci:
404 * add_elem t false;
485bce71 405 *)
34e49164 406
485bce71 407 (* patch: when need full coccinelle transformation *)
faf9a90c 408 let unparser = Unparse_cocci.pp_list_list_any args_pp false in
34e49164 409 match mcode with
ae4735db 410 | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
34e49164 411 (* Why adding ? because I want to have all the information, the whole
ae4735db 412 * set of tokens, so I can then process and remove the
34e49164 413 * is_between_two_minus for instance *)
708f4980 414 add_elem t (Min (inst,adj));
faf9a90c 415 unparser any_xxs Unparse_cocci.InPlace
ae4735db 416 | Ast_cocci.CONTEXT (_,any_befaft) ->
34e49164 417 (match any_befaft with
ae4735db 418 | Ast_cocci.NOTHING ->
708f4980 419 add_elem t Ctx
951c7801 420 | Ast_cocci.BEFORE (xxs,_) ->
faf9a90c 421 unparser xxs Unparse_cocci.Before;
708f4980 422 add_elem t Ctx
ae4735db 423 | Ast_cocci.AFTER (xxs,_) ->
708f4980 424 add_elem t Ctx;
faf9a90c 425 unparser xxs Unparse_cocci.After;
ae4735db 426 | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
faf9a90c 427 unparser xxs Unparse_cocci.Before;
708f4980 428 add_elem t Ctx;
faf9a90c 429 unparser yys Unparse_cocci.After;
34e49164 430 )
951c7801 431 | Ast_cocci.PLUS _ -> raise Impossible
34e49164
C
432
433 in
434
435 toks +> List.iter expand_info;
436 List.rev !toks_out
ae4735db 437
34e49164
C
438
439(*****************************************************************************)
440(* Tokens2 processing, filtering, adjusting *)
441(*****************************************************************************)
442
002099fc 443let is_space = function
fc1ad971 444 | T2(Parser_c.TCommentSpace _,_b,_i) -> true (* only whitespace *)
ae4735db 445 | _ -> false
fc1ad971
C
446
447let is_newline = function
448 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
449 | _ -> false
450
451let is_whitespace = function
ae4735db 452 | (T2 (t,_b,_i)) ->
002099fc
C
453 (match t with
454 | Parser_c.TCommentSpace _ -> true (* only whitespace *)
fc1ad971 455 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
002099fc
C
456 | _ -> false
457 )
ae4735db 458 | _ -> false
002099fc 459
34e49164 460let is_minusable_comment = function
ae4735db 461 | (T2 (t,_b,_i)) ->
34e49164
C
462 (match t with
463 | Parser_c.TCommentSpace _ (* only whitespace *)
ae4735db 464 (* patch: coccinelle *)
002099fc 465 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
7f004419 466 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
002099fc
C
467 | Parser_c.TComment _
468 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
b1b2de81
C
469 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
470 | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
34e49164
C
471 -> true
472
b1b2de81 473 | Parser_c.TCommentMisc _
0708f913 474 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
34e49164
C
475 -> false
476
477 | _ -> false
478 )
ae4735db 479 | _ -> false
34e49164 480
fc1ad971 481let is_minusable_comment_nocpp = function
ae4735db 482 | (T2 (t,_b,_i)) ->
fc1ad971
C
483 (match t with
484 | Parser_c.TCommentSpace _ (* only whitespace *)
ae4735db 485 (* patch: coccinelle *)
fc1ad971 486 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
7f004419 487 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
fc1ad971
C
488 | Parser_c.TComment _ -> true
489 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
490 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
491 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
492 -> false
493
494 | Parser_c.TCommentMisc _
495 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
496 -> false
497
498 | _ -> false
499 )
ae4735db 500 | _ -> false
fc1ad971 501
34e49164 502let all_coccis = function
c491d8ee 503 Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ -> true
34e49164
C
504 | _ -> false
505
b1b2de81
C
506(*previously gave up if the first character was a newline, but not clear why*)
507let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
34e49164 508
708f4980 509let set_minus_comment adj = function
ae4735db 510 | T2 (t,Ctx,idx) ->
34e49164
C
511 let str = TH.str_of_tok t in
512 (match t with
513 | Parser_c.TCommentSpace _
ae4735db 514(* patch: coccinelle *)
34e49164
C
515 | Parser_c.TCommentNewline _ -> ()
516
ae4735db
C
517 | Parser_c.TComment _
518 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
b1b2de81
C
519 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
520 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
ae4735db 521 ->
b1b2de81
C
522 pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
523 (TH.line_of_tok t) str)
34e49164
C
524 | _ -> raise Impossible
525 );
708f4980 526 T2 (t, Min adj, idx)
ae4735db 527(* patch: coccinelle *)
fc1ad971 528 | T2 (t,Min adj,idx) as x -> x
34e49164
C
529 | _ -> raise Impossible
530
708f4980 531let set_minus_comment_or_plus adj = function
c491d8ee 532 Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ as x -> x
708f4980 533 | x -> set_minus_comment adj x
34e49164 534
fc1ad971
C
535let drop_minus xs =
536 xs +> Common.exclude (function
537 | T2 (t,Min adj,_) -> true
538 | _ -> false
539 )
540
5626f154
C
541let drop_expanded_and_fake xs =
542 xs +> Common.exclude (function
34e49164
C
543 | T2 (t,_,_) when TH.is_expanded t -> true
544 | Fake2 -> true
34e49164
C
545 | _ -> false
546 )
5626f154
C
547
548let remove_minus_and_between_and_expanded_and_fake xs =
549
550 (* get rid of expanded and fake tok *)
551 let xs = drop_expanded_and_fake xs in
34e49164 552
113803cf 553 let minus_or_comment = function
708f4980 554 T2(_,Min adj,_) -> true
113803cf
C
555 | x -> is_minusable_comment x in
556
fc1ad971
C
557 let minus_or_comment_nocpp = function
558 T2(_,Min adj,_) -> true
559 | x -> is_minusable_comment_nocpp x in
560
561 let common_adj (index1,adj1) (index2,adj2) =
562 adj1 = adj2 (* same adjacency info *) &&
563 (* non-empty intersection of witness trees *)
564 not ((Common.inter_set index1 index2) = []) in
565
fc1ad971
C
566 (* new idea: collects regions not containing non-space context code
567 if two adjacent adjacent minus tokens satisfy common_adj then delete
568 all spaces, comments etc between them
569 if two adjacent minus tokens do not satisfy common_adj only delete
570 the spaces between them if there are no comments, etc.
571 if the region contain no plus code and is both preceded and followed
572 by a newline, delete the initial newline. *)
573
574 let rec adjust_around_minus = function
34e49164 575 [] -> []
708f4980 576 | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
fc1ad971
C
577 (T2(_,Min adj1,_) as t1)::xs ->
578 let (minus_list,rest) = Common.span not_context (t1::xs) in
579 let contains_plus = List.exists is_plus minus_list in
580 let x =
581 match List.rev minus_list with
582 (T2(Parser_c.TCommentNewline c,_b,_i))::rest
583 when List.for_all minus_or_comment minus_list ->
584 set_minus_comment_or_plus adj1 x
585 | _ -> x in
586 x :: adjust_within_minus contains_plus minus_list @
587 adjust_around_minus rest
588 | (T2(_,Min adj1,_) as t1)::xs ->
589 let (minus_list,rest) = Common.span not_context (t1::xs) in
590 let contains_plus = List.exists is_plus minus_list in
591 adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
c491d8ee
C
592 | x::xs ->
593 x :: adjust_around_minus xs
fc1ad971
C
594 and adjust_within_minus cp (* contains plus *) = function
595 (T2(_,Min adj1,_) as t1)::xs ->
596 let not_minus = function T2(_,Min _,_) -> false | _ -> true in
597 let (not_minus_list,rest) = Common.span not_minus xs in
598 t1 ::
34e49164 599 (match rest with
fc1ad971
C
600 (T2(_,Min adj2,_) as t2)::xs when common_adj adj1 adj2 ->
601 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
602 @ (adjust_within_minus cp (t2::xs))
603 | (T2(_,Min adj2,_) as t2)::xs ->
5626f154 604 if not cp && List.for_all is_whitespace not_minus_list
fc1ad971
C
605 then
606 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
607 @ (adjust_within_minus cp (t2::xs))
5626f154
C
608 else
609 not_minus_list @ (adjust_within_minus cp (t2::xs))
fc1ad971
C
610 | _ ->
611 if cp
612 then xs
613 else
614 let (spaces,rest) = Common.span is_space xs in
615 (List.map (set_minus_comment_or_plus adj1) spaces)
616 @ rest)
617 | xs -> failwith "should always start with minus"
618 and not_context = function
619 (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false
620 | _ -> true
621 and is_plus = function
c491d8ee 622 C2 _ | Comma _ | Cocci2 _ -> true
fc1ad971 623 | _ -> false in
34e49164 624
fc1ad971 625 let xs = adjust_around_minus xs in
34e49164 626
113803cf 627 (* this drops blank lines after a brace introduced by removing code *)
fc1ad971
C
628 let minus_or_comment_nonl = function
629 T2(_,Min adj,_) -> true
630 | T2(Parser_c.TCommentNewline _,_b,_i) -> false
631 | x -> is_minusable_comment x in
632
113803cf
C
633 let rec adjust_after_brace = function
634 [] -> []
708f4980 635 | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs)
b1b2de81 636 when str_of_token2 x =$= "{" ->
fc1ad971 637 let (between_minus,rest) = Common.span minus_or_comment_nonl xs in
113803cf
C
638 let is_whitespace = function
639 T2(Parser_c.TCommentSpace _,_b,_i)
640 (* patch: cocci *)
641 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
642 | _ -> false in
643 let (newlines,rest) = Common.span is_whitespace rest in
644 let (drop_newlines,last_newline) =
645 let rec loop = function
646 [] -> ([],[])
647 | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
648 (List.rev rest,[x])
649 | x::xs ->
650 let (drop_newlines,last_newline) = loop xs in
651 (drop_newlines,x::last_newline) in
652 loop (List.rev newlines) in
708f4980 653 x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@
113803cf
C
654 last_newline@
655 adjust_after_brace rest
656 | x::xs -> x::adjust_after_brace xs in
657
658 let xs = adjust_after_brace xs in
659
fc1ad971
C
660 (* search backwards from context } over spaces until reaching a newline.
661 then go back over all minus code until reaching some context or + code.
662 get rid of all intervening spaces, newlines, and comments
663 input is reversed *)
664 let rec adjust_before_brace = function
665 [] -> []
666 | ((T2(t,Ctx,_)) as x)::xs when str_of_token2 x =$= "}" or is_newline x ->
667 let (outer_spaces,rest) = Common.span is_space xs in
668 x :: outer_spaces @
669 (match rest with
670 ((T2 (Parser_c.TCommentNewline _,Ctx,_i)) as h) ::
671 (* the rest of this code is the same as from_newline below
672 but merging them seems to be error prone... *)
673 ((T2 (t, Min adj, idx)) as m) :: rest ->
674 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
675 h :: m ::
676 (List.map (set_minus_comment adj) spaces) @
677 (adjust_before_brace rest)
678 | _ -> adjust_before_brace rest)
679 | x::xs -> x :: (adjust_before_brace xs) in
680
681 let from_newline = function
682 ((T2 (t, Min adj, idx)) as m) :: rest ->
683 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
684 m ::
685 (List.map (set_minus_comment adj) spaces) @
686 (adjust_before_brace rest)
c491d8ee
C
687 | ((T2 (t0, Ctx, idx0)) as m0) :: ((T2 (t, Min adj, idx)) as m) :: rest
688 when TH.str_of_tok t0 = "" ->
689 (* This is for the case of a #define that is completely deleted,
690 because a #define has a strange EOL token at the end.
691 We hope there i no other kind of token that is represented by
692 "", but it seems like changing the kind of token might break
693 the end of entity recognition in the C parser.
694 See parsing_hacks.ml *)
695 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
696 m0 :: m ::
697 (List.map (set_minus_comment adj) spaces) @
698 (adjust_before_brace rest)
fc1ad971
C
699 | rest -> adjust_before_brace rest in
700
701 let xs = List.rev (from_newline (List.rev xs)) in
702 let xs = drop_minus xs in
34e49164
C
703 xs
704
faf9a90c
C
705(* normally, in C code, a semicolon is not preceded by a space or newline *)
706let adjust_before_semicolon toks =
707 let toks = List.rev toks in
002099fc 708 let rec search_semic = function
faf9a90c 709 [] -> []
002099fc 710 | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
faf9a90c 711 if List.mem (str_of_token2 x) [";";")";","]
002099fc
C
712 then x :: search_minus false xs
713 else x :: search_semic xs
714 | x::xs -> x :: search_semic xs
715 and search_minus seen_minus xs =
716 let (spaces, rest) = Common.span is_space xs in
717 (* only delete spaces if something is actually deleted *)
718 match rest with
719 ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest
720 | _ -> if seen_minus then rest else xs in
721 List.rev (search_semic toks)
34e49164
C
722
723let is_ident_like s = s ==~ Common.regexp_alpha
724
aba5c457
C
725let rec drop_space_at_endline = function
726 [] -> []
727 | [x] -> [x]
190f1acf
C
728 | (C2 " ") ::
729 ((((T2(Parser_c.TCommentSpace _,Ctx,_i)) |
730 (T2(Parser_c.TCommentNewline _,Ctx,_i))) :: _) as rest) ->
731 (* when unparse_cocci doesn't know whether space is needed *)
732 drop_space_at_endline rest
aba5c457
C
733 | ((T2(Parser_c.TCommentSpace _,Ctx,_i)) as a)::rest ->
734 let (outer_spaces,rest) = Common.span is_space rest in
735 let minus_or_comment_or_space_nocpp = function
736 T2(_,Min adj,_) -> true
737 | (T2(Parser_c.TCommentSpace _,Ctx,_i)) -> true
18b1275a
C
738 | (T2(Parser_c.TCommentNewline _,Ctx,_i)) -> false
739 | x -> false in
aba5c457 740 let (minus,rest) = Common.span minus_or_comment_or_space_nocpp rest in
18b1275a
C
741 let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in
742 if List.exists (function T2(_,Min adj,_) -> true | _ -> false) minus
743 then
744 match rest with
745 ((T2(Parser_c.TCommentNewline _,Ctx,_i)) as a)::rest ->
746 (* drop trailing spaces *)
747 minus@a::(drop_space_at_endline rest)
748 | _ -> fail()
749 else fail()
aba5c457
C
750 | a :: rest -> a :: drop_space_at_endline rest
751
752(* if a removed ( is between two tokens, then add a space *)
753let rec paren_to_space = function
754 [] -> []
755 | [x] -> [x]
756 | [x;y] -> [x;y]
757 | ((T2(_,Ctx,_)) as a)::((T2(t,Min _,_)) as b)::((T2(_,Ctx,_)) as c)::rest
758 when not (is_whitespace a) && TH.str_of_tok t = "(" ->
aba5c457
C
759 a :: b :: (C2 " ") :: (paren_to_space (c :: rest))
760 | a :: rest -> a :: (paren_to_space rest)
761
ae4735db 762let rec add_space xs =
34e49164
C
763 match xs with
764 | [] -> []
765 | [x] -> [x]
5636bb2c 766 | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
708f4980
C
767 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
768 not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
769 (* this only works within a line. could consider whether
770 something should be done to add newlines too, rather than
771 printing them explicitly in unparse_cocci. *)
772 x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
1eddfd50
C
773 | ((T2(_,Ctx,_)) as x)::((Cocci2 _) as y)::xs -> (* add space on boundary *)
774 let sx = str_of_token2 x in
775 let sy = str_of_token2 y in
776 if is_ident_like sx && (is_ident_like sy or List.mem sy ["="])
777 then x::C2 " "::(add_space (y::xs))
778 else x::(add_space (y::xs))
779 | x::y::xs -> (* not boundary, not sure if it is possible *)
34e49164
C
780 let sx = str_of_token2 x in
781 let sy = str_of_token2 y in
faf9a90c 782 if is_ident_like sx && is_ident_like sy
34e49164
C
783 then x::C2 " "::(add_space (y::xs))
784 else x::(add_space (y::xs))
785
c491d8ee
C
786(* A fake comma is added at the end of an unordered initlist or a enum
787decl, if the initlist or enum doesn't already end in a comma. This is only
788needed if there is + code, ie if we see Cocci after it in the code sequence *)
789
790let rec drop_end_comma = function
791 [] -> []
792 | [x] -> [x]
793 | ((Comma ",") as x) :: rest ->
794 let (newlines,rest2) = Common.span is_whitespace rest in
795 (match rest2 with
796 (Cocci2 _) :: _ -> x :: drop_end_comma rest
797 | _ -> drop_end_comma rest)
798 | x :: xs -> x :: drop_end_comma xs
799
5636bb2c
C
800(* The following only works for the outermost function call. Stack records
801the column of all open parentheses. Space_cell contains the most recent
802comma in the outermost function call. The goal is to decide whether this
803should be followed by a space or a newline and indent. *)
804let add_newlines toks tabbing_unit =
805 let create_indent n =
806 let (tu,tlen) =
807 match tabbing_unit with
808 Some ("\t",_) -> ("\t",8)
809 | Some ("",_) -> ("\t",8) (* not sure why... *)
810 | Some (s,_) -> (s,String.length s) (* assuming only spaces *)
811 | None -> ("\t",8) in
812 let rec loop seen =
813 if seen + tlen <= n
814 then tu ^ loop (seen + tlen)
815 else String.make (n-seen) ' ' in
816 loop 0 in
817 let check_for_newline count x = function
818 Some (start,space_cell) when count > Flag_parsing_c.max_width ->
819 space_cell := "\n"^(create_indent x);
820 Some (x + (count - start))
821 | _ -> None in
822 (* the following is for strings that may contain newline *)
823 let string_length s count =
824 let l = list_of_string s in
825 List.fold_left
826 (function count ->
827 function
828 '\t' -> count + 8
829 | '\n' -> 0
830 | c -> count + 1)
831 count l in
832 let rec loop info count = function
833 [] -> []
834 | ((T2(tok,_,_)) as a)::xs ->
835 a :: loop info (string_length (TH.str_of_tok tok) count) xs
836 | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs ->
837 let (stack,space_cell) = info in
838 let rest =
839 match hint with
f537ebc4 840 None -> loop info (string_length s count) xs
5636bb2c 841 | Some Unparse_cocci.StartBox ->
f537ebc4 842 let count = string_length s count in
5636bb2c
C
843 loop (count::stack,space_cell) count xs
844 | Some Unparse_cocci.EndBox ->
f537ebc4 845 let count = string_length s count in
5636bb2c
C
846 (match stack with
847 [x] ->
848 (match check_for_newline count x space_cell with
849 Some count -> loop ([],None) count xs
850 | None -> loop ([],None) count xs)
851 | _ -> loop (List.tl stack,space_cell) count xs)
852 | Some (Unparse_cocci.SpaceOrNewline sp) ->
f537ebc4 853 let count = string_length s (count + 1 (*space*)) in
5636bb2c
C
854 (match stack with
855 [x] ->
856 (match check_for_newline count x space_cell with
857 Some count -> loop (stack,Some (x,sp)) count xs
858 | None -> loop (stack,Some (count,sp)) count xs)
859 | _ -> loop info count xs) in
860 a :: rest
861 | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs
c491d8ee 862 | ((Comma(s)) as a)::xs -> a :: loop info (string_length s count) xs
5636bb2c
C
863 | Fake2 :: _ | Indent_cocci2 :: _
864 | Unindent_cocci2 _::_ ->
865 failwith "unexpected fake, indent, or unindent" in
866 let redo_spaces prev = function
867 Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) ->
868 C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev
869 | t -> t::prev in
870 (match !Flag_parsing_c.spacing with
871 Flag_parsing_c.SMPL -> toks
872 | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks)))
34e49164
C
873
874(* When insert some new code, because of a + in a SP, we must add this
875 * code at the right place, with the good indentation. So each time we
876 * encounter some spacing info, with some newline, we maintain the
877 * current indentation level used.
ae4735db 878 *
34e49164 879 * TODO problems: not accurate. ex: TODO
ae4735db 880 *
34e49164
C
881 * TODO: if in #define region, should add a \ \n
882 *)
ae4735db 883let new_tabbing2 space =
34e49164
C
884 (list_of_string space)
885 +> List.rev
b1b2de81 886 +> Common.take_until (fun c -> c =<= '\n')
34e49164
C
887 +> List.rev
888 +> List.map string_of_char
889 +> String.concat ""
890
ae4735db 891let new_tabbing a =
34e49164
C
892 Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
893
894
ae4735db 895let rec adjust_indentation xs =
aa721442 896
34e49164
C
897 let _current_tabbing = ref "" in
898 let tabbing_unit = ref None in
899
900 let string_of_list l = String.concat "" (List.map string_of_char l) in
901
902 (* try to pick a tabbing unit for the plus code *)
903 let adjust_tabbing_unit old_tab new_tab =
b1b2de81 904 if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
34e49164
C
905 then
906 let old_tab = list_of_string old_tab in
907 let new_tab = list_of_string new_tab in
908 let rec loop = function
909 ([],new_tab) ->
910 tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
911 | (_,[]) -> failwith "not possible"
912 | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
913 loop (old_tab,new_tab) in
914
915 let remtab tu current_tab =
916 let current_tab = List.rev(list_of_string current_tab) in
917 let rec loop = function
918 ([],new_tab) -> string_of_list (List.rev new_tab)
0708f913 919 | (_,[]) -> "" (*weird; tabbing unit used up more than the current tab*)
b1b2de81 920 | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
34e49164
C
921 | (_,ns) -> (* mismatch; remove what we can *)
922 string_of_list (List.rev ns) in
923 loop (tu,current_tab) in
924
925 let rec find_first_tab started = function
926 [] -> ()
b1b2de81 927 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
34e49164 928 find_first_tab true xs
485bce71 929(* patch: coccinelle *)
34e49164
C
930 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
931 when started ->
932 let s = str_of_token2 x +> new_tabbing in
933 tabbing_unit := Some (s,List.rev (list_of_string s))
934 | x::xs -> find_first_tab started xs in
935 find_first_tab false xs;
936
1eddfd50
C
937 let rec balanced ct = function
938 [] -> ct >= 0
939 | ((T2(tok,_,_)) as x)::xs ->
940 (match str_of_token2 x with
941 "(" -> balanced (ct+1) xs
942 | ")" -> balanced (ct-1) xs
943 | _ -> balanced ct xs)
944 | x::xs -> balanced ct xs in
945
ae4735db 946 let rec aux started xs =
34e49164
C
947 match xs with
948 | [] -> []
485bce71 949(* patch: coccinelle *)
34e49164 950 | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
5636bb2c 951 ((Cocci2 ("{",_,_,_,_)) as a)::xs
708f4980 952 when started && str_of_token2 x =$= ")" ->
34e49164 953 (* to be done for if, etc, but not for a function header *)
708f4980 954 x::(C2 " ")::a::(aux started xs)
1eddfd50
C
955 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs
956 when balanced 0 (fst(Common.span (function x -> not(is_newline x)) xs)) ->
ae4735db 957 let old_tabbing = !_current_tabbing in
34e49164
C
958 str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
959 (* only trust the indentation after the first { *)
c3e37e97
C
960 (if started
961 then adjust_tabbing_unit old_tabbing !_current_tabbing);
34e49164
C
962 let coccis_rest = Common.span all_coccis xs in
963 (match coccis_rest with
b1b2de81 964 (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
34e49164
C
965 (* the case where cocci code has been added before a close } *)
966 x::aux started (Indent_cocci2::xs)
967 | _ -> x::aux started xs)
968 | Indent_cocci2::xs ->
969 (match !tabbing_unit with
970 None -> aux started xs
971 | Some (tu,_) ->
972 _current_tabbing := (!_current_tabbing)^tu;
5636bb2c 973 Cocci2 (tu,-1,-1,-1,None)::aux started xs)
c3e37e97 974 | Unindent_cocci2(permanent)::xs ->
34e49164
C
975 (match !tabbing_unit with
976 None -> aux started xs
977 | Some (_,tu) ->
978 _current_tabbing := remtab tu (!_current_tabbing);
979 aux started xs)
980 (* border between existing code and cocci code *)
5636bb2c 981 | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs
b1b2de81 982 when str_of_token2 x =$= "{" ->
34e49164
C
983 x::aux true (y::Indent_cocci2::xs)
984 | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
b1b2de81 985 when str_of_token2 y =$= "}" ->
c3e37e97 986 x::aux started (y::Unindent_cocci2 true::xs)
34e49164 987 (* starting the body of the function *)
b1b2de81 988 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs
5636bb2c
C
989 | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs
990 | ((Cocci2("\n",_,_,_,_)) as x)::Unindent_cocci2(false)::xs ->
c3e37e97 991 x::aux started xs
5636bb2c 992 | ((Cocci2("\n",_,_,_,_)) as x)::xs ->
0708f913 993 (* dont inline in expr because of weird eval order of ocaml *)
ae4735db 994 let s = !_current_tabbing in
5636bb2c 995 x::Cocci2 (s,-1,-1,-1,None)::aux started xs
34e49164 996 | x::xs -> x::aux started xs in
5636bb2c 997 (aux false xs,!tabbing_unit)
34e49164
C
998
999
1000let rec find_paren_comma = function
1001 | [] -> ()
1002
1003 (* do nothing if was like this in original file *)
1004 | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
ae4735db 1005 ::xs when p2 =|= p1 + 1 ->
34e49164
C
1006 find_paren_comma (x2::xs)
1007
1008 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
ae4735db 1009 ::xs when p2 =|= p1 + 1 ->
34e49164
C
1010 find_paren_comma (x2::xs)
1011
1012 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
ae4735db 1013 ::xs when p2 =|= p1 + 1 ->
34e49164
C
1014 find_paren_comma (x2::xs)
1015
1016 (* otherwise yes can adjust *)
ae4735db 1017 | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
34e49164
C
1018 x2.remove <- true;
1019 find_paren_comma (x2::xs)
ae4735db 1020 | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
34e49164
C
1021 x1.remove <- true;
1022 find_paren_comma (x2::xs)
1023
ae4735db 1024 | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
34e49164
C
1025 x1.remove <- true;
1026 find_paren_comma (x2::xs)
1027
ae4735db 1028 | x::xs ->
34e49164 1029 find_paren_comma xs
34e49164 1030
ae4735db
C
1031
1032let fix_tokens toks =
34e49164
C
1033 let toks = toks +> List.map mk_token_extended in
1034
1035 let cleaner = toks +> Common.exclude (function
1036 | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
1037 | _ -> false
1038 ) in
1039 find_paren_comma cleaner;
1040
1041 let toks = rebuild_tokens_extented toks in
1042 toks +> List.map (fun x -> x.tok2)
1043
1044
1045
1046(*****************************************************************************)
1047(* Final unparsing (and debugging support) *)
1048(*****************************************************************************)
1049
1050(* for debugging *)
1051type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
1052
1053let kind_of_token2 = function
1054 | Fake2 -> KFake
1055 | Cocci2 _ -> KCocci
1056 | C2 _ -> KC
c491d8ee 1057 | Comma _ -> KC
34e49164
C
1058 | T2 (t,_,_) ->
1059 (match TH.pinfo_of_tok t with
1060 | ExpandedTok _ -> KExpanded
1061 | OriginTok _ -> KOrigin
1062 | FakeTok _ -> raise Impossible (* now a Fake2 *)
1063 | AbstractLineTok _ -> raise Impossible (* now a KC *)
1064 )
c3e37e97 1065 | Unindent_cocci2 _ | Indent_cocci2 -> raise Impossible
ae4735db 1066
34e49164
C
1067let end_mark = "!"
1068
1069let start_mark = function
1070 | KFake -> "!F!"
ae4735db 1071 | KCocci -> "!S!"
34e49164
C
1072 | KC -> "!A!"
1073 | KExpanded -> "!E!"
1074 | KOrigin -> ""
1075
1076let print_all_tokens2 pr xs =
1077 if !Flag_parsing_c.debug_unparsing
1078 then
1079 let current_kind = ref KOrigin in
ae4735db 1080 xs +> List.iter (fun t ->
34e49164 1081 let newkind = kind_of_token2 t in
b1b2de81 1082 if newkind =*= !current_kind
34e49164
C
1083 then pr (str_of_token2 t)
1084 else begin
1085 pr (end_mark);
1086 pr (start_mark newkind);
1087 pr (str_of_token2 t);
1088 current_kind := newkind
1089 end
1090 );
ae4735db 1091 else
34e49164 1092 xs +> List.iter (fun x -> pr (str_of_token2 x))
ae4735db 1093
34e49164
C
1094
1095
1096
1097(*****************************************************************************)
1098(* Entry points *)
1099(*****************************************************************************)
1100
1101(* old: PPviatok was made in the beginning to allow to pretty print a
1102 * complete C file, including a modified C file by transformation.ml,
1103 * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
1104 * maybe flow_to_ast) all the cases. Indeed we don't need to do some
1105 * fancy stuff when a function was not modified at all. Just need to
1106 * print the list of token as-is. But now pretty_print_c.ml handles
1107 * almost everything so maybe less useful. Maybe PPviatok allows to
ae4735db
C
1108 * optimize a little the pretty printing.
1109 *
34e49164
C
1110 * update: now have PPviastr which goes even faster than PPviatok, so
1111 * PPviatok has disappeared.
1112 *)
1113
1114type ppmethod = PPnormal | PPviastr
1115
1116
1117
1118
1119(* The pp_program function will call pretty_print_c.ml with a special
ae4735db 1120 * function to print the leaf components, the tokens. When we want to
34e49164 1121 * print a token, we need to print also maybe the space and comments that
ae4735db 1122 * were close to it in the original file (and that was omitted during the
34e49164
C
1123 * parsing phase), and honor what the cocci-info attached to the token says.
1124 * Maybe we will not print the token if it's a MINUS-token, and maybe we will
ae4735db 1125 * print it and also print some cocci-code attached in a PLUS to it.
34e49164
C
1126 * So we will also maybe call unparse_cocci. Because the cocci-code may
1127 * contain metavariables, unparse_cocci will in fact sometimes call back
1128 * pretty_print_c (which will this time don't call back again unparse_cocci)
1129 *)
1130
ae4735db
C
1131let pp_program2 xs outfile =
1132 Common.with_open_outfile outfile (fun (pr,chan) ->
1133 let pr s =
1134 if !Flag_parsing_c.debug_unparsing
34e49164 1135 then begin pr2_no_nl s; flush stderr end
ae4735db 1136 else pr s
34e49164
C
1137 (* flush chan; *)
1138 (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1139 in
ae4735db
C
1140
1141 xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
34e49164
C
1142 (* here can still work on ast *)
1143 let e = remove_useless_fakeInfo_struct e in
ae4735db 1144
34e49164 1145 match ppmethod with
002099fc 1146 | PPnormal ->
34e49164 1147 (* now work on tokens *)
34e49164 1148 (* phase1: just get all the tokens, all the information *)
ae4735db 1149 assert(toks_e +> List.for_all (fun t ->
34e49164
C
1150 TH.is_origin t or TH.is_expanded t
1151 ));
1152 let toks = get_fakeInfo_and_tokens e toks_e in
1153 let toks = displace_fake_nodes toks in
1154 (* assert Origin;ExpandedTok;Faketok *)
1155 let toks = expand_mcode toks in
c491d8ee 1156
34e49164
C
1157 (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1158 * and no tag information, just NOTHING. *)
fc1ad971
C
1159
1160 let toks =
1161 if !Flag.sgrep_mode2
5626f154
C
1162 then
1163 (* nothing else to do for sgrep *)
1164 drop_expanded_and_fake (drop_minus toks)
fc1ad971
C
1165 else
1166 (* phase2: can now start to filter and adjust *)
5626f154 1167 let (toks,tu) = adjust_indentation toks in
fc1ad971 1168 let toks = adjust_before_semicolon toks in(*before remove minus*)
aba5c457
C
1169 let toks = drop_space_at_endline toks in
1170 let toks = paren_to_space toks in
c491d8ee 1171 let toks = drop_end_comma toks in
fc1ad971
C
1172 let toks = remove_minus_and_between_and_expanded_and_fake toks in
1173 (* assert Origin + Cocci + C and no minus *)
1174 let toks = add_space toks in
5636bb2c 1175 let toks = add_newlines toks tu in
fc1ad971 1176 let toks = fix_tokens toks in
5626f154 1177 toks in
34e49164 1178
ae4735db 1179 (* in theory here could reparse and rework the ast! or
34e49164
C
1180 * apply some SP. Not before cos julia may have generated
1181 * not parsable file. Need do unparsing_tricks call before being
1182 * ready to reparse. *)
1183 print_all_tokens2 pr toks;
1184
1185 | PPviastr -> pr str
1186 )
1187 )
1188
ae4735db 1189let pp_program a b =
34e49164
C
1190 Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
1191
485bce71 1192
ae4735db 1193let pp_program_default xs outfile =
485bce71
C
1194 let xs' = xs +> List.map (fun x -> x, PPnormal) in
1195 pp_program xs' outfile