Release coccinelle-0.1.9
[bpt/coccinelle.git] / parsing_c / unparse_c.ml
CommitLineData
0708f913
C
1(* Yoann Padioleau, Julia Lawall
2 *
3 * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
34e49164
C
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
485bce71
C
13 *
14 *
15 * Modifications by Julia Lawall for better newline handling.
34e49164
C
16 *)
17open Common
18
19open Ast_c
20
21module TH = Token_helpers
22
23
b1b2de81
C
24(* should keep comments and directives in between adjacent deleted terms,
25but not comments and directives within deleted terms. should use the
26labels found in the control-flow graph *)
27
34e49164
C
28
29
708f4980
C
30(*****************************************************************************)
31(* Wrappers *)
32(*****************************************************************************)
33let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
34
34e49164
C
35(*****************************************************************************)
36(* Types used during the intermediate phases of the unparsing *)
37(*****************************************************************************)
38
39type token1 =
40 | Fake1 of info
41 | T1 of Parser_c.token
42
43(* The cocci_tag of the token should always be a NOTHING. The mark of
44 * the token can only be OriginTok or ExpandedTok. Why not get rid of
45 * token and get something simpler ? because we need to know if the
46 * info is a TCommentCpp or TCommentSpace, etc for some of the further
47 * analysis so easier to keep with the token.
48 *
49 * This type contains the whole information. Have all the tokens with this
50 * type.
51 *)
708f4980
C
52type min =
53 Min of (int list (* match numbers *) * int (* adjacency information *))
54 | Ctx
55
34e49164 56type token2 =
708f4980 57 | T2 of Parser_c.token * min *
34e49164
C
58 int option (* orig index, abstracting away comments and space *)
59 | Fake2
708f4980 60 | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
34e49164
C
61 | C2 of string
62 | Indent_cocci2
63 | Unindent_cocci2
64
65(* not used yet *)
66type token3 =
67 | T3 of Parser_c.token
68 | Cocci3 of string
69 | C3 of string
70
71
72(* similar to the tech in parsing_hack *)
73type token_extended = {
74 tok2 : token2;
75 str : string;
76 idx: int option; (* to know if 2 tokens were consecutive in orig file *)
77 mutable new_tokens_before : token2 list;
78 mutable remove : bool;
79}
80
81
82(*****************************************************************************)
83(* Helpers *)
84(*****************************************************************************)
85
86let info_of_token1 t =
87 match t with
88 | Fake1 info -> info
89 | T1 tok -> TH.info_of_tok tok
90
002099fc
C
91let print_token1 = function
92 T1 tok -> TH.str_of_tok tok
93 | Fake1 info -> "fake"
94
34e49164
C
95let str_of_token2 = function
96 | T2 (t,_,_) -> TH.str_of_tok t
97 | Fake2 -> ""
708f4980 98 | Cocci2 (s,_,_,_) -> s
34e49164
C
99 | C2 s -> s
100 | Indent_cocci2 -> ""
101 | Unindent_cocci2 -> ""
102
103let print_token2 = function
708f4980
C
104 | T2 (t,b,_) ->
105 let b_str =
106 match b with
107 Min (index,adj) ->
108 Printf.sprintf "-%d[%s]" adj
109 (String.concat " " (List.map string_of_int index))
110 | Ctx -> "" in
111 "T2:"^b_str^TH.str_of_tok t
34e49164 112 | Fake2 -> ""
708f4980 113 | Cocci2 (s,_,lc,rc) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
34e49164
C
114 | C2 s -> "C2:"^s
115 | Indent_cocci2 -> "Indent"
116 | Unindent_cocci2 -> "Unindent"
117
002099fc
C
118let simple_print_all_tokens1 l =
119 List.iter (function x -> Printf.printf "%s " (print_token1 x)) l;
120 Printf.printf "\n"
121
708f4980 122let simple_print_all_tokens2 l =
34e49164
C
123 List.iter (function x -> Printf.printf "%s " (print_token2 x)) l;
124 Printf.printf "\n"
125
126let str_of_token3 = function
127 | T3 t -> TH.str_of_tok t
128 | Cocci3 s | C3 s -> s
129
130
131
132let mk_token_extended x =
133 let origidx =
134 match x with
135 | T2 (_,_, idx) -> idx
136 | _ -> None
137 in
138 { tok2 = x;
139 str = str_of_token2 x;
140 idx = origidx;
141 new_tokens_before = [];
142 remove = false;
143 }
144
145let rebuild_tokens_extented toks_ext =
146 let _tokens = ref [] in
147 toks_ext +> List.iter (fun tok ->
148 tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
149 if not tok.remove then push2 tok.tok2 _tokens;
150 );
151 let tokens = List.rev !_tokens in
152 (tokens +> List.map mk_token_extended)
153
154
155let mcode_contain_plus = function
156 | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
157 | Ast_cocci.CONTEXT _ -> true
485bce71 158(* patch: when need full coccinelle transformation *)
708f4980
C
159 | Ast_cocci.MINUS (_,_,_,[]) -> false
160 | Ast_cocci.MINUS (_,_,_,x::xs) -> true
34e49164
C
161 | Ast_cocci.PLUS -> raise Impossible
162
163let contain_plus info =
164 let mck = Ast_c.mcode_of_info info in
165 mcode_contain_plus mck
166
167(*****************************************************************************)
168(* Last fix on the ast *)
169(*****************************************************************************)
170
171(* Because of the ugly trick to handle initialiser, I generate fake ','
172 * for the last initializer element, but if there is nothing around it,
173 * I don't want in the end to print it.
174 *)
175
176let remove_useless_fakeInfo_struct program =
177 let bigf = { Visitor_c.default_visitor_c_s with
178 Visitor_c.kini_s = (fun (k,bigf) ini ->
179 match k ini with
113803cf 180 | InitList args, ii ->
34e49164
C
181 (match ii with
182 | [_i1;_i2] -> ini
113803cf 183 | [i1;i2;iicommaopt] ->
34e49164
C
184 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
185 && (Ast_c.is_fake iicommaopt)
186 (* sometimes the guy put a normal iicommaopt *)
34e49164
C
187 then InitList args, [i1;i2]
188 else InitList args, [i1;i2;iicommaopt]
113803cf
C
189 | [i1;i2;iicommaopt;end_comma_opt] ->
190 (* only in #define. end_comma_opt canot be fake *)
191 (* not sure if this will be considered ambiguous with a previous
192 case? *)
193 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
194 && (Ast_c.is_fake iicommaopt)
195 (* sometimes the guy put a normal iicommaopt *)
196 then InitList args, [i1;i2;end_comma_opt]
197 else InitList args, [i1;i2;iicommaopt;end_comma_opt]
34e49164
C
198 | _ -> raise Impossible
199 )
200 | x -> x
201 )
202 } in
203 Visitor_c.vk_toplevel_s bigf program
204
205
206(*****************************************************************************)
207(* Tokens1 generation *)
208(*****************************************************************************)
209
210let get_fakeInfo_and_tokens celem toks =
211 let toks_in = ref toks in
212 let toks_out = ref [] in
213
214 (* todo? verify good order of position ? *)
215 let pr_elem info =
216 match Ast_c.pinfo_of_info info with
217 | FakeTok _ ->
218 Common.push2 (Fake1 info) toks_out
219 | OriginTok _ | ExpandedTok _ ->
220 (* get the associated comments/space/cppcomment tokens *)
978fd7e5
C
221 let (before, x, after) =
222 !toks_in +> Common.split_when (fun tok ->
34e49164
C
223 info =*= TH.info_of_tok tok)
224 in
b1b2de81 225 assert(info =*= TH.info_of_tok x);
34e49164
C
226 (*old: assert(before +> List.for_all (TH.is_comment)); *)
227 before +> List.iter (fun x ->
228 if not (TH.is_comment x)
0708f913 229 then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
34e49164
C
230 (* case such as int asm d3("x"); not yet in ast *)
231 );
232 before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
233 push2 (T1 x) toks_out;
234 toks_in := after;
235 | AbstractLineTok _ ->
236 (* can be called on type info when for instance use -type_c *)
237 if !Flag_parsing_c.pretty_print_type_info
238 then Common.push2 (Fake1 info) toks_out
239 else raise Impossible (* at this stage *)
240 in
241
242 let pr_space _ = () in (* use the spacing that is there already *)
243
244 Pretty_print_c.pp_program_gen pr_elem pr_space celem;
245
246 if not (null !toks_in)
0708f913 247 then failwith "WEIRD: unparsing not finished";
34e49164
C
248
249 List.rev !toks_out
250
251(* Fake nodes that have BEFORE code should be moved over any subsequent
252whitespace and newlines, but not any comments, to get as close to the affected
002099fc 253code as possible. Similarly, fake nodes that have AFTER code should be moved
34e49164
C
254backwards. No fake nodes should have both before and after code. *)
255
256let displace_fake_nodes toks =
257 let is_fake = function Fake1 _ -> true | _ -> false in
258 let is_whitespace = function
259 T1(Parser_c.TCommentSpace _)
485bce71 260 (* patch: cocci *)
34e49164
C
261 | T1(Parser_c.TCommentNewline _) -> true
262 | _ -> false in
263 let rec loop toks =
264 let fake_info =
265 try Some (Common.split_when is_fake toks)
266 with Not_found -> None in
267 match fake_info with
268 Some(bef,((Fake1 info) as fake),aft) ->
269 (match !(info.cocci_tag) with
708f4980
C
270 | Some x ->
271 (match x with
002099fc 272 (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
34e49164 273 (* move the fake node forwards *)
002099fc
C
274 let (whitespace,rest) = Common.span is_whitespace aft in
275 bef @ whitespace @ fake :: (loop rest)
276 | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
34e49164 277 (* move the fake node backwards *)
002099fc
C
278 let revbef = List.rev bef in
279 let (revwhitespace,revprev) = Common.span is_whitespace revbef in
280 let whitespace = List.rev revwhitespace in
281 let prev = List.rev revprev in
282 prev @ fake :: (loop (whitespace @ aft))
283 | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
284 bef @ fake :: (loop aft)
285 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
286 failwith "fake node should not be before-after"
287 | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
485bce71 288 )
708f4980
C
289 | None ->
290 bef @ fake :: (loop aft)
291 )
34e49164
C
292 | None -> toks
293 | _ -> raise Impossible in
294 loop toks
295
296(*****************************************************************************)
297(* Tokens2 generation *)
298(*****************************************************************************)
299
b1b2de81
C
300let comment2t2 = function
301 (Token_c.TCommentCpp x,(info : Token_c.info)) ->
302 C2("\n"^info.Common.str^"\n")
303 | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
304
34e49164
C
305let expand_mcode toks =
306 let toks_out = ref [] in
307
308 let index = ref 0 in
309
310 let add_elem t minus =
311 match t with
312 | Fake1 info ->
313 let str = Ast_c.str_of_info info in
b1b2de81 314 if str =$= ""
34e49164
C
315 then push2 (Fake2) toks_out
316 (* perhaps the fake ',' *)
317 else push2 (C2 str) toks_out
318
319
002099fc 320 | T1 tok ->
b1b2de81 321 (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
34e49164 322 (* no tag on expandedTok ! *)
0708f913
C
323 (if (TH.is_expanded tok &&
324 !((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot)
325 then
326 failwith
327 (Printf.sprintf
328 "expanded token %s on line %d is either modified or stored in a metavariable"
329 (TH.str_of_tok tok) (TH.line_of_tok tok)));
34e49164
C
330
331 let tok' = tok +> TH.visitor_info_of_tok (fun i ->
332 { i with cocci_tag = ref Ast_c.emptyAnnot; }
333 ) in
334
335 let optindex =
336 if TH.is_origin tok && not (TH.is_real_comment tok)
337 then begin
338 incr index;
339 Some !index
340 end
341 else None
342 in
343
344 push2 (T2 (tok', minus, optindex)) toks_out
345 in
346
347 let expand_info t =
708f4980
C
348 let (mcode,env) =
349 Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
34e49164 350
708f4980
C
351 let pr_cocci s ln col rcol =
352 push2 (Cocci2(s,ln,col,rcol)) toks_out in
34e49164 353 let pr_c info =
b1b2de81
C
354 (match Ast_c.pinfo_of_info info with
355 Ast_c.AbstractLineTok _ ->
356 push2 (C2 (Ast_c.str_of_info info)) toks_out
357 | Ast_c.FakeTok (s,_) ->
358 push2 (C2 s) toks_out
34e49164
C
359 | _ ->
360 Printf.printf "line: %s\n" (Common.dump info);
b1b2de81
C
361 failwith "not an abstract line");
362 (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
363 List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
364
708f4980
C
365 let pr_barrier ln col = (* marks a position, used around C code *)
366 push2 (Cocci2("",ln,col,col)) toks_out in
367 let pr_nobarrier ln col = () in (* not needed for linux spacing *)
b1b2de81 368
708f4980 369 let pr_cspace _ = push2 (C2 " ") toks_out in
34e49164 370
708f4980
C
371 let pr_space _ = () (* rely on add_space in cocci code *) in
372 let pr_arity _ = () (* not interested *) in
34e49164
C
373
374 let indent _ = push2 Indent_cocci2 toks_out in
375 let unindent _ = push2 Unindent_cocci2 toks_out in
376
708f4980
C
377 let args_pp =
378 (env, pr_cocci, pr_c, pr_cspace,
379 (match !Flag_parsing_c.spacing with
380 Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
381 pr_arity,
382 (match !Flag_parsing_c.spacing with
383 Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
384 indent, unindent) in
34e49164 385
485bce71
C
386 (* old: when for yacfe with partial cocci:
387 * add_elem t false;
388 *)
34e49164 389
485bce71 390 (* patch: when need full coccinelle transformation *)
faf9a90c 391 let unparser = Unparse_cocci.pp_list_list_any args_pp false in
34e49164 392 match mcode with
708f4980 393 | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
34e49164
C
394 (* Why adding ? because I want to have all the information, the whole
395 * set of tokens, so I can then process and remove the
396 * is_between_two_minus for instance *)
708f4980 397 add_elem t (Min (inst,adj));
faf9a90c 398 unparser any_xxs Unparse_cocci.InPlace
34e49164
C
399 | Ast_cocci.CONTEXT (_,any_befaft) ->
400 (match any_befaft with
401 | Ast_cocci.NOTHING ->
708f4980 402 add_elem t Ctx
34e49164 403 | Ast_cocci.BEFORE xxs ->
faf9a90c 404 unparser xxs Unparse_cocci.Before;
708f4980 405 add_elem t Ctx
34e49164 406 | Ast_cocci.AFTER xxs ->
708f4980 407 add_elem t Ctx;
faf9a90c 408 unparser xxs Unparse_cocci.After;
34e49164 409 | Ast_cocci.BEFOREAFTER (xxs, yys) ->
faf9a90c 410 unparser xxs Unparse_cocci.Before;
708f4980 411 add_elem t Ctx;
faf9a90c 412 unparser yys Unparse_cocci.After;
34e49164
C
413 )
414 | Ast_cocci.PLUS -> raise Impossible
415
416 in
417
418 toks +> List.iter expand_info;
419 List.rev !toks_out
420
421
422(*****************************************************************************)
423(* Tokens2 processing, filtering, adjusting *)
424(*****************************************************************************)
425
002099fc
C
426let is_space = function
427 | (T2 (t,_b,_i)) ->
428 (match t with
429 | Parser_c.TCommentSpace _ -> true (* only whitespace *)
430 | _ -> false
431 )
432 | _ -> false
433
34e49164 434let is_minusable_comment = function
b1b2de81 435 | (T2 (t,_b,_i)) ->
34e49164
C
436 (match t with
437 | Parser_c.TCommentSpace _ (* only whitespace *)
485bce71 438 (* patch: coccinelle *)
002099fc
C
439 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
440 | Parser_c.TComment _
441 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
b1b2de81
C
442 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
443 | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
34e49164
C
444 -> true
445
b1b2de81 446 | Parser_c.TCommentMisc _
0708f913 447 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
34e49164
C
448 -> false
449
450 | _ -> false
451 )
452 | _ -> false
453
454let all_coccis = function
455 Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 -> true
456 | _ -> false
457
b1b2de81
C
458(*previously gave up if the first character was a newline, but not clear why*)
459let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
34e49164 460
708f4980
C
461let set_minus_comment adj = function
462 | T2 (t,Ctx,idx) ->
34e49164
C
463 let str = TH.str_of_tok t in
464 (match t with
465 | Parser_c.TCommentSpace _
485bce71 466(* patch: coccinelle *)
34e49164
C
467 | Parser_c.TCommentNewline _ -> ()
468
469 | Parser_c.TComment _
0708f913 470 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
b1b2de81
C
471 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
472 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
34e49164 473 ->
b1b2de81
C
474 pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
475 (TH.line_of_tok t) str)
34e49164
C
476 | _ -> raise Impossible
477 );
708f4980 478 T2 (t, Min adj, idx)
485bce71 479(* patch: coccinelle *)
708f4980 480 | T2 (Parser_c.TCommentNewline _,Min adj,idx) as x -> x
34e49164
C
481 | _ -> raise Impossible
482
708f4980 483let set_minus_comment_or_plus adj = function
34e49164 484 Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 as x -> x
708f4980 485 | x -> set_minus_comment adj x
34e49164
C
486
487let remove_minus_and_between_and_expanded_and_fake xs =
488
489 (* get rid of exampled and fake tok *)
490 let xs = xs +> Common.exclude (function
491 | T2 (t,_,_) when TH.is_expanded t -> true
492 | Fake2 -> true
493
494 | _ -> false
495 )
496 in
497
498 (*This drops the space before each completely minused block (no plus code).*)
113803cf 499 let minus_or_comment = function
708f4980 500 T2(_,Min adj,_) -> true
113803cf
C
501 | T2(Parser_c.TCommentNewline _,_b,_i) -> false
502 | x -> is_minusable_comment x in
503
34e49164
C
504 let rec adjust_before_minus = function
505 [] -> []
485bce71 506(* patch: coccinelle *)
708f4980
C
507 | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
508 ((T2(_,Min adj,_)::_) as xs) ->
34e49164
C
509 let (between_minus,rest) = Common.span minus_or_comment xs in
510 (match rest with
708f4980 511 [] -> (set_minus_comment adj x) :: between_minus
34e49164 512 | T2(Parser_c.TCommentNewline _,_b,_i)::_ ->
708f4980 513 (set_minus_comment adj x) :: between_minus @
34e49164
C
514 (adjust_before_minus rest)
515 | _ -> x :: between_minus @ (adjust_before_minus rest))
516 | x::xs -> x::adjust_before_minus xs in
517
518 let xs = adjust_before_minus xs in
519
113803cf
C
520 (* this drops blank lines after a brace introduced by removing code *)
521 let rec adjust_after_brace = function
522 [] -> []
708f4980 523 | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs)
b1b2de81 524 when str_of_token2 x =$= "{" ->
113803cf
C
525 let (between_minus,rest) = Common.span minus_or_comment xs in
526 let is_whitespace = function
527 T2(Parser_c.TCommentSpace _,_b,_i)
528 (* patch: cocci *)
529 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
530 | _ -> false in
531 let (newlines,rest) = Common.span is_whitespace rest in
532 let (drop_newlines,last_newline) =
533 let rec loop = function
534 [] -> ([],[])
535 | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
536 (List.rev rest,[x])
537 | x::xs ->
538 let (drop_newlines,last_newline) = loop xs in
539 (drop_newlines,x::last_newline) in
540 loop (List.rev newlines) in
708f4980 541 x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@
113803cf
C
542 last_newline@
543 adjust_after_brace rest
544 | x::xs -> x::adjust_after_brace xs in
545
546 let xs = adjust_after_brace xs in
547
34e49164
C
548 (* this deals with any stuff that is between the minused code, eg
549 spaces, comments, attributes, etc. *)
550 (* The use of is_minusable_comment_or_plus and set_minus_comment_or_plus
551 is because the + code can end up anywhere in the middle of the - code;
552 it is not necessarily to the far left *)
708f4980
C
553
554 let common_adj (index1,adj1) (index2,adj2) =
555 adj1 = adj2 (* same adjacency info *) &&
556 (* non-empty intersection of witness trees *)
557 not ((Common.inter_set index1 index2) = []) in
558
b1b2de81 559 let rec adjust_between_minus xs =
34e49164
C
560 match xs with
561 | [] -> []
708f4980 562 | ((T2 (_,Min adj1,_)) as t1)::xs ->
34e49164
C
563 let (between_comments, rest) =
564 Common.span is_minusable_comment_or_plus xs in
565 (match rest with
002099fc
C
566 | [] ->
567 t1 :: (List.map (set_minus_comment_or_plus adj1) between_comments)
34e49164 568
708f4980
C
569 | ((T2 (_,Min adj2,_)) as t2)::rest when common_adj adj1 adj2 ->
570 t1::
571 (List.map (set_minus_comment_or_plus adj1) between_comments @
572 adjust_between_minus (t2::rest))
b1b2de81 573 | x::xs ->
708f4980 574 t1::(between_comments @ adjust_between_minus (x::xs))
34e49164
C
575 )
576
577 | x::xs -> x::adjust_between_minus xs in
578
579 let xs = adjust_between_minus xs in
580
581 let xs = xs +> Common.exclude (function
708f4980 582 | T2 (t,Min adj,_) -> true
34e49164
C
583 | _ -> false
584 ) in
585 xs
586
faf9a90c
C
587(* normally, in C code, a semicolon is not preceded by a space or newline *)
588let adjust_before_semicolon toks =
589 let toks = List.rev toks in
002099fc 590 let rec search_semic = function
faf9a90c 591 [] -> []
002099fc 592 | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
faf9a90c 593 if List.mem (str_of_token2 x) [";";")";","]
002099fc
C
594 then x :: search_minus false xs
595 else x :: search_semic xs
596 | x::xs -> x :: search_semic xs
597 and search_minus seen_minus xs =
598 let (spaces, rest) = Common.span is_space xs in
599 (* only delete spaces if something is actually deleted *)
600 match rest with
601 ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest
602 | _ -> if seen_minus then rest else xs in
603 List.rev (search_semic toks)
34e49164
C
604
605let is_ident_like s = s ==~ Common.regexp_alpha
606
607let rec add_space xs =
608 match xs with
609 | [] -> []
610 | [x] -> [x]
708f4980
C
611 | (Cocci2(sx,lnx,_,rcolx) as x)::((Cocci2(sy,lny,lcoly,_)) as y)::xs
612 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
613 not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
614 (* this only works within a line. could consider whether
615 something should be done to add newlines too, rather than
616 printing them explicitly in unparse_cocci. *)
617 x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
34e49164
C
618 | x::y::xs ->
619 let sx = str_of_token2 x in
620 let sy = str_of_token2 y in
faf9a90c 621 if is_ident_like sx && is_ident_like sy
34e49164
C
622 then x::C2 " "::(add_space (y::xs))
623 else x::(add_space (y::xs))
624
625
626
627(* When insert some new code, because of a + in a SP, we must add this
628 * code at the right place, with the good indentation. So each time we
629 * encounter some spacing info, with some newline, we maintain the
630 * current indentation level used.
631 *
632 * TODO problems: not accurate. ex: TODO
633 *
634 * TODO: if in #define region, should add a \ \n
635 *)
636let new_tabbing2 space =
637 (list_of_string space)
638 +> List.rev
b1b2de81 639 +> Common.take_until (fun c -> c =<= '\n')
34e49164
C
640 +> List.rev
641 +> List.map string_of_char
642 +> String.concat ""
643
644let new_tabbing a =
645 Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
646
647
648let rec adjust_indentation xs =
649 let _current_tabbing = ref "" in
650 let tabbing_unit = ref None in
651
652 let string_of_list l = String.concat "" (List.map string_of_char l) in
653
654 (* try to pick a tabbing unit for the plus code *)
655 let adjust_tabbing_unit old_tab new_tab =
b1b2de81 656 if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
34e49164
C
657 then
658 let old_tab = list_of_string old_tab in
659 let new_tab = list_of_string new_tab in
660 let rec loop = function
661 ([],new_tab) ->
662 tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
663 | (_,[]) -> failwith "not possible"
664 | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
665 loop (old_tab,new_tab) in
666
667 let remtab tu current_tab =
668 let current_tab = List.rev(list_of_string current_tab) in
669 let rec loop = function
670 ([],new_tab) -> string_of_list (List.rev new_tab)
0708f913 671 | (_,[]) -> "" (*weird; tabbing unit used up more than the current tab*)
b1b2de81 672 | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
34e49164
C
673 | (_,ns) -> (* mismatch; remove what we can *)
674 string_of_list (List.rev ns) in
675 loop (tu,current_tab) in
676
677 let rec find_first_tab started = function
678 [] -> ()
b1b2de81 679 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
34e49164 680 find_first_tab true xs
485bce71 681(* patch: coccinelle *)
34e49164
C
682 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
683 when started ->
684 let s = str_of_token2 x +> new_tabbing in
685 tabbing_unit := Some (s,List.rev (list_of_string s))
686 | x::xs -> find_first_tab started xs in
687 find_first_tab false xs;
688
689 let rec aux started xs =
690 match xs with
691 | [] -> []
485bce71 692(* patch: coccinelle *)
34e49164 693 | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
708f4980
C
694 ((Cocci2 ("{",_,_,_)) as a)::xs
695 when started && str_of_token2 x =$= ")" ->
34e49164 696 (* to be done for if, etc, but not for a function header *)
708f4980 697 x::(C2 " ")::a::(aux started xs)
34e49164
C
698 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs ->
699 let old_tabbing = !_current_tabbing in
700 str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
701 (* only trust the indentation after the first { *)
702 (if started then adjust_tabbing_unit old_tabbing !_current_tabbing);
703 let coccis_rest = Common.span all_coccis xs in
704 (match coccis_rest with
b1b2de81 705 (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
34e49164
C
706 (* the case where cocci code has been added before a close } *)
707 x::aux started (Indent_cocci2::xs)
708 | _ -> x::aux started xs)
709 | Indent_cocci2::xs ->
710 (match !tabbing_unit with
711 None -> aux started xs
712 | Some (tu,_) ->
713 _current_tabbing := (!_current_tabbing)^tu;
708f4980 714 Cocci2 (tu,-1,-1,-1)::aux started xs)
34e49164
C
715 | Unindent_cocci2::xs ->
716 (match !tabbing_unit with
717 None -> aux started xs
718 | Some (_,tu) ->
719 _current_tabbing := remtab tu (!_current_tabbing);
720 aux started xs)
721 (* border between existing code and cocci code *)
708f4980 722 | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_)) as y)::xs
b1b2de81 723 when str_of_token2 x =$= "{" ->
34e49164
C
724 x::aux true (y::Indent_cocci2::xs)
725 | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
b1b2de81 726 when str_of_token2 y =$= "}" ->
34e49164
C
727 x::aux started (y::Unindent_cocci2::xs)
728 (* starting the body of the function *)
b1b2de81 729 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs
708f4980
C
730 | ((Cocci2("{",_,_,_)) as a)::xs -> a::aux true xs
731 | ((Cocci2("\n",_,_,_)) as x)::xs ->
0708f913 732 (* dont inline in expr because of weird eval order of ocaml *)
34e49164 733 let s = !_current_tabbing in
708f4980 734 x::Cocci2 (s,-1,-1,-1)::aux started xs
34e49164
C
735 | x::xs -> x::aux started xs in
736 aux false xs
737
738
739let rec find_paren_comma = function
740 | [] -> ()
741
742 (* do nothing if was like this in original file *)
743 | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
b1b2de81 744 ::xs when p2 =|= p1 + 1 ->
34e49164
C
745 find_paren_comma (x2::xs)
746
747 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
b1b2de81 748 ::xs when p2 =|= p1 + 1 ->
34e49164
C
749 find_paren_comma (x2::xs)
750
751 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
b1b2de81 752 ::xs when p2 =|= p1 + 1 ->
34e49164
C
753 find_paren_comma (x2::xs)
754
755 (* otherwise yes can adjust *)
756 | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
757 x2.remove <- true;
758 find_paren_comma (x2::xs)
759 | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
760 x1.remove <- true;
761 find_paren_comma (x2::xs)
762
763 | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
764 x1.remove <- true;
765 find_paren_comma (x2::xs)
766
767 | x::xs ->
768 find_paren_comma xs
769
770
771let fix_tokens toks =
772 let toks = toks +> List.map mk_token_extended in
773
774 let cleaner = toks +> Common.exclude (function
775 | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
776 | _ -> false
777 ) in
778 find_paren_comma cleaner;
779
780 let toks = rebuild_tokens_extented toks in
781 toks +> List.map (fun x -> x.tok2)
782
783
784
785(*****************************************************************************)
786(* Final unparsing (and debugging support) *)
787(*****************************************************************************)
788
789(* for debugging *)
790type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
791
792let kind_of_token2 = function
793 | Fake2 -> KFake
794 | Cocci2 _ -> KCocci
795 | C2 _ -> KC
796 | T2 (t,_,_) ->
797 (match TH.pinfo_of_tok t with
798 | ExpandedTok _ -> KExpanded
799 | OriginTok _ -> KOrigin
800 | FakeTok _ -> raise Impossible (* now a Fake2 *)
801 | AbstractLineTok _ -> raise Impossible (* now a KC *)
802 )
803 | Unindent_cocci2 | Indent_cocci2 -> raise Impossible
804
805let end_mark = "!"
806
807let start_mark = function
808 | KFake -> "!F!"
809 | KCocci -> "!S!"
810 | KC -> "!A!"
811 | KExpanded -> "!E!"
812 | KOrigin -> ""
813
814let print_all_tokens2 pr xs =
815 if !Flag_parsing_c.debug_unparsing
816 then
817 let current_kind = ref KOrigin in
818 xs +> List.iter (fun t ->
819 let newkind = kind_of_token2 t in
b1b2de81 820 if newkind =*= !current_kind
34e49164
C
821 then pr (str_of_token2 t)
822 else begin
823 pr (end_mark);
824 pr (start_mark newkind);
825 pr (str_of_token2 t);
826 current_kind := newkind
827 end
828 );
829 else
830 xs +> List.iter (fun x -> pr (str_of_token2 x))
831
832
833
834
835(*****************************************************************************)
836(* Entry points *)
837(*****************************************************************************)
838
839(* old: PPviatok was made in the beginning to allow to pretty print a
840 * complete C file, including a modified C file by transformation.ml,
841 * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
842 * maybe flow_to_ast) all the cases. Indeed we don't need to do some
843 * fancy stuff when a function was not modified at all. Just need to
844 * print the list of token as-is. But now pretty_print_c.ml handles
845 * almost everything so maybe less useful. Maybe PPviatok allows to
846 * optimize a little the pretty printing.
847 *
848 * update: now have PPviastr which goes even faster than PPviatok, so
849 * PPviatok has disappeared.
850 *)
851
852type ppmethod = PPnormal | PPviastr
853
854
855
856
857(* The pp_program function will call pretty_print_c.ml with a special
858 * function to print the leaf components, the tokens. When we want to
859 * print a token, we need to print also maybe the space and comments that
860 * were close to it in the original file (and that was omitted during the
861 * parsing phase), and honor what the cocci-info attached to the token says.
862 * Maybe we will not print the token if it's a MINUS-token, and maybe we will
863 * print it and also print some cocci-code attached in a PLUS to it.
864 * So we will also maybe call unparse_cocci. Because the cocci-code may
865 * contain metavariables, unparse_cocci will in fact sometimes call back
866 * pretty_print_c (which will this time don't call back again unparse_cocci)
867 *)
868
869let pp_program2 xs outfile =
870 Common.with_open_outfile outfile (fun (pr,chan) ->
871 let pr s =
872 if !Flag_parsing_c.debug_unparsing
873 then begin pr2_no_nl s; flush stderr end
874 else pr s
875 (* flush chan; *)
876 (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
877 in
878
879 xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
34e49164
C
880 (* here can still work on ast *)
881 let e = remove_useless_fakeInfo_struct e in
882
883 match ppmethod with
002099fc 884 | PPnormal ->
34e49164
C
885 (* now work on tokens *)
886
887 (* phase1: just get all the tokens, all the information *)
888 assert(toks_e +> List.for_all (fun t ->
889 TH.is_origin t or TH.is_expanded t
890 ));
891 let toks = get_fakeInfo_and_tokens e toks_e in
892 let toks = displace_fake_nodes toks in
893 (* assert Origin;ExpandedTok;Faketok *)
894 let toks = expand_mcode toks in
895 (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
896 * and no tag information, just NOTHING. *)
897
898 (* phase2: can now start to filter and adjust *)
899 let toks = adjust_indentation toks in
002099fc 900 let toks = adjust_before_semicolon toks in (* before remove minus *)
34e49164
C
901 let toks = remove_minus_and_between_and_expanded_and_fake toks in
902 (* assert Origin + Cocci + C and no minus *)
903 let toks = add_space toks in
904 let toks = fix_tokens toks in
905
906 (* in theory here could reparse and rework the ast! or
907 * apply some SP. Not before cos julia may have generated
908 * not parsable file. Need do unparsing_tricks call before being
909 * ready to reparse. *)
910 print_all_tokens2 pr toks;
911
912 | PPviastr -> pr str
913 )
914 )
915
916let pp_program a b =
917 Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
918
485bce71
C
919
920let pp_program_default xs outfile =
921 let xs' = xs +> List.map (fun x -> x, PPnormal) in
922 pp_program xs' outfile