Release coccinelle-0.1.8
[bpt/coccinelle.git] / parsing_c / unparse_c.ml
CommitLineData
0708f913
C
1(* Yoann Padioleau, Julia Lawall
2 *
3 * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
34e49164
C
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
485bce71
C
13 *
14 *
15 * Modifications by Julia Lawall for better newline handling.
34e49164
C
16 *)
17open Common
18
19open Ast_c
20
21module TH = Token_helpers
22
23
b1b2de81
C
24(* should keep comments and directives in between adjacent deleted terms,
25but not comments and directives within deleted terms. should use the
26labels found in the control-flow graph *)
27
34e49164
C
28
29
708f4980
C
30(*****************************************************************************)
31(* Wrappers *)
32(*****************************************************************************)
33let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
34
34e49164
C
35(*****************************************************************************)
36(* Types used during the intermediate phases of the unparsing *)
37(*****************************************************************************)
38
39type token1 =
40 | Fake1 of info
41 | T1 of Parser_c.token
42
43(* The cocci_tag of the token should always be a NOTHING. The mark of
44 * the token can only be OriginTok or ExpandedTok. Why not get rid of
45 * token and get something simpler ? because we need to know if the
46 * info is a TCommentCpp or TCommentSpace, etc for some of the further
47 * analysis so easier to keep with the token.
48 *
49 * This type contains the whole information. Have all the tokens with this
50 * type.
51 *)
708f4980
C
52type min =
53 Min of (int list (* match numbers *) * int (* adjacency information *))
54 | Ctx
55
34e49164 56type token2 =
708f4980 57 | T2 of Parser_c.token * min *
34e49164
C
58 int option (* orig index, abstracting away comments and space *)
59 | Fake2
708f4980 60 | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
34e49164
C
61 | C2 of string
62 | Indent_cocci2
63 | Unindent_cocci2
64
65(* not used yet *)
66type token3 =
67 | T3 of Parser_c.token
68 | Cocci3 of string
69 | C3 of string
70
71
72(* similar to the tech in parsing_hack *)
73type token_extended = {
74 tok2 : token2;
75 str : string;
76 idx: int option; (* to know if 2 tokens were consecutive in orig file *)
77 mutable new_tokens_before : token2 list;
78 mutable remove : bool;
79}
80
81
82(*****************************************************************************)
83(* Helpers *)
84(*****************************************************************************)
85
86let info_of_token1 t =
87 match t with
88 | Fake1 info -> info
89 | T1 tok -> TH.info_of_tok tok
90
91let str_of_token2 = function
92 | T2 (t,_,_) -> TH.str_of_tok t
93 | Fake2 -> ""
708f4980 94 | Cocci2 (s,_,_,_) -> s
34e49164
C
95 | C2 s -> s
96 | Indent_cocci2 -> ""
97 | Unindent_cocci2 -> ""
98
99let print_token2 = function
708f4980
C
100 | T2 (t,b,_) ->
101 let b_str =
102 match b with
103 Min (index,adj) ->
104 Printf.sprintf "-%d[%s]" adj
105 (String.concat " " (List.map string_of_int index))
106 | Ctx -> "" in
107 "T2:"^b_str^TH.str_of_tok t
34e49164 108 | Fake2 -> ""
708f4980 109 | Cocci2 (s,_,lc,rc) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
34e49164
C
110 | C2 s -> "C2:"^s
111 | Indent_cocci2 -> "Indent"
112 | Unindent_cocci2 -> "Unindent"
113
708f4980 114let simple_print_all_tokens2 l =
34e49164
C
115 List.iter (function x -> Printf.printf "%s " (print_token2 x)) l;
116 Printf.printf "\n"
117
118let str_of_token3 = function
119 | T3 t -> TH.str_of_tok t
120 | Cocci3 s | C3 s -> s
121
122
123
124let mk_token_extended x =
125 let origidx =
126 match x with
127 | T2 (_,_, idx) -> idx
128 | _ -> None
129 in
130 { tok2 = x;
131 str = str_of_token2 x;
132 idx = origidx;
133 new_tokens_before = [];
134 remove = false;
135 }
136
137let rebuild_tokens_extented toks_ext =
138 let _tokens = ref [] in
139 toks_ext +> List.iter (fun tok ->
140 tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
141 if not tok.remove then push2 tok.tok2 _tokens;
142 );
143 let tokens = List.rev !_tokens in
144 (tokens +> List.map mk_token_extended)
145
146
147let mcode_contain_plus = function
148 | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
149 | Ast_cocci.CONTEXT _ -> true
485bce71 150(* patch: when need full coccinelle transformation *)
708f4980
C
151 | Ast_cocci.MINUS (_,_,_,[]) -> false
152 | Ast_cocci.MINUS (_,_,_,x::xs) -> true
34e49164
C
153 | Ast_cocci.PLUS -> raise Impossible
154
155let contain_plus info =
156 let mck = Ast_c.mcode_of_info info in
157 mcode_contain_plus mck
158
159(*****************************************************************************)
160(* Last fix on the ast *)
161(*****************************************************************************)
162
163(* Because of the ugly trick to handle initialiser, I generate fake ','
164 * for the last initializer element, but if there is nothing around it,
165 * I don't want in the end to print it.
166 *)
167
168let remove_useless_fakeInfo_struct program =
169 let bigf = { Visitor_c.default_visitor_c_s with
170 Visitor_c.kini_s = (fun (k,bigf) ini ->
171 match k ini with
113803cf 172 | InitList args, ii ->
34e49164
C
173 (match ii with
174 | [_i1;_i2] -> ini
113803cf 175 | [i1;i2;iicommaopt] ->
34e49164
C
176 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
177 && (Ast_c.is_fake iicommaopt)
178 (* sometimes the guy put a normal iicommaopt *)
34e49164
C
179 then InitList args, [i1;i2]
180 else InitList args, [i1;i2;iicommaopt]
113803cf
C
181 | [i1;i2;iicommaopt;end_comma_opt] ->
182 (* only in #define. end_comma_opt canot be fake *)
183 (* not sure if this will be considered ambiguous with a previous
184 case? *)
185 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
186 && (Ast_c.is_fake iicommaopt)
187 (* sometimes the guy put a normal iicommaopt *)
188 then InitList args, [i1;i2;end_comma_opt]
189 else InitList args, [i1;i2;iicommaopt;end_comma_opt]
34e49164
C
190 | _ -> raise Impossible
191 )
192 | x -> x
193 )
194 } in
195 Visitor_c.vk_toplevel_s bigf program
196
197
198(*****************************************************************************)
199(* Tokens1 generation *)
200(*****************************************************************************)
201
202let get_fakeInfo_and_tokens celem toks =
203 let toks_in = ref toks in
204 let toks_out = ref [] in
205
206 (* todo? verify good order of position ? *)
207 let pr_elem info =
208 match Ast_c.pinfo_of_info info with
209 | FakeTok _ ->
210 Common.push2 (Fake1 info) toks_out
211 | OriginTok _ | ExpandedTok _ ->
212 (* get the associated comments/space/cppcomment tokens *)
213 let (before, x, after) = !toks_in +> Common.split_when (fun tok ->
214 info =*= TH.info_of_tok tok)
215 in
b1b2de81 216 assert(info =*= TH.info_of_tok x);
34e49164
C
217 (*old: assert(before +> List.for_all (TH.is_comment)); *)
218 before +> List.iter (fun x ->
219 if not (TH.is_comment x)
0708f913 220 then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
34e49164
C
221 (* case such as int asm d3("x"); not yet in ast *)
222 );
223 before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
224 push2 (T1 x) toks_out;
225 toks_in := after;
226 | AbstractLineTok _ ->
227 (* can be called on type info when for instance use -type_c *)
228 if !Flag_parsing_c.pretty_print_type_info
229 then Common.push2 (Fake1 info) toks_out
230 else raise Impossible (* at this stage *)
231 in
232
233 let pr_space _ = () in (* use the spacing that is there already *)
234
235 Pretty_print_c.pp_program_gen pr_elem pr_space celem;
236
237 if not (null !toks_in)
0708f913 238 then failwith "WEIRD: unparsing not finished";
34e49164
C
239
240 List.rev !toks_out
241
242(* Fake nodes that have BEFORE code should be moved over any subsequent
243whitespace and newlines, but not any comments, to get as close to the affected
244code as possible. Similarly, face nodes that have AFTER code should be moved
245backwards. No fake nodes should have both before and after code. *)
246
247let displace_fake_nodes toks =
248 let is_fake = function Fake1 _ -> true | _ -> false in
249 let is_whitespace = function
250 T1(Parser_c.TCommentSpace _)
485bce71 251 (* patch: cocci *)
34e49164
C
252 | T1(Parser_c.TCommentNewline _) -> true
253 | _ -> false in
254 let rec loop toks =
255 let fake_info =
256 try Some (Common.split_when is_fake toks)
257 with Not_found -> None in
258 match fake_info with
259 Some(bef,((Fake1 info) as fake),aft) ->
260 (match !(info.cocci_tag) with
708f4980
C
261 | Some x ->
262 (match x with
34e49164
C
263 (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
264 (* move the fake node forwards *)
265 let (whitespace,rest) = Common.span is_whitespace aft in
266 bef @ whitespace @ fake :: (loop rest)
267 | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
268 (* move the fake node backwards *)
269 let revbef = List.rev bef in
270 let (revwhitespace,revprev) = Common.span is_whitespace revbef in
271 let whitespace = List.rev revwhitespace in
272 let prev = List.rev revprev in
273 prev @ fake :: (loop (whitespace @ aft))
274 | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
275 bef @ fake :: (loop aft)
276 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
277 failwith "fake node should not be before-after"
485bce71
C
278 | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
279 )
708f4980
C
280 | None ->
281 bef @ fake :: (loop aft)
282 )
34e49164
C
283 | None -> toks
284 | _ -> raise Impossible in
285 loop toks
286
287(*****************************************************************************)
288(* Tokens2 generation *)
289(*****************************************************************************)
290
b1b2de81
C
291let comment2t2 = function
292 (Token_c.TCommentCpp x,(info : Token_c.info)) ->
293 C2("\n"^info.Common.str^"\n")
294 | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
295
34e49164
C
296let expand_mcode toks =
297 let toks_out = ref [] in
298
299 let index = ref 0 in
300
301 let add_elem t minus =
302 match t with
303 | Fake1 info ->
304 let str = Ast_c.str_of_info info in
b1b2de81 305 if str =$= ""
34e49164
C
306 then push2 (Fake2) toks_out
307 (* perhaps the fake ',' *)
308 else push2 (C2 str) toks_out
309
310
311 | T1 tok ->
b1b2de81 312 (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
34e49164 313 (* no tag on expandedTok ! *)
0708f913
C
314 (if (TH.is_expanded tok &&
315 !((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot)
316 then
317 failwith
318 (Printf.sprintf
319 "expanded token %s on line %d is either modified or stored in a metavariable"
320 (TH.str_of_tok tok) (TH.line_of_tok tok)));
34e49164
C
321
322 let tok' = tok +> TH.visitor_info_of_tok (fun i ->
323 { i with cocci_tag = ref Ast_c.emptyAnnot; }
324 ) in
325
326 let optindex =
327 if TH.is_origin tok && not (TH.is_real_comment tok)
328 then begin
329 incr index;
330 Some !index
331 end
332 else None
333 in
334
335 push2 (T2 (tok', minus, optindex)) toks_out
336 in
337
338 let expand_info t =
708f4980
C
339 let (mcode,env) =
340 Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
34e49164 341
708f4980
C
342 let pr_cocci s ln col rcol =
343 push2 (Cocci2(s,ln,col,rcol)) toks_out in
34e49164 344 let pr_c info =
b1b2de81
C
345 (match Ast_c.pinfo_of_info info with
346 Ast_c.AbstractLineTok _ ->
347 push2 (C2 (Ast_c.str_of_info info)) toks_out
348 | Ast_c.FakeTok (s,_) ->
349 push2 (C2 s) toks_out
34e49164
C
350 | _ ->
351 Printf.printf "line: %s\n" (Common.dump info);
b1b2de81
C
352 failwith "not an abstract line");
353 (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
354 List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
355
708f4980
C
356 let pr_barrier ln col = (* marks a position, used around C code *)
357 push2 (Cocci2("",ln,col,col)) toks_out in
358 let pr_nobarrier ln col = () in (* not needed for linux spacing *)
b1b2de81 359
708f4980 360 let pr_cspace _ = push2 (C2 " ") toks_out in
34e49164 361
708f4980
C
362 let pr_space _ = () (* rely on add_space in cocci code *) in
363 let pr_arity _ = () (* not interested *) in
34e49164
C
364
365 let indent _ = push2 Indent_cocci2 toks_out in
366 let unindent _ = push2 Unindent_cocci2 toks_out in
367
708f4980
C
368 let args_pp =
369 (env, pr_cocci, pr_c, pr_cspace,
370 (match !Flag_parsing_c.spacing with
371 Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
372 pr_arity,
373 (match !Flag_parsing_c.spacing with
374 Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
375 indent, unindent) in
34e49164 376
485bce71
C
377 (* old: when for yacfe with partial cocci:
378 * add_elem t false;
379 *)
34e49164 380
485bce71 381 (* patch: when need full coccinelle transformation *)
faf9a90c 382 let unparser = Unparse_cocci.pp_list_list_any args_pp false in
34e49164 383 match mcode with
708f4980 384 | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
34e49164
C
385 (* Why adding ? because I want to have all the information, the whole
386 * set of tokens, so I can then process and remove the
387 * is_between_two_minus for instance *)
708f4980 388 add_elem t (Min (inst,adj));
faf9a90c 389 unparser any_xxs Unparse_cocci.InPlace
34e49164
C
390 | Ast_cocci.CONTEXT (_,any_befaft) ->
391 (match any_befaft with
392 | Ast_cocci.NOTHING ->
708f4980 393 add_elem t Ctx
34e49164 394 | Ast_cocci.BEFORE xxs ->
faf9a90c 395 unparser xxs Unparse_cocci.Before;
708f4980 396 add_elem t Ctx
34e49164 397 | Ast_cocci.AFTER xxs ->
708f4980 398 add_elem t Ctx;
faf9a90c 399 unparser xxs Unparse_cocci.After;
34e49164 400 | Ast_cocci.BEFOREAFTER (xxs, yys) ->
faf9a90c 401 unparser xxs Unparse_cocci.Before;
708f4980 402 add_elem t Ctx;
faf9a90c 403 unparser yys Unparse_cocci.After;
34e49164
C
404 )
405 | Ast_cocci.PLUS -> raise Impossible
406
407 in
408
409 toks +> List.iter expand_info;
410 List.rev !toks_out
411
412
413(*****************************************************************************)
414(* Tokens2 processing, filtering, adjusting *)
415(*****************************************************************************)
416
417let is_minusable_comment = function
b1b2de81 418 | (T2 (t,_b,_i)) ->
34e49164
C
419 (match t with
420 | Parser_c.TCommentSpace _ (* only whitespace *)
485bce71 421 (* patch: coccinelle *)
34e49164
C
422 | Parser_c.TCommentNewline _ (* newline plus whitespace *)
423 | Parser_c.TComment _
0708f913 424 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
b1b2de81
C
425 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
426 | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
34e49164
C
427 -> true
428
b1b2de81 429 | Parser_c.TCommentMisc _
0708f913 430 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
34e49164
C
431 -> false
432
433 | _ -> false
434 )
435 | _ -> false
436
437let all_coccis = function
438 Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 -> true
439 | _ -> false
440
b1b2de81
C
441(*previously gave up if the first character was a newline, but not clear why*)
442let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
34e49164 443
708f4980
C
444let set_minus_comment adj = function
445 | T2 (t,Ctx,idx) ->
34e49164
C
446 let str = TH.str_of_tok t in
447 (match t with
448 | Parser_c.TCommentSpace _
485bce71 449(* patch: coccinelle *)
34e49164
C
450 | Parser_c.TCommentNewline _ -> ()
451
452 | Parser_c.TComment _
0708f913 453 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
b1b2de81
C
454 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
455 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
34e49164 456 ->
b1b2de81
C
457 pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
458 (TH.line_of_tok t) str)
34e49164
C
459 | _ -> raise Impossible
460 );
708f4980 461 T2 (t, Min adj, idx)
485bce71 462(* patch: coccinelle *)
708f4980 463 | T2 (Parser_c.TCommentNewline _,Min adj,idx) as x -> x
34e49164
C
464 | _ -> raise Impossible
465
708f4980 466let set_minus_comment_or_plus adj = function
34e49164 467 Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 as x -> x
708f4980 468 | x -> set_minus_comment adj x
34e49164
C
469
470let remove_minus_and_between_and_expanded_and_fake xs =
471
472 (* get rid of exampled and fake tok *)
473 let xs = xs +> Common.exclude (function
474 | T2 (t,_,_) when TH.is_expanded t -> true
475 | Fake2 -> true
476
477 | _ -> false
478 )
479 in
480
481 (*This drops the space before each completely minused block (no plus code).*)
113803cf 482 let minus_or_comment = function
708f4980 483 T2(_,Min adj,_) -> true
113803cf
C
484 | T2(Parser_c.TCommentNewline _,_b,_i) -> false
485 | x -> is_minusable_comment x in
486
34e49164
C
487 let rec adjust_before_minus = function
488 [] -> []
485bce71 489(* patch: coccinelle *)
708f4980
C
490 | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
491 ((T2(_,Min adj,_)::_) as xs) ->
34e49164
C
492 let (between_minus,rest) = Common.span minus_or_comment xs in
493 (match rest with
708f4980 494 [] -> (set_minus_comment adj x) :: between_minus
34e49164 495 | T2(Parser_c.TCommentNewline _,_b,_i)::_ ->
708f4980 496 (set_minus_comment adj x) :: between_minus @
34e49164
C
497 (adjust_before_minus rest)
498 | _ -> x :: between_minus @ (adjust_before_minus rest))
499 | x::xs -> x::adjust_before_minus xs in
500
501 let xs = adjust_before_minus xs in
502
113803cf
C
503 (* this drops blank lines after a brace introduced by removing code *)
504 let rec adjust_after_brace = function
505 [] -> []
708f4980 506 | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs)
b1b2de81 507 when str_of_token2 x =$= "{" ->
113803cf
C
508 let (between_minus,rest) = Common.span minus_or_comment xs in
509 let is_whitespace = function
510 T2(Parser_c.TCommentSpace _,_b,_i)
511 (* patch: cocci *)
512 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
513 | _ -> false in
514 let (newlines,rest) = Common.span is_whitespace rest in
515 let (drop_newlines,last_newline) =
516 let rec loop = function
517 [] -> ([],[])
518 | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
519 (List.rev rest,[x])
520 | x::xs ->
521 let (drop_newlines,last_newline) = loop xs in
522 (drop_newlines,x::last_newline) in
523 loop (List.rev newlines) in
708f4980 524 x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@
113803cf
C
525 last_newline@
526 adjust_after_brace rest
527 | x::xs -> x::adjust_after_brace xs in
528
529 let xs = adjust_after_brace xs in
530
34e49164
C
531 (* this deals with any stuff that is between the minused code, eg
532 spaces, comments, attributes, etc. *)
533 (* The use of is_minusable_comment_or_plus and set_minus_comment_or_plus
534 is because the + code can end up anywhere in the middle of the - code;
535 it is not necessarily to the far left *)
708f4980
C
536
537 let common_adj (index1,adj1) (index2,adj2) =
538 adj1 = adj2 (* same adjacency info *) &&
539 (* non-empty intersection of witness trees *)
540 not ((Common.inter_set index1 index2) = []) in
541
b1b2de81 542 let rec adjust_between_minus xs =
34e49164
C
543 match xs with
544 | [] -> []
708f4980 545 | ((T2 (_,Min adj1,_)) as t1)::xs ->
34e49164
C
546 let (between_comments, rest) =
547 Common.span is_minusable_comment_or_plus xs in
548 (match rest with
708f4980 549 | [] -> [t1]
34e49164 550
708f4980
C
551 | ((T2 (_,Min adj2,_)) as t2)::rest when common_adj adj1 adj2 ->
552 t1::
553 (List.map (set_minus_comment_or_plus adj1) between_comments @
554 adjust_between_minus (t2::rest))
b1b2de81 555 | x::xs ->
708f4980 556 t1::(between_comments @ adjust_between_minus (x::xs))
34e49164
C
557 )
558
559 | x::xs -> x::adjust_between_minus xs in
560
561 let xs = adjust_between_minus xs in
562
563 let xs = xs +> Common.exclude (function
708f4980 564 | T2 (t,Min adj,_) -> true
34e49164
C
565 | _ -> false
566 ) in
567 xs
568
faf9a90c
C
569(* normally, in C code, a semicolon is not preceded by a space or newline *)
570let adjust_before_semicolon toks =
571 let toks = List.rev toks in
572 let rec loop = function
573 [] -> []
708f4980 574 | ((T2(_,Ctx,_)) as x)::xs ->
faf9a90c
C
575 if List.mem (str_of_token2 x) [";";")";","]
576 then
577 let (spaces, rest) = Common.span is_minusable_comment xs in
578 (match rest with
708f4980 579 (T2(_,Min _,_))::_ | (Cocci2 _)::_ ->
faf9a90c
C
580 (* only drop spaces if something was actually changed before *)
581 x :: loop rest
582 | _ -> x :: loop xs)
583 else x :: loop xs
584 | x::xs -> x :: loop xs in
585 List.rev (loop toks)
34e49164
C
586
587let is_ident_like s = s ==~ Common.regexp_alpha
588
589let rec add_space xs =
590 match xs with
591 | [] -> []
592 | [x] -> [x]
708f4980
C
593 | (Cocci2(sx,lnx,_,rcolx) as x)::((Cocci2(sy,lny,lcoly,_)) as y)::xs
594 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
595 not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
596 (* this only works within a line. could consider whether
597 something should be done to add newlines too, rather than
598 printing them explicitly in unparse_cocci. *)
599 x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
34e49164
C
600 | x::y::xs ->
601 let sx = str_of_token2 x in
602 let sy = str_of_token2 y in
faf9a90c 603 if is_ident_like sx && is_ident_like sy
34e49164
C
604 then x::C2 " "::(add_space (y::xs))
605 else x::(add_space (y::xs))
606
607
608
609(* When insert some new code, because of a + in a SP, we must add this
610 * code at the right place, with the good indentation. So each time we
611 * encounter some spacing info, with some newline, we maintain the
612 * current indentation level used.
613 *
614 * TODO problems: not accurate. ex: TODO
615 *
616 * TODO: if in #define region, should add a \ \n
617 *)
618let new_tabbing2 space =
619 (list_of_string space)
620 +> List.rev
b1b2de81 621 +> Common.take_until (fun c -> c =<= '\n')
34e49164
C
622 +> List.rev
623 +> List.map string_of_char
624 +> String.concat ""
625
626let new_tabbing a =
627 Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
628
629
630let rec adjust_indentation xs =
631 let _current_tabbing = ref "" in
632 let tabbing_unit = ref None in
633
634 let string_of_list l = String.concat "" (List.map string_of_char l) in
635
636 (* try to pick a tabbing unit for the plus code *)
637 let adjust_tabbing_unit old_tab new_tab =
b1b2de81 638 if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
34e49164
C
639 then
640 let old_tab = list_of_string old_tab in
641 let new_tab = list_of_string new_tab in
642 let rec loop = function
643 ([],new_tab) ->
644 tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
645 | (_,[]) -> failwith "not possible"
646 | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
647 loop (old_tab,new_tab) in
648
649 let remtab tu current_tab =
650 let current_tab = List.rev(list_of_string current_tab) in
651 let rec loop = function
652 ([],new_tab) -> string_of_list (List.rev new_tab)
0708f913 653 | (_,[]) -> "" (*weird; tabbing unit used up more than the current tab*)
b1b2de81 654 | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
34e49164
C
655 | (_,ns) -> (* mismatch; remove what we can *)
656 string_of_list (List.rev ns) in
657 loop (tu,current_tab) in
658
659 let rec find_first_tab started = function
660 [] -> ()
b1b2de81 661 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
34e49164 662 find_first_tab true xs
485bce71 663(* patch: coccinelle *)
34e49164
C
664 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
665 when started ->
666 let s = str_of_token2 x +> new_tabbing in
667 tabbing_unit := Some (s,List.rev (list_of_string s))
668 | x::xs -> find_first_tab started xs in
669 find_first_tab false xs;
670
671 let rec aux started xs =
672 match xs with
673 | [] -> []
485bce71 674(* patch: coccinelle *)
34e49164 675 | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
708f4980
C
676 ((Cocci2 ("{",_,_,_)) as a)::xs
677 when started && str_of_token2 x =$= ")" ->
34e49164 678 (* to be done for if, etc, but not for a function header *)
708f4980 679 x::(C2 " ")::a::(aux started xs)
34e49164
C
680 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs ->
681 let old_tabbing = !_current_tabbing in
682 str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
683 (* only trust the indentation after the first { *)
684 (if started then adjust_tabbing_unit old_tabbing !_current_tabbing);
685 let coccis_rest = Common.span all_coccis xs in
686 (match coccis_rest with
b1b2de81 687 (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
34e49164
C
688 (* the case where cocci code has been added before a close } *)
689 x::aux started (Indent_cocci2::xs)
690 | _ -> x::aux started xs)
691 | Indent_cocci2::xs ->
692 (match !tabbing_unit with
693 None -> aux started xs
694 | Some (tu,_) ->
695 _current_tabbing := (!_current_tabbing)^tu;
708f4980 696 Cocci2 (tu,-1,-1,-1)::aux started xs)
34e49164
C
697 | Unindent_cocci2::xs ->
698 (match !tabbing_unit with
699 None -> aux started xs
700 | Some (_,tu) ->
701 _current_tabbing := remtab tu (!_current_tabbing);
702 aux started xs)
703 (* border between existing code and cocci code *)
708f4980 704 | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_)) as y)::xs
b1b2de81 705 when str_of_token2 x =$= "{" ->
34e49164
C
706 x::aux true (y::Indent_cocci2::xs)
707 | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
b1b2de81 708 when str_of_token2 y =$= "}" ->
34e49164
C
709 x::aux started (y::Unindent_cocci2::xs)
710 (* starting the body of the function *)
b1b2de81 711 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs
708f4980
C
712 | ((Cocci2("{",_,_,_)) as a)::xs -> a::aux true xs
713 | ((Cocci2("\n",_,_,_)) as x)::xs ->
0708f913 714 (* dont inline in expr because of weird eval order of ocaml *)
34e49164 715 let s = !_current_tabbing in
708f4980 716 x::Cocci2 (s,-1,-1,-1)::aux started xs
34e49164
C
717 | x::xs -> x::aux started xs in
718 aux false xs
719
720
721let rec find_paren_comma = function
722 | [] -> ()
723
724 (* do nothing if was like this in original file *)
725 | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
b1b2de81 726 ::xs when p2 =|= p1 + 1 ->
34e49164
C
727 find_paren_comma (x2::xs)
728
729 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
b1b2de81 730 ::xs when p2 =|= p1 + 1 ->
34e49164
C
731 find_paren_comma (x2::xs)
732
733 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
b1b2de81 734 ::xs when p2 =|= p1 + 1 ->
34e49164
C
735 find_paren_comma (x2::xs)
736
737 (* otherwise yes can adjust *)
738 | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
739 x2.remove <- true;
740 find_paren_comma (x2::xs)
741 | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
742 x1.remove <- true;
743 find_paren_comma (x2::xs)
744
745 | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
746 x1.remove <- true;
747 find_paren_comma (x2::xs)
748
749 | x::xs ->
750 find_paren_comma xs
751
752
753let fix_tokens toks =
754 let toks = toks +> List.map mk_token_extended in
755
756 let cleaner = toks +> Common.exclude (function
757 | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
758 | _ -> false
759 ) in
760 find_paren_comma cleaner;
761
762 let toks = rebuild_tokens_extented toks in
763 toks +> List.map (fun x -> x.tok2)
764
765
766
767(*****************************************************************************)
768(* Final unparsing (and debugging support) *)
769(*****************************************************************************)
770
771(* for debugging *)
772type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
773
774let kind_of_token2 = function
775 | Fake2 -> KFake
776 | Cocci2 _ -> KCocci
777 | C2 _ -> KC
778 | T2 (t,_,_) ->
779 (match TH.pinfo_of_tok t with
780 | ExpandedTok _ -> KExpanded
781 | OriginTok _ -> KOrigin
782 | FakeTok _ -> raise Impossible (* now a Fake2 *)
783 | AbstractLineTok _ -> raise Impossible (* now a KC *)
784 )
785 | Unindent_cocci2 | Indent_cocci2 -> raise Impossible
786
787let end_mark = "!"
788
789let start_mark = function
790 | KFake -> "!F!"
791 | KCocci -> "!S!"
792 | KC -> "!A!"
793 | KExpanded -> "!E!"
794 | KOrigin -> ""
795
796let print_all_tokens2 pr xs =
797 if !Flag_parsing_c.debug_unparsing
798 then
799 let current_kind = ref KOrigin in
800 xs +> List.iter (fun t ->
801 let newkind = kind_of_token2 t in
b1b2de81 802 if newkind =*= !current_kind
34e49164
C
803 then pr (str_of_token2 t)
804 else begin
805 pr (end_mark);
806 pr (start_mark newkind);
807 pr (str_of_token2 t);
808 current_kind := newkind
809 end
810 );
811 else
812 xs +> List.iter (fun x -> pr (str_of_token2 x))
813
814
815
816
817(*****************************************************************************)
818(* Entry points *)
819(*****************************************************************************)
820
821(* old: PPviatok was made in the beginning to allow to pretty print a
822 * complete C file, including a modified C file by transformation.ml,
823 * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
824 * maybe flow_to_ast) all the cases. Indeed we don't need to do some
825 * fancy stuff when a function was not modified at all. Just need to
826 * print the list of token as-is. But now pretty_print_c.ml handles
827 * almost everything so maybe less useful. Maybe PPviatok allows to
828 * optimize a little the pretty printing.
829 *
830 * update: now have PPviastr which goes even faster than PPviatok, so
831 * PPviatok has disappeared.
832 *)
833
834type ppmethod = PPnormal | PPviastr
835
836
837
838
839(* The pp_program function will call pretty_print_c.ml with a special
840 * function to print the leaf components, the tokens. When we want to
841 * print a token, we need to print also maybe the space and comments that
842 * were close to it in the original file (and that was omitted during the
843 * parsing phase), and honor what the cocci-info attached to the token says.
844 * Maybe we will not print the token if it's a MINUS-token, and maybe we will
845 * print it and also print some cocci-code attached in a PLUS to it.
846 * So we will also maybe call unparse_cocci. Because the cocci-code may
847 * contain metavariables, unparse_cocci will in fact sometimes call back
848 * pretty_print_c (which will this time don't call back again unparse_cocci)
849 *)
850
851let pp_program2 xs outfile =
852 Common.with_open_outfile outfile (fun (pr,chan) ->
853 let pr s =
854 if !Flag_parsing_c.debug_unparsing
855 then begin pr2_no_nl s; flush stderr end
856 else pr s
857 (* flush chan; *)
858 (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
859 in
860
861 xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
862
863 (* here can still work on ast *)
864 let e = remove_useless_fakeInfo_struct e in
865
866 match ppmethod with
867 | PPnormal ->
868 (* now work on tokens *)
869
870 (* phase1: just get all the tokens, all the information *)
871 assert(toks_e +> List.for_all (fun t ->
872 TH.is_origin t or TH.is_expanded t
873 ));
874 let toks = get_fakeInfo_and_tokens e toks_e in
875 let toks = displace_fake_nodes toks in
876 (* assert Origin;ExpandedTok;Faketok *)
877 let toks = expand_mcode toks in
878 (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
879 * and no tag information, just NOTHING. *)
880
881 (* phase2: can now start to filter and adjust *)
882 let toks = adjust_indentation toks in
883 let toks = remove_minus_and_between_and_expanded_and_fake toks in
faf9a90c 884 let toks = adjust_before_semicolon toks in
34e49164
C
885 (* assert Origin + Cocci + C and no minus *)
886 let toks = add_space toks in
887 let toks = fix_tokens toks in
888
889 (* in theory here could reparse and rework the ast! or
890 * apply some SP. Not before cos julia may have generated
891 * not parsable file. Need do unparsing_tricks call before being
892 * ready to reparse. *)
893 print_all_tokens2 pr toks;
894
895 | PPviastr -> pr str
896 )
897 )
898
899let pp_program a b =
900 Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
901
485bce71
C
902
903let pp_program_default xs outfile =
904 let xs' = xs +> List.map (fun x -> x, PPnormal) in
905 pp_program xs' outfile