05eda31031a56a60dadaa194774263e0e3872d89
[bpt/coccinelle.git] / parsing_c / unparse_c.ml
1 (* Yoann Padioleau, Julia Lawall
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *
15 *
16 * Modifications by Julia Lawall for better newline handling.
17 *)
18 open Common
19
20 open Ast_c
21
22 module TH = Token_helpers
23
24
25 (* should keep comments and directives in between adjacent deleted terms,
26 but not comments and directives within deleted terms. should use the
27 labels found in the control-flow graph *)
28
29
30
31 (*****************************************************************************)
32 (* Wrappers *)
33 (*****************************************************************************)
34 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
35
36 (*****************************************************************************)
37 (* Types used during the intermediate phases of the unparsing *)
38 (*****************************************************************************)
39
40 type token1 =
41 | Fake1 of info
42 | T1 of Parser_c.token
43
44 (* The cocci_tag of the token should always be a NOTHING. The mark of
45 * the token can only be OriginTok or ExpandedTok. Why not get rid of
46 * token and get something simpler ? because we need to know if the
47 * info is a TCommentCpp or TCommentSpace, etc for some of the further
48 * analysis so easier to keep with the token.
49 *
50 * This type contains the whole information. Have all the tokens with this
51 * type.
52 *)
53 type min =
54 Min of (int list (* match numbers *) * int (* adjacency information *))
55 | Ctx
56
57 type token2 =
58 | T2 of Parser_c.token * min *
59 int option (* orig index, abstracting away comments and space *)
60 | Fake2
61 | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
62 * Unparse_cocci.nlhint option
63 | C2 of string
64 | Comma of string
65 | Indent_cocci2
66 | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
67
68 (* not used yet *)
69 type token3 =
70 | T3 of Parser_c.token
71 | Cocci3 of string
72 | C3 of string
73
74
75 (* similar to the tech in parsing_hack *)
76 type token_extended = {
77 tok2 : token2;
78 str : string;
79 idx: int option; (* to know if 2 tokens were consecutive in orig file *)
80 mutable new_tokens_before : token2 list;
81 mutable remove : bool;
82 }
83
84
85 (*****************************************************************************)
86 (* Helpers *)
87 (*****************************************************************************)
88
89 let info_of_token1 t =
90 match t with
91 | Fake1 info -> info
92 | T1 tok -> TH.info_of_tok tok
93
94 let print_token1 = function
95 T1 tok -> TH.str_of_tok tok
96 | Fake1 info -> "fake"
97
98 let str_of_token2 = function
99 | T2 (t,_,_) -> TH.str_of_tok t
100 | Fake2 -> ""
101 | Cocci2 (s,_,_,_,_) -> s
102 | C2 s -> s
103 | Comma s -> s
104 | Indent_cocci2 -> ""
105 | Unindent_cocci2 _ -> ""
106
107 let print_token2 = function
108 | T2 (t,b,_) ->
109 let b_str =
110 match b with
111 Min (index,adj) ->
112 Printf.sprintf "-%d[%s]" adj
113 (String.concat " " (List.map string_of_int index))
114 | Ctx -> "" in
115 "T2:"^b_str^TH.str_of_tok t
116 | Fake2 -> "fake"
117 | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
118 | C2 s -> "C2:"^s
119 | Comma s -> "Comma:"^s
120 | Indent_cocci2 -> "Indent"
121 | Unindent_cocci2 _ -> "Unindent"
122
123 let simple_print_all_tokens1 l =
124 List.iter (function x -> Printf.printf "|%s| " (print_token1 x)) l;
125 Printf.printf "\n"
126
127 let simple_print_all_tokens2 l =
128 List.iter (function x -> Printf.printf "|%s| " (print_token2 x)) l;
129 Printf.printf "\n"
130
131 let str_of_token3 = function
132 | T3 t -> TH.str_of_tok t
133 | Cocci3 s | C3 s -> s
134
135
136
137 let mk_token_extended x =
138 let origidx =
139 match x with
140 | T2 (_,_, idx) -> idx
141 | _ -> None
142 in
143 { tok2 = x;
144 str = str_of_token2 x;
145 idx = origidx;
146 new_tokens_before = [];
147 remove = false;
148 }
149
150 let rebuild_tokens_extented toks_ext =
151 let _tokens = ref [] in
152 toks_ext +> List.iter (fun tok ->
153 tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
154 if not tok.remove then push2 tok.tok2 _tokens;
155 );
156 let tokens = List.rev !_tokens in
157 (tokens +> List.map mk_token_extended)
158
159
160 let mcode_contain_plus = function
161 | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
162 | Ast_cocci.CONTEXT _ -> true
163 (* patch: when need full coccinelle transformation *)
164 | Ast_cocci.MINUS (_,_,_,[]) -> false
165 | Ast_cocci.MINUS (_,_,_,x::xs) -> true
166 | Ast_cocci.PLUS _ -> raise Impossible
167
168 let contain_plus info =
169 let mck = Ast_c.mcode_of_info info in
170 mcode_contain_plus mck
171
172 (*****************************************************************************)
173 (* Last fix on the ast *)
174 (*****************************************************************************)
175
176 (* Because of the ugly trick to handle initialiser, I generate fake ','
177 * for the last initializer element, but if there is nothing around it,
178 * I don't want in the end to print it.
179 *)
180
181 let remove_useless_fakeInfo_struct program =
182 let bigf = { Visitor_c.default_visitor_c_s with
183 Visitor_c.kini_s = (fun (k,bigf) ini ->
184 match k ini with
185 | InitList args, ii ->
186 (match ii with
187 | [_i1;_i2] -> ini
188 | [i1;i2;iicommaopt] ->
189 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
190 && (Ast_c.is_fake iicommaopt)
191 (* sometimes the guy put a normal iicommaopt *)
192 then InitList args, [i1;i2]
193 else InitList args, [i1;i2;iicommaopt]
194 | [i1;i2;iicommaopt;end_comma_opt] ->
195 (* only in #define. end_comma_opt canot be fake *)
196 (* not sure if this will be considered ambiguous with a previous
197 case? *)
198 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
199 && (Ast_c.is_fake iicommaopt)
200 (* sometimes the guy put a normal iicommaopt *)
201 then InitList args, [i1;i2;end_comma_opt]
202 else InitList args, [i1;i2;iicommaopt;end_comma_opt]
203 | _ -> raise Impossible
204 )
205 | x -> x
206 )
207 } in
208 Visitor_c.vk_toplevel_s bigf program
209
210
211 (*****************************************************************************)
212 (* Tokens1 generation *)
213 (*****************************************************************************)
214
215 let get_fakeInfo_and_tokens celem toks =
216 let toks_in = ref toks in
217 let toks_out = ref [] in
218
219 (* todo? verify good order of position ? *)
220 let pr_elem info =
221 match Ast_c.pinfo_of_info info with
222 | FakeTok _ ->
223 Common.push2 (Fake1 info) toks_out
224 | OriginTok _ | ExpandedTok _ ->
225 (* get the associated comments/space/cppcomment tokens *)
226 let (before, x, after) =
227 !toks_in +> Common.split_when (fun tok ->
228 info =*= TH.info_of_tok tok)
229 in
230 assert(info =*= TH.info_of_tok x);
231 (*old: assert(before +> List.for_all (TH.is_comment)); *)
232 before +> List.iter (fun x ->
233 if not (TH.is_comment x)
234 then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
235 (* case such as int asm d3("x"); not yet in ast *)
236 );
237 before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
238 push2 (T1 x) toks_out;
239 toks_in := after;
240 | AbstractLineTok _ ->
241 (* can be called on type info when for instance use -type_c *)
242 if !Flag_parsing_c.pretty_print_type_info
243 then Common.push2 (Fake1 info) toks_out
244 else raise Impossible (* at this stage *)
245 in
246
247 let pr_space _ = () in (* use the spacing that is there already *)
248
249 Pretty_print_c.pp_program_gen pr_elem pr_space celem;
250
251 if not (null !toks_in)
252 then failwith "WEIRD: unparsing not finished";
253
254 List.rev !toks_out
255
256 (* Fake nodes that have BEFORE code should be moved over any subsequent
257 whitespace and newlines, but not any comments, to get as close to the affected
258 code as possible. Similarly, fake nodes that have AFTER code should be moved
259 backwards. No fake nodes should have both before and after code. *)
260
261 let displace_fake_nodes toks =
262 let is_fake = function Fake1 _ -> true | _ -> false in
263 let is_whitespace = function
264 T1(Parser_c.TCommentSpace _)
265 (* patch: cocci *)
266 | T1(Parser_c.TCommentNewline _) -> true
267 | _ -> false in
268 let rec loop toks =
269 let fake_info =
270 try Some (Common.split_when is_fake toks)
271 with Not_found -> None in
272 match fake_info with
273 Some(bef,((Fake1 info) as fake),aft) ->
274 (match !(info.cocci_tag) with
275 | Some x ->
276 (match x with
277 (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
278 (* move the fake node forwards *)
279 let (whitespace,rest) = Common.span is_whitespace aft in
280 bef @ whitespace @ fake :: (loop rest)
281 | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
282 (* move the fake node backwards *)
283 let revbef = List.rev bef in
284 let (revwhitespace,revprev) = Common.span is_whitespace revbef in
285 let whitespace = List.rev revwhitespace in
286 let prev = List.rev revprev in
287 prev @ fake :: (loop (whitespace @ aft))
288 | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
289 bef @ fake :: (loop aft)
290 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
291 failwith "fake node should not be before-after"
292 | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
293 )
294 | None ->
295 bef @ fake :: (loop aft)
296 )
297 | None -> toks
298 | _ -> raise Impossible in
299 loop toks
300
301 (*****************************************************************************)
302 (* Tokens2 generation *)
303 (*****************************************************************************)
304
305 let comment2t2 = function
306 (Token_c.TCommentCpp
307 (* not sure iif the following list is exhaustive or complete *)
308 (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError),
309 (info : Token_c.info)) ->
310 C2(info.Common.str)
311 | (Token_c.TCommentCpp x,(info : Token_c.info)) ->
312 C2("\n"^info.Common.str^"\n")
313 | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
314
315 let expand_mcode toks =
316 let toks_out = ref [] in
317
318 let index = ref 0 in
319
320 let add_elem t minus =
321 match t with
322 | Fake1 info ->
323 let str = Ast_c.str_of_info info in
324 if str =$= ""
325 then push2 (Fake2) toks_out
326 (* fx the fake "," at the end of a structure or enum.
327 no idea what other fake info there can be... *)
328 else push2 (Comma str) toks_out
329
330
331 | T1 tok ->
332 (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
333 (* no tag on expandedTok ! *)
334 let modified = function
335 None -> false
336 | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
337 | _ -> true in
338 (if (TH.is_expanded tok &&
339 modified !((TH.info_of_tok tok).cocci_tag)
340 (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*))
341 then
342 failwith
343 (Printf.sprintf
344 "expanded token %s on line %d is either modified or stored in a metavariable"
345 (TH.str_of_tok tok) (TH.line_of_tok tok)));
346
347 let tok' = tok +> TH.visitor_info_of_tok (fun i ->
348 { i with cocci_tag = ref Ast_c.emptyAnnot; }
349 ) in
350
351 let optindex =
352 if TH.is_origin tok && not (TH.is_real_comment tok)
353 then begin
354 incr index;
355 Some !index
356 end
357 else None
358 in
359
360 push2 (T2 (tok', minus, optindex)) toks_out
361 in
362
363 let expand_info t =
364 let (mcode,env) =
365 Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
366
367 let pr_cocci s ln col rcol hint =
368 push2 (Cocci2(s,ln,col,rcol,hint)) toks_out in
369 let pr_c info =
370 (match Ast_c.pinfo_of_info info with
371 Ast_c.AbstractLineTok _ ->
372 push2 (C2 (Ast_c.str_of_info info)) toks_out
373 | Ast_c.FakeTok (s,_) ->
374 push2 (C2 s) toks_out
375 | _ ->
376 Printf.printf "line: %s\n" (Common.dump info);
377 failwith "not an abstract line");
378 (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
379 List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
380
381 let pr_barrier ln col = (* marks a position, used around C code *)
382 push2 (Cocci2("",ln,col,col,None)) toks_out in
383 let pr_nobarrier ln col = () in (* not needed for linux spacing *)
384
385 let pr_cspace _ = push2 (C2 " ") toks_out in
386
387 let pr_space _ = () (* rely on add_space in cocci code *) in
388 let pr_arity _ = () (* not interested *) in
389
390 let indent _ = push2 Indent_cocci2 toks_out in
391 let unindent x = push2 (Unindent_cocci2 x) toks_out in
392
393 let args_pp =
394 (env, pr_cocci, pr_c, pr_cspace,
395 (match !Flag_parsing_c.spacing with
396 Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
397 pr_arity,
398 (match !Flag_parsing_c.spacing with
399 Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
400 indent, unindent) in
401
402 (* old: when for yacfe with partial cocci:
403 * add_elem t false;
404 *)
405
406 (* patch: when need full coccinelle transformation *)
407 let unparser = Unparse_cocci.pp_list_list_any args_pp false in
408 match mcode with
409 | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
410 (* Why adding ? because I want to have all the information, the whole
411 * set of tokens, so I can then process and remove the
412 * is_between_two_minus for instance *)
413 add_elem t (Min (inst,adj));
414 unparser any_xxs Unparse_cocci.InPlace
415 | Ast_cocci.CONTEXT (_,any_befaft) ->
416 (match any_befaft with
417 | Ast_cocci.NOTHING ->
418 add_elem t Ctx
419 | Ast_cocci.BEFORE (xxs,_) ->
420 unparser xxs Unparse_cocci.Before;
421 add_elem t Ctx
422 | Ast_cocci.AFTER (xxs,_) ->
423 add_elem t Ctx;
424 unparser xxs Unparse_cocci.After;
425 | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
426 unparser xxs Unparse_cocci.Before;
427 add_elem t Ctx;
428 unparser yys Unparse_cocci.After;
429 )
430 | Ast_cocci.PLUS _ -> raise Impossible
431
432 in
433
434 toks +> List.iter expand_info;
435 List.rev !toks_out
436
437
438 (*****************************************************************************)
439 (* Tokens2 processing, filtering, adjusting *)
440 (*****************************************************************************)
441
442 let is_space = function
443 | T2(Parser_c.TCommentSpace _,_b,_i) -> true (* only whitespace *)
444 | _ -> false
445
446 let is_newline = function
447 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
448 | _ -> false
449
450 let is_whitespace = function
451 | (T2 (t,_b,_i)) ->
452 (match t with
453 | Parser_c.TCommentSpace _ -> true (* only whitespace *)
454 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
455 | _ -> false
456 )
457 | _ -> false
458
459 let is_minusable_comment = function
460 | (T2 (t,_b,_i)) ->
461 (match t with
462 | Parser_c.TCommentSpace _ (* only whitespace *)
463 (* patch: coccinelle *)
464 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
465 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
466 | Parser_c.TComment _
467 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
468 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
469 | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
470 -> true
471
472 | Parser_c.TCommentMisc _
473 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
474 -> false
475
476 | _ -> false
477 )
478 | _ -> false
479
480 let is_minusable_comment_nocpp = function
481 | (T2 (t,_b,_i)) ->
482 (match t with
483 | Parser_c.TCommentSpace _ (* only whitespace *)
484 (* patch: coccinelle *)
485 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
486 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
487 | Parser_c.TComment _ -> true
488 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
489 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
490 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
491 -> false
492
493 | Parser_c.TCommentMisc _
494 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
495 -> false
496
497 | _ -> false
498 )
499 | _ -> false
500
501 let all_coccis = function
502 Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ -> true
503 | _ -> false
504
505 (*previously gave up if the first character was a newline, but not clear why*)
506 let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
507
508 let set_minus_comment adj = function
509 | T2 (t,Ctx,idx) ->
510 let str = TH.str_of_tok t in
511 (match t with
512 | Parser_c.TCommentSpace _
513 (* patch: coccinelle *)
514 | Parser_c.TCommentNewline _ -> ()
515
516 | Parser_c.TComment _
517 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
518 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
519 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
520 ->
521 pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
522 (TH.line_of_tok t) str)
523 | _ -> raise Impossible
524 );
525 T2 (t, Min adj, idx)
526 (* patch: coccinelle *)
527 | T2 (t,Min adj,idx) as x -> x
528 | _ -> raise Impossible
529
530 let set_minus_comment_or_plus adj = function
531 Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ as x -> x
532 | x -> set_minus_comment adj x
533
534 let drop_minus xs =
535 xs +> Common.exclude (function
536 | T2 (t,Min adj,_) -> true
537 | _ -> false
538 )
539
540 let remove_minus_and_between_and_expanded_and_fake xs =
541
542 (* get rid of expanded and fake tok *)
543 let xs = xs +> Common.exclude (function
544 | T2 (t,_,_) when TH.is_expanded t -> true
545 | Fake2 -> true
546 | _ -> false
547 )
548 in
549
550 let minus_or_comment = function
551 T2(_,Min adj,_) -> true
552 | x -> is_minusable_comment x in
553
554 let minus_or_comment_nocpp = function
555 T2(_,Min adj,_) -> true
556 | x -> is_minusable_comment_nocpp x in
557
558 let common_adj (index1,adj1) (index2,adj2) =
559 adj1 = adj2 (* same adjacency info *) &&
560 (* non-empty intersection of witness trees *)
561 not ((Common.inter_set index1 index2) = []) in
562
563 (* new idea: collects regions not containing non-space context code
564 if two adjacent adjacent minus tokens satisfy common_adj then delete
565 all spaces, comments etc between them
566 if two adjacent minus tokens do not satisfy common_adj only delete
567 the spaces between them if there are no comments, etc.
568 if the region contain no plus code and is both preceded and followed
569 by a newline, delete the initial newline. *)
570
571 let rec adjust_around_minus = function
572 [] -> []
573 | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
574 (T2(_,Min adj1,_) as t1)::xs ->
575 let (minus_list,rest) = Common.span not_context (t1::xs) in
576 let contains_plus = List.exists is_plus minus_list in
577 let x =
578 match List.rev minus_list with
579 (T2(Parser_c.TCommentNewline c,_b,_i))::rest
580 when List.for_all minus_or_comment minus_list ->
581 set_minus_comment_or_plus adj1 x
582 | _ -> x in
583 x :: adjust_within_minus contains_plus minus_list @
584 adjust_around_minus rest
585 | (T2(_,Min adj1,_) as t1)::xs ->
586 let (minus_list,rest) = Common.span not_context (t1::xs) in
587 let contains_plus = List.exists is_plus minus_list in
588 adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
589 | x::xs ->
590 x :: adjust_around_minus xs
591 and adjust_within_minus cp (* contains plus *) = function
592 (T2(_,Min adj1,_) as t1)::xs ->
593 let not_minus = function T2(_,Min _,_) -> false | _ -> true in
594 let (not_minus_list,rest) = Common.span not_minus xs in
595 t1 ::
596 (match rest with
597 (T2(_,Min adj2,_) as t2)::xs when common_adj adj1 adj2 ->
598 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
599 @ (adjust_within_minus cp (t2::xs))
600 | (T2(_,Min adj2,_) as t2)::xs ->
601 let is_whitespace_or_plus = function
602 (T2 _) as x -> is_whitespace x
603 | _ -> true (*plus*) in
604 if List.for_all is_whitespace_or_plus not_minus_list
605 then
606 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
607 @ (adjust_within_minus cp (t2::xs))
608 else not_minus_list @ (adjust_within_minus cp (t2::xs))
609 | _ ->
610 if cp
611 then xs
612 else
613 let (spaces,rest) = Common.span is_space xs in
614 (List.map (set_minus_comment_or_plus adj1) spaces)
615 @ rest)
616 | xs -> failwith "should always start with minus"
617 and not_context = function
618 (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false
619 | _ -> true
620 and is_plus = function
621 C2 _ | Comma _ | Cocci2 _ -> true
622 | _ -> false in
623
624 let xs = adjust_around_minus xs in
625
626 (* this drops blank lines after a brace introduced by removing code *)
627 let minus_or_comment_nonl = function
628 T2(_,Min adj,_) -> true
629 | T2(Parser_c.TCommentNewline _,_b,_i) -> false
630 | x -> is_minusable_comment x in
631
632 let rec adjust_after_brace = function
633 [] -> []
634 | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs)
635 when str_of_token2 x =$= "{" ->
636 let (between_minus,rest) = Common.span minus_or_comment_nonl xs in
637 let is_whitespace = function
638 T2(Parser_c.TCommentSpace _,_b,_i)
639 (* patch: cocci *)
640 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
641 | _ -> false in
642 let (newlines,rest) = Common.span is_whitespace rest in
643 let (drop_newlines,last_newline) =
644 let rec loop = function
645 [] -> ([],[])
646 | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
647 (List.rev rest,[x])
648 | x::xs ->
649 let (drop_newlines,last_newline) = loop xs in
650 (drop_newlines,x::last_newline) in
651 loop (List.rev newlines) in
652 x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@
653 last_newline@
654 adjust_after_brace rest
655 | x::xs -> x::adjust_after_brace xs in
656
657 let xs = adjust_after_brace xs in
658
659 (* search backwards from context } over spaces until reaching a newline.
660 then go back over all minus code until reaching some context or + code.
661 get rid of all intervening spaces, newlines, and comments
662 input is reversed *)
663 let rec adjust_before_brace = function
664 [] -> []
665 | ((T2(t,Ctx,_)) as x)::xs when str_of_token2 x =$= "}" or is_newline x ->
666 let (outer_spaces,rest) = Common.span is_space xs in
667 x :: outer_spaces @
668 (match rest with
669 ((T2 (Parser_c.TCommentNewline _,Ctx,_i)) as h) ::
670 (* the rest of this code is the same as from_newline below
671 but merging them seems to be error prone... *)
672 ((T2 (t, Min adj, idx)) as m) :: rest ->
673 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
674 h :: m ::
675 (List.map (set_minus_comment adj) spaces) @
676 (adjust_before_brace rest)
677 | _ -> adjust_before_brace rest)
678 | x::xs -> x :: (adjust_before_brace xs) in
679
680 let from_newline = function
681 ((T2 (t, Min adj, idx)) as m) :: rest ->
682 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
683 m ::
684 (List.map (set_minus_comment adj) spaces) @
685 (adjust_before_brace rest)
686 | ((T2 (t0, Ctx, idx0)) as m0) :: ((T2 (t, Min adj, idx)) as m) :: rest
687 when TH.str_of_tok t0 = "" ->
688 (* This is for the case of a #define that is completely deleted,
689 because a #define has a strange EOL token at the end.
690 We hope there i no other kind of token that is represented by
691 "", but it seems like changing the kind of token might break
692 the end of entity recognition in the C parser.
693 See parsing_hacks.ml *)
694 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
695 m0 :: m ::
696 (List.map (set_minus_comment adj) spaces) @
697 (adjust_before_brace rest)
698 | rest -> adjust_before_brace rest in
699
700 let xs = List.rev (from_newline (List.rev xs)) in
701 let xs = drop_minus xs in
702 xs
703
704 (* normally, in C code, a semicolon is not preceded by a space or newline *)
705 let adjust_before_semicolon toks =
706 let toks = List.rev toks in
707 let rec search_semic = function
708 [] -> []
709 | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
710 if List.mem (str_of_token2 x) [";";")";","]
711 then x :: search_minus false xs
712 else x :: search_semic xs
713 | x::xs -> x :: search_semic xs
714 and search_minus seen_minus xs =
715 let (spaces, rest) = Common.span is_space xs in
716 (* only delete spaces if something is actually deleted *)
717 match rest with
718 ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest
719 | _ -> if seen_minus then rest else xs in
720 List.rev (search_semic toks)
721
722 let is_ident_like s = s ==~ Common.regexp_alpha
723
724 let rec drop_space_at_endline = function
725 [] -> []
726 | [x] -> [x]
727 | ((T2(Parser_c.TCommentSpace _,Ctx,_i)) as a)::rest ->
728 let (outer_spaces,rest) = Common.span is_space rest in
729 let minus_or_comment_or_space_nocpp = function
730 T2(_,Min adj,_) -> true
731 | (T2(Parser_c.TCommentSpace _,Ctx,_i)) -> true
732 | (T2(Parser_c.TCommentNewline _,Ctx,_i)) -> false
733 | x -> false in
734 let (minus,rest) = Common.span minus_or_comment_or_space_nocpp rest in
735 let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in
736 if List.exists (function T2(_,Min adj,_) -> true | _ -> false) minus
737 then
738 match rest with
739 ((T2(Parser_c.TCommentNewline _,Ctx,_i)) as a)::rest ->
740 (* drop trailing spaces *)
741 minus@a::(drop_space_at_endline rest)
742 | _ -> fail()
743 else fail()
744 | a :: rest -> a :: drop_space_at_endline rest
745
746 (* if a removed ( is between two tokens, then add a space *)
747 let rec paren_to_space = function
748 [] -> []
749 | [x] -> [x]
750 | [x;y] -> [x;y]
751 | ((T2(_,Ctx,_)) as a)::((T2(t,Min _,_)) as b)::((T2(_,Ctx,_)) as c)::rest
752 when not (is_whitespace a) && TH.str_of_tok t = "(" ->
753 a :: b :: (C2 " ") :: (paren_to_space (c :: rest))
754 | a :: rest -> a :: (paren_to_space rest)
755
756 let rec add_space xs =
757 match xs with
758 | [] -> []
759 | [x] -> [x]
760 | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
761 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
762 not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
763 (* this only works within a line. could consider whether
764 something should be done to add newlines too, rather than
765 printing them explicitly in unparse_cocci. *)
766 x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
767 | x::y::xs ->
768 let sx = str_of_token2 x in
769 let sy = str_of_token2 y in
770 if is_ident_like sx && is_ident_like sy
771 then x::C2 " "::(add_space (y::xs))
772 else x::(add_space (y::xs))
773
774 (* A fake comma is added at the end of an unordered initlist or a enum
775 decl, if the initlist or enum doesn't already end in a comma. This is only
776 needed if there is + code, ie if we see Cocci after it in the code sequence *)
777
778 let rec drop_end_comma = function
779 [] -> []
780 | [x] -> [x]
781 | ((Comma ",") as x) :: rest ->
782 let (newlines,rest2) = Common.span is_whitespace rest in
783 (match rest2 with
784 (Cocci2 _) :: _ -> x :: drop_end_comma rest
785 | _ -> drop_end_comma rest)
786 | x :: xs -> x :: drop_end_comma xs
787
788 (* The following only works for the outermost function call. Stack records
789 the column of all open parentheses. Space_cell contains the most recent
790 comma in the outermost function call. The goal is to decide whether this
791 should be followed by a space or a newline and indent. *)
792 let add_newlines toks tabbing_unit =
793 let create_indent n =
794 let (tu,tlen) =
795 match tabbing_unit with
796 Some ("\t",_) -> ("\t",8)
797 | Some ("",_) -> ("\t",8) (* not sure why... *)
798 | Some (s,_) -> (s,String.length s) (* assuming only spaces *)
799 | None -> ("\t",8) in
800 let rec loop seen =
801 if seen + tlen <= n
802 then tu ^ loop (seen + tlen)
803 else String.make (n-seen) ' ' in
804 loop 0 in
805 let check_for_newline count x = function
806 Some (start,space_cell) when count > Flag_parsing_c.max_width ->
807 space_cell := "\n"^(create_indent x);
808 Some (x + (count - start))
809 | _ -> None in
810 (* the following is for strings that may contain newline *)
811 let string_length s count =
812 let l = list_of_string s in
813 List.fold_left
814 (function count ->
815 function
816 '\t' -> count + 8
817 | '\n' -> 0
818 | c -> count + 1)
819 count l in
820 let rec loop info count = function
821 [] -> []
822 | ((T2(tok,_,_)) as a)::xs ->
823 a :: loop info (string_length (TH.str_of_tok tok) count) xs
824 | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs ->
825 let (stack,space_cell) = info in
826 let rest =
827 match hint with
828 None -> loop info (count + (String.length s)) xs
829 | Some Unparse_cocci.StartBox ->
830 let count = count + (String.length s) in
831 loop (count::stack,space_cell) count xs
832 | Some Unparse_cocci.EndBox ->
833 let count = count + (String.length s) in
834 (match stack with
835 [x] ->
836 (match check_for_newline count x space_cell with
837 Some count -> loop ([],None) count xs
838 | None -> loop ([],None) count xs)
839 | _ -> loop (List.tl stack,space_cell) count xs)
840 | Some (Unparse_cocci.SpaceOrNewline sp) ->
841 let count = count + (String.length s) + 1 (*space*) in
842 (match stack with
843 [x] ->
844 (match check_for_newline count x space_cell with
845 Some count -> loop (stack,Some (x,sp)) count xs
846 | None -> loop (stack,Some (count,sp)) count xs)
847 | _ -> loop info count xs) in
848 a :: rest
849 | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs
850 | ((Comma(s)) as a)::xs -> a :: loop info (string_length s count) xs
851 | Fake2 :: _ | Indent_cocci2 :: _
852 | Unindent_cocci2 _::_ ->
853 failwith "unexpected fake, indent, or unindent" in
854 let redo_spaces prev = function
855 Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) ->
856 C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev
857 | t -> t::prev in
858 (match !Flag_parsing_c.spacing with
859 Flag_parsing_c.SMPL -> toks
860 | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks)))
861
862 (* When insert some new code, because of a + in a SP, we must add this
863 * code at the right place, with the good indentation. So each time we
864 * encounter some spacing info, with some newline, we maintain the
865 * current indentation level used.
866 *
867 * TODO problems: not accurate. ex: TODO
868 *
869 * TODO: if in #define region, should add a \ \n
870 *)
871 let new_tabbing2 space =
872 (list_of_string space)
873 +> List.rev
874 +> Common.take_until (fun c -> c =<= '\n')
875 +> List.rev
876 +> List.map string_of_char
877 +> String.concat ""
878
879 let new_tabbing a =
880 Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
881
882
883 let rec adjust_indentation xs =
884
885 let _current_tabbing = ref "" in
886 let tabbing_unit = ref None in
887
888 let string_of_list l = String.concat "" (List.map string_of_char l) in
889
890 (* try to pick a tabbing unit for the plus code *)
891 let adjust_tabbing_unit old_tab new_tab =
892 if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
893 then
894 let old_tab = list_of_string old_tab in
895 let new_tab = list_of_string new_tab in
896 let rec loop = function
897 ([],new_tab) ->
898 tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
899 | (_,[]) -> failwith "not possible"
900 | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
901 loop (old_tab,new_tab) in
902
903 let remtab tu current_tab =
904 let current_tab = List.rev(list_of_string current_tab) in
905 let rec loop = function
906 ([],new_tab) -> string_of_list (List.rev new_tab)
907 | (_,[]) -> "" (*weird; tabbing unit used up more than the current tab*)
908 | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
909 | (_,ns) -> (* mismatch; remove what we can *)
910 string_of_list (List.rev ns) in
911 loop (tu,current_tab) in
912
913 let rec find_first_tab started = function
914 [] -> ()
915 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
916 find_first_tab true xs
917 (* patch: coccinelle *)
918 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
919 when started ->
920 let s = str_of_token2 x +> new_tabbing in
921 tabbing_unit := Some (s,List.rev (list_of_string s))
922 | x::xs -> find_first_tab started xs in
923 find_first_tab false xs;
924
925 let rec aux started xs =
926 match xs with
927 | [] -> []
928 (* patch: coccinelle *)
929 | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
930 ((Cocci2 ("{",_,_,_,_)) as a)::xs
931 when started && str_of_token2 x =$= ")" ->
932 (* to be done for if, etc, but not for a function header *)
933 x::(C2 " ")::a::(aux started xs)
934 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs ->
935 let old_tabbing = !_current_tabbing in
936 str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
937 (* only trust the indentation after the first { *)
938 (if started
939 then adjust_tabbing_unit old_tabbing !_current_tabbing);
940 let coccis_rest = Common.span all_coccis xs in
941 (match coccis_rest with
942 (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
943 (* the case where cocci code has been added before a close } *)
944 x::aux started (Indent_cocci2::xs)
945 | _ -> x::aux started xs)
946 | Indent_cocci2::xs ->
947 (match !tabbing_unit with
948 None -> aux started xs
949 | Some (tu,_) ->
950 _current_tabbing := (!_current_tabbing)^tu;
951 Cocci2 (tu,-1,-1,-1,None)::aux started xs)
952 | Unindent_cocci2(permanent)::xs ->
953 (match !tabbing_unit with
954 None -> aux started xs
955 | Some (_,tu) ->
956 _current_tabbing := remtab tu (!_current_tabbing);
957 aux started xs)
958 (* border between existing code and cocci code *)
959 | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs
960 when str_of_token2 x =$= "{" ->
961 x::aux true (y::Indent_cocci2::xs)
962 | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
963 when str_of_token2 y =$= "}" ->
964 x::aux started (y::Unindent_cocci2 true::xs)
965 (* starting the body of the function *)
966 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs
967 | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs
968 | ((Cocci2("\n",_,_,_,_)) as x)::Unindent_cocci2(false)::xs ->
969 x::aux started xs
970 | ((Cocci2("\n",_,_,_,_)) as x)::xs ->
971 (* dont inline in expr because of weird eval order of ocaml *)
972 let s = !_current_tabbing in
973 x::Cocci2 (s,-1,-1,-1,None)::aux started xs
974 | x::xs -> x::aux started xs in
975 (aux false xs,!tabbing_unit)
976
977
978 let rec find_paren_comma = function
979 | [] -> ()
980
981 (* do nothing if was like this in original file *)
982 | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
983 ::xs when p2 =|= p1 + 1 ->
984 find_paren_comma (x2::xs)
985
986 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
987 ::xs when p2 =|= p1 + 1 ->
988 find_paren_comma (x2::xs)
989
990 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
991 ::xs when p2 =|= p1 + 1 ->
992 find_paren_comma (x2::xs)
993
994 (* otherwise yes can adjust *)
995 | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
996 x2.remove <- true;
997 find_paren_comma (x2::xs)
998 | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
999 x1.remove <- true;
1000 find_paren_comma (x2::xs)
1001
1002 | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
1003 x1.remove <- true;
1004 find_paren_comma (x2::xs)
1005
1006 | x::xs ->
1007 find_paren_comma xs
1008
1009
1010 let fix_tokens toks =
1011 let toks = toks +> List.map mk_token_extended in
1012
1013 let cleaner = toks +> Common.exclude (function
1014 | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
1015 | _ -> false
1016 ) in
1017 find_paren_comma cleaner;
1018
1019 let toks = rebuild_tokens_extented toks in
1020 toks +> List.map (fun x -> x.tok2)
1021
1022
1023
1024 (*****************************************************************************)
1025 (* Final unparsing (and debugging support) *)
1026 (*****************************************************************************)
1027
1028 (* for debugging *)
1029 type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
1030
1031 let kind_of_token2 = function
1032 | Fake2 -> KFake
1033 | Cocci2 _ -> KCocci
1034 | C2 _ -> KC
1035 | Comma _ -> KC
1036 | T2 (t,_,_) ->
1037 (match TH.pinfo_of_tok t with
1038 | ExpandedTok _ -> KExpanded
1039 | OriginTok _ -> KOrigin
1040 | FakeTok _ -> raise Impossible (* now a Fake2 *)
1041 | AbstractLineTok _ -> raise Impossible (* now a KC *)
1042 )
1043 | Unindent_cocci2 _ | Indent_cocci2 -> raise Impossible
1044
1045 let end_mark = "!"
1046
1047 let start_mark = function
1048 | KFake -> "!F!"
1049 | KCocci -> "!S!"
1050 | KC -> "!A!"
1051 | KExpanded -> "!E!"
1052 | KOrigin -> ""
1053
1054 let print_all_tokens2 pr xs =
1055 if !Flag_parsing_c.debug_unparsing
1056 then
1057 let current_kind = ref KOrigin in
1058 xs +> List.iter (fun t ->
1059 let newkind = kind_of_token2 t in
1060 if newkind =*= !current_kind
1061 then pr (str_of_token2 t)
1062 else begin
1063 pr (end_mark);
1064 pr (start_mark newkind);
1065 pr (str_of_token2 t);
1066 current_kind := newkind
1067 end
1068 );
1069 else
1070 xs +> List.iter (fun x -> pr (str_of_token2 x))
1071
1072
1073
1074
1075 (*****************************************************************************)
1076 (* Entry points *)
1077 (*****************************************************************************)
1078
1079 (* old: PPviatok was made in the beginning to allow to pretty print a
1080 * complete C file, including a modified C file by transformation.ml,
1081 * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
1082 * maybe flow_to_ast) all the cases. Indeed we don't need to do some
1083 * fancy stuff when a function was not modified at all. Just need to
1084 * print the list of token as-is. But now pretty_print_c.ml handles
1085 * almost everything so maybe less useful. Maybe PPviatok allows to
1086 * optimize a little the pretty printing.
1087 *
1088 * update: now have PPviastr which goes even faster than PPviatok, so
1089 * PPviatok has disappeared.
1090 *)
1091
1092 type ppmethod = PPnormal | PPviastr
1093
1094
1095
1096
1097 (* The pp_program function will call pretty_print_c.ml with a special
1098 * function to print the leaf components, the tokens. When we want to
1099 * print a token, we need to print also maybe the space and comments that
1100 * were close to it in the original file (and that was omitted during the
1101 * parsing phase), and honor what the cocci-info attached to the token says.
1102 * Maybe we will not print the token if it's a MINUS-token, and maybe we will
1103 * print it and also print some cocci-code attached in a PLUS to it.
1104 * So we will also maybe call unparse_cocci. Because the cocci-code may
1105 * contain metavariables, unparse_cocci will in fact sometimes call back
1106 * pretty_print_c (which will this time don't call back again unparse_cocci)
1107 *)
1108
1109 let pp_program2 xs outfile =
1110 Common.with_open_outfile outfile (fun (pr,chan) ->
1111 let pr s =
1112 if !Flag_parsing_c.debug_unparsing
1113 then begin pr2_no_nl s; flush stderr end
1114 else pr s
1115 (* flush chan; *)
1116 (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1117 in
1118
1119 xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
1120 (* here can still work on ast *)
1121 let e = remove_useless_fakeInfo_struct e in
1122
1123 match ppmethod with
1124 | PPnormal ->
1125 (* now work on tokens *)
1126
1127 (* phase1: just get all the tokens, all the information *)
1128 assert(toks_e +> List.for_all (fun t ->
1129 TH.is_origin t or TH.is_expanded t
1130 ));
1131 let toks = get_fakeInfo_and_tokens e toks_e in
1132 let toks = displace_fake_nodes toks in
1133 (* assert Origin;ExpandedTok;Faketok *)
1134 let toks = expand_mcode toks in
1135
1136 (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1137 * and no tag information, just NOTHING. *)
1138
1139
1140 let toks =
1141 if !Flag.sgrep_mode2
1142 then drop_minus toks (* nothing to do for sgrep *)
1143 else
1144 (* phase2: can now start to filter and adjust *)
1145 let (toks,tu) = adjust_indentation toks in
1146 let toks = adjust_before_semicolon toks in(*before remove minus*)
1147 let toks = drop_space_at_endline toks in
1148 let toks = paren_to_space toks in
1149 let toks = drop_end_comma toks in
1150 let toks = remove_minus_and_between_and_expanded_and_fake toks in
1151 (* assert Origin + Cocci + C and no minus *)
1152 let toks = add_space toks in
1153 let toks = add_newlines toks tu in
1154 let toks = fix_tokens toks in
1155 toks in
1156
1157 (* in theory here could reparse and rework the ast! or
1158 * apply some SP. Not before cos julia may have generated
1159 * not parsable file. Need do unparsing_tricks call before being
1160 * ready to reparse. *)
1161 print_all_tokens2 pr toks;
1162
1163 | PPviastr -> pr str
1164 )
1165 )
1166
1167 let pp_program a b =
1168 Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
1169
1170
1171 let pp_program_default xs outfile =
1172 let xs' = xs +> List.map (fun x -> x, PPnormal) in
1173 pp_program xs' outfile