Release coccinelle-0.2.4rc5
[bpt/coccinelle.git] / parsing_c / unparse_c.ml
1 (* Yoann Padioleau, Julia Lawall
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *
15 *
16 * Modifications by Julia Lawall for better newline handling.
17 *)
18 open Common
19
20 open Ast_c
21
22 module TH = Token_helpers
23
24
25 (* should keep comments and directives in between adjacent deleted terms,
26 but not comments and directives within deleted terms. should use the
27 labels found in the control-flow graph *)
28
29
30
31 (*****************************************************************************)
32 (* Wrappers *)
33 (*****************************************************************************)
34 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
35
36 (*****************************************************************************)
37 (* Types used during the intermediate phases of the unparsing *)
38 (*****************************************************************************)
39
40 type token1 =
41 | Fake1 of info
42 | T1 of Parser_c.token
43
44 (* The cocci_tag of the token should always be a NOTHING. The mark of
45 * the token can only be OriginTok or ExpandedTok. Why not get rid of
46 * token and get something simpler ? because we need to know if the
47 * info is a TCommentCpp or TCommentSpace, etc for some of the further
48 * analysis so easier to keep with the token.
49 *
50 * This type contains the whole information. Have all the tokens with this
51 * type.
52 *)
53 type min =
54 Min of (int list (* match numbers from witness trees *) *
55 int (* adjacency information *))
56 | Ctx
57
58 type token2 =
59 | T2 of Parser_c.token * min *
60 int option (* orig index, abstracting away comments and space *)
61 | Fake2
62 | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
63 * Unparse_cocci.nlhint option
64 | C2 of string
65 | Comma of string
66 | Indent_cocci2
67 | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
68
69 (* not used yet *)
70 type token3 =
71 | T3 of Parser_c.token
72 | Cocci3 of string
73 | C3 of string
74
75
76 (* similar to the tech in parsing_hack *)
77 type token_extended = {
78 tok2 : token2;
79 str : string;
80 idx: int option; (* to know if 2 tokens were consecutive in orig file *)
81 mutable new_tokens_before : token2 list;
82 mutable remove : bool;
83 }
84
85
86 (*****************************************************************************)
87 (* Helpers *)
88 (*****************************************************************************)
89
90 let info_of_token1 t =
91 match t with
92 | Fake1 info -> info
93 | T1 tok -> TH.info_of_tok tok
94
95 let print_token1 = function
96 T1 tok -> TH.str_of_tok tok
97 | Fake1 info -> "fake"
98
99 let str_of_token2 = function
100 | T2 (t,_,_) -> TH.str_of_tok t
101 | Fake2 -> ""
102 | Cocci2 (s,_,_,_,_) -> s
103 | C2 s -> s
104 | Comma s -> s
105 | Indent_cocci2 -> ""
106 | Unindent_cocci2 _ -> ""
107
108 let print_token2 = function
109 | T2 (t,b,_) ->
110 let b_str =
111 match b with
112 Min (index,adj) ->
113 Printf.sprintf "-%d[%s]" adj
114 (String.concat " " (List.map string_of_int index))
115 | Ctx -> "" in
116 "T2:"^b_str^TH.str_of_tok t
117 | Fake2 -> "fake"
118 | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
119 | C2 s -> "C2:"^s
120 | Comma s -> "Comma:"^s
121 | Indent_cocci2 -> "Indent"
122 | Unindent_cocci2 _ -> "Unindent"
123
124 let simple_print_all_tokens1 l =
125 List.iter (function x -> Printf.printf "|%s| " (print_token1 x)) l;
126 Printf.printf "\n"
127
128 let simple_print_all_tokens2 l =
129 List.iter (function x -> Printf.printf "|%s| " (print_token2 x)) l;
130 Printf.printf "\n"
131
132 let str_of_token3 = function
133 | T3 t -> TH.str_of_tok t
134 | Cocci3 s | C3 s -> s
135
136
137
138 let mk_token_extended x =
139 let origidx =
140 match x with
141 | T2 (_,_, idx) -> idx
142 | _ -> None
143 in
144 { tok2 = x;
145 str = str_of_token2 x;
146 idx = origidx;
147 new_tokens_before = [];
148 remove = false;
149 }
150
151 let rebuild_tokens_extented toks_ext =
152 let _tokens = ref [] in
153 toks_ext +> List.iter (fun tok ->
154 tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
155 if not tok.remove then push2 tok.tok2 _tokens;
156 );
157 let tokens = List.rev !_tokens in
158 (tokens +> List.map mk_token_extended)
159
160
161 let mcode_contain_plus = function
162 | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
163 | Ast_cocci.CONTEXT _ -> true
164 (* patch: when need full coccinelle transformation *)
165 | Ast_cocci.MINUS (_,_,_,[]) -> false
166 | Ast_cocci.MINUS (_,_,_,x::xs) -> true
167 | Ast_cocci.PLUS _ -> raise Impossible
168
169 let contain_plus info =
170 let mck = Ast_c.mcode_of_info info in
171 mcode_contain_plus mck
172
173 (*****************************************************************************)
174 (* Last fix on the ast *)
175 (*****************************************************************************)
176
177 (* Because of the ugly trick to handle initialiser, I generate fake ','
178 * for the last initializer element, but if there is nothing around it,
179 * I don't want in the end to print it.
180 *)
181
182 let remove_useless_fakeInfo_struct program =
183 let bigf = { Visitor_c.default_visitor_c_s with
184 Visitor_c.kini_s = (fun (k,bigf) ini ->
185 match k ini with
186 | InitList args, ii ->
187 (match ii with
188 | [_i1;_i2] -> ini
189 | [i1;i2;iicommaopt] ->
190 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
191 && (Ast_c.is_fake iicommaopt)
192 (* sometimes the guy put a normal iicommaopt *)
193 then InitList args, [i1;i2]
194 else InitList args, [i1;i2;iicommaopt]
195 | [i1;i2;iicommaopt;end_comma_opt] ->
196 (* only in #define. end_comma_opt canot be fake *)
197 (* not sure if this will be considered ambiguous with a previous
198 case? *)
199 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
200 && (Ast_c.is_fake iicommaopt)
201 (* sometimes the guy put a normal iicommaopt *)
202 then InitList args, [i1;i2;end_comma_opt]
203 else InitList args, [i1;i2;iicommaopt;end_comma_opt]
204 | _ -> raise Impossible
205 )
206 | x -> x
207 )
208 } in
209 Visitor_c.vk_toplevel_s bigf program
210
211
212 (*****************************************************************************)
213 (* Tokens1 generation *)
214 (*****************************************************************************)
215
216 let get_fakeInfo_and_tokens celem toks =
217 let toks_in = ref toks in
218 let toks_out = ref [] in
219
220 (* todo? verify good order of position ? *)
221 let pr_elem info =
222 match Ast_c.pinfo_of_info info with
223 | FakeTok _ ->
224 Common.push2 (Fake1 info) toks_out
225 | OriginTok _ | ExpandedTok _ ->
226 (* get the associated comments/space/cppcomment tokens *)
227 let (before, x, after) =
228 !toks_in +> Common.split_when (fun tok ->
229 info =*= TH.info_of_tok tok)
230 in
231 assert(info =*= TH.info_of_tok x);
232 (*old: assert(before +> List.for_all (TH.is_comment)); *)
233 before +> List.iter (fun x ->
234 if not (TH.is_comment x)
235 then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
236 (* case such as int asm d3("x"); not yet in ast *)
237 );
238 before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
239 push2 (T1 x) toks_out;
240 toks_in := after;
241 | AbstractLineTok _ ->
242 (* can be called on type info when for instance use -type_c *)
243 if !Flag_parsing_c.pretty_print_type_info
244 then Common.push2 (Fake1 info) toks_out
245 else raise Impossible (* at this stage *)
246 in
247
248 let pr_space _ = () in (* use the spacing that is there already *)
249
250 Pretty_print_c.pp_program_gen pr_elem pr_space celem;
251
252 if not (null !toks_in)
253 then failwith "WEIRD: unparsing not finished";
254
255 List.rev !toks_out
256
257 (* Fake nodes that have BEFORE code should be moved over any subsequent
258 whitespace and newlines, but not any comments, to get as close to the affected
259 code as possible. Similarly, fake nodes that have AFTER code should be moved
260 backwards. No fake nodes should have both before and after code. *)
261
262 let displace_fake_nodes toks =
263 let is_fake = function Fake1 _ -> true | _ -> false in
264 let is_whitespace = function
265 T1(Parser_c.TCommentSpace _)
266 (* patch: cocci *)
267 | T1(Parser_c.TCommentNewline _) -> true
268 | _ -> false in
269 let rec loop toks =
270 let fake_info =
271 try Some (Common.split_when is_fake toks)
272 with Not_found -> None in
273 match fake_info with
274 Some(bef,((Fake1 info) as fake),aft) ->
275 (match !(info.cocci_tag) with
276 | Some x ->
277 (match x with
278 (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
279 (* move the fake node forwards *)
280 let (whitespace,rest) = Common.span is_whitespace aft in
281 bef @ whitespace @ fake :: (loop rest)
282 | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
283 (* move the fake node backwards *)
284 let revbef = List.rev bef in
285 let (revwhitespace,revprev) = Common.span is_whitespace revbef in
286 let whitespace = List.rev revwhitespace in
287 let prev = List.rev revprev in
288 prev @ fake :: (loop (whitespace @ aft))
289 | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
290 bef @ fake :: (loop aft)
291 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
292 failwith "fake node should not be before-after"
293 | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
294 )
295 | None ->
296 bef @ fake :: (loop aft)
297 )
298 | None -> toks
299 | _ -> raise Impossible in
300 loop toks
301
302 (*****************************************************************************)
303 (* Tokens2 generation *)
304 (*****************************************************************************)
305
306 let comment2t2 = function
307 (Token_c.TCommentCpp
308 (* not sure iif the following list is exhaustive or complete *)
309 (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError),
310 (info : Token_c.info)) ->
311 C2(info.Common.str)
312 | (Token_c.TCommentCpp x,(info : Token_c.info)) ->
313 C2("\n"^info.Common.str^"\n")
314 | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
315
316 let expand_mcode toks =
317 let toks_out = ref [] in
318
319 let index = ref 0 in
320
321 let add_elem t minus =
322 match t with
323 | Fake1 info ->
324 let str = Ast_c.str_of_info info in
325 if str =$= ""
326 then push2 (Fake2) toks_out
327 (* fx the fake "," at the end of a structure or enum.
328 no idea what other fake info there can be... *)
329 else push2 (Comma str) toks_out
330
331
332 | T1 tok ->
333 (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
334 (* no tag on expandedTok ! *)
335 let modified = function
336 None -> false
337 | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
338 | _ -> true in
339 (if (TH.is_expanded tok &&
340 modified !((TH.info_of_tok tok).cocci_tag)
341 (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*))
342 then
343 failwith
344 (Printf.sprintf
345 "expanded token %s on line %d is either modified or stored in a metavariable"
346 (TH.str_of_tok tok) (TH.line_of_tok tok)));
347
348 let tok' = tok +> TH.visitor_info_of_tok (fun i ->
349 { i with cocci_tag = ref Ast_c.emptyAnnot; }
350 ) in
351
352 let optindex =
353 if TH.is_origin tok && not (TH.is_real_comment tok)
354 then begin
355 incr index;
356 Some !index
357 end
358 else None
359 in
360
361 push2 (T2 (tok', minus, optindex)) toks_out
362 in
363
364 let expand_info t =
365 let (mcode,env) =
366 Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
367
368 let pr_cocci s ln col rcol hint =
369 push2 (Cocci2(s,ln,col,rcol,hint)) toks_out in
370 let pr_c info =
371 (match Ast_c.pinfo_of_info info with
372 Ast_c.AbstractLineTok _ ->
373 push2 (C2 (Ast_c.str_of_info info)) toks_out
374 | Ast_c.FakeTok (s,_) ->
375 push2 (C2 s) toks_out
376 | _ ->
377 Printf.printf "line: %s\n" (Common.dump info);
378 failwith "not an abstract line");
379 (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
380 List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
381
382 let pr_barrier ln col = (* marks a position, used around C code *)
383 push2 (Cocci2("",ln,col,col,None)) toks_out in
384 let pr_nobarrier ln col = () in (* not needed for linux spacing *)
385
386 let pr_cspace _ = push2 (C2 " ") toks_out in
387
388 let pr_space _ = () (* rely on add_space in cocci code *) in
389 let pr_arity _ = () (* not interested *) in
390
391 let indent _ = push2 Indent_cocci2 toks_out in
392 let unindent x = push2 (Unindent_cocci2 x) toks_out in
393
394 let args_pp =
395 (env, pr_cocci, pr_c, pr_cspace,
396 (match !Flag_parsing_c.spacing with
397 Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
398 pr_arity,
399 (match !Flag_parsing_c.spacing with
400 Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
401 indent, unindent) in
402
403 (* old: when for yacfe with partial cocci:
404 * add_elem t false;
405 *)
406
407 (* patch: when need full coccinelle transformation *)
408 let unparser = Unparse_cocci.pp_list_list_any args_pp false in
409 match mcode with
410 | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
411 (* Why adding ? because I want to have all the information, the whole
412 * set of tokens, so I can then process and remove the
413 * is_between_two_minus for instance *)
414 add_elem t (Min (inst,adj));
415 unparser any_xxs Unparse_cocci.InPlace
416 | Ast_cocci.CONTEXT (_,any_befaft) ->
417 (match any_befaft with
418 | Ast_cocci.NOTHING ->
419 add_elem t Ctx
420 | Ast_cocci.BEFORE (xxs,_) ->
421 unparser xxs Unparse_cocci.Before;
422 add_elem t Ctx
423 | Ast_cocci.AFTER (xxs,_) ->
424 add_elem t Ctx;
425 unparser xxs Unparse_cocci.After;
426 | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
427 unparser xxs Unparse_cocci.Before;
428 add_elem t Ctx;
429 unparser yys Unparse_cocci.After;
430 )
431 | Ast_cocci.PLUS _ -> raise Impossible
432
433 in
434
435 toks +> List.iter expand_info;
436 List.rev !toks_out
437
438
439 (*****************************************************************************)
440 (* Tokens2 processing, filtering, adjusting *)
441 (*****************************************************************************)
442
443 let is_space = function
444 | T2(Parser_c.TCommentSpace _,_b,_i) -> true (* only whitespace *)
445 | _ -> false
446
447 let is_newline = function
448 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
449 | _ -> false
450
451 let is_whitespace = function
452 | (T2 (t,_b,_i)) ->
453 (match t with
454 | Parser_c.TCommentSpace _ -> true (* only whitespace *)
455 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
456 | _ -> false
457 )
458 | _ -> false
459
460 let is_minusable_comment = function
461 | (T2 (t,_b,_i)) ->
462 (match t with
463 | Parser_c.TCommentSpace _ (* only whitespace *)
464 (* patch: coccinelle *)
465 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
466 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
467 | Parser_c.TComment _
468 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
469 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
470 | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
471 -> true
472
473 | Parser_c.TCommentMisc _
474 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
475 -> false
476
477 | _ -> false
478 )
479 | _ -> false
480
481 let is_minusable_comment_nocpp = function
482 | (T2 (t,_b,_i)) ->
483 (match t with
484 | Parser_c.TCommentSpace _ (* only whitespace *)
485 (* patch: coccinelle *)
486 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
487 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
488 | Parser_c.TComment _ -> true
489 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
490 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
491 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
492 -> false
493
494 | Parser_c.TCommentMisc _
495 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
496 -> false
497
498 | _ -> false
499 )
500 | _ -> false
501
502 let all_coccis = function
503 Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ -> true
504 | _ -> false
505
506 (*previously gave up if the first character was a newline, but not clear why*)
507 let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
508
509 let set_minus_comment adj = function
510 | T2 (t,Ctx,idx) ->
511 let str = TH.str_of_tok t in
512 (match t with
513 | Parser_c.TCommentSpace _
514 (* patch: coccinelle *)
515 | Parser_c.TCommentNewline _ -> ()
516
517 | Parser_c.TComment _
518 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
519 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
520 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
521 ->
522 pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
523 (TH.line_of_tok t) str)
524 | _ -> raise Impossible
525 );
526 T2 (t, Min adj, idx)
527 (* patch: coccinelle *)
528 | T2 (t,Min adj,idx) as x -> x
529 | _ -> raise Impossible
530
531 let set_minus_comment_or_plus adj = function
532 Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ as x -> x
533 | x -> set_minus_comment adj x
534
535 let drop_minus xs =
536 xs +> Common.exclude (function
537 | T2 (t,Min adj,_) -> true
538 | _ -> false
539 )
540
541 let drop_expanded_and_fake xs =
542 xs +> Common.exclude (function
543 | T2 (t,_,_) when TH.is_expanded t -> true
544 | Fake2 -> true
545 | _ -> false
546 )
547
548 let remove_minus_and_between_and_expanded_and_fake xs =
549
550 (* get rid of expanded and fake tok *)
551 let xs = drop_expanded_and_fake xs in
552
553 let minus_or_comment = function
554 T2(_,Min adj,_) -> true
555 | x -> is_minusable_comment x in
556
557 let minus_or_comment_nocpp = function
558 T2(_,Min adj,_) -> true
559 | x -> is_minusable_comment_nocpp x in
560
561 let common_adj (index1,adj1) (index2,adj2) =
562 adj1 = adj2 (* same adjacency info *) &&
563 (* non-empty intersection of witness trees *)
564 not ((Common.inter_set index1 index2) = []) in
565
566 (* new idea: collects regions not containing non-space context code
567 if two adjacent adjacent minus tokens satisfy common_adj then delete
568 all spaces, comments etc between them
569 if two adjacent minus tokens do not satisfy common_adj only delete
570 the spaces between them if there are no comments, etc.
571 if the region contain no plus code and is both preceded and followed
572 by a newline, delete the initial newline. *)
573
574 let rec adjust_around_minus = function
575 [] -> []
576 | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
577 (T2(_,Min adj1,_) as t1)::xs ->
578 let (minus_list,rest) = Common.span not_context (t1::xs) in
579 let contains_plus = List.exists is_plus minus_list in
580 let x =
581 match List.rev minus_list with
582 (T2(Parser_c.TCommentNewline c,_b,_i))::rest
583 when List.for_all minus_or_comment minus_list ->
584 set_minus_comment_or_plus adj1 x
585 | _ -> x in
586 x :: adjust_within_minus contains_plus minus_list @
587 adjust_around_minus rest
588 | (T2(_,Min adj1,_) as t1)::xs ->
589 let (minus_list,rest) = Common.span not_context (t1::xs) in
590 let contains_plus = List.exists is_plus minus_list in
591 adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
592 | x::xs ->
593 x :: adjust_around_minus xs
594 and adjust_within_minus cp (* contains plus *) = function
595 (T2(_,Min adj1,_) as t1)::xs ->
596 let not_minus = function T2(_,Min _,_) -> false | _ -> true in
597 let (not_minus_list,rest) = Common.span not_minus xs in
598 t1 ::
599 (match rest with
600 (T2(_,Min adj2,_) as t2)::xs when common_adj adj1 adj2 ->
601 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
602 @ (adjust_within_minus cp (t2::xs))
603 | (T2(_,Min adj2,_) as t2)::xs ->
604 if not cp && List.for_all is_whitespace not_minus_list
605 then
606 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
607 @ (adjust_within_minus cp (t2::xs))
608 else
609 not_minus_list @ (adjust_within_minus cp (t2::xs))
610 | _ ->
611 if cp
612 then xs
613 else
614 let (spaces,rest) = Common.span is_space xs in
615 (List.map (set_minus_comment_or_plus adj1) spaces)
616 @ rest)
617 | xs -> failwith "should always start with minus"
618 and not_context = function
619 (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false
620 | _ -> true
621 and is_plus = function
622 C2 _ | Comma _ | Cocci2 _ -> true
623 | _ -> false in
624
625 let xs = adjust_around_minus xs in
626
627 (* this drops blank lines after a brace introduced by removing code *)
628 let minus_or_comment_nonl = function
629 T2(_,Min adj,_) -> true
630 | T2(Parser_c.TCommentNewline _,_b,_i) -> false
631 | x -> is_minusable_comment x in
632
633 let rec adjust_after_brace = function
634 [] -> []
635 | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs)
636 when str_of_token2 x =$= "{" ->
637 let (between_minus,rest) = Common.span minus_or_comment_nonl xs in
638 let is_whitespace = function
639 T2(Parser_c.TCommentSpace _,_b,_i)
640 (* patch: cocci *)
641 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
642 | _ -> false in
643 let (newlines,rest) = Common.span is_whitespace rest in
644 let (drop_newlines,last_newline) =
645 let rec loop = function
646 [] -> ([],[])
647 | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
648 (List.rev rest,[x])
649 | x::xs ->
650 let (drop_newlines,last_newline) = loop xs in
651 (drop_newlines,x::last_newline) in
652 loop (List.rev newlines) in
653 x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@
654 last_newline@
655 adjust_after_brace rest
656 | x::xs -> x::adjust_after_brace xs in
657
658 let xs = adjust_after_brace xs in
659
660 (* search backwards from context } over spaces until reaching a newline.
661 then go back over all minus code until reaching some context or + code.
662 get rid of all intervening spaces, newlines, and comments
663 input is reversed *)
664 let rec adjust_before_brace = function
665 [] -> []
666 | ((T2(t,Ctx,_)) as x)::xs when str_of_token2 x =$= "}" or is_newline x ->
667 let (outer_spaces,rest) = Common.span is_space xs in
668 x :: outer_spaces @
669 (match rest with
670 ((T2 (Parser_c.TCommentNewline _,Ctx,_i)) as h) ::
671 (* the rest of this code is the same as from_newline below
672 but merging them seems to be error prone... *)
673 ((T2 (t, Min adj, idx)) as m) :: rest ->
674 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
675 h :: m ::
676 (List.map (set_minus_comment adj) spaces) @
677 (adjust_before_brace rest)
678 | _ -> adjust_before_brace rest)
679 | x::xs -> x :: (adjust_before_brace xs) in
680
681 let from_newline = function
682 ((T2 (t, Min adj, idx)) as m) :: rest ->
683 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
684 m ::
685 (List.map (set_minus_comment adj) spaces) @
686 (adjust_before_brace rest)
687 | ((T2 (t0, Ctx, idx0)) as m0) :: ((T2 (t, Min adj, idx)) as m) :: rest
688 when TH.str_of_tok t0 = "" ->
689 (* This is for the case of a #define that is completely deleted,
690 because a #define has a strange EOL token at the end.
691 We hope there i no other kind of token that is represented by
692 "", but it seems like changing the kind of token might break
693 the end of entity recognition in the C parser.
694 See parsing_hacks.ml *)
695 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
696 m0 :: m ::
697 (List.map (set_minus_comment adj) spaces) @
698 (adjust_before_brace rest)
699 | rest -> adjust_before_brace rest in
700
701 let xs = List.rev (from_newline (List.rev xs)) in
702 let xs = drop_minus xs in
703 xs
704
705 (* normally, in C code, a semicolon is not preceded by a space or newline *)
706 let adjust_before_semicolon toks =
707 let toks = List.rev toks in
708 let rec search_semic = function
709 [] -> []
710 | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
711 if List.mem (str_of_token2 x) [";";")";","]
712 then x :: search_minus false xs
713 else x :: search_semic xs
714 | x::xs -> x :: search_semic xs
715 and search_minus seen_minus xs =
716 let (spaces, rest) = Common.span is_space xs in
717 (* only delete spaces if something is actually deleted *)
718 match rest with
719 ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest
720 | _ -> if seen_minus then rest else xs in
721 List.rev (search_semic toks)
722
723 let is_ident_like s = s ==~ Common.regexp_alpha
724
725 let rec drop_space_at_endline = function
726 [] -> []
727 | [x] -> [x]
728 | ((T2(Parser_c.TCommentSpace _,Ctx,_i)) as a)::rest ->
729 let (outer_spaces,rest) = Common.span is_space rest in
730 let minus_or_comment_or_space_nocpp = function
731 T2(_,Min adj,_) -> true
732 | (T2(Parser_c.TCommentSpace _,Ctx,_i)) -> true
733 | (T2(Parser_c.TCommentNewline _,Ctx,_i)) -> false
734 | x -> false in
735 let (minus,rest) = Common.span minus_or_comment_or_space_nocpp rest in
736 let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in
737 if List.exists (function T2(_,Min adj,_) -> true | _ -> false) minus
738 then
739 match rest with
740 ((T2(Parser_c.TCommentNewline _,Ctx,_i)) as a)::rest ->
741 (* drop trailing spaces *)
742 minus@a::(drop_space_at_endline rest)
743 | _ -> fail()
744 else fail()
745 | a :: rest -> a :: drop_space_at_endline rest
746
747 (* if a removed ( is between two tokens, then add a space *)
748 let rec paren_to_space = function
749 [] -> []
750 | [x] -> [x]
751 | [x;y] -> [x;y]
752 | ((T2(_,Ctx,_)) as a)::((T2(t,Min _,_)) as b)::((T2(_,Ctx,_)) as c)::rest
753 when not (is_whitespace a) && TH.str_of_tok t = "(" ->
754 a :: b :: (C2 " ") :: (paren_to_space (c :: rest))
755 | a :: rest -> a :: (paren_to_space rest)
756
757 let rec add_space xs =
758 match xs with
759 | [] -> []
760 | [x] -> [x]
761 | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
762 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
763 not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
764 (* this only works within a line. could consider whether
765 something should be done to add newlines too, rather than
766 printing them explicitly in unparse_cocci. *)
767 x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
768 | x::y::xs ->
769 let sx = str_of_token2 x in
770 let sy = str_of_token2 y in
771 if is_ident_like sx && is_ident_like sy
772 then x::C2 " "::(add_space (y::xs))
773 else x::(add_space (y::xs))
774
775 (* A fake comma is added at the end of an unordered initlist or a enum
776 decl, if the initlist or enum doesn't already end in a comma. This is only
777 needed if there is + code, ie if we see Cocci after it in the code sequence *)
778
779 let rec drop_end_comma = function
780 [] -> []
781 | [x] -> [x]
782 | ((Comma ",") as x) :: rest ->
783 let (newlines,rest2) = Common.span is_whitespace rest in
784 (match rest2 with
785 (Cocci2 _) :: _ -> x :: drop_end_comma rest
786 | _ -> drop_end_comma rest)
787 | x :: xs -> x :: drop_end_comma xs
788
789 (* The following only works for the outermost function call. Stack records
790 the column of all open parentheses. Space_cell contains the most recent
791 comma in the outermost function call. The goal is to decide whether this
792 should be followed by a space or a newline and indent. *)
793 let add_newlines toks tabbing_unit =
794 let create_indent n =
795 let (tu,tlen) =
796 match tabbing_unit with
797 Some ("\t",_) -> ("\t",8)
798 | Some ("",_) -> ("\t",8) (* not sure why... *)
799 | Some (s,_) -> (s,String.length s) (* assuming only spaces *)
800 | None -> ("\t",8) in
801 let rec loop seen =
802 if seen + tlen <= n
803 then tu ^ loop (seen + tlen)
804 else String.make (n-seen) ' ' in
805 loop 0 in
806 let check_for_newline count x = function
807 Some (start,space_cell) when count > Flag_parsing_c.max_width ->
808 space_cell := "\n"^(create_indent x);
809 Some (x + (count - start))
810 | _ -> None in
811 (* the following is for strings that may contain newline *)
812 let string_length s count =
813 let l = list_of_string s in
814 List.fold_left
815 (function count ->
816 function
817 '\t' -> count + 8
818 | '\n' -> 0
819 | c -> count + 1)
820 count l in
821 let rec loop info count = function
822 [] -> []
823 | ((T2(tok,_,_)) as a)::xs ->
824 a :: loop info (string_length (TH.str_of_tok tok) count) xs
825 | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs ->
826 let (stack,space_cell) = info in
827 let rest =
828 match hint with
829 None -> loop info (count + (String.length s)) xs
830 | Some Unparse_cocci.StartBox ->
831 let count = count + (String.length s) in
832 loop (count::stack,space_cell) count xs
833 | Some Unparse_cocci.EndBox ->
834 let count = count + (String.length s) in
835 (match stack with
836 [x] ->
837 (match check_for_newline count x space_cell with
838 Some count -> loop ([],None) count xs
839 | None -> loop ([],None) count xs)
840 | _ -> loop (List.tl stack,space_cell) count xs)
841 | Some (Unparse_cocci.SpaceOrNewline sp) ->
842 let count = count + (String.length s) + 1 (*space*) in
843 (match stack with
844 [x] ->
845 (match check_for_newline count x space_cell with
846 Some count -> loop (stack,Some (x,sp)) count xs
847 | None -> loop (stack,Some (count,sp)) count xs)
848 | _ -> loop info count xs) in
849 a :: rest
850 | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs
851 | ((Comma(s)) as a)::xs -> a :: loop info (string_length s count) xs
852 | Fake2 :: _ | Indent_cocci2 :: _
853 | Unindent_cocci2 _::_ ->
854 failwith "unexpected fake, indent, or unindent" in
855 let redo_spaces prev = function
856 Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) ->
857 C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev
858 | t -> t::prev in
859 (match !Flag_parsing_c.spacing with
860 Flag_parsing_c.SMPL -> toks
861 | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks)))
862
863 (* When insert some new code, because of a + in a SP, we must add this
864 * code at the right place, with the good indentation. So each time we
865 * encounter some spacing info, with some newline, we maintain the
866 * current indentation level used.
867 *
868 * TODO problems: not accurate. ex: TODO
869 *
870 * TODO: if in #define region, should add a \ \n
871 *)
872 let new_tabbing2 space =
873 (list_of_string space)
874 +> List.rev
875 +> Common.take_until (fun c -> c =<= '\n')
876 +> List.rev
877 +> List.map string_of_char
878 +> String.concat ""
879
880 let new_tabbing a =
881 Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
882
883
884 let rec adjust_indentation xs =
885
886 let _current_tabbing = ref "" in
887 let tabbing_unit = ref None in
888
889 let string_of_list l = String.concat "" (List.map string_of_char l) in
890
891 (* try to pick a tabbing unit for the plus code *)
892 let adjust_tabbing_unit old_tab new_tab =
893 if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
894 then
895 let old_tab = list_of_string old_tab in
896 let new_tab = list_of_string new_tab in
897 let rec loop = function
898 ([],new_tab) ->
899 tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
900 | (_,[]) -> failwith "not possible"
901 | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
902 loop (old_tab,new_tab) in
903
904 let remtab tu current_tab =
905 let current_tab = List.rev(list_of_string current_tab) in
906 let rec loop = function
907 ([],new_tab) -> string_of_list (List.rev new_tab)
908 | (_,[]) -> "" (*weird; tabbing unit used up more than the current tab*)
909 | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
910 | (_,ns) -> (* mismatch; remove what we can *)
911 string_of_list (List.rev ns) in
912 loop (tu,current_tab) in
913
914 let rec find_first_tab started = function
915 [] -> ()
916 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
917 find_first_tab true xs
918 (* patch: coccinelle *)
919 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
920 when started ->
921 let s = str_of_token2 x +> new_tabbing in
922 tabbing_unit := Some (s,List.rev (list_of_string s))
923 | x::xs -> find_first_tab started xs in
924 find_first_tab false xs;
925
926 let rec aux started xs =
927 match xs with
928 | [] -> []
929 (* patch: coccinelle *)
930 | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
931 ((Cocci2 ("{",_,_,_,_)) as a)::xs
932 when started && str_of_token2 x =$= ")" ->
933 (* to be done for if, etc, but not for a function header *)
934 x::(C2 " ")::a::(aux started xs)
935 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs ->
936 let old_tabbing = !_current_tabbing in
937 str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
938 (* only trust the indentation after the first { *)
939 (if started
940 then adjust_tabbing_unit old_tabbing !_current_tabbing);
941 let coccis_rest = Common.span all_coccis xs in
942 (match coccis_rest with
943 (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
944 (* the case where cocci code has been added before a close } *)
945 x::aux started (Indent_cocci2::xs)
946 | _ -> x::aux started xs)
947 | Indent_cocci2::xs ->
948 (match !tabbing_unit with
949 None -> aux started xs
950 | Some (tu,_) ->
951 _current_tabbing := (!_current_tabbing)^tu;
952 Cocci2 (tu,-1,-1,-1,None)::aux started xs)
953 | Unindent_cocci2(permanent)::xs ->
954 (match !tabbing_unit with
955 None -> aux started xs
956 | Some (_,tu) ->
957 _current_tabbing := remtab tu (!_current_tabbing);
958 aux started xs)
959 (* border between existing code and cocci code *)
960 | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs
961 when str_of_token2 x =$= "{" ->
962 x::aux true (y::Indent_cocci2::xs)
963 | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
964 when str_of_token2 y =$= "}" ->
965 x::aux started (y::Unindent_cocci2 true::xs)
966 (* starting the body of the function *)
967 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs
968 | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs
969 | ((Cocci2("\n",_,_,_,_)) as x)::Unindent_cocci2(false)::xs ->
970 x::aux started xs
971 | ((Cocci2("\n",_,_,_,_)) as x)::xs ->
972 (* dont inline in expr because of weird eval order of ocaml *)
973 let s = !_current_tabbing in
974 x::Cocci2 (s,-1,-1,-1,None)::aux started xs
975 | x::xs -> x::aux started xs in
976 (aux false xs,!tabbing_unit)
977
978
979 let rec find_paren_comma = function
980 | [] -> ()
981
982 (* do nothing if was like this in original file *)
983 | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
984 ::xs when p2 =|= p1 + 1 ->
985 find_paren_comma (x2::xs)
986
987 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
988 ::xs when p2 =|= p1 + 1 ->
989 find_paren_comma (x2::xs)
990
991 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
992 ::xs when p2 =|= p1 + 1 ->
993 find_paren_comma (x2::xs)
994
995 (* otherwise yes can adjust *)
996 | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
997 x2.remove <- true;
998 find_paren_comma (x2::xs)
999 | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
1000 x1.remove <- true;
1001 find_paren_comma (x2::xs)
1002
1003 | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
1004 x1.remove <- true;
1005 find_paren_comma (x2::xs)
1006
1007 | x::xs ->
1008 find_paren_comma xs
1009
1010
1011 let fix_tokens toks =
1012 let toks = toks +> List.map mk_token_extended in
1013
1014 let cleaner = toks +> Common.exclude (function
1015 | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
1016 | _ -> false
1017 ) in
1018 find_paren_comma cleaner;
1019
1020 let toks = rebuild_tokens_extented toks in
1021 toks +> List.map (fun x -> x.tok2)
1022
1023
1024
1025 (*****************************************************************************)
1026 (* Final unparsing (and debugging support) *)
1027 (*****************************************************************************)
1028
1029 (* for debugging *)
1030 type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
1031
1032 let kind_of_token2 = function
1033 | Fake2 -> KFake
1034 | Cocci2 _ -> KCocci
1035 | C2 _ -> KC
1036 | Comma _ -> KC
1037 | T2 (t,_,_) ->
1038 (match TH.pinfo_of_tok t with
1039 | ExpandedTok _ -> KExpanded
1040 | OriginTok _ -> KOrigin
1041 | FakeTok _ -> raise Impossible (* now a Fake2 *)
1042 | AbstractLineTok _ -> raise Impossible (* now a KC *)
1043 )
1044 | Unindent_cocci2 _ | Indent_cocci2 -> raise Impossible
1045
1046 let end_mark = "!"
1047
1048 let start_mark = function
1049 | KFake -> "!F!"
1050 | KCocci -> "!S!"
1051 | KC -> "!A!"
1052 | KExpanded -> "!E!"
1053 | KOrigin -> ""
1054
1055 let print_all_tokens2 pr xs =
1056 if !Flag_parsing_c.debug_unparsing
1057 then
1058 let current_kind = ref KOrigin in
1059 xs +> List.iter (fun t ->
1060 let newkind = kind_of_token2 t in
1061 if newkind =*= !current_kind
1062 then pr (str_of_token2 t)
1063 else begin
1064 pr (end_mark);
1065 pr (start_mark newkind);
1066 pr (str_of_token2 t);
1067 current_kind := newkind
1068 end
1069 );
1070 else
1071 xs +> List.iter (fun x -> pr (str_of_token2 x))
1072
1073
1074
1075
1076 (*****************************************************************************)
1077 (* Entry points *)
1078 (*****************************************************************************)
1079
1080 (* old: PPviatok was made in the beginning to allow to pretty print a
1081 * complete C file, including a modified C file by transformation.ml,
1082 * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
1083 * maybe flow_to_ast) all the cases. Indeed we don't need to do some
1084 * fancy stuff when a function was not modified at all. Just need to
1085 * print the list of token as-is. But now pretty_print_c.ml handles
1086 * almost everything so maybe less useful. Maybe PPviatok allows to
1087 * optimize a little the pretty printing.
1088 *
1089 * update: now have PPviastr which goes even faster than PPviatok, so
1090 * PPviatok has disappeared.
1091 *)
1092
1093 type ppmethod = PPnormal | PPviastr
1094
1095
1096
1097
1098 (* The pp_program function will call pretty_print_c.ml with a special
1099 * function to print the leaf components, the tokens. When we want to
1100 * print a token, we need to print also maybe the space and comments that
1101 * were close to it in the original file (and that was omitted during the
1102 * parsing phase), and honor what the cocci-info attached to the token says.
1103 * Maybe we will not print the token if it's a MINUS-token, and maybe we will
1104 * print it and also print some cocci-code attached in a PLUS to it.
1105 * So we will also maybe call unparse_cocci. Because the cocci-code may
1106 * contain metavariables, unparse_cocci will in fact sometimes call back
1107 * pretty_print_c (which will this time don't call back again unparse_cocci)
1108 *)
1109
1110 let pp_program2 xs outfile =
1111 Common.with_open_outfile outfile (fun (pr,chan) ->
1112 let pr s =
1113 if !Flag_parsing_c.debug_unparsing
1114 then begin pr2_no_nl s; flush stderr end
1115 else pr s
1116 (* flush chan; *)
1117 (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1118 in
1119
1120 xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
1121 (* here can still work on ast *)
1122 let e = remove_useless_fakeInfo_struct e in
1123
1124 match ppmethod with
1125 | PPnormal ->
1126 (* now work on tokens *)
1127
1128 (* phase1: just get all the tokens, all the information *)
1129 assert(toks_e +> List.for_all (fun t ->
1130 TH.is_origin t or TH.is_expanded t
1131 ));
1132 let toks = get_fakeInfo_and_tokens e toks_e in
1133 let toks = displace_fake_nodes toks in
1134 (* assert Origin;ExpandedTok;Faketok *)
1135 let toks = expand_mcode toks in
1136
1137 (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1138 * and no tag information, just NOTHING. *)
1139
1140
1141 let toks =
1142 if !Flag.sgrep_mode2
1143 then
1144 (* nothing else to do for sgrep *)
1145 drop_expanded_and_fake (drop_minus toks)
1146 else
1147 (* phase2: can now start to filter and adjust *)
1148 let (toks,tu) = adjust_indentation toks in
1149 let toks = adjust_before_semicolon toks in(*before remove minus*)
1150 let toks = drop_space_at_endline toks in
1151 let toks = paren_to_space toks in
1152 let toks = drop_end_comma toks in
1153 let toks = remove_minus_and_between_and_expanded_and_fake toks in
1154 (* assert Origin + Cocci + C and no minus *)
1155 let toks = add_space toks in
1156 let toks = add_newlines toks tu in
1157 let toks = fix_tokens toks in
1158 toks in
1159
1160 (* in theory here could reparse and rework the ast! or
1161 * apply some SP. Not before cos julia may have generated
1162 * not parsable file. Need do unparsing_tricks call before being
1163 * ready to reparse. *)
1164 print_all_tokens2 pr toks;
1165
1166 | PPviastr -> pr str
1167 )
1168 )
1169
1170 let pp_program a b =
1171 Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
1172
1173
1174 let pp_program_default xs outfile =
1175 let xs' = xs +> List.map (fun x -> x, PPnormal) in
1176 pp_program xs' outfile