2c2a238d20a7065dabefd111dc5448fafc18cbc7
[bpt/coccinelle.git] / parsing_c / unparse_c.ml
1 (* Yoann Padioleau, Julia Lawall
2 *
3 * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
13 *
14 *
15 * Modifications by Julia Lawall for better newline handling.
16 *)
17 open Common
18
19 open Ast_c
20
21 module TH = Token_helpers
22
23
24 (* should keep comments and directives in between adjacent deleted terms,
25 but not comments and directives within deleted terms. should use the
26 labels found in the control-flow graph *)
27
28
29
30 (*****************************************************************************)
31 (* Types used during the intermediate phases of the unparsing *)
32 (*****************************************************************************)
33
34 type token1 =
35 | Fake1 of info
36 | T1 of Parser_c.token
37
38 (* The cocci_tag of the token should always be a NOTHING. The mark of
39 * the token can only be OriginTok or ExpandedTok. Why not get rid of
40 * token and get something simpler ? because we need to know if the
41 * info is a TCommentCpp or TCommentSpace, etc for some of the further
42 * analysis so easier to keep with the token.
43 *
44 * This type contains the whole information. Have all the tokens with this
45 * type.
46 *)
47 type token2 =
48 | T2 of Parser_c.token * bool (* minus *) *
49 int option (* orig index, abstracting away comments and space *)
50 | Fake2
51 | Cocci2 of string
52 | C2 of string
53 | Indent_cocci2
54 | Unindent_cocci2
55
56 (* not used yet *)
57 type token3 =
58 | T3 of Parser_c.token
59 | Cocci3 of string
60 | C3 of string
61
62
63 (* similar to the tech in parsing_hack *)
64 type token_extended = {
65 tok2 : token2;
66 str : string;
67 idx: int option; (* to know if 2 tokens were consecutive in orig file *)
68 mutable new_tokens_before : token2 list;
69 mutable remove : bool;
70 }
71
72
73 (*****************************************************************************)
74 (* Helpers *)
75 (*****************************************************************************)
76
77 let info_of_token1 t =
78 match t with
79 | Fake1 info -> info
80 | T1 tok -> TH.info_of_tok tok
81
82 let str_of_token2 = function
83 | T2 (t,_,_) -> TH.str_of_tok t
84 | Fake2 -> ""
85 | Cocci2 s -> s
86 | C2 s -> s
87 | Indent_cocci2 -> ""
88 | Unindent_cocci2 -> ""
89
90 let print_token2 = function
91 | T2 (t,b,_) -> "T2:"^(if b then "-" else "")^TH.str_of_tok t
92 | Fake2 -> ""
93 | Cocci2 s -> "Cocci2:"^s
94 | C2 s -> "C2:"^s
95 | Indent_cocci2 -> "Indent"
96 | Unindent_cocci2 -> "Unindent"
97
98 let print_all_tokens2 l =
99 List.iter (function x -> Printf.printf "%s " (print_token2 x)) l;
100 Printf.printf "\n"
101
102 let str_of_token3 = function
103 | T3 t -> TH.str_of_tok t
104 | Cocci3 s | C3 s -> s
105
106
107
108 let mk_token_extended x =
109 let origidx =
110 match x with
111 | T2 (_,_, idx) -> idx
112 | _ -> None
113 in
114 { tok2 = x;
115 str = str_of_token2 x;
116 idx = origidx;
117 new_tokens_before = [];
118 remove = false;
119 }
120
121 let rebuild_tokens_extented toks_ext =
122 let _tokens = ref [] in
123 toks_ext +> List.iter (fun tok ->
124 tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
125 if not tok.remove then push2 tok.tok2 _tokens;
126 );
127 let tokens = List.rev !_tokens in
128 (tokens +> List.map mk_token_extended)
129
130
131 let mcode_contain_plus = function
132 | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
133 | Ast_cocci.CONTEXT _ -> true
134 (* patch: when need full coccinelle transformation *)
135 | Ast_cocci.MINUS (_,[]) -> false
136 | Ast_cocci.MINUS (_,x::xs) -> true
137 | Ast_cocci.PLUS -> raise Impossible
138
139 let contain_plus info =
140 let mck = Ast_c.mcode_of_info info in
141 mcode_contain_plus mck
142
143 (*****************************************************************************)
144 (* Last fix on the ast *)
145 (*****************************************************************************)
146
147 (* Because of the ugly trick to handle initialiser, I generate fake ','
148 * for the last initializer element, but if there is nothing around it,
149 * I don't want in the end to print it.
150 *)
151
152 let remove_useless_fakeInfo_struct program =
153 let bigf = { Visitor_c.default_visitor_c_s with
154 Visitor_c.kini_s = (fun (k,bigf) ini ->
155 match k ini with
156 | InitList args, ii ->
157 (match ii with
158 | [_i1;_i2] -> ini
159 | [i1;i2;iicommaopt] ->
160 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
161 && (Ast_c.is_fake iicommaopt)
162 (* sometimes the guy put a normal iicommaopt *)
163 then InitList args, [i1;i2]
164 else InitList args, [i1;i2;iicommaopt]
165 | [i1;i2;iicommaopt;end_comma_opt] ->
166 (* only in #define. end_comma_opt canot be fake *)
167 (* not sure if this will be considered ambiguous with a previous
168 case? *)
169 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
170 && (Ast_c.is_fake iicommaopt)
171 (* sometimes the guy put a normal iicommaopt *)
172 then InitList args, [i1;i2;end_comma_opt]
173 else InitList args, [i1;i2;iicommaopt;end_comma_opt]
174 | _ -> raise Impossible
175 )
176 | x -> x
177 )
178 } in
179 Visitor_c.vk_toplevel_s bigf program
180
181
182 (*****************************************************************************)
183 (* Tokens1 generation *)
184 (*****************************************************************************)
185
186 let get_fakeInfo_and_tokens celem toks =
187 let toks_in = ref toks in
188 let toks_out = ref [] in
189
190 (* todo? verify good order of position ? *)
191 let pr_elem info =
192 match Ast_c.pinfo_of_info info with
193 | FakeTok _ ->
194 Common.push2 (Fake1 info) toks_out
195 | OriginTok _ | ExpandedTok _ ->
196 (* get the associated comments/space/cppcomment tokens *)
197 let (before, x, after) = !toks_in +> Common.split_when (fun tok ->
198 info =*= TH.info_of_tok tok)
199 in
200 assert(info =*= TH.info_of_tok x);
201 (*old: assert(before +> List.for_all (TH.is_comment)); *)
202 before +> List.iter (fun x ->
203 if not (TH.is_comment x)
204 then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
205 (* case such as int asm d3("x"); not yet in ast *)
206 );
207 before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
208 push2 (T1 x) toks_out;
209 toks_in := after;
210 | AbstractLineTok _ ->
211 (* can be called on type info when for instance use -type_c *)
212 if !Flag_parsing_c.pretty_print_type_info
213 then Common.push2 (Fake1 info) toks_out
214 else raise Impossible (* at this stage *)
215 in
216
217 let pr_space _ = () in (* use the spacing that is there already *)
218
219 Pretty_print_c.pp_program_gen pr_elem pr_space celem;
220
221 if not (null !toks_in)
222 then failwith "WEIRD: unparsing not finished";
223
224 List.rev !toks_out
225
226 (* Fake nodes that have BEFORE code should be moved over any subsequent
227 whitespace and newlines, but not any comments, to get as close to the affected
228 code as possible. Similarly, face nodes that have AFTER code should be moved
229 backwards. No fake nodes should have both before and after code. *)
230
231 let displace_fake_nodes toks =
232 let is_fake = function Fake1 _ -> true | _ -> false in
233 let is_whitespace = function
234 T1(Parser_c.TCommentSpace _)
235 (* patch: cocci *)
236 | T1(Parser_c.TCommentNewline _) -> true
237 | _ -> false in
238 let rec loop toks =
239 let fake_info =
240 try Some (Common.split_when is_fake toks)
241 with Not_found -> None in
242 match fake_info with
243 Some(bef,((Fake1 info) as fake),aft) ->
244 (match !(info.cocci_tag) with
245 (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
246 (* move the fake node forwards *)
247 let (whitespace,rest) = Common.span is_whitespace aft in
248 bef @ whitespace @ fake :: (loop rest)
249 | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
250 (* move the fake node backwards *)
251 let revbef = List.rev bef in
252 let (revwhitespace,revprev) = Common.span is_whitespace revbef in
253 let whitespace = List.rev revwhitespace in
254 let prev = List.rev revprev in
255 prev @ fake :: (loop (whitespace @ aft))
256 | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
257 bef @ fake :: (loop aft)
258 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
259 failwith "fake node should not be before-after"
260 | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
261 )
262 | None -> toks
263 | _ -> raise Impossible in
264 loop toks
265
266 (*****************************************************************************)
267 (* Tokens2 generation *)
268 (*****************************************************************************)
269
270 let comment2t2 = function
271 (Token_c.TCommentCpp x,(info : Token_c.info)) ->
272 C2("\n"^info.Common.str^"\n")
273 | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
274
275 let expand_mcode toks =
276 let toks_out = ref [] in
277
278 let index = ref 0 in
279
280 let add_elem t minus =
281 match t with
282 | Fake1 info ->
283 let str = Ast_c.str_of_info info in
284 if str =$= ""
285 then push2 (Fake2) toks_out
286 (* perhaps the fake ',' *)
287 else push2 (C2 str) toks_out
288
289
290 | T1 tok ->
291 (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
292 (* no tag on expandedTok ! *)
293 (if (TH.is_expanded tok &&
294 !((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot)
295 then
296 failwith
297 (Printf.sprintf
298 "expanded token %s on line %d is either modified or stored in a metavariable"
299 (TH.str_of_tok tok) (TH.line_of_tok tok)));
300
301 let tok' = tok +> TH.visitor_info_of_tok (fun i ->
302 { i with cocci_tag = ref Ast_c.emptyAnnot; }
303 ) in
304
305 let optindex =
306 if TH.is_origin tok && not (TH.is_real_comment tok)
307 then begin
308 incr index;
309 Some !index
310 end
311 else None
312 in
313
314 push2 (T2 (tok', minus, optindex)) toks_out
315 in
316
317 let expand_info t =
318 let (mcode,env) = !((info_of_token1 t).cocci_tag) in
319
320 let pr_cocci s =
321 push2 (Cocci2 s) toks_out
322 in
323 let pr_c info =
324 (match Ast_c.pinfo_of_info info with
325 Ast_c.AbstractLineTok _ ->
326 push2 (C2 (Ast_c.str_of_info info)) toks_out
327 | Ast_c.FakeTok (s,_) ->
328 push2 (C2 s) toks_out
329 | _ ->
330 Printf.printf "line: %s\n" (Common.dump info);
331 failwith "not an abstract line");
332 (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
333 List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
334
335
336
337 let pr_space _ = push2 (C2 " ") toks_out in
338
339 let indent _ = push2 Indent_cocci2 toks_out in
340 let unindent _ = push2 Unindent_cocci2 toks_out in
341
342 let args_pp = (env, pr_cocci, pr_c, pr_space, indent, unindent) in
343
344 (* old: when for yacfe with partial cocci:
345 * add_elem t false;
346 *)
347
348 (* patch: when need full coccinelle transformation *)
349 let unparser = Unparse_cocci.pp_list_list_any args_pp false in
350 match mcode with
351 | Ast_cocci.MINUS (_,any_xxs) ->
352 (* Why adding ? because I want to have all the information, the whole
353 * set of tokens, so I can then process and remove the
354 * is_between_two_minus for instance *)
355 add_elem t true;
356 unparser any_xxs Unparse_cocci.InPlace
357 | Ast_cocci.CONTEXT (_,any_befaft) ->
358 (match any_befaft with
359 | Ast_cocci.NOTHING ->
360 add_elem t false
361 | Ast_cocci.BEFORE xxs ->
362 unparser xxs Unparse_cocci.Before;
363 add_elem t false
364 | Ast_cocci.AFTER xxs ->
365 add_elem t false;
366 unparser xxs Unparse_cocci.After;
367 | Ast_cocci.BEFOREAFTER (xxs, yys) ->
368 unparser xxs Unparse_cocci.Before;
369 add_elem t false;
370 unparser yys Unparse_cocci.After;
371 )
372 | Ast_cocci.PLUS -> raise Impossible
373
374 in
375
376 toks +> List.iter expand_info;
377 List.rev !toks_out
378
379
380 (*****************************************************************************)
381 (* Tokens2 processing, filtering, adjusting *)
382 (*****************************************************************************)
383
384 let is_minusable_comment = function
385 | (T2 (t,_b,_i)) ->
386 (match t with
387 | Parser_c.TCommentSpace _ (* only whitespace *)
388 (* patch: coccinelle *)
389 | Parser_c.TCommentNewline _ (* newline plus whitespace *)
390 | Parser_c.TComment _
391 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
392 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
393 | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
394 -> true
395
396 | Parser_c.TCommentMisc _
397 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
398 -> false
399
400 | _ -> false
401 )
402 | _ -> false
403
404 let all_coccis = function
405 Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 -> true
406 | _ -> false
407
408 (*previously gave up if the first character was a newline, but not clear why*)
409 let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
410
411 let set_minus_comment = function
412 | T2 (t,false,idx) ->
413 let str = TH.str_of_tok t in
414 (match t with
415 | Parser_c.TCommentSpace _
416 (* patch: coccinelle *)
417 | Parser_c.TCommentNewline _ -> ()
418
419 | Parser_c.TComment _
420 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
421 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
422 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
423 ->
424 pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
425 (TH.line_of_tok t) str)
426 | _ -> raise Impossible
427 );
428 T2 (t, true, idx)
429 (* patch: coccinelle *)
430 | T2 (Parser_c.TCommentNewline _,true,idx) as x -> x
431 | _ -> raise Impossible
432
433 let set_minus_comment_or_plus = function
434 Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 as x -> x
435 | x -> set_minus_comment x
436
437 let remove_minus_and_between_and_expanded_and_fake xs =
438
439 (* get rid of exampled and fake tok *)
440 let xs = xs +> Common.exclude (function
441 | T2 (t,_,_) when TH.is_expanded t -> true
442 | Fake2 -> true
443
444 | _ -> false
445 )
446 in
447
448 (*This drops the space before each completely minused block (no plus code).*)
449 let minus_or_comment = function
450 T2(_,true,_) -> true
451 | T2(Parser_c.TCommentNewline _,_b,_i) -> false
452 | x -> is_minusable_comment x in
453
454 let rec adjust_before_minus = function
455 [] -> []
456 (* patch: coccinelle *)
457 | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::((T2(_,true,_)::_) as xs) ->
458 let (between_minus,rest) = Common.span minus_or_comment xs in
459 (match rest with
460 [] -> (set_minus_comment x) :: between_minus
461 | T2(Parser_c.TCommentNewline _,_b,_i)::_ ->
462 (set_minus_comment x) :: between_minus @
463 (adjust_before_minus rest)
464 | _ -> x :: between_minus @ (adjust_before_minus rest))
465 | x::xs -> x::adjust_before_minus xs in
466
467 let xs = adjust_before_minus xs in
468
469 (* this drops blank lines after a brace introduced by removing code *)
470 let rec adjust_after_brace = function
471 [] -> []
472 | ((T2(_,false,_)) as x)::((T2(_,true,_)::_) as xs)
473 when str_of_token2 x =$= "{" ->
474 let (between_minus,rest) = Common.span minus_or_comment xs in
475 let is_whitespace = function
476 T2(Parser_c.TCommentSpace _,_b,_i)
477 (* patch: cocci *)
478 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
479 | _ -> false in
480 let (newlines,rest) = Common.span is_whitespace rest in
481 let (drop_newlines,last_newline) =
482 let rec loop = function
483 [] -> ([],[])
484 | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
485 (List.rev rest,[x])
486 | x::xs ->
487 let (drop_newlines,last_newline) = loop xs in
488 (drop_newlines,x::last_newline) in
489 loop (List.rev newlines) in
490 x::between_minus@(List.map set_minus_comment drop_newlines)@
491 last_newline@
492 adjust_after_brace rest
493 | x::xs -> x::adjust_after_brace xs in
494
495 let xs = adjust_after_brace xs in
496
497 (* this deals with any stuff that is between the minused code, eg
498 spaces, comments, attributes, etc. *)
499 (* The use of is_minusable_comment_or_plus and set_minus_comment_or_plus
500 is because the + code can end up anywhere in the middle of the - code;
501 it is not necessarily to the far left *)
502 let rec adjust_between_minus xs =
503 match xs with
504 | [] -> []
505 | (T2 (t1,true,idx1))::xs ->
506
507 let (between_comments, rest) =
508 Common.span is_minusable_comment_or_plus xs in
509 (match rest with
510 | [] -> [(T2 (t1, true,idx1))]
511
512 | (T2 (t2, true,idx2))::rest ->
513 (T2 (t1, true,idx1))::
514 (List.map set_minus_comment_or_plus between_comments @
515 adjust_between_minus ((T2 (t2, true, idx2))::rest))
516 | x::xs ->
517 (T2 (t1, true, idx1))::
518 (between_comments @ adjust_between_minus (x::xs))
519 )
520
521 | x::xs -> x::adjust_between_minus xs in
522
523 let xs = adjust_between_minus xs in
524
525 let xs = xs +> Common.exclude (function
526 | T2 (t,true,_) -> true
527 | _ -> false
528 ) in
529 xs
530
531 (* normally, in C code, a semicolon is not preceded by a space or newline *)
532 let adjust_before_semicolon toks =
533 let toks = List.rev toks in
534 let rec loop = function
535 [] -> []
536 | ((T2(_,false,_)) as x)::xs ->
537 if List.mem (str_of_token2 x) [";";")";","]
538 then
539 let (spaces, rest) = Common.span is_minusable_comment xs in
540 (match rest with
541 (T2(_,true,_))::_ | (Cocci2 _)::_ ->
542 (* only drop spaces if something was actually changed before *)
543 x :: loop rest
544 | _ -> x :: loop xs)
545 else x :: loop xs
546 | x::xs -> x :: loop xs in
547 List.rev (loop toks)
548
549 let is_ident_like s = s ==~ Common.regexp_alpha
550
551 let rec add_space xs =
552 match xs with
553 | [] -> []
554 | [x] -> [x]
555 | x::y::xs ->
556 let sx = str_of_token2 x in
557 let sy = str_of_token2 y in
558 if is_ident_like sx && is_ident_like sy
559 then x::C2 " "::(add_space (y::xs))
560 else x::(add_space (y::xs))
561
562
563
564 (* When insert some new code, because of a + in a SP, we must add this
565 * code at the right place, with the good indentation. So each time we
566 * encounter some spacing info, with some newline, we maintain the
567 * current indentation level used.
568 *
569 * TODO problems: not accurate. ex: TODO
570 *
571 * TODO: if in #define region, should add a \ \n
572 *)
573 let new_tabbing2 space =
574 (list_of_string space)
575 +> List.rev
576 +> Common.take_until (fun c -> c =<= '\n')
577 +> List.rev
578 +> List.map string_of_char
579 +> String.concat ""
580
581 let new_tabbing a =
582 Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
583
584
585 let rec adjust_indentation xs =
586 let _current_tabbing = ref "" in
587 let tabbing_unit = ref None in
588
589 let string_of_list l = String.concat "" (List.map string_of_char l) in
590
591 (* try to pick a tabbing unit for the plus code *)
592 let adjust_tabbing_unit old_tab new_tab =
593 if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
594 then
595 let old_tab = list_of_string old_tab in
596 let new_tab = list_of_string new_tab in
597 let rec loop = function
598 ([],new_tab) ->
599 tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
600 | (_,[]) -> failwith "not possible"
601 | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
602 loop (old_tab,new_tab) in
603
604 let remtab tu current_tab =
605 let current_tab = List.rev(list_of_string current_tab) in
606 let rec loop = function
607 ([],new_tab) -> string_of_list (List.rev new_tab)
608 | (_,[]) -> "" (*weird; tabbing unit used up more than the current tab*)
609 | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
610 | (_,ns) -> (* mismatch; remove what we can *)
611 string_of_list (List.rev ns) in
612 loop (tu,current_tab) in
613
614 let rec find_first_tab started = function
615 [] -> ()
616 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
617 find_first_tab true xs
618 (* patch: coccinelle *)
619 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
620 when started ->
621 let s = str_of_token2 x +> new_tabbing in
622 tabbing_unit := Some (s,List.rev (list_of_string s))
623 | x::xs -> find_first_tab started xs in
624 find_first_tab false xs;
625
626 let rec aux started xs =
627 match xs with
628 | [] -> []
629 (* patch: coccinelle *)
630 | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
631 (Cocci2 "{")::xs when started && str_of_token2 x =$= ")" ->
632 (* to be done for if, etc, but not for a function header *)
633 x::(Cocci2 " {")::(aux started xs)
634 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs ->
635 let old_tabbing = !_current_tabbing in
636 str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
637 (* only trust the indentation after the first { *)
638 (if started then adjust_tabbing_unit old_tabbing !_current_tabbing);
639 let coccis_rest = Common.span all_coccis xs in
640 (match coccis_rest with
641 (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
642 (* the case where cocci code has been added before a close } *)
643 x::aux started (Indent_cocci2::xs)
644 | _ -> x::aux started xs)
645 | Indent_cocci2::xs ->
646 (match !tabbing_unit with
647 None -> aux started xs
648 | Some (tu,_) ->
649 _current_tabbing := (!_current_tabbing)^tu;
650 Cocci2 (tu)::aux started xs)
651 | Unindent_cocci2::xs ->
652 (match !tabbing_unit with
653 None -> aux started xs
654 | Some (_,tu) ->
655 _current_tabbing := remtab tu (!_current_tabbing);
656 aux started xs)
657 (* border between existing code and cocci code *)
658 | ((T2 (tok,_,_)) as x)::((Cocci2 "\n") as y)::xs
659 when str_of_token2 x =$= "{" ->
660 x::aux true (y::Indent_cocci2::xs)
661 | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
662 when str_of_token2 y =$= "}" ->
663 x::aux started (y::Unindent_cocci2::xs)
664 (* starting the body of the function *)
665 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs
666 | (Cocci2 "{")::xs -> (Cocci2 "{")::aux true xs
667 | ((Cocci2 "\n") as x)::xs ->
668 (* dont inline in expr because of weird eval order of ocaml *)
669 let s = !_current_tabbing in
670 x::Cocci2 (s)::aux started xs
671 | x::xs -> x::aux started xs in
672 aux false xs
673
674
675 let rec find_paren_comma = function
676 | [] -> ()
677
678 (* do nothing if was like this in original file *)
679 | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
680 ::xs when p2 =|= p1 + 1 ->
681 find_paren_comma (x2::xs)
682
683 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
684 ::xs when p2 =|= p1 + 1 ->
685 find_paren_comma (x2::xs)
686
687 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
688 ::xs when p2 =|= p1 + 1 ->
689 find_paren_comma (x2::xs)
690
691 (* otherwise yes can adjust *)
692 | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
693 x2.remove <- true;
694 find_paren_comma (x2::xs)
695 | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
696 x1.remove <- true;
697 find_paren_comma (x2::xs)
698
699 | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
700 x1.remove <- true;
701 find_paren_comma (x2::xs)
702
703 | x::xs ->
704 find_paren_comma xs
705
706
707 let fix_tokens toks =
708 let toks = toks +> List.map mk_token_extended in
709
710 let cleaner = toks +> Common.exclude (function
711 | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
712 | _ -> false
713 ) in
714 find_paren_comma cleaner;
715
716 let toks = rebuild_tokens_extented toks in
717 toks +> List.map (fun x -> x.tok2)
718
719
720
721 (*****************************************************************************)
722 (* Final unparsing (and debugging support) *)
723 (*****************************************************************************)
724
725 (* for debugging *)
726 type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
727
728 let kind_of_token2 = function
729 | Fake2 -> KFake
730 | Cocci2 _ -> KCocci
731 | C2 _ -> KC
732 | T2 (t,_,_) ->
733 (match TH.pinfo_of_tok t with
734 | ExpandedTok _ -> KExpanded
735 | OriginTok _ -> KOrigin
736 | FakeTok _ -> raise Impossible (* now a Fake2 *)
737 | AbstractLineTok _ -> raise Impossible (* now a KC *)
738 )
739 | Unindent_cocci2 | Indent_cocci2 -> raise Impossible
740
741 let end_mark = "!"
742
743 let start_mark = function
744 | KFake -> "!F!"
745 | KCocci -> "!S!"
746 | KC -> "!A!"
747 | KExpanded -> "!E!"
748 | KOrigin -> ""
749
750 let print_all_tokens2 pr xs =
751 if !Flag_parsing_c.debug_unparsing
752 then
753 let current_kind = ref KOrigin in
754 xs +> List.iter (fun t ->
755 let newkind = kind_of_token2 t in
756 if newkind =*= !current_kind
757 then pr (str_of_token2 t)
758 else begin
759 pr (end_mark);
760 pr (start_mark newkind);
761 pr (str_of_token2 t);
762 current_kind := newkind
763 end
764 );
765 else
766 xs +> List.iter (fun x -> pr (str_of_token2 x))
767
768
769
770
771 (*****************************************************************************)
772 (* Entry points *)
773 (*****************************************************************************)
774
775 (* old: PPviatok was made in the beginning to allow to pretty print a
776 * complete C file, including a modified C file by transformation.ml,
777 * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
778 * maybe flow_to_ast) all the cases. Indeed we don't need to do some
779 * fancy stuff when a function was not modified at all. Just need to
780 * print the list of token as-is. But now pretty_print_c.ml handles
781 * almost everything so maybe less useful. Maybe PPviatok allows to
782 * optimize a little the pretty printing.
783 *
784 * update: now have PPviastr which goes even faster than PPviatok, so
785 * PPviatok has disappeared.
786 *)
787
788 type ppmethod = PPnormal | PPviastr
789
790
791
792
793 (* The pp_program function will call pretty_print_c.ml with a special
794 * function to print the leaf components, the tokens. When we want to
795 * print a token, we need to print also maybe the space and comments that
796 * were close to it in the original file (and that was omitted during the
797 * parsing phase), and honor what the cocci-info attached to the token says.
798 * Maybe we will not print the token if it's a MINUS-token, and maybe we will
799 * print it and also print some cocci-code attached in a PLUS to it.
800 * So we will also maybe call unparse_cocci. Because the cocci-code may
801 * contain metavariables, unparse_cocci will in fact sometimes call back
802 * pretty_print_c (which will this time don't call back again unparse_cocci)
803 *)
804
805 let pp_program2 xs outfile =
806 Common.with_open_outfile outfile (fun (pr,chan) ->
807 let pr s =
808 if !Flag_parsing_c.debug_unparsing
809 then begin pr2_no_nl s; flush stderr end
810 else pr s
811 (* flush chan; *)
812 (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
813 in
814
815 xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
816
817 (* here can still work on ast *)
818 let e = remove_useless_fakeInfo_struct e in
819
820 match ppmethod with
821 | PPnormal ->
822 (* now work on tokens *)
823
824 (* phase1: just get all the tokens, all the information *)
825 assert(toks_e +> List.for_all (fun t ->
826 TH.is_origin t or TH.is_expanded t
827 ));
828 let toks = get_fakeInfo_and_tokens e toks_e in
829 let toks = displace_fake_nodes toks in
830 (* assert Origin;ExpandedTok;Faketok *)
831 let toks = expand_mcode toks in
832 (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
833 * and no tag information, just NOTHING. *)
834
835 (* phase2: can now start to filter and adjust *)
836 let toks = adjust_indentation toks in
837 let toks = remove_minus_and_between_and_expanded_and_fake toks in
838 let toks = adjust_before_semicolon toks in
839 (* assert Origin + Cocci + C and no minus *)
840 let toks = add_space toks in
841 let toks = fix_tokens toks in
842
843 (* in theory here could reparse and rework the ast! or
844 * apply some SP. Not before cos julia may have generated
845 * not parsable file. Need do unparsing_tricks call before being
846 * ready to reparse. *)
847 print_all_tokens2 pr toks;
848
849 | PPviastr -> pr str
850 )
851 )
852
853 let pp_program a b =
854 Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
855
856
857 let pp_program_default xs outfile =
858 let xs' = xs +> List.map (fun x -> x, PPnormal) in
859 pp_program xs' outfile