Coccinelle release 1.0.0-rc3
[bpt/coccinelle.git] / parsing_c / unparse_c.ml
1 (* Yoann Padioleau, Julia Lawall
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *
15 *
16 * Modifications by Julia Lawall for better newline handling.
17 *)
18 open Common
19
20 open Ast_c
21
22 module TH = Token_helpers
23
24
25 (* should keep comments and directives in between adjacent deleted terms,
26 but not comments and directives within deleted terms. should use the
27 labels found in the control-flow graph *)
28
29
30
31 (*****************************************************************************)
32 (* Wrappers *)
33 (*****************************************************************************)
(* Logging helpers for this module, obtained from Common.mk_pr2_wrappers
   applied to the Flag_parsing_c.verbose_unparsing flag. *)
let (pr2, pr2_once) =
  Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
35
36 (*****************************************************************************)
37 (* Types used during the intermediate phases of the unparsing *)
38 (*****************************************************************************)
39
(* First intermediate token type: either a token coming from the C lexer
   (T1) or a placeholder for an info that has no lexer token (Fake1). *)
type token1 =
  | Fake1 of info
  | T1 of Parser_c.token
43
44 (* The cocci_tag of the token should always be a NOTHING. The mark of
45 * the token can only be OriginTok or ExpandedTok. Why not get rid of
46 * token and get something simpler ? because we need to know if the
47 * info is a TCommentCpp or TCommentSpace, etc for some of the further
48 * analysis so easier to keep with the token.
49 *
50 * This type contains the whole information. Have all the tokens with this
51 * type.
52 *)
(* Marks a token as removed (Min) or as context (Ctx).  The payload of
   Min is a single pair, so it can be bound and passed around as one
   value. *)
type min =
  | Min of (int list (* match numbers from witness trees *)
            * Ast_cocci.adjacency (* adjacency information *))
  | Ctx
57
(* Second intermediate token type, produced once the SmPL annotations
   have been expanded into explicit tokens. *)
type token2 =
  | T2 of Parser_c.token * min
          * int option (* orig index, abstracting away comments and space *)
  | Fake2 of min
  | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
              * Unparse_cocci.nlhint option
  | C2 of string
  | Comma of string
  | Indent_cocci2
  | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
68
(* Third intermediate token type.  Not used yet. *)
type token3 =
  | T3 of Parser_c.token
  | Cocci3 of string
  | C3 of string
74
75
(* A token2 wrapped with mutable editing state; similar to the technique
   used in parsing_hack. *)
type token_extended = {
  tok2 : token2;
  str : string;
  idx : int option; (* to know if 2 tokens were consecutive in orig file *)
  mutable new_tokens_before : token2 list;
  mutable remove : bool;
}
84
85
86 (*****************************************************************************)
87 (* Helpers *)
88 (*****************************************************************************)
89
(* Return the info attached to a token1: the stored info for a fake
   token, the info of the underlying lexer token otherwise. *)
let info_of_token1 = function
  | T1 tok -> TH.info_of_tok tok
  | Fake1 info -> info
94
(* Debugging text for a token1: the token's string, or "fake". *)
let print_token1 t =
  match t with
  | Fake1 _ -> "fake"
  | T1 tok -> TH.str_of_tok tok
98
(* Text of a token2.  Fake tokens and indentation markers have no text
   of their own and yield the empty string. *)
let str_of_token2 tok =
  match tok with
  | T2 (t, _, _) -> TH.str_of_tok t
  | Cocci2 (s, _, _, _, _) | C2 s | Comma s -> s
  | Fake2 _ | Indent_cocci2 | Unindent_cocci2 _ -> ""
107
(* Debugging representation of a token2.  Removed tokens are prefixed
   with "-<adjacency>[<match numbers>]", where the adjacency is printed
   as -1 when it is not an ADJ value. *)
let print_token2 tok =
  let min_prefix = function
    | Ctx -> ""
    | Min (index, adj) ->
        let adj_number =
          match adj with
          | Ast_cocci.ADJ n -> n
          | _ -> -1 in
        Printf.sprintf "-%d[%s]" adj_number
          (String.concat " " (List.map string_of_int index)) in
  (* short tag identifying the comment-like token kinds *)
  let comment_tag = function
    | Parser_c.TCommentSpace _ -> " sp "
    | Parser_c.TCommentNewline _ -> " nl "
    | Parser_c.TCommentCpp _ -> " cp "
    | Parser_c.TCommentMisc _ -> " misc "
    | Parser_c.TComment _ -> " comment "
    | _ -> "" in
  match tok with
  | T2 (t, b, _) -> "T2:" ^ min_prefix b ^ comment_tag t ^ TH.str_of_tok t
  | Fake2 b -> min_prefix b ^ "fake"
  | Cocci2 (s, _, lc, rc, _) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
  | C2 s -> "C2:" ^ s
  | Comma s -> "Comma:" ^ s
  | Indent_cocci2 -> "Indent"
  | Unindent_cocci2 _ -> "Unindent"
140
(* Print every token1 of the list on standard output, each one between
   '|' characters, followed by a newline. *)
let simple_print_all_tokens1 l =
  let show tok = Printf.printf "|%s| " (print_token1 tok) in
  List.iter show l;
  Printf.printf "\n"
144
(* Print every token2 of the list on standard output, each one between
   '|' characters, followed by a newline. *)
let simple_print_all_tokens2 l =
  let show tok = Printf.printf "|%s| " (print_token2 tok) in
  List.iter show l;
  Printf.printf "\n"
148
(* Text of a token3. *)
let str_of_token3 tok =
  match tok with
  | Cocci3 s -> s
  | C3 s -> s
  | T3 t -> TH.str_of_tok t
152
153
154
(* Wrap a token2 into a fresh token_extended: no pending insertions, not
   marked for removal.  Only T2 tokens carry an original index. *)
let mk_token_extended x =
  { tok2 = x;
    str = str_of_token2 x;
    idx = (match x with
           | T2 (_, _, idx) -> idx
           | _ -> None);
    new_tokens_before = [];
    remove = false;
  }
167
(* Apply the edits recorded in a list of extended tokens: for each
   token, first its new_tokens_before, then the token itself unless it is
   marked removed.  The resulting token2 list is wrapped again with
   mk_token_extended. *)
let rebuild_tokens_extented toks_ext =
  let rev_tokens =
    List.fold_left
      (fun acc tok ->
        (* the accumulator is in reverse order *)
        let acc = List.rev_append tok.new_tokens_before acc in
        if tok.remove then acc else tok.tok2 :: acc)
      [] toks_ext in
  List.map mk_token_extended (List.rev rev_tokens)
176
177
(* Tell whether an mcode has code attached to it: a context annotation
   other than NOTHING, or a minus with a replacement.
   Raises Impossible on a PLUS mcode. *)
let mcode_contain_plus mck =
  match mck with
  | Ast_cocci.PLUS _ -> raise Impossible
  | Ast_cocci.CONTEXT (_, befaft) ->
      (match befaft with
       | Ast_cocci.NOTHING -> false
       | _ -> true)
  (* patch: when need full coccinelle transformation *)
  | Ast_cocci.MINUS (_, _, _, replacement) ->
      (match replacement with
       | Ast_cocci.NOREPLACEMENT -> false
       | Ast_cocci.REPLACEMENT _ -> true (* REPL is not empty *))
185
(* Same test as mcode_contain_plus, applied to the mcode of an info. *)
let contain_plus info =
  mcode_contain_plus (Ast_c.mcode_of_info info)
189
190 (*****************************************************************************)
191 (* Last fix on the ast *)
192 (*****************************************************************************)
193
(* Because of the ugly trick used to handle initialisers, a fake ',' is
 * generated for the last initialiser element.  If nothing is attached
 * around it, it must not be printed in the end, so it is removed from
 * the info list of the InitList here.
 *)
let remove_useless_fakeInfo_struct program =
  (* true when the optional comma is fake (the source sometimes has a
     normal comma there) and neither it nor the second info of the list
     has code attached *)
  let comma_is_useless i2 iicommaopt =
    (not (contain_plus iicommaopt)) && (not (contain_plus i2))
    && (Ast_c.is_fake iicommaopt) in
  let visit_initialiser (k, _bigf) ini =
    match k ini with
    | InitList args, ii ->
        (match ii with
         | [_i1; _i2] ->
             (* NOTE(review): this returns the initialiser as received,
                not the result of [k ini] -- confirm this is intended *)
             ini
         | [i1; i2; iicommaopt] ->
             if comma_is_useless i2 iicommaopt
             then InitList args, [i1; i2]
             else InitList args, [i1; i2; iicommaopt]
         | [i1; i2; iicommaopt; end_comma_opt] ->
             (* only in #define. end_comma_opt cannot be fake *)
             if comma_is_useless i2 iicommaopt
             then InitList args, [i1; i2; end_comma_opt]
             else InitList args, [i1; i2; iicommaopt; end_comma_opt]
         | _ -> raise Impossible)
    | other -> other in
  let bigf =
    { Visitor_c.default_visitor_c_s with
      Visitor_c.kini_s = visit_initialiser } in
  Visitor_c.vk_toplevel_s bigf program
227
228
229 (*****************************************************************************)
230 (* Tokens1 generation *)
231 (*****************************************************************************)
232
(* Build the token1 list of a toplevel element.  The element is walked
   with Pretty_print_c.pp_program_gen; each info met is either turned
   into a Fake1 or matched against the remaining lexer tokens [toks], in
   which case the tokens skipped before it (comments, spaces, ...) are
   emitted as well.
   Fails with "WEIRD: unparsing not finished" if lexer tokens remain
   once the walk is over. *)
let get_fakeInfo_and_tokens celem toks =
  let remaining = ref toks in
  let emitted = ref [] in
  let emit t = Common.push2 t emitted in

  (* todo? verify good order of position ? *)
  let pr_elem info =
    match Ast_c.pinfo_of_info info with
    | AbstractLineTok _ ->
        (* can be called on type info when for instance use -type_c *)
        if !Flag_parsing_c.pretty_print_type_info
        then emit (Fake1 info)
        else raise Impossible (* at this stage *)
    | FakeTok _ ->
        emit (Fake1 info)
    | OriginTok _ | ExpandedTok _ ->
        (* get the associated comments/space/cppcomment tokens *)
        let (before, x, after) =
          Common.split_when
            (fun tok -> info =*= TH.info_of_tok tok) !remaining in
        assert (info =*= TH.info_of_tok x);
        (* tokens skipped over should be comments; only warn otherwise,
           case such as int asm d3("x"); not yet in ast *)
        List.iter
          (fun skipped ->
            if not (TH.is_comment skipped)
            then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok skipped))
          before;
        List.iter (fun skipped -> emit (T1 skipped)) before;
        emit (T1 x);
        remaining := after
  in

  (* use the spacing that is there already *)
  let pr_space _ = () in

  Pretty_print_c.pp_program_gen pr_elem pr_space celem;

  if not (null !remaining)
  then failwith "WEIRD: unparsing not finished";

  List.rev !emitted
273
(* Fake nodes that carry BEFORE code or a minus replacement are moved
   forwards over any subsequent spaces and newlines (but not over
   comments), to get as close as possible to the affected code.  Fake
   nodes that carry AFTER code are moved backwards in the same way.  A
   fake node with both before and after code is an error. *)
let displace_fake_nodes toks =
  let is_fake = function
    | Fake1 _ -> true
    | _ -> false in
  let is_whitespace = function
    | T1 (Parser_c.TCommentSpace _)
    (* patch: cocci *)
    | T1 (Parser_c.TCommentNewline _) -> true
    | _ -> false in
  let rec loop toks =
    let split =
      try Some (Common.split_when is_fake toks)
      with Not_found -> None in
    match split with
    | None -> toks
    | Some (bef, ((Fake1 info) as fake), aft) ->
        let leave_in_place () = bef @ (fake :: loop aft) in
        (match !(info.cocci_tag) with
         | None -> leave_in_place ()
         | Some (Ast_cocci.CONTEXT (_, Ast_cocci.BEFORE _), _)
         | Some (Ast_cocci.MINUS (_, _, _, Ast_cocci.REPLACEMENT _), _) ->
             (* move the fake node forwards *)
             let (whitespace, rest) = Common.span is_whitespace aft in
             bef @ (whitespace @ (fake :: loop rest))
         | Some (Ast_cocci.CONTEXT (_, Ast_cocci.AFTER _), _) ->
             (* move the fake node backwards *)
             let (revwhitespace, revprev) =
               Common.span is_whitespace (List.rev bef) in
             let whitespace = List.rev revwhitespace in
             let prev = List.rev revprev in
             prev @ (fake :: loop (whitespace @ aft))
         | Some (Ast_cocci.CONTEXT (_, Ast_cocci.BEFOREAFTER _), _) ->
             failwith "fake node should not be before-after"
         | Some _ ->
             (* nothing attached, or a plain minus *)
             leave_in_place ())
    | Some _ -> raise Impossible in
  loop toks
319
320 (*****************************************************************************)
321 (* Tokens2 generation *)
322 (*****************************************************************************)
323
(* Turn a cpp comment attached to an info into a C2 token.  Attributes,
   macros and "passing" comments are emitted as they are; every other
   cpp comment is put on a line of its own.  Anything that is not a
   TCommentCpp is rejected with a failure. *)
let comment2t2 = function
  | (Token_c.TCommentCpp kind, (info : Token_c.info)) ->
      let text = info.Common.str in
      (match kind with
       (* not sure if the following list is exhaustive or complete *)
       | Token_c.CppAttr
       | Token_c.CppMacro
       | Token_c.CppPassingCosWouldGetError -> C2 text
       | _ -> C2 ("\n" ^ text ^ "\n"))
  | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
333
(* Turn the token1 list into a token2 list.  For every token, the SmPL
   annotation stored in its cocci_tag is read: the token itself is
   emitted, marked Min or Ctx, and the attached added code is unparsed
   into Cocci2/C2/indentation tokens placed before and/or after it. *)
let expand_mcode toks =
  let toks_out = ref [] in
  let emit t2 = push2 t2 toks_out in

  (* counter used to number origin tokens that are not comments; it is
     incremented before use, so the first index handed out is 1 *)
  let index = ref 0 in

  (* emit the token2 corresponding to [t], marked with [minus] *)
  let add_elem t minus =
    match t with
    | Fake1 info ->
        let str = Ast_c.str_of_info info in
        if str =$= ""
        then emit (Fake2 minus)
        (* for the fake "," at the end of a structure or enum.
           no idea what other fake info there can be... *)
        else emit (Comma str)

    | T1 tok ->
        (* no tag on expandedTok ! *)
        let modified = function
          | None
          | Some (Ast_cocci.CONTEXT (_, Ast_cocci.NOTHING), _) -> false
          | Some _ -> true in
        (if TH.is_expanded tok
            && modified !((TH.info_of_tok tok).cocci_tag)
         then
           failwith
             (Printf.sprintf
                "expanded token %s on line %d is either modified or stored in a metavariable"
                (TH.str_of_tok tok) (TH.line_of_tok tok)));

        (* the emitted token carries an empty annotation *)
        let tok' =
          tok +> TH.visitor_info_of_tok (fun i ->
            { i with cocci_tag = ref Ast_c.emptyAnnot; }) in

        let optindex =
          if TH.is_origin tok && not (TH.is_real_comment tok)
          then begin
            incr index;
            Some !index
          end
          else None in

        emit (T2 (tok', minus, optindex))
  in

  (* callbacks handed to the SmPL unparser; all of them only append to
     toks_out *)
  let pr_cocci s ln col rcol hint = emit (Cocci2 (s, ln, col, rcol, hint)) in
  let pr_c info =
    (match Ast_c.pinfo_of_info info with
     | Ast_c.AbstractLineTok _ -> emit (C2 (Ast_c.str_of_info info))
     | Ast_c.FakeTok (s, _) -> emit (C2 s)
     | _ ->
         Printf.printf "line: %s\n" (Common.dump info);
         failwith "not an abstract line");
    (* then the cpp comments recorded after this info *)
    List.iter (fun c -> emit (comment2t2 c))
      (!(info.Ast_c.comments_tag)).Ast_c.mafter in

  (* marks a position, used around C code *)
  let pr_barrier ln col = emit (Cocci2 ("", ln, col, col, None)) in
  (* not needed for linux spacing *)
  let pr_nobarrier _ln _col = () in

  let pr_cspace _ = emit (C2 " ") in

  let pr_space _ = () (* rely on add_space in cocci code *) in
  let pr_arity _ = () (* not interested *) in

  let indent _ = emit Indent_cocci2 in
  let unindent x = emit (Unindent_cocci2 x) in

  let expand_info t =
    let (mcode, env) =
      Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in

    let smpl_spacing =
      match !Flag_parsing_c.spacing with
      | Flag_parsing_c.SMPL -> true
      | _ -> false in

    let args_pp =
      (env, pr_cocci, pr_c, pr_cspace,
       (if smpl_spacing then pr_space else pr_cspace),
       pr_arity,
       (if smpl_spacing then pr_barrier else pr_nobarrier),
       indent, unindent) in

    (* old: when for yacfe with partial cocci:
     *    add_elem t false;
     *)

    (* patch: when need full coccinelle transformation *)
    let unparser = Unparse_cocci.pp_list_list_any args_pp false in
    match mcode with
    | Ast_cocci.MINUS (_, inst, adj, any_xxs) ->
        (* The removed token is still emitted, so that the whole set of
         * tokens is available to the later passes, which can then
         * process and remove e.g. what lies between two minus tokens *)
        add_elem t (Min (inst, adj));
        (match any_xxs with
         | Ast_cocci.NOREPLACEMENT -> ()
         | Ast_cocci.REPLACEMENT (any_xxs, _) ->
             unparser any_xxs Unparse_cocci.InPlace)
    | Ast_cocci.CONTEXT (_, any_befaft) ->
        (match any_befaft with
         | Ast_cocci.NOTHING ->
             add_elem t Ctx
         | Ast_cocci.BEFORE (xxs, _) ->
             unparser xxs Unparse_cocci.Before;
             add_elem t Ctx
         | Ast_cocci.AFTER (xxs, _) ->
             add_elem t Ctx;
             unparser xxs Unparse_cocci.After
         | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
             unparser xxs Unparse_cocci.Before;
             add_elem t Ctx;
             unparser yys Unparse_cocci.After)
    | Ast_cocci.PLUS _ -> raise Impossible
  in

  List.iter expand_info toks;
  List.rev !toks_out
458
459
460 (*****************************************************************************)
461 (* Tokens2 processing, filtering, adjusting *)
462 (*****************************************************************************)
463
(* True for a T2 holding a TCommentSpace token (only whitespace). *)
let is_space tok =
  match tok with
  | T2 (Parser_c.TCommentSpace _, _, _) -> true
  | _ -> false
467
(* True for a T2 holding a TCommentNewline token. *)
let is_newline tok =
  match tok with
  | T2 (Parser_c.TCommentNewline _, _, _) -> true
  | _ -> false
471
(* True for a T2 holding either a TCommentSpace (only whitespace) or a
   TCommentNewline (newline plus whitespace). *)
let is_whitespace tok =
  match tok with
  | T2 ((Parser_c.TCommentSpace _ | Parser_c.TCommentNewline _), _, _) ->
      true
  | _ -> false
480
(* True for the comment-like T2 tokens that may be removed along with
   minus code: spaces, newlines, attribute/macro/directive cpp comments,
   and ordinary comments unless Flag_parsing_c.keep_comments is set. *)
let is_minusable_comment tok =
  match tok with
  | T2 (t, _, _) ->
      (match t with
       | Parser_c.TCommentSpace _ (* only whitespace *)
       (* patch: coccinelle *)
       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
       | Parser_c.TComment _ -> not !Flag_parsing_c.keep_comments
       | Parser_c.TCommentCpp
           ((Token_c.CppAttr
            | Token_c.CppMacro
            | Token_c.CppDirective (* result was false *)), _) -> true
       (* TCommentMisc, CppPassingCosWouldGetError and everything else *)
       | _ -> false)
  | _ -> false
501
(* Variant of is_minusable_comment that never accepts cpp comments:
   only spaces, newlines, and ordinary comments (the latter unless
   Flag_parsing_c.keep_comments is set). *)
let is_minusable_comment_nocpp tok =
  match tok with
  | T2 (t, _, _) ->
      (match t with
       | Parser_c.TCommentSpace _ (* only whitespace *)
       (* patch: coccinelle *)
       | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
       | Parser_c.TComment _ -> not !Flag_parsing_c.keep_comments
       (* every TCommentCpp, TCommentMisc and everything else *)
       | _ -> false)
  | _ -> false
522
(* True for the token2 kinds that do not wrap a C token or a fake node:
   Cocci2, C2, Comma and the indentation markers. *)
let all_coccis tok =
  match tok with
  | T2 _ | Fake2 _ -> false
  | Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ -> true
526
(* True for a removable comment-like token (see is_minusable_comment) or
   for any of the tokens accepted by all_coccis.
   Previously gave up if the first character was a newline, but it is
   not clear why.
   Uses (||) rather than the deprecated synonym 'or'. *)
let is_minusable_comment_or_plus x = is_minusable_comment x || all_coccis x
529
(* Mark a context comment-like token as removed, with adjacency
   information [adj].  Erasing a real comment or a cpp comment is
   reported through pr2.  Tokens that are already minus, and fake
   tokens, are returned unchanged.
   Raises Impossible on a context token that is not one of the expected
   comment kinds, and on any token that is neither T2 nor Fake2. *)
let set_minus_comment adj tok =
  match tok with
  | Fake2 _ -> tok
  (* patch: coccinelle *)
  | T2 (_, Min _, _) -> tok
  | T2 (t, Ctx, idx) ->
      let str = TH.str_of_tok t in
      (match t with
       | Parser_c.TCommentSpace _
       (* patch: coccinelle *)
       | Parser_c.TCommentNewline _ -> ()
       | Parser_c.TComment _
       | Parser_c.TCommentCpp
           ((Token_c.CppAttr | Token_c.CppMacro | Token_c.CppDirective), _) ->
           pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
                  (TH.line_of_tok t) str)
       | _ -> raise Impossible);
      T2 (t, Min adj, idx)
  | _ -> raise Impossible
552
(* Like set_minus_comment, except that Cocci2, C2, Comma and the
   indentation markers are returned untouched instead of being
   rejected. *)
let set_minus_comment_or_plus adj tok =
  match tok with
  | Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ -> tok
  | T2 _ | Fake2 _ -> set_minus_comment adj tok
556
(* Exclude from [xs] every T2 token marked Min. *)
let drop_minus xs =
  let is_minus tok =
    match tok with
    | T2 (_, Min _, _) -> true
    | _ -> false in
  Common.exclude is_minus xs
562
(* Exclude from [xs] every T2 whose token satisfies TH.is_expanded. *)
let drop_expanded xs =
  let is_expanded tok =
    match tok with
    | T2 (t, _, _) -> TH.is_expanded t
    | _ -> false in
  Common.exclude is_expanded xs
568
(* Exclude from [xs] every Fake2 token. *)
let drop_fake xs =
  let is_fake tok =
    match tok with
    | Fake2 _ -> true
    | _ -> false in
  Common.exclude is_fake xs
574
(* Remove from the token2 list everything that must disappear with the
   minus code.  The passes are, in order:
   - drop the expanded tokens;
   - adjust_around_minus: mark as minus the spaces, newlines and comments
     lying within regions of removed code;
   - drop the fake tokens;
   - adjust_after_brace: drop the blank lines left after a "{";
   - from_newline / adjust_before_brace, on the reversed list: clean up
     what precedes a "}" or a newline;
   - drop every token marked minus.
   Marking a real comment as minus is logged by set_minus_comment. *)
let remove_minus_and_between_and_expanded_and_fake xs =

  (* get rid of expanded tok *)
  let xs = drop_expanded xs in

  let minus_or_comment = function
    | T2 (_, Min _, _) -> true
    | x -> is_minusable_comment x in

  let minus_or_comment_nocpp = function
    | T2 (_, Min _, _) -> true
    | x -> is_minusable_comment_nocpp x in

  let common_adj (index1, adj1) (index2, adj2) =
    let same_adj = (* same adjacency info *)
      match (adj1, adj2) with
      | (Ast_cocci.ADJ adj1, Ast_cocci.ADJ adj2) -> adj1 = adj2
      | (Ast_cocci.ALLMINUS, _) | (_, Ast_cocci.ALLMINUS) -> true in
    same_adj &&
    (* non-empty intersection of witness trees *)
    not ((Common.inter_set index1 index2) = []) in

  (* new idea: collects regions not containing non-space context code
     if two adjacent minus tokens satisfy common_adj then delete
     all spaces, comments etc between them
     if two adjacent minus tokens do not satisfy common_adj only delete
     the spaces between them if there are no comments, etc.
     if the region contain no plus code and is both preceded and followed
     by a newline, delete the initial newline. *)

  let rec adjust_around_minus = function
    | [] -> []
    | (T2 (Parser_c.TCommentNewline _, _b, _i) as x) ::
      ((Fake2 (Min adj1) | T2 (_, Min adj1, _)) as t1) :: xs ->
        let (minus_list, rest) = Common.span not_context (t1 :: xs) in
        let contains_plus = List.exists is_plus minus_list in
        (* the leading newline goes away only when the region also ends
           with a newline and holds nothing but minus code and comments *)
        let x =
          match List.rev minus_list with
          | (T2 (Parser_c.TCommentNewline _, _b, _i)) :: _
            when List.for_all minus_or_comment minus_list ->
              set_minus_comment_or_plus adj1 x
          | _ -> x in
        x :: adjust_within_minus contains_plus minus_list @
        adjust_around_minus rest
    | ((Fake2 (Min _) | T2 (_, Min _, _)) as t1) :: xs ->
        let (minus_list, rest) = Common.span not_context (t1 :: xs) in
        let contains_plus = List.exists is_plus minus_list in
        adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
    | x :: xs ->
        x :: adjust_around_minus xs
  and adjust_within_minus cp (* contains plus *) = function
    | ((Fake2 (Min adj1) | T2 (_, Min adj1, _)) as t1) :: after_t1 ->
        let not_minus = function T2 (_, Min _, _) -> false | _ -> true in
        let (not_minus_list, rest) = Common.span not_minus after_t1 in
        t1 ::
        (match rest with
         | ((Fake2 (Min adj2) | T2 (_, Min adj2, _)) as t2) :: xs
           when common_adj adj1 adj2 ->
             (List.map (set_minus_comment_or_plus adj1) not_minus_list)
             @ (adjust_within_minus cp (t2 :: xs))
         | ((Fake2 (Min _) | T2 (_, Min _, _)) as t2) :: xs ->
             if not cp && List.for_all is_whitespace not_minus_list
             then
               (List.map (set_minus_comment_or_plus adj1) not_minus_list)
               @ (adjust_within_minus cp (t2 :: xs))
             else
               not_minus_list @ (adjust_within_minus cp (t2 :: xs))
         | _ ->
             if cp
             then after_t1
             else
               (* remove spaces after removed stuff, eg a comma after a
                  function argument *)
               let (spaces, rest) = Common.span is_space after_t1 in
               (List.map (set_minus_comment_or_plus adj1) spaces)
               @ rest)
    | _ -> failwith "should always start with minus"
  and not_context = function
    | (T2 (_, Ctx, _) as x) when not (is_minusable_comment x) -> false
    | _ -> true
  and is_plus = function
    | C2 _ | Comma _ | Cocci2 _ -> true
    | _ -> false in

  let xs = adjust_around_minus xs in

  (* get rid of fake tok *)
  let xs = drop_fake xs in

  (* this drops blank lines after a brace introduced by removing code *)
  let minus_or_comment_nonl = function
    | T2 (_, Min _, _) -> true
    | T2 (Parser_c.TCommentNewline _, _b, _i) -> false
    | x -> is_minusable_comment x in

  let rec adjust_after_brace = function
    | [] -> []
    | ((T2 (_, Ctx, _)) as x) :: ((T2 (_, Min adj, _) :: _) as xs)
      when str_of_token2 x =$= "{" ->
        let (between_minus, rest) = Common.span minus_or_comment_nonl xs in
        let is_whitespace = function
          | T2 (Parser_c.TCommentSpace _, _b, _i)
          (* patch: cocci *)
          | T2 (Parser_c.TCommentNewline _, _b, _i) -> true
          | _ -> false in
        let (newlines, rest) = Common.span is_whitespace rest in
        (* working on the reversed list, keep everything from the last
           newline onwards and mark what comes before it as minus *)
        let (drop_newlines, last_newline) =
          let rec loop = function
            | [] -> ([], [])
            | ((T2 (Parser_c.TCommentNewline _, _b, _i)) as x) :: rest ->
                (List.rev rest, [x])
            | x :: xs ->
                let (drop_newlines, last_newline) = loop xs in
                (drop_newlines, x :: last_newline) in
          loop (List.rev newlines) in
        x :: between_minus @ (List.map (set_minus_comment adj) drop_newlines) @
        last_newline @
        adjust_after_brace rest
    | x :: xs -> x :: adjust_after_brace xs in

  let xs = adjust_after_brace xs in

  (* search backwards from context } over spaces until reaching a newline.
     then go back over all minus code until reaching some context or + code.
     get rid of all intervening spaces, newlines, and comments
     input is reversed *)
  let rec adjust_before_brace = function
    | [] -> []
    | ((T2 (_, Ctx, _)) as x) :: xs
      when str_of_token2 x =$= "}" || is_newline x ->
        let (outer_spaces, rest) = Common.span is_space xs in
        x :: outer_spaces @
        (match rest with
         | ((T2 (Parser_c.TCommentNewline _, Ctx, _i)) as h) ::
           (* the rest of this code is the same as from_newline below
              but merging them seems to be error prone... *)
           ((T2 (_, Min adj, _)) as m) :: rest ->
             let (spaces, rest) = Common.span minus_or_comment_nocpp rest in
             h :: m ::
             (List.map (set_minus_comment adj) spaces) @
             (adjust_before_brace rest)
         | _ -> adjust_before_brace rest)
    | x :: xs -> x :: (adjust_before_brace xs) in

  let from_newline = function
    | ((T2 (_, Min adj, _)) as m) :: rest ->
        let (spaces, rest) = Common.span minus_or_comment_nocpp rest in
        m ::
        (List.map (set_minus_comment adj) spaces) @
        (adjust_before_brace rest)
    | ((T2 (t0, Ctx, _)) as m0) :: ((T2 (_, Min adj, _)) as m) :: rest
      when TH.str_of_tok t0 = "" ->
        (* This is for the case of a #define that is completely deleted,
           because a #define has a strange EOL token at the end.
           We hope there is no other kind of token that is represented by
           "", but it seems like changing the kind of token might break
           the end of entity recognition in the C parser.
           See parsing_hacks.ml *)
        let (spaces, rest) = Common.span minus_or_comment_nocpp rest in
        m0 :: m ::
        (List.map (set_minus_comment adj) spaces) @
        (adjust_before_brace rest)
    | rest -> adjust_before_brace rest in

  let xs = List.rev (from_newline (List.rev xs)) in
  let xs = drop_minus xs in
  xs
741
(* Normally, in C code, a semicolon is not preceded by a space.  The
   token list is scanned backwards: before each context or Cocci2 token
   whose text is ";", ")" or ",", the runs of spaces that are mixed with
   removed tokens are dropped.  Spaces are only deleted if something is
   actually deleted. *)
let adjust_before_semicolon toks =
  let is_separator tok =
    match tok with
    | T2 (_, Ctx, _) | Cocci2 _ ->
        List.mem (str_of_token2 tok) [";"; ")"; ","]
    | _ -> false in
  (* works on the reversed list: [xs] is what precedes the separator *)
  let rec search_minus seen_minus xs =
    let (_spaces, rest) = Common.span is_space xs in
    match rest with
    | ((T2 (_, Min _, _)) as a) :: rerest -> a :: search_minus true rerest
    | _ -> if seen_minus then rest else xs in
  let rec search_semic = function
    | [] -> []
    | x :: xs ->
        let xs = if is_separator x then search_minus false xs else xs in
        x :: search_semic xs in
  List.rev (search_semic (List.rev toks))
759
(* Normally, in C code, a ( is not followed by a space or newline.
   After each context or Cocci2 token whose text is "(", the whitespace
   mixed with removed tokens is dropped.  Whitespace is only deleted if
   something is actually deleted. *)
let adjust_after_paren toks =
  let is_open_paren tok =
    match tok with
    | T2 (_, Ctx, _) | Cocci2 _ ->
        List.mem (str_of_token2 tok) ["("] (* other things? *)
    | _ -> false in
  let rec search_minus seen_minus xs =
    let (_spaces, rest) = Common.span is_whitespace xs in
    match rest with
    | ((T2 (_, Min _, _)) as a) :: rerest -> (* minus *)
        a :: search_minus true rerest
    | ((T2 (_, Ctx, _)) as a) :: rerest when str_of_token2 a = "," ->
        (* comma after ( will be deleted, so consider it as minus code
           already *)
        a :: search_minus true rerest
    | _ -> if seen_minus then rest else xs in (* drop trailing space *)
  let rec search_paren = function
    | [] -> []
    | x :: xs ->
        let xs = if is_open_paren x then search_minus false xs else xs in
        x :: search_paren xs in
  search_paren toks
781
(* This is for the case where braces are added around an if branch:
   when a context ")" is followed, after optional whitespace, by an
   added "{" that is itself followed by more added code, the brace is
   moved up to the line of the ")", separated from it by one space. *)
let paren_then_brace toks =
  let search_plus xs =
    let (spaces, rest) = Common.span is_whitespace xs in
    match rest with
    (* move the brace up to the previous line *)
    | ((Cocci2 ("{", _, _, _, _)) as brace) :: (((Cocci2 _) :: _) as rest) ->
        C2 " " :: brace :: (spaces @ rest)
    | _ -> xs in
  let rec search_paren = function
    | [] -> []
    | ((T2 (_, Ctx, _)) as x) :: xs when List.mem (str_of_token2 x) [")"] ->
        x :: search_paren (search_plus xs)
    | x :: xs -> x :: search_paren xs in
  search_paren toks
799
(* Test a token string against Common.regexp_alpha. *)
let is_ident_like str = (==~) str Common.regexp_alpha
801
(* Remove spaces that end up at the end of a line:
   - a C2 " " directly followed by a context space, a context newline or
     an added "\n";
   - a run of context spaces that is followed by removed tokens and then
     by a context newline (only the removed tokens and the newline are
     kept).
   A list of at most one token is returned unchanged. *)
let rec drop_space_at_endline toks =
  match toks with
  | [] | [_] -> toks
  | (C2 " ") ::
    (((T2 (Parser_c.TCommentSpace _, Ctx, _)
      | Cocci2 ("\n", _, _, _, _)
      | T2 (Parser_c.TCommentNewline _, Ctx, _)) :: _) as following) ->
      (* when unparse_cocci doesn't know whether space is needed *)
      drop_space_at_endline following
  | ((T2 (Parser_c.TCommentSpace _, Ctx, _)) as first_space) :: tail ->
      let (outer_spaces, tail) = Common.span is_space tail in
      let is_minus = function
        | T2 (_, Min _, _) -> true
        | _ -> false in
      let minus_or_context_space = function
        | T2 (_, Min _, _)
        | T2 (Parser_c.TCommentSpace _, Ctx, _) -> true
        | _ -> false in
      let (minus, tail) = Common.span minus_or_context_space tail in
      (match tail with
       | ((T2 (Parser_c.TCommentNewline _, Ctx, _)) as nl) :: after_nl
         when List.exists is_minus minus ->
           (* drop trailing spaces *)
           minus @ (nl :: drop_space_at_endline after_nl)
       | _ ->
           (* nothing was removed before the end of the line: keep all *)
           first_space :: (outer_spaces @ minus @ drop_space_at_endline tail))
  | tok :: tail ->
      tok :: drop_space_at_endline tail
829
(* If a removed "(" sits directly between two context tokens, the first
   of which is not whitespace, add a space after it so that the two
   context tokens stay separated.  Lists shorter than three tokens are
   returned unchanged. *)
let rec paren_to_space toks =
  match toks with
  | ((T2 (_, Ctx, _)) as left) :: ((T2 (t, Min _, _)) as removed)
    :: (((T2 (_, Ctx, _)) :: _) as right)
    when not (is_whitespace left) && TH.str_of_tok t = "(" ->
      left :: removed :: (C2 " ") :: paren_to_space right
  | [] | [_] | [_; _] -> toks
  | first :: others -> first :: paren_to_space others
839
(* Insert the spacing needed between consecutive tokens.
   - With SmPL spacing, two consecutive added tokens are separated as in
     the semantic patch, using their recorded line and column positions.
   - Between a context token and an added token, and between any other
     pair of tokens, a single space is added when both texts are
     identifier-like (on the context/added boundary, also when the added
     token is "=").
   Uses (||) rather than the deprecated synonym 'or'. *)
let rec add_space xs =
  match xs with
  | [] -> []
  | [x] -> [x]
  | (Cocci2 (_, lnx, _, rcolx, _) as x) :: ((Cocci2 (_, lny, lcoly, _, _)) as y) :: xs
    when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
         not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
      (* this only works within a line.  could consider whether
         something should be done to add newlines too, rather than
         printing them explicitly in unparse_cocci. *)
      x :: C2 (String.make (lcoly - rcolx) ' ') :: add_space (y :: xs)
  | (Cocci2 (_, lnx, _, rcolx, _) as x) :: ((Cocci2 (_, lny, lcoly, _, _)) as y) :: xs
    when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
         not (lnx = -1) && lnx < lny && not (rcolx = -1) ->
      (* the second token is on a later line: emit one newline per line
         of difference, then indent up to its left column *)
      x :: C2 (String.make (lny - lnx) '\n') ::
      C2 (String.make (lcoly - 1) ' ') :: (* -1 is for the + *)
      add_space (y :: xs)
  | ((T2 (_, Ctx, _)) as x) :: ((Cocci2 _) as y) :: xs -> (* add space on boundary *)
      let sx = str_of_token2 x in
      let sy = str_of_token2 y in
      if is_ident_like sx && (is_ident_like sy || List.mem sy ["="])
      then x :: C2 " " :: (add_space (y :: xs))
      else x :: (add_space (y :: xs))
  | x :: y :: xs -> (* not boundary, not sure if it is possible *)
      let sx = str_of_token2 x in
      let sy = str_of_token2 y in
      if is_ident_like sx && is_ident_like sy
      then x :: C2 " " :: (add_space (y :: xs))
      else x :: (add_space (y :: xs))
872
(* A fake comma is added at the end of an unordered initlist or an enum
   decl, if the initlist or enum doesn't already end in a comma.  This
   comma is only needed if there is + code, ie if a Cocci2 token comes
   after it (possibly after some whitespace); otherwise it is dropped.
   A list of at most one token is returned unchanged. *)
let rec drop_end_comma toks =
  match toks with
  | [] | [_] -> toks
  | ((Comma ",") as comma) :: rest ->
      let (_newlines, after_whitespace) = Common.span is_whitespace rest in
      let cleaned_rest = drop_end_comma rest in
      (match after_whitespace with
       | (Cocci2 _) :: _ -> comma :: cleaned_rest
       | _ -> cleaned_rest)
  | x :: xs -> x :: drop_end_comma xs
886
887 (* The following only works for the outermost function call. Stack records
888 the column of all open parentheses. Space_cell contains the most recent
889 comma in the outermost function call. The goal is to decide whether this
890 should be followed by a space or a newline and indent. *)
(* [add_newlines toks tabbing_unit] walks the token list while keeping an
   estimate of the current output column.  The [Cocci2] tokens carry hints:
   [StartBox] pushes the current column on a stack, [EndBox] pops it, and
   [SpaceOrNewline cell] marks a place where either a space or a line break
   can be printed.  Only when exactly one box is open, and the column has
   gone past [Flag_parsing_c.max_width], is the most recently remembered
   cell overwritten with a newline followed by an indentation reaching the
   column recorded for that box.  Once the whole list has been measured,
   every [SpaceOrNewline] hint is replaced by a [C2] token holding the final
   content of its cell.  In [SMPL] spacing mode the list is returned
   untouched.
   Raises [Failure] on [Fake2], [Indent_cocci2] and [Unindent_cocci2]. *)
let add_newlines toks tabbing_unit =
  (* indentation string that is [width] columns wide: as many tabbing units
     as fit, completed with spaces *)
  let create_indent width =
    let (unit_str, unit_width) =
      match tabbing_unit with
      | None | Some ("\t", _) -> ("\t", 8)
      (* a unit of width 0 would never let [fill] advance; use a tab *)
      | Some ("", _) -> ("\t", 8)
      | Some (s, _) -> (s, String.length s) (* assuming only spaces *) in
    let rec fill acc used =
      if used + unit_width <= width
      then fill (acc ^ unit_str) (used + unit_width)
      else acc ^ String.make (width - used) ' ' in
    fill "" 0 in
  (* [col] is the column reached, [open_col] the column of the open box.
     If the line is too long and a space cell is pending, turn that cell
     into a line break and return the column reached after the break. *)
  let check_for_newline col open_col = function
    | Some (start, cell) when col > Flag_parsing_c.max_width ->
        cell := "\n" ^ create_indent open_col;
        Some (open_col + (col - start))
    | Some _ | None -> None in
  (* column reached after printing [s] from column [col]; [s] may contain
     newlines, which reset the column, and tabs, counted as 8 columns *)
  let column_after s col =
    let step col = function
      | '\t' -> col + 8
      | '\n' -> 0
      | _ -> col + 1 in
    List.fold_left step col (list_of_string s) in
  let rec loop info col = function
    | [] -> []
    | (T2 (tok, _, _) as t) :: rest ->
        t :: loop info (column_after (TH.str_of_tok tok) col) rest
    | (Cocci2 (s, _, _, _, hint) as t) :: rest ->
        let (stack, space_cell) = info in
        let tail =
          match hint with
          | None -> loop info (column_after s col) rest
          | Some Unparse_cocci.StartBox ->
              let col = column_after s col in
              loop (col :: stack, space_cell) col rest
          | Some Unparse_cocci.EndBox ->
              let col = column_after s col in
              (match stack with
               | [open_col] ->
                   (* closing the outermost box: last chance to break *)
                   let col =
                     match check_for_newline col open_col space_cell with
                     | Some shifted -> shifted
                     | None -> col in
                   loop ([], None) col rest
               | _ -> loop (List.tl stack, space_cell) col rest)
          | Some (Unparse_cocci.SpaceOrNewline sp) ->
              let col = column_after s (col + 1 (*space*)) in
              (match stack with
               | [open_col] ->
                   (* remember [sp] as the pending cell, together with the
                      column at which the text following it starts *)
                   let (col, start) =
                     match check_for_newline col open_col space_cell with
                     | Some shifted -> (shifted, open_col)
                     | None -> (col, col) in
                   loop (stack, Some (start, sp)) col rest
               | _ -> loop info col rest) in
        t :: tail
    | (C2 s as t) :: rest | (Comma s as t) :: rest ->
        t :: loop info (column_after s col) rest
    | Fake2 _ :: _ | Indent_cocci2 :: _ | Unindent_cocci2 _ :: _ ->
        failwith "unexpected fake, indent, or unindent" in
  (* the accumulator is built in reverse, hence the C2 is consed first *)
  let redo_spaces acc = function
    | Cocci2 (s, line, lcol, rcol, Some (Unparse_cocci.SpaceOrNewline sp)) ->
        C2 !sp :: Cocci2 (s, line, lcol, rcol, None) :: acc
    | t -> t :: acc in
  match !Flag_parsing_c.spacing with
  | Flag_parsing_c.SMPL -> toks
  | _ ->
      (* the cells must all be final before they are read by redo_spaces *)
      let measured = loop ([], None) 0 toks in
      List.rev (List.fold_left redo_spaces [] measured)
960
(* When inserting some new code, because of a + in a SP, we must add this
 * code at the right place, with the right indentation. So each time we
 * encounter some spacing info, with some newline, we maintain the
 * current indentation level used.
 *
 * TODO problems: not accurate. ex: TODO
 *
 * TODO: if in #define region, should add a \ \n
 *)
(* [new_tabbing2 space] returns the part of [space] that follows its last
   newline character, i.e. the indentation of the line that [space] opens.
   When [space] contains no newline it is returned whole; when it ends with
   a newline the result is the empty string. *)
let new_tabbing2 space =
  try
    let last_nl = String.rindex space '\n' in
    String.sub space (last_nl + 1) (String.length space - last_nl - 1)
  with Not_found -> space
977
(* Same as [new_tabbing2], run through [Common.profile_code] under the
   label "C unparsing.new_tabbing". *)
let new_tabbing space =
  let compute () = new_tabbing2 space in
  Common.profile_code "C unparsing.new_tabbing" compute
980
981
(* [adjust_indentation xs] rewrites the token list so that newlines coming
   from + code are followed by an indentation consistent with the
   surrounding C code, and resolves the [Indent_cocci2] / [Unindent_cocci2]
   markers.
   Returns the new token list paired with the tabbing unit that was
   selected, if any, as [(unit string, its characters reversed)].
   The tabbing unit is first guessed by [find_first_tab]; when that fails it
   may still be inferred during the traversal by [adjust_tabbing_unit]. *)
let adjust_indentation xs =

  (* stack of indentation strings; their concatenation is what is printed
     after a newline that comes from cocci code *)
  let _current_tabbing = ref ([] : string list) in
  let tabbing_unit = ref None in

  let string_of_list l = String.concat "" (List.map string_of_char l) in

  (* try to pick a tabbing unit for the plus code: when none is known yet
     and the indentation grew, the unit is what [new_tab] has beyond the
     length of [old_tab] *)
  let adjust_tabbing_unit old_tab new_tab =
    if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
    then
      let old_tab = list_of_string old_tab in
      let new_tab = list_of_string new_tab in
      let rec loop = function
        | ([], extra) ->
            tabbing_unit := Some (string_of_list extra, List.rev extra)
        (* excluded by the length test above *)
        | (_, []) -> failwith "not possible"
        | (_ :: os, _ :: ns) -> loop (os, ns) in (* could check for equality *)
      loop (old_tab, new_tab) in

(*
  let remtab tu current_tab =
    let current_tab = List.rev(list_of_string current_tab) in
    let rec loop = function
	([],new_tab) -> string_of_list (List.rev new_tab)
      |	(_,[]) -> (-*weird; tabbing unit used up more than the current tab*-)
	""
      |	(t::ts,n::ns) when t =<= n -> loop (ts,ns)
      |	(_,ns) -> (-* mismatch; remove what we can *-)
	string_of_list (List.rev ns) in
    loop (tu,current_tab) in
*)

  (* initial guess: the indentation carried by the first newline token met
     after the first "{" token of the original code *)
  let rec find_first_tab started = function
    | [] -> ()
    | (T2 _ as x) :: rest when str_of_token2 x =$= "{" ->
        find_first_tab true rest
    (* patch: coccinelle *)
    | (T2 (Parser_c.TCommentNewline _, _, _) as x) :: _ when started ->
        let s = str_of_token2 x +> new_tabbing in
        tabbing_unit := Some (s, List.rev (list_of_string s))
    | _ :: rest -> find_first_tab started rest in
  find_first_tab false xs;

  (* true when, over the whole list, the number of ")" tokens does not
     exceed [ct] plus the number of "(" tokens; only T2 tokens count *)
  let rec balanced ct = function
    | [] -> ct >= 0
    | (T2 _ as x) :: rest ->
        (match str_of_token2 x with
         | "(" -> balanced (ct + 1) rest
         | ")" -> balanced (ct - 1) rest
         | _ -> balanced ct rest)
    | _ :: rest -> balanced ct rest in

  (* [started] becomes true once an opening brace has been seen *)
  let rec aux started xs =
    match xs with
    | [] -> []
    (* patch: coccinelle *)
    (* a newline followed by a non-permanent unindent: both are replaced by
       a bare newline, with no indentation after it *)
    | (T2 (Parser_c.TCommentNewline _, _, _)) :: Unindent_cocci2 false :: xs
    | (Cocci2 ("\n", _, _, _, _)) :: Unindent_cocci2 false :: xs ->
        (C2 "\n") :: aux started xs
    | (T2 _ as x) :: (T2 (Parser_c.TCommentNewline _, _, _)) ::
      ((Cocci2 ("{", _, _, _, _)) as a) :: xs
      when started && str_of_token2 x =$= ")" ->
        (* to be done for if, etc, but not for a function header *)
        x :: (C2 " ") :: a :: (aux started xs)
    | ((T2 (Parser_c.TCommentNewline _, _, _)) as x) :: xs
      when balanced 0 (fst (Common.span (function x -> not (is_newline x)) xs)) ->
        let old_tabbing = !_current_tabbing in
        str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := [s]);
        (* only trust the indentation after the first { *)
        (if started
         then
           adjust_tabbing_unit
             (String.concat "" old_tabbing)
             (String.concat "" !_current_tabbing));
        let coccis_rest = Common.span all_coccis xs in
        (match coccis_rest with
         | (_ :: _, (T2 _ as y) :: _) when str_of_token2 y =$= "}" ->
             (* the case where cocci code has been added before a close } *)
             x :: aux started (Indent_cocci2 :: xs)
         | _ -> x :: aux started xs)
    | Indent_cocci2 :: ((Cocci2 (_, _, lcoly, _, _)) as y) :: xs
      when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL ->
        (* SMPL spacing: indent by the column the code has in the SP *)
        let tu = String.make (lcoly - 1) ' ' in
        _current_tabbing := tu :: (!_current_tabbing);
        C2 (tu) :: aux started (y :: xs)
    | Indent_cocci2 :: xs ->
        (match !tabbing_unit with
         | None -> aux started xs
         | Some (tu, _) ->
             _current_tabbing := tu :: (!_current_tabbing);
             (* can't be C2, for later phases *)
             Cocci2 (tu, -1, -1, -1, None) :: aux started xs)
    | Unindent_cocci2 _ :: xs ->
        (match !_current_tabbing with
         | [] -> aux started xs
         | _ :: new_tabbing ->
             _current_tabbing := new_tabbing;
             aux started xs)
    (* border between existing code and cocci code *)
    | (T2 _ as x) :: ((Cocci2 ("\n", _, _, _, _)) as y) :: xs
      when str_of_token2 x =$= "{" ->
        x :: aux true (y :: Indent_cocci2 :: xs)
    | ((Cocci2 _) as x) :: (T2 _ as y) :: xs
      when str_of_token2 y =$= "}" ->
        x :: aux started (y :: Unindent_cocci2 true :: xs)
    (* starting the body of the function *)
    | (T2 _ as x) :: xs when str_of_token2 x =$= "{" -> x :: aux true xs
    | ((Cocci2 ("{", _, _, _, _)) as a) :: xs -> a :: aux true xs
    | ((Cocci2 ("\n", _, _, _, _)) as x) :: xs ->
        (* dont inline in expr because of weird eval order of ocaml *)
        let s = String.concat "" !_current_tabbing in
        (* can't be C2, for later phases *)
        x :: Cocci2 (s, -1, -1, -1, None) :: aux started xs
    | x :: xs -> x :: aux started xs in

  (* Run the traversal first, then read the tabbing unit.  Building the pair
     directly as (aux false xs, !tabbing_unit) leaves the order of the two
     evaluations unspecified, so the unit could be read before the traversal
     had a chance to infer it. *)
  let toks = aux false xs in
  (toks, !tabbing_unit)
1099
1100
(* Scan every pair of consecutive extended tokens and flag superfluous
   commas by setting their [remove] field:
   - "(" followed by ","  : the comma (second token) is flagged;
   - "," followed by ","  : the first comma is flagged;
   - "," followed by ")"  : the comma (first token) is flagged.
   Nothing is flagged when both tokens carry an [idx] and the second one is
   exactly one more than the first, i.e. the pair was already like this in
   the original file. *)
let rec find_paren_comma = function
  | [] | [_] -> ()
  | first :: (second :: _ as tail) ->
      let originally_adjacent =
        match first.idx, second.idx with
        | Some p1, Some p2 -> p2 =|= p1 + 1
        | _ -> false in
      (* do nothing if was like this in original file, otherwise adjust *)
      if not originally_adjacent then
        (match first.str, second.str with
         | ("(", ",") -> second.remove <- true
         | (",", ",") | (",", ")") -> first.remove <- true
         | _ -> ());
      find_paren_comma tail
1131
1132
(* [fix_tokens toks] wraps every token with [mk_token_extended], lets
   [find_paren_comma] flag commas on a view of the stream from which real
   comments have been excluded, then rebuilds the stream with
   [rebuild_tokens_extented] and unwraps the tokens again.  The filtered
   view holds the same extended tokens as the full list, so the flags set
   on it are seen when rebuilding. *)
let fix_tokens toks =
  let extended = List.map mk_token_extended toks in
  let is_comment = function
    | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
    | _ -> false in
  find_paren_comma (Common.exclude is_comment extended);
  List.map (fun ext -> ext.tok2) (rebuild_tokens_extented extended)
1144
1145
1146
1147 (*****************************************************************************)
1148 (* Final unparsing (and debugging support) *)
1149 (*****************************************************************************)
1150
1151 (* for debugging *)
(* Coarse classification of a token2, used to mark in the debugging output
   where each piece of text comes from. *)
type kind_token2 =
  | KFake      (* Fake2 token *)
  | KCocci     (* Cocci2 token *)
  | KC         (* C2 or Comma token *)
  | KExpanded  (* T2 token whose position is an ExpandedTok *)
  | KOrigin    (* T2 token whose position is an OriginTok *)
1153
(* Classify a token for the debugging output.  Raises [Impossible] on the
   indentation markers, and on T2 tokens whose position is a FakeTok or an
   AbstractLineTok. *)
let kind_of_token2 tok =
  match tok with
  | Indent_cocci2 | Unindent_cocci2 _ -> raise Impossible
  | Fake2 _ -> KFake
  | Cocci2 _ -> KCocci
  | C2 _ | Comma _ -> KC
  | T2 (t, _, _) ->
      (match TH.pinfo_of_tok t with
       | OriginTok _ -> KOrigin
       | ExpandedTok _ -> KExpanded
       | FakeTok _ (* now a Fake2 *)
       | AbstractLineTok _ (* now a KC *) -> raise Impossible)
1167
(* text printed when a run of tokens of one kind ends *)
let end_mark = "!"

(* text printed when a run of tokens of the given kind starts; tokens of
   the original file get no opening mark *)
let start_mark kind =
  match kind with
  | KOrigin -> ""
  | KExpanded -> "!E!"
  | KC -> "!A!"
  | KCocci -> "!S!"
  | KFake -> "!F!"
1176
(* [print_all_tokens2 pr xs] sends the text of every token of [xs] to [pr],
   in order.  When [Flag_parsing_c.debug_unparsing] is set, each change of
   token kind is signalled by printing [end_mark] and then the [start_mark]
   of the new kind just before the token; the kind in force before the
   first token is [KOrigin]. *)
let print_all_tokens2 pr xs =
  let emit t = pr (str_of_token2 t) in
  if not !Flag_parsing_c.debug_unparsing
  then List.iter emit xs
  else begin
    let current_kind = ref KOrigin in
    List.iter
      (fun t ->
        let kind = kind_of_token2 t in
        if not (kind =*= !current_kind)
        then begin
          pr end_mark;
          pr (start_mark kind);
          current_kind := kind
        end;
        emit t)
      xs
  end
1194
1195
1196
1197
1198 (*****************************************************************************)
1199 (* Entry points *)
1200 (*****************************************************************************)
1201
1202 (* old: PPviatok was made in the beginning to allow to pretty print a
1203 * complete C file, including a modified C file by transformation.ml,
1204 * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
1205 * maybe flow_to_ast) all the cases. Indeed we don't need to do some
1206 * fancy stuff when a function was not modified at all. Just need to
1207 * print the list of token as-is. But now pretty_print_c.ml handles
1208 * almost everything so maybe less useful. Maybe PPviatok allows to
1209 * optimize a little the pretty printing.
1210 *
1211 * update: now have PPviastr which goes even faster than PPviatok, so
1212 * PPviatok has disappeared.
1213 *)
1214
(* How one element of the program is printed by pp_program. *)
type ppmethod =
  | PPnormal   (* go through the token-based unparsing phases *)
  | PPviastr   (* print the string recorded for the element, unchanged *)
1216
1217
1218
1219
(* The pp_program function will call pretty_print_c.ml with a special
 * function to print the leaf components, the tokens. When we want to
 * print a token, we may also need to print the spaces and comments that
 * were close to it in the original file (and that were omitted during the
 * parsing phase), and honor what the cocci-info attached to the token says.
 * Maybe we will not print the token if it's a MINUS-token, and maybe we will
 * print it and also print some cocci-code attached in a PLUS to it.
 * So we will also maybe call unparse_cocci. Because the cocci-code may
 * contain metavariables, unparse_cocci will in fact sometimes call back
 * pretty_print_c (which this time will not call back unparse_cocci again).
 *)
1231
(* [pp_program2 xs outfile] prints every element of [xs] through the
   printer that [Common.with_open_outfile] provides for [outfile].
   Each element is [((e, (str, toks_e)), ppmethod)]:
   - with [PPviastr], [str] is printed as is;
   - with [PPnormal], the tokens are collected from [e] and [toks_e], the
     mcode is expanded, and the result goes through the adjustment phases
     below before being printed.  In sgrep mode only the minus, fake and
     expanded tokens are dropped.
   When [Flag_parsing_c.debug_unparsing] is set, the text is sent to stderr
   with [pr2_no_nl] instead of going through the file printer.
   The assertion fails if a token of [toks_e] is neither an origin nor an
   expanded token. *)
let pp_program2 xs outfile =
  Common.with_open_outfile outfile (fun (pr,_chan) ->
    let pr s =
      if !Flag_parsing_c.debug_unparsing
      then begin pr2_no_nl s; flush stderr end
      else pr s
        (* flush chan; *)
        (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
    in

    xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
      (* here can still work on ast *)
      let e = remove_useless_fakeInfo_struct e in

      match ppmethod with
      | PPnormal ->
          (* now work on tokens *)
          (* phase1: just get all the tokens, all the information *)
          assert(toks_e +> List.for_all (fun t ->
            TH.is_origin t || TH.is_expanded t
          ));
          let toks = get_fakeInfo_and_tokens e toks_e in
          let toks = displace_fake_nodes toks in
          (* assert Origin;ExpandedTok;Faketok *)
          let toks = expand_mcode toks in

          (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
           * and no tag information, just NOTHING. *)

          let toks =
            if !Flag.sgrep_mode2
            then
              (* nothing else to do for sgrep *)
              drop_expanded(drop_fake(drop_minus toks))
            else
              (* phase2: can now start to filter and adjust *)
              (let (toks,tu) = adjust_indentation toks in
              let toks = adjust_before_semicolon toks in(*before remove minus*)
              let toks = adjust_after_paren toks in(*also before remove minus*)
              let toks = drop_space_at_endline toks in
              let toks = paren_to_space toks in
              let toks = drop_end_comma toks in
              let toks = remove_minus_and_between_and_expanded_and_fake toks in
              (* assert Origin + Cocci + C and no minus *)
              let toks = add_space toks in
              let toks = add_newlines toks tu in
              let toks = paren_then_brace toks in
              let toks = fix_tokens toks in
              toks) in

          (* in theory here could reparse and rework the ast! or
           * apply some SP. Not before cos julia may have generated
           * not parsable file. Need do unparsing_tricks call before being
           * ready to reparse. *)
          print_all_tokens2 pr toks

      | PPviastr -> pr str
    )
  )
1291
(* Same as [pp_program2], run through [Common.profile_code] under the
   label "C unparsing". *)
let pp_program a b =
  let run () = pp_program2 a b in
  Common.profile_code "C unparsing" run
1294
1295
(* Print all the elements of [xs] to [outfile], each of them with the
   [PPnormal] method. *)
let pp_program_default xs outfile =
  pp_program (List.map (fun x -> (x, PPnormal)) xs) outfile