5c7bbb584f5b28a93725c20aaec68985658f4a72
[bpt/coccinelle.git] / parsing_c / unparse_c.ml
1 (* Yoann Padioleau, Julia Lawall
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *
15 *
16 * Modifications by Julia Lawall for better newline handling.
17 *)
18 open Common
19
20 open Ast_c
21
22 module TH = Token_helpers
23
24
25 (* should keep comments and directives in between adjacent deleted terms,
26 but not comments and directives within deleted terms. should use the
27 labels found in the control-flow graph *)
28
29
30
31 (*****************************************************************************)
32 (* Wrappers *)
33 (*****************************************************************************)
34 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
35
36 (*****************************************************************************)
37 (* Types used during the intermediate phases of the unparsing *)
38 (*****************************************************************************)
39
40 type token1 =
41 | Fake1 of info
42 | T1 of Parser_c.token
43
44 (* The cocci_tag of the token should always be a NOTHING. The mark of
45 * the token can only be OriginTok or ExpandedTok. Why not get rid of
46 * token and get something simpler ? because we need to know if the
47 * info is a TCommentCpp or TCommentSpace, etc for some of the further
48 * analysis so easier to keep with the token.
49 *
50 * This type contains the whole information. Have all the tokens with this
51 * type.
52 *)
53 type min =
54 Min of (int list (* match numbers from witness trees *) *
55 Ast_cocci.adjacency (* adjacency information *))
56 | Ctx
57
58 type token2 =
59 | T2 of Parser_c.token * min *
60 int option (* orig index, abstracting away comments and space *)
61 | Fake2 of min
62 | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
63 * Unparse_cocci.nlhint option
64 | C2 of string
65 | Comma of string
66 | Indent_cocci2
67 | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
68 | EatSpace2
69
70 (* not used yet *)
71 type token3 =
72 | T3 of Parser_c.token
73 | Cocci3 of string
74 | C3 of string
75
76
77 (* similar to the tech in parsing_hack *)
78 type token_extended = {
79 tok2 : token2;
80 str : string;
81 idx: int option; (* to know if 2 tokens were consecutive in orig file *)
82 mutable new_tokens_before : token2 list;
83 mutable remove : bool;
84 }
85
86
87 (*****************************************************************************)
88 (* Helpers *)
89 (*****************************************************************************)
90
91 let info_of_token1 t =
92 match t with
93 | Fake1 info -> info
94 | T1 tok -> TH.info_of_tok tok
95
96 let print_token1 = function
97 T1 tok -> TH.str_of_tok tok
98 | Fake1 info -> "fake"
99
100 let str_of_token2 = function
101 | T2 (t,_,_) -> TH.str_of_tok t
102 | Fake2 _ -> ""
103 | Cocci2 (s,_,_,_,_) -> s
104 | C2 s -> s
105 | Comma s -> s
106 | Indent_cocci2 -> ""
107 | Unindent_cocci2 _ -> ""
108 | EatSpace2 -> ""
109
110 let print_token2 = function
111 | T2 (t,b,_) ->
112 let t_str =
113 match t with
114 | Parser_c.TCommentSpace _ -> " sp "
115 | Parser_c.TCommentNewline _ -> " nl "
116 | Parser_c.TCommentCpp _ -> " cp "
117 | Parser_c.TCommentMisc _ -> " misc "
118 | Parser_c.TComment _ -> " comment "
119 | _ -> "" in
120 let b_str =
121 match b with
122 Min (index,adj) ->
123 Printf.sprintf "-%d[%s]"
124 (match adj with Ast_cocci.ADJ n -> n | _ -> -1)
125 (String.concat " " (List.map string_of_int index))
126 | Ctx -> "" in
127 "T2:"^b_str^t_str^TH.str_of_tok t
128 | Fake2 b ->
129 let b_str =
130 match b with
131 Min (index,adj) ->
132 Printf.sprintf "-%d[%s]"
133 (match adj with Ast_cocci.ADJ n -> n | _ -> -1)
134 (String.concat " " (List.map string_of_int index))
135 | Ctx -> "" in
136 b_str^"fake"
137 | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
138 | C2 s -> "C2:"^s
139 | Comma s -> "Comma:"^s
140 | Indent_cocci2 -> "Indent"
141 | Unindent_cocci2 _ -> "Unindent"
142 | EatSpace2 -> "EatSpace"
143
144 let simple_print_all_tokens1 l =
145 List.iter (function x -> Printf.printf "|%s| " (print_token1 x)) l;
146 Printf.printf "\n"
147
148 let simple_print_all_tokens2 l =
149 List.iter (function x -> Printf.printf "|%s| " (print_token2 x)) l;
150 Printf.printf "\n"
151
152 let str_of_token3 = function
153 | T3 t -> TH.str_of_tok t
154 | Cocci3 s | C3 s -> s
155
156
157
158 let mk_token_extended x =
159 let origidx =
160 match x with
161 | T2 (_,_, idx) -> idx
162 | _ -> None
163 in
164 { tok2 = x;
165 str = str_of_token2 x;
166 idx = origidx;
167 new_tokens_before = [];
168 remove = false;
169 }
170
171 let rebuild_tokens_extented toks_ext =
172 let _tokens = ref [] in
173 toks_ext +> List.iter (fun tok ->
174 tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
175 if not tok.remove then push2 tok.tok2 _tokens;
176 );
177 let tokens = List.rev !_tokens in
178 (tokens +> List.map mk_token_extended)
179
180
181 let mcode_contain_plus = function
182 | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
183 | Ast_cocci.CONTEXT _ -> true
184 (* patch: when need full coccinelle transformation *)
185 | Ast_cocci.MINUS (_,_,_,Ast_cocci.NOREPLACEMENT) -> false
186 | Ast_cocci.MINUS (_,_,_,Ast_cocci.REPLACEMENT _) -> true(*REPL is not empty*)
187 | Ast_cocci.PLUS _ -> raise Impossible
188
189 let contain_plus info =
190 let mck = Ast_c.mcode_of_info info in
191 mcode_contain_plus mck
192
193 (*****************************************************************************)
194 (* Last fix on the ast *)
195 (*****************************************************************************)
196
197 (* Because of the ugly trick to handle initialiser, I generate fake ','
198 * for the last initializer element, but if there is nothing around it,
199 * I don't want in the end to print it.
200 *)
201
202 let remove_useless_fakeInfo_struct program =
203 let bigf = { Visitor_c.default_visitor_c_s with
204 Visitor_c.kini_s = (fun (k,bigf) ini ->
205 match k ini with
206 | InitList args, ii ->
207 (match ii with
208 | [_i1;_i2] -> ini
209 | [i1;i2;iicommaopt] ->
210 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
211 && (Ast_c.is_fake iicommaopt)
212 (* sometimes the guy put a normal iicommaopt *)
213 then InitList args, [i1;i2]
214 else InitList args, [i1;i2;iicommaopt]
215 | [i1;i2;iicommaopt;end_comma_opt] ->
216 (* only in #define. end_comma_opt canot be fake *)
217 (* not sure if this will be considered ambiguous with a previous
218 case? *)
219 if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
220 && (Ast_c.is_fake iicommaopt)
221 (* sometimes the guy put a normal iicommaopt *)
222 then InitList args, [i1;i2;end_comma_opt]
223 else InitList args, [i1;i2;iicommaopt;end_comma_opt]
224 | _ -> raise Impossible
225 )
226 | x -> x
227 )
228 } in
229 Visitor_c.vk_toplevel_s bigf program
230
231
232 (*****************************************************************************)
233 (* Tokens1 generation *)
234 (*****************************************************************************)
235
236 let get_fakeInfo_and_tokens celem toks =
237 let toks_in = ref toks in
238 let toks_out = ref [] in
239
240 (* todo? verify good order of position ? *)
241 let pr_elem info =
242 match Ast_c.pinfo_of_info info with
243 | FakeTok _ ->
244 Common.push2 (Fake1 info) toks_out
245 | OriginTok _ | ExpandedTok _ ->
246 (* get the associated comments/space/cppcomment tokens *)
247 let (before, x, after) =
248 !toks_in +> Common.split_when (fun tok ->
249 info =*= TH.info_of_tok tok)
250 in
251 assert(info =*= TH.info_of_tok x);
252 (*old: assert(before +> List.for_all (TH.is_comment)); *)
253 before +> List.iter (fun x ->
254 if not (TH.is_comment x)
255 then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
256 (* case such as int asm d3("x"); not yet in ast *)
257 );
258 before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
259 push2 (T1 x) toks_out;
260 toks_in := after;
261 | AbstractLineTok _ ->
262 (* can be called on type info when for instance use -type_c *)
263 if !Flag_parsing_c.pretty_print_type_info
264 then Common.push2 (Fake1 info) toks_out
265 else raise Impossible (* at this stage *)
266 in
267
268 let pr_space _ = () in (* use the spacing that is there already *)
269
270 Pretty_print_c.pp_program_gen pr_elem pr_space celem;
271
272 if not (null !toks_in)
273 then failwith "WEIRD: unparsing not finished";
274
275 List.rev !toks_out
276
277 (* Fake nodes that have BEFORE code or are - should be moved over any subsequent
278 whitespace and newlines, but not any comments, to get as close to the affected
279 code as possible. Similarly, fake nodes that have AFTER code should be moved
280 backwards. No fake nodes should have both before and after code. *)
281
282 let displace_fake_nodes toks =
283 let is_fake = function Fake1 _ -> true | _ -> false in
284 let is_whitespace = function
285 T1(Parser_c.TCommentSpace _)
286 (* patch: cocci *)
287 | T1(Parser_c.TCommentNewline _) -> true
288 | _ -> false in
289 let rec loop toks =
290 let fake_info =
291 try Some (Common.split_when is_fake toks)
292 with Not_found -> None in
293 match fake_info with
294 Some(bef,((Fake1 info) as fake),aft) ->
295 (match !(info.cocci_tag) with
296 | Some x ->
297 (match x with
298 (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_)
299 | (Ast_cocci.MINUS(_,_,_,Ast_cocci.REPLACEMENT _),_) ->
300 (* move the fake node forwards *)
301 let (whitespace,rest) = Common.span is_whitespace aft in
302 bef @ whitespace @ fake :: (loop rest)
303 | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
304 (* move the fake node backwards *)
305 let revbef = List.rev bef in
306 let (revwhitespace,revprev) = Common.span is_whitespace revbef in
307 let whitespace = List.rev revwhitespace in
308 let prev = List.rev revprev in
309 prev @ fake :: (loop (whitespace @ aft))
310 | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
311 bef @ fake :: (loop aft)
312 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
313 failwith "fake node should not be before-after"
314 | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
315 )
316 | None ->
317 bef @ fake :: (loop aft)
318 )
319 | None -> toks
320 | _ -> raise Impossible in
321 loop toks
322
323 (*****************************************************************************)
324 (* Tokens2 generation *)
325 (*****************************************************************************)
326
327 let comment2t2 = function
328 (Token_c.TCommentCpp
329 (* not sure iif the following list is exhaustive or complete *)
330 (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError),
331 (info : Token_c.info)) ->
332 C2(info.Common.str)
333 | (Token_c.TCommentCpp x,(info : Token_c.info)) ->
334 C2("\n"^info.Common.str^"\n")
335 | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x))
336
337 let expand_mcode toks =
338 let toks_out = ref [] in
339
340 let index = ref 0 in
341
342 let add_elem t minus =
343 match t with
344 | Fake1 info ->
345 let str = Ast_c.str_of_info info in
346 if str =$= ""
347 then push2 (Fake2 minus) toks_out
348 (* fx the fake "," at the end of a structure or enum.
349 no idea what other fake info there can be... *)
350 else push2 (Comma str) toks_out
351
352
353 | T1 tok ->
354 (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
355 (* no tag on expandedTok ! *)
356 let modified = function
357 None -> false
358 | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
359 | _ -> true in
360 (if (TH.is_expanded tok &&
361 modified !((TH.info_of_tok tok).cocci_tag)
362 (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*))
363 then
364 failwith
365 (Printf.sprintf
366 "expanded token %s on line %d is either modified or stored in a metavariable"
367 (TH.str_of_tok tok) (TH.line_of_tok tok)));
368
369 let tok' = tok +> TH.visitor_info_of_tok (fun i ->
370 { i with cocci_tag = ref Ast_c.emptyAnnot; }
371 ) in
372
373 let optindex =
374 if TH.is_origin tok && not (TH.is_real_comment tok)
375 then begin
376 incr index;
377 Some !index
378 end
379 else None
380 in
381
382 push2 (T2 (tok', minus, optindex)) toks_out
383 in
384
385 let expand_info t =
386 let (mcode,env) =
387 Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in
388
389 let pr_cocci s ln col rcol hint =
390 push2 (Cocci2(s,ln,col,rcol,hint)) toks_out in
391 let pr_c info =
392 (match Ast_c.pinfo_of_info info with
393 Ast_c.AbstractLineTok _ ->
394 push2 (C2 (Ast_c.str_of_info info)) toks_out
395 | Ast_c.FakeTok (s,_) ->
396 push2 (C2 s) toks_out
397 | _ ->
398 Printf.printf "line: %s\n" (Common.dump info);
399 failwith "not an abstract line");
400 (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
401 List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in
402
403 let pr_barrier ln col = (* marks a position, used around C code *)
404 push2 (Cocci2("",ln,col,col,None)) toks_out in
405 let pr_nobarrier ln col = () in (* not needed for linux spacing *)
406
407 let pr_cspace _ = push2 (C2 " ") toks_out in
408
409 let pr_space _ = () (* rely on add_space in cocci code *) in
410 let pr_arity _ = () (* not interested *) in
411
412 let indent _ = push2 Indent_cocci2 toks_out in
413 let unindent x = push2 (Unindent_cocci2 x) toks_out in
414 let eat_space _ = push2 EatSpace2 toks_out in
415
416 let args_pp =
417 (env, pr_cocci, pr_c, pr_cspace,
418 (match !Flag_parsing_c.spacing with
419 Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
420 pr_arity,
421 (match !Flag_parsing_c.spacing with
422 Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
423 indent, unindent, eat_space) in
424
425 (* old: when for yacfe with partial cocci:
426 * add_elem t false;
427 *)
428
429 (* patch: when need full coccinelle transformation *)
430 let unparser = Unparse_cocci.pp_list_list_any args_pp false in
431 match mcode with
432 | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
433 (* Why adding ? because I want to have all the information, the whole
434 * set of tokens, so I can then process and remove the
435 * is_between_two_minus for instance *)
436 add_elem t (Min (inst,adj));
437 (match any_xxs with
438 Ast_cocci.NOREPLACEMENT -> ()
439 | Ast_cocci.REPLACEMENT(any_xxs,_) ->
440 unparser any_xxs Unparse_cocci.InPlace)
441 | Ast_cocci.CONTEXT (_,any_befaft) ->
442 (match any_befaft with
443 | Ast_cocci.NOTHING ->
444 add_elem t Ctx
445 | Ast_cocci.BEFORE (xxs,_) ->
446 unparser xxs Unparse_cocci.Before;
447 add_elem t Ctx
448 | Ast_cocci.AFTER (xxs,_) ->
449 add_elem t Ctx;
450 unparser xxs Unparse_cocci.After;
451 | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
452 unparser xxs Unparse_cocci.Before;
453 add_elem t Ctx;
454 unparser yys Unparse_cocci.After;
455 )
456 | Ast_cocci.PLUS _ -> raise Impossible
457
458 in
459
460 toks +> List.iter expand_info;
461 List.rev !toks_out
462
463
464 (*****************************************************************************)
465 (* Tokens2 processing, filtering, adjusting *)
466 (*****************************************************************************)
467
468 let is_space = function
469 | T2(Parser_c.TCommentSpace _,_b,_i) -> true (* only whitespace *)
470 | _ -> false
471
472 let is_newline = function
473 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
474 | _ -> false
475
476 let is_whitespace = function
477 | (T2 (t,_b,_i)) ->
478 (match t with
479 | Parser_c.TCommentSpace _ -> true (* only whitespace *)
480 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
481 | _ -> false
482 )
483 | _ -> false
484
485 let is_minusable_comment = function
486 | (T2 (t,_b,_i)) ->
487 (match t with
488 | Parser_c.TCommentSpace _ (* only whitespace *)
489 (* patch: coccinelle *)
490 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
491 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
492 | Parser_c.TComment _
493 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
494 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
495 | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *)
496 -> true
497
498 | Parser_c.TCommentMisc _
499 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
500 -> false
501
502 | _ -> false
503 )
504 | _ -> false
505
506 let is_minusable_comment_nocpp = function
507 | (T2 (t,_b,_i)) ->
508 (match t with
509 | Parser_c.TCommentSpace _ (* only whitespace *)
510 (* patch: coccinelle *)
511 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
512 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
513 | Parser_c.TComment _ -> true
514 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
515 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
516 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
517 -> false
518
519 | Parser_c.TCommentMisc _
520 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _)
521 -> false
522
523 | _ -> false
524 )
525 | _ -> false
526
527 let all_coccis = function
528 Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _
529 | EatSpace2 -> true
530 | _ -> false
531
532 (*previously gave up if the first character was a newline, but not clear why*)
533 let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x
534
535 let set_minus_comment adj = function
536 | T2 (t,Ctx,idx) ->
537 let str = TH.str_of_tok t in
538 (match t with
539 | Parser_c.TCommentSpace _
540 (* patch: coccinelle *)
541 | Parser_c.TCommentNewline _ -> ()
542
543 | Parser_c.TComment _
544 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
545 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
546 | Parser_c.TCommentCpp (Token_c.CppDirective, _)
547 ->
548 pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
549 (TH.line_of_tok t) str)
550 | _ -> raise Impossible
551 );
552 T2 (t, Min adj, idx)
553 (* patch: coccinelle *)
554 | T2 (t,Min adj,idx) as x -> x
555 | Fake2 _ as x -> x
556 | _ -> raise Impossible
557
558 let set_minus_comment_or_plus adj = function
559 Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _
560 | EatSpace2 as x -> x
561 | x -> set_minus_comment adj x
562
563 let drop_minus xs =
564 xs +> Common.exclude (function
565 | T2 (t,Min adj,_) -> true
566 | _ -> false
567 )
568
569 let drop_expanded xs =
570 xs +> Common.exclude (function
571 | T2 (t,_,_) when TH.is_expanded t -> true
572 | _ -> false
573 )
574
575 let drop_fake xs =
576 xs +> Common.exclude (function
577 | Fake2 _ -> true
578 | _ -> false
579 )
580
581 let remove_minus_and_between_and_expanded_and_fake xs =
582
583 (* get rid of expanded tok *)
584 let xs = drop_expanded xs in
585
586 let minus_or_comment = function
587 T2(_,Min adj,_) -> true
588 | x -> is_minusable_comment x in
589
590 let minus_or_comment_nocpp = function
591 T2(_,Min adj,_) -> true
592 | x -> is_minusable_comment_nocpp x in
593
594 let common_adj (index1,adj1) (index2,adj2) =
595 let same_adj = (* same adjacency info *)
596 match (adj1,adj2) with
597 (Ast_cocci.ADJ adj1,Ast_cocci.ADJ adj2) -> adj1 = adj2
598 | (Ast_cocci.ALLMINUS,_) | (_,Ast_cocci.ALLMINUS) -> true in
599 same_adj &&
600 (* non-empty intersection of witness trees *)
601 not ((Common.inter_set index1 index2) = []) in
602
603 (* new idea: collects regions not containing non-space context code
604 if two adjacent adjacent minus tokens satisfy common_adj then delete
605 all spaces, comments etc between them
606 if two adjacent minus tokens do not satisfy common_adj only delete
607 the spaces between them if there are no comments, etc.
608 if the region contain no plus code and is both preceded and followed
609 by a newline, delete the initial newline. *)
610
611 let rec adjust_around_minus = function
612 [] -> []
613 | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::
614 ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs ->
615 let (minus_list,rest) = Common.span not_context (t1::xs) in
616 let contains_plus = List.exists is_plus minus_list in
617 let x =
618 match List.rev minus_list with
619 (T2(Parser_c.TCommentNewline c,_b,_i))::rest
620 when List.for_all minus_or_comment minus_list ->
621 set_minus_comment_or_plus adj1 x
622 | _ -> x in
623 x :: adjust_within_minus contains_plus minus_list @
624 adjust_around_minus rest
625 | ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs ->
626 let (minus_list,rest) = Common.span not_context (t1::xs) in
627 let contains_plus = List.exists is_plus minus_list in
628 adjust_within_minus contains_plus minus_list @ adjust_around_minus rest
629 | x::xs ->
630 x :: adjust_around_minus xs
631 and adjust_within_minus cp (* contains plus *) = function
632 ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs ->
633 let not_minus = function T2(_,Min _,_) -> false | _ -> true in
634 let (not_minus_list,rest) = Common.span not_minus xs in
635 t1 ::
636 (match rest with
637 ((Fake2(Min adj2) | T2(_,Min adj2,_)) as t2)::xs
638 when common_adj adj1 adj2 ->
639 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
640 @ (adjust_within_minus cp (t2::xs))
641 | ((Fake2(Min adj2) | T2(_,Min adj2,_)) as t2)::xs ->
642 if not cp && List.for_all is_whitespace not_minus_list
643 then
644 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
645 @ (adjust_within_minus cp (t2::xs))
646 else
647 not_minus_list @ (adjust_within_minus cp (t2::xs))
648 | _ ->
649 if cp
650 then xs
651 else
652 (* remove spaces after removed stuff, eg a comma after a
653 function argument *)
654 let (spaces,rest) = Common.span is_space xs in
655 (List.map (set_minus_comment_or_plus adj1) spaces)
656 @ rest)
657 | xs -> failwith "should always start with minus"
658 and not_context = function
659 (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false
660 | _ -> true
661 and is_plus = function
662 C2 _ | Comma _ | Cocci2 _ -> true
663 | _ -> false in
664
665 let xs = adjust_around_minus xs in
666
667 (* get rid of fake tok *)
668 let xs = drop_fake xs in
669
670 (* this drops blank lines after a brace introduced by removing code *)
671 let minus_or_comment_nonl = function
672 T2(_,Min adj,_) -> true
673 | T2(Parser_c.TCommentNewline _,_b,_i) -> false
674 | x -> is_minusable_comment x in
675
676 let rec adjust_after_brace = function
677 [] -> []
678 | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs)
679 when str_of_token2 x =$= "{" ->
680 let (between_minus,rest) = Common.span minus_or_comment_nonl xs in
681 let is_whitespace = function
682 T2(Parser_c.TCommentSpace _,_b,_i)
683 (* patch: cocci *)
684 | T2(Parser_c.TCommentNewline _,_b,_i) -> true
685 | _ -> false in
686 let (newlines,rest) = Common.span is_whitespace rest in
687 let (drop_newlines,last_newline) =
688 let rec loop = function
689 [] -> ([],[])
690 | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest ->
691 (List.rev rest,[x])
692 | x::xs ->
693 let (drop_newlines,last_newline) = loop xs in
694 (drop_newlines,x::last_newline) in
695 loop (List.rev newlines) in
696 x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@
697 last_newline@
698 adjust_after_brace rest
699 | x::xs -> x::adjust_after_brace xs in
700
701 let xs = adjust_after_brace xs in
702
703 (* search backwards from context } over spaces until reaching a newline.
704 then go back over all minus code until reaching some context or + code.
705 get rid of all intervening spaces, newlines, and comments
706 input is reversed *)
707 let rec adjust_before_brace = function
708 [] -> []
709 | ((T2(t,Ctx,_)) as x)::xs when str_of_token2 x =$= "}" or is_newline x ->
710 let (outer_spaces,rest) = Common.span is_space xs in
711 x :: outer_spaces @
712 (match rest with
713 ((T2 (Parser_c.TCommentNewline _,Ctx,_i)) as h) ::
714 (* the rest of this code is the same as from_newline below
715 but merging them seems to be error prone... *)
716 ((T2 (t, Min adj, idx)) as m) :: rest ->
717 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
718 h :: m ::
719 (List.map (set_minus_comment adj) spaces) @
720 (adjust_before_brace rest)
721 | _ -> adjust_before_brace rest)
722 | x::xs -> x :: (adjust_before_brace xs) in
723
724 let from_newline = function
725 ((T2 (t, Min adj, idx)) as m) :: rest ->
726 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
727 m ::
728 (List.map (set_minus_comment adj) spaces) @
729 (adjust_before_brace rest)
730 | ((T2 (t0, Ctx, idx0)) as m0) :: ((T2 (t, Min adj, idx)) as m) :: rest
731 when TH.str_of_tok t0 = "" ->
732 (* This is for the case of a #define that is completely deleted,
733 because a #define has a strange EOL token at the end.
734 We hope there i no other kind of token that is represented by
735 "", but it seems like changing the kind of token might break
736 the end of entity recognition in the C parser.
737 See parsing_hacks.ml *)
738 let (spaces,rest) = Common.span minus_or_comment_nocpp rest in
739 m0 :: m ::
740 (List.map (set_minus_comment adj) spaces) @
741 (adjust_before_brace rest)
742 | rest -> adjust_before_brace rest in
743
744 let xs = List.rev (from_newline (List.rev xs)) in
745 let xs = drop_minus xs in
746 xs
747
748 (* things that should not be followed by space - boundary between SmPL
749 code and C code *)
750 let adjust_eat_space toks =
751 let rec loop = function
752 [] -> []
753 | EatSpace2 :: x :: rest when is_space x -> loop rest
754 | EatSpace2 :: rest -> loop rest
755 | x :: xs -> x :: loop xs in
756 loop toks
757
758 (* normally, in C code, a semicolon is not preceded by a space or newline *)
759 let adjust_before_semicolon toks =
760 let toks = List.rev toks in
761 let rec search_semic = function
762 [] -> []
763 | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
764 if List.mem (str_of_token2 x) [";";")";","]
765 then x :: search_semic (search_minus false xs)
766 else x :: search_semic xs
767 | x::xs -> x :: search_semic xs
768 and search_minus seen_minus xs =
769 let (spaces, rest) = Common.span is_space xs in
770 (* only delete spaces if something is actually deleted *)
771 match rest with
772 ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest
773 | _ -> if seen_minus then rest else xs in
774 List.rev (search_semic toks)
775
776 (* normally, in C code, a ( is not followed by a space or newline *)
777 let adjust_after_paren toks =
778 let rec search_paren = function
779 [] -> []
780 | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs ->
781 if List.mem (str_of_token2 x) ["("] (* other things? *)
782 then x :: search_paren(search_minus false xs)
783 else x :: search_paren xs
784 | x::xs -> x :: search_paren xs
785 and search_minus seen_minus xs =
786 let (spaces, rest) = Common.span is_whitespace xs in
787 (* only delete spaces if something is actually deleted *)
788 match rest with
789 ((T2(_,Min _,_)) as a)::rerest -> (* minus *)
790 a :: search_minus true rerest
791 | ((T2(_,Ctx,_)) as a)::rerest when str_of_token2 a = "," ->
792 (* comma after ( will be deleted, so consider it as minus code
793 already *)
794 a :: search_minus true rerest
795 | _ -> if seen_minus then rest else xs in (* drop trailing space *)
796 search_paren toks
797
798 (* this is for the case where braces are added around an if branch *)
799 let paren_then_brace toks =
800 let rec search_paren = function
801 [] -> []
802 | ((T2(_,Ctx,_)) as x)::xs ->
803 if List.mem (str_of_token2 x) [")"]
804 then x :: search_paren(search_plus xs)
805 else x :: search_paren xs
806 | x::xs -> x :: search_paren xs
807 and search_plus xs =
808 let (spaces, rest) = Common.span is_whitespace xs in
809 match rest with
810 (* move the brace up to the previous line *)
811 ((Cocci2("{",_,_,_,_)) as x) :: (((Cocci2 _) :: _) as rest) ->
812 (C2 " ") :: x :: spaces @ rest
813 | _ -> xs in
814 search_paren toks
815
816 let is_ident_like s = s ==~ Common.regexp_alpha
817
818 let rec drop_space_at_endline = function
819 [] -> []
820 | [x] -> [x]
821 | (C2 " ") ::
822 ((((T2(Parser_c.TCommentSpace _,Ctx,_)) | Cocci2("\n",_,_,_,_) |
823 (T2(Parser_c.TCommentNewline _,Ctx,_))) :: _) as rest) ->
824 (* when unparse_cocci doesn't know whether space is needed *)
825 drop_space_at_endline rest
826 | ((T2(Parser_c.TCommentSpace _,Ctx,_i)) as a)::rest ->
827 let (outer_spaces,rest) = Common.span is_space rest in
828 let minus_or_comment_or_space_nocpp = function
829 T2(_,Min adj,_) -> true
830 | (T2(Parser_c.TCommentSpace _,Ctx,_i)) -> true
831 | (T2(Parser_c.TCommentNewline _,Ctx,_i)) -> false
832 | x -> false in
833 let (minus,rest) = Common.span minus_or_comment_or_space_nocpp rest in
834 let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in
835 if List.exists (function T2(_,Min adj,_) -> true | _ -> false) minus
836 then
837 match rest with
838 ((T2(Parser_c.TCommentNewline _,Ctx,_i)) as a)::rest ->
839 (* drop trailing spaces *)
840 minus@a::(drop_space_at_endline rest)
841 | _ -> fail()
842 else fail()
843 | a :: rest ->
844 a :: drop_space_at_endline rest
845
846 (* if a removed ( is between two tokens, then add a space *)
847 let rec paren_to_space = function
848 [] -> []
849 | [x] -> [x]
850 | [x;y] -> [x;y]
851 | ((T2(_,Ctx,_)) as a)::((T2(t,Min _,_)) as b)::((T2(_,Ctx,_)) as c)::rest
852 when not (is_whitespace a) && TH.str_of_tok t = "(" ->
853 a :: b :: (C2 " ") :: (paren_to_space (c :: rest))
854 | a :: rest -> a :: (paren_to_space rest)
855
856 let rec add_space xs =
857 match xs with
858 | [] -> []
859 | [x] -> [x]
860 | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
861 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
862 not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly ->
863 (* this only works within a line. could consider whether
864 something should be done to add newlines too, rather than
865 printing them explicitly in unparse_cocci. *)
866 x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
867 | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
868 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
869 not (lnx = -1) && lnx < lny && not (rcolx = -1) ->
870 (* this only works within a line. could consider whether
871 something should be done to add newlines too, rather than
872 printing them explicitly in unparse_cocci. *)
873 x::C2 (String.make (lny-lnx) '\n')::
874 C2 (String.make (lcoly-1) ' '):: (* -1 is for the + *)
875 add_space (y::xs)
876 | ((T2(_,Ctx,_)) as x)::((Cocci2 _) as y)::xs -> (* add space on boundary *)
877 let sx = str_of_token2 x in
878 let sy = str_of_token2 y in
879 if is_ident_like sx && (is_ident_like sy or List.mem sy ["="])
880 then x::C2 " "::(add_space (y::xs))
881 else x::(add_space (y::xs))
882 | x::y::xs -> (* not boundary, not sure if it is possible *)
883 let sx = str_of_token2 x in
884 let sy = str_of_token2 y in
885 if is_ident_like sx && is_ident_like sy
886 then x::C2 " "::(add_space (y::xs))
887 else x::(add_space (y::xs))
888
889 (* A fake comma is added at the end of an unordered initlist or a enum
890 decl, if the initlist or enum doesn't already end in a comma. This is only
891 needed if there is + code, ie if we see Cocci after it in the code sequence *)
892
893 let rec drop_end_comma = function
894 [] -> []
895 | [x] -> [x]
896 | ((Comma ",") as x) :: rest ->
897 let (newlines,rest2) = Common.span is_whitespace rest in
898 (match rest2 with
899 (Cocci2 _) :: _ -> x :: drop_end_comma rest
900 | _ -> drop_end_comma rest)
901 | x :: xs -> x :: drop_end_comma xs
902
903 (* The following only works for the outermost function call. Stack records
904 the column of all open parentheses. Space_cell contains the most recent
905 comma in the outermost function call. The goal is to decide whether this
906 should be followed by a space or a newline and indent. *)
907 let add_newlines toks tabbing_unit =
908 let create_indent n =
909 let (tu,tlen) =
910 match tabbing_unit with
911 Some ("\t",_) -> ("\t",8)
912 | Some ("",_) -> ("\t",8) (* not sure why... *)
913 | Some (s,_) -> (s,String.length s) (* assuming only spaces *)
914 | None -> ("\t",8) in
915 let rec loop seen =
916 if seen + tlen <= n
917 then tu ^ loop (seen + tlen)
918 else String.make (n-seen) ' ' in
919 loop 0 in
920 let check_for_newline count x = function
921 Some (start,space_cell) when count > Flag_parsing_c.max_width ->
922 space_cell := "\n"^(create_indent x);
923 Some (x + (count - start))
924 | _ -> None in
925 (* the following is for strings that may contain newline *)
926 let string_length s count =
927 let l = list_of_string s in
928 List.fold_left
929 (function count ->
930 function
931 '\t' -> count + 8
932 | '\n' -> 0
933 | c -> count + 1)
934 count l in
935 let rec loop info count = function
936 [] -> []
937 | ((T2(tok,_,_)) as a)::xs ->
938 a :: loop info (string_length (TH.str_of_tok tok) count) xs
939 | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs ->
940 let (stack,space_cell) = info in
941 let rest =
942 match hint with
943 None -> loop info (string_length s count) xs
944 | Some Unparse_cocci.StartBox ->
945 let count = string_length s count in
946 loop (count::stack,space_cell) count xs
947 | Some Unparse_cocci.EndBox ->
948 let count = string_length s count in
949 (match stack with
950 [x] ->
951 (match check_for_newline count x space_cell with
952 Some count -> loop ([],None) count xs
953 | None -> loop ([],None) count xs)
954 | _ -> loop (List.tl stack,space_cell) count xs)
955 | Some (Unparse_cocci.SpaceOrNewline sp) ->
956 let count = string_length s (count + 1 (*space*)) in
957 (match stack with
958 [x] ->
959 (match check_for_newline count x space_cell with
960 Some count -> loop (stack,Some (x,sp)) count xs
961 | None -> loop (stack,Some (count,sp)) count xs)
962 | _ -> loop info count xs) in
963 a :: rest
964 | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs
965 | ((Comma(s)) as a)::xs -> a :: loop info (string_length s count) xs
966 | Fake2 _ :: _ | Indent_cocci2 :: _
967 | Unindent_cocci2 _::_ | EatSpace2::_ ->
968 failwith "unexpected fake, indent, unindent, or eatspace" in
969 let redo_spaces prev = function
970 Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) ->
971 C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev
972 | t -> t::prev in
973 (match !Flag_parsing_c.spacing with
974 Flag_parsing_c.SMPL -> toks
975 | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks)))
976
977 (* When insert some new code, because of a + in a SP, we must add this
978 * code at the right place, with the good indentation. So each time we
979 * encounter some spacing info, with some newline, we maintain the
980 * current indentation level used.
981 *
982 * TODO problems: not accurate. ex: TODO
983 *
984 * TODO: if in #define region, should add a \ \n
985 *)
986 let new_tabbing2 space =
987 (list_of_string space)
988 +> List.rev
989 +> Common.take_until (fun c -> c =<= '\n')
990 +> List.rev
991 +> List.map string_of_char
992 +> String.concat ""
993
994 let new_tabbing a =
995 Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
996
997
998 let rec adjust_indentation xs =
999
1000 let _current_tabbing = ref ([] : string list) in
1001 let tabbing_unit = ref None in
1002
1003 let string_of_list l = String.concat "" (List.map string_of_char l) in
1004
1005 (* try to pick a tabbing unit for the plus code *)
1006 let adjust_tabbing_unit old_tab new_tab =
1007 if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
1008 then
1009 let old_tab = list_of_string old_tab in
1010 let new_tab = list_of_string new_tab in
1011 let rec loop = function
1012 ([],new_tab) ->
1013 tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
1014 | (_,[]) -> failwith "not possible"
1015 | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
1016 loop (old_tab,new_tab) in
1017
1018 (*
1019 let remtab tu current_tab =
1020 let current_tab = List.rev(list_of_string current_tab) in
1021 let rec loop = function
1022 ([],new_tab) -> string_of_list (List.rev new_tab)
1023 | (_,[]) -> (-*weird; tabbing unit used up more than the current tab*-)
1024 ""
1025 | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
1026 | (_,ns) -> (-* mismatch; remove what we can *-)
1027 string_of_list (List.rev ns) in
1028 loop (tu,current_tab) in
1029 *)
1030
1031 let rec find_first_tab started = function
1032 [] -> ()
1033 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
1034 find_first_tab true xs
1035 (* patch: coccinelle *)
1036 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
1037 when started ->
1038 let s = str_of_token2 x +> new_tabbing in
1039 tabbing_unit := Some (s,List.rev (list_of_string s))
1040 | x::xs -> find_first_tab started xs in
1041 find_first_tab false xs;
1042
1043 let rec balanced ct = function
1044 [] -> ct >= 0
1045 | ((T2(tok,_,_)) as x)::xs ->
1046 (match str_of_token2 x with
1047 "(" -> balanced (ct+1) xs
1048 | ")" -> balanced (ct-1) xs
1049 | _ -> balanced ct xs)
1050 | x::xs -> balanced ct xs in
1051
1052 let rec aux started xs =
1053 match xs with
1054 | [] -> []
1055 (* patch: coccinelle *)
1056 | (T2 (Parser_c.TCommentNewline _,_,_))::Unindent_cocci2(false)::xs
1057 | (Cocci2("\n",_,_,_,_))::Unindent_cocci2(false)::xs ->
1058 (C2 "\n")::aux started xs
1059 | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
1060 ((Cocci2 ("{",_,_,_,_)) as a)::xs
1061 when started && str_of_token2 x =$= ")" ->
1062 (* to be done for if, etc, but not for a function header *)
1063 x::(C2 " ")::a::(aux started xs)
1064 | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs
1065 when balanced 0 (fst(Common.span (function x -> not(is_newline x)) xs)) ->
1066 let old_tabbing = !_current_tabbing in
1067 str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := [s]);
1068 (* only trust the indentation after the first { *)
1069 (if started
1070 then
1071 adjust_tabbing_unit
1072 (String.concat "" old_tabbing)
1073 (String.concat "" !_current_tabbing));
1074 let coccis_rest = Common.span all_coccis xs in
1075 (match coccis_rest with
1076 (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
1077 (* the case where cocci code has been added before a close } *)
1078 x::aux started (Indent_cocci2::xs)
1079 | _ -> x::aux started xs)
1080 | Indent_cocci2::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
1081 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL ->
1082 let tu = String.make (lcoly-1) ' ' in
1083 _current_tabbing := tu::(!_current_tabbing);
1084 C2 (tu)::aux started (y::xs)
1085 | Indent_cocci2::xs ->
1086 (match !tabbing_unit with
1087 None -> aux started xs
1088 | Some (tu,_) ->
1089 _current_tabbing := tu::(!_current_tabbing);
1090 (* can't be C2, for later phases *)
1091 Cocci2 (tu,-1,-1,-1,None)::aux started xs)
1092 | Unindent_cocci2(permanent)::xs ->
1093 (match !_current_tabbing with
1094 [] -> aux started xs
1095 | _::new_tabbing ->
1096 _current_tabbing := new_tabbing;
1097 aux started xs)
1098 (* border between existing code and cocci code *)
1099 | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs
1100 when str_of_token2 x =$= "{" ->
1101 x::aux true (y::Indent_cocci2::xs)
1102 | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
1103 when str_of_token2 y =$= "}" ->
1104 x::aux started (y::Unindent_cocci2 true::xs)
1105 (* starting the body of the function *)
1106 | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs
1107 | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs
1108 | ((Cocci2("\n",_,_,_,_)) as x)::xs ->
1109 (* dont inline in expr because of weird eval order of ocaml *)
1110 let s = String.concat "" !_current_tabbing in
1111 (* can't be C2, for later phases *)
1112 x::Cocci2 (s,-1,-1,-1,None)::aux started xs
1113 | x::xs -> x::aux started xs in
1114 (aux false xs,!tabbing_unit)
1115
1116
1117 let rec find_paren_comma = function
1118 | [] -> ()
1119
1120 (* do nothing if was like this in original file *)
1121 | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
1122 ::xs when p2 =|= p1 + 1 ->
1123 find_paren_comma (x2::xs)
1124
1125 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
1126 ::xs when p2 =|= p1 + 1 ->
1127 find_paren_comma (x2::xs)
1128
1129 | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
1130 ::xs when p2 =|= p1 + 1 ->
1131 find_paren_comma (x2::xs)
1132
1133 (* otherwise yes can adjust *)
1134 | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
1135 x2.remove <- true;
1136 find_paren_comma (x2::xs)
1137 | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
1138 x1.remove <- true;
1139 find_paren_comma (x2::xs)
1140
1141 | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
1142 x1.remove <- true;
1143 find_paren_comma (x2::xs)
1144
1145 | x::xs ->
1146 find_paren_comma xs
1147
1148
1149 let fix_tokens toks =
1150 let toks = toks +> List.map mk_token_extended in
1151
1152 let cleaner = toks +> Common.exclude (function
1153 | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
1154 | _ -> false
1155 ) in
1156 find_paren_comma cleaner;
1157
1158 let toks = rebuild_tokens_extented toks in
1159 toks +> List.map (fun x -> x.tok2)
1160
1161
1162
1163 (*****************************************************************************)
1164 (* Final unparsing (and debugging support) *)
1165 (*****************************************************************************)
1166
1167 (* for debugging *)
1168 type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
1169
1170 let kind_of_token2 = function
1171 | Fake2 _ -> KFake
1172 | Cocci2 _ -> KCocci
1173 | C2 _ -> KC
1174 | Comma _ -> KC
1175 | T2 (t,_,_) ->
1176 (match TH.pinfo_of_tok t with
1177 | ExpandedTok _ -> KExpanded
1178 | OriginTok _ -> KOrigin
1179 | FakeTok _ -> raise Impossible (* now a Fake2 *)
1180 | AbstractLineTok _ -> raise Impossible (* now a KC *)
1181 )
1182 | Unindent_cocci2 _ | Indent_cocci2 | EatSpace2 -> raise Impossible
1183
1184 let end_mark = "!"
1185
1186 let start_mark = function
1187 | KFake -> "!F!"
1188 | KCocci -> "!S!"
1189 | KC -> "!A!"
1190 | KExpanded -> "!E!"
1191 | KOrigin -> ""
1192
1193 let print_all_tokens2 pr xs =
1194 if !Flag_parsing_c.debug_unparsing
1195 then
1196 let current_kind = ref KOrigin in
1197 xs +> List.iter (fun t ->
1198 let newkind = kind_of_token2 t in
1199 if newkind =*= !current_kind
1200 then pr (str_of_token2 t)
1201 else begin
1202 pr (end_mark);
1203 pr (start_mark newkind);
1204 pr (str_of_token2 t);
1205 current_kind := newkind
1206 end
1207 );
1208 else
1209 xs +> List.iter (fun x -> pr (str_of_token2 x))
1210
1211
1212
1213
1214 (*****************************************************************************)
1215 (* Entry points *)
1216 (*****************************************************************************)
1217
1218 (* old: PPviatok was made in the beginning to allow to pretty print a
1219 * complete C file, including a modified C file by transformation.ml,
1220 * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
1221 * maybe flow_to_ast) all the cases. Indeed we don't need to do some
1222 * fancy stuff when a function was not modified at all. Just need to
1223 * print the list of token as-is. But now pretty_print_c.ml handles
1224 * almost everything so maybe less useful. Maybe PPviatok allows to
1225 * optimize a little the pretty printing.
1226 *
1227 * update: now have PPviastr which goes even faster than PPviatok, so
1228 * PPviatok has disappeared.
1229 *)
1230
1231 type ppmethod = PPnormal | PPviastr
1232
1233
1234
1235
1236 (* The pp_program function will call pretty_print_c.ml with a special
1237 * function to print the leaf components, the tokens. When we want to
1238 * print a token, we need to print also maybe the space and comments that
1239 * were close to it in the original file (and that was omitted during the
1240 * parsing phase), and honor what the cocci-info attached to the token says.
1241 * Maybe we will not print the token if it's a MINUS-token, and maybe we will
1242 * print it and also print some cocci-code attached in a PLUS to it.
1243 * So we will also maybe call unparse_cocci. Because the cocci-code may
1244 * contain metavariables, unparse_cocci will in fact sometimes call back
1245 * pretty_print_c (which will this time don't call back again unparse_cocci)
1246 *)
1247
1248 let pp_program2 xs outfile =
1249 Common.with_open_outfile outfile (fun (pr,chan) ->
1250 let pr s =
1251 if !Flag_parsing_c.debug_unparsing
1252 then begin pr2_no_nl s; flush stderr end
1253 else pr s
1254 (* flush chan; *)
1255 (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1256 in
1257
1258 xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
1259 (* here can still work on ast *)
1260 let e = remove_useless_fakeInfo_struct e in
1261
1262 match ppmethod with
1263 | PPnormal ->
1264 (* now work on tokens *)
1265 (* phase1: just get all the tokens, all the information *)
1266 assert(toks_e +> List.for_all (fun t ->
1267 TH.is_origin t or TH.is_expanded t
1268 ));
1269 let toks = get_fakeInfo_and_tokens e toks_e in
1270 let toks = displace_fake_nodes toks in
1271 (* assert Origin;ExpandedTok;Faketok *)
1272 let toks = expand_mcode toks in
1273
1274 (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1275 * and no tag information, just NOTHING. *)
1276
1277 let toks =
1278 if !Flag.sgrep_mode2
1279 then
1280 (* nothing else to do for sgrep *)
1281 drop_expanded(drop_fake(drop_minus toks))
1282 else
1283 (* phase2: can now start to filter and adjust *)
1284 (let (toks,tu) = adjust_indentation toks in
1285 let toks = adjust_eat_space toks in
1286 let toks = adjust_before_semicolon toks in(*before remove minus*)
1287 let toks = adjust_after_paren toks in(*also before remove minus*)
1288 let toks = drop_space_at_endline toks in
1289 let toks = paren_to_space toks in
1290 let toks = drop_end_comma toks in
1291 let toks = remove_minus_and_between_and_expanded_and_fake toks in
1292 (* assert Origin + Cocci + C and no minus *)
1293 let toks = add_space toks in
1294 let toks = add_newlines toks tu in
1295 let toks = paren_then_brace toks in
1296 let toks = fix_tokens toks in
1297 toks) in
1298
1299 (* in theory here could reparse and rework the ast! or
1300 * apply some SP. Not before cos julia may have generated
1301 * not parsable file. Need do unparsing_tricks call before being
1302 * ready to reparse. *)
1303 print_all_tokens2 pr toks;
1304
1305 | PPviastr -> pr str
1306 )
1307 )
1308
1309 let pp_program a b =
1310 Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
1311
1312
1313 let pp_program_default xs outfile =
1314 let xs' = xs +> List.map (fun x -> x, PPnormal) in
1315 pp_program xs' outfile